runtime: allow specifying logical/physical sector size for block devices

Add two new configuration knobs that control the logical and physical
sector sizes advertised by virtio-blk devices to the guest:

  block_device_logical_sector_size  (config file)
  block_device_physical_sector_size (config file)

  io.katacontainers.config.hypervisor.blk_logical_sector_size  (annotation)
  io.katacontainers.config.hypervisor.blk_physical_sector_size (annotation)

The annotation names are abbreviated relative to the config file keys
because Kubernetes enforces a 63-character limit on annotation name
segments, and the full names would exceed it.

Both settings default to 0 (let QEMU decide). When set, they are passed
as logical_block_size and physical_block_size in the QMP device_add
command during block device hotplug.

Setting logical_sector_size smaller than the container filesystem
block size will cause EINVAL on mount. The physical_sector_size can
always be set independently.

Values must be 0 or a power of 2 in the range [512, 65536]; other
values are rejected with an error at sandbox creation time.

Signed-off-by: PiotrProkop <pprokop@nvidia.com>
This commit is contained in:
PiotrProkop
2026-03-19 17:28:09 +01:00
committed by Fabiano Fidêncio
parent 30e030e18e
commit 64735222c6
23 changed files with 682 additions and 242 deletions

View File

@@ -46,6 +46,8 @@ There are several kinds of Kata configurations and they are listed below.
| `io.katacontainers.config.hypervisor.block_device_cache_noflush` | `boolean` | Denotes whether flush requests for the device are ignored |
| `io.katacontainers.config.hypervisor.block_device_cache_set` | `boolean` | cache-related options will be set to block devices or not |
| `io.katacontainers.config.hypervisor.block_device_driver` | string | the driver to be used for block device, valid values are `virtio-blk`, `virtio-scsi`, `nvdimm`|
| `io.katacontainers.config.hypervisor.blk_logical_sector_size` | uint32 | logical sector size in bytes reported by block devices to the guest (0 = hypervisor default, must be a power of 2 between 512 and 65536) |
| `io.katacontainers.config.hypervisor.blk_physical_sector_size` | uint32 | physical sector size in bytes reported by block devices to the guest (0 = hypervisor default, must be a power of 2 between 512 and 65536) |
| `io.katacontainers.config.hypervisor.cpu_features` | `string` | Comma-separated list of CPU features to pass to the CPU (QEMU) |
| `io.katacontainers.config.hypervisor.default_max_vcpus` | uint32| the maximum number of vCPUs allocated for the VM by the hypervisor |
| `io.katacontainers.config.hypervisor.default_memory` | uint32| the memory assigned for a VM by the hypervisor in `MiB` |

View File

@@ -235,6 +235,16 @@ block_device_cache_direct = false
# Default false
block_device_cache_noflush = false
# Specifies the logical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_logical_sector_size = 0
# Specifies the physical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_physical_sector_size = 0
# Enable iothreads (data-plane) to be used. This causes IO to be
# handled in a separate IO thread. This is currently only implemented
# for SCSI.

View File

@@ -247,6 +247,16 @@ block_device_cache_direct = false
# Default false
block_device_cache_noflush = false
# Specifies the logical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_logical_sector_size = 0
# Specifies the physical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_physical_sector_size = 0
# Enable iothreads (data-plane) to be used. This causes IO to be
# handled in a separate IO thread. This is currently implemented
# for virtio-scsi and virtio-blk.

View File

@@ -287,6 +287,16 @@ block_device_cache_direct = false
# Default false
block_device_cache_noflush = false
# Specifies the logical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_logical_sector_size = 0
# Specifies the physical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_physical_sector_size = 0
# Enable iothreads (data-plane) to be used. This causes IO to be
# handled in a separate IO thread. This is currently implemented
# for virtio-scsi and virtio-blk.

View File

@@ -264,6 +264,16 @@ block_device_cache_direct = false
# Default false
block_device_cache_noflush = false
# Specifies the logical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_logical_sector_size = 0
# Specifies the physical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_physical_sector_size = 0
# Enable iothreads (data-plane) to be used. This causes IO to be
# handled in a separate IO thread. This is currently implemented
# for virtio-scsi and virtio-blk.

View File

@@ -246,6 +246,16 @@ block_device_cache_direct = false
# Default false
block_device_cache_noflush = false
# Specifies the logical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_logical_sector_size = 0
# Specifies the physical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_physical_sector_size = 0
# Enable iothreads (data-plane) to be used. This causes IO to be
# handled in a separate IO thread. This is currently implemented
# for virtio-scsi and virtio-blk.

View File

@@ -249,6 +249,16 @@ block_device_cache_direct = false
# Default false
block_device_cache_noflush = false
# Specifies the logical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_logical_sector_size = 0
# Specifies the physical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_physical_sector_size = 0
# Enable iothreads (data-plane) to be used. This causes IO to be
# handled in a separate IO thread. This is currently implemented
# for virtio-scsi and virtio-blk.

View File

@@ -281,6 +281,16 @@ block_device_cache_direct = false
# Default false
block_device_cache_noflush = false
# Specifies the logical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_logical_sector_size = 0
# Specifies the physical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_physical_sector_size = 0
# Enable iothreads (data-plane) to be used. This causes IO to be
# handled in a separate IO thread. This is currently implemented
# for virtio-scsi and virtio-blk.

View File

@@ -263,6 +263,16 @@ block_device_cache_direct = false
# Default false
block_device_cache_noflush = false
# Specifies the logical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_logical_sector_size = 0
# Specifies the physical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_physical_sector_size = 0
# Enable iothreads (data-plane) to be used. This causes IO to be
# handled in a separate IO thread. This is currently implemented
# for virtio-scsi and virtio-blk.

View File

@@ -241,6 +241,16 @@ block_device_cache_direct = false
# Default false
block_device_cache_noflush = false
# Specifies the logical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_logical_sector_size = 0
# Specifies the physical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_physical_sector_size = 0
# Enable iothreads (data-plane) to be used. This causes IO to be
# handled in a separate IO thread. This is currently implemented
# for virtio-scsi and virtio-blk.

View File

@@ -181,6 +181,16 @@ block_device_cache_direct = false
# Default false
block_device_cache_noflush = false
# Specifies the logical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_logical_sector_size = 0
# Specifies the physical sector size, in bytes, reported by block devices to the guest.
# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default.
# Default 0
block_device_physical_sector_size = 0
# Enable huge pages for VM RAM, default false
# Enabling this will result in the VM memory
# being allocated using huge pages.

View File

@@ -859,8 +859,10 @@ func (q *QMP) ExecuteBlockdevAddWithDriverCache(ctx context.Context, driver stri
// shared denotes if the drive can be shared allowing it to be passed more than once.
// disableModern indicates if virtio version 1.0 should be replaced by the
// former version 0.9, as there is a KVM bug that occurs when using virtio
// 1.0 in nested environments.
func (q *QMP) ExecuteDeviceAdd(ctx context.Context, blockdevID, devID, driver, bus, romfile string, shared, disableModern bool) error {
// 1.0 in nested environments. logicalBlockSize and physicalBlockSize specify
// the logical and physical block sizes for the device; if either is 0, the
// hypervisor default is used for that size.
func (q *QMP) ExecuteDeviceAdd(ctx context.Context, blockdevID, devID, driver, bus, romfile string, shared, disableModern bool, logicalBlockSize, physicalBlockSize uint32) error {
args := map[string]interface{}{
"id": devID,
"driver": driver,
@@ -886,6 +888,14 @@ func (q *QMP) ExecuteDeviceAdd(ctx context.Context, blockdevID, devID, driver, b
}
}
if logicalBlockSize > 0 {
args["logical_block_size"] = logicalBlockSize
}
if physicalBlockSize > 0 {
args["physical_block_size"] = physicalBlockSize
}
return q.executeCommand(ctx, "device_add", args, nil)
}
@@ -1108,8 +1118,9 @@ func (q *QMP) ExecuteDeviceDel(ctx context.Context, devID string) error {
// a block device. shared denotes if the drive can be shared allowing it to be passed more than once.
// disableModern indicates if virtio version 1.0 should be replaced by the
// former version 0.9, as there is a KVM bug that occurs when using virtio
// 1.0 in nested environments.
func (q *QMP) ExecutePCIDeviceAdd(ctx context.Context, blockdevID, devID, driver, addr, bus, romfile string, queues int, shared, disableModern bool, iothreadID string) error {
// 1.0 in nested environments. logicalBlockSize and physicalBlockSize specify the logical and
// physical sector sizes reported to the guest; set to 0 to use the hypervisor default.
func (q *QMP) ExecutePCIDeviceAdd(ctx context.Context, blockdevID, devID, driver, addr, bus, romfile string, queues int, shared, disableModern bool, iothreadID string, logicalBlockSize, physicalBlockSize uint32) error {
args := map[string]interface{}{
"id": devID,
"driver": driver,
@@ -1140,6 +1151,14 @@ func (q *QMP) ExecutePCIDeviceAdd(ctx context.Context, blockdevID, devID, driver
args["iothread"] = iothreadID
}
if logicalBlockSize > 0 {
args["logical_block_size"] = logicalBlockSize
}
if physicalBlockSize > 0 {
args["physical_block_size"] = physicalBlockSize
}
return q.executeCommand(ctx, "device_add", args, nil)
}

View File

@@ -208,6 +208,31 @@ func (b *qmpTestCommandBuffer) Write(p []byte) (int, error) {
b.cmds[currentCmd].name, gotCmdName)
result = "error"
}
// When expected args are provided, verify that each expected key/value
// is present in the actual QMP arguments. Existing tests pass nil args
// and are unaffected by this check.
if expectedArgs := b.cmds[currentCmd].args; expectedArgs != nil {
gotArgs, _ := cmdJSON["arguments"].(map[string]interface{})
for k, v := range expectedArgs {
got, ok := gotArgs[k]
if !ok {
b.t.Errorf("Command %s: missing expected argument %q", gotCmdName, k)
continue
}
// JSON numbers decode as float64
expectedFloat, expectedIsFloat := toFloat64(v)
gotFloat, gotIsFloat := toFloat64(got)
if expectedIsFloat && gotIsFloat {
if expectedFloat != gotFloat {
b.t.Errorf("Command %s: argument %q = %v, want %v", gotCmdName, k, got, v)
}
} else if fmt.Sprintf("%v", got) != fmt.Sprintf("%v", v) {
b.t.Errorf("Command %s: argument %q = %v, want %v", gotCmdName, k, got, v)
}
}
}
resultMap := make(map[string]interface{})
resultMap[result] = b.results[currentCmd].data
encodedRes, err := json.Marshal(&resultMap)
@@ -219,6 +244,26 @@ func (b *qmpTestCommandBuffer) Write(p []byte) (int, error) {
return len(p), nil
}
// toFloat64 attempts to convert a numeric value to float64 for comparison.
// encoding/json decodes every number into float64 when the target is an
// interface{}, while Go code may pass int, uint32, etc. This helper normalises
// both sides so they can be compared numerically.
//
// All built-in integer and floating-point types are accepted. Without this,
// an unlisted type would make the caller fall back to comparing "%v" strings,
// which differ between integers and large float64 values (for example
// "1048576" versus "1.048576e+06").
//
// The second return value is false when v is not a built-in numeric type
// (including nil), in which case the first return value is 0.
func toFloat64(v interface{}) (float64, bool) {
	switch n := v.(type) {
	case float64:
		return n, true
	case float32:
		return float64(n), true
	case int:
		return float64(n), true
	case int8:
		return float64(n), true
	case int16:
		return float64(n), true
	case int32:
		return float64(n), true
	case int64:
		return float64(n), true
	case uint:
		return float64(n), true
	case uint8:
		return float64(n), true
	case uint16:
		return float64(n), true
	case uint32:
		return float64(n), true
	case uint64:
		return float64(n), true
	default:
		return 0, false
	}
}
func checkVersion(t *testing.T, connectedCh <-chan *QMPVersion) *QMPVersion {
var version *QMPVersion
select {
@@ -605,7 +650,7 @@ func TestQMPDeviceAdd(t *testing.T) {
blockdevID := fmt.Sprintf("drive_%s", volumeUUID)
devID := fmt.Sprintf("device_%s", volumeUUID)
err := q.ExecuteDeviceAdd(context.Background(), blockdevID, devID,
"virtio-blk-pci", "", "", true, false)
"virtio-blk-pci", "", "", true, false, 0, 0)
if err != nil {
t.Fatalf("Unexpected error %v", err)
}
@@ -1070,7 +1115,31 @@ func TestQMPPCIDeviceAdd(t *testing.T) {
blockdevID := fmt.Sprintf("drive_%s", volumeUUID)
devID := fmt.Sprintf("device_%s", volumeUUID)
err := q.ExecutePCIDeviceAdd(context.Background(), blockdevID, devID,
"virtio-blk-pci", "0x1", "", "", 1, true, false, "")
"virtio-blk-pci", "0x1", "", "", 1, true, false, "", 0, 0)
if err != nil {
t.Fatalf("Unexpected error %v", err)
}
q.Shutdown()
<-disconnectedCh
}
// Checks that PCI block devices with explicit logical and physical block sizes are
// correctly added using device_add, and that the sizes appear in the QMP arguments.
func TestQMPPCIDeviceAddWithBlockSize(t *testing.T) {
connectedCh := make(chan *QMPVersion)
disconnectedCh := make(chan struct{})
buf := newQMPTestCommandBuffer(t)
buf.AddCommand("device_add", map[string]interface{}{
"logical_block_size": uint32(512),
"physical_block_size": uint32(4096),
}, "return", nil)
cfg := QMPConfig{Logger: qmpTestLogger{}}
q := startQMPLoop(buf, cfg, connectedCh, disconnectedCh)
q.version = checkVersion(t, connectedCh)
blockdevID := fmt.Sprintf("drive_%s", volumeUUID)
devID := fmt.Sprintf("device_%s", volumeUUID)
err := q.ExecutePCIDeviceAdd(context.Background(), blockdevID, devID,
"virtio-blk-pci", "0x1", "", "", 1, true, false, "", 512, 4096)
if err != nil {
t.Fatalf("Unexpected error %v", err)
}

View File

@@ -146,6 +146,8 @@ type hypervisor struct {
BlockDeviceCacheSet bool `toml:"block_device_cache_set"`
BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"`
BlockDeviceCacheNoflush bool `toml:"block_device_cache_noflush"`
BlockDeviceLogicalSectorSize uint32 `toml:"block_device_logical_sector_size"`
BlockDevicePhysicalSectorSize uint32 `toml:"block_device_physical_sector_size"`
EnableVhostUserStore bool `toml:"enable_vhost_user_store"`
VhostUserDeviceReconnect uint32 `toml:"vhost_user_reconnect_timeout_sec"`
DisableBlockDeviceUse bool `toml:"disable_block_device_use"`
@@ -593,6 +595,20 @@ func (h hypervisor) blockDeviceDriver() (string, error) {
return "", fmt.Errorf("Invalid hypervisor block storage driver %v specified (supported drivers: %v)", h.BlockDeviceDriver, supportedBlockDrivers)
}
// blockDeviceLogicalSectorSize returns the logical sector size taken from the
// hypervisor configuration. The value is checked with
// validateBlockDeviceSectorSize first; on failure 0 and the validation error
// are returned.
func (h hypervisor) blockDeviceLogicalSectorSize() (uint32, error) {
	size := h.BlockDeviceLogicalSectorSize

	err := validateBlockDeviceSectorSize(cfgBlockDeviceLogicalSectorSize, size)
	if err != nil {
		return 0, err
	}

	return size, nil
}
// blockDevicePhysicalSectorSize returns the physical sector size taken from
// the hypervisor configuration. The value is checked with
// validateBlockDeviceSectorSize first; on failure 0 and the validation error
// are returned.
func (h hypervisor) blockDevicePhysicalSectorSize() (uint32, error) {
	size := h.BlockDevicePhysicalSectorSize

	err := validateBlockDeviceSectorSize(cfgBlockDevicePhysicalSectorSize, size)
	if err != nil {
		return 0, err
	}

	return size, nil
}
func (h hypervisor) blockDeviceAIO() (string, error) {
supportedBlockAIO := []string{config.AIOIOUring, config.AIONative, config.AIOThreads}
@@ -877,6 +893,28 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
}, nil
}
// Names of the block device sector size settings. They are passed to
// validateBlockDeviceSectorSize as the field name that appears in its error
// message.
const (
cfgBlockDeviceLogicalSectorSize = "block_device_logical_sector_size"
cfgBlockDevicePhysicalSectorSize = "block_device_physical_sector_size"
)
// validateBlockDeviceSectorSize checks a single sector size value.
//
// A size of 0 is always accepted. Any other value must be a power of 2 within
// [512, 65536]; otherwise an error naming the offending setting (name) and
// value is returned.
func validateBlockDeviceSectorSize(name string, size uint32) error {
	const minSectorSize, maxSectorSize uint32 = 512, 65536

	switch {
	case size == 0:
		return nil
	case size < minSectorSize, size > maxSectorSize, size&(size-1) != 0:
		// size&(size-1) clears the lowest set bit, so it is non-zero
		// whenever more than one bit is set (not a power of 2).
		return fmt.Errorf("invalid %s %d: must be 0 or a power of 2 between 512 and 65536", name, size)
	default:
		return nil
	}
}
// validateBlockDeviceSectorSizes checks the logical and physical sector sizes
// against each other.
//
// The pair is only compared when both values are non-zero; in that case the
// logical size must not exceed the physical size, otherwise an error is
// returned.
func validateBlockDeviceSectorSizes(logical, physical uint32) error {
	// A zero on either side means there is nothing to compare.
	if logical == 0 || physical == 0 {
		return nil
	}
	if logical <= physical {
		return nil
	}
	return fmt.Errorf("invalid sector sizes: logical (%d) must not be larger than physical (%d)", logical, physical)
}
func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
hypervisor, err := h.path()
if err != nil {
@@ -973,88 +1011,104 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
return vc.HypervisorConfig{}, err
}
blockLogicalSectorSize, err := h.blockDeviceLogicalSectorSize()
if err != nil {
return vc.HypervisorConfig{}, err
}
blockPhysicalSectorSize, err := h.blockDevicePhysicalSectorSize()
if err != nil {
return vc.HypervisorConfig{}, err
}
if err := validateBlockDeviceSectorSizes(blockLogicalSectorSize, blockPhysicalSectorSize); err != nil {
return vc.HypervisorConfig{}, err
}
return vc.HypervisorConfig{
HypervisorPath: hypervisor,
HypervisorPathList: h.HypervisorPathList,
KernelPath: kernel,
InitrdPath: initrd,
ImagePath: image,
RootfsType: rootfsType,
FirmwarePath: firmware,
FirmwareVolumePath: firmwareVolume,
PFlash: pflashes,
MachineAccelerators: machineAccelerators,
CPUFeatures: cpuFeatures,
KernelParams: vc.DeserializeParams(vc.KernelParamFields(kernelParams)),
KernelVerityParams: h.kernelVerityParams(),
HypervisorMachineType: machineType,
QgsPort: h.qgsPort(),
NumVCPUsF: h.defaultVCPUs(),
DefaultMaxVCPUs: h.defaultMaxVCPUs(),
MemorySize: h.defaultMemSz(),
MemSlots: h.defaultMemSlots(),
MemOffset: h.defaultMemOffset(),
DefaultMaxMemorySize: h.defaultMaxMemSz(),
VirtioMem: h.VirtioMem,
EntropySource: h.GetEntropySource(),
EntropySourceList: h.EntropySourceList,
DefaultBridges: h.defaultBridges(),
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
SharedFS: sharedFS,
VirtioFSDaemon: h.VirtioFSDaemon,
VirtioFSDaemonList: h.VirtioFSDaemonList,
HypervisorLoglevel: h.defaultHypervisorLoglevel(),
VirtioFSCacheSize: h.VirtioFSCacheSize,
VirtioFSCache: h.defaultVirtioFSCache(),
VirtioFSQueueSize: h.VirtioFSQueueSize,
VirtioFSExtraArgs: h.VirtioFSExtraArgs,
MemPrealloc: h.MemPrealloc,
ReclaimGuestFreedMemory: h.ReclaimGuestFreedMemory,
HugePages: h.HugePages,
IOMMU: h.IOMMU,
IOMMUPlatform: h.getIOMMUPlatform(),
GuestNUMANodes: h.defaultGuestNUMANodes(),
FileBackedMemRootDir: h.FileBackedMemRootDir,
FileBackedMemRootList: h.FileBackedMemRootList,
Debug: h.Debug,
DisableNestingChecks: h.DisableNestingChecks,
BlockDeviceDriver: blockDriver,
BlockDeviceAIO: blockAIO,
BlockDeviceCacheSet: h.BlockDeviceCacheSet,
BlockDeviceCacheDirect: h.BlockDeviceCacheDirect,
BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush,
EnableIOThreads: h.EnableIOThreads,
IndepIOThreads: h.indepiothreads(),
Msize9p: h.msize9p(),
DisableImageNvdimm: h.DisableImageNvdimm,
HotPlugVFIO: h.hotPlugVFIO(),
ColdPlugVFIO: h.coldPlugVFIO(),
PCIeRootPort: h.pcieRootPort(),
PCIeSwitchPort: h.pcieSwitchPort(),
DisableVhostNet: h.DisableVhostNet,
EnableVhostUserStore: h.EnableVhostUserStore,
VhostUserStorePath: h.vhostUserStorePath(),
VhostUserStorePathList: h.VhostUserStorePathList,
VhostUserDeviceReconnect: h.VhostUserDeviceReconnect,
SeccompSandbox: h.SeccompSandbox,
GuestHookPath: h.guestHookPath(),
RxRateLimiterMaxRate: rxRateLimiterMaxRate,
TxRateLimiterMaxRate: txRateLimiterMaxRate,
EnableAnnotations: h.EnableAnnotations,
GuestMemoryDumpPath: h.GuestMemoryDumpPath,
GuestMemoryDumpPaging: h.GuestMemoryDumpPaging,
ConfidentialGuest: h.ConfidentialGuest,
SevSnpGuest: h.SevSnpGuest,
GuestSwap: h.GuestSwap,
Rootless: h.Rootless,
LegacySerial: h.LegacySerial,
DisableSeLinux: h.DisableSeLinux,
DisableGuestSeLinux: h.DisableGuestSeLinux,
ExtraMonitorSocket: extraMonitorSocket,
SnpIdBlock: h.SnpIdBlock,
SnpIdAuth: h.SnpIdAuth,
SnpGuestPolicy: h.SnpGuestPolicy,
MeasurementAlgo: h.GetMeasurementAlgo(),
HypervisorPath: hypervisor,
HypervisorPathList: h.HypervisorPathList,
KernelPath: kernel,
InitrdPath: initrd,
ImagePath: image,
RootfsType: rootfsType,
FirmwarePath: firmware,
FirmwareVolumePath: firmwareVolume,
PFlash: pflashes,
MachineAccelerators: machineAccelerators,
CPUFeatures: cpuFeatures,
KernelParams: vc.DeserializeParams(vc.KernelParamFields(kernelParams)),
KernelVerityParams: h.kernelVerityParams(),
HypervisorMachineType: machineType,
QgsPort: h.qgsPort(),
NumVCPUsF: h.defaultVCPUs(),
DefaultMaxVCPUs: h.defaultMaxVCPUs(),
MemorySize: h.defaultMemSz(),
MemSlots: h.defaultMemSlots(),
MemOffset: h.defaultMemOffset(),
DefaultMaxMemorySize: h.defaultMaxMemSz(),
VirtioMem: h.VirtioMem,
EntropySource: h.GetEntropySource(),
EntropySourceList: h.EntropySourceList,
DefaultBridges: h.defaultBridges(),
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
SharedFS: sharedFS,
VirtioFSDaemon: h.VirtioFSDaemon,
VirtioFSDaemonList: h.VirtioFSDaemonList,
HypervisorLoglevel: h.defaultHypervisorLoglevel(),
VirtioFSCacheSize: h.VirtioFSCacheSize,
VirtioFSCache: h.defaultVirtioFSCache(),
VirtioFSQueueSize: h.VirtioFSQueueSize,
VirtioFSExtraArgs: h.VirtioFSExtraArgs,
MemPrealloc: h.MemPrealloc,
ReclaimGuestFreedMemory: h.ReclaimGuestFreedMemory,
HugePages: h.HugePages,
IOMMU: h.IOMMU,
IOMMUPlatform: h.getIOMMUPlatform(),
GuestNUMANodes: h.defaultGuestNUMANodes(),
FileBackedMemRootDir: h.FileBackedMemRootDir,
FileBackedMemRootList: h.FileBackedMemRootList,
Debug: h.Debug,
DisableNestingChecks: h.DisableNestingChecks,
BlockDeviceDriver: blockDriver,
BlockDeviceAIO: blockAIO,
BlockDeviceCacheSet: h.BlockDeviceCacheSet,
BlockDeviceCacheDirect: h.BlockDeviceCacheDirect,
BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush,
BlockDeviceLogicalSectorSize: blockLogicalSectorSize,
BlockDevicePhysicalSectorSize: blockPhysicalSectorSize,
EnableIOThreads: h.EnableIOThreads,
IndepIOThreads: h.indepiothreads(),
Msize9p: h.msize9p(),
DisableImageNvdimm: h.DisableImageNvdimm,
HotPlugVFIO: h.hotPlugVFIO(),
ColdPlugVFIO: h.coldPlugVFIO(),
PCIeRootPort: h.pcieRootPort(),
PCIeSwitchPort: h.pcieSwitchPort(),
DisableVhostNet: h.DisableVhostNet,
EnableVhostUserStore: h.EnableVhostUserStore,
VhostUserStorePath: h.vhostUserStorePath(),
VhostUserStorePathList: h.VhostUserStorePathList,
VhostUserDeviceReconnect: h.VhostUserDeviceReconnect,
SeccompSandbox: h.SeccompSandbox,
GuestHookPath: h.guestHookPath(),
RxRateLimiterMaxRate: rxRateLimiterMaxRate,
TxRateLimiterMaxRate: txRateLimiterMaxRate,
EnableAnnotations: h.EnableAnnotations,
GuestMemoryDumpPath: h.GuestMemoryDumpPath,
GuestMemoryDumpPaging: h.GuestMemoryDumpPaging,
ConfidentialGuest: h.ConfidentialGuest,
SevSnpGuest: h.SevSnpGuest,
GuestSwap: h.GuestSwap,
Rootless: h.Rootless,
LegacySerial: h.LegacySerial,
DisableSeLinux: h.DisableSeLinux,
DisableGuestSeLinux: h.DisableGuestSeLinux,
ExtraMonitorSocket: extraMonitorSocket,
SnpIdBlock: h.SnpIdBlock,
SnpIdAuth: h.SnpIdAuth,
SnpGuestPolicy: h.SnpGuestPolicy,
MeasurementAlgo: h.GetMeasurementAlgo(),
}, nil
}
@@ -1283,42 +1337,58 @@ func newStratovirtHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
fmt.Errorf("cannot enable %s without daemon path in configuration file", sharedFS)
}
blockLogicalSectorSize, err := h.blockDeviceLogicalSectorSize()
if err != nil {
return vc.HypervisorConfig{}, err
}
blockPhysicalSectorSize, err := h.blockDevicePhysicalSectorSize()
if err != nil {
return vc.HypervisorConfig{}, err
}
if err := validateBlockDeviceSectorSizes(blockLogicalSectorSize, blockPhysicalSectorSize); err != nil {
return vc.HypervisorConfig{}, err
}
return vc.HypervisorConfig{
HypervisorPath: hypervisor,
HypervisorPathList: h.HypervisorPathList,
KernelPath: kernel,
InitrdPath: initrd,
ImagePath: image,
RootfsType: rootfsType,
KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)),
KernelVerityParams: h.kernelVerityParams(),
HypervisorMachineType: machineType,
NumVCPUsF: h.defaultVCPUs(),
DefaultMaxVCPUs: h.defaultMaxVCPUs(),
MemorySize: h.defaultMemSz(),
MemSlots: h.defaultMemSlots(),
MemOffset: h.defaultMemOffset(),
DefaultMaxMemorySize: h.defaultMaxMemSz(),
EntropySource: h.GetEntropySource(),
DefaultBridges: h.defaultBridges(),
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
SharedFS: sharedFS,
VirtioFSDaemon: h.VirtioFSDaemon,
VirtioFSDaemonList: h.VirtioFSDaemonList,
HypervisorLoglevel: h.defaultHypervisorLoglevel(),
VirtioFSCacheSize: h.VirtioFSCacheSize,
VirtioFSCache: h.defaultVirtioFSCache(),
VirtioFSExtraArgs: h.VirtioFSExtraArgs,
HugePages: h.HugePages,
Debug: h.Debug,
DisableNestingChecks: h.DisableNestingChecks,
BlockDeviceDriver: blockDriver,
DisableVhostNet: true,
GuestHookPath: h.guestHookPath(),
EnableAnnotations: h.EnableAnnotations,
DisableSeccomp: h.DisableSeccomp,
DisableSeLinux: h.DisableSeLinux,
DisableGuestSeLinux: h.DisableGuestSeLinux,
HypervisorPath: hypervisor,
HypervisorPathList: h.HypervisorPathList,
KernelPath: kernel,
InitrdPath: initrd,
ImagePath: image,
RootfsType: rootfsType,
KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)),
KernelVerityParams: h.kernelVerityParams(),
HypervisorMachineType: machineType,
NumVCPUsF: h.defaultVCPUs(),
DefaultMaxVCPUs: h.defaultMaxVCPUs(),
MemorySize: h.defaultMemSz(),
MemSlots: h.defaultMemSlots(),
MemOffset: h.defaultMemOffset(),
DefaultMaxMemorySize: h.defaultMaxMemSz(),
EntropySource: h.GetEntropySource(),
DefaultBridges: h.defaultBridges(),
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
SharedFS: sharedFS,
VirtioFSDaemon: h.VirtioFSDaemon,
VirtioFSDaemonList: h.VirtioFSDaemonList,
HypervisorLoglevel: h.defaultHypervisorLoglevel(),
VirtioFSCacheSize: h.VirtioFSCacheSize,
VirtioFSCache: h.defaultVirtioFSCache(),
VirtioFSExtraArgs: h.VirtioFSExtraArgs,
HugePages: h.HugePages,
Debug: h.Debug,
DisableNestingChecks: h.DisableNestingChecks,
BlockDeviceDriver: blockDriver,
BlockDeviceLogicalSectorSize: blockLogicalSectorSize,
BlockDevicePhysicalSectorSize: blockPhysicalSectorSize,
DisableVhostNet: true,
GuestHookPath: h.guestHookPath(),
EnableAnnotations: h.EnableAnnotations,
DisableSeccomp: h.DisableSeccomp,
DisableSeLinux: h.DisableSeLinux,
DisableGuestSeLinux: h.DisableGuestSeLinux,
}, nil
}

View File

@@ -708,6 +708,41 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
}
// TestValidateBlockDeviceSectorSize checks that validateBlockDeviceSectorSize
// accepts 0 and every power of 2 from 512 to 65536, and rejects values that
// are not a power of 2, are below 512, or are above 65536.
func TestValidateBlockDeviceSectorSize(t *testing.T) {
	assert := assert.New(t)

	accepted := []uint32{0, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536}
	for _, size := range accepted {
		err := validateBlockDeviceSectorSize("test_field", size)
		assert.NoError(err, "expected size %d to be accepted", size)
	}

	// Each group of rejected sizes carries its own failure message.
	rejected := []struct {
		msg   string
		sizes []uint32
	}{
		{"expected non-power-of-2 size %d to be rejected", []uint32{3, 100, 1000, 3000, 5000}},
		{"expected below-minimum size %d to be rejected", []uint32{1, 256}},
		{"expected above-maximum size %d to be rejected", []uint32{131072, 1048576}},
	}
	for _, group := range rejected {
		for _, size := range group.sizes {
			err := validateBlockDeviceSectorSize("test_field", size)
			assert.Error(err, group.msg, size)
		}
	}
}
// TestValidateBlockDeviceSectorSizes checks the cross-field rule enforced by
// validateBlockDeviceSectorSizes: pairs containing a 0, or with logical not
// larger than physical, pass; pairs with logical larger than physical fail.
func TestValidateBlockDeviceSectorSizes(t *testing.T) {
	assert := assert.New(t)

	type sectorSizes struct {
		logical, physical uint32
	}

	valid := []sectorSizes{
		{0, 0},
		{512, 0},
		{0, 4096},
		{512, 4096},
		{4096, 4096},
		{512, 512},
	}
	for _, s := range valid {
		assert.NoError(validateBlockDeviceSectorSizes(s.logical, s.physical))
	}

	invalid := []sectorSizes{
		{4096, 512},
		{4096, 1024},
		{65536, 512},
	}
	for _, s := range invalid {
		assert.Error(validateBlockDeviceSectorSizes(s.logical, s.physical), "logical > physical should be rejected")
	}
}
func TestNewFirecrackerHypervisorConfig(t *testing.T) {
dir := t.TempDir()

View File

@@ -920,9 +920,39 @@ func addHypervisorBlockOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig)
return err
}
return newAnnotationConfiguration(ocispec, vcAnnotations.BlockDeviceCacheNoflush).setBool(func(blockDeviceCacheNoflush bool) {
if err := newAnnotationConfiguration(ocispec, vcAnnotations.BlockDeviceCacheNoflush).setBool(func(blockDeviceCacheNoflush bool) {
sbConfig.HypervisorConfig.BlockDeviceCacheNoflush = blockDeviceCacheNoflush
})
}); err != nil {
return err
}
if err := newAnnotationConfiguration(ocispec, vcAnnotations.BlockDeviceLogicalSectorSize).setUintWithCheck(func(size uint64) error {
if size != 0 && (size < 512 || size > 65536 || (size&(size-1)) != 0) {
return fmt.Errorf("invalid %s %d: must be 0 or a power of 2 between 512 and 65536", vcAnnotations.BlockDeviceLogicalSectorSize, size)
}
sbConfig.HypervisorConfig.BlockDeviceLogicalSectorSize = uint32(size)
return nil
}); err != nil {
return err
}
if err := newAnnotationConfiguration(ocispec, vcAnnotations.BlockDevicePhysicalSectorSize).setUintWithCheck(func(size uint64) error {
if size != 0 && (size < 512 || size > 65536 || (size&(size-1)) != 0) {
return fmt.Errorf("invalid %s %d: must be 0 or a power of 2 between 512 and 65536", vcAnnotations.BlockDevicePhysicalSectorSize, size)
}
sbConfig.HypervisorConfig.BlockDevicePhysicalSectorSize = uint32(size)
return nil
}); err != nil {
return err
}
logical := sbConfig.HypervisorConfig.BlockDeviceLogicalSectorSize
physical := sbConfig.HypervisorConfig.BlockDevicePhysicalSectorSize
if logical != 0 && physical != 0 && logical > physical {
return fmt.Errorf("invalid sector sizes: logical (%d) must not be larger than physical (%d)", logical, physical)
}
return nil
}
func addHypervisorVirtioFsOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, runtime RuntimeConfig) error {

View File

@@ -665,6 +665,8 @@ func TestAddHypervisorAnnotations(t *testing.T) {
// 10Mbit
ocispec.Annotations[vcAnnotations.RxRateLimiterMaxRate] = "10000000"
ocispec.Annotations[vcAnnotations.TxRateLimiterMaxRate] = "10000000"
ocispec.Annotations[vcAnnotations.BlockDeviceLogicalSectorSize] = "512"
ocispec.Annotations[vcAnnotations.BlockDevicePhysicalSectorSize] = "4096"
err := addAnnotations(ocispec, &sbConfig, runtimeConfig)
assert.NoError(err)
@@ -706,6 +708,8 @@ func TestAddHypervisorAnnotations(t *testing.T) {
assert.Equal(sbConfig.HypervisorConfig.LegacySerial, true)
assert.Equal(sbConfig.HypervisorConfig.RxRateLimiterMaxRate, uint64(10000000))
assert.Equal(sbConfig.HypervisorConfig.TxRateLimiterMaxRate, uint64(10000000))
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceLogicalSectorSize, uint32(512))
assert.Equal(sbConfig.HypervisorConfig.BlockDevicePhysicalSectorSize, uint32(4096))
// In case an absurdly large value is provided, the config value is not overridden
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "655536"
@@ -726,6 +730,80 @@ func TestAddHypervisorAnnotations(t *testing.T) {
assert.Error(err)
}
// TestBlockDeviceSectorSizeAnnotations checks how addAnnotations handles the
// block device logical/physical sector size annotations: 0 and powers of 2
// from 512 up to 65536 are accepted, anything else is rejected, and a logical
// size larger than the physical size is rejected.
func TestBlockDeviceSectorSizeAnnotations(t *testing.T) {
	assert := assert.New(t)

	runtimeConfig := RuntimeConfig{
		HypervisorType: vc.QemuHypervisor,
	}
	runtimeConfig.HypervisorConfig.EnableAnnotations = []string{".*"}

	// apply runs addAnnotations on a fresh spec carrying only the given
	// annotations and a fresh sandbox config, returning that config along
	// with the error so callers can inspect both.
	apply := func(annotations map[string]string) (vc.SandboxConfig, error) {
		spec := specs.Spec{Annotations: annotations}
		cfg := vc.SandboxConfig{Annotations: make(map[string]string)}
		err := addAnnotations(spec, &cfg, runtimeConfig)
		return cfg, err
	}

	// Accepted sizes, set on both annotations at once. 0 means "use the
	// hypervisor default".
	validSizes := []string{"0", "512", "1024", "2048", "4096", "8192", "16384", "32768", "65536"}
	for _, size := range validSizes {
		_, err := apply(map[string]string{
			vcAnnotations.BlockDeviceLogicalSectorSize:  size,
			vcAnnotations.BlockDevicePhysicalSectorSize: size,
		})
		assert.NoError(err, "expected valid size %s to be accepted", size)
	}

	// Rejected sizes, each set on a single annotation.
	rejected := []struct {
		annotation string
		sizes      []string
		msg        string
	}{
		{
			// Not a power of 2.
			annotation: vcAnnotations.BlockDeviceLogicalSectorSize,
			sizes:      []string{"3", "100", "1000", "3000", "5000"},
			msg:        "expected non-power-of-2 size %s to be rejected",
		},
		{
			// Below the minimum (512).
			annotation: vcAnnotations.BlockDeviceLogicalSectorSize,
			sizes:      []string{"1", "256"},
			msg:        "expected below-minimum size %s to be rejected",
		},
		{
			// Above the maximum (65536).
			annotation: vcAnnotations.BlockDevicePhysicalSectorSize,
			sizes:      []string{"131072", "1048576"},
			msg:        "expected above-maximum size %s to be rejected",
		},
	}
	for _, group := range rejected {
		for _, size := range group.sizes {
			_, err := apply(map[string]string{group.annotation: size})
			assert.Error(err, group.msg, size)
		}
	}

	// Logical/physical combinations that must be accepted and stored as given.
	accepted := []struct {
		logical      string
		physical     string
		wantLogical  uint32
		wantPhysical uint32
	}{
		{logical: "4096", physical: "4096", wantLogical: 4096, wantPhysical: 4096},
		{logical: "512", physical: "4096", wantLogical: 512, wantPhysical: 4096},
	}
	for _, tc := range accepted {
		cfg, err := apply(map[string]string{
			vcAnnotations.BlockDeviceLogicalSectorSize:  tc.logical,
			vcAnnotations.BlockDevicePhysicalSectorSize: tc.physical,
		})
		assert.NoError(err)
		assert.Equal(cfg.HypervisorConfig.BlockDeviceLogicalSectorSize, tc.wantLogical)
		assert.Equal(cfg.HypervisorConfig.BlockDevicePhysicalSectorSize, tc.wantPhysical)
	}

	// Invalid: logical > physical.
	_, err := apply(map[string]string{
		vcAnnotations.BlockDeviceLogicalSectorSize:  "4096",
		vcAnnotations.BlockDevicePhysicalSectorSize: "512",
	})
	assert.Error(err, "logical > physical should be rejected")
}
func TestAddRemoteHypervisorAnnotations(t *testing.T) {
// Remote hypervisor uses DefaultVCPUs, DefaultMemory etc as annotations to pick the size of the separate VM to create,
// so doesn't need to be bound by the host's capacity limits.

View File

@@ -754,6 +754,16 @@ type HypervisorConfig struct {
// Denotes whether flush requests for the device are ignored.
BlockDeviceCacheNoflush bool
// BlockDeviceLogicalSectorSize specifies the logical sector size reported
// by block devices to the guest, in bytes. Common values are 512 and 4096.
// Set to 0 to use the hypervisor default.
BlockDeviceLogicalSectorSize uint32
// BlockDevicePhysicalSectorSize specifies the physical sector size reported
// by block devices to the guest, in bytes. Common values are 512 and 4096.
// Set to 0 to use the hypervisor default.
BlockDevicePhysicalSectorSize uint32
// DisableBlockDeviceUse disallows a block device from being used.
DisableBlockDeviceUse bool

View File

@@ -201,62 +201,64 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
}
ss.Config.HypervisorConfig = persistapi.HypervisorConfig{
NumVCPUsF: sconfig.HypervisorConfig.NumVCPUsF,
DefaultMaxVCPUs: sconfig.HypervisorConfig.DefaultMaxVCPUs,
MemorySize: sconfig.HypervisorConfig.MemorySize,
DefaultBridges: sconfig.HypervisorConfig.DefaultBridges,
Msize9p: sconfig.HypervisorConfig.Msize9p,
MemSlots: sconfig.HypervisorConfig.MemSlots,
MemOffset: sconfig.HypervisorConfig.MemOffset,
VirtioMem: sconfig.HypervisorConfig.VirtioMem,
VirtioFSCacheSize: sconfig.HypervisorConfig.VirtioFSCacheSize,
KernelPath: sconfig.HypervisorConfig.KernelPath,
ImagePath: sconfig.HypervisorConfig.ImagePath,
InitrdPath: sconfig.HypervisorConfig.InitrdPath,
FirmwarePath: sconfig.HypervisorConfig.FirmwarePath,
MachineAccelerators: sconfig.HypervisorConfig.MachineAccelerators,
CPUFeatures: sconfig.HypervisorConfig.CPUFeatures,
HypervisorPath: sconfig.HypervisorConfig.HypervisorPath,
HypervisorPathList: sconfig.HypervisorConfig.HypervisorPathList,
JailerPath: sconfig.HypervisorConfig.JailerPath,
JailerPathList: sconfig.HypervisorConfig.JailerPathList,
BlockDeviceDriver: sconfig.HypervisorConfig.BlockDeviceDriver,
HypervisorMachineType: sconfig.HypervisorConfig.HypervisorMachineType,
MemoryPath: sconfig.HypervisorConfig.MemoryPath,
DevicesStatePath: sconfig.HypervisorConfig.DevicesStatePath,
EntropySource: sconfig.HypervisorConfig.EntropySource,
EntropySourceList: sconfig.HypervisorConfig.EntropySourceList,
SharedFS: sconfig.HypervisorConfig.SharedFS,
VirtioFSDaemon: sconfig.HypervisorConfig.VirtioFSDaemon,
VirtioFSDaemonList: sconfig.HypervisorConfig.VirtioFSDaemonList,
VirtioFSCache: sconfig.HypervisorConfig.VirtioFSCache,
VirtioFSExtraArgs: sconfig.HypervisorConfig.VirtioFSExtraArgs[:],
BlockDeviceCacheSet: sconfig.HypervisorConfig.BlockDeviceCacheSet,
BlockDeviceCacheDirect: sconfig.HypervisorConfig.BlockDeviceCacheDirect,
BlockDeviceCacheNoflush: sconfig.HypervisorConfig.BlockDeviceCacheNoflush,
DisableBlockDeviceUse: sconfig.HypervisorConfig.DisableBlockDeviceUse,
EnableIOThreads: sconfig.HypervisorConfig.EnableIOThreads,
IndepIOThreads: sconfig.HypervisorConfig.IndepIOThreads,
Debug: sconfig.HypervisorConfig.Debug,
MemPrealloc: sconfig.HypervisorConfig.MemPrealloc,
HugePages: sconfig.HypervisorConfig.HugePages,
FileBackedMemRootDir: sconfig.HypervisorConfig.FileBackedMemRootDir,
FileBackedMemRootList: sconfig.HypervisorConfig.FileBackedMemRootList,
DisableNestingChecks: sconfig.HypervisorConfig.DisableNestingChecks,
DisableImageNvdimm: sconfig.HypervisorConfig.DisableImageNvdimm,
BootToBeTemplate: sconfig.HypervisorConfig.BootToBeTemplate,
BootFromTemplate: sconfig.HypervisorConfig.BootFromTemplate,
DisableVhostNet: sconfig.HypervisorConfig.DisableVhostNet,
EnableVhostUserStore: sconfig.HypervisorConfig.EnableVhostUserStore,
SeccompSandbox: sconfig.HypervisorConfig.SeccompSandbox,
VhostUserStorePath: sconfig.HypervisorConfig.VhostUserStorePath,
VhostUserStorePathList: sconfig.HypervisorConfig.VhostUserStorePathList,
GuestHookPath: sconfig.HypervisorConfig.GuestHookPath,
VMid: sconfig.HypervisorConfig.VMid,
RxRateLimiterMaxRate: sconfig.HypervisorConfig.RxRateLimiterMaxRate,
TxRateLimiterMaxRate: sconfig.HypervisorConfig.TxRateLimiterMaxRate,
SGXEPCSize: sconfig.HypervisorConfig.SGXEPCSize,
EnableAnnotations: sconfig.HypervisorConfig.EnableAnnotations,
NumVCPUsF: sconfig.HypervisorConfig.NumVCPUsF,
DefaultMaxVCPUs: sconfig.HypervisorConfig.DefaultMaxVCPUs,
MemorySize: sconfig.HypervisorConfig.MemorySize,
DefaultBridges: sconfig.HypervisorConfig.DefaultBridges,
Msize9p: sconfig.HypervisorConfig.Msize9p,
MemSlots: sconfig.HypervisorConfig.MemSlots,
MemOffset: sconfig.HypervisorConfig.MemOffset,
VirtioMem: sconfig.HypervisorConfig.VirtioMem,
VirtioFSCacheSize: sconfig.HypervisorConfig.VirtioFSCacheSize,
KernelPath: sconfig.HypervisorConfig.KernelPath,
ImagePath: sconfig.HypervisorConfig.ImagePath,
InitrdPath: sconfig.HypervisorConfig.InitrdPath,
FirmwarePath: sconfig.HypervisorConfig.FirmwarePath,
MachineAccelerators: sconfig.HypervisorConfig.MachineAccelerators,
CPUFeatures: sconfig.HypervisorConfig.CPUFeatures,
HypervisorPath: sconfig.HypervisorConfig.HypervisorPath,
HypervisorPathList: sconfig.HypervisorConfig.HypervisorPathList,
JailerPath: sconfig.HypervisorConfig.JailerPath,
JailerPathList: sconfig.HypervisorConfig.JailerPathList,
BlockDeviceDriver: sconfig.HypervisorConfig.BlockDeviceDriver,
HypervisorMachineType: sconfig.HypervisorConfig.HypervisorMachineType,
MemoryPath: sconfig.HypervisorConfig.MemoryPath,
DevicesStatePath: sconfig.HypervisorConfig.DevicesStatePath,
EntropySource: sconfig.HypervisorConfig.EntropySource,
EntropySourceList: sconfig.HypervisorConfig.EntropySourceList,
SharedFS: sconfig.HypervisorConfig.SharedFS,
VirtioFSDaemon: sconfig.HypervisorConfig.VirtioFSDaemon,
VirtioFSDaemonList: sconfig.HypervisorConfig.VirtioFSDaemonList,
VirtioFSCache: sconfig.HypervisorConfig.VirtioFSCache,
VirtioFSExtraArgs: sconfig.HypervisorConfig.VirtioFSExtraArgs[:],
BlockDeviceCacheSet: sconfig.HypervisorConfig.BlockDeviceCacheSet,
BlockDeviceCacheDirect: sconfig.HypervisorConfig.BlockDeviceCacheDirect,
BlockDeviceCacheNoflush: sconfig.HypervisorConfig.BlockDeviceCacheNoflush,
BlockDeviceLogicalSectorSize: sconfig.HypervisorConfig.BlockDeviceLogicalSectorSize,
BlockDevicePhysicalSectorSize: sconfig.HypervisorConfig.BlockDevicePhysicalSectorSize,
DisableBlockDeviceUse: sconfig.HypervisorConfig.DisableBlockDeviceUse,
EnableIOThreads: sconfig.HypervisorConfig.EnableIOThreads,
IndepIOThreads: sconfig.HypervisorConfig.IndepIOThreads,
Debug: sconfig.HypervisorConfig.Debug,
MemPrealloc: sconfig.HypervisorConfig.MemPrealloc,
HugePages: sconfig.HypervisorConfig.HugePages,
FileBackedMemRootDir: sconfig.HypervisorConfig.FileBackedMemRootDir,
FileBackedMemRootList: sconfig.HypervisorConfig.FileBackedMemRootList,
DisableNestingChecks: sconfig.HypervisorConfig.DisableNestingChecks,
DisableImageNvdimm: sconfig.HypervisorConfig.DisableImageNvdimm,
BootToBeTemplate: sconfig.HypervisorConfig.BootToBeTemplate,
BootFromTemplate: sconfig.HypervisorConfig.BootFromTemplate,
DisableVhostNet: sconfig.HypervisorConfig.DisableVhostNet,
EnableVhostUserStore: sconfig.HypervisorConfig.EnableVhostUserStore,
SeccompSandbox: sconfig.HypervisorConfig.SeccompSandbox,
VhostUserStorePath: sconfig.HypervisorConfig.VhostUserStorePath,
VhostUserStorePathList: sconfig.HypervisorConfig.VhostUserStorePathList,
GuestHookPath: sconfig.HypervisorConfig.GuestHookPath,
VMid: sconfig.HypervisorConfig.VMid,
RxRateLimiterMaxRate: sconfig.HypervisorConfig.RxRateLimiterMaxRate,
TxRateLimiterMaxRate: sconfig.HypervisorConfig.TxRateLimiterMaxRate,
SGXEPCSize: sconfig.HypervisorConfig.SGXEPCSize,
EnableAnnotations: sconfig.HypervisorConfig.EnableAnnotations,
}
ss.Config.KataAgentConfig = &persistapi.KataAgentConfig{
@@ -441,65 +443,67 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
hconf := savedConf.HypervisorConfig
sconfig.HypervisorConfig = HypervisorConfig{
NumVCPUsF: hconf.NumVCPUsF,
DefaultMaxVCPUs: hconf.DefaultMaxVCPUs,
MemorySize: hconf.MemorySize,
DefaultBridges: hconf.DefaultBridges,
Msize9p: hconf.Msize9p,
MemSlots: hconf.MemSlots,
MemOffset: hconf.MemOffset,
VirtioMem: hconf.VirtioMem,
VirtioFSCacheSize: hconf.VirtioFSCacheSize,
KernelPath: hconf.KernelPath,
ImagePath: hconf.ImagePath,
InitrdPath: hconf.InitrdPath,
FirmwarePath: hconf.FirmwarePath,
MachineAccelerators: hconf.MachineAccelerators,
CPUFeatures: hconf.CPUFeatures,
HypervisorPath: hconf.HypervisorPath,
HypervisorPathList: hconf.HypervisorPathList,
JailerPath: hconf.JailerPath,
JailerPathList: hconf.JailerPathList,
BlockDeviceDriver: hconf.BlockDeviceDriver,
HypervisorMachineType: hconf.HypervisorMachineType,
MemoryPath: hconf.MemoryPath,
DevicesStatePath: hconf.DevicesStatePath,
EntropySource: hconf.EntropySource,
EntropySourceList: hconf.EntropySourceList,
SharedFS: hconf.SharedFS,
VirtioFSDaemon: hconf.VirtioFSDaemon,
VirtioFSDaemonList: hconf.VirtioFSDaemonList,
VirtioFSCache: hconf.VirtioFSCache,
VirtioFSExtraArgs: hconf.VirtioFSExtraArgs[:],
BlockDeviceCacheSet: hconf.BlockDeviceCacheSet,
BlockDeviceCacheDirect: hconf.BlockDeviceCacheDirect,
BlockDeviceCacheNoflush: hconf.BlockDeviceCacheNoflush,
DisableBlockDeviceUse: hconf.DisableBlockDeviceUse,
EnableIOThreads: hconf.EnableIOThreads,
IndepIOThreads: hconf.IndepIOThreads,
Debug: hconf.Debug,
MemPrealloc: hconf.MemPrealloc,
HugePages: hconf.HugePages,
FileBackedMemRootDir: hconf.FileBackedMemRootDir,
FileBackedMemRootList: hconf.FileBackedMemRootList,
DisableNestingChecks: hconf.DisableNestingChecks,
DisableImageNvdimm: hconf.DisableImageNvdimm,
HotPlugVFIO: hconf.HotPlugVFIO,
ColdPlugVFIO: hconf.ColdPlugVFIO,
PCIeRootPort: hconf.PCIeRootPort,
PCIeSwitchPort: hconf.PCIeSwitchPort,
BootToBeTemplate: hconf.BootToBeTemplate,
BootFromTemplate: hconf.BootFromTemplate,
DisableVhostNet: hconf.DisableVhostNet,
EnableVhostUserStore: hconf.EnableVhostUserStore,
VhostUserStorePath: hconf.VhostUserStorePath,
VhostUserStorePathList: hconf.VhostUserStorePathList,
GuestHookPath: hconf.GuestHookPath,
VMid: hconf.VMid,
RxRateLimiterMaxRate: hconf.RxRateLimiterMaxRate,
TxRateLimiterMaxRate: hconf.TxRateLimiterMaxRate,
SGXEPCSize: hconf.SGXEPCSize,
EnableAnnotations: hconf.EnableAnnotations,
NumVCPUsF: hconf.NumVCPUsF,
DefaultMaxVCPUs: hconf.DefaultMaxVCPUs,
MemorySize: hconf.MemorySize,
DefaultBridges: hconf.DefaultBridges,
Msize9p: hconf.Msize9p,
MemSlots: hconf.MemSlots,
MemOffset: hconf.MemOffset,
VirtioMem: hconf.VirtioMem,
VirtioFSCacheSize: hconf.VirtioFSCacheSize,
KernelPath: hconf.KernelPath,
ImagePath: hconf.ImagePath,
InitrdPath: hconf.InitrdPath,
FirmwarePath: hconf.FirmwarePath,
MachineAccelerators: hconf.MachineAccelerators,
CPUFeatures: hconf.CPUFeatures,
HypervisorPath: hconf.HypervisorPath,
HypervisorPathList: hconf.HypervisorPathList,
JailerPath: hconf.JailerPath,
JailerPathList: hconf.JailerPathList,
BlockDeviceDriver: hconf.BlockDeviceDriver,
HypervisorMachineType: hconf.HypervisorMachineType,
MemoryPath: hconf.MemoryPath,
DevicesStatePath: hconf.DevicesStatePath,
EntropySource: hconf.EntropySource,
EntropySourceList: hconf.EntropySourceList,
SharedFS: hconf.SharedFS,
VirtioFSDaemon: hconf.VirtioFSDaemon,
VirtioFSDaemonList: hconf.VirtioFSDaemonList,
VirtioFSCache: hconf.VirtioFSCache,
VirtioFSExtraArgs: hconf.VirtioFSExtraArgs[:],
BlockDeviceCacheSet: hconf.BlockDeviceCacheSet,
BlockDeviceCacheDirect: hconf.BlockDeviceCacheDirect,
BlockDeviceCacheNoflush: hconf.BlockDeviceCacheNoflush,
BlockDeviceLogicalSectorSize: hconf.BlockDeviceLogicalSectorSize,
BlockDevicePhysicalSectorSize: hconf.BlockDevicePhysicalSectorSize,
DisableBlockDeviceUse: hconf.DisableBlockDeviceUse,
EnableIOThreads: hconf.EnableIOThreads,
IndepIOThreads: hconf.IndepIOThreads,
Debug: hconf.Debug,
MemPrealloc: hconf.MemPrealloc,
HugePages: hconf.HugePages,
FileBackedMemRootDir: hconf.FileBackedMemRootDir,
FileBackedMemRootList: hconf.FileBackedMemRootList,
DisableNestingChecks: hconf.DisableNestingChecks,
DisableImageNvdimm: hconf.DisableImageNvdimm,
HotPlugVFIO: hconf.HotPlugVFIO,
ColdPlugVFIO: hconf.ColdPlugVFIO,
PCIeRootPort: hconf.PCIeRootPort,
PCIeSwitchPort: hconf.PCIeSwitchPort,
BootToBeTemplate: hconf.BootToBeTemplate,
BootFromTemplate: hconf.BootFromTemplate,
DisableVhostNet: hconf.DisableVhostNet,
EnableVhostUserStore: hconf.EnableVhostUserStore,
VhostUserStorePath: hconf.VhostUserStorePath,
VhostUserStorePathList: hconf.VhostUserStorePathList,
GuestHookPath: hconf.GuestHookPath,
VMid: hconf.VMid,
RxRateLimiterMaxRate: hconf.RxRateLimiterMaxRate,
TxRateLimiterMaxRate: hconf.TxRateLimiterMaxRate,
SGXEPCSize: hconf.SGXEPCSize,
EnableAnnotations: hconf.EnableAnnotations,
}
sconfig.AgentConfig = KataAgentConfig{

View File

@@ -157,6 +157,14 @@ type HypervisorConfig struct {
// Denotes whether flush requests for the device are ignored.
BlockDeviceCacheNoflush bool
// BlockDeviceLogicalSectorSize specifies the logical sector size reported
// by block devices to the guest, in bytes.
BlockDeviceLogicalSectorSize uint32
// BlockDevicePhysicalSectorSize specifies the physical sector size reported
// by block devices to the guest, in bytes.
BlockDevicePhysicalSectorSize uint32
// DisableBlockDeviceUse disallows a block device from being used.
DisableBlockDeviceUse bool

View File

@@ -242,6 +242,21 @@ const (
// Denotes whether flush requests for the device are ignored.
BlockDeviceCacheNoflush = kataAnnotHypervisorPrefix + "block_device_cache_noflush"
// BlockDeviceLogicalSectorSize is a sandbox annotation that specifies the logical sector size
// reported by block devices to the guest, in bytes. Common values are 512 and 4096.
// Set to 0 to use the hypervisor default.
// NOTE: the annotation key uses the abbreviated "blk_logical_sector_size" rather than
// "block_device_logical_sector_size" (as used in the config file) because Kubernetes
// enforces a 63-character limit on annotation name segments, and the full name with the
// "io.katacontainers.config.hypervisor." prefix would exceed that limit.
BlockDeviceLogicalSectorSize = kataAnnotHypervisorPrefix + "blk_logical_sector_size"
// BlockDevicePhysicalSectorSize is a sandbox annotation that specifies the physical sector size
// reported by block devices to the guest, in bytes. Common values are 512 and 4096.
// Set to 0 to use the hypervisor default.
// NOTE: see BlockDeviceLogicalSectorSize for the reason the annotation key is abbreviated.
BlockDevicePhysicalSectorSize = kataAnnotHypervisorPrefix + "blk_physical_sector_size"
// RxRateLimiterMaxRate is a sandbox annotation that specifies max rate on network I/O inbound bandwidth.
RxRateLimiterMaxRate = kataAnnotHypervisorPrefix + "rx_rate_limiter_max_rate"

View File

@@ -1686,7 +1686,7 @@ func (q *qemu) hotplugAddBlockDevice(ctx context.Context, drive *config.BlockDri
iothreadID = fmt.Sprintf("%s_%d", indepIOThreadsPrefix, 0)
}
if err = q.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, addr, bridge.ID, romFile, queues, true, defaultDisableModern, iothreadID); err != nil {
if err = q.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, addr, bridge.ID, romFile, queues, true, defaultDisableModern, iothreadID, q.config.BlockDeviceLogicalSectorSize, q.config.BlockDevicePhysicalSectorSize); err != nil {
return err
}
case q.config.BlockDeviceDriver == config.VirtioBlockCCW:
@@ -1705,7 +1705,7 @@ func (q *qemu) hotplugAddBlockDevice(ctx context.Context, drive *config.BlockDri
if err != nil {
return err
}
if err = q.qmpMonitorCh.qmp.ExecuteDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, devNoHotplug, "", true, false); err != nil {
if err = q.qmpMonitorCh.qmp.ExecuteDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, devNoHotplug, "", true, false, q.config.BlockDeviceLogicalSectorSize, q.config.BlockDevicePhysicalSectorSize); err != nil {
return err
}
case q.config.BlockDeviceDriver == config.VirtioSCSI:

View File

@@ -905,7 +905,7 @@ func (s *stratovirt) hotplugBlk(ctx context.Context, drive *config.BlockDrive, o
}
devAddr := fmt.Sprintf("%d", slot)
if err := s.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(s.qmpMonitorCh.ctx, drive.ID, drive.ID, driver, devAddr, "", "", 0, false, false, ""); err != nil {
if err := s.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(s.qmpMonitorCh.ctx, drive.ID, drive.ID, driver, devAddr, "", "", 0, false, false, "", s.config.BlockDeviceLogicalSectorSize, s.config.BlockDevicePhysicalSectorSize); err != nil {
return err
}
case RemoveDevice: