From b0157ad73a41b7e23d3c3ca7a761e27eab29ae29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 11 Aug 2023 14:55:11 +0200 Subject: [PATCH 1/4] runtime: confidential: Do not set the max_vcpu to cpu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't have to do this since we're relying on the `static_sandbox_resource_mgmt` feature, which gives us the correct amount of memory and CPUs to be allocated. Signed-off-by: Fabiano Fidêncio --- src/runtime/virtcontainers/hypervisor_config_linux.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/runtime/virtcontainers/hypervisor_config_linux.go b/src/runtime/virtcontainers/hypervisor_config_linux.go index f1f20d3151..570fef6a3a 100644 --- a/src/runtime/virtcontainers/hypervisor_config_linux.go +++ b/src/runtime/virtcontainers/hypervisor_config_linux.go @@ -54,11 +54,6 @@ func validateHypervisorConfig(conf *HypervisorConfig) error { conf.DefaultMaxVCPUs = defaultMaxVCPUs } - if conf.ConfidentialGuest && conf.NumVCPUs != conf.DefaultMaxVCPUs { - hvLogger.Warnf("Confidential guests do not support hotplugging of vCPUs. Setting DefaultMaxVCPUs to NumVCPUs (%d)", conf.NumVCPUs) - conf.DefaultMaxVCPUs = conf.NumVCPUs - } - if conf.Msize9p == 0 && conf.SharedFS != config.VirtioFS { conf.Msize9p = defaultMsize9p } From e477ed0e8642f9b85ce5d2c01c52318e9b2148d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 30 Jun 2023 12:17:51 +0000 Subject: [PATCH 2/4] runtime: Improve vCPU allocation for the VMMs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First of all, this is a controversial piece, and I know that. In this commit we're trying to make a less greedy approach regards the amount of vCPUs we allocate for the VMM, which will be advantageous mainly when using the `static_sandbox_resource_mgmt` feature, which is used by the confidential guests. 
The current approach we have basically does: * Gets the amount of vCPUs set in the config (an integer) * Gets the amount of vCPUs set as limit (an integer) * Sum those up * Starts / Updates the VMM to use that total amount of vCPUs The fact we're dealing with integers is logical, as we cannot request 500m vCPUs to the VMMs. However, it leads us to, in several cases, be wasting one vCPU. Let's take the example that we know the VMM requires 500m vCPUs to be running, and the workload sets 250m vCPUs as a resource limit. In that case, we'd do: * Gets the amount of vCPUs set in the config: 1 * Gets the amount of vCPUs set as limit: ceil(0.25) * 1 + ceil(0.25) = 1 + 1 = 2 vCPUs * Starts / Updates the VMM to use 2 vCPUs With the logic changed here, what we're doing is considering everything as float till just before we start / update the VMM. So, the flow describe above would be: * Gets the amount of vCPUs set in the config: 0.5 * Gets the amount of vCPUs set as limit: 0.25 * ceil(0.5 + 0.25) = 1 vCPUs * Starts / Updates the VMM to use 1 vCPUs In the way I've written this patch we introduce zero regressions, as the default values set are still the same, and those will only be changed for the TEE use cases (although I can see firecracker, or any other user of `static_sandbox_resource_mgmt=true` taking advantage of this). There's, though, an implicit assumption in this patch that we'd need to make explicit, and that's that the default_vcpus / default_memory is the amount of vcpus / memory required by the VMM, and absolutely nothing else. Also, the amount set there should be reflected in the podOverhead for the specific runtime class. One other possible approach, which I am not that much in favour of taking as I think it's **less clear**, is that we could actually get the podOverhead amount, subtract it from the default_vcpus (treating the result as a float), then sum up what the user set as limit (as a float), and finally ceil the result. 
It could work, but IMHO this is **less clear**, and **less explicit** on what we're actually doing, and how the default_vcpus / default_memory should be used. Fixes: #6909 Signed-off-by: Fabiano Fidêncio Signed-off-by: Christophe de Dinechin --- docs/how-to/how-to-set-sandbox-config-kata.md | 2 +- src/runtime/cmd/kata-runtime/kata-env_test.go | 2 +- src/runtime/pkg/katautils/config.go | 24 ++++++------ src/runtime/pkg/katautils/config_test.go | 22 +++++------ src/runtime/pkg/oci/utils.go | 39 +++++++++++++------ src/runtime/pkg/oci/utils_test.go | 6 +-- src/runtime/virtcontainers/acrn_test.go | 2 +- src/runtime/virtcontainers/clh.go | 4 +- src/runtime/virtcontainers/clh_test.go | 2 +- .../virtcontainers/factory/factory_linux.go | 6 +-- .../virtcontainers/factory/factory_test.go | 6 +-- src/runtime/virtcontainers/fc.go | 2 +- src/runtime/virtcontainers/hypervisor.go | 13 ++++++- .../hypervisor_config_darwin.go | 2 +- .../virtcontainers/hypervisor_config_linux.go | 4 +- .../hypervisor_config_linux_test.go | 2 +- .../virtcontainers/macvtap_endpoint.go | 4 +- src/runtime/virtcontainers/network_linux.go | 2 +- src/runtime/virtcontainers/persist.go | 4 +- .../virtcontainers/persist/api/config.go | 2 +- src/runtime/virtcontainers/qemu.go | 17 ++++---- src/runtime/virtcontainers/qemu_test.go | 6 +-- src/runtime/virtcontainers/sandbox.go | 17 ++++---- src/runtime/virtcontainers/sandbox_test.go | 2 +- src/runtime/virtcontainers/tap_endpoint.go | 2 +- src/runtime/virtcontainers/tuntap_endpoint.go | 2 +- src/runtime/virtcontainers/utils/utils.go | 11 +----- .../virtcontainers/utils/utils_test.go | 13 +++---- src/runtime/virtcontainers/vm.go | 2 +- 29 files changed, 120 insertions(+), 102 deletions(-) diff --git a/docs/how-to/how-to-set-sandbox-config-kata.md b/docs/how-to/how-to-set-sandbox-config-kata.md index b8ac511cd4..6540febbf5 100644 --- a/docs/how-to/how-to-set-sandbox-config-kata.md +++ b/docs/how-to/how-to-set-sandbox-config-kata.md @@ -47,7 +47,7 @@ There are 
several kinds of Kata configurations and they are listed below. | `io.katacontainers.config.hypervisor.ctlpath` (R) | `string` | Path to the `acrnctl` binary for the ACRN hypervisor | | `io.katacontainers.config.hypervisor.default_max_vcpus` | uint32| the maximum number of vCPUs allocated for the VM by the hypervisor | | `io.katacontainers.config.hypervisor.default_memory` | uint32| the memory assigned for a VM by the hypervisor in `MiB` | -| `io.katacontainers.config.hypervisor.default_vcpus` | uint32| the default vCPUs assigned for a VM by the hypervisor | +| `io.katacontainers.config.hypervisor.default_vcpus` | float32| the default vCPUs assigned for a VM by the hypervisor | | `io.katacontainers.config.hypervisor.disable_block_device_use` | `boolean` | disallow a block device from being used | | `io.katacontainers.config.hypervisor.disable_image_nvdimm` | `boolean` | specify if a `nvdimm` device should be used as rootfs for the guest (QEMU) | | `io.katacontainers.config.hypervisor.disable_vhost_net` | `boolean` | specify if `vhost-net` is not available on the host | diff --git a/src/runtime/cmd/kata-runtime/kata-env_test.go b/src/runtime/cmd/kata-runtime/kata-env_test.go index 5bf2c5a880..5c0070041e 100644 --- a/src/runtime/cmd/kata-runtime/kata-env_test.go +++ b/src/runtime/cmd/kata-runtime/kata-env_test.go @@ -134,7 +134,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, ociConfig oci.Runti HotPlugVFIO: hotPlugVFIO, ColdPlugVFIO: coldPlugVFIO, DisableNewNetNs: disableNewNetNs, - DefaultVCPUCount: hypConfig.NumVCPUs, + DefaultVCPUCount: hypConfig.NumVCPUs(), DefaultMaxVCPUCount: hypConfig.DefaultMaxVCPUs, DefaultMemSize: hypConfig.MemorySize, DefaultMsize9p: hypConfig.Msize9p, diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index ed9ddbf184..1afec55d50 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -133,7 +133,7 @@ type hypervisor struct { MemSlots uint32 
`toml:"memory_slots"` DefaultBridges uint32 `toml:"default_bridges"` Msize9p uint32 `toml:"msize_9p"` - NumVCPUs int32 `toml:"default_vcpus"` + NumVCPUs float32 `toml:"default_vcpus"` BlockDeviceCacheSet bool `toml:"block_device_cache_set"` BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"` BlockDeviceCacheNoflush bool `toml:"block_device_cache_noflush"` @@ -395,17 +395,17 @@ func getCurrentCpuNum() uint32 { return cpu } -func (h hypervisor) defaultVCPUs() uint32 { - numCPUs := getCurrentCpuNum() +func (h hypervisor) defaultVCPUs() float32 { + numCPUs := float32(getCurrentCpuNum()) - if h.NumVCPUs < 0 || h.NumVCPUs > int32(numCPUs) { + if h.NumVCPUs < 0 || h.NumVCPUs > numCPUs { return numCPUs } if h.NumVCPUs == 0 { // or unspecified - return defaultVCPUCount + return float32(defaultVCPUCount) } - return uint32(h.NumVCPUs) + return h.NumVCPUs } func (h hypervisor) defaultMaxVCPUs() uint32 { @@ -723,7 +723,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { RootfsType: rootfsType, FirmwarePath: firmware, KernelParams: vc.DeserializeParams(vc.KernelParamFields(kernelParams)), - NumVCPUs: h.defaultVCPUs(), + NumVCPUsF: h.defaultVCPUs(), DefaultMaxVCPUs: h.defaultMaxVCPUs(), MemorySize: h.defaultMemSz(), MemSlots: h.defaultMemSlots(), @@ -857,7 +857,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { CPUFeatures: cpuFeatures, KernelParams: vc.DeserializeParams(vc.KernelParamFields(kernelParams)), HypervisorMachineType: machineType, - NumVCPUs: h.defaultVCPUs(), + NumVCPUsF: h.defaultVCPUs(), DefaultMaxVCPUs: h.defaultMaxVCPUs(), MemorySize: h.defaultMemSz(), MemSlots: h.defaultMemSlots(), @@ -968,7 +968,7 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { HypervisorCtlPathList: h.CtlPathList, FirmwarePath: firmware, KernelParams: vc.DeserializeParams(vc.KernelParamFields(kernelParams)), - NumVCPUs: h.defaultVCPUs(), + NumVCPUsF: h.defaultVCPUs(), DefaultMaxVCPUs: 
h.defaultMaxVCPUs(), MemorySize: h.defaultMemSz(), MemSlots: h.defaultMemSlots(), @@ -1059,7 +1059,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { MachineAccelerators: machineAccelerators, KernelParams: vc.DeserializeParams(vc.KernelParamFields(kernelParams)), HypervisorMachineType: machineType, - NumVCPUs: h.defaultVCPUs(), + NumVCPUsF: h.defaultVCPUs(), DefaultMaxVCPUs: h.defaultMaxVCPUs(), MemorySize: h.defaultMemSz(), MemSlots: h.defaultMemSlots(), @@ -1132,7 +1132,7 @@ func newDragonballHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { ImagePath: image, RootfsType: rootfsType, KernelParams: vc.DeserializeParams(vc.KernelParamFields(kernelParams)), - NumVCPUs: h.defaultVCPUs(), + NumVCPUsF: h.defaultVCPUs(), DefaultMaxVCPUs: h.defaultMaxVCPUs(), MemorySize: h.defaultMemSz(), MemSlots: h.defaultMemSlots(), @@ -1297,7 +1297,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig { MachineAccelerators: defaultMachineAccelerators, CPUFeatures: defaultCPUFeatures, HypervisorMachineType: defaultMachineType, - NumVCPUs: defaultVCPUCount, + NumVCPUsF: float32(defaultVCPUCount), DefaultMaxVCPUs: defaultMaxVCPUCount, MemorySize: defaultMemSize, MemOffset: defaultMemOffset, diff --git a/src/runtime/pkg/katautils/config_test.go b/src/runtime/pkg/katautils/config_test.go index 56fee45b23..39ccb49f2b 100644 --- a/src/runtime/pkg/katautils/config_test.go +++ b/src/runtime/pkg/katautils/config_test.go @@ -161,7 +161,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (testConfig testRuntime RootfsType: rootfsType, KernelParams: vc.DeserializeParams(vc.KernelParamFields(kernelParams)), HypervisorMachineType: machineType, - NumVCPUs: defaultVCPUCount, + NumVCPUsF: float32(defaultVCPUCount), DefaultMaxVCPUs: getCurrentCpuNum(), MemorySize: defaultMemSize, DefaultMaxMemorySize: maxMemory, @@ -554,7 +554,7 @@ func TestMinimalRuntimeConfig(t *testing.T) { InitrdPath: defaultInitrdPath, RootfsType: defaultRootfsType, 
HypervisorMachineType: defaultMachineType, - NumVCPUs: defaultVCPUCount, + NumVCPUsF: float32(defaultVCPUCount), DefaultMaxVCPUs: defaultMaxVCPUCount, MemorySize: defaultMemSize, DisableBlockDeviceUse: defaultDisableBlockDeviceUse, @@ -926,7 +926,7 @@ func TestHypervisorDefaults(t *testing.T) { h := hypervisor{} assert.Equal(h.machineType(), defaultMachineType, "default hypervisor machine type wrong") - assert.Equal(h.defaultVCPUs(), defaultVCPUCount, "default vCPU number is wrong") + assert.Equal(h.defaultVCPUs(), float32(defaultVCPUCount), "default vCPU number is wrong") assert.Equal(h.defaultMaxVCPUs(), numCPUs, "default max vCPU number is wrong") assert.Equal(h.defaultMemSz(), defaultMemSize, "default memory size is wrong") @@ -936,13 +936,13 @@ func TestHypervisorDefaults(t *testing.T) { // auto inferring h.NumVCPUs = -1 - assert.Equal(h.defaultVCPUs(), numCPUs, "default vCPU number is wrong") + assert.Equal(h.defaultVCPUs(), float32(numCPUs), "default vCPU number is wrong") h.NumVCPUs = 2 - assert.Equal(h.defaultVCPUs(), uint32(2), "default vCPU number is wrong") + assert.Equal(h.defaultVCPUs(), float32(2), "default vCPU number is wrong") - h.NumVCPUs = int32(numCPUs) + 1 - assert.Equal(h.defaultVCPUs(), numCPUs, "default vCPU number is wrong") + h.NumVCPUs = float32(numCPUs + 1) + assert.Equal(h.defaultVCPUs(), float32(numCPUs), "default vCPU number is wrong") h.DefaultMaxVCPUs = 2 assert.Equal(h.defaultMaxVCPUs(), uint32(2), "default max vCPU number is wrong") @@ -1395,7 +1395,7 @@ func TestDefaultCPUFeatures(t *testing.T) { func TestUpdateRuntimeConfigurationVMConfig(t *testing.T) { assert := assert.New(t) - vcpus := uint(2) + vcpus := float32(2) mem := uint32(2048) config := oci.RuntimeConfig{} @@ -1404,7 +1404,7 @@ func TestUpdateRuntimeConfigurationVMConfig(t *testing.T) { tomlConf := tomlConfig{ Hypervisor: map[string]hypervisor{ qemuHypervisorTableType: { - NumVCPUs: int32(vcpus), + NumVCPUs: vcpus, MemorySize: mem, Path: "/", Kernel: "/", @@ -1727,7 
+1727,7 @@ vfio_mode="vfio" assert.NoError(t, err) assert.Equal(t, config.Hypervisor["qemu"].Path, "/usr/bin/qemu-kvm") - assert.Equal(t, config.Hypervisor["qemu"].NumVCPUs, int32(2)) + assert.Equal(t, config.Hypervisor["qemu"].NumVCPUs, float32(2)) assert.Equal(t, config.Hypervisor["qemu"].DefaultBridges, uint32(4)) assert.Equal(t, config.Hypervisor["qemu"].SharedFS, "virtio-9p") assert.Equal(t, config.Runtime.Debug, true) @@ -1765,7 +1765,7 @@ func TestUpdateRuntimeConfigHypervisor(t *testing.T) { tomlConf := tomlConfig{ Hypervisor: map[string]hypervisor{ h.name: { - NumVCPUs: int32(2), + NumVCPUs: float32(2), MemorySize: uint32(2048), Path: "/", Kernel: "/", diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index 21864d94bd..21590749f0 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -12,6 +12,7 @@ import ( "encoding/json" "errors" "fmt" + "math" "path/filepath" "regexp" goruntime "runtime" @@ -137,7 +138,7 @@ type RuntimeConfig struct { // Sandbox sizing information which, if provided, indicates the size of // the sandbox needed for the workload(s) - SandboxCPUs uint32 + SandboxCPUs float32 SandboxMemMB uint32 // Determines if we should attempt to size the VM at boot time and skip @@ -683,11 +684,11 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig func addHypervisorCPUOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error { numCPUs := goruntime.NumCPU() - if err := newAnnotationConfiguration(ocispec, vcAnnotations.DefaultVCPUs).setUintWithCheck(func(vcpus uint64) error { - if uint32(vcpus) > uint32(numCPUs) { - return fmt.Errorf("Number of cpus %d specified in annotation default_vcpus is greater than the number of CPUs %d on the system", vcpus, numCPUs) + if err := newAnnotationConfiguration(ocispec, vcAnnotations.DefaultVCPUs).setFloat32WithCheck(func(vcpus float32) error { + if vcpus > float32(numCPUs) { + return fmt.Errorf("Number of cpus %f specified in 
annotation default_vcpus is greater than the number of CPUs %d on the system", vcpus, numCPUs) } - sbConfig.HypervisorConfig.NumVCPUs = uint32(vcpus) + sbConfig.HypervisorConfig.NumVCPUsF = float32(vcpus) return nil }); err != nil { return err @@ -1016,10 +1017,10 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st // with the base number of CPU/memory (which is equal to the default CPU/memory specified for the runtime // configuration or annotations) as well as any specified workload resources. if sandboxConfig.StaticResourceMgmt { - sandboxConfig.SandboxResources.BaseCPUs = sandboxConfig.HypervisorConfig.NumVCPUs + sandboxConfig.SandboxResources.BaseCPUs = sandboxConfig.HypervisorConfig.NumVCPUsF sandboxConfig.SandboxResources.BaseMemMB = sandboxConfig.HypervisorConfig.MemorySize - sandboxConfig.HypervisorConfig.NumVCPUs += sandboxConfig.SandboxResources.WorkloadCPUs + sandboxConfig.HypervisorConfig.NumVCPUsF += sandboxConfig.SandboxResources.WorkloadCPUs sandboxConfig.HypervisorConfig.MemorySize += sandboxConfig.SandboxResources.WorkloadMemMB ociLog.WithFields(logrus.Fields{ @@ -1140,6 +1141,7 @@ func IsCRIOContainerManager(spec *specs.Spec) bool { const ( errAnnotationPositiveNumericKey = "Error parsing annotation for %s: Please specify positive numeric value" errAnnotationBoolKey = "Error parsing annotation for %s: Please specify boolean value 'true|false'" + errAnnotationNumericKeyIsTooBig = "Error parsing annotation for %s: The number exceeds the maximum allowed for its type" ) type annotationConfiguration struct { @@ -1183,9 +1185,24 @@ func (a *annotationConfiguration) setUintWithCheck(f func(uint64) error) error { return nil } +func (a *annotationConfiguration) setFloat32WithCheck(f func(float32) error) error { + if value, ok := a.ocispec.Annotations[a.key]; ok { + float64Value, err := strconv.ParseFloat(value, 32) + if err != nil || float64Value < 0 { + return fmt.Errorf(errAnnotationPositiveNumericKey, a.key) + } + if 
float64Value > math.MaxFloat32 { + return fmt.Errorf(errAnnotationNumericKeyIsTooBig, a.key) + } + float32Value := float32(float64Value) + return f(float32Value) + } + return nil +} + // CalculateSandboxSizing will calculate the number of CPUs and amount of Memory that should // be added to the VM if sandbox annotations are provided with this sizing details -func CalculateSandboxSizing(spec *specs.Spec) (numCPU, memSizeMB uint32) { +func CalculateSandboxSizing(spec *specs.Spec) (numCPU float32, memSizeMB uint32) { var memory, quota int64 var period uint64 var err error @@ -1232,7 +1249,7 @@ func CalculateSandboxSizing(spec *specs.Spec) (numCPU, memSizeMB uint32) { // CalculateContainerSizing will calculate the number of CPUs and amount of memory that is needed // based on the provided LinuxResources -func CalculateContainerSizing(spec *specs.Spec) (numCPU, memSizeMB uint32) { +func CalculateContainerSizing(spec *specs.Spec) (numCPU float32, memSizeMB uint32) { var memory, quota int64 var period uint64 @@ -1254,8 +1271,8 @@ func CalculateContainerSizing(spec *specs.Spec) (numCPU, memSizeMB uint32) { return calculateVMResources(period, quota, memory) } -func calculateVMResources(period uint64, quota int64, memory int64) (numCPU, memSizeMB uint32) { - numCPU = vcutils.CalculateVCpusFromMilliCpus(vcutils.CalculateMilliCPUs(quota, period)) +func calculateVMResources(period uint64, quota int64, memory int64) (numCPU float32, memSizeMB uint32) { + numCPU = vcutils.CalculateMilliCPUs(quota, period) if memory < 0 { // While spec allows for a negative value to indicate unconstrained, we don't diff --git a/src/runtime/pkg/oci/utils_test.go b/src/runtime/pkg/oci/utils_test.go index 4eeaedd115..778db87b55 100644 --- a/src/runtime/pkg/oci/utils_test.go +++ b/src/runtime/pkg/oci/utils_test.go @@ -671,7 +671,7 @@ func TestAddHypervisorAnnotations(t *testing.T) { err := addAnnotations(ocispec, &sbConfig, runtimeConfig) assert.NoError(err) - 
assert.Equal(sbConfig.HypervisorConfig.NumVCPUs, uint32(1)) + assert.Equal(sbConfig.HypervisorConfig.NumVCPUsF, float32(1)) assert.Equal(sbConfig.HypervisorConfig.DefaultMaxVCPUs, uint32(1)) assert.Equal(sbConfig.HypervisorConfig.MemorySize, uint32(1024)) assert.Equal(sbConfig.HypervisorConfig.MemSlots, uint32(20)) @@ -1087,7 +1087,7 @@ func TestCalculateContainerSizing(t *testing.T) { testCases := []struct { spec *specs.Spec - expectedCPU uint32 + expectedCPU float32 expectedMem uint32 }{ { @@ -1152,7 +1152,7 @@ func TestCalculateSandboxSizing(t *testing.T) { testCases := []struct { spec *specs.Spec - expectedCPU uint32 + expectedCPU float32 expectedMem uint32 }{ { diff --git a/src/runtime/virtcontainers/acrn_test.go b/src/runtime/virtcontainers/acrn_test.go index 8c7e646c13..f7ac655a78 100644 --- a/src/runtime/virtcontainers/acrn_test.go +++ b/src/runtime/virtcontainers/acrn_test.go @@ -25,7 +25,7 @@ func newAcrnConfig() HypervisorConfig { ImagePath: testAcrnImagePath, HypervisorPath: testAcrnPath, HypervisorCtlPath: testAcrnCtlPath, - NumVCPUs: defaultVCPUs, + NumVCPUsF: defaultVCPUs, MemorySize: defaultMemSzMiB, BlockDeviceDriver: config.VirtioBlock, DefaultBridges: defaultBridges, diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 9ce491f00b..3a6d5a408a 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -520,7 +520,7 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net clh.vmconfig.Memory.HotplugSize = func(i int64) *int64 { return &i }(int64((utils.MemUnit(hotplugSize) * utils.MiB).ToBytes())) } // Set initial amount of cpu's for the virtual machine - clh.vmconfig.Cpus = chclient.NewCpusConfig(int32(clh.config.NumVCPUs), int32(clh.config.DefaultMaxVCPUs)) + clh.vmconfig.Cpus = chclient.NewCpusConfig(int32(clh.config.NumVCPUs()), int32(clh.config.DefaultMaxVCPUs)) params, err := GetKernelRootParams(hypervisorConfig.RootfsType, 
clh.config.ConfidentialGuest, false) if err != nil { @@ -855,7 +855,7 @@ func (clh *cloudHypervisor) hotplugAddBlockDevice(drive *config.BlockDrive) erro clhDisk.Direct = &clh.config.BlockDeviceCacheDirect } - queues := int32(clh.config.NumVCPUs) + queues := int32(clh.config.NumVCPUs()) queueSize := int32(1024) clhDisk.NumQueues = &queues clhDisk.QueueSize = &queueSize diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index cc559f4414..41bd71650a 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -56,7 +56,7 @@ func newClhConfig() (HypervisorConfig, error) { ImagePath: testClhImagePath, RootfsType: string(EXT4), HypervisorPath: testClhPath, - NumVCPUs: defaultVCPUs, + NumVCPUsF: defaultVCPUs, BlockDeviceDriver: config.VirtioBlock, MemorySize: defaultMemSzMiB, DefaultBridges: defaultBridges, diff --git a/src/runtime/virtcontainers/factory/factory_linux.go b/src/runtime/virtcontainers/factory/factory_linux.go index 86a384d121..4a0cfcfe38 100644 --- a/src/runtime/virtcontainers/factory/factory_linux.go +++ b/src/runtime/virtcontainers/factory/factory_linux.go @@ -71,7 +71,7 @@ func NewFactory(ctx context.Context, config Config, fetchOnly bool) (vc.Factory, } func resetHypervisorConfig(config *vc.VMConfig) { - config.HypervisorConfig.NumVCPUs = 0 + config.HypervisorConfig.NumVCPUsF = 0 config.HypervisorConfig.MemorySize = 0 config.HypervisorConfig.BootToBeTemplate = false config.HypervisorConfig.BootFromTemplate = false @@ -156,8 +156,8 @@ func (f *factory) GetVM(ctx context.Context, config vc.VMConfig) (*vc.VM, error) online := false baseConfig := f.base.Config().HypervisorConfig - if baseConfig.NumVCPUs < hypervisorConfig.NumVCPUs { - err = vm.AddCPUs(ctx, hypervisorConfig.NumVCPUs-baseConfig.NumVCPUs) + if baseConfig.NumVCPUsF < hypervisorConfig.NumVCPUsF { + err = vm.AddCPUs(ctx, hypervisorConfig.NumVCPUs()-baseConfig.NumVCPUs()) if err != nil { return nil, err } diff --git 
a/src/runtime/virtcontainers/factory/factory_test.go b/src/runtime/virtcontainers/factory/factory_test.go index e7c40046b5..850eea0bfd 100644 --- a/src/runtime/virtcontainers/factory/factory_test.go +++ b/src/runtime/virtcontainers/factory/factory_test.go @@ -246,7 +246,7 @@ func TestFactoryGetVM(t *testing.T) { assert.Nil(err) // CPU hotplug - vmConfig.HypervisorConfig.NumVCPUs++ + vmConfig.HypervisorConfig.NumVCPUsF++ vm, err = f.GetVM(ctx, vmConfig) assert.Nil(err) @@ -278,9 +278,9 @@ func TestDeepCompare(t *testing.T) { bar := vc.VMConfig{} assert.True(utils.DeepCompare(foo, bar)) - foo.HypervisorConfig.NumVCPUs = 1 + foo.HypervisorConfig.NumVCPUsF = 1 assert.False(utils.DeepCompare(foo, bar)) - bar.HypervisorConfig.NumVCPUs = 1 + bar.HypervisorConfig.NumVCPUsF = 1 assert.True(utils.DeepCompare(foo, bar)) // slice diff --git a/src/runtime/virtcontainers/fc.go b/src/runtime/virtcontainers/fc.go index 1ecf366b4f..3442edadbc 100644 --- a/src/runtime/virtcontainers/fc.go +++ b/src/runtime/virtcontainers/fc.go @@ -692,7 +692,7 @@ func (fc *firecracker) fcInitConfiguration(ctx context.Context) error { } fc.fcSetVMBaseConfig(ctx, int64(fc.config.MemorySize), - int64(fc.config.NumVCPUs), false) + int64(fc.config.NumVCPUs()), false) kernelPath, err := fc.config.KernelAssetPath() if err != nil { diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index bb0195351c..63d391336b 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -9,6 +9,7 @@ import ( "bufio" "context" "fmt" + "math" "os" "runtime" "strings" @@ -58,7 +59,7 @@ const ( procCPUInfo = "/proc/cpuinfo" - defaultVCPUs = 1 + defaultVCPUs = float32(1) // 2 GiB defaultMemSzMiB = 2048 @@ -524,7 +525,7 @@ type HypervisorConfig struct { ColdPlugVFIO config.PCIePort // NumVCPUs specifies default number of vCPUs for the VM. - NumVCPUs uint32 + NumVCPUsF float32 //DefaultMaxVCPUs specifies the maximum number of vCPUs for the VM. 
DefaultMaxVCPUs uint32 @@ -838,6 +839,14 @@ func (conf *HypervisorConfig) FirmwareVolumeAssetPath() (string, error) { return conf.assetPath(types.FirmwareVolumeAsset) } +func RoundUpNumVCPUs(cpus float32) uint32 { + return uint32(math.Ceil(float64(cpus))) +} + +func (conf HypervisorConfig) NumVCPUs() uint32 { + return RoundUpNumVCPUs(conf.NumVCPUsF) +} + func appendParam(params []Param, parameter string, value string) []Param { return append(params, Param{parameter, value}) } diff --git a/src/runtime/virtcontainers/hypervisor_config_darwin.go b/src/runtime/virtcontainers/hypervisor_config_darwin.go index e074d59ea6..a949adf3a7 100644 --- a/src/runtime/virtcontainers/hypervisor_config_darwin.go +++ b/src/runtime/virtcontainers/hypervisor_config_darwin.go @@ -22,7 +22,7 @@ func validateHypervisorConfig(conf *HypervisorConfig) error { } if conf.NumVCPUs == 0 { - conf.NumVCPUs = defaultVCPUs + conf.NumVCPUsF = defaultVCPUs } if conf.MemorySize == 0 { diff --git a/src/runtime/virtcontainers/hypervisor_config_linux.go b/src/runtime/virtcontainers/hypervisor_config_linux.go index 570fef6a3a..8e34f98b5b 100644 --- a/src/runtime/virtcontainers/hypervisor_config_linux.go +++ b/src/runtime/virtcontainers/hypervisor_config_linux.go @@ -32,8 +32,8 @@ func validateHypervisorConfig(conf *HypervisorConfig) error { return err } - if conf.NumVCPUs == 0 { - conf.NumVCPUs = defaultVCPUs + if conf.NumVCPUsF == 0 { + conf.NumVCPUsF = defaultVCPUs } if conf.MemorySize == 0 { diff --git a/src/runtime/virtcontainers/hypervisor_config_linux_test.go b/src/runtime/virtcontainers/hypervisor_config_linux_test.go index 41cabb1c35..6be2cb3dec 100644 --- a/src/runtime/virtcontainers/hypervisor_config_linux_test.go +++ b/src/runtime/virtcontainers/hypervisor_config_linux_test.go @@ -103,7 +103,7 @@ func TestHypervisorConfigDefaults(t *testing.T) { KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), HypervisorPath: "", - NumVCPUs: defaultVCPUs, 
+ NumVCPUsF: defaultVCPUs, MemorySize: defaultMemSzMiB, DefaultBridges: defaultBridges, BlockDeviceDriver: defaultBlockDriver, diff --git a/src/runtime/virtcontainers/macvtap_endpoint.go b/src/runtime/virtcontainers/macvtap_endpoint.go index d223c0bc76..b3fd9de7a1 100644 --- a/src/runtime/virtcontainers/macvtap_endpoint.go +++ b/src/runtime/virtcontainers/macvtap_endpoint.go @@ -71,13 +71,13 @@ func (endpoint *MacvtapEndpoint) Attach(ctx context.Context, s *Sandbox) error { h := s.hypervisor - endpoint.VMFds, err = createMacvtapFds(endpoint.EndpointProperties.Iface.Index, int(h.HypervisorConfig().NumVCPUs)) + endpoint.VMFds, err = createMacvtapFds(endpoint.EndpointProperties.Iface.Index, int(h.HypervisorConfig().NumVCPUs())) if err != nil { return fmt.Errorf("Could not setup macvtap fds %s: %s", endpoint.EndpointProperties.Iface.Name, err) } if !h.HypervisorConfig().DisableVhostNet { - vhostFds, err := createVhostFds(int(h.HypervisorConfig().NumVCPUs)) + vhostFds, err := createVhostFds(int(h.HypervisorConfig().NumVCPUs())) if err != nil { return fmt.Errorf("Could not setup vhost fds %s : %s", endpoint.EndpointProperties.Iface.Name, err) } diff --git a/src/runtime/virtcontainers/network_linux.go b/src/runtime/virtcontainers/network_linux.go index d4decdd2d2..74ba0659e2 100644 --- a/src/runtime/virtcontainers/network_linux.go +++ b/src/runtime/virtcontainers/network_linux.go @@ -592,7 +592,7 @@ func xConnectVMNetwork(ctx context.Context, endpoint Endpoint, h Hypervisor) err queues := 0 caps := h.Capabilities(ctx) if caps.IsMultiQueueSupported() { - queues = int(h.HypervisorConfig().NumVCPUs) + queues = int(h.HypervisorConfig().NumVCPUs()) } disableVhostNet := h.HypervisorConfig().DisableVhostNet diff --git a/src/runtime/virtcontainers/persist.go b/src/runtime/virtcontainers/persist.go index aa1875a3b2..034c61d312 100644 --- a/src/runtime/virtcontainers/persist.go +++ b/src/runtime/virtcontainers/persist.go @@ -200,7 +200,7 @@ func (s *Sandbox) dumpConfig(ss 
*persistapi.SandboxState) { } ss.Config.HypervisorConfig = persistapi.HypervisorConfig{ - NumVCPUs: sconfig.HypervisorConfig.NumVCPUs, + NumVCPUsF: sconfig.HypervisorConfig.NumVCPUsF, DefaultMaxVCPUs: sconfig.HypervisorConfig.DefaultMaxVCPUs, MemorySize: sconfig.HypervisorConfig.MemorySize, DefaultBridges: sconfig.HypervisorConfig.DefaultBridges, @@ -440,7 +440,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) { hconf := savedConf.HypervisorConfig sconfig.HypervisorConfig = HypervisorConfig{ - NumVCPUs: hconf.NumVCPUs, + NumVCPUsF: hconf.NumVCPUsF, DefaultMaxVCPUs: hconf.DefaultMaxVCPUs, MemorySize: hconf.MemorySize, DefaultBridges: hconf.DefaultBridges, diff --git a/src/runtime/virtcontainers/persist/api/config.go b/src/runtime/virtcontainers/persist/api/config.go index 5b43ab24ab..1e284f3897 100644 --- a/src/runtime/virtcontainers/persist/api/config.go +++ b/src/runtime/virtcontainers/persist/api/config.go @@ -132,7 +132,7 @@ type HypervisorConfig struct { SGXEPCSize int64 // NumVCPUs specifies default number of vCPUs for the VM. - NumVCPUs uint32 + NumVCPUsF float32 //DefaultMaxVCPUs specifies the maximum number of vCPUs for the VM. 
DefaultMaxVCPUs uint32 diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 6fc6416c7c..57c529058f 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -315,7 +315,7 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso } func (q *qemu) cpuTopology() govmmQemu.SMP { - return q.arch.cpuTopology(q.config.NumVCPUs, q.config.DefaultMaxVCPUs) + return q.arch.cpuTopology(q.config.NumVCPUs(), q.config.DefaultMaxVCPUs) } func (q *qemu) memoryTopology() (govmmQemu.Memory, error) { @@ -630,7 +630,7 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi // we must ensure we use something like: // `...,sockets=1,cores=numvcpus,threads=1,...` smp.Sockets = 1 - smp.Cores = q.config.NumVCPUs + smp.Cores = q.config.NumVCPUs() smp.Threads = 1 } } @@ -1590,7 +1590,7 @@ func (q *qemu) hotplugAddBlockDevice(ctx context.Context, drive *config.BlockDri return err } - queues := int(q.config.NumVCPUs) + queues := int(q.config.NumVCPUs()) if err = q.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, addr, bridge.ID, romFile, queues, true, defaultDisableModern); err != nil { return err @@ -1922,7 +1922,7 @@ func (q *qemu) hotplugNetDevice(ctx context.Context, endpoint Endpoint, op Opera addr := "00" bridgeID := fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevices[config.RootPort])) config.PCIeDevices[config.RootPort][devID] = true - return q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridgeID, romFile, int(q.config.NumVCPUs), defaultDisableModern) + return q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridgeID, romFile, int(q.config.NumVCPUs()), defaultDisableModern) } addr, bridge, err := q.arch.addDeviceToBridge(ctx, tap.ID, types.PCI) @@ -1954,9 +1954,9 @@ func (q *qemu) 
hotplugNetDevice(ctx context.Context, endpoint Endpoint, op Opera } if machine.Type == QemuCCWVirtio { devNoHotplug := fmt.Sprintf("fe.%x.%x", bridge.Addr, addr) - return q.qmpMonitorCh.qmp.ExecuteNetCCWDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), devNoHotplug, int(q.config.NumVCPUs)) + return q.qmpMonitorCh.qmp.ExecuteNetCCWDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), devNoHotplug, int(q.config.NumVCPUs())) } - return q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridge.ID, romFile, int(q.config.NumVCPUs), defaultDisableModern) + return q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridge.ID, romFile, int(q.config.NumVCPUs()), defaultDisableModern) } if err := q.arch.removeDeviceFromBridge(tap.ID); err != nil { @@ -2687,9 +2687,9 @@ func calcHotplugMemMiBSize(mem uint32, memorySectionSizeMB uint32) (uint32, erro } func (q *qemu) ResizeVCPUs(ctx context.Context, reqVCPUs uint32) (currentVCPUs uint32, newVCPUs uint32, err error) { - - currentVCPUs = q.config.NumVCPUs + uint32(len(q.state.HotpluggedVCPUs)) + currentVCPUs = q.config.NumVCPUs() + uint32(len(q.state.HotpluggedVCPUs)) newVCPUs = currentVCPUs + switch { case currentVCPUs < reqVCPUs: //hotplug @@ -2716,6 +2716,7 @@ func (q *qemu) ResizeVCPUs(ctx context.Context, reqVCPUs uint32) (currentVCPUs u } newVCPUs -= vCPUsRemoved } + return currentVCPUs, newVCPUs, nil } diff --git a/src/runtime/virtcontainers/qemu_test.go b/src/runtime/virtcontainers/qemu_test.go index b47d234909..4c51517ed5 100644 --- a/src/runtime/virtcontainers/qemu_test.go +++ b/src/runtime/virtcontainers/qemu_test.go @@ -30,7 +30,7 @@ func newQemuConfig() HypervisorConfig { KernelPath: testQemuKernelPath, InitrdPath: testQemuInitrdPath, HypervisorPath: testQemuPath, - NumVCPUs: defaultVCPUs, + NumVCPUsF: defaultVCPUs, MemorySize: defaultMemSzMiB, DefaultBridges: 
defaultBridges, BlockDeviceDriver: defaultBlockDriver, @@ -258,12 +258,12 @@ func TestQemuCreateVMMissingParentDirFail(t *testing.T) { func TestQemuCPUTopology(t *testing.T) { assert := assert.New(t) - vcpus := 1 + vcpus := float32(1) q := &qemu{ arch: &qemuArchBase{}, config: HypervisorConfig{ - NumVCPUs: uint32(vcpus), + NumVCPUsF: vcpus, DefaultMaxVCPUs: uint32(vcpus), }, } diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index a8c3bc5d04..eac9a3a677 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -116,9 +116,9 @@ type SandboxStats struct { type SandboxResourceSizing struct { // The number of CPUs required for the sandbox workload(s) - WorkloadCPUs uint32 + WorkloadCPUs float32 // The base number of CPUs for the VM that are assigned as overhead - BaseCPUs uint32 + BaseCPUs float32 // The amount of memory required for the sandbox workload(s) WorkloadMemMB uint32 // The base amount of memory required for that VM that is assigned as overhead @@ -2168,12 +2168,13 @@ func (s *Sandbox) updateResources(ctx context.Context) error { s.Logger().Debug("no resources updated: static resource management is set") return nil } + sandboxVCPUs, err := s.calculateSandboxCPUs() if err != nil { return err } // Add default vcpus for sandbox - sandboxVCPUs += s.hypervisor.HypervisorConfig().NumVCPUs + sandboxVCPUs += s.hypervisor.HypervisorConfig().NumVCPUsF sandboxMemoryByte, sandboxneedPodSwap, sandboxSwapByte := s.calculateSandboxMemory() @@ -2196,7 +2197,7 @@ func (s *Sandbox) updateResources(ctx context.Context) error { // Update VCPUs s.Logger().WithField("cpus-sandbox", sandboxVCPUs).Debugf("Request to hypervisor to update vCPUs") - oldCPUs, newCPUs, err := s.hypervisor.ResizeVCPUs(ctx, sandboxVCPUs) + oldCPUs, newCPUs, err := s.hypervisor.ResizeVCPUs(ctx, RoundUpNumVCPUs(sandboxVCPUs)) if err != nil { return err } @@ -2392,8 +2393,8 @@ func (s *Sandbox) calculateSandboxMemory() (uint64, 
bool, int64) { return memorySandbox, needPodSwap, swapSandbox } -func (s *Sandbox) calculateSandboxCPUs() (uint32, error) { - mCPU := uint32(0) +func (s *Sandbox) calculateSandboxCPUs() (float32, error) { + mCPU := float32(0) cpusetCount := int(0) for _, c := range s.config.Containers { @@ -2420,10 +2421,10 @@ func (s *Sandbox) calculateSandboxCPUs() (uint32, error) { // 1. BestEffort QoS: no proper support today in Kata. // 2. We could be constrained only by CPUSets. Check for this: if mCPU == 0 && cpusetCount > 0 { - return uint32(cpusetCount), nil + return float32(cpusetCount), nil } - return utils.CalculateVCpusFromMilliCpus(mCPU), nil + return mCPU, nil } // GetHypervisorType is used for getting Hypervisor name currently used. diff --git a/src/runtime/virtcontainers/sandbox_test.go b/src/runtime/virtcontainers/sandbox_test.go index b735c55115..29980fdffb 100644 --- a/src/runtime/virtcontainers/sandbox_test.go +++ b/src/runtime/virtcontainers/sandbox_test.go @@ -126,7 +126,7 @@ func TestCalculateSandboxCPUs(t *testing.T) { tests := []struct { name string containers []ContainerConfig - want uint32 + want float32 }{ {"1-unconstrained", []ContainerConfig{unconstrained}, 0}, {"2-unconstrained", []ContainerConfig{unconstrained, unconstrained}, 0}, diff --git a/src/runtime/virtcontainers/tap_endpoint.go b/src/runtime/virtcontainers/tap_endpoint.go index 4bd88d50fd..2f8ec33532 100644 --- a/src/runtime/virtcontainers/tap_endpoint.go +++ b/src/runtime/virtcontainers/tap_endpoint.go @@ -98,7 +98,7 @@ func (endpoint *TapEndpoint) HotAttach(ctx context.Context, h Hypervisor) error span, ctx := tapTrace(ctx, "HotAttach", endpoint) defer span.End() - if err := tapNetwork(endpoint, h.HypervisorConfig().NumVCPUs, h.HypervisorConfig().DisableVhostNet); err != nil { + if err := tapNetwork(endpoint, h.HypervisorConfig().NumVCPUs(), h.HypervisorConfig().DisableVhostNet); err != nil { networkLogger().WithError(err).Error("Error bridging tap ep") return err } diff --git 
a/src/runtime/virtcontainers/tuntap_endpoint.go b/src/runtime/virtcontainers/tuntap_endpoint.go index c76fef5add..ddd371e9ea 100644 --- a/src/runtime/virtcontainers/tuntap_endpoint.go +++ b/src/runtime/virtcontainers/tuntap_endpoint.go @@ -109,7 +109,7 @@ func (endpoint *TuntapEndpoint) HotAttach(ctx context.Context, h Hypervisor) err span, ctx := tuntapTrace(ctx, "HotAttach", endpoint) defer span.End() - if err := tuntapNetwork(endpoint, h.HypervisorConfig().NumVCPUs, h.HypervisorConfig().DisableVhostNet); err != nil { + if err := tuntapNetwork(endpoint, h.HypervisorConfig().NumVCPUs(), h.HypervisorConfig().DisableVhostNet); err != nil { networkLogger().WithError(err).Error("Error bridging tun/tap ep") return err } diff --git a/src/runtime/virtcontainers/utils/utils.go b/src/runtime/virtcontainers/utils/utils.go index 735cf3a2f5..08f61eded3 100644 --- a/src/runtime/virtcontainers/utils/utils.go +++ b/src/runtime/virtcontainers/utils/utils.go @@ -122,24 +122,17 @@ func WriteToFile(path string, data []byte) error { } // CalculateMilliCPUs converts CPU quota and period to milli-CPUs -func CalculateMilliCPUs(quota int64, period uint64) uint32 { - +func CalculateMilliCPUs(quota int64, period uint64) float32 { // If quota is -1, it means the CPU resource request is // unconstrained. In that case, we don't currently assign // additional CPUs. 
if quota >= 0 && period != 0 { - return uint32((uint64(quota) * 1000) / period) + return float32(quota) / float32(period) } return 0 } -// CalculateVCpusFromMilliCpus converts from mCPU to CPU, taking the ceiling -// value when necessary -func CalculateVCpusFromMilliCpus(mCPU uint32) uint32 { - return (mCPU + 999) / 1000 -} - // GetVirtDriveName returns the disk name format for virtio-blk // Reference: https://github.com/torvalds/linux/blob/master/drivers/block/virtio_blk.c @c0aa3e0916d7e531e69b02e426f7162dfb1c6c0 func GetVirtDriveName(index int) (string, error) { diff --git a/src/runtime/virtcontainers/utils/utils_test.go b/src/runtime/virtcontainers/utils/utils_test.go index bae9af8dbd..977e46288e 100644 --- a/src/runtime/virtcontainers/utils/utils_test.go +++ b/src/runtime/virtcontainers/utils/utils_test.go @@ -151,22 +151,19 @@ func TestCalculateMilliCPUs(t *testing.T) { assert := assert.New(t) n := CalculateMilliCPUs(1, 1) - expected := uint32(1000) + expected := float32(1) assert.Equal(n, expected) n = CalculateMilliCPUs(1, 0) - expected = uint32(0) + expected = float32(0) assert.Equal(n, expected) n = CalculateMilliCPUs(-1, 1) + expected = float32(0) assert.Equal(n, expected) -} -func TestCaluclateVCpusFromMilliCpus(t *testing.T) { - assert := assert.New(t) - - n := CalculateVCpusFromMilliCpus(1) - expected := uint32(1) + n = CalculateMilliCPUs(500, 1000) + expected = float32(0.5) assert.Equal(n, expected) } diff --git a/src/runtime/virtcontainers/vm.go b/src/runtime/virtcontainers/vm.go index 72afa9581c..8c60a8980d 100644 --- a/src/runtime/virtcontainers/vm.go +++ b/src/runtime/virtcontainers/vm.go @@ -160,7 +160,7 @@ func NewVM(ctx context.Context, config VMConfig) (*VM, error) { id: id, hypervisor: hypervisor, agent: agent, - cpu: config.HypervisorConfig.NumVCPUs, + cpu: config.HypervisorConfig.NumVCPUs(), memory: config.HypervisorConfig.MemorySize, store: store, }, nil From 5e9cf759376763aa33c8869e045f1e9d97ea2c44 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 10 Nov 2023 18:14:04 +0100 Subject: [PATCH 3/4] vc: utils: Rename CalculateMilliCPUs() to CalculateCPUsF() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the change done in the last commit, instead of calculating milli cpus, we're actually converting the CPUs to a fraction number, a float. Let's update the function name (and associated vars) to represent that change. Signed-off-by: Fabiano Fidêncio --- src/runtime/pkg/oci/utils.go | 2 +- src/runtime/virtcontainers/sandbox.go | 8 ++++---- src/runtime/virtcontainers/utils/utils.go | 4 ++-- src/runtime/virtcontainers/utils/utils_test.go | 10 +++++----- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index 21590749f0..08759c2066 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -1272,7 +1272,7 @@ func CalculateContainerSizing(spec *specs.Spec) (numCPU float32, memSizeMB uint3 } func calculateVMResources(period uint64, quota int64, memory int64) (numCPU float32, memSizeMB uint32) { - numCPU = vcutils.CalculateMilliCPUs(quota, period) + numCPU = vcutils.CalculateCPUsF(quota, period) if memory < 0 { // While spec allows for a negative value to indicate unconstrained, we don't diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index eac9a3a677..9762467411 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -2394,7 +2394,7 @@ func (s *Sandbox) calculateSandboxMemory() (uint64, bool, int64) { } func (s *Sandbox) calculateSandboxCPUs() (float32, error) { - mCPU := float32(0) + floatCPU := float32(0) cpusetCount := int(0) for _, c := range s.config.Containers { @@ -2406,7 +2406,7 @@ func (s *Sandbox) calculateSandboxCPUs() (float32, error) { if cpu := c.Resources.CPU; cpu != nil { if cpu.Period != nil && cpu.Quota != nil { - mCPU += 
utils.CalculateMilliCPUs(*cpu.Quota, *cpu.Period) + floatCPU += utils.CalculateCPUsF(*cpu.Quota, *cpu.Period) } set, err := cpuset.Parse(cpu.Cpus) @@ -2420,11 +2420,11 @@ func (s *Sandbox) calculateSandboxCPUs() (float32, error) { // If we aren't being constrained, then we could have two scenarios: // 1. BestEffort QoS: no proper support today in Kata. // 2. We could be constrained only by CPUSets. Check for this: - if mCPU == 0 && cpusetCount > 0 { + if floatCPU == 0 && cpusetCount > 0 { return float32(cpusetCount), nil } - return mCPU, nil + return floatCPU, nil } // GetHypervisorType is used for getting Hypervisor name currently used. diff --git a/src/runtime/virtcontainers/utils/utils.go b/src/runtime/virtcontainers/utils/utils.go index 08f61eded3..4893a0fe26 100644 --- a/src/runtime/virtcontainers/utils/utils.go +++ b/src/runtime/virtcontainers/utils/utils.go @@ -121,8 +121,8 @@ func WriteToFile(path string, data []byte) error { return nil } -// CalculateMilliCPUs converts CPU quota and period to milli-CPUs -func CalculateMilliCPUs(quota int64, period uint64) float32 { +// CalculateCPUsF converts CPU quota and period to a fraction number +func CalculateCPUsF(quota int64, period uint64) float32 { // If quota is -1, it means the CPU resource request is // unconstrained. In that case, we don't currently assign // additional CPUs. 
diff --git a/src/runtime/virtcontainers/utils/utils_test.go b/src/runtime/virtcontainers/utils/utils_test.go index 977e46288e..d2e4091060 100644 --- a/src/runtime/virtcontainers/utils/utils_test.go +++ b/src/runtime/virtcontainers/utils/utils_test.go @@ -147,22 +147,22 @@ func TestWriteToFile(t *testing.T) { assert.True(reflect.DeepEqual(testData, data)) } -func TestCalculateMilliCPUs(t *testing.T) { +func TestCalculateCPUsF(t *testing.T) { assert := assert.New(t) - n := CalculateMilliCPUs(1, 1) + n := CalculateCPUsF(1, 1) expected := float32(1) assert.Equal(n, expected) - n = CalculateMilliCPUs(1, 0) + n = CalculateCPUsF(1, 0) expected = float32(0) assert.Equal(n, expected) - n = CalculateMilliCPUs(-1, 1) + n = CalculateCPUsF(-1, 1) expected = float32(0) assert.Equal(n, expected) - n = CalculateMilliCPUs(500, 1000) + n = CalculateCPUsF(500, 1000) expected = float32(0.5) assert.Equal(n, expected) } From 849253e55c9984d6afc065eb266f260f0c00ea10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 9 Nov 2023 21:59:59 +0100 Subject: [PATCH 4/4] tests: Add a simple test to check the VMM vcpu allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we've made some changes in the VMM vcpu allocation, let's introduce basic tests to make sure that we're getting the expected behaviour. The test consists of checking 3 scenarios: * default_vcpus = 0 | no limits set * this should allocate 1 vcpu * default_vcpus = 0.75 | limits set to 0.25 * this should allocate 1 vcpu * default_vcpus = 0.75 | limits set to 1.2 * this should allocate 2 vcpus The tests are very basic, but they do ensure we're rounding things up to what the new logic is supposed to do. 
Signed-off-by: Fabiano Fidêncio --- tests/integration/kubernetes/gha-run.sh | 4 +- .../k8s-sandbox-vcpus-allocation.bats | 40 ++++++++++++++ .../kubernetes/run_kubernetes_tests.sh | 1 + .../pod-sandbox-vcpus-allocation.yaml | 54 +++++++++++++++++++ 4 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 tests/integration/kubernetes/k8s-sandbox-vcpus-allocation.bats create mode 100644 tests/integration/kubernetes/runtimeclass_workloads/pod-sandbox-vcpus-allocation.yaml diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 7dd65644a3..b9ab940984 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -117,9 +117,11 @@ function deploy_kata() { yq write -i "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[4].value' --tag '!!str' "true" # Let the `kata-deploy` create the default `kata` runtime class yq write -i "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[5].value' --tag '!!str' "true" + # Enable 'default_vcpus' hypervisor annotation + yq write -i "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[6].value' "default_vcpus" if [ "${KATA_HOST_OS}" = "cbl-mariner" ]; then - yq write -i "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[6].value' "initrd kernel" + yq write -i "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[6].value' "initrd kernel default_vcpus" yq write -i "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[+].name' "HOST_OS" yq write -i "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[-1].value' "${KATA_HOST_OS}" fi diff --git 
a/tests/integration/kubernetes/k8s-sandbox-vcpus-allocation.bats b/tests/integration/kubernetes/k8s-sandbox-vcpus-allocation.bats new file mode 100644 index 0000000000..2f0a872c00 --- /dev/null +++ b/tests/integration/kubernetes/k8s-sandbox-vcpus-allocation.bats @@ -0,0 +1,40 @@ +#!/usr/bin/env bats +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +load "${BATS_TEST_DIRNAME}/../../common.bash" +load "${BATS_TEST_DIRNAME}/tests_common.sh" +
+setup() { + [ "${KATA_HYPERVISOR}" == "dragonball" ] && \ + skip "runtime-rs is still using the old vcpus allocation algorithm, skipping the test" + + get_pod_config_dir + pods=( "vcpus-less-than-one-with-no-limits" "vcpus-less-than-one-with-limits" "vcpus-more-than-one-with-limits" ) + expected_vcpus=( 1 1 2 ) +} + +@test "Check that the number of vcpus is correctly allocated to the sandbox" { + # Create the pods + kubectl create -f "${pod_config_dir}/pod-sandbox-vcpus-allocation.yaml" + + # Check the pods + for i in {0..2}; do + kubectl wait --for=jsonpath='{.status.conditions[0].reason}'=PodCompleted --timeout=$timeout pod ${pods[$i]} + [ `kubectl logs ${pods[$i]}` -eq ${expected_vcpus[$i]} ] + done +} + +teardown() { + [ "${KATA_HYPERVISOR}" == "dragonball" ] && \ + skip "runtime-rs is still using the old vcpus allocation algorithm, skipping the test" + + for pod in "${pods[@]}"; do + kubectl logs ${pod} + done + + kubectl delete -f "${pod_config_dir}/pod-sandbox-vcpus-allocation.yaml" +} diff --git a/tests/integration/kubernetes/run_kubernetes_tests.sh b/tests/integration/kubernetes/run_kubernetes_tests.sh index 36f7a56c9d..b16c22ae64 100644 --- a/tests/integration/kubernetes/run_kubernetes_tests.sh +++ b/tests/integration/kubernetes/run_kubernetes_tests.sh @@ -60,6 +60,7 @@ else K8S_TEST_NORMAL_HOST_UNION=( \ "k8s-number-cpus.bats" \ "k8s-parallel.bats" \ + "k8s-sandbox-vcpus-allocation.bats" \ "k8s-scale-nginx.bats" \ ) diff --git 
a/tests/integration/kubernetes/runtimeclass_workloads/pod-sandbox-vcpus-allocation.yaml b/tests/integration/kubernetes/runtimeclass_workloads/pod-sandbox-vcpus-allocation.yaml new file mode 100644 index 0000000000..0730840fad --- /dev/null +++ b/tests/integration/kubernetes/runtimeclass_workloads/pod-sandbox-vcpus-allocation.yaml @@ -0,0 +1,54 @@ +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +--- +apiVersion: v1 +kind: Pod +metadata: + name: vcpus-less-than-one-with-no-limits + annotations: + io.katacontainers.config.hypervisor.default_vcpus: "0" +spec: + runtimeClassName: kata + containers: + - name: vcpus-less-than-one-with-no-limits + image: quay.io/prometheus/busybox:latest + command: ['nproc', '--all'] + restartPolicy: Never +--- +apiVersion: v1 +kind: Pod +metadata: + name: vcpus-less-than-one-with-limits + annotations: + io.katacontainers.config.hypervisor.default_vcpus: "0.75" +spec: + runtimeClassName: kata + containers: + - name: vcpus-less-than-one-with-limits + image: quay.io/prometheus/busybox:latest + resources: + limits: + cpu: "0.25" + command: ['nproc', '--all'] + restartPolicy: Never +--- +apiVersion: v1 +kind: Pod +metadata: + name: vcpus-more-than-one-with-limits + annotations: + io.katacontainers.config.hypervisor.default_vcpus: "0.75" +spec: + runtimeClassName: kata + containers: + - name: vcpus-more-than-one-with-limits + image: quay.io/prometheus/busybox:latest + resources: + limits: + cpu: "1.2" + command: ['nproc', '--all'] + restartPolicy: Never +---