From d0f9e6dc36fb0f6cfff95988e27eb3796c4e6bce Mon Sep 17 00:00:00 2001 From: Dmitry Verkhoturov Date: Tue, 30 Aug 2022 15:05:56 +0200 Subject: [PATCH] clarify CPUCFSQuotaPeriod values, set the minimum to 1ms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cpu.cfs_period_us is measured in microseconds in the kernel but provided in time.Duration by the user, that change clarifies the code to make this evident to the reader. Also, the minimum value for that feature is 1ms and not 1μs, and this change alters the validation to reject values smaller than 1ms. --- pkg/generated/openapi/zz_generated.openapi.go | 2 +- .../apis/config/validation/validation.go | 4 ++-- .../apis/config/validation/validation_test.go | 8 ++++---- pkg/kubelet/cm/container_manager_linux.go | 2 ++ pkg/kubelet/cm/helpers_linux.go | 12 ++++++++---- pkg/kubelet/cm/helpers_linux_test.go | 18 +++++++++--------- .../kuberuntime/fake_kuberuntime_manager.go | 2 +- pkg/kubelet/kuberuntime/helpers_linux.go | 12 ++++++++---- .../kuberuntime/kuberuntime_container_linux.go | 2 ++ .../src/k8s.io/kubelet/config/v1beta1/types.go | 2 +- 10 files changed, 38 insertions(+), 26 deletions(-) diff --git a/pkg/generated/openapi/zz_generated.openapi.go b/pkg/generated/openapi/zz_generated.openapi.go index 7090f4a835f..35fba242bf4 100644 --- a/pkg/generated/openapi/zz_generated.openapi.go +++ b/pkg/generated/openapi/zz_generated.openapi.go @@ -54495,7 +54495,7 @@ func schema_k8sio_kubelet_config_v1beta1_KubeletConfiguration(ref common.Referen }, "cpuCFSQuotaPeriod": { SchemaProps: spec.SchemaProps{ - Description: "cpuCFSQuotaPeriod is the CPU CFS quota period value, `cpu.cfs_period_us`. The value must be between 1 us and 1 second, inclusive. Requires the CustomCPUCFSQuotaPeriod feature gate to be enabled. Default: \"100ms\"", + Description: "cpuCFSQuotaPeriod is the CPU CFS quota period value, `cpu.cfs_period_us`. The value must be between 1 ms and 1 second, inclusive. Requires the CustomCPUCFSQuotaPeriod feature gate to be enabled. Default: \"100ms\"", Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"), }, }, diff --git a/pkg/kubelet/apis/config/validation/validation.go b/pkg/kubelet/apis/config/validation/validation.go index afd43d70b8e..dc1c2b7c007 100644 --- a/pkg/kubelet/apis/config/validation/validation.go +++ b/pkg/kubelet/apis/config/validation/validation.go @@ -71,8 +71,8 @@ func ValidateKubeletConfiguration(kc *kubeletconfig.KubeletConfiguration, featur if !localFeatureGate.Enabled(features.CPUCFSQuotaPeriod) && kc.CPUCFSQuotaPeriod != defaultCFSQuota { allErrors = append(allErrors, fmt.Errorf("invalid configuration: cpuCFSQuotaPeriod (--cpu-cfs-quota-period) %v requires feature gate CustomCPUCFSQuotaPeriod", kc.CPUCFSQuotaPeriod)) } - if localFeatureGate.Enabled(features.CPUCFSQuotaPeriod) && utilvalidation.IsInRange(int(kc.CPUCFSQuotaPeriod.Duration), int(1*time.Microsecond), int(time.Second)) != nil { - allErrors = append(allErrors, fmt.Errorf("invalid configuration: cpuCFSQuotaPeriod (--cpu-cfs-quota-period) %v must be between 1usec and 1sec, inclusive", kc.CPUCFSQuotaPeriod)) + if localFeatureGate.Enabled(features.CPUCFSQuotaPeriod) && utilvalidation.IsInRange(int(kc.CPUCFSQuotaPeriod.Duration), int(1*time.Millisecond), int(time.Second)) != nil { + allErrors = append(allErrors, fmt.Errorf("invalid configuration: cpuCFSQuotaPeriod (--cpu-cfs-quota-period) %v must be between 1ms and 1sec, inclusive", kc.CPUCFSQuotaPeriod)) } if utilvalidation.IsInRange(int(kc.ImageGCHighThresholdPercent), 0, 100) != nil { allErrors = append(allErrors, fmt.Errorf("invalid configuration: imageGCHighThresholdPercent (--image-gc-high-threshold) %v must be between 0 and 100, inclusive", kc.ImageGCHighThresholdPercent)) diff --git a/pkg/kubelet/apis/config/validation/validation_test.go b/pkg/kubelet/apis/config/validation/validation_test.go index e6aa09596b0..2ff921ec9bf 100644 --- a/pkg/kubelet/apis/config/validation/validation_test.go +++ b/pkg/kubelet/apis/config/validation/validation_test.go @@ -59,7 +59,7 @@ var ( RegistryPullQPS: 5, HairpinMode: kubeletconfig.PromiscuousBridge, NodeLeaseDurationSeconds: 1, - CPUCFSQuotaPeriod: metav1.Duration{Duration: 25 * time.Microsecond}, + CPUCFSQuotaPeriod: metav1.Duration{Duration: 25 * time.Millisecond}, TopologyManagerScope: kubeletconfig.PodTopologyManagerScope, TopologyManagerPolicy: kubeletconfig.SingleNumaNodeTopologyManagerPolicy, ShutdownGracePeriod: metav1.Duration{Duration: 30 * time.Second}, @@ -145,10 +145,10 @@ func TestValidateKubeletConfiguration(t *testing.T) { name: "specify CPUCFSQuotaPeriod without enabling CPUCFSQuotaPeriod", configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { conf.FeatureGates = map[string]bool{"CustomCPUCFSQuotaPeriod": false} - conf.CPUCFSQuotaPeriod = metav1.Duration{Duration: 200 * time.Microsecond} + conf.CPUCFSQuotaPeriod = metav1.Duration{Duration: 200 * time.Millisecond} return conf }, - errMsg: "invalid configuration: cpuCFSQuotaPeriod (--cpu-cfs-quota-period) {200µs} requires feature gate CustomCPUCFSQuotaPeriod", + errMsg: "invalid configuration: cpuCFSQuotaPeriod (--cpu-cfs-quota-period) {200ms} requires feature gate CustomCPUCFSQuotaPeriod", }, { name: "invalid CPUCFSQuotaPeriod", @@ -157,7 +157,7 @@ func TestValidateKubeletConfiguration(t *testing.T) { conf.CPUCFSQuotaPeriod = metav1.Duration{Duration: 2 * time.Second} return conf }, - errMsg: "invalid configuration: cpuCFSQuotaPeriod (--cpu-cfs-quota-period) {2s} must be between 1usec and 1sec, inclusive", + errMsg: "invalid configuration: cpuCFSQuotaPeriod (--cpu-cfs-quota-period) {2s} must be between 1ms and 1sec, inclusive", }, { name: "invalid ImageGCHighThresholdPercent", diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go index 44c8cda6c40..c72b185e2bb 100644 --- a/pkg/kubelet/cm/container_manager_linux.go +++ b/pkg/kubelet/cm/container_manager_linux.go @@ -359,6 +359,8 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager { cgroupManager: cm.cgroupManager, podPidsLimit: cm.ExperimentalPodPidsLimit, enforceCPULimits: cm.EnforceCPULimits, + // cpuCFSQuotaPeriod is in microseconds. NodeConfig.CPUCFSQuotaPeriod is time.Duration (measured in nano seconds). + // Convert (cm.CPUCFSQuotaPeriod) [nanoseconds] / time.Microsecond (1000) to get cpuCFSQuotaPeriod in microseconds. cpuCFSQuotaPeriod: uint64(cm.CPUCFSQuotaPeriod / time.Microsecond), } } diff --git a/pkg/kubelet/cm/helpers_linux.go b/pkg/kubelet/cm/helpers_linux.go index 5c6e5938525..e0292496fe9 100644 --- a/pkg/kubelet/cm/helpers_linux.go +++ b/pkg/kubelet/cm/helpers_linux.go @@ -44,16 +44,20 @@ const ( SharesPerCPU = 1024 MilliCPUToCPU = 1000 - // 100000 is equivalent to 100usec - QuotaPeriod = 100000 + // 100000 microseconds is equivalent to 100ms + QuotaPeriod = 100000 + // 1000 microseconds is equivalent to 1ms + // defined here: + // https://github.com/torvalds/linux/blob/cac03ac368fabff0122853de2422d4e17a32de08/kernel/sched/core.c#L10546 MinQuotaPeriod = 1000 ) // MilliCPUToQuota converts milliCPU to CFS quota and period values. +// Input parameters and resulting value is number of microseconds. func MilliCPUToQuota(milliCPU int64, period int64) (quota int64) { // CFS quota is measured in two values: - // - cfs_period_us=100usec (the amount of time to measure usage across given by period) - // - cfs_quota=20usec (the amount of cpu time allowed to be used across a period) + // - cfs_period_us=100ms (the amount of time to measure usage across given by period) + // - cfs_quota=20ms (the amount of cpu time allowed to be used across a period) // so in the above example, you are limited to 20% of a single CPU // for multi-cpu environments, you just scale equivalent amounts // see https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt for details diff --git a/pkg/kubelet/cm/helpers_linux_test.go b/pkg/kubelet/cm/helpers_linux_test.go index 101b21e682a..9296ea29e2a 100644 --- a/pkg/kubelet/cm/helpers_linux_test.go +++ b/pkg/kubelet/cm/helpers_linux_test.go @@ -54,8 +54,8 @@ func getResourceRequirements(requests, limits v1.ResourceList) v1.ResourceRequir } func TestResourceConfigForPod(t *testing.T) { - defaultQuotaPeriod := uint64(100 * time.Millisecond / time.Microsecond) - tunedQuotaPeriod := uint64(5 * time.Millisecond / time.Microsecond) + defaultQuotaPeriod := uint64(100 * time.Millisecond / time.Microsecond) // in microseconds + tunedQuotaPeriod := uint64(5 * time.Millisecond / time.Microsecond) // in microseconds minShares := uint64(MinShares) burstableShares := MilliCPUToShares(100) @@ -73,7 +73,7 @@ func TestResourceConfigForPod(t *testing.T) { pod *v1.Pod expected *ResourceConfig enforceCPULimits bool - quotaPeriod uint64 + quotaPeriod uint64 // in microseconds }{ "besteffort": { pod: &v1.Pod{ @@ -296,8 +296,8 @@ func TestResourceConfigForPod(t *testing.T) { } func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) { - defaultQuotaPeriod := uint64(100 * time.Millisecond / time.Microsecond) - tunedQuotaPeriod := uint64(5 * time.Millisecond / time.Microsecond) + defaultQuotaPeriod := uint64(100 * time.Millisecond / time.Microsecond) // in microseconds + tunedQuotaPeriod := uint64(5 * time.Millisecond / time.Microsecond) // in microseconds tunedQuota := int64(1 * time.Millisecond / time.Microsecond) defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUCFSQuotaPeriod, true)() @@ -318,7 +318,7 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) { pod *v1.Pod expected *ResourceConfig enforceCPULimits bool - quotaPeriod uint64 + quotaPeriod uint64 // in microseconds }{ "besteffort": { pod: &v1.Pod{ @@ -650,8 +650,8 @@ func TestHugePageLimits(t *testing.T) { } func TestResourceConfigForPodWithEnforceMemoryQoS(t *testing.T) { - defaultQuotaPeriod := uint64(100 * time.Millisecond / time.Microsecond) - tunedQuotaPeriod := uint64(5 * time.Millisecond / time.Microsecond) + defaultQuotaPeriod := uint64(100 * time.Millisecond / time.Microsecond) // in microseconds + tunedQuotaPeriod := uint64(5 * time.Millisecond / time.Microsecond) // in microseconds minShares := uint64(MinShares) burstableShares := MilliCPUToShares(100) @@ -669,7 +669,7 @@ func TestResourceConfigForPodWithEnforceMemoryQoS(t *testing.T) { pod *v1.Pod expected *ResourceConfig enforceCPULimits bool - quotaPeriod uint64 + quotaPeriod uint64 // in microseconds }{ "besteffort": { pod: &v1.Pod{ diff --git a/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go b/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go index e62866eb1b7..1ef1b22a2aa 100644 --- a/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go +++ b/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go @@ -91,7 +91,7 @@ func newFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageS kubeRuntimeManager := &kubeGenericRuntimeManager{ recorder: recorder, cpuCFSQuota: false, - cpuCFSQuotaPeriod: metav1.Duration{Duration: time.Microsecond * 100}, + cpuCFSQuotaPeriod: metav1.Duration{Duration: time.Millisecond * 100}, livenessManager: proberesults.NewManager(), startupManager: proberesults.NewManager(), machineInfo: machineInfo, diff --git a/pkg/kubelet/kuberuntime/helpers_linux.go b/pkg/kubelet/kuberuntime/helpers_linux.go index 33616de3850..a96cbb6dfb5 100644 --- a/pkg/kubelet/kuberuntime/helpers_linux.go +++ b/pkg/kubelet/kuberuntime/helpers_linux.go @@ -22,16 +22,20 @@ package kuberuntime const ( milliCPUToCPU = 1000 - // 100000 is equivalent to 100usec - quotaPeriod = 100000 + // 100000 microseconds is equivalent to 100ms + quotaPeriod = 100000 + // 1000 microseconds is equivalent to 1ms + // defined here: + // https://github.com/torvalds/linux/blob/cac03ac368fabff0122853de2422d4e17a32de08/kernel/sched/core.c#L10546 minQuotaPeriod = 1000 ) // milliCPUToQuota converts milliCPU to CFS quota and period values +// Input parameters and resulting value is number of microseconds. func milliCPUToQuota(milliCPU int64, period int64) (quota int64) { // CFS quota is measured in two values: - // - cfs_period_us=100usec (the amount of time to measure usage across given by period) - // - cfs_quota=20usec (the amount of cpu time allowed to be used across a period) + // - cfs_period_us=100ms (the amount of time to measure usage across) + // - cfs_quota=20ms (the amount of cpu time allowed to be used across a period) // so in the above example, you are limited to 20% of a single CPU // for multi-cpu environments, you just scale equivalent amounts // see https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt for details diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go b/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go index 3bb4e24dfcd..7bcc139e7e8 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go @@ -166,6 +166,8 @@ func (m *kubeGenericRuntimeManager) calculateLinuxResources(cpuRequest, cpuLimit // to allow full usage of cpu resource. cpuPeriod := int64(quotaPeriod) if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.CPUCFSQuotaPeriod) { + // kubeGenericRuntimeManager.cpuCFSQuotaPeriod is provided in time.Duration, + // but we need to convert it to number of microseconds which is used by kernel. cpuPeriod = int64(m.cpuCFSQuotaPeriod.Duration / time.Microsecond) } cpuQuota := milliCPUToQuota(cpuLimit.MilliValue(), cpuPeriod) diff --git a/staging/src/k8s.io/kubelet/config/v1beta1/types.go b/staging/src/k8s.io/kubelet/config/v1beta1/types.go index aa67206ee40..b68f8474e65 100644 --- a/staging/src/k8s.io/kubelet/config/v1beta1/types.go +++ b/staging/src/k8s.io/kubelet/config/v1beta1/types.go @@ -440,7 +440,7 @@ type KubeletConfiguration struct { // +optional CPUCFSQuota *bool `json:"cpuCFSQuota,omitempty"` // cpuCFSQuotaPeriod is the CPU CFS quota period value, `cpu.cfs_period_us`. - // The value must be between 1 us and 1 second, inclusive. + // The value must be between 1 ms and 1 second, inclusive. // Requires the CustomCPUCFSQuotaPeriod feature gate to be enabled. // Default: "100ms" // +optional