From 19acf7d0515a8cda3be9822a6560e5be2b295bc4 Mon Sep 17 00:00:00 2001 From: Patrick Lang Date: Mon, 9 Dec 2019 18:55:11 -0800 Subject: [PATCH 1/3] Fix cpu resource limit on Windows --- pkg/kubelet/dockershim/BUILD | 1 + pkg/kubelet/dockershim/helpers_windows.go | 10 ++-- pkg/kubelet/kuberuntime/helpers_windows.go | 1 + .../kuberuntime_container_windows.go | 47 ++++++++++++------- 4 files changed, 37 insertions(+), 22 deletions(-) diff --git a/pkg/kubelet/dockershim/BUILD b/pkg/kubelet/dockershim/BUILD index c3d42a1ff47..6fd54ad24d2 100644 --- a/pkg/kubelet/dockershim/BUILD +++ b/pkg/kubelet/dockershim/BUILD @@ -78,6 +78,7 @@ go_library( "//pkg/kubelet/apis:go_default_library", "//pkg/kubelet/winstats:go_default_library", "//vendor/github.com/Microsoft/hcsshim:go_default_library", + "//vendor/github.com/docker/docker/pkg/sysinfo:go_default_library", "//vendor/golang.org/x/sys/windows/registry:go_default_library", ], "//conditions:default": [], diff --git a/pkg/kubelet/dockershim/helpers_windows.go b/pkg/kubelet/dockershim/helpers_windows.go index 09ff2335f8f..42d32839b5b 100644 --- a/pkg/kubelet/dockershim/helpers_windows.go +++ b/pkg/kubelet/dockershim/helpers_windows.go @@ -25,6 +25,7 @@ import ( dockertypes "github.com/docker/docker/api/types" dockercontainer "github.com/docker/docker/api/types/container" dockerfilters "github.com/docker/docker/api/types/filters" + "github.com/docker/docker/pkg/sysinfo" "k8s.io/klog" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" @@ -69,11 +70,12 @@ func (ds *dockerService) updateCreateConfig( if wc := config.GetWindows(); wc != nil { rOpts := wc.GetResources() if rOpts != nil { + // Precedence and units for these are described at length in kuberuntime_container_windows.go - generateWindowsContainerConfig() createConfig.HostConfig.Resources = dockercontainer.Resources{ - Memory: rOpts.MemoryLimitInBytes, - CPUShares: rOpts.CpuShares, - CPUCount: rOpts.CpuCount, - CPUPercent: rOpts.CpuMaximum, + Memory: rOpts.MemoryLimitInBytes, + CPUShares: rOpts.CpuShares, + CPUCount: rOpts.CpuCount, + NanoCPUs: rOpts.CpuMaximum * int64(sysinfo.NumCPU()) * (1e9 / 10000), } } diff --git a/pkg/kubelet/kuberuntime/helpers_windows.go b/pkg/kubelet/kuberuntime/helpers_windows.go index 2009fad8d0f..dc4b212beac 100644 --- a/pkg/kubelet/kuberuntime/helpers_windows.go +++ b/pkg/kubelet/kuberuntime/helpers_windows.go @@ -30,6 +30,7 @@ const ( milliCPUToCPU = 1000 ) +// TODO: remove - may be dead code // milliCPUToShares converts milliCPU to CPU shares func milliCPUToShares(milliCPU int64, hyperv bool) int64 { var minShares int64 = minSharesProcess diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container_windows.go b/pkg/kubelet/kuberuntime/kuberuntime_container_windows.go index a782802d30b..7adb387d4f2 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container_windows.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container_windows.go @@ -53,7 +53,6 @@ func (m *kubeGenericRuntimeManager) generateWindowsContainerConfig(container *v1 SecurityContext: &runtimeapi.WindowsContainerSecurityContext{}, } - cpuRequest := container.Resources.Requests.Cpu() cpuLimit := container.Resources.Limits.Cpu() isolatedByHyperv := kubeletapis.ShouldIsolatedByHyperV(pod.Annotations) if !cpuLimit.IsZero() { @@ -61,7 +60,35 @@ func (m *kubeGenericRuntimeManager) generateWindowsContainerConfig(container *v1 // as only 64 processors are available for execution by a given process. This causes // some oddities on systems with more than 64 processors. // Refer https://msdn.microsoft.com/en-us/library/windows/desktop/dd405503(v=vs.85).aspx. + + // Since Kubernetes doesn't have any notion of weight in the Pod/Container API, only limits/reserves, then applying CpuMaximum only + // will better follow the intent of the user. At one point CpuWeights were set, but this prevented limits from having any effect. + + // There are 3 parts to how this works: + // Part one - Windows kernel + // cpuMaximum is documented at https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/resource-controls + // the range and how it relates to number of CPUs is at https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-jobobject_cpu_rate_control_information + // For process isolation, these are applied to the job object setting JOB_OBJECT_CPU_RATE_CONTROL_ENABLE, which can be set to either + // JOB_OBJECT_CPU_RATE_CONTROL_WEIGHT_BASED or JOB_OBJECT_CPU_RATE_CONTROL_HARD_CAP. This is why the settings are mutually exclusive. + // Part two - Docker (doc: https://docs.docker.com/engine/api/v1.30) + // If both CpuWeight and CpuMaximum are passed to Docker, then it sets + // JOB_OBJECT_CPU_RATE_CONTROL_ENABLE = JOB_OBJECT_CPU_RATE_CONTROL_WEIGHT_BASED ignoring CpuMaximum. + // Option a: Set HostConfig.CpuPercent. The units are whole percent of the total CPU capacity of the system, meaning the resolution + // is different based on the number of cores. + // Option b: Set HostConfig.NanoCpus integer - CPU quota in units of 10e-9 CPUs. Moby scales this to the Windows job object + // resolution of 1-10000, so it's higher resolution than option a. + // src: https://github.com/moby/moby/blob/10866714412aea1bb587d1ad14b2ce1ba4cf4308/daemon/oci_windows.go#L426 + // Part three - CRI & ContainerD's implementation + // The kubelet sets these directly on CGroups in Linux, but needs to pass them across CRI on Windows. + // There is an existing cpu_maximum field, with a range of percent * 100, so 1-10000. This is different from Docker, but consistent with OCI + // https://github.com/kubernetes/kubernetes/blob/56d1c3b96d0a544130a82caad33dd57629b8a7f8/staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2/api.proto#L681-L682 + // https://github.com/opencontainers/runtime-spec/blob/ad53dcdc39f1f7f7472b10aa0a45648fe4865496/config-windows.md#cpu + // If both CpuWeight and CpuMaximum are set - ContainerD catches this invalid case and returns an error instead. + cpuMaximum := 10000 * cpuLimit.MilliValue() / int64(sysinfo.NumCPU()) / 1000 + + // TODO: This should be reviewed or removed once Hyper-V support is implemented with CRI-ContainerD + // in a future release. cpuCount may or may not be required if cpuMaximum is set. if isolatedByHyperv { cpuCount := int64(cpuLimit.MilliValue()+999) / 1000 wc.Resources.CpuCount = cpuCount @@ -80,31 +107,15 @@ func (m *kubeGenericRuntimeManager) generateWindowsContainerConfig(container *v1 wc.Resources.CpuMaximum = cpuMaximum } - cpuShares := milliCPUToShares(cpuLimit.MilliValue(), isolatedByHyperv) - if cpuShares == 0 { - cpuShares = milliCPUToShares(cpuRequest.MilliValue(), isolatedByHyperv) - } - wc.Resources.CpuShares = cpuShares - if !isolatedByHyperv { // The processor resource controls are mutually exclusive on // Windows Server Containers, the order of precedence is - // CPUCount first, then CPUShares, and CPUMaximum last. + // CPUCount first, then CPUMaximum. if wc.Resources.CpuCount > 0 { - if wc.Resources.CpuShares > 0 { - wc.Resources.CpuShares = 0 - klog.Warningf("Mutually exclusive options: CPUCount priority > CPUShares priority on Windows Server Containers. CPUShares should be ignored") - } if wc.Resources.CpuMaximum > 0 { wc.Resources.CpuMaximum = 0 klog.Warningf("Mutually exclusive options: CPUCount priority > CPUMaximum priority on Windows Server Containers. CPUMaximum should be ignored") } - } else if wc.Resources.CpuShares > 0 { - if wc.Resources.CpuMaximum > 0 { - wc.Resources.CpuMaximum = 0 - klog.Warningf("Mutually exclusive options: CPUShares priority > CPUMaximum priority on Windows Server Containers. CPUMaximum should be ignored") - } - } } From 63ff616aa8da7c50d638b20cacefaba8fb6b67f4 Mon Sep 17 00:00:00 2001 From: Patrick Lang Date: Wed, 11 Dec 2019 15:28:26 -0800 Subject: [PATCH 2/3] Adding Windows CPU limit tests --- pkg/kubelet/kuberuntime/BUILD | 1 - .../kuberuntime/helpers_unsupported.go | 2 +- pkg/kubelet/kuberuntime/helpers_windows.go | 56 ------- test/e2e/windows/BUILD | 1 + test/e2e/windows/cpu_limits.go | 138 ++++++++++++++++++ 5 files changed, 140 insertions(+), 58 deletions(-) delete mode 100644 pkg/kubelet/kuberuntime/helpers_windows.go create mode 100644 test/e2e/windows/cpu_limits.go diff --git a/pkg/kubelet/kuberuntime/BUILD b/pkg/kubelet/kuberuntime/BUILD index a79952b541e..3b3d3feeff7 100644 --- a/pkg/kubelet/kuberuntime/BUILD +++ b/pkg/kubelet/kuberuntime/BUILD @@ -14,7 +14,6 @@ go_library( "helpers.go", "helpers_linux.go", "helpers_unsupported.go", - "helpers_windows.go", "instrumented_services.go", "kuberuntime_container.go", "kuberuntime_container_linux.go", diff --git a/pkg/kubelet/kuberuntime/helpers_unsupported.go b/pkg/kubelet/kuberuntime/helpers_unsupported.go index 1e99c4f9317..cc1e88a5bef 100644 --- a/pkg/kubelet/kuberuntime/helpers_unsupported.go +++ b/pkg/kubelet/kuberuntime/helpers_unsupported.go @@ -1,4 +1,4 @@ -// +build !linux,!windows +// +build !linux /* Copyright 2018 The Kubernetes Authors. diff --git a/pkg/kubelet/kuberuntime/helpers_windows.go b/pkg/kubelet/kuberuntime/helpers_windows.go deleted file mode 100644 index dc4b212beac..00000000000 --- a/pkg/kubelet/kuberuntime/helpers_windows.go +++ /dev/null @@ -1,56 +0,0 @@ -// +build windows - -/* -Copyright 2018 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package kuberuntime - -import ( - "github.com/docker/docker/pkg/sysinfo" -) - -const ( - // Taken from https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/resource-controls - minSharesProcess = 5000 - minSharesHyperV = 10 - maxShares = 10000 - milliCPUToCPU = 1000 -) - -// TODO: remove - may be dead code -// milliCPUToShares converts milliCPU to CPU shares -func milliCPUToShares(milliCPU int64, hyperv bool) int64 { - var minShares int64 = minSharesProcess - if hyperv { - minShares = minSharesHyperV - } - - if milliCPU == 0 { - // Return here to really match kernel default for zero milliCPU. - return minShares - } - - // Conceptually (milliCPU / milliCPUToCPU) * sharesPerCPU, but factored to improve rounding. - totalCPU := sysinfo.NumCPU() - shares := (milliCPU * (maxShares - minShares)) / int64(totalCPU) / milliCPUToCPU - if shares < minShares { - return minShares - } - if shares > maxShares { - return maxShares - } - return shares -} diff --git a/test/e2e/windows/BUILD b/test/e2e/windows/BUILD index 4bc0683042e..9f67136bc2b 100644 --- a/test/e2e/windows/BUILD +++ b/test/e2e/windows/BUILD @@ -5,6 +5,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library") go_library( name = "go_default_library", srcs = [ + "cpu_limits.go", "density.go", "dns.go", "framework.go", diff --git a/test/e2e/windows/cpu_limits.go b/test/e2e/windows/cpu_limits.go new file mode 100644 index 00000000000..79efefbae63 --- /dev/null +++ b/test/e2e/windows/cpu_limits.go @@ -0,0 +1,138 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package windows + +import ( + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/uuid" + "k8s.io/kubernetes/test/e2e/framework" + e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet" + imageutils "k8s.io/kubernetes/test/utils/image" + "time" + + "github.com/onsi/ginkgo" +) + +var _ = SIGDescribe("[Feature:Windows] Cpu Resources", func() { + f := framework.NewDefaultFramework("cpu-resources-test-windows") + + // The Windows 'BusyBox' image is PowerShell plus a collection of scripts and utilities to mimic common busybox commands + powershellImage := imageutils.GetConfig(imageutils.BusyBox) + + ginkgo.Context("Container limits", func() { + ginkgo.It("should not be exceeded after waiting 2 minutes", func() { + ginkgo.By("Creating one pod with limit set to '0.5'") + podsDecimal := newCPUBurnPods(1, powershellImage, "0.5", "1Gi") + f.PodClient().CreateBatch(podsDecimal) + ginkgo.By("Creating one pod with limit set to '500m'") + podsMilli := newCPUBurnPods(1, powershellImage, "500m", "1Gi") + f.PodClient().CreateBatch(podsMilli) + ginkgo.By("Waiting 2 minutes") + time.Sleep(2 * time.Minute) + ginkgo.By("Ensuring pods are still running") + var allPods [](*v1.Pod) + for _, p := range podsDecimal { + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(p.Name, + metav1.GetOptions{}) + framework.ExpectNoError(err, "Error retrieving pod") + framework.ExpectEqual(pod.Status.Phase, v1.PodRunning) + allPods = append(allPods, pod) + } + for _, p := range podsMilli { + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(p.Name, + metav1.GetOptions{}) + framework.ExpectNoError(err, "Error retrieving pod") + framework.ExpectEqual(pod.Status.Phase, v1.PodRunning) + allPods = append(allPods, pod) + } + ginkgo.By("Ensuring cpu doesn't exceed limit by >5%") + for _, p := range allPods { + ginkgo.By("Gathering node summary stats") + nodeStats, err := e2ekubelet.GetStatsSummary(f.ClientSet, p.Spec.NodeName) + framework.ExpectNoError(err, "Error grabbing node summary stats") + found := false + cpuUsage := float64(0) + for _, pod := range nodeStats.Pods { + if pod.PodRef.Name != p.Name || pod.PodRef.Namespace != p.Namespace { + continue + } + cpuUsage = float64(*pod.CPU.UsageNanoCores) * 1e-9 + found = true + break + } + framework.ExpectEqual(found, true, "Found pod in stats summary") + framework.Logf("Pod %s usage: %v", p.Name, cpuUsage) + framework.ExpectEqual(cpuUsage > 0, true, "Pods reported usage should be > 0") + framework.ExpectEqual((.5*1.05) > cpuUsage, true, "Pods reported usage should not exceed limit by >5%") + } + }) + }) +}) + +// newCPUBurnPods creates a list of pods (specification) with a workload that will consume all available CPU resources up to container limit +func newCPUBurnPods(numPods int, image imageutils.Config, cpuLimit string, memoryLimit string) []*v1.Pod { + var pods []*v1.Pod + + memLimitQuantity, err := resource.ParseQuantity(memoryLimit) + framework.ExpectNoError(err) + + cpuLimitQuantity, err := resource.ParseQuantity(cpuLimit) + framework.ExpectNoError(err) + + for i := 0; i < numPods; i++ { + + podName := "cpulimittest-" + string(uuid.NewUUID()) + pod := v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + Labels: map[string]string{ + "name": podName, + "testapp": "cpuburn", + }, + }, + Spec: v1.PodSpec{ + // Restart policy is always (default). + Containers: []v1.Container{ + { + Image: image.GetE2EImage(), + Name: podName, + Resources: v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceMemory: memLimitQuantity, + v1.ResourceCPU: cpuLimitQuantity, + }, + }, + Command: []string{ + "powershell.exe", + "-Command", + "foreach ($loopnumber in 1..8) { Start-Job -ScriptBlock { $result = 1; foreach($mm in 1..2147483647){$res1=1;foreach($num in 1..2147483647){$res1=$mm*$num*1340371};$res1} } } ; get-job | wait-job", + }, + }, + }, + NodeSelector: map[string]string{ + "beta.kubernetes.io/os": "windows", + }, + }, + } + + pods = append(pods, &pod) + } + + return pods +} From 886214f48c6141fa220a248149e51837c8fa2fa5 Mon Sep 17 00:00:00 2001 From: Patrick Lang Date: Mon, 24 Feb 2020 19:53:25 +0000 Subject: [PATCH 3/3] Fix recent context change after rebase --- test/e2e/windows/cpu_limits.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/test/e2e/windows/cpu_limits.go b/test/e2e/windows/cpu_limits.go index 79efefbae63..c669c69f55e 100644 --- a/test/e2e/windows/cpu_limits.go +++ b/test/e2e/windows/cpu_limits.go @@ -17,6 +17,8 @@ limitations under the License. package windows import ( + "context" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -48,14 +50,18 @@ var _ = SIGDescribe("[Feature:Windows] Cpu Resources", func() { ginkgo.By("Ensuring pods are still running") var allPods [](*v1.Pod) for _, p := range podsDecimal { - pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(p.Name, + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get( + context.TODO(), + p.Name, metav1.GetOptions{}) framework.ExpectNoError(err, "Error retrieving pod") framework.ExpectEqual(pod.Status.Phase, v1.PodRunning) allPods = append(allPods, pod) } for _, p := range podsMilli { - pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(p.Name, + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get( + context.TODO(), + p.Name, metav1.GetOptions{}) framework.ExpectNoError(err, "Error retrieving pod") framework.ExpectEqual(pod.Status.Phase, v1.PodRunning)