Merge pull request #86101 from PatrickLang/fix-cpumaximum

Fix cpu resource limit on Windows
This commit is contained in:
Kubernetes Prow Robot 2020-02-26 00:20:26 -08:00 committed by GitHub
commit 16a7650e2b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 182 additions and 79 deletions

View File

@ -78,6 +78,7 @@ go_library(
"//pkg/kubelet/apis:go_default_library",
"//pkg/kubelet/winstats:go_default_library",
"//vendor/github.com/Microsoft/hcsshim:go_default_library",
"//vendor/github.com/docker/docker/pkg/sysinfo:go_default_library",
"//vendor/golang.org/x/sys/windows/registry:go_default_library",
],
"//conditions:default": [],

View File

@ -25,6 +25,7 @@ import (
dockertypes "github.com/docker/docker/api/types"
dockercontainer "github.com/docker/docker/api/types/container"
dockerfilters "github.com/docker/docker/api/types/filters"
"github.com/docker/docker/pkg/sysinfo"
"k8s.io/klog"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
@ -69,11 +70,12 @@ func (ds *dockerService) updateCreateConfig(
if wc := config.GetWindows(); wc != nil {
rOpts := wc.GetResources()
if rOpts != nil {
// Precedence and units for these are described at length in kuberuntime_container_windows.go - generateWindowsContainerConfig()
createConfig.HostConfig.Resources = dockercontainer.Resources{
Memory: rOpts.MemoryLimitInBytes,
CPUShares: rOpts.CpuShares,
CPUCount: rOpts.CpuCount,
CPUPercent: rOpts.CpuMaximum,
Memory: rOpts.MemoryLimitInBytes,
CPUShares: rOpts.CpuShares,
CPUCount: rOpts.CpuCount,
NanoCPUs: rOpts.CpuMaximum * int64(sysinfo.NumCPU()) * (1e9 / 10000),
}
}

View File

@ -14,7 +14,6 @@ go_library(
"helpers.go",
"helpers_linux.go",
"helpers_unsupported.go",
"helpers_windows.go",
"instrumented_services.go",
"kuberuntime_container.go",
"kuberuntime_container_linux.go",

View File

@ -1,4 +1,4 @@
// +build !linux,!windows
// +build !linux
/*
Copyright 2018 The Kubernetes Authors.

View File

@ -1,55 +0,0 @@
// +build windows
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"github.com/docker/docker/pkg/sysinfo"
)
const (
// Taken from https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/resource-controls
minSharesProcess = 5000 // minimum CPU shares for a process-isolated Windows container
minSharesHyperV = 10 // minimum CPU shares for a Hyper-V-isolated Windows container
maxShares = 10000 // upper bound of the Windows CPU-share range
milliCPUToCPU = 1000 // milliCPU units per whole CPU
)
// milliCPUToShares converts a milliCPU quantity into Windows CPU shares.
// The isolation mode matters because Hyper-V and process-isolated
// containers have different minimum share values (see the constants above).
func milliCPUToShares(milliCPU int64, hyperv bool) int64 {
	minShares := int64(minSharesProcess)
	if hyperv {
		minShares = minSharesHyperV
	}
	// A zero request maps straight to the platform minimum, mirroring the
	// kernel default for an unset weight.
	if milliCPU == 0 {
		return minShares
	}
	// Equivalent to (milliCPU / milliCPUToCPU) * sharesPerCPU, but factored
	// this way so integer division loses less precision.
	shares := (milliCPU * (maxShares - minShares)) / int64(sysinfo.NumCPU()) / milliCPUToCPU
	// Clamp the result into the valid Windows share range.
	switch {
	case shares < minShares:
		return minShares
	case shares > maxShares:
		return maxShares
	}
	return shares
}

View File

@ -53,7 +53,6 @@ func (m *kubeGenericRuntimeManager) generateWindowsContainerConfig(container *v1
SecurityContext: &runtimeapi.WindowsContainerSecurityContext{},
}
cpuRequest := container.Resources.Requests.Cpu()
cpuLimit := container.Resources.Limits.Cpu()
isolatedByHyperv := kubeletapis.ShouldIsolatedByHyperV(pod.Annotations)
if !cpuLimit.IsZero() {
@ -61,7 +60,35 @@ func (m *kubeGenericRuntimeManager) generateWindowsContainerConfig(container *v1
// as only 64 processors are available for execution by a given process. This causes
// some oddities on systems with more than 64 processors.
// Refer https://msdn.microsoft.com/en-us/library/windows/desktop/dd405503(v=vs.85).aspx.
// Since Kubernetes doesn't have any notion of weight in the Pod/Container API, only limits/reserves, then applying CpuMaximum only
// will better follow the intent of the user. At one point CpuWeights were set, but this prevented limits from having any effect.
// There are 3 parts to how this works:
// Part one - Windows kernel
// cpuMaximum is documented at https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/resource-controls
// the range and how it relates to number of CPUs is at https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-jobobject_cpu_rate_control_information
// For process isolation, these are applied to the job object setting JOB_OBJECT_CPU_RATE_CONTROL_ENABLE, which can be set to either
// JOB_OBJECT_CPU_RATE_CONTROL_WEIGHT_BASED or JOB_OBJECT_CPU_RATE_CONTROL_HARD_CAP. This is why the settings are mutually exclusive.
// Part two - Docker (doc: https://docs.docker.com/engine/api/v1.30)
// If both CpuWeight and CpuMaximum are passed to Docker, then it sets
// JOB_OBJECT_CPU_RATE_CONTROL_ENABLE = JOB_OBJECT_CPU_RATE_CONTROL_WEIGHT_BASED ignoring CpuMaximum.
// Option a: Set HostConfig.CpuPercent. The units are whole percent of the total CPU capacity of the system, meaning the resolution
// is different based on the number of cores.
// Option b: Set HostConfig.NanoCpus integer <int64> - CPU quota in units of 10^-9 (billionths of a) CPU. Moby scales this to the Windows job object
// resolution of 1-10000, so it's higher resolution than option a.
// src: https://github.com/moby/moby/blob/10866714412aea1bb587d1ad14b2ce1ba4cf4308/daemon/oci_windows.go#L426
// Part three - CRI & ContainerD's implementation
// The kubelet sets these directly on CGroups in Linux, but needs to pass them across CRI on Windows.
// There is an existing cpu_maximum field, with a range of percent * 100, so 1-10000. This is different from Docker, but consistent with OCI
// https://github.com/kubernetes/kubernetes/blob/56d1c3b96d0a544130a82caad33dd57629b8a7f8/staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2/api.proto#L681-L682
// https://github.com/opencontainers/runtime-spec/blob/ad53dcdc39f1f7f7472b10aa0a45648fe4865496/config-windows.md#cpu
// If both CpuWeight and CpuMaximum are set - ContainerD catches this invalid case and returns an error instead.
cpuMaximum := 10000 * cpuLimit.MilliValue() / int64(sysinfo.NumCPU()) / 1000
// TODO: This should be reviewed or removed once Hyper-V support is implemented with CRI-ContainerD
// in a future release. cpuCount may or may not be required if cpuMaximum is set.
if isolatedByHyperv {
cpuCount := int64(cpuLimit.MilliValue()+999) / 1000
wc.Resources.CpuCount = cpuCount
@ -80,31 +107,15 @@ func (m *kubeGenericRuntimeManager) generateWindowsContainerConfig(container *v1
wc.Resources.CpuMaximum = cpuMaximum
}
cpuShares := milliCPUToShares(cpuLimit.MilliValue(), isolatedByHyperv)
if cpuShares == 0 {
cpuShares = milliCPUToShares(cpuRequest.MilliValue(), isolatedByHyperv)
}
wc.Resources.CpuShares = cpuShares
if !isolatedByHyperv {
// The processor resource controls are mutually exclusive on
// Windows Server Containers, the order of precedence is
// CPUCount first, then CPUShares, and CPUMaximum last.
// CPUCount first, then CPUMaximum.
if wc.Resources.CpuCount > 0 {
if wc.Resources.CpuShares > 0 {
wc.Resources.CpuShares = 0
klog.Warningf("Mutually exclusive options: CPUCount priority > CPUShares priority on Windows Server Containers. CPUShares should be ignored")
}
if wc.Resources.CpuMaximum > 0 {
wc.Resources.CpuMaximum = 0
klog.Warningf("Mutually exclusive options: CPUCount priority > CPUMaximum priority on Windows Server Containers. CPUMaximum should be ignored")
}
} else if wc.Resources.CpuShares > 0 {
if wc.Resources.CpuMaximum > 0 {
wc.Resources.CpuMaximum = 0
klog.Warningf("Mutually exclusive options: CPUShares priority > CPUMaximum priority on Windows Server Containers. CPUMaximum should be ignored")
}
}
}

View File

@ -5,6 +5,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = [
"cpu_limits.go",
"density.go",
"dns.go",
"framework.go",

View File

@ -0,0 +1,144 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package windows
import (
"context"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/kubernetes/test/e2e/framework"
e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
imageutils "k8s.io/kubernetes/test/utils/image"
"time"
"github.com/onsi/ginkgo"
)
// Verifies that Windows containers honor their CPU limits: two pods are
// created with equivalent limits expressed in different units ("0.5" vs
// "500m"), left to burn CPU for two minutes, and their reported usage is
// then checked against the limit (with 5% slack).
var _ = SIGDescribe("[Feature:Windows] Cpu Resources", func() {
f := framework.NewDefaultFramework("cpu-resources-test-windows")
// The Windows 'BusyBox' image is PowerShell plus a collection of scripts and utilities to mimic common busybox commands
powershellImage := imageutils.GetConfig(imageutils.BusyBox)
ginkgo.Context("Container limits", func() {
ginkgo.It("should not be exceeded after waiting 2 minutes", func() {
// Two pods with the same effective limit written in different units: a
// decimal CPU quantity ("0.5") and a milli-CPU quantity ("500m").
ginkgo.By("Creating one pod with limit set to '0.5'")
podsDecimal := newCPUBurnPods(1, powershellImage, "0.5", "1Gi")
f.PodClient().CreateBatch(podsDecimal)
ginkgo.By("Creating one pod with limit set to '500m'")
podsMilli := newCPUBurnPods(1, powershellImage, "500m", "1Gi")
f.PodClient().CreateBatch(podsMilli)
// Let the CPU-burn workload run long enough for usage to stabilize and
// show up in the kubelet stats summary.
ginkgo.By("Waiting 2 minutes")
time.Sleep(2 * time.Minute)
ginkgo.By("Ensuring pods are still running")
var allPods [](*v1.Pod)
for _, p := range podsDecimal {
pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(
context.TODO(),
p.Name,
metav1.GetOptions{})
framework.ExpectNoError(err, "Error retrieving pod")
framework.ExpectEqual(pod.Status.Phase, v1.PodRunning)
allPods = append(allPods, pod)
}
for _, p := range podsMilli {
pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(
context.TODO(),
p.Name,
metav1.GetOptions{})
framework.ExpectNoError(err, "Error retrieving pod")
framework.ExpectEqual(pod.Status.Phase, v1.PodRunning)
allPods = append(allPods, pod)
}
ginkgo.By("Ensuring cpu doesn't exceed limit by >5%")
for _, p := range allPods {
ginkgo.By("Gathering node summary stats")
nodeStats, err := e2ekubelet.GetStatsSummary(f.ClientSet, p.Spec.NodeName)
framework.ExpectNoError(err, "Error grabbing node summary stats")
found := false
cpuUsage := float64(0)
// Locate this pod's entry in the node stats summary.
for _, pod := range nodeStats.Pods {
if pod.PodRef.Name != p.Name || pod.PodRef.Namespace != p.Namespace {
continue
}
// UsageNanoCores is in nanocores; convert to whole cores.
cpuUsage = float64(*pod.CPU.UsageNanoCores) * 1e-9
found = true
break
}
framework.ExpectEqual(found, true, "Found pod in stats summary")
framework.Logf("Pod %s usage: %v", p.Name, cpuUsage)
framework.ExpectEqual(cpuUsage > 0, true, "Pods reported usage should be > 0")
// Both pods were limited to 0.5 CPU; allow 5% measurement slack.
framework.ExpectEqual((.5*1.05) > cpuUsage, true, "Pods reported usage should not exceed limit by >5%")
}
})
})
})
// newCPUBurnPods builds numPods pod specifications whose single container
// runs a PowerShell workload that consumes all CPU the container limit
// allows. cpuLimit and memoryLimit are resource.Quantity strings
// (e.g. "500m", "1Gi"); parse failures abort the test via ExpectNoError.
func newCPUBurnPods(numPods int, image imageutils.Config, cpuLimit string, memoryLimit string) []*v1.Pod {
	memQty, err := resource.ParseQuantity(memoryLimit)
	framework.ExpectNoError(err)
	cpuQty, err := resource.ParseQuantity(cpuLimit)
	framework.ExpectNoError(err)

	var pods []*v1.Pod
	for i := 0; i < numPods; i++ {
		name := "cpulimittest-" + string(uuid.NewUUID())
		// The container spawns eight background jobs that multiply integers
		// in a tight loop, saturating whatever CPU the limit grants.
		burner := v1.Container{
			Image: image.GetE2EImage(),
			Name:  name,
			Resources: v1.ResourceRequirements{
				Limits: v1.ResourceList{
					v1.ResourceMemory: memQty,
					v1.ResourceCPU:    cpuQty,
				},
			},
			Command: []string{
				"powershell.exe",
				"-Command",
				"foreach ($loopnumber in 1..8) { Start-Job -ScriptBlock { $result = 1; foreach($mm in 1..2147483647){$res1=1;foreach($num in 1..2147483647){$res1=$mm*$num*1340371};$res1} } } ; get-job | wait-job",
			},
		}
		pods = append(pods, &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				Name: name,
				Labels: map[string]string{
					"name":    name,
					"testapp": "cpuburn",
				},
			},
			Spec: v1.PodSpec{
				// Restart policy is the default (Always), keeping the
				// burner running for the duration of the test.
				Containers: []v1.Container{burner},
				NodeSelector: map[string]string{
					"beta.kubernetes.io/os": "windows",
				},
			},
		})
	}
	return pods
}