Merge pull request #86101 from PatrickLang/fix-cpumaximum

Fix cpu resource limit on Windows

Commit: 16a7650e2b
@@ -78,6 +78,7 @@ go_library(
         "//pkg/kubelet/apis:go_default_library",
         "//pkg/kubelet/winstats:go_default_library",
         "//vendor/github.com/Microsoft/hcsshim:go_default_library",
+        "//vendor/github.com/docker/docker/pkg/sysinfo:go_default_library",
         "//vendor/golang.org/x/sys/windows/registry:go_default_library",
     ],
     "//conditions:default": [],
@@ -25,6 +25,7 @@ import (
 	dockertypes "github.com/docker/docker/api/types"
 	dockercontainer "github.com/docker/docker/api/types/container"
 	dockerfilters "github.com/docker/docker/api/types/filters"
+	"github.com/docker/docker/pkg/sysinfo"
 	"k8s.io/klog"
 
 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
@@ -69,11 +70,12 @@ func (ds *dockerService) updateCreateConfig(
 	if wc := config.GetWindows(); wc != nil {
 		rOpts := wc.GetResources()
 		if rOpts != nil {
+			// Precedence and units for these are described at length in kuberuntime_container_windows.go - generateWindowsContainerConfig()
 			createConfig.HostConfig.Resources = dockercontainer.Resources{
-				Memory:     rOpts.MemoryLimitInBytes,
-				CPUShares:  rOpts.CpuShares,
-				CPUCount:   rOpts.CpuCount,
-				CPUPercent: rOpts.CpuMaximum,
+				Memory:    rOpts.MemoryLimitInBytes,
+				CPUShares: rOpts.CpuShares,
+				CPUCount:  rOpts.CpuCount,
+				NanoCPUs:  rOpts.CpuMaximum * int64(sysinfo.NumCPU()) * (1e9 / 10000),
 			}
 		}
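The new NanoCPUs line packs a unit conversion worth spelling out. CRI's CpuMaximum is a hard cap expressed as percent * 100 of the machine's total CPU capacity (range 1-10000), while Docker's NanoCPUs is an absolute quota in billionths of a CPU. Below is a minimal standalone sketch of that conversion; the helper name and the explicit numCPU parameter (standing in for sysinfo.NumCPU()) are illustrative, not part of the PR.

package main

import "fmt"

// criMaximumToNanoCPUs mirrors the expression in the hunk above:
// cpuMaximum (1/10000ths of total machine CPU) times the CPU count,
// scaled by 1e9/10000 to land in nano-CPU units.
func criMaximumToNanoCPUs(cpuMaximum int64, numCPU int) int64 {
	return cpuMaximum * int64(numCPU) * (1e9 / 10000)
}

func main() {
	// A cap of 2500 (25% of total capacity) on a 4-CPU node is
	// 25% * 4 CPUs = 1 full CPU = 1e9 nano-CPUs.
	fmt.Println(criMaximumToNanoCPUs(2500, 4)) // 1000000000
}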
@@ -14,7 +14,6 @@ go_library(
         "helpers.go",
         "helpers_linux.go",
         "helpers_unsupported.go",
-        "helpers_windows.go",
         "instrumented_services.go",
         "kuberuntime_container.go",
         "kuberuntime_container_linux.go",
@@ -1,4 +1,4 @@
-// +build !linux,!windows
+// +build !linux
 
 /*
 Copyright 2018 The Kubernetes Authors.
pkg/kubelet/kuberuntime/helpers_windows.go (file deleted)
@@ -1,55 +0,0 @@
// +build windows

/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package kuberuntime

import (
	"github.com/docker/docker/pkg/sysinfo"
)

const (
	// Taken from https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/resource-controls
	minSharesProcess = 5000
	minSharesHyperV  = 10
	maxShares        = 10000
	milliCPUToCPU    = 1000
)

// milliCPUToShares converts milliCPU to CPU shares
func milliCPUToShares(milliCPU int64, hyperv bool) int64 {
	var minShares int64 = minSharesProcess
	if hyperv {
		minShares = minSharesHyperV
	}

	if milliCPU == 0 {
		// Return here to really match kernel default for zero milliCPU.
		return minShares
	}

	// Conceptually (milliCPU / milliCPUToCPU) * sharesPerCPU, but factored to improve rounding.
	totalCPU := sysinfo.NumCPU()
	shares := (milliCPU * (maxShares - minShares)) / int64(totalCPU) / milliCPUToCPU
	if shares < minShares {
		return minShares
	}
	if shares > maxShares {
		return maxShares
	}
	return shares
}
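Evaluating the deleted milliCPUToShares by hand shows why weight-based shares sat awkwardly next to hard caps: for process isolation the 5000-10000 output range barely moves for typical limits. A minimal standalone sketch, re-implemented for illustration only; the fixed numCPU parameter replaces sysinfo.NumCPU().

package main

import "fmt"

// sharesFor reproduces the deleted formula with process-isolation constants.
func sharesFor(milliCPU int64, numCPU int) int64 {
	const (
		minShares     int64 = 5000 // minSharesProcess in the deleted file
		maxShares     int64 = 10000
		milliCPUToCPU int64 = 1000
	)
	if milliCPU == 0 {
		return minShares
	}
	shares := (milliCPU * (maxShares - minShares)) / int64(numCPU) / milliCPUToCPU
	if shares < minShares {
		return minShares
	}
	if shares > maxShares {
		return maxShares
	}
	return shares
}

func main() {
	// A 500m limit on a 4-CPU node computes (500*5000)/4/1000 = 625,
	// which the floor lifts to 5000 -- the same weight a much larger
	// limit would get, so the limit had little visible effect.
	fmt.Println(sharesFor(500, 4)) // 5000
	// Only very large limits move the needle: 8 full CPUs maps to the max.
	fmt.Println(sharesFor(8000, 4)) // 10000
}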
@@ -53,7 +53,6 @@ func (m *kubeGenericRuntimeManager) generateWindowsContainerConfig(container *v1
 		SecurityContext: &runtimeapi.WindowsContainerSecurityContext{},
 	}
 
-	cpuRequest := container.Resources.Requests.Cpu()
 	cpuLimit := container.Resources.Limits.Cpu()
 	isolatedByHyperv := kubeletapis.ShouldIsolatedByHyperV(pod.Annotations)
 	if !cpuLimit.IsZero() {
@@ -61,7 +60,35 @@ func (m *kubeGenericRuntimeManager) generateWindowsContainerConfig(container *v1
 		// as only 64 processors are available for execution by a given process. This causes
 		// some oddities on systems with more than 64 processors.
 		// Refer https://msdn.microsoft.com/en-us/library/windows/desktop/dd405503(v=vs.85).aspx.
+
+		// Since Kubernetes doesn't have any notion of weight in the Pod/Container API, only limits/reserves, applying CpuMaximum only
+		// better follows the intent of the user. At one point CpuWeights were set, but this prevented limits from having any effect.
+
+		// There are 3 parts to how this works:
+		// Part one - Windows kernel
+		//   cpuMaximum is documented at https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/resource-controls
+		//   the range and how it relates to the number of CPUs is at https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-jobobject_cpu_rate_control_information
+		//   For process isolation, these are applied to the job object setting JOB_OBJECT_CPU_RATE_CONTROL_ENABLE, which can be set to either
+		//   JOB_OBJECT_CPU_RATE_CONTROL_WEIGHT_BASED or JOB_OBJECT_CPU_RATE_CONTROL_HARD_CAP. This is why the settings are mutually exclusive.
+		// Part two - Docker (doc: https://docs.docker.com/engine/api/v1.30)
+		//   If both CpuWeight and CpuMaximum are passed to Docker, then it sets
+		//   JOB_OBJECT_CPU_RATE_CONTROL_ENABLE = JOB_OBJECT_CPU_RATE_CONTROL_WEIGHT_BASED, ignoring CpuMaximum.
+		//   Option a: Set HostConfig.CpuPercent. The units are whole percent of the total CPU capacity of the system, meaning the resolution
+		//   is different based on the number of cores.
+		//   Option b: Set HostConfig.NanoCpus integer <int64> - CPU quota in units of 10^-9 CPUs. Moby scales this to the Windows job object
+		//   resolution of 1-10000, so it's higher resolution than option a.
+		//   src: https://github.com/moby/moby/blob/10866714412aea1bb587d1ad14b2ce1ba4cf4308/daemon/oci_windows.go#L426
+		// Part three - CRI & ContainerD's implementation
+		//   The kubelet sets these directly on cgroups in Linux, but needs to pass them across CRI on Windows.
+		//   There is an existing cpu_maximum field, with a range of percent * 100, so 1-10000. This is different from Docker, but consistent with OCI:
+		//   https://github.com/kubernetes/kubernetes/blob/56d1c3b96d0a544130a82caad33dd57629b8a7f8/staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2/api.proto#L681-L682
+		//   https://github.com/opencontainers/runtime-spec/blob/ad53dcdc39f1f7f7472b10aa0a45648fe4865496/config-windows.md#cpu
+		//   If both CpuWeight and CpuMaximum are set, ContainerD catches this invalid case and returns an error instead.
+
 		cpuMaximum := 10000 * cpuLimit.MilliValue() / int64(sysinfo.NumCPU()) / 1000
 
+		// TODO: This should be reviewed or removed once Hyper-V support is implemented with CRI-ContainerD
+		// in a future release. cpuCount may or may not be required if cpuMaximum is set.
 		if isolatedByHyperv {
 			cpuCount := int64(cpuLimit.MilliValue()+999) / 1000
 			wc.Resources.CpuCount = cpuCount
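A worked example of the cpuMaximum expression above: it rescales the millicpu limit into the CRI's percent * 100 range relative to total machine capacity, so the same limit yields a smaller percentage on a bigger node. A standalone sketch follows; the helper name and the numCPU parameter, in place of sysinfo.NumCPU(), are illustrative.

package main

import "fmt"

// cpuMaximumFor mirrors:
//   cpuMaximum := 10000 * cpuLimit.MilliValue() / int64(sysinfo.NumCPU()) / 1000
func cpuMaximumFor(milliCPULimit int64, numCPU int) int64 {
	return 10000 * milliCPULimit / int64(numCPU) / 1000
}

func main() {
	// 500m on a 4-CPU node: 10000*500/4/1000 = 1250, i.e. a hard cap at
	// 12.50% of total machine capacity -- half of one of the four CPUs.
	fmt.Println(cpuMaximumFor(500, 4)) // 1250
	// The same 500m on an 8-CPU node: 625 (6.25% of total capacity),
	// still half a CPU in absolute terms.
	fmt.Println(cpuMaximumFor(500, 8)) // 625
}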
@@ -80,31 +107,15 @@ func (m *kubeGenericRuntimeManager) generateWindowsContainerConfig(container *v1
 		wc.Resources.CpuMaximum = cpuMaximum
 	}
 
-	cpuShares := milliCPUToShares(cpuLimit.MilliValue(), isolatedByHyperv)
-	if cpuShares == 0 {
-		cpuShares = milliCPUToShares(cpuRequest.MilliValue(), isolatedByHyperv)
-	}
-	wc.Resources.CpuShares = cpuShares
-
 	if !isolatedByHyperv {
 		// The processor resource controls are mutually exclusive on
 		// Windows Server Containers, the order of precedence is
-		// CPUCount first, then CPUShares, and CPUMaximum last.
+		// CPUCount first, then CPUMaximum.
 		if wc.Resources.CpuCount > 0 {
-			if wc.Resources.CpuShares > 0 {
-				wc.Resources.CpuShares = 0
-				klog.Warningf("Mutually exclusive options: CPUCount priority > CPUShares priority on Windows Server Containers. CPUShares should be ignored")
-			}
 			if wc.Resources.CpuMaximum > 0 {
 				wc.Resources.CpuMaximum = 0
 				klog.Warningf("Mutually exclusive options: CPUCount priority > CPUMaximum priority on Windows Server Containers. CPUMaximum should be ignored")
 			}
-		} else if wc.Resources.CpuShares > 0 {
-			if wc.Resources.CpuMaximum > 0 {
-				wc.Resources.CpuMaximum = 0
-				klog.Warningf("Mutually exclusive options: CPUShares priority > CPUMaximum priority on Windows Server Containers. CPUMaximum should be ignored")
-			}
 		}
 	}
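With CpuShares gone, the mutual-exclusion handling collapses to a single precedence rule for process-isolated containers: CpuCount wins over CpuMaximum. A minimal sketch of that remaining rule; the struct here is a hypothetical stand-in for runtimeapi.WindowsContainerResources.

package main

import "fmt"

// windowsResources stands in for the two remaining exclusive controls.
type windowsResources struct {
	CpuCount   int64
	CpuMaximum int64
}

// resolveExclusive applies the precedence kept by the hunk above:
// if a CPU count is set, the percentage cap is dropped, because the
// Windows job object honors only one CPU rate control at a time.
func resolveExclusive(r *windowsResources) {
	if r.CpuCount > 0 && r.CpuMaximum > 0 {
		r.CpuMaximum = 0
	}
}

func main() {
	r := windowsResources{CpuCount: 2, CpuMaximum: 2500}
	resolveExclusive(&r)
	fmt.Printf("%+v\n", r) // {CpuCount:2 CpuMaximum:0}
}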
@@ -5,6 +5,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
 go_library(
     name = "go_default_library",
     srcs = [
+        "cpu_limits.go",
         "density.go",
         "dns.go",
         "framework.go",
test/e2e/windows/cpu_limits.go (new file, 144 lines)
@@ -0,0 +1,144 @@
/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package windows

import (
	"context"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/uuid"
	"k8s.io/kubernetes/test/e2e/framework"
	e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
	imageutils "k8s.io/kubernetes/test/utils/image"
	"time"

	"github.com/onsi/ginkgo"
)

var _ = SIGDescribe("[Feature:Windows] Cpu Resources", func() {
	f := framework.NewDefaultFramework("cpu-resources-test-windows")

	// The Windows 'BusyBox' image is PowerShell plus a collection of scripts and utilities to mimic common busybox commands
	powershellImage := imageutils.GetConfig(imageutils.BusyBox)

	ginkgo.Context("Container limits", func() {
		ginkgo.It("should not be exceeded after waiting 2 minutes", func() {
			ginkgo.By("Creating one pod with limit set to '0.5'")
			podsDecimal := newCPUBurnPods(1, powershellImage, "0.5", "1Gi")
			f.PodClient().CreateBatch(podsDecimal)
			ginkgo.By("Creating one pod with limit set to '500m'")
			podsMilli := newCPUBurnPods(1, powershellImage, "500m", "1Gi")
			f.PodClient().CreateBatch(podsMilli)
			ginkgo.By("Waiting 2 minutes")
			time.Sleep(2 * time.Minute)
			ginkgo.By("Ensuring pods are still running")
			var allPods [](*v1.Pod)
			for _, p := range podsDecimal {
				pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(
					context.TODO(),
					p.Name,
					metav1.GetOptions{})
				framework.ExpectNoError(err, "Error retrieving pod")
				framework.ExpectEqual(pod.Status.Phase, v1.PodRunning)
				allPods = append(allPods, pod)
			}
			for _, p := range podsMilli {
				pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(
					context.TODO(),
					p.Name,
					metav1.GetOptions{})
				framework.ExpectNoError(err, "Error retrieving pod")
				framework.ExpectEqual(pod.Status.Phase, v1.PodRunning)
				allPods = append(allPods, pod)
			}
			ginkgo.By("Ensuring cpu doesn't exceed limit by >5%")
			for _, p := range allPods {
				ginkgo.By("Gathering node summary stats")
				nodeStats, err := e2ekubelet.GetStatsSummary(f.ClientSet, p.Spec.NodeName)
				framework.ExpectNoError(err, "Error grabbing node summary stats")
				found := false
				cpuUsage := float64(0)
				for _, pod := range nodeStats.Pods {
					if pod.PodRef.Name != p.Name || pod.PodRef.Namespace != p.Namespace {
						continue
					}
					cpuUsage = float64(*pod.CPU.UsageNanoCores) * 1e-9
					found = true
					break
				}
				framework.ExpectEqual(found, true, "Found pod in stats summary")
				framework.Logf("Pod %s usage: %v", p.Name, cpuUsage)
				framework.ExpectEqual(cpuUsage > 0, true, "Pods reported usage should be > 0")
				framework.ExpectEqual((.5*1.05) > cpuUsage, true, "Pods reported usage should not exceed limit by >5%")
			}
		})
	})
})

// newCPUBurnPods creates a list of pods (specification) with a workload that will consume all available CPU resources up to container limit
func newCPUBurnPods(numPods int, image imageutils.Config, cpuLimit string, memoryLimit string) []*v1.Pod {
	var pods []*v1.Pod

	memLimitQuantity, err := resource.ParseQuantity(memoryLimit)
	framework.ExpectNoError(err)

	cpuLimitQuantity, err := resource.ParseQuantity(cpuLimit)
	framework.ExpectNoError(err)

	for i := 0; i < numPods; i++ {

		podName := "cpulimittest-" + string(uuid.NewUUID())
		pod := v1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				Name: podName,
				Labels: map[string]string{
					"name":    podName,
					"testapp": "cpuburn",
				},
			},
			Spec: v1.PodSpec{
				// Restart policy is always (default).
				Containers: []v1.Container{
					{
						Image: image.GetE2EImage(),
						Name:  podName,
						Resources: v1.ResourceRequirements{
							Limits: v1.ResourceList{
								v1.ResourceMemory: memLimitQuantity,
								v1.ResourceCPU:    cpuLimitQuantity,
							},
						},
						Command: []string{
							"powershell.exe",
							"-Command",
							"foreach ($loopnumber in 1..8) { Start-Job -ScriptBlock { $result = 1; foreach($mm in 1..2147483647){$res1=1;foreach($num in 1..2147483647){$res1=$mm*$num*1340371};$res1} } } ; get-job | wait-job",
						},
					},
				},
				NodeSelector: map[string]string{
					"beta.kubernetes.io/os": "windows",
				},
			},
		}

		pods = append(pods, &pod)
	}

	return pods
}