mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-28 14:07:14 +00:00
Merge pull request #86101 from PatrickLang/fix-cpumaximum
Fix cpu resource limit on Windows
This commit is contained in:
commit
16a7650e2b
@ -78,6 +78,7 @@ go_library(
|
|||||||
"//pkg/kubelet/apis:go_default_library",
|
"//pkg/kubelet/apis:go_default_library",
|
||||||
"//pkg/kubelet/winstats:go_default_library",
|
"//pkg/kubelet/winstats:go_default_library",
|
||||||
"//vendor/github.com/Microsoft/hcsshim:go_default_library",
|
"//vendor/github.com/Microsoft/hcsshim:go_default_library",
|
||||||
|
"//vendor/github.com/docker/docker/pkg/sysinfo:go_default_library",
|
||||||
"//vendor/golang.org/x/sys/windows/registry:go_default_library",
|
"//vendor/golang.org/x/sys/windows/registry:go_default_library",
|
||||||
],
|
],
|
||||||
"//conditions:default": [],
|
"//conditions:default": [],
|
||||||
|
@ -25,6 +25,7 @@ import (
|
|||||||
dockertypes "github.com/docker/docker/api/types"
|
dockertypes "github.com/docker/docker/api/types"
|
||||||
dockercontainer "github.com/docker/docker/api/types/container"
|
dockercontainer "github.com/docker/docker/api/types/container"
|
||||||
dockerfilters "github.com/docker/docker/api/types/filters"
|
dockerfilters "github.com/docker/docker/api/types/filters"
|
||||||
|
"github.com/docker/docker/pkg/sysinfo"
|
||||||
"k8s.io/klog"
|
"k8s.io/klog"
|
||||||
|
|
||||||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
|
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
|
||||||
@ -69,11 +70,12 @@ func (ds *dockerService) updateCreateConfig(
|
|||||||
if wc := config.GetWindows(); wc != nil {
|
if wc := config.GetWindows(); wc != nil {
|
||||||
rOpts := wc.GetResources()
|
rOpts := wc.GetResources()
|
||||||
if rOpts != nil {
|
if rOpts != nil {
|
||||||
|
// Precedence and units for these are described at length in kuberuntime_container_windows.go - generateWindowsContainerConfig()
|
||||||
createConfig.HostConfig.Resources = dockercontainer.Resources{
|
createConfig.HostConfig.Resources = dockercontainer.Resources{
|
||||||
Memory: rOpts.MemoryLimitInBytes,
|
Memory: rOpts.MemoryLimitInBytes,
|
||||||
CPUShares: rOpts.CpuShares,
|
CPUShares: rOpts.CpuShares,
|
||||||
CPUCount: rOpts.CpuCount,
|
CPUCount: rOpts.CpuCount,
|
||||||
CPUPercent: rOpts.CpuMaximum,
|
NanoCPUs: rOpts.CpuMaximum * int64(sysinfo.NumCPU()) * (1e9 / 10000),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -14,7 +14,6 @@ go_library(
|
|||||||
"helpers.go",
|
"helpers.go",
|
||||||
"helpers_linux.go",
|
"helpers_linux.go",
|
||||||
"helpers_unsupported.go",
|
"helpers_unsupported.go",
|
||||||
"helpers_windows.go",
|
|
||||||
"instrumented_services.go",
|
"instrumented_services.go",
|
||||||
"kuberuntime_container.go",
|
"kuberuntime_container.go",
|
||||||
"kuberuntime_container_linux.go",
|
"kuberuntime_container_linux.go",
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// +build !linux,!windows
|
// +build !linux
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Copyright 2018 The Kubernetes Authors.
|
Copyright 2018 The Kubernetes Authors.
|
||||||
|
@ -1,55 +0,0 @@
|
|||||||
// +build windows
|
|
||||||
|
|
||||||
/*
|
|
||||||
Copyright 2018 The Kubernetes Authors.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package kuberuntime
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/docker/docker/pkg/sysinfo"
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
|
||||||
// Taken from https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/resource-controls
|
|
||||||
minSharesProcess = 5000
|
|
||||||
minSharesHyperV = 10
|
|
||||||
maxShares = 10000
|
|
||||||
milliCPUToCPU = 1000
|
|
||||||
)
|
|
||||||
|
|
||||||
// milliCPUToShares converts milliCPU to CPU shares
|
|
||||||
func milliCPUToShares(milliCPU int64, hyperv bool) int64 {
|
|
||||||
var minShares int64 = minSharesProcess
|
|
||||||
if hyperv {
|
|
||||||
minShares = minSharesHyperV
|
|
||||||
}
|
|
||||||
|
|
||||||
if milliCPU == 0 {
|
|
||||||
// Return here to really match kernel default for zero milliCPU.
|
|
||||||
return minShares
|
|
||||||
}
|
|
||||||
|
|
||||||
// Conceptually (milliCPU / milliCPUToCPU) * sharesPerCPU, but factored to improve rounding.
|
|
||||||
totalCPU := sysinfo.NumCPU()
|
|
||||||
shares := (milliCPU * (maxShares - minShares)) / int64(totalCPU) / milliCPUToCPU
|
|
||||||
if shares < minShares {
|
|
||||||
return minShares
|
|
||||||
}
|
|
||||||
if shares > maxShares {
|
|
||||||
return maxShares
|
|
||||||
}
|
|
||||||
return shares
|
|
||||||
}
|
|
@ -53,7 +53,6 @@ func (m *kubeGenericRuntimeManager) generateWindowsContainerConfig(container *v1
|
|||||||
SecurityContext: &runtimeapi.WindowsContainerSecurityContext{},
|
SecurityContext: &runtimeapi.WindowsContainerSecurityContext{},
|
||||||
}
|
}
|
||||||
|
|
||||||
cpuRequest := container.Resources.Requests.Cpu()
|
|
||||||
cpuLimit := container.Resources.Limits.Cpu()
|
cpuLimit := container.Resources.Limits.Cpu()
|
||||||
isolatedByHyperv := kubeletapis.ShouldIsolatedByHyperV(pod.Annotations)
|
isolatedByHyperv := kubeletapis.ShouldIsolatedByHyperV(pod.Annotations)
|
||||||
if !cpuLimit.IsZero() {
|
if !cpuLimit.IsZero() {
|
||||||
@ -61,7 +60,35 @@ func (m *kubeGenericRuntimeManager) generateWindowsContainerConfig(container *v1
|
|||||||
// as only 64 processors are available for execution by a given process. This causes
|
// as only 64 processors are available for execution by a given process. This causes
|
||||||
// some oddities on systems with more than 64 processors.
|
// some oddities on systems with more than 64 processors.
|
||||||
// Refer https://msdn.microsoft.com/en-us/library/windows/desktop/dd405503(v=vs.85).aspx.
|
// Refer https://msdn.microsoft.com/en-us/library/windows/desktop/dd405503(v=vs.85).aspx.
|
||||||
|
|
||||||
|
// Since Kubernetes doesn't have any notion of weight in the Pod/Container API, only limits/requests, applying CpuMaximum only
|
||||||
|
// will better follow the intent of the user. At one point CpuWeights were set, but this prevented limits from having any effect.
|
||||||
|
|
||||||
|
// There are 3 parts to how this works:
|
||||||
|
// Part one - Windows kernel
|
||||||
|
// cpuMaximum is documented at https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/resource-controls
|
||||||
|
// the range and how it relates to number of CPUs is at https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-jobobject_cpu_rate_control_information
|
||||||
|
// For process isolation, these are applied to the job object setting JOB_OBJECT_CPU_RATE_CONTROL_ENABLE, which can be set to either
|
||||||
|
// JOB_OBJECT_CPU_RATE_CONTROL_WEIGHT_BASED or JOB_OBJECT_CPU_RATE_CONTROL_HARD_CAP. This is why the settings are mutually exclusive.
|
||||||
|
// Part two - Docker (doc: https://docs.docker.com/engine/api/v1.30)
|
||||||
|
// If both CpuWeight and CpuMaximum are passed to Docker, then it sets
|
||||||
|
// JOB_OBJECT_CPU_RATE_CONTROL_ENABLE = JOB_OBJECT_CPU_RATE_CONTROL_WEIGHT_BASED ignoring CpuMaximum.
|
||||||
|
// Option a: Set HostConfig.CpuPercent. The units are whole percent of the total CPU capacity of the system, meaning the resolution
|
||||||
|
// is different based on the number of cores.
|
||||||
|
// Option b: Set HostConfig.NanoCpus integer <int64> - CPU quota in units of 1e-9 CPUs. Moby scales this to the Windows job object
|
||||||
|
// resolution of 1-10000, so it's higher resolution than option a.
|
||||||
|
// src: https://github.com/moby/moby/blob/10866714412aea1bb587d1ad14b2ce1ba4cf4308/daemon/oci_windows.go#L426
|
||||||
|
// Part three - CRI & ContainerD's implementation
|
||||||
|
// The kubelet sets these directly on CGroups in Linux, but needs to pass them across CRI on Windows.
|
||||||
|
// There is an existing cpu_maximum field, with a range of percent * 100, so 1-10000. This is different from Docker, but consistent with OCI
|
||||||
|
// https://github.com/kubernetes/kubernetes/blob/56d1c3b96d0a544130a82caad33dd57629b8a7f8/staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2/api.proto#L681-L682
|
||||||
|
// https://github.com/opencontainers/runtime-spec/blob/ad53dcdc39f1f7f7472b10aa0a45648fe4865496/config-windows.md#cpu
|
||||||
|
// If both CpuWeight and CpuMaximum are set - ContainerD catches this invalid case and returns an error instead.
|
||||||
|
|
||||||
cpuMaximum := 10000 * cpuLimit.MilliValue() / int64(sysinfo.NumCPU()) / 1000
|
cpuMaximum := 10000 * cpuLimit.MilliValue() / int64(sysinfo.NumCPU()) / 1000
|
||||||
|
|
||||||
|
// TODO: This should be reviewed or removed once Hyper-V support is implemented with CRI-ContainerD
|
||||||
|
// in a future release. cpuCount may or may not be required if cpuMaximum is set.
|
||||||
if isolatedByHyperv {
|
if isolatedByHyperv {
|
||||||
cpuCount := int64(cpuLimit.MilliValue()+999) / 1000
|
cpuCount := int64(cpuLimit.MilliValue()+999) / 1000
|
||||||
wc.Resources.CpuCount = cpuCount
|
wc.Resources.CpuCount = cpuCount
|
||||||
@ -80,31 +107,15 @@ func (m *kubeGenericRuntimeManager) generateWindowsContainerConfig(container *v1
|
|||||||
wc.Resources.CpuMaximum = cpuMaximum
|
wc.Resources.CpuMaximum = cpuMaximum
|
||||||
}
|
}
|
||||||
|
|
||||||
cpuShares := milliCPUToShares(cpuLimit.MilliValue(), isolatedByHyperv)
|
|
||||||
if cpuShares == 0 {
|
|
||||||
cpuShares = milliCPUToShares(cpuRequest.MilliValue(), isolatedByHyperv)
|
|
||||||
}
|
|
||||||
wc.Resources.CpuShares = cpuShares
|
|
||||||
|
|
||||||
if !isolatedByHyperv {
|
if !isolatedByHyperv {
|
||||||
// The processor resource controls are mutually exclusive on
|
// The processor resource controls are mutually exclusive on
|
||||||
// Windows Server Containers, the order of precedence is
|
// Windows Server Containers, the order of precedence is
|
||||||
// CPUCount first, then CPUShares, and CPUMaximum last.
|
// CPUCount first, then CPUMaximum.
|
||||||
if wc.Resources.CpuCount > 0 {
|
if wc.Resources.CpuCount > 0 {
|
||||||
if wc.Resources.CpuShares > 0 {
|
|
||||||
wc.Resources.CpuShares = 0
|
|
||||||
klog.Warningf("Mutually exclusive options: CPUCount priority > CPUShares priority on Windows Server Containers. CPUShares should be ignored")
|
|
||||||
}
|
|
||||||
if wc.Resources.CpuMaximum > 0 {
|
if wc.Resources.CpuMaximum > 0 {
|
||||||
wc.Resources.CpuMaximum = 0
|
wc.Resources.CpuMaximum = 0
|
||||||
klog.Warningf("Mutually exclusive options: CPUCount priority > CPUMaximum priority on Windows Server Containers. CPUMaximum should be ignored")
|
klog.Warningf("Mutually exclusive options: CPUCount priority > CPUMaximum priority on Windows Server Containers. CPUMaximum should be ignored")
|
||||||
}
|
}
|
||||||
} else if wc.Resources.CpuShares > 0 {
|
|
||||||
if wc.Resources.CpuMaximum > 0 {
|
|
||||||
wc.Resources.CpuMaximum = 0
|
|
||||||
klog.Warningf("Mutually exclusive options: CPUShares priority > CPUMaximum priority on Windows Server Containers. CPUMaximum should be ignored")
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
|||||||
go_library(
|
go_library(
|
||||||
name = "go_default_library",
|
name = "go_default_library",
|
||||||
srcs = [
|
srcs = [
|
||||||
|
"cpu_limits.go",
|
||||||
"density.go",
|
"density.go",
|
||||||
"dns.go",
|
"dns.go",
|
||||||
"framework.go",
|
"framework.go",
|
||||||
|
144
test/e2e/windows/cpu_limits.go
Normal file
144
test/e2e/windows/cpu_limits.go
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2020 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package windows
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
v1 "k8s.io/api/core/v1"
|
||||||
|
"k8s.io/apimachinery/pkg/api/resource"
|
||||||
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
"k8s.io/apimachinery/pkg/util/uuid"
|
||||||
|
"k8s.io/kubernetes/test/e2e/framework"
|
||||||
|
e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
|
||||||
|
imageutils "k8s.io/kubernetes/test/utils/image"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/onsi/ginkgo"
|
||||||
|
)
|
||||||
|
|
||||||
|
var _ = SIGDescribe("[Feature:Windows] Cpu Resources", func() {
|
||||||
|
f := framework.NewDefaultFramework("cpu-resources-test-windows")
|
||||||
|
|
||||||
|
// The Windows 'BusyBox' image is PowerShell plus a collection of scripts and utilities to mimic common busybox commands
|
||||||
|
powershellImage := imageutils.GetConfig(imageutils.BusyBox)
|
||||||
|
|
||||||
|
ginkgo.Context("Container limits", func() {
|
||||||
|
ginkgo.It("should not be exceeded after waiting 2 minutes", func() {
|
||||||
|
ginkgo.By("Creating one pod with limit set to '0.5'")
|
||||||
|
podsDecimal := newCPUBurnPods(1, powershellImage, "0.5", "1Gi")
|
||||||
|
f.PodClient().CreateBatch(podsDecimal)
|
||||||
|
ginkgo.By("Creating one pod with limit set to '500m'")
|
||||||
|
podsMilli := newCPUBurnPods(1, powershellImage, "500m", "1Gi")
|
||||||
|
f.PodClient().CreateBatch(podsMilli)
|
||||||
|
ginkgo.By("Waiting 2 minutes")
|
||||||
|
time.Sleep(2 * time.Minute)
|
||||||
|
ginkgo.By("Ensuring pods are still running")
|
||||||
|
var allPods [](*v1.Pod)
|
||||||
|
for _, p := range podsDecimal {
|
||||||
|
pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(
|
||||||
|
context.TODO(),
|
||||||
|
p.Name,
|
||||||
|
metav1.GetOptions{})
|
||||||
|
framework.ExpectNoError(err, "Error retrieving pod")
|
||||||
|
framework.ExpectEqual(pod.Status.Phase, v1.PodRunning)
|
||||||
|
allPods = append(allPods, pod)
|
||||||
|
}
|
||||||
|
for _, p := range podsMilli {
|
||||||
|
pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(
|
||||||
|
context.TODO(),
|
||||||
|
p.Name,
|
||||||
|
metav1.GetOptions{})
|
||||||
|
framework.ExpectNoError(err, "Error retrieving pod")
|
||||||
|
framework.ExpectEqual(pod.Status.Phase, v1.PodRunning)
|
||||||
|
allPods = append(allPods, pod)
|
||||||
|
}
|
||||||
|
ginkgo.By("Ensuring cpu doesn't exceed limit by >5%")
|
||||||
|
for _, p := range allPods {
|
||||||
|
ginkgo.By("Gathering node summary stats")
|
||||||
|
nodeStats, err := e2ekubelet.GetStatsSummary(f.ClientSet, p.Spec.NodeName)
|
||||||
|
framework.ExpectNoError(err, "Error grabbing node summary stats")
|
||||||
|
found := false
|
||||||
|
cpuUsage := float64(0)
|
||||||
|
for _, pod := range nodeStats.Pods {
|
||||||
|
if pod.PodRef.Name != p.Name || pod.PodRef.Namespace != p.Namespace {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
cpuUsage = float64(*pod.CPU.UsageNanoCores) * 1e-9
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
framework.ExpectEqual(found, true, "Found pod in stats summary")
|
||||||
|
framework.Logf("Pod %s usage: %v", p.Name, cpuUsage)
|
||||||
|
framework.ExpectEqual(cpuUsage > 0, true, "Pods reported usage should be > 0")
|
||||||
|
framework.ExpectEqual((.5*1.05) > cpuUsage, true, "Pods reported usage should not exceed limit by >5%")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// newCPUBurnPods creates a list of pod specifications with a workload that will consume all available CPU resources up to the container limit
|
||||||
|
func newCPUBurnPods(numPods int, image imageutils.Config, cpuLimit string, memoryLimit string) []*v1.Pod {
|
||||||
|
var pods []*v1.Pod
|
||||||
|
|
||||||
|
memLimitQuantity, err := resource.ParseQuantity(memoryLimit)
|
||||||
|
framework.ExpectNoError(err)
|
||||||
|
|
||||||
|
cpuLimitQuantity, err := resource.ParseQuantity(cpuLimit)
|
||||||
|
framework.ExpectNoError(err)
|
||||||
|
|
||||||
|
for i := 0; i < numPods; i++ {
|
||||||
|
|
||||||
|
podName := "cpulimittest-" + string(uuid.NewUUID())
|
||||||
|
pod := v1.Pod{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: podName,
|
||||||
|
Labels: map[string]string{
|
||||||
|
"name": podName,
|
||||||
|
"testapp": "cpuburn",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Spec: v1.PodSpec{
|
||||||
|
// Restart policy is Always (the default).
|
||||||
|
Containers: []v1.Container{
|
||||||
|
{
|
||||||
|
Image: image.GetE2EImage(),
|
||||||
|
Name: podName,
|
||||||
|
Resources: v1.ResourceRequirements{
|
||||||
|
Limits: v1.ResourceList{
|
||||||
|
v1.ResourceMemory: memLimitQuantity,
|
||||||
|
v1.ResourceCPU: cpuLimitQuantity,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Command: []string{
|
||||||
|
"powershell.exe",
|
||||||
|
"-Command",
|
||||||
|
"foreach ($loopnumber in 1..8) { Start-Job -ScriptBlock { $result = 1; foreach($mm in 1..2147483647){$res1=1;foreach($num in 1..2147483647){$res1=$mm*$num*1340371};$res1} } } ; get-job | wait-job",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
NodeSelector: map[string]string{
|
||||||
|
"beta.kubernetes.io/os": "windows",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
pods = append(pods, &pod)
|
||||||
|
}
|
||||||
|
|
||||||
|
return pods
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user