From 3c5e493482bf30a570edfbffac61f39bc7419397 Mon Sep 17 00:00:00 2001 From: Pengfei Ni Date: Sun, 11 Feb 2018 14:58:27 +0800 Subject: [PATCH] Setup windows container config to kubelet CRI --- .../apis/well_known_annotations_windows.go | 41 ++++++++++ pkg/kubelet/kuberuntime/helpers.go | 50 ------------ pkg/kubelet/kuberuntime/helpers_linux.go | 69 ++++++++++++++++ .../kuberuntime/helpers_unsupported.go | 24 ++++++ pkg/kubelet/kuberuntime/helpers_windows.go | 55 +++++++++++++ .../kuberuntime/kuberuntime_container.go | 50 ++---------- .../kuberuntime_container_linux.go | 74 +++++++++++++++++ .../kuberuntime_container_unsupported.go | 29 +++++++ .../kuberuntime_container_windows.go | 81 +++++++++++++++++++ 9 files changed, 378 insertions(+), 95 deletions(-) create mode 100644 pkg/kubelet/apis/well_known_annotations_windows.go create mode 100644 pkg/kubelet/kuberuntime/helpers_linux.go create mode 100644 pkg/kubelet/kuberuntime/helpers_unsupported.go create mode 100644 pkg/kubelet/kuberuntime/helpers_windows.go create mode 100644 pkg/kubelet/kuberuntime/kuberuntime_container_linux.go create mode 100644 pkg/kubelet/kuberuntime/kuberuntime_container_unsupported.go create mode 100644 pkg/kubelet/kuberuntime/kuberuntime_container_windows.go diff --git a/pkg/kubelet/apis/well_known_annotations_windows.go b/pkg/kubelet/apis/well_known_annotations_windows.go new file mode 100644 index 00000000000..0e738895ee0 --- /dev/null +++ b/pkg/kubelet/apis/well_known_annotations_windows.go @@ -0,0 +1,41 @@ +// +build windows + +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package apis + +import ( + utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/kubernetes/pkg/features" +) + +const ( + // HypervIsolationAnnotationKey and HypervIsolationValue are used to run windows containers with hyperv isolation. + // Refer https://aka.ms/hyperv-container. + HypervIsolationAnnotationKey = "experimental.windows.kubernetes.io/isolation-type" + HypervIsolationValue = "hyperv" +) + +// ShouldIsolatedByHyperV returns true if a windows container should be run with hyperv isolation. +func ShouldIsolatedByHyperV(annotations map[string]string) bool { + if !utilfeature.DefaultFeatureGate.Enabled(features.HyperVContainer) { + return false + } + + v, ok := annotations[HypervIsolationAnnotationKey] + return ok && v == HypervIsolationValue +} diff --git a/pkg/kubelet/kuberuntime/helpers.go b/pkg/kubelet/kuberuntime/helpers.go index e66440447aa..a1e6119a2a9 100644 --- a/pkg/kubelet/kuberuntime/helpers.go +++ b/pkg/kubelet/kuberuntime/helpers.go @@ -32,17 +32,6 @@ import ( kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" ) -const ( - // Taken from lmctfy https://github.com/google/lmctfy/blob/master/lmctfy/controllers/cpu_controller.cc - minShares = 2 - sharesPerCPU = 1024 - milliCPUToCPU = 1000 - - // 100000 is equivalent to 100ms - quotaPeriod = 100 * minQuotaPeriod - minQuotaPeriod = 1000 -) - type podsByID []*kubecontainer.Pod func (b podsByID) Len() int { return len(b) } @@ -160,45 +149,6 @@ func isContainerFailed(status *kubecontainer.ContainerStatus) bool { return false } -// milliCPUToShares converts milliCPU to CPU shares -func 
milliCPUToShares(milliCPU int64) int64 { - if milliCPU == 0 { - // Return 2 here to really match kernel default for zero milliCPU. - return minShares - } - // Conceptually (milliCPU / milliCPUToCPU) * sharesPerCPU, but factored to improve rounding. - shares := (milliCPU * sharesPerCPU) / milliCPUToCPU - if shares < minShares { - return minShares - } - return shares -} - -// milliCPUToQuota converts milliCPU to CFS quota and period values -func milliCPUToQuota(milliCPU int64) (quota int64, period int64) { - // CFS quota is measured in two values: - // - cfs_period_us=100ms (the amount of time to measure usage across) - // - cfs_quota=20ms (the amount of cpu time allowed to be used across a period) - // so in the above example, you are limited to 20% of a single CPU - // for multi-cpu environments, you just scale equivalent amounts - if milliCPU == 0 { - return - } - - // we set the period to 100ms by default - period = quotaPeriod - - // we then convert your milliCPU to a value normalized over a period - quota = (milliCPU * quotaPeriod) / milliCPUToCPU - - // quota needs to be a minimum of 1ms. - if quota < minQuotaPeriod { - quota = minQuotaPeriod - } - - return -} - // getStableKey generates a key (string) to uniquely identify a // (pod, container) tuple. The key should include the content of the // container, so that any change to the container generates a new key. diff --git a/pkg/kubelet/kuberuntime/helpers_linux.go b/pkg/kubelet/kuberuntime/helpers_linux.go new file mode 100644 index 00000000000..15abb4a0889 --- /dev/null +++ b/pkg/kubelet/kuberuntime/helpers_linux.go @@ -0,0 +1,69 @@ +// +build linux + +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kuberuntime + +const ( + // Taken from lmctfy https://github.com/google/lmctfy/blob/master/lmctfy/controllers/cpu_controller.cc + minShares = 2 + sharesPerCPU = 1024 + milliCPUToCPU = 1000 + + // 100000 is equivalent to 100ms + quotaPeriod = 100 * minQuotaPeriod + minQuotaPeriod = 1000 +) + +// milliCPUToShares converts milliCPU to CPU shares +func milliCPUToShares(milliCPU int64) int64 { + if milliCPU == 0 { + // Return 2 here to really match kernel default for zero milliCPU. + return minShares + } + // Conceptually (milliCPU / milliCPUToCPU) * sharesPerCPU, but factored to improve rounding. + shares := (milliCPU * sharesPerCPU) / milliCPUToCPU + if shares < minShares { + return minShares + } + return shares +} + +// milliCPUToQuota converts milliCPU to CFS quota and period values +func milliCPUToQuota(milliCPU int64) (quota int64, period int64) { + // CFS quota is measured in two values: + // - cfs_period_us=100ms (the amount of time to measure usage across) + // - cfs_quota=20ms (the amount of cpu time allowed to be used across a period) + // so in the above example, you are limited to 20% of a single CPU + // for multi-cpu environments, you just scale equivalent amounts + if milliCPU == 0 { + return + } + + // we set the period to 100ms by default + period = quotaPeriod + + // we then convert your milliCPU to a value normalized over a period + quota = (milliCPU * quotaPeriod) / milliCPUToCPU + + // quota needs to be a minimum of 1ms. 
+ if quota < minQuotaPeriod { + quota = minQuotaPeriod + } + + return +} diff --git a/pkg/kubelet/kuberuntime/helpers_unsupported.go b/pkg/kubelet/kuberuntime/helpers_unsupported.go new file mode 100644 index 00000000000..1e99c4f9317 --- /dev/null +++ b/pkg/kubelet/kuberuntime/helpers_unsupported.go @@ -0,0 +1,24 @@ +// +build !linux,!windows + +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kuberuntime + +// milliCPUToShares converts milliCPU to CPU shares +func milliCPUToShares(milliCPU int64) int64 { + return 0 +} diff --git a/pkg/kubelet/kuberuntime/helpers_windows.go b/pkg/kubelet/kuberuntime/helpers_windows.go new file mode 100644 index 00000000000..2009fad8d0f --- /dev/null +++ b/pkg/kubelet/kuberuntime/helpers_windows.go @@ -0,0 +1,55 @@ +// +build windows + +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package kuberuntime + +import ( + "github.com/docker/docker/pkg/sysinfo" +) + +const ( + // Taken from https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/resource-controls + minSharesProcess = 5000 + minSharesHyperV = 10 + maxShares = 10000 + milliCPUToCPU = 1000 +) + +// milliCPUToShares converts milliCPU to Windows CPU shares clamped to [minShares, maxShares]; hyperv selects the Hyper-V minimum. +func milliCPUToShares(milliCPU int64, hyperv bool) int64 { + var minShares int64 = minSharesProcess + if hyperv { + minShares = minSharesHyperV + } + + if milliCPU == 0 { + // No CPU specified: use the minimum share value for the selected isolation mode. + return minShares + } + + // Scale the fraction of node CPUs (milliCPU / milliCPUToCPU / totalCPU) by (maxShares - minShares), multiplying first to limit truncation. + totalCPU := sysinfo.NumCPU() + shares := (milliCPU * (maxShares - minShares)) / int64(totalCPU) / milliCPUToCPU + if shares < minShares { + return minShares + } + if shares > maxShares { + return maxShares + } + return shares +} diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container.go b/pkg/kubelet/kuberuntime/kuberuntime_container.go index ad6f0d0686e..e4574464ea9 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container.go @@ -42,7 +42,6 @@ import ( runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/events" - "k8s.io/kubernetes/pkg/kubelet/qos" "k8s.io/kubernetes/pkg/kubelet/types" "k8s.io/kubernetes/pkg/kubelet/util/format" "k8s.io/kubernetes/pkg/util/selinux" @@ -214,7 +213,11 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Contai Stdin: container.Stdin, StdinOnce: container.StdinOnce, Tty: container.TTY, - Linux: m.generateLinuxContainerConfig(container, pod, uid, username), + } + + // set platform specific configurations. 
+ if err := m.applyPlatformSpecificContainerConfig(config, container, pod, uid, username); err != nil { + return nil, err } // set environment variables @@ -231,49 +234,6 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Contai return config, nil } -// generateLinuxContainerConfig generates linux container config for kubelet runtime v1. -func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.Container, pod *v1.Pod, uid *int64, username string) *runtimeapi.LinuxContainerConfig { - lc := &runtimeapi.LinuxContainerConfig{ - Resources: &runtimeapi.LinuxContainerResources{}, - SecurityContext: m.determineEffectiveSecurityContext(pod, container, uid, username), - } - - // set linux container resources - var cpuShares int64 - cpuRequest := container.Resources.Requests.Cpu() - cpuLimit := container.Resources.Limits.Cpu() - memoryLimit := container.Resources.Limits.Memory().Value() - oomScoreAdj := int64(qos.GetContainerOOMScoreAdjust(pod, container, - int64(m.machineInfo.MemoryCapacity))) - // If request is not specified, but limit is, we want request to default to limit. - // API server does this for new containers, but we repeat this logic in Kubelet - // for containers running on existing Kubernetes clusters. - if cpuRequest.IsZero() && !cpuLimit.IsZero() { - cpuShares = milliCPUToShares(cpuLimit.MilliValue()) - } else { - // if cpuRequest.Amount is nil, then milliCPUToShares will return the minimal number - // of CPU shares. - cpuShares = milliCPUToShares(cpuRequest.MilliValue()) - } - lc.Resources.CpuShares = cpuShares - if memoryLimit != 0 { - lc.Resources.MemoryLimitInBytes = memoryLimit - } - // Set OOM score of the container based on qos policy. Processes in lower-priority pods should - // be killed first if the system runs out of memory. 
- lc.Resources.OomScoreAdj = oomScoreAdj - - if m.cpuCFSQuota { - // if cpuLimit.Amount is nil, then the appropriate default value is returned - // to allow full usage of cpu resource. - cpuQuota, cpuPeriod := milliCPUToQuota(cpuLimit.MilliValue()) - lc.Resources.CpuQuota = cpuQuota - lc.Resources.CpuPeriod = cpuPeriod - } - - return lc -} - // makeDevices generates container devices for kubelet runtime v1. func makeDevices(opts *kubecontainer.RunContainerOptions) []*runtimeapi.Device { devices := make([]*runtimeapi.Device, len(opts.Devices)) diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go b/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go new file mode 100644 index 00000000000..cdac02ffb53 --- /dev/null +++ b/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go @@ -0,0 +1,74 @@ +// +build linux + +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kuberuntime + +import ( + "k8s.io/api/core/v1" + runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" + "k8s.io/kubernetes/pkg/kubelet/qos" +) + +// applyPlatformSpecificContainerConfig applies platform specific configurations to runtimeapi.ContainerConfig. 
+func (m *kubeGenericRuntimeManager) applyPlatformSpecificContainerConfig(config *runtimeapi.ContainerConfig, container *v1.Container, pod *v1.Pod, uid *int64, username string) error { + config.Linux = m.generateLinuxContainerConfig(container, pod, uid, username) + return nil +} + +// generateLinuxContainerConfig generates linux container config for kubelet runtime v1. +func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.Container, pod *v1.Pod, uid *int64, username string) *runtimeapi.LinuxContainerConfig { + lc := &runtimeapi.LinuxContainerConfig{ + Resources: &runtimeapi.LinuxContainerResources{}, + SecurityContext: m.determineEffectiveSecurityContext(pod, container, uid, username), + } + + // set linux container resources + var cpuShares int64 + cpuRequest := container.Resources.Requests.Cpu() + cpuLimit := container.Resources.Limits.Cpu() + memoryLimit := container.Resources.Limits.Memory().Value() + oomScoreAdj := int64(qos.GetContainerOOMScoreAdjust(pod, container, + int64(m.machineInfo.MemoryCapacity))) + // If request is not specified, but limit is, we want request to default to limit. + // API server does this for new containers, but we repeat this logic in Kubelet + // for containers running on existing Kubernetes clusters. + if cpuRequest.IsZero() && !cpuLimit.IsZero() { + cpuShares = milliCPUToShares(cpuLimit.MilliValue()) + } else { + // if cpuRequest.Amount is nil, then milliCPUToShares will return the minimal number + // of CPU shares. + cpuShares = milliCPUToShares(cpuRequest.MilliValue()) + } + lc.Resources.CpuShares = cpuShares + if memoryLimit != 0 { + lc.Resources.MemoryLimitInBytes = memoryLimit + } + // Set OOM score of the container based on qos policy. Processes in lower-priority pods should + // be killed first if the system runs out of memory. 
+ lc.Resources.OomScoreAdj = oomScoreAdj + + if m.cpuCFSQuota { + // if cpuLimit.Amount is nil, then the appropriate default value is returned + // to allow full usage of cpu resource. + cpuQuota, cpuPeriod := milliCPUToQuota(cpuLimit.MilliValue()) + lc.Resources.CpuQuota = cpuQuota + lc.Resources.CpuPeriod = cpuPeriod + } + + return lc +} diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container_unsupported.go b/pkg/kubelet/kuberuntime/kuberuntime_container_unsupported.go new file mode 100644 index 00000000000..0344b7d9e93 --- /dev/null +++ b/pkg/kubelet/kuberuntime/kuberuntime_container_unsupported.go @@ -0,0 +1,29 @@ +// +build !linux,!windows + +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kuberuntime + +import ( + "k8s.io/api/core/v1" + runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" +) + +// applyPlatformSpecificContainerConfig applies platform specific configurations to runtimeapi.ContainerConfig. 
+func (m *kubeGenericRuntimeManager) applyPlatformSpecificContainerConfig(config *runtimeapi.ContainerConfig, container *v1.Container, pod *v1.Pod, uid *int64, username string) error { + return nil +} diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container_windows.go b/pkg/kubelet/kuberuntime/kuberuntime_container_windows.go new file mode 100644 index 00000000000..6ea1928cf93 --- /dev/null +++ b/pkg/kubelet/kuberuntime/kuberuntime_container_windows.go @@ -0,0 +1,81 @@ +// +build windows + +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kuberuntime + +import ( + "github.com/docker/docker/pkg/sysinfo" + + "k8s.io/api/core/v1" + kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis" + runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" +) + +// applyPlatformSpecificContainerConfig applies platform specific configurations to runtimeapi.ContainerConfig. +func (m *kubeGenericRuntimeManager) applyPlatformSpecificContainerConfig(config *runtimeapi.ContainerConfig, container *v1.Container, pod *v1.Pod, uid *int64, username string) error { + config.Windows = m.generateWindowsContainerConfig(container, pod, uid, username) + return nil +} + +// generateWindowsContainerConfig generates windows container config for kubelet runtime v1. +// Refer https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node/cri-windows.md. 
+func (m *kubeGenericRuntimeManager) generateWindowsContainerConfig(container *v1.Container, pod *v1.Pod, uid *int64, username string) *runtimeapi.WindowsContainerConfig {
+	wc := &runtimeapi.WindowsContainerConfig{
+		Resources: &runtimeapi.WindowsContainerResources{},
+	}
+
+	cpuRequest := container.Resources.Requests.Cpu()
+	cpuLimit := container.Resources.Limits.Cpu()
+	isolatedByHyperv := kubeletapis.ShouldIsolatedByHyperV(pod.Annotations)
+	if !cpuLimit.IsZero() {
+		// Note that sysinfo.NumCPU() is limited to 64 CPUs on Windows due to Processor Groups,
+		// as only 64 processors are available for execution by a given process. This causes
+		// some oddities on systems with more than 64 processors.
+		// Refer https://msdn.microsoft.com/en-us/library/windows/desktop/dd405503(v=vs.85).aspx.
+		cpuMaximum := 10000 * cpuLimit.MilliValue() / int64(sysinfo.NumCPU()) / 1000
+		if isolatedByHyperv {
+			cpuCount := int64(cpuLimit.MilliValue()+999) / 1000
+			wc.Resources.CpuCount = cpuCount
+			// The cap is then expressed relative to the cpuCount processors assigned above.
+			if cpuCount != 0 {
+				cpuMaximum = cpuLimit.MilliValue() / cpuCount * 10000 / 1000
+			}
+		}
+		// ensure cpuMaximum is in range [1, 10000].
+		if cpuMaximum < 1 {
+			cpuMaximum = 1
+		} else if cpuMaximum > 10000 {
+			cpuMaximum = 10000
+		}
+		wc.Resources.CpuMaximum = cpuMaximum
+	}
+
+	// milliCPUToShares clamps to a non-zero minimum, so testing its result for 0 can never
+	// trigger a fallback. Choose the quantity first: the limit when set, else the request.
+	cpuForShares := cpuLimit
+	if cpuForShares.IsZero() {
+		cpuForShares = cpuRequest
+	}
+	wc.Resources.CpuShares = milliCPUToShares(cpuForShares.MilliValue(), isolatedByHyperv)
+
+	memoryLimit := container.Resources.Limits.Memory().Value()
+	if memoryLimit != 0 {
+		wc.Resources.MemoryLimitInBytes = memoryLimit
+	}
+	return wc
+}