Setup windows container config to kubelet CRI

2025-07-31 07:20:13 +00:00 · 2018-02-11 14:58:27 +08:00 · 2018-02-11 14:58:27 +08:00 · 3c5e493482
commit 3c5e493482
parent af58729c86
9 changed files with 378 additions and 95 deletions
--- a/pkg/kubelet/apis/well_known_annotations_windows.go
+++ b/pkg/kubelet/apis/well_known_annotations_windows.go
@ -0,0 +1,41 @@
+// +build windows
+
+/*
+Copyright 2018 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package apis
+
+import (
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
+	"k8s.io/kubernetes/pkg/features"
+)
+
+const (
+	// HypervIsolationAnnotationKey and HypervIsolationValue are the annotation key and value that request hyperv isolation for windows containers.
+	// Refer to https://aka.ms/hyperv-container.
+	HypervIsolationAnnotationKey = "experimental.windows.kubernetes.io/isolation-type"
+	HypervIsolationValue         = "hyperv"
+)
+
+// ShouldIsolatedByHyperV reports whether a windows container should run with
+// hyperv isolation: the HyperVContainer feature gate must be enabled and the
+// annotations must carry HypervIsolationAnnotationKey set to HypervIsolationValue.
+func ShouldIsolatedByHyperV(annotations map[string]string) bool {
+	if utilfeature.DefaultFeatureGate.Enabled(features.HyperVContainer) {
+		// A missing key reads as "", which can never equal the non-empty
+		// HypervIsolationValue, so no separate presence check is required.
+		return annotations[HypervIsolationAnnotationKey] == HypervIsolationValue
+	}
+	return false
+}
--- a/pkg/kubelet/kuberuntime/helpers.go
+++ b/pkg/kubelet/kuberuntime/helpers.go
@ -32,17 +32,6 @@ import (
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 )

-const (
-	// Taken from lmctfy https://github.com/google/lmctfy/blob/master/lmctfy/controllers/cpu_controller.cc
-	minShares     = 2
-	sharesPerCPU  = 1024
-	milliCPUToCPU = 1000
-
-	// 100000 is equivalent to 100ms
-	quotaPeriod    = 100 * minQuotaPeriod
-	minQuotaPeriod = 1000
-)
-
 type podsByID []*kubecontainer.Pod

 func (b podsByID) Len() int           { return len(b) }
@ -160,45 +149,6 @@ func isContainerFailed(status *kubecontainer.ContainerStatus) bool {
 	return false
 }

-// milliCPUToShares converts milliCPU to CPU shares
-func milliCPUToShares(milliCPU int64) int64 {
-	if milliCPU == 0 {
-		// Return 2 here to really match kernel default for zero milliCPU.
-		return minShares
-	}
-	// Conceptually (milliCPU / milliCPUToCPU) * sharesPerCPU, but factored to improve rounding.
-	shares := (milliCPU * sharesPerCPU) / milliCPUToCPU
-	if shares < minShares {
-		return minShares
-	}
-	return shares
-}
-
-// milliCPUToQuota converts milliCPU to CFS quota and period values
-func milliCPUToQuota(milliCPU int64) (quota int64, period int64) {
-	// CFS quota is measured in two values:
-	//  - cfs_period_us=100ms (the amount of time to measure usage across)
-	//  - cfs_quota=20ms (the amount of cpu time allowed to be used across a period)
-	// so in the above example, you are limited to 20% of a single CPU
-	// for multi-cpu environments, you just scale equivalent amounts
-	if milliCPU == 0 {
-		return
-	}
-
-	// we set the period to 100ms by default
-	period = quotaPeriod
-
-	// we then convert your milliCPU to a value normalized over a period
-	quota = (milliCPU * quotaPeriod) / milliCPUToCPU
-
-	// quota needs to be a minimum of 1ms.
-	if quota < minQuotaPeriod {
-		quota = minQuotaPeriod
-	}
-
-	return
-}
-
 // getStableKey generates a key (string) to uniquely identify a
 // (pod, container) tuple. The key should include the content of the
 // container, so that any change to the container generates a new key.
--- a/pkg/kubelet/kuberuntime/helpers_linux.go
+++ b/pkg/kubelet/kuberuntime/helpers_linux.go
@ -0,0 +1,69 @@
+// +build linux
+
+/*
+Copyright 2018 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package kuberuntime
+
+const (
+	// Taken from lmctfy https://github.com/google/lmctfy/blob/master/lmctfy/controllers/cpu_controller.cc
+	minShares     = 2    // smallest CPU shares value milliCPUToShares returns
+	sharesPerCPU  = 1024 // CPU shares that correspond to one full CPU
+	milliCPUToCPU = 1000 // milliCPU units per CPU
+
+	// quotaPeriod evaluates to 100000, which is equivalent to 100ms
+	quotaPeriod    = 100 * minQuotaPeriod // period reported by milliCPUToQuota
+	minQuotaPeriod = 1000                 // smallest quota milliCPUToQuota returns (1ms)
+)
+
+// milliCPUToShares translates a milliCPU amount into CPU shares, never
+// returning less than minShares.
+func milliCPUToShares(milliCPU int64) int64 {
+	// Conceptually (milliCPU / milliCPUToCPU) * sharesPerCPU, but the
+	// multiplication is done first to improve rounding. A zero milliCPU scales
+	// to zero and is therefore lifted to minShares by the fallthrough below.
+	if scaled := milliCPU * sharesPerCPU / milliCPUToCPU; scaled >= minShares {
+		return scaled
+	}
+	return minShares
+}
+
+// milliCPUToQuota derives the CFS quota and period for a milliCPU limit.
+//
+// CFS bandwidth is described by two values:
+//  - the period (cfs_period_us): the window over which usage is measured
+//  - the quota (cfs_quota): the cpu time that may be consumed within a period
+// For example, a 100ms period with a 20ms quota limits usage to 20% of a
+// single CPU; limits spanning several CPUs scale the quota accordingly.
+//
+// A zero milliCPU yields (0, 0).
+func milliCPUToQuota(milliCPU int64) (quota int64, period int64) {
+	if milliCPU == 0 {
+		return 0, 0
+	}
+
+	// Normalize the limit over the default 100ms period.
+	scaled := (milliCPU * quotaPeriod) / milliCPUToCPU
+	if scaled < minQuotaPeriod {
+		// The quota is never allowed to drop below 1ms.
+		scaled = minQuotaPeriod
+	}
+
+	return scaled, quotaPeriod
+}
--- a/pkg/kubelet/kuberuntime/helpers_unsupported.go
+++ b/pkg/kubelet/kuberuntime/helpers_unsupported.go
@ -0,0 +1,24 @@
+// +build !linux,!windows
+
+/*
+Copyright 2018 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package kuberuntime
+
+// milliCPUToShares is the stub used when building for neither linux nor windows; it ignores milliCPU and always returns 0.
+func milliCPUToShares(milliCPU int64) int64 {
+	return 0
+}
--- a/pkg/kubelet/kuberuntime/helpers_windows.go
+++ b/pkg/kubelet/kuberuntime/helpers_windows.go
@ -0,0 +1,55 @@
+// +build windows
+
+/*
+Copyright 2018 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package kuberuntime
+
+import (
+	"github.com/docker/docker/pkg/sysinfo"
+)
+
+const (
+	// Taken from https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/resource-controls
+	minSharesProcess = 5000  // lower bound on CPU shares when hyperv isolation is not used
+	minSharesHyperV  = 10    // lower bound on CPU shares when hyperv isolation is used
+	maxShares        = 10000 // upper bound on CPU shares
+	milliCPUToCPU    = 1000  // milliCPU units per CPU
+)
+
+// milliCPUToShares converts milliCPU to windows CPU shares. The result is kept
+// within [minSharesProcess, maxShares], or [minSharesHyperV, maxShares] when
+// hyperv is true.
+func milliCPUToShares(milliCPU int64, hyperv bool) int64 {
+	floor := int64(minSharesProcess)
+	if hyperv {
+		floor = minSharesHyperV
+	}
+
+	// Nothing requested: hand back the lowest value allowed for this isolation mode.
+	if milliCPU == 0 {
+		return floor
+	}
+
+	// Scale the request by the span between the floor and maxShares and spread
+	// it over the CPUs reported by sysinfo.NumCPU(). The multiplication comes
+	// first so that less precision is lost to integer division.
+	scaled := milliCPU * (maxShares - floor) / int64(sysinfo.NumCPU()) / milliCPUToCPU
+	switch {
+	case scaled < floor:
+		return floor
+	case scaled > maxShares:
+		return maxShares
+	default:
+		return scaled
+	}
+}
--- a/pkg/kubelet/kuberuntime/kuberuntime_container.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_container.go
@ -42,7 +42,6 @@ import (
 	runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 	"k8s.io/kubernetes/pkg/kubelet/events"
-	"k8s.io/kubernetes/pkg/kubelet/qos"
 	"k8s.io/kubernetes/pkg/kubelet/types"
 	"k8s.io/kubernetes/pkg/kubelet/util/format"
 	"k8s.io/kubernetes/pkg/util/selinux"
@ -214,7 +213,11 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Contai
 		Stdin:       container.Stdin,
 		StdinOnce:   container.StdinOnce,
 		Tty:         container.TTY,
-		Linux:       m.generateLinuxContainerConfig(container, pod, uid, username),
+	}
+
+	// set platform specific configurations.
+	if err := m.applyPlatformSpecificContainerConfig(config, container, pod, uid, username); err != nil {
+		return nil, err
 	}

 	// set environment variables
@ -231,49 +234,6 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Contai
 	return config, nil
 }

-// generateLinuxContainerConfig generates linux container config for kubelet runtime v1.
-func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.Container, pod *v1.Pod, uid *int64, username string) *runtimeapi.LinuxContainerConfig {
-	lc := &runtimeapi.LinuxContainerConfig{
-		Resources:       &runtimeapi.LinuxContainerResources{},
-		SecurityContext: m.determineEffectiveSecurityContext(pod, container, uid, username),
-	}
-
-	// set linux container resources
-	var cpuShares int64
-	cpuRequest := container.Resources.Requests.Cpu()
-	cpuLimit := container.Resources.Limits.Cpu()
-	memoryLimit := container.Resources.Limits.Memory().Value()
-	oomScoreAdj := int64(qos.GetContainerOOMScoreAdjust(pod, container,
-		int64(m.machineInfo.MemoryCapacity)))
-	// If request is not specified, but limit is, we want request to default to limit.
-	// API server does this for new containers, but we repeat this logic in Kubelet
-	// for containers running on existing Kubernetes clusters.
-	if cpuRequest.IsZero() && !cpuLimit.IsZero() {
-		cpuShares = milliCPUToShares(cpuLimit.MilliValue())
-	} else {
-		// if cpuRequest.Amount is nil, then milliCPUToShares will return the minimal number
-		// of CPU shares.
-		cpuShares = milliCPUToShares(cpuRequest.MilliValue())
-	}
-	lc.Resources.CpuShares = cpuShares
-	if memoryLimit != 0 {
-		lc.Resources.MemoryLimitInBytes = memoryLimit
-	}
-	// Set OOM score of the container based on qos policy. Processes in lower-priority pods should
-	// be killed first if the system runs out of memory.
-	lc.Resources.OomScoreAdj = oomScoreAdj
-
-	if m.cpuCFSQuota {
-		// if cpuLimit.Amount is nil, then the appropriate default value is returned
-		// to allow full usage of cpu resource.
-		cpuQuota, cpuPeriod := milliCPUToQuota(cpuLimit.MilliValue())
-		lc.Resources.CpuQuota = cpuQuota
-		lc.Resources.CpuPeriod = cpuPeriod
-	}
-
-	return lc
-}
-
 // makeDevices generates container devices for kubelet runtime v1.
 func makeDevices(opts *kubecontainer.RunContainerOptions) []*runtimeapi.Device {
 	devices := make([]*runtimeapi.Device, len(opts.Devices))
--- a/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go
@ -0,0 +1,74 @@
+// +build linux
+
+/*
+Copyright 2018 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package kuberuntime
+
+import (
+	"k8s.io/api/core/v1"
+	runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
+	"k8s.io/kubernetes/pkg/kubelet/qos"
+)
+
+// applyPlatformSpecificContainerConfig sets config.Linux to the result of generateLinuxContainerConfig; on linux it always returns nil.
+func (m *kubeGenericRuntimeManager) applyPlatformSpecificContainerConfig(config *runtimeapi.ContainerConfig, container *v1.Container, pod *v1.Pod, uid *int64, username string) error {
+	config.Linux = m.generateLinuxContainerConfig(container, pod, uid, username)
+	return nil
+}
+
+// generateLinuxContainerConfig builds the linux container config for kubelet
+// runtime v1: the effective security context plus the CPU, memory and OOM
+// score settings derived from the container's resource requests and limits.
+func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.Container, pod *v1.Pod, uid *int64, username string) *runtimeapi.LinuxContainerConfig {
+	securityContext := m.determineEffectiveSecurityContext(pod, container, uid, username)
+
+	requestedCPU := container.Resources.Requests.Cpu()
+	limitCPU := container.Resources.Limits.Cpu()
+
+	// Shares normally follow the CPU request. When only a limit is specified
+	// the request defaults to the limit; the API server does this for new
+	// containers, and the logic is repeated here for containers running on
+	// existing Kubernetes clusters. With neither set, milliCPUToShares returns
+	// the minimal number of CPU shares.
+	sharesBasis := requestedCPU
+	if requestedCPU.IsZero() && !limitCPU.IsZero() {
+		sharesBasis = limitCPU
+	}
+
+	resources := &runtimeapi.LinuxContainerResources{
+		CpuShares: milliCPUToShares(sharesBasis.MilliValue()),
+		// A zero memory limit leaves the field at its zero value.
+		MemoryLimitInBytes: container.Resources.Limits.Memory().Value(),
+		// The OOM score is based on qos policy: processes in lower-priority
+		// pods should be killed first if the system runs out of memory.
+		OomScoreAdj: int64(qos.GetContainerOOMScoreAdjust(pod, container,
+			int64(m.machineInfo.MemoryCapacity))),
+	}
+
+	if m.cpuCFSQuota {
+		// With no CPU limit, milliCPUToQuota returns zero values, which
+		// allows full usage of the cpu resource.
+		resources.CpuQuota, resources.CpuPeriod = milliCPUToQuota(limitCPU.MilliValue())
+	}
+
+	return &runtimeapi.LinuxContainerConfig{
+		Resources:       resources,
+		SecurityContext: securityContext,
+	}
+}
--- a/pkg/kubelet/kuberuntime/kuberuntime_container_unsupported.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_container_unsupported.go
@ -0,0 +1,29 @@
+// +build !linux,!windows
+
+/*
+Copyright 2018 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package kuberuntime
+
+import (
+	"k8s.io/api/core/v1"
+	runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
+)
+
+// applyPlatformSpecificContainerConfig is the no-op variant built for platforms other than linux and windows; it leaves config untouched and returns nil.
+func (m *kubeGenericRuntimeManager) applyPlatformSpecificContainerConfig(config *runtimeapi.ContainerConfig, container *v1.Container, pod *v1.Pod, uid *int64, username string) error {
+	return nil
+}
--- a/pkg/kubelet/kuberuntime/kuberuntime_container_windows.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_container_windows.go
@ -0,0 +1,81 @@
+// +build windows
+
+/*
+Copyright 2018 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package kuberuntime
+
+import (
+	"github.com/docker/docker/pkg/sysinfo"
+
+	"k8s.io/api/core/v1"
+	kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
+	runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
+)
+
+// applyPlatformSpecificContainerConfig sets config.Windows to the result of generateWindowsContainerConfig; on windows it always returns nil.
+func (m *kubeGenericRuntimeManager) applyPlatformSpecificContainerConfig(config *runtimeapi.ContainerConfig, container *v1.Container, pod *v1.Pod, uid *int64, username string) error {
+	config.Windows = m.generateWindowsContainerConfig(container, pod, uid, username)
+	return nil
+}
+
+// generateWindowsContainerConfig generates windows container config for kubelet runtime v1.
+// Refer https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node/cri-windows.md.
+//
+// CpuMaximum (and CpuCount for hyperv-isolated containers) is derived from the
+// CPU limit, CpuShares from the CPU limit or, when no limit is set, from the
+// CPU request, and MemoryLimitInBytes from the memory limit. uid and username
+// are not used by this implementation.
+func (m *kubeGenericRuntimeManager) generateWindowsContainerConfig(container *v1.Container, pod *v1.Pod, uid *int64, username string) *runtimeapi.WindowsContainerConfig {
+	wc := &runtimeapi.WindowsContainerConfig{
+		Resources: &runtimeapi.WindowsContainerResources{},
+	}
+
+	cpuRequest := container.Resources.Requests.Cpu()
+	cpuLimit := container.Resources.Limits.Cpu()
+	isolatedByHyperv := kubeletapis.ShouldIsolatedByHyperV(pod.Annotations)
+	if !cpuLimit.IsZero() {
+		// Note that sysinfo.NumCPU() is limited to 64 CPUs on Windows due to Processor Groups,
+		// as only 64 processors are available for execution by a given process. This causes
+		// some oddities on systems with more than 64 processors.
+		// Refer https://msdn.microsoft.com/en-us/library/windows/desktop/dd405503(v=vs.85).aspx.
+		cpuMaximum := 10000 * cpuLimit.MilliValue() / int64(sysinfo.NumCPU()) / 1000
+		if isolatedByHyperv {
+			// Round the limit up to a whole number of CPUs, then express the
+			// limit relative to that CPU count instead of the node's CPUs.
+			cpuCount := (cpuLimit.MilliValue() + 999) / 1000
+			wc.Resources.CpuCount = cpuCount
+
+			if cpuCount != 0 {
+				cpuMaximum = cpuLimit.MilliValue() / cpuCount * 10000 / 1000
+			}
+		}
+		// ensure cpuMaximum is in range [1, 10000].
+		if cpuMaximum < 1 {
+			cpuMaximum = 1
+		} else if cpuMaximum > 10000 {
+			cpuMaximum = 10000
+		}
+
+		wc.Resources.CpuMaximum = cpuMaximum
+	}
+
+	// CPU shares follow the limit when one is set and the request otherwise.
+	// milliCPUToShares clamps its result to a positive minimum and so never
+	// returns 0; the fallback therefore has to test the limit itself rather
+	// than the computed shares.
+	sharesBasis := cpuLimit
+	if cpuLimit.IsZero() {
+		sharesBasis = cpuRequest
+	}
+	wc.Resources.CpuShares = milliCPUToShares(sharesBasis.MilliValue(), isolatedByHyperv)
+
+	memoryLimit := container.Resources.Limits.Memory().Value()
+	if memoryLimit != 0 {
+		wc.Resources.MemoryLimitInBytes = memoryLimit
+	}
+
+	return wc
+}