diff --git a/cmd/kubeadm/app/componentconfigs/validation_test.go b/cmd/kubeadm/app/componentconfigs/validation_test.go index 3e70abc6b31..1a9caea6e78 100644 --- a/cmd/kubeadm/app/componentconfigs/validation_test.go +++ b/cmd/kubeadm/app/componentconfigs/validation_test.go @@ -315,6 +315,7 @@ func TestValidateKubeletConfiguration(t *testing.T) { RegistryPullQPS: 5, HairpinMode: "promiscuous-bridge", NodeLeaseDurationSeconds: 40, + TopologyManagerPolicy: "none", }, }, }, @@ -346,6 +347,7 @@ func TestValidateKubeletConfiguration(t *testing.T) { ReadOnlyPort: -10, RegistryBurst: -10, RegistryPullQPS: -10, + TopologyManagerPolicy: "", }, }, }, diff --git a/cmd/kubelet/app/options/options.go b/cmd/kubelet/app/options/options.go index d260d2df67f..3b49f837c39 100644 --- a/cmd/kubelet/app/options/options.go +++ b/cmd/kubelet/app/options/options.go @@ -522,6 +522,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig fs.StringVar(&c.CPUManagerPolicy, "cpu-manager-policy", c.CPUManagerPolicy, "CPU Manager policy to use. Possible values: 'none', 'static'. Default: 'none'") fs.DurationVar(&c.CPUManagerReconcilePeriod.Duration, "cpu-manager-reconcile-period", c.CPUManagerReconcilePeriod.Duration, " CPU Manager reconciliation period. Examples: '10s', or '1m'. If not supplied, defaults to `NodeStatusUpdateFrequency`") fs.Var(cliflag.NewMapStringString(&c.QOSReserved), "qos-reserved", " A set of ResourceName=Percentage (e.g. memory=50%) pairs that describe how pod resource requests are reserved at the QoS level. Currently only memory is supported. Requires the QOSReserved feature gate to be enabled.") + fs.StringVar(&c.TopologyManagerPolicy, "topology-manager-policy", c.TopologyManagerPolicy, "Topology Manager policy to use. 
Possible values: 'none', 'preferred', 'strict'.") fs.DurationVar(&c.RuntimeRequestTimeout.Duration, "runtime-request-timeout", c.RuntimeRequestTimeout.Duration, "Timeout of all runtime requests except long running request - pull, logs, exec and attach. When timeout exceeded, kubelet will cancel the request, throw out an error and retry later.") fs.StringVar(&c.HairpinMode, "hairpin-mode", c.HairpinMode, "How should the kubelet setup hairpin NAT. This allows endpoints of a Service to loadbalance back to themselves if they should try to access their own Service. Valid values are \"promiscuous-bridge\", \"hairpin-veth\" and \"none\".") fs.Int32Var(&c.MaxPods, "max-pods", c.MaxPods, "Number of Pods that can run on this Kubelet.") diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index b676ff126f8..7cd727ea2e0 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -162,6 +162,12 @@ const ( // Enable nodes to change CPUCFSQuotaPeriod CPUCFSQuotaPeriod featuregate.Feature = "CustomCPUCFSQuotaPeriod" + // owner: @lmdaly + // alpha: v1.16 + // + // Enable resource managers to make NUMA aligned decisions + TopologyManager featuregate.Feature = "TopologyManager" + // owner: @sjenning // beta: v1.11 // @@ -485,6 +491,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS AttachVolumeLimit: {Default: true, PreRelease: featuregate.Beta}, CPUManager: {Default: true, PreRelease: featuregate.Beta}, CPUCFSQuotaPeriod: {Default: false, PreRelease: featuregate.Alpha}, + TopologyManager: {Default: false, PreRelease: featuregate.Alpha}, ServiceNodeExclusion: {Default: false, PreRelease: featuregate.Alpha}, MountContainers: {Default: false, PreRelease: featuregate.Alpha}, CSIDriverRegistry: {Default: true, PreRelease: featuregate.Beta}, diff --git a/pkg/kubelet/apis/config/fuzzer/fuzzer.go b/pkg/kubelet/apis/config/fuzzer/fuzzer.go index 0384107cb3f..caab845f8e9 100644 --- 
a/pkg/kubelet/apis/config/fuzzer/fuzzer.go +++ b/pkg/kubelet/apis/config/fuzzer/fuzzer.go @@ -66,6 +66,7 @@ func Funcs(codecs runtimeserializer.CodecFactory) []interface{} { obj.NodeLeaseDurationSeconds = 40 obj.CPUManagerPolicy = "none" obj.CPUManagerReconcilePeriod = obj.NodeStatusUpdateFrequency + obj.TopologyManagerPolicy = kubeletconfig.NoneTopologyManagerPolicy obj.QOSReserved = map[string]string{ "memory": "50%", } diff --git a/pkg/kubelet/apis/config/helpers_test.go b/pkg/kubelet/apis/config/helpers_test.go index 76e71c74938..76d0eb97962 100644 --- a/pkg/kubelet/apis/config/helpers_test.go +++ b/pkg/kubelet/apis/config/helpers_test.go @@ -149,6 +149,7 @@ var ( "CPUCFSQuotaPeriod.Duration", "CPUManagerPolicy", "CPUManagerReconcilePeriod.Duration", + "TopologyManagerPolicy", "QOSReserved[*]", "CgroupDriver", "CgroupRoot", diff --git a/pkg/kubelet/apis/config/types.go b/pkg/kubelet/apis/config/types.go index e7d12ec3069..7ff18c62a0f 100644 --- a/pkg/kubelet/apis/config/types.go +++ b/pkg/kubelet/apis/config/types.go @@ -54,6 +54,15 @@ const ( // WatchChangeDetectionStrategy is a mode in which kubelet uses // watches to observe changes to objects that are in its interest. WatchChangeDetectionStrategy ResourceChangeDetectionStrategy = "Watch" + // StrictTopologyManagerPolicy is a mode in which kubelet only allows + // pods with NUMA alignment of CPU and device resources. + StrictTopologyManagerPolicy = "strict" + // PreferredTopologyManagerPolicy is a mode in which kubelet will favour + // pods with NUMA alignment of CPU and device resources. + PreferredTopologyManagerPolicy = "preferred" + // NoneTopologyManagerPolicy is a mode in which kubelet has no knowledge + // of NUMA alignment of a pod's CPU and device resources. + NoneTopologyManagerPolicy = "none" ) // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object @@ -197,6 +206,9 @@ type KubeletConfiguration struct { // CPU Manager reconciliation period. 
// Requires the CPUManager feature gate to be enabled. CPUManagerReconcilePeriod metav1.Duration + // TopologyManagerPolicy is the name of the policy to use. + // Policies other than "none" require the TopologyManager feature gate to be enabled. + TopologyManagerPolicy string // Map of QoS resource reservation percentages (memory only for now). // Requires the QOSReserved feature gate to be enabled. QOSReserved map[string]string diff --git a/pkg/kubelet/apis/config/v1beta1/defaults.go b/pkg/kubelet/apis/config/v1beta1/defaults.go index 8c1568e91e8..170b6ac8bda 100644 --- a/pkg/kubelet/apis/config/v1beta1/defaults.go +++ b/pkg/kubelet/apis/config/v1beta1/defaults.go @@ -149,6 +149,9 @@ func SetDefaults_KubeletConfiguration(obj *kubeletconfigv1beta1.KubeletConfigura // Keep the same as default NodeStatusUpdateFrequency obj.CPUManagerReconcilePeriod = metav1.Duration{Duration: 10 * time.Second} } + if obj.TopologyManagerPolicy == "" { + obj.TopologyManagerPolicy = kubeletconfigv1beta1.NoneTopologyManagerPolicy + } if obj.RuntimeRequestTimeout == zeroDuration { obj.RuntimeRequestTimeout = metav1.Duration{Duration: 2 * time.Minute} } diff --git a/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go b/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go index b2bbe42b3c2..cb761146ea9 100644 --- a/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go +++ b/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go @@ -270,6 +270,7 @@ func autoConvert_v1beta1_KubeletConfiguration_To_config_KubeletConfiguration(in out.CgroupDriver = in.CgroupDriver out.CPUManagerPolicy = in.CPUManagerPolicy out.CPUManagerReconcilePeriod = in.CPUManagerReconcilePeriod + out.TopologyManagerPolicy = in.TopologyManagerPolicy out.QOSReserved = *(*map[string]string)(unsafe.Pointer(&in.QOSReserved)) out.RuntimeRequestTimeout = in.RuntimeRequestTimeout out.HairpinMode = in.HairpinMode @@ -401,6 +402,7 @@ func 
autoConvert_config_KubeletConfiguration_To_v1beta1_KubeletConfiguration(in out.CgroupDriver = in.CgroupDriver out.CPUManagerPolicy = in.CPUManagerPolicy out.CPUManagerReconcilePeriod = in.CPUManagerReconcilePeriod + out.TopologyManagerPolicy = in.TopologyManagerPolicy out.QOSReserved = *(*map[string]string)(unsafe.Pointer(&in.QOSReserved)) out.RuntimeRequestTimeout = in.RuntimeRequestTimeout out.HairpinMode = in.HairpinMode diff --git a/pkg/kubelet/apis/config/validation/validation.go b/pkg/kubelet/apis/config/validation/validation.go index fdafba39079..31c6cedda28 100644 --- a/pkg/kubelet/apis/config/validation/validation.go +++ b/pkg/kubelet/apis/config/validation/validation.go @@ -111,6 +111,9 @@ func ValidateKubeletConfiguration(kc *kubeletconfig.KubeletConfiguration) error if kc.ServerTLSBootstrap && !localFeatureGate.Enabled(features.RotateKubeletServerCertificate) { allErrors = append(allErrors, fmt.Errorf("invalid configuration: ServerTLSBootstrap %v requires feature gate RotateKubeletServerCertificate", kc.ServerTLSBootstrap)) } + if kc.TopologyManagerPolicy != kubeletconfig.NoneTopologyManagerPolicy && !localFeatureGate.Enabled(features.TopologyManager) { + allErrors = append(allErrors, fmt.Errorf("invalid configuration: TopologyManager %v requires feature gate TopologyManager", kc.TopologyManagerPolicy)) + } for _, val := range kc.EnforceNodeAllocatable { switch val { case kubetypes.NodeAllocatableEnforcementKey: diff --git a/pkg/kubelet/apis/config/validation/validation_test.go b/pkg/kubelet/apis/config/validation/validation_test.go index fdb435c43a1..eb48c1e0514 100644 --- a/pkg/kubelet/apis/config/validation/validation_test.go +++ b/pkg/kubelet/apis/config/validation/validation_test.go @@ -53,6 +53,7 @@ func TestValidateKubeletConfiguration(t *testing.T) { HairpinMode: kubeletconfig.PromiscuousBridge, NodeLeaseDurationSeconds: 1, CPUCFSQuotaPeriod: metav1.Duration{Duration: 100 * time.Millisecond}, + TopologyManagerPolicy: "none", } if allErrors := 
ValidateKubeletConfiguration(successCase); allErrors != nil { t.Errorf("expect no errors, got %v", allErrors) @@ -83,8 +84,9 @@ func TestValidateKubeletConfiguration(t *testing.T) { HairpinMode: "foo", NodeLeaseDurationSeconds: -1, CPUCFSQuotaPeriod: metav1.Duration{Duration: 0}, + TopologyManagerPolicy: "", } - const numErrs = 25 + const numErrs = 26 if allErrors := ValidateKubeletConfiguration(errorCase); len(allErrors.(utilerrors.Aggregate).Errors()) != numErrs { t.Errorf("expect %d errors, got %v", numErrs, len(allErrors.(utilerrors.Aggregate).Errors())) } diff --git a/staging/src/k8s.io/kubelet/config/v1beta1/types.go b/staging/src/k8s.io/kubelet/config/v1beta1/types.go index 54e166f7685..7bca679b5e0 100644 --- a/staging/src/k8s.io/kubelet/config/v1beta1/types.go +++ b/staging/src/k8s.io/kubelet/config/v1beta1/types.go @@ -54,6 +54,15 @@ const ( // WatchChangeDetectionStrategy is a mode in which kubelet uses // watches to observe changes to objects that are in its interest. WatchChangeDetectionStrategy ResourceChangeDetectionStrategy = "Watch" + // StrictTopologyManagerPolicy is a mode in which kubelet only allows + // pods with NUMA alignment of CPU and device resources. + StrictTopologyManagerPolicy = "strict" + // PreferredTopologyManagerPolicy is a mode in which kubelet will favour + // pods with NUMA alignment of CPU and device resources. + PreferredTopologyManagerPolicy = "preferred" + // NoneTopologyManagerPolicy is a mode in which kubelet has no knowledge + // of NUMA alignment of a pod's CPU and device resources. + NoneTopologyManagerPolicy = "none" ) // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object @@ -409,6 +418,13 @@ type KubeletConfiguration struct { // Default: "10s" // +optional CPUManagerReconcilePeriod metav1.Duration `json:"cpuManagerReconcilePeriod,omitempty"` + // TopologyManagerPolicy is the name of the policy to use. + // Policies other than "none" require the TopologyManager feature gate to be enabled. 
+ // Dynamic Kubelet Config (beta): This field should not be updated without a full node + // reboot. It is safest to keep this value the same as the local config. + // Default: "none" + // +optional + TopologyManagerPolicy string `json:"topologyManagerPolicy,omitempty"` // qosReserved is a set of resource name to percentage pairs that specify // the minimum percentage of a resource reserved for exclusive use by the // guaranteed QoS tier.