/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package util

import (
	"fmt"
	"strconv"

	apps "k8s.io/api/apps/v1"
	"k8s.io/api/core/v1"
	extensions "k8s.io/api/extensions/v1beta1"
	apiequality "k8s.io/apimachinery/pkg/api/equality"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
	"k8s.io/kubernetes/pkg/features"
	kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
	"k8s.io/kubernetes/pkg/scheduler/algorithm"
)

// GetTemplateGeneration gets the template generation associated with a v1.DaemonSet by extracting it from the
// deprecated annotation. If no annotation is found, nil is returned. If the annotation is found but fails to parse,
// nil is returned with an error. If the generation can be parsed from the annotation, a pointer to the parsed int64
// value is returned.
func GetTemplateGeneration(ds *apps.DaemonSet) (*int64, error) {
	annotation, found := ds.Annotations[apps.DeprecatedTemplateGeneration]
	if !found {
		return nil, nil
	}
	generation, err := strconv.ParseInt(annotation, 10, 64)
	if err != nil {
		return nil, err
	}
	return &generation, nil
}
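
// Illustrative usage sketch (not part of the original file): a caller would
// typically resolve the deprecated template generation before building the pod
// template for a revision. The variables ds (*apps.DaemonSet) and hash (string)
// are assumed to come from the surrounding controller code.
//
//	generation, err := GetTemplateGeneration(ds)
//	if err != nil {
//		return fmt.Errorf("couldn't parse template generation for %s/%s: %v", ds.Namespace, ds.Name, err)
//	}
//	template := CreatePodTemplate(ds.Spec.Template, generation, hash)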

// CreatePodTemplate returns a copy of the provided template with additional
// labels containing the templateGeneration (for backward compatibility) and the
// hash of the provided template, and with the default daemon tolerations set.
func CreatePodTemplate(template v1.PodTemplateSpec, generation *int64, hash string) v1.PodTemplateSpec {
	newTemplate := *template.DeepCopy()
	// DaemonSet pods shouldn't be deleted by NodeController in case of node problems.
	// Add infinite toleration for taint notReady:NoExecute here
	// to survive taint-based eviction enforced by NodeController
	// when node turns not ready.
	v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
		Key:      algorithm.TaintNodeNotReady,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoExecute,
	})

	// DaemonSet pods shouldn't be deleted by NodeController in case of node problems.
	// Add infinite toleration for taint unreachable:NoExecute here
	// to survive taint-based eviction enforced by NodeController
	// when node turns unreachable.
	v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
		Key:      algorithm.TaintNodeUnreachable,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoExecute,
	})

	// According to the TaintNodesByCondition feature, all DaemonSet pods should tolerate
	// the MemoryPressure and DiskPressure taints, and critical pods should tolerate
	// the OutOfDisk taint.
	v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
		Key:      algorithm.TaintNodeDiskPressure,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	})

	v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
		Key:      algorithm.TaintNodeMemoryPressure,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	})

	// TODO(#48843) OutOfDisk taints will be removed in 1.10
	if utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
		kubelettypes.IsCritical(newTemplate.Namespace, newTemplate.Annotations) {
		v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
			Key:      algorithm.TaintNodeOutOfDisk,
			Operator: v1.TolerationOpExists,
			Effect:   v1.TaintEffectNoExecute,
		})
	}

	if newTemplate.ObjectMeta.Labels == nil {
		newTemplate.ObjectMeta.Labels = make(map[string]string)
	}
	if generation != nil {
		newTemplate.ObjectMeta.Labels[extensions.DaemonSetTemplateGenerationKey] = fmt.Sprint(*generation)
	}
	// TODO: do we need to validate if the DaemonSet is RollingUpdate or not?
	if len(hash) > 0 {
		newTemplate.ObjectMeta.Labels[extensions.DefaultDaemonSetUniqueLabelKey] = hash
	}
	return newTemplate
}
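
// Illustrative sketch (not part of the original file): the labels added here are
// what IsPodUpdated compares against later. Assuming generation points at
// int64(3) and hash is the made-up value "6b58f7c9d4":
//
//	newTemplate := CreatePodTemplate(ds.Spec.Template, generation, "6b58f7c9d4")
//	// newTemplate.Labels[extensions.DaemonSetTemplateGenerationKey] == "3"
//	// newTemplate.Labels[extensions.DefaultDaemonSetUniqueLabelKey] == "6b58f7c9d4"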

// IsPodUpdated checks if the pod contains a label value that matches either the templateGeneration or the hash.
func IsPodUpdated(pod *v1.Pod, hash string, dsTemplateGeneration *int64) bool {
	// Compare with the hash to see if the pod is updated; we need to maintain backward compatibility of templateGeneration.
	templateMatches := dsTemplateGeneration != nil &&
		pod.Labels[extensions.DaemonSetTemplateGenerationKey] == fmt.Sprint(*dsTemplateGeneration)
	hashMatches := len(hash) > 0 && pod.Labels[extensions.DefaultDaemonSetUniqueLabelKey] == hash
	return hashMatches || templateMatches
}
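
// Illustrative sketch (not part of the original file): deciding whether an existing
// daemon pod still matches the current revision. pod, hash and generation are
// assumed to come from the surrounding controller logic.
//
//	if !IsPodUpdated(pod, hash, generation) {
//		// The pod was created from an older template and is a candidate for
//		// deletion during a rolling update.
//	}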

// SplitByAvailablePods splits provided daemon set pods by availability
func SplitByAvailablePods(minReadySeconds int32, pods []*v1.Pod) ([]*v1.Pod, []*v1.Pod) {
	unavailablePods := []*v1.Pod{}
	availablePods := []*v1.Pod{}
	for _, pod := range pods {
		if podutil.IsPodAvailable(pod, minReadySeconds, metav1.Now()) {
			availablePods = append(availablePods, pod)
		} else {
			unavailablePods = append(unavailablePods, pod)
		}
	}
	return availablePods, unavailablePods
}
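
// Illustrative sketch (not part of the original file): the two slices returned above
// can be used to work out how many old pods may be deleted without violating
// maxUnavailable. ds (*apps.DaemonSet) and oldPods ([]*v1.Pod) are assumed inputs.
//
//	availableOldPods, unavailableOldPods := SplitByAvailablePods(ds.Spec.MinReadySeconds, oldPods)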

// ReplaceDaemonSetPodNodeNameNodeAffinity replaces the RequiredDuringSchedulingIgnoredDuringExecution
// NodeAffinity of the given affinity with a new NodeAffinity that selects the given nodeName.
// Note that this function assumes that no NodeAffinity conflicts with the selected nodeName.
func ReplaceDaemonSetPodNodeNameNodeAffinity(affinity *v1.Affinity, nodename string) *v1.Affinity {
	nodeSelReq := v1.NodeSelectorRequirement{
		Key:      algorithm.NodeFieldSelectorKeyNodeName,
		Operator: v1.NodeSelectorOpIn,
		Values:   []string{nodename},
	}

	nodeSelector := &v1.NodeSelector{
		NodeSelectorTerms: []v1.NodeSelectorTerm{
			{
				MatchFields: []v1.NodeSelectorRequirement{nodeSelReq},
			},
		},
	}

	if affinity == nil {
		return &v1.Affinity{
			NodeAffinity: &v1.NodeAffinity{
				RequiredDuringSchedulingIgnoredDuringExecution: nodeSelector,
			},
		}
	}

	if affinity.NodeAffinity == nil {
		affinity.NodeAffinity = &v1.NodeAffinity{
			RequiredDuringSchedulingIgnoredDuringExecution: nodeSelector,
		}
		return affinity
	}

	nodeAffinity := affinity.NodeAffinity

	if nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil {
		nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = nodeSelector
		return affinity
	}

	// Replace node selector with the new one.
	nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = []v1.NodeSelectorTerm{
		{
			MatchFields: []v1.NodeSelectorRequirement{nodeSelReq},
		},
	}

	return affinity
}
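
// Illustrative sketch (not part of the original file): pinning a daemon pod to a
// single node, here the made-up node name "node-1".
//
//	affinity := ReplaceDaemonSetPodNodeNameNodeAffinity(pod.Spec.Affinity, "node-1")
//	// affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution now holds a
//	// single term whose MatchFields requirement uses algorithm.NodeFieldSelectorKeyNodeName
//	// with operator In and values ["node-1"].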

// AppendNoScheduleTolerationIfNotExist appends the NoSchedule toleration for the unschedulable
// taint to the given tolerations if it is not already present; otherwise, the tolerations are
// returned unchanged.
func AppendNoScheduleTolerationIfNotExist(tolerations []v1.Toleration) []v1.Toleration {
	unschedulableToleration := v1.Toleration{
		Key:      algorithm.TaintNodeUnschedulable,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	}

	unschedulableTaintExist := false

	for _, t := range tolerations {
		if apiequality.Semantic.DeepEqual(t, unschedulableToleration) {
			unschedulableTaintExist = true
			break
		}
	}

	if !unschedulableTaintExist {
		tolerations = append(tolerations, unschedulableToleration)
	}

	return tolerations
}
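
// Illustrative sketch (not part of the original file): applying the toleration to a
// pod spec so that daemon pods can still be scheduled onto nodes tainted as
// unschedulable. podSpec (*v1.PodSpec) is an assumed input.
//
//	podSpec.Tolerations = AppendNoScheduleTolerationIfNotExist(podSpec.Tolerations)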

// GetTargetNodeName gets the target node name of DaemonSet pods. If `.spec.NodeName` is not empty,
// it is returned; otherwise, the node name of pending pods is retrieved from their NodeAffinity.
// An error is returned if the node name cannot be determined from either `.spec.NodeName` or NodeAffinity.
func GetTargetNodeName(pod *v1.Pod) (string, error) {
	if len(pod.Spec.NodeName) != 0 {
		return pod.Spec.NodeName, nil
	}

	// If ScheduleDaemonSetPods was enabled before, retrieve the node name of unscheduled pods from NodeAffinity.
	if pod.Spec.Affinity == nil ||
		pod.Spec.Affinity.NodeAffinity == nil ||
		pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil {
		return "", fmt.Errorf("no spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution for pod %s/%s",
			pod.Namespace, pod.Name)
	}

	terms := pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms
	if len(terms) < 1 {
		return "", fmt.Errorf("no nodeSelectorTerms in requiredDuringSchedulingIgnoredDuringExecution of pod %s/%s",
			pod.Namespace, pod.Name)
	}

	for _, term := range terms {
		for _, exp := range term.MatchFields {
			if exp.Key == algorithm.NodeFieldSelectorKeyNodeName &&
				exp.Operator == v1.NodeSelectorOpIn {
				if len(exp.Values) != 1 {
					return "", fmt.Errorf("the matchFields value of '%s' is not unique for pod %s/%s",
						algorithm.NodeFieldSelectorKeyNodeName, pod.Namespace, pod.Name)
				}

				return exp.Values[0], nil
			}
		}
	}

	return "", fmt.Errorf("no node name found for pod %s/%s", pod.Namespace, pod.Name)
}
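
// Illustrative sketch (not part of the original file): building a node-name -> pods
// index for existing daemon pods. pods ([]*v1.Pod) is an assumed input.
//
//	nodeToDaemonPods := make(map[string][]*v1.Pod)
//	for _, pod := range pods {
//		nodeName, err := GetTargetNodeName(pod)
//		if err != nil {
//			continue // the pod is not (yet) targeted at a specific node
//		}
//		nodeToDaemonPods[nodeName] = append(nodeToDaemonPods[nodeName], pod)
//	}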