mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-31 15:25:57 +00:00
Merge pull request #78170 from DaiHao/daemon
ignore failed pods to not block rolling update daemonset
This commit is contained in:
commit
bbdd0557b6
@ -862,13 +862,12 @@ func (dsc *DaemonSetsController) resolveControllerRef(namespace string, controll
|
||||
// podsShouldBeOnNode figures out the DaemonSet pods to be created and deleted on the given node:
|
||||
// - nodesNeedingDaemonPods: the pods that need to start on the node
|
||||
// - podsToDelete: the pods that need to be deleted on the node
|
||||
// - failedPodsObserved: the number of failed pods on node
|
||||
// - err: unexpected error
|
||||
func (dsc *DaemonSetsController) podsShouldBeOnNode(
|
||||
node *v1.Node,
|
||||
nodeToDaemonPods map[string][]*v1.Pod,
|
||||
ds *apps.DaemonSet,
|
||||
) (nodesNeedingDaemonPods, podsToDelete []string, failedPodsObserved int, err error) {
|
||||
) (nodesNeedingDaemonPods, podsToDelete []string, err error) {
|
||||
|
||||
wantToRun, shouldSchedule, shouldContinueRunning, err := dsc.nodeShouldRunDaemonPod(node, ds)
|
||||
if err != nil {
|
||||
@ -900,8 +899,6 @@ func (dsc *DaemonSetsController) podsShouldBeOnNode(
|
||||
continue
|
||||
}
|
||||
if pod.Status.Phase == v1.PodFailed {
|
||||
failedPodsObserved++
|
||||
|
||||
// This is a critical place where DS is often fighting with kubelet that rejects pods.
|
||||
// We need to avoid hot looping and backoff.
|
||||
backoffKey := failedPodsBackoffKey(ds, node.Name)
|
||||
@ -945,7 +942,7 @@ func (dsc *DaemonSetsController) podsShouldBeOnNode(
|
||||
}
|
||||
}
|
||||
|
||||
return nodesNeedingDaemonPods, podsToDelete, failedPodsObserved, nil
|
||||
return nodesNeedingDaemonPods, podsToDelete, nil
|
||||
}
|
||||
|
||||
// manage manages the scheduling and running of Pods of ds on nodes.
|
||||
@ -962,9 +959,8 @@ func (dsc *DaemonSetsController) manage(ds *apps.DaemonSet, nodeList []*v1.Node,
|
||||
// For each node, if the node is running the daemon pod but isn't supposed to, kill the daemon
|
||||
// pod. If the node is supposed to run the daemon pod, but isn't, create the daemon pod on the node.
|
||||
var nodesNeedingDaemonPods, podsToDelete []string
|
||||
var failedPodsObserved int
|
||||
for _, node := range nodeList {
|
||||
nodesNeedingDaemonPodsOnNode, podsToDeleteOnNode, failedPodsObservedOnNode, err := dsc.podsShouldBeOnNode(
|
||||
nodesNeedingDaemonPodsOnNode, podsToDeleteOnNode, err := dsc.podsShouldBeOnNode(
|
||||
node, nodeToDaemonPods, ds)
|
||||
|
||||
if err != nil {
|
||||
@ -973,7 +969,6 @@ func (dsc *DaemonSetsController) manage(ds *apps.DaemonSet, nodeList []*v1.Node,
|
||||
|
||||
nodesNeedingDaemonPods = append(nodesNeedingDaemonPods, nodesNeedingDaemonPodsOnNode...)
|
||||
podsToDelete = append(podsToDelete, podsToDeleteOnNode...)
|
||||
failedPodsObserved += failedPodsObservedOnNode
|
||||
}
|
||||
|
||||
// Remove unscheduled pods assigned to non-existent nodes when daemonset pods are scheduled by the scheduler.
|
||||
@ -987,11 +982,6 @@ func (dsc *DaemonSetsController) manage(ds *apps.DaemonSet, nodeList []*v1.Node,
|
||||
return err
|
||||
}
|
||||
|
||||
// Throw an error when the daemon pods fail, to use ratelimiter to prevent kill-recreate hot loop
|
||||
if failedPodsObserved > 0 {
|
||||
return fmt.Errorf("deleted %d failed pods of DaemonSet %s/%s", failedPodsObserved, ds.Namespace, ds.Name)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user