mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-03 01:06:27 +00:00
Ignore failed pods so that DaemonSet rolling updates do not get stuck
This commit is contained in:
parent
8211cabfb2
commit
166147d5c4
@ -862,13 +862,12 @@ func (dsc *DaemonSetsController) resolveControllerRef(namespace string, controll
|
|||||||
// podsShouldBeOnNode figures out the DaemonSet pods to be created and deleted on the given node:
|
// podsShouldBeOnNode figures out the DaemonSet pods to be created and deleted on the given node:
|
||||||
// - nodesNeedingDaemonPods: the pods need to start on the node
|
// - nodesNeedingDaemonPods: the pods need to start on the node
|
||||||
// - podsToDelete: the Pods need to be deleted on the node
|
// - podsToDelete: the Pods need to be deleted on the node
|
||||||
// - failedPodsObserved: the number of failed pods on node
|
|
||||||
// - err: unexpected error
|
// - err: unexpected error
|
||||||
func (dsc *DaemonSetsController) podsShouldBeOnNode(
|
func (dsc *DaemonSetsController) podsShouldBeOnNode(
|
||||||
node *v1.Node,
|
node *v1.Node,
|
||||||
nodeToDaemonPods map[string][]*v1.Pod,
|
nodeToDaemonPods map[string][]*v1.Pod,
|
||||||
ds *apps.DaemonSet,
|
ds *apps.DaemonSet,
|
||||||
) (nodesNeedingDaemonPods, podsToDelete []string, failedPodsObserved int, err error) {
|
) (nodesNeedingDaemonPods, podsToDelete []string, err error) {
|
||||||
|
|
||||||
wantToRun, shouldSchedule, shouldContinueRunning, err := dsc.nodeShouldRunDaemonPod(node, ds)
|
wantToRun, shouldSchedule, shouldContinueRunning, err := dsc.nodeShouldRunDaemonPod(node, ds)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -896,8 +895,6 @@ func (dsc *DaemonSetsController) podsShouldBeOnNode(
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if pod.Status.Phase == v1.PodFailed {
|
if pod.Status.Phase == v1.PodFailed {
|
||||||
failedPodsObserved++
|
|
||||||
|
|
||||||
// This is a critical place where DS is often fighting with kubelet that rejects pods.
|
// This is a critical place where DS is often fighting with kubelet that rejects pods.
|
||||||
// We need to avoid hot looping and backoff.
|
// We need to avoid hot looping and backoff.
|
||||||
backoffKey := failedPodsBackoffKey(ds, node.Name)
|
backoffKey := failedPodsBackoffKey(ds, node.Name)
|
||||||
@ -938,7 +935,7 @@ func (dsc *DaemonSetsController) podsShouldBeOnNode(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nodesNeedingDaemonPods, podsToDelete, failedPodsObserved, nil
|
return nodesNeedingDaemonPods, podsToDelete, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// manage manages the scheduling and running of Pods of ds on nodes.
|
// manage manages the scheduling and running of Pods of ds on nodes.
|
||||||
@ -955,9 +952,8 @@ func (dsc *DaemonSetsController) manage(ds *apps.DaemonSet, nodeList []*v1.Node,
|
|||||||
// For each node, if the node is running the daemon pod but isn't supposed to, kill the daemon
|
// For each node, if the node is running the daemon pod but isn't supposed to, kill the daemon
|
||||||
// pod. If the node is supposed to run the daemon pod, but isn't, create the daemon pod on the node.
|
// pod. If the node is supposed to run the daemon pod, but isn't, create the daemon pod on the node.
|
||||||
var nodesNeedingDaemonPods, podsToDelete []string
|
var nodesNeedingDaemonPods, podsToDelete []string
|
||||||
var failedPodsObserved int
|
|
||||||
for _, node := range nodeList {
|
for _, node := range nodeList {
|
||||||
nodesNeedingDaemonPodsOnNode, podsToDeleteOnNode, failedPodsObservedOnNode, err := dsc.podsShouldBeOnNode(
|
nodesNeedingDaemonPodsOnNode, podsToDeleteOnNode, err := dsc.podsShouldBeOnNode(
|
||||||
node, nodeToDaemonPods, ds)
|
node, nodeToDaemonPods, ds)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -966,7 +962,6 @@ func (dsc *DaemonSetsController) manage(ds *apps.DaemonSet, nodeList []*v1.Node,
|
|||||||
|
|
||||||
nodesNeedingDaemonPods = append(nodesNeedingDaemonPods, nodesNeedingDaemonPodsOnNode...)
|
nodesNeedingDaemonPods = append(nodesNeedingDaemonPods, nodesNeedingDaemonPodsOnNode...)
|
||||||
podsToDelete = append(podsToDelete, podsToDeleteOnNode...)
|
podsToDelete = append(podsToDelete, podsToDeleteOnNode...)
|
||||||
failedPodsObserved += failedPodsObservedOnNode
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove unscheduled pods assigned to not existing nodes when daemonset pods are scheduled by scheduler.
|
// Remove unscheduled pods assigned to not existing nodes when daemonset pods are scheduled by scheduler.
|
||||||
@ -980,11 +975,6 @@ func (dsc *DaemonSetsController) manage(ds *apps.DaemonSet, nodeList []*v1.Node,
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Throw an error when the daemon pods fail, to use ratelimiter to prevent kill-recreate hot loop
|
|
||||||
if failedPodsObserved > 0 {
|
|
||||||
return fmt.Errorf("deleted %d failed pods of DaemonSet %s/%s", failedPodsObserved, ds.Namespace, ds.Name)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user