mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-24 20:24:09 +00:00
Throw an error on failed daemon pods to prevent hotloop
This commit is contained in:
parent
e46d445045
commit
634b695573
@ -461,6 +461,7 @@ func (dsc *DaemonSetsController) manage(ds *extensions.DaemonSet) error {
|
||||
return fmt.Errorf("couldn't get list of nodes when syncing daemon set %#v: %v", ds, err)
|
||||
}
|
||||
var nodesNeedingDaemonPods, podsToDelete []string
|
||||
var failedPodsObserved int
|
||||
for _, node := range nodeList.Items {
|
||||
_, shouldSchedule, shouldContinueRunning, err := dsc.nodeShouldRunDaemonPod(&node, ds)
|
||||
if err != nil {
|
||||
@ -476,13 +477,13 @@ func (dsc *DaemonSetsController) manage(ds *extensions.DaemonSet) error {
|
||||
case shouldContinueRunning:
|
||||
// If a daemon pod failed, delete it
|
||||
// If there are no daemon pods left on this node, we will create one in the next sync loop
|
||||
// TODO: handle the case when the daemon pods fail consistently and cause a kill-recreate hot loop
|
||||
var daemonPodsRunning []*v1.Pod
|
||||
for i := range daemonPods {
|
||||
pod := daemonPods[i]
|
||||
if pod.Status.Phase == v1.PodFailed {
|
||||
glog.V(2).Infof("Found failed daemon pod %s/%s on node %s, will try to kill it", pod.Namespace, node.Name, pod.Name)
|
||||
podsToDelete = append(podsToDelete, pod.Name)
|
||||
failedPodsObserved++
|
||||
} else {
|
||||
daemonPodsRunning = append(daemonPodsRunning, pod)
|
||||
}
|
||||
@ -562,6 +563,10 @@ func (dsc *DaemonSetsController) manage(ds *extensions.DaemonSet) error {
|
||||
for err := range errCh {
|
||||
errors = append(errors, err)
|
||||
}
|
||||
if failedPodsObserved > 0 {
|
||||
// Return an error when failed daemon pods were observed, to prevent a kill-recreate hot loop
|
||||
errors = append(errors, fmt.Errorf("Deleted %d failed pods", failedPodsObserved))
|
||||
}
|
||||
return utilerrors.NewAggregate(errors)
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user