mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-28 14:07:14 +00:00
Throw an error on failed daemon pods to prevent hot loop
This commit is contained in:
parent
e46d445045
commit
634b695573
@ -461,6 +461,7 @@ func (dsc *DaemonSetsController) manage(ds *extensions.DaemonSet) error {
|
|||||||
return fmt.Errorf("couldn't get list of nodes when syncing daemon set %#v: %v", ds, err)
|
return fmt.Errorf("couldn't get list of nodes when syncing daemon set %#v: %v", ds, err)
|
||||||
}
|
}
|
||||||
var nodesNeedingDaemonPods, podsToDelete []string
|
var nodesNeedingDaemonPods, podsToDelete []string
|
||||||
|
var failedPodsObserved int
|
||||||
for _, node := range nodeList.Items {
|
for _, node := range nodeList.Items {
|
||||||
_, shouldSchedule, shouldContinueRunning, err := dsc.nodeShouldRunDaemonPod(&node, ds)
|
_, shouldSchedule, shouldContinueRunning, err := dsc.nodeShouldRunDaemonPod(&node, ds)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -476,13 +477,13 @@ func (dsc *DaemonSetsController) manage(ds *extensions.DaemonSet) error {
|
|||||||
case shouldContinueRunning:
|
case shouldContinueRunning:
|
||||||
// If a daemon pod failed, delete it
|
// If a daemon pod failed, delete it
|
||||||
// If there's no daemon pods left on this node, we will create it in the next sync loop
|
// If there's no daemon pods left on this node, we will create it in the next sync loop
|
||||||
// TODO: handle the case when the daemon pods fail consistently and causes kill-recreate hot loop
|
|
||||||
var daemonPodsRunning []*v1.Pod
|
var daemonPodsRunning []*v1.Pod
|
||||||
for i := range daemonPods {
|
for i := range daemonPods {
|
||||||
pod := daemonPods[i]
|
pod := daemonPods[i]
|
||||||
if pod.Status.Phase == v1.PodFailed {
|
if pod.Status.Phase == v1.PodFailed {
|
||||||
glog.V(2).Infof("Found failed daemon pod %s/%s on node %s, will try to kill it", pod.Namespace, node.Name, pod.Name)
|
glog.V(2).Infof("Found failed daemon pod %s/%s on node %s, will try to kill it", pod.Namespace, node.Name, pod.Name)
|
||||||
podsToDelete = append(podsToDelete, pod.Name)
|
podsToDelete = append(podsToDelete, pod.Name)
|
||||||
|
failedPodsObserved++
|
||||||
} else {
|
} else {
|
||||||
daemonPodsRunning = append(daemonPodsRunning, pod)
|
daemonPodsRunning = append(daemonPodsRunning, pod)
|
||||||
}
|
}
|
||||||
@ -562,6 +563,10 @@ func (dsc *DaemonSetsController) manage(ds *extensions.DaemonSet) error {
|
|||||||
for err := range errCh {
|
for err := range errCh {
|
||||||
errors = append(errors, err)
|
errors = append(errors, err)
|
||||||
}
|
}
|
||||||
|
if failedPodsObserved > 0 {
|
||||||
|
// Throw an error when the daemon pods fail to prevent kill-recreate hot loop
|
||||||
|
errors = append(errors, fmt.Errorf("Deleted %d failed pods", failedPodsObserved))
|
||||||
|
}
|
||||||
return utilerrors.NewAggregate(errors)
|
return utilerrors.NewAggregate(errors)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user