mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-04 01:40:07 +00:00
MarkPodsNotReady retry fix
This commit is contained in:
parent
8577711b61
commit
9759a982c5
@ -236,6 +236,8 @@ type Controller struct {
|
|||||||
// workers that are responsible for tainting nodes.
|
// workers that are responsible for tainting nodes.
|
||||||
zoneNoExecuteTainter map[string]*scheduler.RateLimitedTimedQueue
|
zoneNoExecuteTainter map[string]*scheduler.RateLimitedTimedQueue
|
||||||
|
|
||||||
|
nodesToRetry sync.Map
|
||||||
|
|
||||||
zoneStates map[string]ZoneState
|
zoneStates map[string]ZoneState
|
||||||
|
|
||||||
daemonSetStore appsv1listers.DaemonSetLister
|
daemonSetStore appsv1listers.DaemonSetLister
|
||||||
@ -351,6 +353,7 @@ func NewNodeLifecycleController(
|
|||||||
nodeMonitorGracePeriod: nodeMonitorGracePeriod,
|
nodeMonitorGracePeriod: nodeMonitorGracePeriod,
|
||||||
zonePodEvictor: make(map[string]*scheduler.RateLimitedTimedQueue),
|
zonePodEvictor: make(map[string]*scheduler.RateLimitedTimedQueue),
|
||||||
zoneNoExecuteTainter: make(map[string]*scheduler.RateLimitedTimedQueue),
|
zoneNoExecuteTainter: make(map[string]*scheduler.RateLimitedTimedQueue),
|
||||||
|
nodesToRetry: sync.Map{},
|
||||||
zoneStates: make(map[string]ZoneState),
|
zoneStates: make(map[string]ZoneState),
|
||||||
podEvictionTimeout: podEvictionTimeout,
|
podEvictionTimeout: podEvictionTimeout,
|
||||||
evictionLimiterQPS: evictionLimiterQPS,
|
evictionLimiterQPS: evictionLimiterQPS,
|
||||||
@ -467,6 +470,10 @@ func NewNodeLifecycleController(
|
|||||||
nc.nodeUpdateQueue.Add(newNode.Name)
|
nc.nodeUpdateQueue.Add(newNode.Name)
|
||||||
return nil
|
return nil
|
||||||
}),
|
}),
|
||||||
|
DeleteFunc: nodeutil.CreateDeleteNodeHandler(func(node *v1.Node) error {
|
||||||
|
nc.nodesToRetry.Delete(node.Name)
|
||||||
|
return nil
|
||||||
|
}),
|
||||||
})
|
})
|
||||||
|
|
||||||
if nc.taintNodeByCondition {
|
if nc.taintNodeByCondition {
|
||||||
@ -787,25 +794,38 @@ func (nc *Controller) monitorNodeHealth() error {
|
|||||||
nc.processNoTaintBaseEviction(node, &observedReadyCondition, gracePeriod)
|
nc.processNoTaintBaseEviction(node, &observedReadyCondition, gracePeriod)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Report node event.
|
_, needsRetry := nc.nodesToRetry.Load(node.Name)
|
||||||
if currentReadyCondition.Status != v1.ConditionTrue && observedReadyCondition.Status == v1.ConditionTrue {
|
switch {
|
||||||
|
case currentReadyCondition.Status != v1.ConditionTrue && observedReadyCondition.Status == v1.ConditionTrue:
|
||||||
|
// Report node event only once when status changed.
|
||||||
nodeutil.RecordNodeStatusChange(nc.recorder, node, "NodeNotReady")
|
nodeutil.RecordNodeStatusChange(nc.recorder, node, "NodeNotReady")
|
||||||
pods, err := listPodsFromNode(nc.kubeClient, node.Name)
|
fallthrough
|
||||||
if err != nil {
|
case needsRetry && observedReadyCondition.Status != v1.ConditionTrue:
|
||||||
utilruntime.HandleError(fmt.Errorf("Unable to list pods from node %v: %v", node.Name, err))
|
if err := nc.markPodsNotReady(node.Name); err != nil {
|
||||||
|
utilruntime.HandleError(err)
|
||||||
|
nc.nodesToRetry.Store(node.Name, struct{}{})
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if err = nodeutil.MarkPodsNotReady(nc.kubeClient, pods, node.Name); err != nil {
|
|
||||||
utilruntime.HandleError(fmt.Errorf("Unable to mark all pods NotReady on node %v: %v", node.Name, err))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
nc.nodesToRetry.Delete(node.Name)
|
||||||
}
|
}
|
||||||
nc.handleDisruption(zoneToNodeConditions, nodes)
|
nc.handleDisruption(zoneToNodeConditions, nodes)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (nc *Controller) markPodsNotReady(nodeName string) error {
|
||||||
|
pods, err := listPodsFromNode(nc.kubeClient, nodeName)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("unable to list pods from node %v: %v", nodeName, err)
|
||||||
|
}
|
||||||
|
if err = nodeutil.MarkPodsNotReady(nc.kubeClient, pods, nodeName); err != nil {
|
||||||
|
return fmt.Errorf("unable to mark all pods NotReady on node %v: %v", nodeName, err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (nc *Controller) processTaintBaseEviction(node *v1.Node, observedReadyCondition *v1.NodeCondition) {
|
func (nc *Controller) processTaintBaseEviction(node *v1.Node, observedReadyCondition *v1.NodeCondition) {
|
||||||
decisionTimestamp := nc.now()
|
decisionTimestamp := nc.now()
|
||||||
// Check eviction timeout against decisionTimestamp
|
// Check eviction timeout against decisionTimestamp
|
||||||
|
Loading…
Reference in New Issue
Block a user