Mirror of https://github.com/k3s-io/kubernetes.git
Rename node status to node health in NodeLifecycleController
Since we are going to treat both node status and node lease as node heartbeat/health signals, this PR makes the rename changes so that the follow-up PRs are easier to review.
This commit is contained in:
parent e567c791aa
commit 88e7e186f0
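In essence, the patch renames the controller's per-node bookkeeping from "status" to "health": the nodeStatusData type becomes nodeHealthData, the last observed v1.NodeStatus is held by pointer, and the controller keeps a map of pointers keyed by node name. A minimal sketch of the before/after shape (package and import paths are the usual Kubernetes ones; this is an illustrative fragment, not the full file):

package nodelifecycle

import (
    v1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Before: a value type that copied the whole NodeStatus.
type nodeStatusData struct {
    probeTimestamp           metav1.Time
    readyTransitionTimestamp metav1.Time
    status                   v1.NodeStatus
}

// After: renamed to "health", status held by pointer; the controller map
// correspondingly becomes nodeHealthMap map[string]*nodeHealthData.
type nodeHealthData struct {
    probeTimestamp           metav1.Time
    readyTransitionTimestamp metav1.Time
    status                   *v1.NodeStatus
}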
@@ -128,14 +128,14 @@ const (
 )
 
 const (
-// The amount of time the nodecontroller should sleep between retrying NodeStatus updates
+// The amount of time the nodecontroller should sleep between retrying node health updates
 retrySleepTime = 20 * time.Millisecond
 )
 
-type nodeStatusData struct {
+type nodeHealthData struct {
 probeTimestamp metav1.Time
 readyTransitionTimestamp metav1.Time
-status v1.NodeStatus
+status *v1.NodeStatus
 }
 
 // Controller is the controller that manages node's life cycle.
@@ -155,8 +155,8 @@ type Controller struct {
 computeZoneStateFunc func(nodeConditions []*v1.NodeCondition) (int, ZoneState)
 
 knownNodeSet map[string]*v1.Node
-// per Node map storing last observed Status together with a local time when it was observed.
-nodeStatusMap map[string]nodeStatusData
+// per Node map storing last observed health together with a local time when it was observed.
+nodeHealthMap map[string]*nodeHealthData
 
 // Lock to access evictor workers
 evictorLock sync.Mutex
@@ -180,28 +180,31 @@ type Controller struct {
 recorder record.EventRecorder
 
 // Value controlling Controller monitoring period, i.e. how often does Controller
-// check node status posted from kubelet. This value should be lower than nodeMonitorGracePeriod.
-// TODO: Change node status monitor to watch based.
+// check node health signal posted from kubelet. This value should be lower than
+// nodeMonitorGracePeriod.
+// TODO: Change node health monitor to watch based.
 nodeMonitorPeriod time.Duration
 
-// Value used if sync_nodes_status=False, only for node startup. When node
-// is just created, e.g. cluster bootstrap or node creation, we give a longer grace period.
+// When node is just created, e.g. cluster bootstrap or node creation, we give
+// a longer grace period.
 nodeStartupGracePeriod time.Duration
 
-// Value used if sync_nodes_status=False. Controller will not proactively
-// sync node status in this case, but will monitor node status updated from kubelet. If
-// it doesn't receive update for this amount of time, it will start posting "NodeReady==
-// ConditionUnknown". The amount of time before which Controller start evicting pods
-// is controlled via flag 'pod-eviction-timeout'.
-// Note: be cautious when changing the constant, it must work with nodeStatusUpdateFrequency
-// in kubelet. There are several constraints:
-// 1. nodeMonitorGracePeriod must be N times more than nodeStatusUpdateFrequency, where
-// N means number of retries allowed for kubelet to post node status. It is pointless
-// to make nodeMonitorGracePeriod be less than nodeStatusUpdateFrequency, since there
-// will only be fresh values from Kubelet at an interval of nodeStatusUpdateFrequency.
-// The constant must be less than podEvictionTimeout.
-// 2. nodeMonitorGracePeriod can't be too large for user experience - larger value takes
-// longer for user to see up-to-date node status.
+// Controller will not proactively sync node health, but will monitor node
+// health signal updated from kubelet. If it doesn't receive update for this
+// amount of time, it will start posting "NodeReady==ConditionUnknown". The
+// amount of time before which Controller start evicting pods is controlled
+// via flag 'pod-eviction-timeout'.
+// Note: be cautious when changing the constant, it must work with
+// nodeStatusUpdateFrequency in kubelet. There are several constraints:
+// 1. nodeMonitorGracePeriod must be N times more than
+// nodeStatusUpdateFrequency, where N means number of retries allowed for
+// kubelet to post node health signal. It is pointless to make
+// nodeMonitorGracePeriod be less than nodeStatusUpdateFrequency, since
+// there will only be fresh values from Kubelet at an interval of
+// nodeStatusUpdateFrequency. The constant must be less than
+// podEvictionTimeout.
+// 2. nodeMonitorGracePeriod can't be too large for user experience - larger
+// value takes longer for user to see up-to-date node health.
 nodeMonitorGracePeriod time.Duration
 
 podEvictionTimeout time.Duration
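The comment block above encodes a timing contract: the grace period must cover several kubelet reporting intervals (N retries times nodeStatusUpdateFrequency) and still stay below the pod eviction timeout. A small self-contained sketch of that check follows; the function name, example durations, and retry count are assumptions chosen for illustration, not values taken from this patch:

package main

import (
    "fmt"
    "time"
)

// validateHeartbeatConfig mirrors the constraints described in the comment
// above: gracePeriod >= retries * updateFrequency, and gracePeriod < podEvictionTimeout.
// It is an illustrative helper, not part of the NodeLifecycleController.
func validateHeartbeatConfig(updateFrequency, gracePeriod, podEvictionTimeout time.Duration, retries int) error {
    if gracePeriod < time.Duration(retries)*updateFrequency {
        return fmt.Errorf("grace period %v covers fewer than %d heartbeats at %v", gracePeriod, retries, updateFrequency)
    }
    if gracePeriod >= podEvictionTimeout {
        return fmt.Errorf("grace period %v must be less than pod eviction timeout %v", gracePeriod, podEvictionTimeout)
    }
    return nil
}

func main() {
    // Example: 10s heartbeats with 5 allowed misses, so a 40s grace period is rejected.
    if err := validateHeartbeatConfig(10*time.Second, 40*time.Second, 5*time.Minute, 5); err != nil {
        fmt.Println("invalid config:", err)
    }
}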
@@ -259,7 +262,7 @@ func NewNodeLifecycleController(podInformer coreinformers.PodInformer,
 kubeClient: kubeClient,
 now: metav1.Now,
 knownNodeSet: make(map[string]*v1.Node),
-nodeStatusMap: make(map[string]nodeStatusData),
+nodeHealthMap: make(map[string]*nodeHealthData),
 nodeExistsInCloudProvider: func(nodeName types.NodeName) (bool, error) {
 return nodeutil.ExistsInCloudProvider(cloud, nodeName)
 },
@@ -419,10 +422,10 @@ func (nc *Controller) Run(stopCh <-chan struct{}) {
 go wait.Until(nc.doEvictionPass, scheduler.NodeEvictionPeriod, stopCh)
 }
 
-// Incorporate the results of node status pushed from kubelet to master.
+// Incorporate the results of node health signal pushed from kubelet to master.
 go wait.Until(func() {
-if err := nc.monitorNodeStatus(); err != nil {
-glog.Errorf("Error monitoring node status: %v", err)
+if err := nc.monitorNodeHealth(); err != nil {
+glog.Errorf("Error monitoring node health: %v", err)
 }
 }, nc.nodeMonitorPeriod, stopCh)
 
@@ -608,10 +611,10 @@ func (nc *Controller) doEvictionPass() {
 }
 }
 
-// monitorNodeStatus verifies node status are constantly updated by kubelet, and if not,
-// post "NodeReady==ConditionUnknown". It also evicts all pods if node is not ready or
-// not reachable for a long period of time.
-func (nc *Controller) monitorNodeStatus() error {
+// monitorNodeHealth verifies node health are constantly updated by kubelet, and
+// if not, post "NodeReady==ConditionUnknown". It also evicts all pods if node
+// is not ready or not reachable for a long period of time.
+func (nc *Controller) monitorNodeHealth() error {
 // We are listing nodes from local cache as we can tolerate some small delays
 // comparing to state from etcd and there is eventual consistency anyway.
 nodes, err := nc.nodeLister.List(labels.Everything())
@@ -648,20 +651,20 @@ func (nc *Controller) monitorNodeStatus() error {
 var observedReadyCondition v1.NodeCondition
 var currentReadyCondition *v1.NodeCondition
 node := nodes[i].DeepCopy()
-if err := wait.PollImmediate(retrySleepTime, retrySleepTime*scheduler.NodeStatusUpdateRetry, func() (bool, error) {
-gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeStatus(node)
+if err := wait.PollImmediate(retrySleepTime, retrySleepTime*scheduler.NodeHealthUpdateRetry, func() (bool, error) {
+gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeHealth(node)
 if err == nil {
 return true, nil
 }
 name := node.Name
 node, err = nc.kubeClient.CoreV1().Nodes().Get(name, metav1.GetOptions{})
 if err != nil {
-glog.Errorf("Failed while getting a Node to retry updating NodeStatus. Probably Node %s was deleted.", name)
+glog.Errorf("Failed while getting a Node to retry updating node health. Probably Node %s was deleted.", name)
 return false, err
 }
 return false, nil
 }); err != nil {
-glog.Errorf("Update status of Node '%v' from Controller error: %v. "+
+glog.Errorf("Update health of Node '%v' from Controller error: %v. "+
 "Skipping - no pods will be evicted.", node.Name, err)
 continue
 }
@@ -689,12 +692,12 @@ func (nc *Controller) monitorNodeStatus() error {
 )
 }
 } else {
-if decisionTimestamp.After(nc.nodeStatusMap[node.Name].readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
+if decisionTimestamp.After(nc.nodeHealthMap[node.Name].readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
 if nc.evictPods(node) {
 glog.V(2).Infof("Node is NotReady. Adding Pods on Node %s to eviction queue: %v is later than %v + %v",
 node.Name,
 decisionTimestamp,
-nc.nodeStatusMap[node.Name].readyTransitionTimestamp,
+nc.nodeHealthMap[node.Name].readyTransitionTimestamp,
 nc.podEvictionTimeout,
 )
 }
@@ -716,12 +719,12 @@ func (nc *Controller) monitorNodeStatus() error {
 )
 }
 } else {
-if decisionTimestamp.After(nc.nodeStatusMap[node.Name].probeTimestamp.Add(nc.podEvictionTimeout)) {
+if decisionTimestamp.After(nc.nodeHealthMap[node.Name].probeTimestamp.Add(nc.podEvictionTimeout)) {
 if nc.evictPods(node) {
 glog.V(2).Infof("Node is unresponsive. Adding Pods on Node %s to eviction queues: %v is later than %v + %v",
 node.Name,
 decisionTimestamp,
-nc.nodeStatusMap[node.Name].readyTransitionTimestamp,
+nc.nodeHealthMap[node.Name].readyTransitionTimestamp,
 nc.podEvictionTimeout-gracePeriod,
 )
 }
@@ -799,9 +802,9 @@ func (nc *Controller) monitorNodeStatus() error {
 return nil
 }
 
-// tryUpdateNodeStatus checks a given node's conditions and tries to update it. Returns grace period to
+// tryUpdateNodeHealth checks a given node's conditions and tries to update it. Returns grace period to
 // which given node is entitled, state of current and last observed Ready Condition, and an error if it occurred.
-func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.NodeCondition, *v1.NodeCondition, error) {
+func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.NodeCondition, *v1.NodeCondition, error) {
 var err error
 var gracePeriod time.Duration
 var observedReadyCondition v1.NodeCondition
@@ -817,8 +820,8 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
 LastTransitionTime: node.CreationTimestamp,
 }
 gracePeriod = nc.nodeStartupGracePeriod
-nc.nodeStatusMap[node.Name] = nodeStatusData{
-status: node.Status,
+nc.nodeHealthMap[node.Name] = &nodeHealthData{
+status: &node.Status,
 probeTimestamp: node.CreationTimestamp,
 readyTransitionTimestamp: node.CreationTimestamp,
 }
@@ -828,7 +831,7 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
 gracePeriod = nc.nodeMonitorGracePeriod
 }
 
-savedNodeStatus, found := nc.nodeStatusMap[node.Name]
+savedNodeHealth, found := nc.nodeHealthMap[node.Name]
 // There are following cases to check:
 // - both saved and new status have no Ready Condition set - we leave everything as it is,
 // - saved status have no Ready Condition, but current one does - Controller was restarted with Node data already present in etcd,
@@ -845,28 +848,28 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
 // if that's the case, but it does not seem necessary.
 var savedCondition *v1.NodeCondition
 if found {
-_, savedCondition = v1node.GetNodeCondition(&savedNodeStatus.status, v1.NodeReady)
+_, savedCondition = v1node.GetNodeCondition(savedNodeHealth.status, v1.NodeReady)
 }
 _, observedCondition := v1node.GetNodeCondition(&node.Status, v1.NodeReady)
 if !found {
 glog.Warningf("Missing timestamp for Node %s. Assuming now as a timestamp.", node.Name)
-savedNodeStatus = nodeStatusData{
-status: node.Status,
+savedNodeHealth = &nodeHealthData{
+status: &node.Status,
 probeTimestamp: nc.now(),
 readyTransitionTimestamp: nc.now(),
 }
 } else if savedCondition == nil && observedCondition != nil {
 glog.V(1).Infof("Creating timestamp entry for newly observed Node %s", node.Name)
-savedNodeStatus = nodeStatusData{
-status: node.Status,
+savedNodeHealth = &nodeHealthData{
+status: &node.Status,
 probeTimestamp: nc.now(),
 readyTransitionTimestamp: nc.now(),
 }
 } else if savedCondition != nil && observedCondition == nil {
 glog.Errorf("ReadyCondition was removed from Status of Node %s", node.Name)
 // TODO: figure out what to do in this case. For now we do the same thing as above.
-savedNodeStatus = nodeStatusData{
-status: node.Status,
+savedNodeHealth = &nodeHealthData{
+status: &node.Status,
 probeTimestamp: nc.now(),
 readyTransitionTimestamp: nc.now(),
 }
@@ -878,22 +881,22 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
 glog.V(3).Infof("ReadyCondition for Node %s transitioned from %v to %v", node.Name, savedCondition, observedCondition)
 transitionTime = nc.now()
 } else {
-transitionTime = savedNodeStatus.readyTransitionTimestamp
+transitionTime = savedNodeHealth.readyTransitionTimestamp
 }
 if glog.V(5) {
-glog.V(5).Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, savedNodeStatus.status, node.Status)
+glog.V(5).Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, savedNodeHealth.status, node.Status)
 } else {
 glog.V(3).Infof("Node %s ReadyCondition updated. Updating timestamp.", node.Name)
 }
-savedNodeStatus = nodeStatusData{
-status: node.Status,
+savedNodeHealth = &nodeHealthData{
+status: &node.Status,
 probeTimestamp: nc.now(),
 readyTransitionTimestamp: transitionTime,
 }
 }
-nc.nodeStatusMap[node.Name] = savedNodeStatus
+nc.nodeHealthMap[node.Name] = savedNodeHealth
 
-if nc.now().After(savedNodeStatus.probeTimestamp.Add(gracePeriod)) {
+if nc.now().After(savedNodeHealth.probeTimestamp.Add(gracePeriod)) {
 // NodeReady condition was last set longer ago than gracePeriod, so update it to Unknown
 // (regardless of its current value) in the master.
 if currentReadyCondition == nil {
@@ -908,7 +911,7 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
 })
 } else {
 glog.V(4).Infof("node %v hasn't been updated for %+v. Last ready condition is: %+v",
-node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), observedReadyCondition)
+node.Name, nc.now().Time.Sub(savedNodeHealth.probeTimestamp.Time), observedReadyCondition)
 if observedReadyCondition.Status != v1.ConditionUnknown {
 currentReadyCondition.Status = v1.ConditionUnknown
 currentReadyCondition.Reason = "NodeStatusUnknown"
@@ -944,7 +947,7 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
 })
 } else {
 glog.V(4).Infof("node %v hasn't been updated for %+v. Last %v is: %+v",
-node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), nodeConditionType, currentCondition)
+node.Name, nc.now().Time.Sub(savedNodeHealth.probeTimestamp.Time), nodeConditionType, currentCondition)
 if currentCondition.Status != v1.ConditionUnknown {
 currentCondition.Status = v1.ConditionUnknown
 currentCondition.Reason = "NodeStatusUnknown"
@@ -960,9 +963,9 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
 glog.Errorf("Error updating node %s: %v", node.Name, err)
 return gracePeriod, observedReadyCondition, currentReadyCondition, err
 }
-nc.nodeStatusMap[node.Name] = nodeStatusData{
-status: node.Status,
-probeTimestamp: nc.nodeStatusMap[node.Name].probeTimestamp,
+nc.nodeHealthMap[node.Name] = &nodeHealthData{
+status: &node.Status,
+probeTimestamp: nc.nodeHealthMap[node.Name].probeTimestamp,
 readyTransitionTimestamp: nc.now(),
 }
 return gracePeriod, observedReadyCondition, currentReadyCondition, nil
@@ -1044,10 +1047,10 @@ func (nc *Controller) handleDisruption(zoneToNodeConditions map[string][]*v1.Nod
 // When exiting disruption mode update probe timestamps on all Nodes.
 now := nc.now()
 for i := range nodes {
-v := nc.nodeStatusMap[nodes[i].Name]
+v := nc.nodeHealthMap[nodes[i].Name]
 v.probeTimestamp = now
 v.readyTransitionTimestamp = now
-nc.nodeStatusMap[nodes[i].Name] = v
+nc.nodeHealthMap[nodes[i].Name] = v
 }
 // We reset all rate limiters to settings appropriate for the given state.
 for k := range nc.zoneStates {
@@ -146,7 +146,7 @@ func newNodeLifecycleControllerFromClient(
 return &nodeLifecycleController{nc, nodeInformer, daemonSetInformer}, nil
 }
 
-func TestMonitorNodeStatusEvictPods(t *testing.T) {
+func TestMonitorNodeHealthEvictPods(t *testing.T) {
 fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
 evictionTimeout := 10 * time.Minute
 labels := map[string]string{
@@ -628,7 +628,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
 if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
-if err := nodeController.monitorNodeStatus(); err != nil {
+if err := nodeController.monitorNodeHealth(); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
 if item.timeToPass > 0 {
@@ -643,7 +643,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
 if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
-if err := nodeController.monitorNodeStatus(); err != nil {
+if err := nodeController.monitorNodeHealth(); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
 zones := testutil.GetZones(item.fakeNodeHandler)
@@ -787,7 +787,7 @@ func TestPodStatusChange(t *testing.T) {
 if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
-if err := nodeController.monitorNodeStatus(); err != nil {
+if err := nodeController.monitorNodeHealth(); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
 if item.timeToPass > 0 {
@@ -798,7 +798,7 @@ func TestPodStatusChange(t *testing.T) {
 if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
-if err := nodeController.monitorNodeStatus(); err != nil {
+if err := nodeController.monitorNodeHealth(); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
 zones := testutil.GetZones(item.fakeNodeHandler)
@@ -827,7 +827,7 @@ func TestPodStatusChange(t *testing.T) {
 }
 }
 
-func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) {
+func TestMonitorNodeHealthEvictPodsWithDisruption(t *testing.T) {
 fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
 evictionTimeout := 10 * time.Minute
 timeToPass := 60 * time.Minute
@@ -1318,7 +1318,7 @@ func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) {
 if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
-if err := nodeController.monitorNodeStatus(); err != nil {
+if err := nodeController.monitorNodeHealth(); err != nil {
 t.Errorf("%v: unexpected error: %v", item.description, err)
 }
 
@@ -1336,7 +1336,7 @@ func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) {
 if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
-if err := nodeController.monitorNodeStatus(); err != nil {
+if err := nodeController.monitorNodeHealth(); err != nil {
 t.Errorf("%v: unexpected error: %v", item.description, err)
 }
 for zone, state := range item.expectedFollowingStates {
@@ -1449,7 +1449,7 @@ func TestCloudProviderNodeShutdown(t *testing.T) {
 if err := nodeController.syncNodeStore(fnh); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
-if err := nodeController.monitorNodeStatus(); err != nil {
+if err := nodeController.monitorNodeHealth(); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
 
@@ -1520,11 +1520,11 @@ func TestCloudProviderNoRateLimit(t *testing.T) {
 nodeController.nodeShutdownInCloudProvider = func(ctx context.Context, node *v1.Node) (bool, error) {
 return false, nil
 }
-// monitorNodeStatus should allow this node to be immediately deleted
+// monitorNodeHealth should allow this node to be immediately deleted
 if err := nodeController.syncNodeStore(fnh); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
-if err := nodeController.monitorNodeStatus(); err != nil {
+if err := nodeController.monitorNodeHealth(); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
 select {
@@ -1540,7 +1540,7 @@ func TestCloudProviderNoRateLimit(t *testing.T) {
 }
 }
 
-func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
+func TestMonitorNodeHealthUpdateStatus(t *testing.T) {
 fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
 table := []struct {
 fakeNodeHandler *testutil.FakeNodeHandler
@@ -1804,7 +1804,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
 if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
-if err := nodeController.monitorNodeStatus(); err != nil {
+if err := nodeController.monitorNodeHealth(); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
 if item.timeToPass > 0 {
@@ -1813,7 +1813,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
 if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
-if err := nodeController.monitorNodeStatus(); err != nil {
+if err := nodeController.monitorNodeHealth(); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
 }
@@ -1829,7 +1829,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
 }
 }
 
-func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
+func TestMonitorNodeHealthMarkPodsNotReady(t *testing.T) {
 fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
 table := []struct {
 fakeNodeHandler *testutil.FakeNodeHandler
@@ -1951,7 +1951,7 @@ func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
 if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
-if err := nodeController.monitorNodeStatus(); err != nil {
+if err := nodeController.monitorNodeHealth(); err != nil {
 t.Errorf("Case[%d] unexpected error: %v", i, err)
 }
 if item.timeToPass > 0 {
@@ -1960,7 +1960,7 @@ func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
 if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
-if err := nodeController.monitorNodeStatus(); err != nil {
+if err := nodeController.monitorNodeHealth(); err != nil {
 t.Errorf("Case[%d] unexpected error: %v", i, err)
 }
 }
@@ -2071,7 +2071,7 @@ func TestSwapUnreachableNotReadyTaints(t *testing.T) {
 if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
-if err := nodeController.monitorNodeStatus(); err != nil {
+if err := nodeController.monitorNodeHealth(); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
 nodeController.doNoExecuteTaintingPass()
@@ -2109,7 +2109,7 @@ func TestSwapUnreachableNotReadyTaints(t *testing.T) {
 if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
-if err := nodeController.monitorNodeStatus(); err != nil {
+if err := nodeController.monitorNodeHealth(); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
 nodeController.doNoExecuteTaintingPass()
@@ -2421,7 +2421,7 @@ func TestNodeEventGeneration(t *testing.T) {
 if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
-if err := nodeController.monitorNodeStatus(); err != nil {
+if err := nodeController.monitorNodeHealth(); err != nil {
 t.Errorf("unexpected error: %v", err)
 }
 if len(fakeRecorder.Events) != 2 {
@@ -28,9 +28,9 @@ import (
 )
 
 const (
-// NodeStatusUpdateRetry controls the number of retries of writing
-// NodeStatus update.
-NodeStatusUpdateRetry = 5
+// NodeHealthUpdateRetry controls the number of retries of writing
+// node health update.
+NodeHealthUpdateRetry = 5
 // NodeEvictionPeriod controls how often NodeController will try to
 // evict Pods from non-responsive Nodes.
 NodeEvictionPeriod = 100 * time.Millisecond
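For context, the renamed NodeHealthUpdateRetry constant is consumed earlier in the diff as a retry budget for wait.PollImmediate (retrySleepTime between attempts, retrySleepTime * NodeHealthUpdateRetry in total). A standalone sketch of that polling pattern follows, with local copies of the constants and a stubbed update function standing in for tryUpdateNodeHealth:

package main

import (
    "fmt"
    "time"

    "k8s.io/apimachinery/pkg/util/wait"
)

const (
    // Local copies of the constants from the diff, for illustration only.
    nodeHealthUpdateRetry = 5
    retrySleepTime        = 20 * time.Millisecond
)

func main() {
    attempts := 0
    // Same shape as monitorNodeHealth: retry every retrySleepTime, give up
    // after retrySleepTime * nodeHealthUpdateRetry.
    err := wait.PollImmediate(retrySleepTime, retrySleepTime*nodeHealthUpdateRetry, func() (bool, error) {
        attempts++
        if attempts < 3 {
            return false, nil // simulate a conflicting update; retry
        }
        return true, nil // simulate a successful health update
    })
    if err != nil {
        fmt.Println("gave up after retries:", err)
        return
    }
    fmt.Printf("health update succeeded after %d attempts\n", attempts)
}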