mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-25 20:53:33 +00:00
Check the health of PLEG when updating the node status
This commit is contained in:
parent
03106dd1cb
commit
ec0e99c2ed
@ -75,6 +75,11 @@ const (
|
||||
plegContainerExited plegContainerState = "exited"
|
||||
plegContainerUnknown plegContainerState = "unknown"
|
||||
plegContainerNonExistent plegContainerState = "non-existent"
|
||||
|
||||
// The threshold needs to be greater than the relisting period + the
|
||||
// relisting time, which can vary significantly. Set a conservative
|
||||
// threshold to avoid flipping between healthy and unhealthy.
|
||||
relistThreshold = 3 * time.Minute
|
||||
)
|
||||
|
||||
func convertState(state kubecontainer.ContainerState) plegContainerState {
|
||||
@ -126,13 +131,9 @@ func (g *GenericPLEG) Start() {
|
||||
|
||||
func (g *GenericPLEG) Healthy() (bool, error) {
|
||||
relistTime := g.getRelistTime()
|
||||
// TODO: Evaluate if we can reduce this threshold.
|
||||
// The threshold needs to be greater than the relisting period + the
|
||||
// relisting time, which can vary significantly. Set a conservative
|
||||
// threshold so that we don't cause kubelet to be restarted unnecessarily.
|
||||
threshold := 2 * time.Minute
|
||||
if g.clock.Since(relistTime) > threshold {
|
||||
return false, fmt.Errorf("pleg was last seen active at %v", relistTime)
|
||||
elapsed := g.clock.Since(relistTime)
|
||||
if elapsed > relistThreshold {
|
||||
return false, fmt.Errorf("pleg was last seen active %v ago; threshold is %v", elapsed, relistThreshold)
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
@ -30,6 +30,22 @@ type runtimeState struct {
|
||||
internalError error
|
||||
cidr string
|
||||
initError error
|
||||
healthChecks []*healthCheck
|
||||
}
|
||||
|
||||
type (
	// healthCheckFnType is the signature of a health check callback. It
	// returns a boolean result and an error.
	//
	// A health check function should be efficient and not rely on external
	// components (e.g., container runtime).
	healthCheckFnType func() (bool, error)

	// healthCheck associates a health check function with a name.
	healthCheck struct {
		// name labels the check.
		name string
		// fn is the callback that performs the check.
		fn healthCheckFnType
	}
)
|
||||
|
||||
func (s *runtimeState) addHealthCheck(name string, f healthCheckFnType) {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
s.healthChecks = append(s.healthChecks, &healthCheck{name: name, fn: f})
|
||||
}
|
||||
|
||||
func (s *runtimeState) setRuntimeSync(t time.Time) {
|
||||
@ -81,6 +97,12 @@ func (s *runtimeState) runtimeErrors() []string {
|
||||
if s.internalError != nil {
|
||||
ret = append(ret, s.internalError.Error())
|
||||
}
|
||||
for _, hc := range s.healthChecks {
|
||||
if ok, err := hc.fn(); !ok {
|
||||
ret = append(ret, fmt.Sprintf("%s is not healthy: %v", hc.name, err))
|
||||
}
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user