mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-30 15:05:27 +00:00
Merge pull request #29581 from Random-Liu/panic-if-cadvisor-not-started
Automatic merge from submit-queue. Kubelet: Fail kubelet if cadvisor is not started. Fixes https://github.com/kubernetes/kubernetes/issues/28997. We started cadvisor inside `sync.Once.Do()`, which runs only once regardless of whether cadvisor starts successfully. Once that start fails, kubelet is stuck in a bad state: it can never start the sync loop because an internal error has been set, yet it never retries starting cadvisor. This PR simply fails kubelet when cadvisor fails to start, and relies on the babysitter to restart kubelet. In the future, we may want to add backoff logic to the babysitter to protect the system. Separately, https://github.com/kubernetes/kubernetes/pull/29492 will fix the cadvisor side to prevent cadvisor from failing because of this kind of transient error. Marked P1 to match the original issue. @dchen1107 @vishh
This commit is contained in:
commit
ed3a29bd6a
@ -914,7 +914,9 @@ func (kl *Kubelet) initializeModules() error {
|
||||
// initializeRuntimeDependentModules will initialize internal modules that require the container runtime to be up.
|
||||
func (kl *Kubelet) initializeRuntimeDependentModules() {
|
||||
if err := kl.cadvisor.Start(); err != nil {
|
||||
kl.runtimeState.setInternalError(fmt.Errorf("failed to start cAdvisor %v", err))
|
||||
// Fail kubelet and rely on the babysitter to retry starting kubelet.
|
||||
// TODO(random-liu): Add backoff logic in the babysitter
|
||||
glog.Fatalf("Failed to start cAdvisor %v", err)
|
||||
}
|
||||
// eviction manager must start after cadvisor because it needs to know if the container runtime has a dedicated imagefs
|
||||
if err := kl.evictionManager.Start(kl, kl.getActivePods, evictionMonitoringPeriod); err != nil {
|
||||
|
Loading…
Reference in New Issue
Block a user