kubelet: monitor the health of pleg

PLEG is responsible for listing the pods running on the node. If it hangs
due to a non-responsive container runtime or an internal bug, we should
restart the kubelet.
This commit is contained in:
Yu-Ju Hong
2016-02-29 16:11:48 -08:00
parent 719158d2c8
commit 94368df91a
5 changed files with 64 additions and 7 deletions

View File

@@ -166,6 +166,7 @@ type HostInterface interface {
DockerImagesFsInfo() (cadvisorapiv2.FsInfo, error)
RootFsInfo() (cadvisorapiv2.FsInfo, error)
ListVolumesForPod(podUID types.UID) (map[string]volume.Volume, bool)
PLEGHealthCheck() (bool, error)
}
// NewServer initializes and configures a kubelet.Server object to handle HTTP requests.
@@ -223,6 +224,7 @@ func (s *Server) InstallDefaultHandlers() {
healthz.InstallHandler(s.restfulCont,
healthz.PingHealthz,
healthz.NamedCheck("syncloop", s.syncLoopHealthCheck),
healthz.NamedCheck("pleg", s.plegHealthCheck),
)
var ws *restful.WebService
ws = new(restful.WebService)
@@ -385,6 +387,14 @@ func (s *Server) syncLoopHealthCheck(req *http.Request) error {
return nil
}
// plegHealthCheck is the healthz check for PLEG, which lists pods
// periodically. It asks the host for the PLEG health status and returns nil
// when the host reports it as healthy; otherwise it returns an error that
// embeds whatever detail the host supplied.
func (s *Server) plegHealthCheck(req *http.Request) error {
	healthy, cause := s.host.PLEGHealthCheck()
	if healthy {
		return nil
	}
	// Only the boolean decides the outcome; the host's error is used purely
	// as the detail in the reported message.
	return fmt.Errorf("PLEG took longer than expected: %v", cause)
}
// getContainerLogs handles containerLogs request against the Kubelet
func (s *Server) getContainerLogs(request *restful.Request, response *restful.Response) {
podNamespace := request.PathParameter("podNamespace")