Add timeouts to HealthChecks and retry checks

Fixes issue #3532. Added timeouts for HTTP and TCP checks
and enabled kubelet/probe to kubelet#maxRetries times
before declaring Failure.

Added Probe.TimeoutSecs to API

Probe variants now check container.LivenessProbe.TimeoutSeconds
Also added a test for timeouts in http_test.go.
This commit is contained in:
George Kuan
2015-01-28 20:35:49 -08:00
parent e335e2d3e2
commit e8c33b7916
12 changed files with 60 additions and 21 deletions

View File

@@ -55,6 +55,7 @@ const defaultChanSize = 1024
const minShares = 2
const sharesPerCPU = 1024
const milliCPUToCPU = 1000
const maxRetries int = 3
// SyncHandler is an interface implemented by Kubelet, for testability
type SyncHandler interface {
@@ -1417,7 +1418,7 @@ func (kl *Kubelet) GetPodStatus(podFullName string, uid types.UID) (api.PodStatu
return podStatus, err
}
func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status api.PodStatus, container api.Container, dockerContainer *docker.APIContainers) (probe.Status, error) {
func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status api.PodStatus, container api.Container, dockerContainer *docker.APIContainers) (healthStatus probe.Status, err error) {
// Give the container 60 seconds to start up.
if container.LivenessProbe == nil {
return probe.Success, nil
@@ -1425,7 +1426,13 @@ func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status ap
if time.Now().Unix()-dockerContainer.Created < container.LivenessProbe.InitialDelaySeconds {
return probe.Success, nil
}
return kl.probeContainer(container.LivenessProbe, podFullName, podUID, status, container)
for i := 0; i < maxRetries; i++ {
healthStatus, err = kl.probeContainer(container.LivenessProbe, podFullName, podUID, status, container)
if healthStatus == probe.Success {
return
}
}
return healthStatus, err
}
// Returns logs of current machine.

View File

@@ -19,6 +19,7 @@ package kubelet
import (
"fmt"
"strconv"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
@@ -39,6 +40,14 @@ var (
)
func (kl *Kubelet) probeContainer(p *api.Probe, podFullName string, podUID types.UID, status api.PodStatus, container api.Container) (probe.Status, error) {
var timeout time.Duration
secs := container.LivenessProbe.TimeoutSeconds
if secs > 0 {
timeout = time.Duration(secs) * time.Second
} else {
timeout = 1 * time.Second
}
if p.Exec != nil {
return execprober.Probe(kl.newExecInContainer(podFullName, podUID, container))
}
@@ -47,14 +56,15 @@ func (kl *Kubelet) probeContainer(p *api.Probe, podFullName string, podUID types
if err != nil {
return probe.Unknown, err
}
return httprober.Probe(extractGetParams(p.HTTPGet, status, port))
host, port, path := extractGetParams(p.HTTPGet, status, port)
return httprober.Probe(host, port, path, timeout)
}
if p.TCPSocket != nil {
port, err := extractPort(p.TCPSocket.Port, container)
if err != nil {
return probe.Unknown, err
}
return tcprober.Probe(status.PodIP, port)
return tcprober.Probe(status.PodIP, port, timeout)
}
glog.Warningf("Failed to find probe builder for %s %+v", container.Name, container.LivenessProbe)
return probe.Unknown, nil