Merge pull request #3593 from commonlisp/master

Add timeouts to HealthChecks and retry checks
This commit is contained in:
Brendan Burns
2015-02-05 15:40:22 -08:00
12 changed files with 60 additions and 21 deletions

View File

@@ -55,6 +55,7 @@ const defaultChanSize = 1024
const minShares = 2
const sharesPerCPU = 1024
const milliCPUToCPU = 1000
const maxRetries int = 3
// SyncHandler is an interface implemented by Kubelet, for testability
type SyncHandler interface {
@@ -1445,7 +1446,7 @@ func (kl *Kubelet) GetPodStatus(podFullName string, uid types.UID) (api.PodStatu
return podStatus, err
}
func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status api.PodStatus, container api.Container, dockerContainer *docker.APIContainers) (probe.Status, error) {
func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status api.PodStatus, container api.Container, dockerContainer *docker.APIContainers) (healthStatus probe.Status, err error) {
// Skip probing until the container has been up for its configured InitialDelaySeconds.
if container.LivenessProbe == nil {
return probe.Success, nil
@@ -1453,7 +1454,13 @@ func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status ap
if time.Now().Unix()-dockerContainer.Created < container.LivenessProbe.InitialDelaySeconds {
return probe.Success, nil
}
return kl.probeContainer(container.LivenessProbe, podFullName, podUID, status, container)
for i := 0; i < maxRetries; i++ {
healthStatus, err = kl.probeContainer(container.LivenessProbe, podFullName, podUID, status, container)
if healthStatus == probe.Success {
return
}
}
return healthStatus, err
}
// Returns logs of current machine.

View File

@@ -19,6 +19,7 @@ package kubelet
import (
"fmt"
"strconv"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
@@ -39,6 +40,14 @@ var (
)
func (kl *Kubelet) probeContainer(p *api.Probe, podFullName string, podUID types.UID, status api.PodStatus, container api.Container) (probe.Status, error) {
var timeout time.Duration
secs := container.LivenessProbe.TimeoutSeconds
if secs > 0 {
timeout = time.Duration(secs) * time.Second
} else {
timeout = 1 * time.Second
}
if p.Exec != nil {
return execprober.Probe(kl.newExecInContainer(podFullName, podUID, container))
}
@@ -47,14 +56,15 @@ func (kl *Kubelet) probeContainer(p *api.Probe, podFullName string, podUID types
if err != nil {
return probe.Unknown, err
}
return httprober.Probe(extractGetParams(p.HTTPGet, status, port))
host, port, path := extractGetParams(p.HTTPGet, status, port)
return httprober.Probe(host, port, path, timeout)
}
if p.TCPSocket != nil {
port, err := extractPort(p.TCPSocket.Port, container)
if err != nil {
return probe.Unknown, err
}
return tcprober.Probe(status.PodIP, port)
return tcprober.Probe(status.PodIP, port, timeout)
}
glog.Warningf("Failed to find probe builder for %s %+v", container.Name, container.LivenessProbe)
return probe.Unknown, nil