mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-09-16 14:45:28 +00:00
Add timeouts to HealthChecks and retry checks
Fixes issue #3532. Added timeouts for HTTP and TCP checks, and enabled kubelet/probe to retry a check up to kubelet#maxRetries times before declaring Failure. Added Probe.TimeoutSecs to the API; Probe variants now check container.LivenessProbe.TimeoutSeconds. Also added a test for timeouts in http_test.go.
This commit is contained in:
@@ -55,6 +55,7 @@ const defaultChanSize = 1024
|
||||
const minShares = 2
|
||||
const sharesPerCPU = 1024
|
||||
const milliCPUToCPU = 1000
|
||||
const maxRetries int = 3
|
||||
|
||||
// SyncHandler is an interface implemented by Kubelet, for testability
|
||||
type SyncHandler interface {
|
||||
@@ -1417,7 +1418,7 @@ func (kl *Kubelet) GetPodStatus(podFullName string, uid types.UID) (api.PodStatu
|
||||
return podStatus, err
|
||||
}
|
||||
|
||||
func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status api.PodStatus, container api.Container, dockerContainer *docker.APIContainers) (probe.Status, error) {
|
||||
func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status api.PodStatus, container api.Container, dockerContainer *docker.APIContainers) (healthStatus probe.Status, err error) {
|
||||
// Give the container LivenessProbe.InitialDelaySeconds to start up before probing.
|
||||
if container.LivenessProbe == nil {
|
||||
return probe.Success, nil
|
||||
@@ -1425,7 +1426,13 @@ func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status ap
|
||||
if time.Now().Unix()-dockerContainer.Created < container.LivenessProbe.InitialDelaySeconds {
|
||||
return probe.Success, nil
|
||||
}
|
||||
return kl.probeContainer(container.LivenessProbe, podFullName, podUID, status, container)
|
||||
for i := 0; i < maxRetries; i++ {
|
||||
healthStatus, err = kl.probeContainer(container.LivenessProbe, podFullName, podUID, status, container)
|
||||
if healthStatus == probe.Success {
|
||||
return
|
||||
}
|
||||
}
|
||||
return healthStatus, err
|
||||
}
|
||||
|
||||
// Returns logs of current machine.
|
||||
|
@@ -19,6 +19,7 @@ package kubelet
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
|
||||
@@ -39,6 +40,14 @@ var (
|
||||
)
|
||||
|
||||
func (kl *Kubelet) probeContainer(p *api.Probe, podFullName string, podUID types.UID, status api.PodStatus, container api.Container) (probe.Status, error) {
|
||||
var timeout time.Duration
|
||||
secs := container.LivenessProbe.TimeoutSeconds
|
||||
if secs > 0 {
|
||||
timeout = time.Duration(secs) * time.Second
|
||||
} else {
|
||||
timeout = 1 * time.Second
|
||||
}
|
||||
|
||||
if p.Exec != nil {
|
||||
return execprober.Probe(kl.newExecInContainer(podFullName, podUID, container))
|
||||
}
|
||||
@@ -47,14 +56,15 @@ func (kl *Kubelet) probeContainer(p *api.Probe, podFullName string, podUID types
|
||||
if err != nil {
|
||||
return probe.Unknown, err
|
||||
}
|
||||
return httprober.Probe(extractGetParams(p.HTTPGet, status, port))
|
||||
host, port, path := extractGetParams(p.HTTPGet, status, port)
|
||||
return httprober.Probe(host, port, path, timeout)
|
||||
}
|
||||
if p.TCPSocket != nil {
|
||||
port, err := extractPort(p.TCPSocket.Port, container)
|
||||
if err != nil {
|
||||
return probe.Unknown, err
|
||||
}
|
||||
return tcprober.Probe(status.PodIP, port)
|
||||
return tcprober.Probe(status.PodIP, port, timeout)
|
||||
}
|
||||
glog.Warningf("Failed to find probe builder for %s %+v", container.Name, container.LivenessProbe)
|
||||
return probe.Unknown, nil
|
||||
|
Reference in New Issue
Block a user