Merge pull request #91179 from SataQiu/startup-probe-20200517

kubeadm: add startup probes for static Pods to protect slow starting containers
This commit is contained in:
Kubernetes Prow Robot 2020-06-02 18:10:31 -07:00 committed by GitHub
commit 40076c856e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 23 additions and 1 deletions

View File

@ -58,6 +58,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeAPIServer)),
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/livez", int(endpoint.BindPort), v1.URISchemeHTTPS),
ReadinessProbe: staticpodutil.ReadinessProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/readyz", int(endpoint.BindPort), v1.URISchemeHTTPS),
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/livez", int(endpoint.BindPort), v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
Resources: staticpodutil.ComponentResources("250m"),
Env: kubeadmutil.GetProxyEnvVars(),
}, mounts.GetVolumes(kubeadmconstants.KubeAPIServer),
@ -69,6 +70,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
Command: getControllerManagerCommand(cfg),
VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeControllerManager)),
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetControllerManagerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeControllerManagerPort, v1.URISchemeHTTPS),
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetControllerManagerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeControllerManagerPort, v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
Resources: staticpodutil.ComponentResources("200m"),
Env: kubeadmutil.GetProxyEnvVars(),
}, mounts.GetVolumes(kubeadmconstants.KubeControllerManager), nil),
@ -79,6 +81,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
Command: getSchedulerCommand(cfg),
VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeScheduler)),
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetSchedulerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeSchedulerPort, v1.URISchemeHTTPS),
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetSchedulerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeSchedulerPort, v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
Resources: staticpodutil.ComponentResources("100m"),
Env: kubeadmutil.GetProxyEnvVars(),
}, mounts.GetVolumes(kubeadmconstants.KubeScheduler), nil),

View File

@ -209,6 +209,7 @@ func GetEtcdPodSpec(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmapi.A
staticpodutil.NewVolumeMount(certsVolumeName, cfg.CertificatesDir+"/etcd", false),
},
LivenessProbe: staticpodutil.LivenessProbe(probeHostname, "/health", probePort, probeScheme),
StartupProbe: staticpodutil.StartupProbe(probeHostname, "/health", probePort, probeScheme, cfg.APIServer.TimeoutForControlPlane),
},
etcdMounts,
// etcd will listen on the advertise address of the API server, on a different port (2379)

View File

@ -20,6 +20,7 @@ import (
"bytes"
"fmt"
"io/ioutil"
"math"
"net/url"
"os"
"sort"
@ -32,6 +33,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
kubeadmapi "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm"
"k8s.io/kubernetes/cmd/kubeadm/app/constants"
kubeadmconstants "k8s.io/kubernetes/cmd/kubeadm/app/constants"
kubeadmutil "k8s.io/kubernetes/cmd/kubeadm/app/util"
"k8s.io/kubernetes/cmd/kubeadm/app/util/kustomize"
@ -220,14 +222,30 @@ func ReadStaticPodFromDisk(manifestPath string) (*v1.Pod, error) {
// LivenessProbe creates a Probe object with an HTTPGet handler that targets
// the given host, path, port and scheme.
//
// The probe is built with initialDelaySeconds=10, timeoutSeconds=15,
// failureThreshold=8 and periodSeconds=10.
func LivenessProbe(host, path string, port int, scheme v1.URIScheme) *v1.Probe {
	// sets initialDelaySeconds same as periodSeconds to skip one period before running a check
	return createHTTPProbe(host, path, port, scheme, 10, 15, 8, 10)
}
// ReadinessProbe builds an HTTPGet-based Probe for the given host, path,
// port and scheme, using fixed timing values.
func ReadinessProbe(host, path string, port int, scheme v1.URIScheme) *v1.Probe {
	const (
		// No initial delay: user infrastructure checks that look for a
		// "ready" status on kubeadm static Pods should not be held back.
		initialDelay = 0
		timeout      = 15
		failures     = 3
		period       = 1
	)
	return createHTTPProbe(host, path, port, scheme, initialDelay, timeout, failures, period)
}
// StartupProbe builds an HTTPGet-based Probe for the given host, path, port
// and scheme. Its failureThreshold is derived from timeoutForControlPlane;
// when that argument is nil, constants.DefaultControlPlaneTimeout is used
// instead.
func StartupProbe(host, path string, port int, scheme v1.URIScheme, timeoutForControlPlane *metav1.Duration) *v1.Probe {
	const period int32 = 10

	budgetSeconds := constants.DefaultControlPlaneTimeout.Seconds()
	if timeoutForControlPlane != nil {
		budgetSeconds = timeoutForControlPlane.Seconds()
	}

	// The number of allowed failures is the timeout divided by the probing
	// period, rounded up, so that the probing window spans the whole timeout.
	// initialDelaySeconds is left out of this calculation for simplicity.
	attempts := int32(math.Ceil(budgetSeconds / float64(period)))

	// initialDelaySeconds is set to the period, so one period is skipped
	// before the first check runs.
	return createHTTPProbe(host, path, port, scheme, period, 15, attempts, period)
}
func createHTTPProbe(host, path string, port int, scheme v1.URIScheme, initialDelaySeconds, timeoutSeconds, failureThreshold, periodSeconds int32) *v1.Probe {
return &v1.Probe{
Handler: v1.Handler{