From e04a2b3b26f721a8133a49e463e5f41762f2588c Mon Sep 17 00:00:00 2001 From: SataQiu <1527062125@qq.com> Date: Thu, 21 May 2020 11:12:36 +0800 Subject: [PATCH] kubeadm: add startup probes for static Pods to protect slow starting containers Signed-off-by: SataQiu <1527062125@qq.com> --- .../app/phases/controlplane/manifests.go | 3 +++ cmd/kubeadm/app/phases/etcd/local.go | 1 + cmd/kubeadm/app/util/staticpod/utils.go | 20 ++++++++++++++++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/cmd/kubeadm/app/phases/controlplane/manifests.go b/cmd/kubeadm/app/phases/controlplane/manifests.go index 3124e6b25ad..55dd08ae450 100644 --- a/cmd/kubeadm/app/phases/controlplane/manifests.go +++ b/cmd/kubeadm/app/phases/controlplane/manifests.go @@ -58,6 +58,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeAPIServer)), LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/livez", int(endpoint.BindPort), v1.URISchemeHTTPS), ReadinessProbe: staticpodutil.ReadinessProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/readyz", int(endpoint.BindPort), v1.URISchemeHTTPS), + StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/livez", int(endpoint.BindPort), v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane), Resources: staticpodutil.ComponentResources("250m"), Env: kubeadmutil.GetProxyEnvVars(), }, mounts.GetVolumes(kubeadmconstants.KubeAPIServer), @@ -69,6 +70,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap Command: getControllerManagerCommand(cfg), VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeControllerManager)), LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetControllerManagerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeControllerManagerPort, v1.URISchemeHTTPS), + StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetControllerManagerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeControllerManagerPort, v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane), Resources: staticpodutil.ComponentResources("200m"), Env: kubeadmutil.GetProxyEnvVars(), }, mounts.GetVolumes(kubeadmconstants.KubeControllerManager), nil), @@ -79,6 +81,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap Command: getSchedulerCommand(cfg), VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeScheduler)), LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetSchedulerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeSchedulerPort, v1.URISchemeHTTPS), + StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetSchedulerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeSchedulerPort, v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane), Resources: staticpodutil.ComponentResources("100m"), Env: kubeadmutil.GetProxyEnvVars(), }, mounts.GetVolumes(kubeadmconstants.KubeScheduler), nil), diff --git a/cmd/kubeadm/app/phases/etcd/local.go b/cmd/kubeadm/app/phases/etcd/local.go index fef23f0bf0e..05edc6798ff 100644 --- a/cmd/kubeadm/app/phases/etcd/local.go +++ b/cmd/kubeadm/app/phases/etcd/local.go @@ -193,6 +193,7 @@ func GetEtcdPodSpec(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmapi.A staticpodutil.NewVolumeMount(certsVolumeName, cfg.CertificatesDir+"/etcd", false), }, LivenessProbe: staticpodutil.LivenessProbe(probeHostname, "/health", probePort, probeScheme), + StartupProbe: staticpodutil.StartupProbe(probeHostname, "/health", probePort, probeScheme, cfg.APIServer.TimeoutForControlPlane), }, etcdMounts, // etcd will listen on the advertise address of the API server, in a different port (2379) diff --git a/cmd/kubeadm/app/util/staticpod/utils.go b/cmd/kubeadm/app/util/staticpod/utils.go index bfe76bf29cc..aa8d0f77a1b 100644 --- a/cmd/kubeadm/app/util/staticpod/utils.go +++ b/cmd/kubeadm/app/util/staticpod/utils.go @@ -20,6 +20,7 @@ import ( "bytes" "fmt" "io/ioutil" + "math" "net/url" "os" "sort" @@ -32,6 +33,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" kubeadmapi "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm" + "k8s.io/kubernetes/cmd/kubeadm/app/constants" kubeadmconstants "k8s.io/kubernetes/cmd/kubeadm/app/constants" kubeadmutil "k8s.io/kubernetes/cmd/kubeadm/app/util" "k8s.io/kubernetes/cmd/kubeadm/app/util/kustomize" @@ -220,14 +222,30 @@ func ReadStaticPodFromDisk(manifestPath string) (*v1.Pod, error) { // LivenessProbe creates a Probe object with a HTTPGet handler func LivenessProbe(host, path string, port int, scheme v1.URIScheme) *v1.Probe { - return createHTTPProbe(host, path, port, scheme, 15, 15, 8, 10) + // sets initialDelaySeconds same as periodSeconds to skip one period before running a check + return createHTTPProbe(host, path, port, scheme, 10, 15, 8, 10) } // ReadinessProbe creates a Probe object with a HTTPGet handler func ReadinessProbe(host, path string, port int, scheme v1.URIScheme) *v1.Probe { + // sets initialDelaySeconds as '0' because we don't want to delay user infrastructure checks + // looking for "ready" status on kubeadm static Pods return createHTTPProbe(host, path, port, scheme, 0, 15, 3, 1) } +// StartupProbe creates a Probe object with a HTTPGet handler +func StartupProbe(host, path string, port int, scheme v1.URIScheme, timeoutForControlPlane *metav1.Duration) *v1.Probe { + periodSeconds, timeoutForControlPlaneSeconds := int32(10), constants.DefaultControlPlaneTimeout.Seconds() + if timeoutForControlPlane != nil { + timeoutForControlPlaneSeconds = timeoutForControlPlane.Seconds() + } + // sets failureThreshold big enough to guarantee the full timeout can cover the worst case scenario for the control-plane to come alive + // we ignore initialDelaySeconds in the calculation here for simplicity + failureThreshold := int32(math.Ceil(timeoutForControlPlaneSeconds / float64(periodSeconds))) + // sets initialDelaySeconds same as periodSeconds to skip one period before running a check + return createHTTPProbe(host, path, port, scheme, periodSeconds, 15, failureThreshold, periodSeconds) +} + func createHTTPProbe(host, path string, port int, scheme v1.URIScheme, initialDelaySeconds, timeoutSeconds, failureThreshold, periodSeconds int32) *v1.Probe { return &v1.Probe{ Handler: v1.Handler{