Add activeDeadlineSeconds to kubeadm upgrade-health-check job
With https://github.com/kubernetes/kubernetes/pull/122079, kubeadm relies on `ttlSecondsAfterFinished` to clean up the `upgrade-health-check` Job once its pod reaches a terminal state. However, the pod may never reach a terminal state on its own, in which case the Job never registers one and is never garbage collected. For example, if the pause image is not present, `ErrImagePull` keeps the pod retrying the image pull indefinitely, and the Job waits forever for a terminal state that never comes.

Set `activeDeadlineSeconds` to prevent the Job from waiting forever. Without it, users invoking `kubeadm upgrade plan` must clean up the Job outside of kubeadm even if they ignore the preflight result, because the Job keeps running when the check is ignored via the `--ignore-preflight-errors=CreateJob` flag. Since the polling timeout for the `CreateJob` step in kubeadm is 15 seconds, set `activeDeadlineSeconds` to the same timeout.
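As a rough illustration of the interaction this commit relies on, here is a minimal, self-contained Go sketch (not the kubeadm source; the `deadline` variable, names, and printout are illustrative only) pairing the two Job fields:

// Minimal sketch: a Job spec pairing activeDeadlineSeconds with
// ttlSecondsAfterFinished, mirroring the approach described above.
package main

import (
	"fmt"
	"time"

	batchv1 "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/utils/ptr"
)

func main() {
	timeout := 15 * time.Second // kubeadm's CreateJob polling timeout
	margin := 5 * time.Second   // margin so the deadline outlives the poll
	deadline := int64((timeout + margin).Seconds())

	job := &batchv1.Job{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "upgrade-health-check",
			Namespace: metav1.NamespaceSystem,
		},
		Spec: batchv1.JobSpec{
			BackoffLimit: ptr.To[int32](0),
			// Bounds how long the Job may stay active. If the pod is stuck
			// in ErrImagePull, the Job controller terminates it at the
			// deadline and marks the Job failed (a terminal state).
			ActiveDeadlineSeconds: ptr.To[int64](deadline),
			// Only fires once the Job reaches a terminal state.
			TTLSecondsAfterFinished: ptr.To[int32](int32(deadline)),
			Template: v1.PodTemplateSpec{
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyNever,
				},
			},
		},
	}
	fmt.Printf("activeDeadlineSeconds=%d ttlSecondsAfterFinished=%d\n",
		*job.Spec.ActiveDeadlineSeconds, *job.Spec.TTLSecondsAfterFinished)
}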
parent 1f7b707f26
commit ebc460e8cc
@@ -98,6 +98,7 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration)
 		fieldSelector = "spec.unschedulable=false"
 		ns            = metav1.NamespaceSystem
 		timeout       = 15 * time.Second
+		timeoutMargin = 5 * time.Second
 	)
 	var (
 		err, lastError error
@@ -132,6 +133,9 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration)
 		return nil
 	}
 
+	// Adding a margin of error to the polling timeout.
+	timeoutWithMargin := timeout.Seconds() + timeoutMargin.Seconds()
+
 	// Prepare Job
 	job := &batchv1.Job{
 		ObjectMeta: metav1.ObjectMeta{
@@ -140,7 +144,8 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration)
 		},
 		Spec: batchv1.JobSpec{
 			BackoffLimit:            ptr.To[int32](0),
-			TTLSecondsAfterFinished: ptr.To[int32](int32(timeout.Seconds()) + 5), // Make sure it's more than 'timeout'.
+			TTLSecondsAfterFinished: ptr.To[int32](int32(timeoutWithMargin)),
+			ActiveDeadlineSeconds:   ptr.To[int64](int64(timeoutWithMargin)),
 			Template: v1.PodTemplateSpec{
 				Spec: v1.PodSpec{
 					RestartPolicy: v1.RestartPolicyNever,
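Note: once `activeDeadlineSeconds` expires, the Job controller terminates the Job's pods and marks the Job failed with reason `DeadlineExceeded`. That failure is a terminal state, so `ttlSecondsAfterFinished` can then garbage collect the Job even when the pod itself never ran to completion.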