Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-08-05 10:19:50 +00:00
Merge pull request #122079 from carlory/kubeadm-upgrade-health
refactor healthCheck's CreateJob with TTLSecondsAfterFinished
This commit is contained in: commit 1a1941eebb
--- a/cmd/kubeadm/app/phases/upgrade/health.go
+++ b/cmd/kubeadm/app/phases/upgrade/health.go
@@ -93,7 +93,7 @@ func CheckClusterHealth(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration
 // createJob is a check that verifies that a Job can be created in the cluster
 func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration) (lastError error) {
 	const (
-		jobName = "upgrade-health-check"
+		prefix  = "upgrade-health-check"
 		ns      = metav1.NamespaceSystem
 		timeout = 15 * time.Second
 	)
@@ -101,18 +101,19 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration)
 	// If client.Discovery().RESTClient() is nil, the fake client is used.
 	// Return early because the kubeadm dryrun dynamic client only handles the core/v1 GroupVersion.
 	if client.Discovery().RESTClient() == nil {
-		fmt.Printf("[upgrade/health] Would create the Job %q in namespace %q and wait until it completes\n", jobName, ns)
+		fmt.Printf("[upgrade/health] Would create the Job with the prefix %q in namespace %q and wait until it completes\n", prefix, ns)
 		return nil
 	}
 
 	// Prepare Job
 	job := &batchv1.Job{
 		ObjectMeta: metav1.ObjectMeta{
-			Name:      jobName,
+			GenerateName: prefix + "-",
 			Namespace: ns,
 		},
 		Spec: batchv1.JobSpec{
 			BackoffLimit: ptr.To[int32](0),
+			TTLSecondsAfterFinished: ptr.To[int32](2),
 			Template: v1.PodTemplateSpec{
 				Spec: v1.PodSpec{
 					RestartPolicy: v1.RestartPolicyNever,
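
Two API fields carry the refactor. GenerateName tells the API server to mint a unique name by appending a random suffix to the prefix, so a leftover Job from an earlier run can never collide with the new one. TTLSecondsAfterFinished delegates cleanup to the TTL-after-finished controller, which deletes the Job and its Pods about two seconds after it finishes. Below is a minimal sketch of the same construction; the helper name createHealthCheckJob, the pause image tag, and the assumption that client is a ready kubernetes.Interface are illustrative, not part of the patch:

package health

import (
	"context"
	"fmt"

	batchv1 "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/utils/ptr"
)

// createHealthCheckJob is a sketch mirroring the patched createJob: the
// server names the Job ("upgrade-health-check-<random>") and the
// TTL-after-finished controller garbage-collects it two seconds after it
// finishes, so no manual Get/Delete bookkeeping is needed.
func createHealthCheckJob(ctx context.Context, client kubernetes.Interface) (string, error) {
	job := &batchv1.Job{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: "upgrade-health-check-", // server appends a random suffix
			Namespace:    metav1.NamespaceSystem,
		},
		Spec: batchv1.JobSpec{
			BackoffLimit:            ptr.To[int32](0), // fail fast: no Pod retries
			TTLSecondsAfterFinished: ptr.To[int32](2), // auto-delete 2s after completion
			Template: v1.PodTemplateSpec{
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyNever,
					Containers: []v1.Container{
						{
							Name:  "upgrade-health-check",
							Image: "registry.k8s.io/pause:3.9", // assumed tag; any image that exits 0 works
							Args:  []string{"-v"},
						},
					},
				},
			},
		},
	}
	created, err := client.BatchV1().Jobs(metav1.NamespaceSystem).Create(ctx, job, metav1.CreateOptions{})
	if err != nil {
		return "", err
	}
	// Only now do we know the actual name the API server assigned.
	fmt.Printf("created Job %q\n", created.Name)
	return created.Name, nil
}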
@@ -129,7 +130,7 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration)
 					},
 					Containers: []v1.Container{
 						{
-							Name:  jobName,
+							Name:  prefix,
 							Image: images.GetPauseImage(cfg),
 							Args:  []string{"-v"},
 						},
@@ -139,38 +140,29 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration)
 		},
 	}
 
-	// Check if the Job already exists and delete it
-	if _, err := client.BatchV1().Jobs(ns).Get(context.TODO(), jobName, metav1.GetOptions{}); err == nil {
-		if err = deleteHealthCheckJob(client, ns, jobName); err != nil {
-			return err
-		}
-	}
+	ctx := context.Background()
 
-	// Cleanup the Job on exit
-	defer func() {
-		lastError = deleteHealthCheckJob(client, ns, jobName)
-	}()
-
-	// Create the Job, but retry in case it is being currently deleted
-	klog.V(2).Infof("Creating Job %q in the namespace %q", jobName, ns)
-	err := wait.PollImmediate(time.Second*1, timeout, func() (bool, error) {
-		if _, err := client.BatchV1().Jobs(ns).Create(context.TODO(), job, metav1.CreateOptions{}); err != nil {
-			klog.V(2).Infof("Could not create Job %q in the namespace %q, retrying: %v", jobName, ns, err)
+	// Create the Job, but retry if it fails
+	klog.V(2).Infof("Creating a Job with the prefix %q in the namespace %q", prefix, ns)
+	var jobName string
+	err := wait.PollUntilContextTimeout(ctx, time.Second*1, timeout, true, func(ctx context.Context) (bool, error) {
+		createdJob, err := client.BatchV1().Jobs(ns).Create(ctx, job, metav1.CreateOptions{})
+		if err != nil {
+			klog.V(2).Infof("Could not create a Job with the prefix %q in the namespace %q, retrying: %v", prefix, ns, err)
 			lastError = err
 			return false, nil
 		}
+
+		jobName = createdJob.Name
 		return true, nil
 	})
 	if err != nil {
-		return errors.Wrapf(lastError, "could not create Job %q in the namespace %q", jobName, ns)
+		return errors.Wrapf(lastError, "could not create a Job with the prefix %q in the namespace %q", prefix, ns)
 	}
 
-	// Waiting and manually deleting the Job is a workaround to not enabling the TTL controller.
-	// TODO: refactor this if the TTL controller is enabled in kubeadm once it goes Beta.
-
 	// Wait for the Job to complete
-	err = wait.PollImmediate(time.Second*1, timeout, func() (bool, error) {
-		job, err := client.BatchV1().Jobs(ns).Get(context.TODO(), jobName, metav1.GetOptions{})
+	err = wait.PollUntilContextTimeout(ctx, time.Second*1, timeout, true, func(ctx context.Context) (bool, error) {
+		job, err := client.BatchV1().Jobs(ns).Get(ctx, jobName, metav1.GetOptions{})
 		if err != nil {
 			lastError = err
 			klog.V(2).Infof("could not get Job %q in the namespace %q, retrying: %v", jobName, ns, err)
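
Both retry loops also move from the deprecated wait.PollImmediate to the context-aware wait.PollUntilContextTimeout. The boolean fourth argument (true here) preserves PollImmediate's behavior of running the condition once before the first interval elapses, and the condition now receives a context that can be threaded into the client calls. A self-contained sketch of the helper's shape; the three-attempt condition is purely illustrative:

package main

import (
	"context"
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

func main() {
	ctx := context.Background()
	attempts := 0
	// PollUntilContextTimeout(ctx, interval, timeout, immediate, condition):
	// with immediate=true the condition runs once right away, then every
	// interval, until it returns true, returns an error, or the timeout hits.
	err := wait.PollUntilContextTimeout(ctx, 1*time.Second, 15*time.Second, true,
		func(ctx context.Context) (bool, error) {
			attempts++
			// Illustrative condition: succeed on the third attempt. In the
			// kubeadm code this is where the Job create/get call happens,
			// using the ctx handed to the closure.
			return attempts >= 3, nil
		})
	fmt.Println("attempts:", attempts, "err:", err)
}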
@@ -194,15 +186,6 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration)
 	return nil
 }
 
-func deleteHealthCheckJob(client clientset.Interface, ns, jobName string) error {
-	klog.V(2).Infof("Deleting Job %q in the namespace %q", jobName, ns)
-	propagation := metav1.DeletePropagationForeground
-	if err := client.BatchV1().Jobs(ns).Delete(context.TODO(), jobName, metav1.DeleteOptions{PropagationPolicy: &propagation}); err != nil {
-		return errors.Wrapf(err, "could not delete Job %q in the namespace %q", jobName, ns)
-	}
-	return nil
-}
-
 // controlPlaneNodesReady checks whether all control-plane Nodes in the cluster are in the Running state
 func controlPlaneNodesReady(client clientset.Interface, _ *kubeadmapi.ClusterConfiguration) error {
 	selectorControlPlane := labels.SelectorFromSet(map[string]string{
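
With the TTL set on the Job itself, the deleteHealthCheckJob helper and its foreground-propagation delete become dead code, so this hunk removes them outright. If one wanted to observe the controller doing the cleanup, something like the following sketch would work; waitForJobGone and its 30-second bound are assumptions for illustration, since the TTL controller's deletion latency is not guaranteed:

package health

import (
	"context"
	"fmt"
	"time"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
)

// waitForJobGone is a hypothetical helper: it polls until the
// TTL-after-finished controller has garbage-collected the finished Job.
func waitForJobGone(ctx context.Context, client kubernetes.Interface, ns, name string) error {
	return wait.PollUntilContextTimeout(ctx, 1*time.Second, 30*time.Second, true,
		func(ctx context.Context) (bool, error) {
			_, err := client.BatchV1().Jobs(ns).Get(ctx, name, metav1.GetOptions{})
			if apierrors.IsNotFound(err) {
				fmt.Printf("Job %q was cleaned up by the TTL controller\n", name)
				return true, nil
			}
			return false, nil // still present (or transient error): keep polling
		})
}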