From 67cf648ab7701890f6b41804407b5a43f8eaf62f Mon Sep 17 00:00:00 2001 From: Maciej Szulik Date: Mon, 8 Jan 2024 12:47:25 +0100 Subject: [PATCH 1/2] Add a new neverTerminate job behavior just for upgrade --- test/e2e/framework/job/fixtures.go | 6 ++++++ test/e2e/upgrades/apps/job.go | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/test/e2e/framework/job/fixtures.go b/test/e2e/framework/job/fixtures.go index 27c46289043..6514bb3e243 100644 --- a/test/e2e/framework/job/fixtures.go +++ b/test/e2e/framework/job/fixtures.go @@ -92,6 +92,12 @@ func NewTestJobOnNode(behavior, name string, rPol v1.RestartPolicy, parallelism, } } switch behavior { + case "neverTerminate": + // this job is being used in an upgrade job, see test/e2e/upgrades/apps/job.go + // it should never be optimized, as it always has to restart during an upgrade + // and continue running + job.Spec.Template.Spec.Containers[0].Command = []string{"sleep", "1000000"} + job.Spec.Template.Spec.TerminationGracePeriodSeconds = ptr.To(int64(1)) case "notTerminate": job.Spec.Template.Spec.Containers[0].Image = imageutils.GetPauseImageName() case "fail": diff --git a/test/e2e/upgrades/apps/job.go b/test/e2e/upgrades/apps/job.go index eb59b4a8cdc..4c1b6e1b298 100644 --- a/test/e2e/upgrades/apps/job.go +++ b/test/e2e/upgrades/apps/job.go @@ -47,7 +47,7 @@ func (t *JobUpgradeTest) Setup(ctx context.Context, f *framework.Framework) { t.namespace = f.Namespace.Name ginkgo.By("Creating a job") - t.job = e2ejob.NewTestJob("notTerminate", "foo", v1.RestartPolicyOnFailure, 2, 2, nil, 6) + t.job = e2ejob.NewTestJob("neverTerminate", "foo", v1.RestartPolicyOnFailure, 2, 2, nil, 6) job, err := e2ejob.CreateJob(ctx, f.ClientSet, t.namespace, t.job) t.job = job framework.ExpectNoError(err) From f8abe71238f01d7e69c7eddcde1295d8751670ce Mon Sep 17 00:00:00 2001 From: Maciej Szulik Date: Mon, 8 Jan 2024 13:52:03 +0100 Subject: [PATCH 2/2] Add a post-upgrade condition to ensure the job is running --- 
test/e2e/upgrades/apps/job.go | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/test/e2e/upgrades/apps/job.go b/test/e2e/upgrades/apps/job.go index 4c1b6e1b298..b1311c82665 100644 --- a/test/e2e/upgrades/apps/job.go +++ b/test/e2e/upgrades/apps/job.go @@ -60,8 +60,11 @@ func (t *JobUpgradeTest) Setup(ctx context.Context, f *framework.Framework) { // Test verifies that the Jobs Pods are running after the an upgrade func (t *JobUpgradeTest) Test(ctx context.Context, f *framework.Framework, done <-chan struct{}, upgrade upgrades.UpgradeType) { <-done + ginkgo.By("Ensuring job is running") + err := ensureJobRunning(ctx, f.ClientSet, t.namespace, t.job.Name) + framework.ExpectNoError(err) ginkgo.By("Ensuring active pods == parallelism") - err := ensureAllJobPodsRunning(ctx, f.ClientSet, t.namespace, t.job.Name, 2) + err = ensureAllJobPodsRunning(ctx, f.ClientSet, t.namespace, t.job.Name, 2) framework.ExpectNoError(err) } @@ -70,7 +73,7 @@ func (t *JobUpgradeTest) Teardown(ctx context.Context, f *framework.Framework) { // rely on the namespace deletion to clean up everything } -// ensureAllJobPodsRunning uses c to check in the Job named jobName in ns +// ensureAllJobPodsRunning uses c to check if the Job named jobName in ns // is running, returning an error if the expected parallelism is not // satisfied. 
func ensureAllJobPodsRunning(ctx context.Context, c clientset.Interface, ns, jobName string, parallelism int32) error { @@ -93,3 +96,19 @@ func ensureAllJobPodsRunning(ctx context.Context, c clientset.Interface, ns, job } return nil } + +// ensureJobRunning uses c to check if the Job named jobName in ns is running, +// (not completed, nor failed, nor suspended) returning an error if it can't +// read the job or when it's not running +func ensureJobRunning(ctx context.Context, c clientset.Interface, ns, jobName string) error { + job, err := e2ejob.GetJob(ctx, c, ns, jobName) + if err != nil { + return err + } + for _, c := range job.Status.Conditions { + if (c.Type == batchv1.JobComplete || c.Type == batchv1.JobFailed || c.Type == batchv1.JobSuspended) && c.Status == v1.ConditionTrue { + return fmt.Errorf("job is not running %#v", job) + } + } + return nil +}