From 1abbb00067c1add6215d71d8bb43ec22ff106b99 Mon Sep 17 00:00:00 2001 From: Davanum Srinivas Date: Sun, 22 Sep 2024 17:24:42 -0400 Subject: [PATCH] Double a couple of other timeouts Signed-off-by: Davanum Srinivas --- test/e2e/framework/job/wait.go | 21 ++++++++++++++++----- test/e2e/node/gpu.go | 4 ++-- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/test/e2e/framework/job/wait.go b/test/e2e/framework/job/wait.go index a7c3d265e5f..27355ec3501 100644 --- a/test/e2e/framework/job/wait.go +++ b/test/e2e/framework/job/wait.go @@ -42,17 +42,23 @@ type JobState func(job *batchv1.Job) string // WaitForJobPodsRunning wait for all pods for the Job named JobName in namespace ns to become Running. Only use // when pods will run for a long time, or it will be racy. func WaitForJobPodsRunning(ctx context.Context, c clientset.Interface, ns, jobName string, expectedCount int32) error { - return waitForJobPodsInPhase(ctx, c, ns, jobName, expectedCount, v1.PodRunning) + return waitForJobPodsInPhase(ctx, c, ns, jobName, expectedCount, v1.PodRunning, JobTimeout) +} + +// WaitForJobPodsRunningWithTimeout waits for all pods for the Job named JobName in namespace ns to become Running. Only use +// when pods will run for a long time, or it will be racy. Same as WaitForJobPodsRunning but with an additional timeout parameter. +func WaitForJobPodsRunningWithTimeout(ctx context.Context, c clientset.Interface, ns, jobName string, expectedCount int32, timeout time.Duration) error { + return waitForJobPodsInPhase(ctx, c, ns, jobName, expectedCount, v1.PodRunning, timeout) } // WaitForJobPodsSucceeded wait for all pods for the Job named JobName in namespace ns to become Succeeded. 
func WaitForJobPodsSucceeded(ctx context.Context, c clientset.Interface, ns, jobName string, expectedCount int32) error { - return waitForJobPodsInPhase(ctx, c, ns, jobName, expectedCount, v1.PodSucceeded) + return waitForJobPodsInPhase(ctx, c, ns, jobName, expectedCount, v1.PodSucceeded, JobTimeout) } // waitForJobPodsInPhase wait for all pods for the Job named JobName in namespace ns to be in a given phase. -func waitForJobPodsInPhase(ctx context.Context, c clientset.Interface, ns, jobName string, expectedCount int32, phase v1.PodPhase) error { - return wait.PollUntilContextTimeout(ctx, framework.Poll, JobTimeout, false, func(ctx context.Context) (bool, error) { +func waitForJobPodsInPhase(ctx context.Context, c clientset.Interface, ns, jobName string, expectedCount int32, phase v1.PodPhase, timeout time.Duration) error { + return wait.PollUntilContextTimeout(ctx, framework.Poll, timeout, false, func(ctx context.Context) (bool, error) { pods, err := GetJobPods(ctx, c, ns, jobName) if err != nil { return false, err @@ -157,7 +163,12 @@ func isJobFailed(j *batchv1.Job) bool { // WaitForJobFinish uses c to wait for the Job jobName in namespace ns to finish (either Failed or Complete). func WaitForJobFinish(ctx context.Context, c clientset.Interface, ns, jobName string) error { - return wait.PollUntilContextTimeout(ctx, framework.Poll, JobTimeout, true, func(ctx context.Context) (bool, error) { + return WaitForJobFinishWithTimeout(ctx, c, ns, jobName, JobTimeout) +} + +// WaitForJobFinishWithTimeout uses c to wait for the Job jobName in namespace ns to finish (either Failed or Complete). 
+func WaitForJobFinishWithTimeout(ctx context.Context, c clientset.Interface, ns, jobName string, timeout time.Duration) error { + return wait.PollUntilContextTimeout(ctx, framework.Poll, timeout, true, func(ctx context.Context) (bool, error) { curr, err := c.BatchV1().Jobs(ns).Get(ctx, jobName, metav1.GetOptions{}) if err != nil { return false, err diff --git a/test/e2e/node/gpu.go b/test/e2e/node/gpu.go index e225b02e71c..3e0a0376ab3 100644 --- a/test/e2e/node/gpu.go +++ b/test/e2e/node/gpu.go @@ -130,7 +130,7 @@ var _ = SIGDescribe(feature.GPUDevicePlugin, framework.WithSerial(), "Test using framework.ExpectNoError(err) // make sure job is running by waiting for its first pod to start running - err = e2ejob.WaitForJobPodsRunning(ctx, f.ClientSet, f.Namespace.Name, job.Name, 1) + err = e2ejob.WaitForJobPodsRunningWithTimeout(ctx, f.ClientSet, f.Namespace.Name, job.Name, 1, e2ejob.JobTimeout*2) framework.ExpectNoError(err) numNodes, err := e2enode.TotalRegistered(ctx, f.ClientSet) @@ -139,7 +139,7 @@ var _ = SIGDescribe(feature.GPUDevicePlugin, framework.WithSerial(), "Test using framework.ExpectNoError(err) ginkgo.By("Waiting for gpu job to finish") - err = e2ejob.WaitForJobFinish(ctx, f.ClientSet, f.Namespace.Name, job.Name) + err = e2ejob.WaitForJobFinishWithTimeout(ctx, f.ClientSet, f.Namespace.Name, job.Name, e2ejob.JobTimeout*2) framework.ExpectNoError(err) ginkgo.By("Done with gpu job")