Merge pull request #88440 from smarterclayton/container_success_fix

Ensure Kubelet always reports terminating pod container status
2025-07-29 22:46:12 +00:00 · 2020-03-04 20:13:04 -08:00 · 2020-03-04 20:13:04 -08:00 · 7a513b575a
commit 7a513b575a
parent 7d6d790b43 8bc5cb01a9
9 changed files with 361 additions and 24 deletions
--- a/pkg/kubelet/container/helpers.go
+++ b/pkg/kubelet/container/helpers.go
@ -61,6 +61,10 @@ type RuntimeHelper interface {
 // ShouldContainerBeRestarted checks whether a container needs to be restarted.
 // TODO(yifan): Think about how to refactor this.
 func ShouldContainerBeRestarted(container *v1.Container, pod *v1.Pod, podStatus *PodStatus) bool {
+	// Once a pod has been marked deleted, it should not be restarted
+	if pod.DeletionTimestamp != nil {
+		return false
+	}
 	// Get latest container status.
 	status := podStatus.FindContainerStatusByName(container.Name)
 	// If the container was never started before, we should start it.
--- a/pkg/kubelet/container/helpers_test.go
+++ b/pkg/kubelet/container/helpers_test.go
@ -19,6 +19,7 @@ package container
 import (
 	"reflect"
 	"testing"
+	"time"

 	"github.com/google/go-cmp/cmp"
 	"github.com/stretchr/testify/assert"
@ -449,6 +450,8 @@ func TestShouldContainerBeRestarted(t *testing.T) {
 		v1.RestartPolicyOnFailure,
 		v1.RestartPolicyAlways,
 	}
+
+	// test policies
 	expected := map[string][]bool{
 		"no-history": {true, true, true},
 		"alive":      {false, false, false},
@ -467,6 +470,27 @@ func TestShouldContainerBeRestarted(t *testing.T) {
 			}
 		}
 	}
+
+	// test deleted pod
+	pod.DeletionTimestamp = &metav1.Time{Time: time.Now()}
+	expected = map[string][]bool{
+		"no-history": {false, false, false},
+		"alive":      {false, false, false},
+		"succeed":    {false, false, false},
+		"failed":     {false, false, false},
+		"unknown":    {false, false, false},
+	}
+	for _, c := range pod.Spec.Containers {
+		for i, policy := range policies {
+			pod.Spec.RestartPolicy = policy
+			e := expected[c.Name][i]
+			r := ShouldContainerBeRestarted(&c, pod, podStatus)
+			if r != e {
+				t.Errorf("Restart for container %q with restart policy %q expected %t, got %t",
+					c.Name, policy, e, r)
+			}
+		}
+	}
 }

 func TestHasPrivilegedContainer(t *testing.T) {
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@ -2001,18 +2001,22 @@ func (kl *Kubelet) syncLoopIteration(configCh <-chan kubetypes.PodUpdate, handle
 }

 // dispatchWork starts the asynchronous sync of the pod in a pod worker.
-// If the pod is terminated, dispatchWork
+// If the pod has completed termination, dispatchWork will perform no action.
 func (kl *Kubelet) dispatchWork(pod *v1.Pod, syncType kubetypes.SyncPodType, mirrorPod *v1.Pod, start time.Time) {
-	if kl.podIsTerminated(pod) {
-		if pod.DeletionTimestamp != nil {
-			// If the pod is in a terminated state, there is no pod worker to
-			// handle the work item. Check if the DeletionTimestamp has been
-			// set, and force a status update to trigger a pod deletion request
-			// to the apiserver.
-			kl.statusManager.TerminatePod(pod)
-		}
+	// check whether we are ready to delete the pod from the API server (all status up to date)
+	containersTerminal, podWorkerTerminal := kl.podAndContainersAreTerminal(pod)
+	if pod.DeletionTimestamp != nil && containersTerminal {
+		klog.V(4).Infof("Pod %q has completed execution and should be deleted from the API server: %s", format.Pod(pod), syncType)
+		kl.statusManager.TerminatePod(pod)
 		return
 	}
+
+	// optimization: avoid invoking the pod worker if no further changes are possible to the pod definition
+	if podWorkerTerminal {
+		klog.V(4).Infof("Pod %q has completed, ignoring remaining sync work: %s", format.Pod(pod), syncType)
+		return
+	}
+
 	// Run the sync in an async worker.
 	kl.podWorkers.UpdatePod(&UpdatePodOptions{
 		Pod:        pod,
--- a/pkg/kubelet/kubelet_pods.go
+++ b/pkg/kubelet/kubelet_pods.go
@ -865,8 +865,9 @@ func (kl *Kubelet) getPullSecretsForPod(pod *v1.Pod) []v1.Secret {
 	return pullSecrets
 }

-// podIsTerminated returns true if pod is in the terminated state ("Failed" or "Succeeded").
-func (kl *Kubelet) podIsTerminated(pod *v1.Pod) bool {
+// podStatusIsTerminal reports when the specified pod has no running containers or is no longer accepting
+// spec changes.
+func (kl *Kubelet) podAndContainersAreTerminal(pod *v1.Pod) (containersTerminal, podWorkerTerminal bool) {
 	// Check the cached pod status which was set after the last sync.
 	status, ok := kl.statusManager.GetPodStatus(pod.UID)
 	if !ok {
@ -875,11 +876,28 @@ func (kl *Kubelet) podIsTerminated(pod *v1.Pod) bool {
 		// restarted.
 		status = pod.Status
 	}
-	return status.Phase == v1.PodFailed || status.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && notRunning(status.ContainerStatuses))
+	// A pod transitions into failed or succeeded from either container lifecycle (RestartNever container
+	// fails) or due to external events like deletion or eviction. A terminal pod *should* have no running
+	// containers, but to know that the pod has completed its lifecycle you must wait for containers to also
+	// be terminal.
+	containersTerminal = notRunning(status.ContainerStatuses)
+	// The kubelet must accept config changes from the pod spec until it has reached a point where changes would
+	// have no effect on any running container.
+	podWorkerTerminal = status.Phase == v1.PodFailed || status.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && containersTerminal)
+	return
 }

-// IsPodTerminated returns true if the pod with the provided UID is in a terminated state ("Failed" or "Succeeded")
-// or if the pod has been deleted or removed
+// podIsTerminated returns true if the provided pod is in a terminal phase ("Failed", "Succeeded") or
+// has been deleted and has no running containers. This corresponds to when a pod must accept changes to
+// its pod spec (e.g. terminating containers allow grace period to be shortened).
+func (kl *Kubelet) podIsTerminated(pod *v1.Pod) bool {
+	_, podWorkerTerminal := kl.podAndContainersAreTerminal(pod)
+	return podWorkerTerminal
+}
+
+// IsPodTerminated returns true if the pod with the provided UID is in a terminal phase ("Failed",
+// "Succeeded") or has been deleted and has no running containers. This corresponds to when a pod must
+// accept changes to its pod spec (e.g. terminating containers allow grace period to be shortened)
 func (kl *Kubelet) IsPodTerminated(uid types.UID) bool {
 	pod, podFound := kl.podManager.GetPodByUID(uid)
 	if !podFound {
--- a/pkg/kubelet/status/BUILD
+++ b/pkg/kubelet/status/BUILD
@ -54,6 +54,7 @@ go_test(
        "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/util/diff:go_default_library",
        "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
        "//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library",
        "//staging/src/k8s.io/client-go/testing:go_default_library",
--- a/pkg/kubelet/status/status_manager.go
+++ b/pkg/kubelet/status/status_manager.go
@ -160,13 +160,20 @@ func (m *manager) Start() {
 	syncTicker := time.Tick(syncPeriod)
 	// syncPod and syncBatch share the same go routine to avoid sync races.
 	go wait.Forever(func() {
-		select {
-		case syncRequest := <-m.podStatusChannel:
-			klog.V(5).Infof("Status Manager: syncing pod: %q, with status: (%d, %v) from podStatusChannel",
-				syncRequest.podUID, syncRequest.status.version, syncRequest.status.status)
-			m.syncPod(syncRequest.podUID, syncRequest.status)
-		case <-syncTicker:
-			m.syncBatch()
+		for {
+			select {
+			case syncRequest := <-m.podStatusChannel:
+				klog.V(5).Infof("Status Manager: syncing pod: %q, with status: (%d, %v) from podStatusChannel",
+					syncRequest.podUID, syncRequest.status.version, syncRequest.status.status)
+				m.syncPod(syncRequest.podUID, syncRequest.status)
+			case <-syncTicker:
+				klog.V(5).Infof("Status Manager: syncing batch")
+				// remove any entries in the status channel since the batch will handle them
+				for i := len(m.podStatusChannel); i > 0; i-- {
+					<-m.podStatusChannel
+				}
+				m.syncBatch()
+			}
 		}
 	}, 0)
 }
@ -314,21 +321,39 @@ func findContainerStatus(status *v1.PodStatus, containerID string) (containerSta
 func (m *manager) TerminatePod(pod *v1.Pod) {
 	m.podStatusesLock.Lock()
 	defer m.podStatusesLock.Unlock()
+
+	// ensure that all containers have a terminated state - because we do not know whether the container
+	// was successful, always report an error
 	oldStatus := &pod.Status
 	if cachedStatus, ok := m.podStatuses[pod.UID]; ok {
 		oldStatus = &cachedStatus.status
 	}
 	status := *oldStatus.DeepCopy()
 	for i := range status.ContainerStatuses {
+		if status.ContainerStatuses[i].State.Terminated != nil || status.ContainerStatuses[i].State.Waiting != nil {
+			continue
+		}
 		status.ContainerStatuses[i].State = v1.ContainerState{
-			Terminated: &v1.ContainerStateTerminated{},
+			Terminated: &v1.ContainerStateTerminated{
+				Reason:   "ContainerStatusUnknown",
+				Message:  "The container could not be located when the pod was terminated",
+				ExitCode: 137,
+			},
 		}
 	}
 	for i := range status.InitContainerStatuses {
+		if status.InitContainerStatuses[i].State.Terminated != nil || status.InitContainerStatuses[i].State.Waiting != nil {
+			continue
+		}
 		status.InitContainerStatuses[i].State = v1.ContainerState{
-			Terminated: &v1.ContainerStateTerminated{},
+			Terminated: &v1.ContainerStateTerminated{
+				Reason:   "ContainerStatusUnknown",
+				Message:  "The container could not be located when the pod was terminated",
+				ExitCode: 137,
+			},
 		}
 	}
+
 	m.updateStatusInternal(pod, status, true)
 }

--- a/pkg/kubelet/status/status_manager_test.go
+++ b/pkg/kubelet/status/status_manager_test.go
@ -27,11 +27,12 @@ import (

 	"github.com/stretchr/testify/assert"

-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/util/diff"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/kubernetes/fake"
 	core "k8s.io/client-go/testing"
@ -569,6 +570,16 @@ func TestTerminatePod(t *testing.T) {
 	t.Logf("update the pod's status to Failed.  TerminatePod should preserve this status update.")
 	firstStatus := getRandomPodStatus()
 	firstStatus.Phase = v1.PodFailed
+	firstStatus.InitContainerStatuses = []v1.ContainerStatus{
+		{Name: "init-test-1"},
+		{Name: "init-test-2", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "InitTest", ExitCode: 0}}},
+		{Name: "init-test-3", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "InitTest", ExitCode: 3}}},
+	}
+	firstStatus.ContainerStatuses = []v1.ContainerStatus{
+		{Name: "test-1"},
+		{Name: "test-2", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "Test", ExitCode: 2}}},
+		{Name: "test-3", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "Test", ExitCode: 0}}},
+	}
 	syncer.SetPodStatus(testPod, firstStatus)

 	t.Logf("set the testPod to a pod with Phase running, to simulate a stale pod")
@ -586,6 +597,26 @@ func TestTerminatePod(t *testing.T) {
 		assert.False(t, newStatus.InitContainerStatuses[i].State.Terminated == nil, "expected init containers to be terminated")
 	}

+	expectUnknownState := v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "ContainerStatusUnknown", Message: "The container could not be located when the pod was terminated", ExitCode: 137}}
+	if !reflect.DeepEqual(newStatus.InitContainerStatuses[0].State, expectUnknownState) {
+		t.Errorf("terminated container state not defaulted: %s", diff.ObjectReflectDiff(newStatus.InitContainerStatuses[0].State, expectUnknownState))
+	}
+	if !reflect.DeepEqual(newStatus.InitContainerStatuses[1].State, firstStatus.InitContainerStatuses[1].State) {
+		t.Errorf("existing terminated container state not preserved: %#v", newStatus.ContainerStatuses)
+	}
+	if !reflect.DeepEqual(newStatus.InitContainerStatuses[2].State, firstStatus.InitContainerStatuses[2].State) {
+		t.Errorf("existing terminated container state not preserved: %#v", newStatus.ContainerStatuses)
+	}
+	if !reflect.DeepEqual(newStatus.ContainerStatuses[0].State, expectUnknownState) {
+		t.Errorf("terminated container state not defaulted: %s", diff.ObjectReflectDiff(newStatus.ContainerStatuses[0].State, expectUnknownState))
+	}
+	if !reflect.DeepEqual(newStatus.ContainerStatuses[1].State, firstStatus.ContainerStatuses[1].State) {
+		t.Errorf("existing terminated container state not preserved: %#v", newStatus.ContainerStatuses)
+	}
+	if !reflect.DeepEqual(newStatus.ContainerStatuses[2].State, firstStatus.ContainerStatuses[2].State) {
+		t.Errorf("existing terminated container state not preserved: %#v", newStatus.ContainerStatuses)
+	}
+
 	t.Logf("we expect the previous status update to be preserved.")
 	assert.Equal(t, newStatus.Phase, firstStatus.Phase)
 	assert.Equal(t, newStatus.Message, firstStatus.Message)
--- a/test/e2e/node/BUILD
+++ b/test/e2e/node/BUILD
@ -37,6 +37,7 @@ go_library(
        "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/watch:go_default_library",
        "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
        "//test/e2e/framework:go_default_library",
        "//test/e2e/framework/job:go_default_library",
--- a/test/e2e/node/pods.go
+++ b/test/e2e/node/pods.go
@ -19,8 +19,13 @@ package node
 import (
 	"context"
 	"encoding/json"
+	"fmt"
+	"math/rand"
 	"net/http"
+	"regexp"
 	"strconv"
+	"strings"
+	"sync"
 	"time"

 	v1 "k8s.io/api/core/v1"
@ -29,6 +34,7 @@ import (
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/util/uuid"
 	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/apimachinery/pkg/watch"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
 	imageutils "k8s.io/kubernetes/test/utils/image"
@ -197,4 +203,227 @@ var _ = SIGDescribe("Pods Extended", func() {
 			framework.ExpectEqual(pod.Status.QOSClass, v1.PodQOSGuaranteed)
 		})
 	})
+
+	framework.KubeDescribe("Pod Container Status", func() {
+		var podClient *framework.PodClient
+		ginkgo.BeforeEach(func() {
+			podClient = f.PodClient()
+		})
+
+		ginkgo.It("should never report success for a pending container", func() {
+			ginkgo.By("creating pods that should always exit 1 and terminating the pod after a random delay")
+
+			var reBug88766 = regexp.MustCompile(`ContainerCannotRun.*rootfs_linux\.go.*kubernetes\.io~secret.*no such file or directory`)
+
+			var (
+				lock sync.Mutex
+				errs []error
+
+				wg sync.WaitGroup
+			)
+
+			const delay = 2000
+			const workers = 3
+			const pods = 15
+			var min, max time.Duration
+			for i := 0; i < workers; i++ {
+				wg.Add(1)
+				go func(i int) {
+					defer wg.Done()
+					for retries := 0; retries < pods; retries++ {
+						name := fmt.Sprintf("pod-submit-status-%d-%d", i, retries)
+						value := strconv.Itoa(time.Now().Nanosecond())
+						one := int64(1)
+						pod := &v1.Pod{
+							ObjectMeta: metav1.ObjectMeta{
+								Name: name,
+								Labels: map[string]string{
+									"name": "foo",
+									"time": value,
+								},
+							},
+							Spec: v1.PodSpec{
+								RestartPolicy:                 v1.RestartPolicyNever,
+								TerminationGracePeriodSeconds: &one,
+								Containers: []v1.Container{
+									{
+										Name:  "busybox",
+										Image: imageutils.GetE2EImage(imageutils.BusyBox),
+										Command: []string{
+											"/bin/false",
+										},
+										Resources: v1.ResourceRequirements{
+											Requests: v1.ResourceList{
+												v1.ResourceCPU:    resource.MustParse("5m"),
+												v1.ResourceMemory: resource.MustParse("10Mi"),
+											},
+										},
+									},
+								},
+							},
+						}
+
+						// create the pod, capture the change events, then delete the pod
+						start := time.Now()
+						created := podClient.Create(pod)
+						ch := make(chan []watch.Event)
+						go func() {
+							defer close(ch)
+							w, err := podClient.Watch(context.TODO(), metav1.ListOptions{
+								ResourceVersion: created.ResourceVersion,
+								FieldSelector:   fmt.Sprintf("metadata.name=%s", pod.Name),
+							})
+							if err != nil {
+								framework.Logf("Unable to watch pod %s: %v", pod.Name, err)
+								return
+							}
+							defer w.Stop()
+							events := []watch.Event{
+								{Type: watch.Added, Object: created},
+							}
+							for event := range w.ResultChan() {
+								events = append(events, event)
+								if event.Type == watch.Deleted {
+									break
+								}
+							}
+							ch <- events
+						}()
+
+						t := time.Duration(rand.Intn(delay)) * time.Millisecond
+						time.Sleep(t)
+						err := podClient.Delete(context.TODO(), pod.Name, nil)
+						framework.ExpectNoError(err, "failed to delete pod")
+
+						events, ok := <-ch
+						if !ok {
+							continue
+						}
+						if len(events) < 2 {
+							framework.Fail("only got a single event")
+						}
+
+						end := time.Now()
+
+						// check the returned events for consistency
+						var duration, completeDuration time.Duration
+						var hasContainers, hasTerminated, hasTerminalPhase, hasRunningContainers bool
+						verifyFn := func(event watch.Event) error {
+							var ok bool
+							pod, ok = event.Object.(*v1.Pod)
+							if !ok {
+								framework.Logf("Unexpected event object: %s %#v", event.Type, event.Object)
+								return nil
+							}
+
+							if len(pod.Status.InitContainerStatuses) != 0 {
+								return fmt.Errorf("pod %s on node %s had incorrect init containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.InitContainerStatuses)
+							}
+							if len(pod.Status.ContainerStatuses) == 0 {
+								if hasContainers {
+									return fmt.Errorf("pod %s on node %s had incorrect containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.ContainerStatuses)
+								}
+								return nil
+							}
+							hasContainers = true
+							if len(pod.Status.ContainerStatuses) != 1 {
+								return fmt.Errorf("pod %s on node %s had incorrect containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.ContainerStatuses)
+							}
+							status := pod.Status.ContainerStatuses[0]
+							t := status.State.Terminated
+							if hasTerminated {
+								if status.State.Waiting != nil || status.State.Running != nil {
+									return fmt.Errorf("pod %s on node %s was terminated and then changed state: %#v", pod.Name, pod.Spec.NodeName, status)
+								}
+								if t == nil {
+									return fmt.Errorf("pod %s on node %s was terminated and then had termination cleared: %#v", pod.Name, pod.Spec.NodeName, status)
+								}
+							}
+							hasRunningContainers = status.State.Waiting == nil && status.State.Terminated == nil
+							if t != nil {
+								if !t.FinishedAt.Time.IsZero() {
+									duration = t.FinishedAt.Sub(t.StartedAt.Time)
+									completeDuration = t.FinishedAt.Sub(pod.CreationTimestamp.Time)
+								}
+
+								defer func() { hasTerminated = true }()
+								switch {
+								case t.ExitCode == 1:
+									// expected
+								case t.ExitCode == 128 && reBug88766.MatchString(t.Message):
+									// pod volume teardown races with container start in CRI, which reports a failure
+									framework.Logf("pod %s on node %s failed with the symptoms of https://github.com/kubernetes/kubernetes/issues/88766")
+								default:
+									return fmt.Errorf("pod %s on node %s container unexpected exit code %d: start=%s end=%s reason=%s message=%s", pod.Name, pod.Spec.NodeName, t.ExitCode, t.StartedAt, t.FinishedAt, t.Reason, t.Message)
+								}
+							}
+							if pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded {
+								hasTerminalPhase = true
+							} else {
+								if hasTerminalPhase {
+									return fmt.Errorf("pod %s on node %s was in a terminal phase and then reverted: %#v", pod.Name, pod.Spec.NodeName, pod.Status)
+								}
+							}
+							return nil
+						}
+
+						var eventErr error
+						for _, event := range events[1:] {
+							if err := verifyFn(event); err != nil {
+								eventErr = err
+								break
+							}
+						}
+						func() {
+							defer lock.Unlock()
+							lock.Lock()
+
+							if eventErr != nil {
+								errs = append(errs, eventErr)
+								return
+							}
+
+							if !hasTerminalPhase {
+								var names []string
+								for _, status := range pod.Status.ContainerStatuses {
+									if status.State.Terminated != nil || status.State.Running != nil {
+										names = append(names, status.Name)
+									}
+								}
+								switch {
+								case len(names) > 0:
+									errs = append(errs, fmt.Errorf("pod %s on node %s did not reach a terminal phase before being deleted but had running containers: phase=%s, running-containers=%s", pod.Name, pod.Spec.NodeName, pod.Status.Phase, strings.Join(names, ",")))
+								case pod.Status.Phase != v1.PodPending:
+									errs = append(errs, fmt.Errorf("pod %s on node %s was not Pending but has no running containers: phase=%s", pod.Name, pod.Spec.NodeName, pod.Status.Phase))
+								}
+							}
+							if hasRunningContainers {
+								data, _ := json.MarshalIndent(pod.Status.ContainerStatuses, "", "  ")
+								errs = append(errs, fmt.Errorf("pod %s on node %s had running or unknown container status before being deleted:\n%s", pod.Name, pod.Spec.NodeName, string(data)))
+							}
+						}()
+
+						if duration < min {
+							min = duration
+						}
+						if duration > max || max == 0 {
+							max = duration
+						}
+						framework.Logf("Pod %s on node %s timings total=%s t=%s run=%s execute=%s", pod.Name, pod.Spec.NodeName, end.Sub(start), t, completeDuration, duration)
+					}
+
+				}(i)
+			}
+
+			wg.Wait()
+
+			if len(errs) > 0 {
+				var messages []string
+				for _, err := range errs {
+					messages = append(messages, err.Error())
+				}
+				framework.Failf("%d errors:\n%v", len(errs), strings.Join(messages, "\n"))
+			}
+		})
+	})
 })