Merge pull request #43741 from wojtek-t/ignore_image_puller_errors

Automatic merge from submit-queue

Fix problems of not-starting image pullers

In e2e.go there are the following lines:
https://github.com/kubernetes/kubernetes/blob/master/test/e2e/e2e.go#L150
```
	if err := framework.WaitForPodsSuccess(c, metav1.NamespaceSystem, framework.ImagePullerLabels, imagePrePullingTimeout); err != nil {
		// There is no guarantee that the image pulling will succeed in 3 minutes
		// and we don't even run the image puller on all platforms (including GKE).
		// We wait for it so we get an indication of failures in the logs, and to
		// maximize benefit of image pre-pulling.
		framework.Logf("WARNING: Image pulling pods failed to enter success in %v: %v", imagePrePullingTimeout, err)
	}
```

However, a few lines above:
https://github.com/kubernetes/kubernetes/blob/master/test/e2e/e2e.go#L143

we were waiting for all image pullers to actually enter the Success state, and returning an error if they did not. Given the comment quoted above, that stricter wait was clearly not intended.

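This PR fixes the problem: `WaitForPodsRunningReady` no longer waits for the image pullers to succeed, and pods in the Succeeded phase are simply not counted.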

Ref #43728

@anhowe @davidopp
This commit is contained in:
Kubernetes Submit Queue 2017-03-28 05:40:35 -07:00 committed by GitHub
commit c9356c6af6
4 changed files with 8 additions and 22 deletions

View File

@ -140,7 +140,7 @@ var _ = ginkgo.SynchronizedBeforeSuite(func() []byte {
// #41007. To avoid those pods preventing the whole test runs (and just
// wasting the whole run), we allow for some not-ready pods (with the
// number equal to the number of allowed not-ready nodes).
if err := framework.WaitForPodsRunningReady(c, metav1.NamespaceSystem, int32(framework.TestContext.MinStartupPods), int32(framework.TestContext.AllowedNotReadyNodes), podStartupTimeout, framework.ImagePullerLabels, true); err != nil {
if err := framework.WaitForPodsRunningReady(c, metav1.NamespaceSystem, int32(framework.TestContext.MinStartupPods), int32(framework.TestContext.AllowedNotReadyNodes), podStartupTimeout, framework.ImagePullerLabels); err != nil {
framework.DumpAllNamespaceInfo(c, metav1.NamespaceSystem)
framework.LogFailedContainers(c, metav1.NamespaceSystem, framework.Logf)
runKubernetesServiceTestContainer(c, metav1.NamespaceDefault)

View File

@ -485,29 +485,19 @@ func WaitForPodsSuccess(c clientset.Interface, ns string, successPodLabels map[s
// ready. It has separate behavior from other 'wait for' pods functions in
// that it requests the list of pods on every iteration. This is useful, for
// example, in cluster startup, because the number of pods increases while
// waiting.
// If ignoreLabels is not empty, pods matching this selector are ignored and
// this function waits for minPods to enter Running/Ready and for all pods
// matching ignoreLabels to enter Success phase. Otherwise an error is returned
// even if there are minPods pods, some of which are in Running/Ready
// and some in Success. This is to allow the client to decide if "Success"
// means "Ready" or not.
// If skipSucceeded is true, any pods that are Succeeded are not counted.
func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods, allowedNotReadyPods int32, timeout time.Duration, ignoreLabels map[string]string, skipSucceeded bool) error {
// waiting. All pods that are in SUCCESS state are not counted.
//
// If ignoreLabels is not empty, pods matching this selector are ignored.
func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods, allowedNotReadyPods int32, timeout time.Duration, ignoreLabels map[string]string) error {
ignoreSelector := labels.SelectorFromSet(ignoreLabels)
start := time.Now()
Logf("Waiting up to %v for all pods (need at least %d) in namespace '%s' to be running and ready",
timeout, minPods, ns)
wg := sync.WaitGroup{}
wg.Add(1)
var waitForSuccessError error
var ignoreNotReady bool
badPods := []v1.Pod{}
desiredPods := 0
go func() {
waitForSuccessError = WaitForPodsSuccess(c, ns, ignoreLabels, timeout)
wg.Done()
}()
if wait.PollImmediate(Poll, timeout, func() (bool, error) {
// We get the new list of pods, replication controllers, and
@ -554,7 +544,7 @@ func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods, allowedN
switch {
case res && err == nil:
nOk++
case pod.Status.Phase == v1.PodSucceeded && skipSucceeded:
case pod.Status.Phase == v1.PodSucceeded:
continue
case pod.Status.Phase == v1.PodSucceeded:
Logf("The status of Pod %s is Succeeded which is unexpected", pod.ObjectMeta.Name)
@ -590,10 +580,6 @@ func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods, allowedN
}
Logf("Number of not-ready pods is allowed.")
}
wg.Wait()
if waitForSuccessError != nil {
return waitForSuccessError
}
return nil
}

View File

@ -237,7 +237,7 @@ var _ = framework.KubeDescribe("Nodes [Disruptive]", func() {
// Many e2e tests assume that the cluster is fully healthy before they start. Wait until
// the cluster is restored to health.
By("waiting for system pods to successfully restart")
err := framework.WaitForPodsRunningReady(c, metav1.NamespaceSystem, systemPodsNo, 0, framework.PodReadyBeforeTimeout, ignoreLabels, true)
err := framework.WaitForPodsRunningReady(c, metav1.NamespaceSystem, systemPodsNo, 0, framework.PodReadyBeforeTimeout, ignoreLabels)
Expect(err).NotTo(HaveOccurred())
By("waiting for image prepulling pods to complete")
framework.WaitForPodsSuccess(c, metav1.NamespaceSystem, framework.ImagePullerLabels, imagePrePullingTimeout)

View File

@ -92,7 +92,7 @@ var _ = framework.KubeDescribe("SchedulerPredicates [Serial]", func() {
}
}
err = framework.WaitForPodsRunningReady(cs, metav1.NamespaceSystem, int32(systemPodsNo), 0, framework.PodReadyBeforeTimeout, ignoreLabels, true)
err = framework.WaitForPodsRunningReady(cs, metav1.NamespaceSystem, int32(systemPodsNo), 0, framework.PodReadyBeforeTimeout, ignoreLabels)
Expect(err).NotTo(HaveOccurred())
for _, node := range nodeList.Items {