From 98058a1b68da3726e1110dc8bba3838459d03b29 Mon Sep 17 00:00:00 2001
From: wojtekt
Date: Thu, 23 Apr 2020 22:05:55 +0200
Subject: [PATCH 1/2] Fix the flaky kubectl tests at scale

---
 .../src/k8s.io/kubectl/pkg/cmd/debug/debug.go | 17 +----------------
 .../src/k8s.io/kubectl/pkg/cmd/rollout/BUILD  |  1 -
 .../kubectl/pkg/cmd/rollout/rollout_status.go | 17 +----------------
 staging/src/k8s.io/kubectl/pkg/cmd/run/run.go | 19 +------------------
 4 files changed, 3 insertions(+), 51 deletions(-)

diff --git a/staging/src/k8s.io/kubectl/pkg/cmd/debug/debug.go b/staging/src/k8s.io/kubectl/pkg/cmd/debug/debug.go
index 31589b1eb3f..16296f7bbe2 100644
--- a/staging/src/k8s.io/kubectl/pkg/cmd/debug/debug.go
+++ b/staging/src/k8s.io/kubectl/pkg/cmd/debug/debug.go
@@ -355,21 +355,6 @@ func waitForEphemeralContainer(ctx context.Context, podClient corev1client.PodsG
 	ctx, cancel := watchtools.ContextWithOptionalTimeout(ctx, 0*time.Second)
 	defer cancel()
 
-	preconditionFunc := func(store cache.Store) (bool, error) {
-		_, exists, err := store.Get(&metav1.ObjectMeta{Namespace: ns, Name: podName})
-		if err != nil {
-			return true, err
-		}
-		if !exists {
-			// We need to make sure we see the object in the cache before we start waiting for events
-			// or we would be waiting for the timeout if such object didn't exist.
-			// (e.g. it was deleted before we started informers so they wouldn't even see the delete event)
-			return true, errors.NewNotFound(corev1.Resource("pods"), podName)
-		}
-
-		return false, nil
-	}
-
 	fieldSelector := fields.OneTermEqualSelector("metadata.name", podName).String()
 	lw := &cache.ListWatch{
 		ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
@@ -385,7 +370,7 @@ func waitForEphemeralContainer(ctx context.Context, podClient corev1client.PodsG
 	intr := interrupt.New(nil, cancel)
 	var result *corev1.Pod
 	err := intr.Run(func() error {
-		ev, err := watchtools.UntilWithSync(ctx, lw, &corev1.Pod{}, preconditionFunc, func(ev watch.Event) (bool, error) {
+		ev, err := watchtools.UntilWithSync(ctx, lw, &corev1.Pod{}, nil, func(ev watch.Event) (bool, error) {
 			switch ev.Type {
 			case watch.Deleted:
 				return false, errors.NewNotFound(schema.GroupResource{Resource: "pods"}, "")
diff --git a/staging/src/k8s.io/kubectl/pkg/cmd/rollout/BUILD b/staging/src/k8s.io/kubectl/pkg/cmd/rollout/BUILD
index 063845982f0..ea51f15c41d 100644
--- a/staging/src/k8s.io/kubectl/pkg/cmd/rollout/BUILD
+++ b/staging/src/k8s.io/kubectl/pkg/cmd/rollout/BUILD
@@ -21,7 +21,6 @@ go_library(
         "//build/visible_to:pkg_kubectl_cmd_rollout_CONSUMERS",
     ],
     deps = [
-        "//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/api/meta:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1/unstructured:go_default_library",
diff --git a/staging/src/k8s.io/kubectl/pkg/cmd/rollout/rollout_status.go b/staging/src/k8s.io/kubectl/pkg/cmd/rollout/rollout_status.go
index 23f07253a81..6f29e146343 100644
--- a/staging/src/k8s.io/kubectl/pkg/cmd/rollout/rollout_status.go
+++ b/staging/src/k8s.io/kubectl/pkg/cmd/rollout/rollout_status.go
@@ -23,7 +23,6 @@ import (
 
 	"github.com/spf13/cobra"
 
-	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/api/meta"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
@@ -200,25 +199,11 @@ func (o *RolloutStatusOptions) Run() error {
 		},
 	}
 
-	preconditionFunc := func(store cache.Store) (bool, error) {
-		_, exists, err := store.Get(&metav1.ObjectMeta{Namespace: info.Namespace, Name: info.Name})
-		if err != nil {
-			return true, err
-		}
-		if !exists {
-			// We need to make sure we see the object in the cache before we start waiting for events
-			// or we would be waiting for the timeout if such object didn't exist.
-			return true, apierrors.NewNotFound(mapping.Resource.GroupResource(), info.Name)
-		}
-
-		return false, nil
-	}
-
 	// if the rollout isn't done yet, keep watching deployment status
 	ctx, cancel := watchtools.ContextWithOptionalTimeout(context.Background(), o.Timeout)
 	intr := interrupt.New(nil, cancel)
 	return intr.Run(func() error {
-		_, err = watchtools.UntilWithSync(ctx, lw, &unstructured.Unstructured{}, preconditionFunc, func(e watch.Event) (bool, error) {
+		_, err = watchtools.UntilWithSync(ctx, lw, &unstructured.Unstructured{}, nil, func(e watch.Event) (bool, error) {
 			switch t := e.Type; t {
 			case watch.Added, watch.Modified:
 				status, done, err := statusViewer.Status(e.Object.(runtime.Unstructured), o.Revision)
diff --git a/staging/src/k8s.io/kubectl/pkg/cmd/run/run.go b/staging/src/k8s.io/kubectl/pkg/cmd/run/run.go
index 1a6713a015a..1b370f2b500 100644
--- a/staging/src/k8s.io/kubectl/pkg/cmd/run/run.go
+++ b/staging/src/k8s.io/kubectl/pkg/cmd/run/run.go
@@ -457,21 +457,6 @@ func waitForPod(podClient corev1client.PodsGetter, ns, name string, timeout time
 	ctx, cancel := watchtools.ContextWithOptionalTimeout(context.Background(), timeout)
 	defer cancel()
 
-	preconditionFunc := func(store cache.Store) (bool, error) {
-		_, exists, err := store.Get(&metav1.ObjectMeta{Namespace: ns, Name: name})
-		if err != nil {
-			return true, err
-		}
-		if !exists {
-			// We need to make sure we see the object in the cache before we start waiting for events
-			// or we would be waiting for the timeout if such object didn't exist.
-			// (e.g. it was deleted before we started informers so they wouldn't even see the delete event)
-			return true, errors.NewNotFound(corev1.Resource("pods"), name)
-		}
-
-		return false, nil
-	}
-
 	fieldSelector := fields.OneTermEqualSelector("metadata.name", name).String()
 	lw := &cache.ListWatch{
 		ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
@@ -487,9 +472,7 @@ func waitForPod(podClient corev1client.PodsGetter, ns, name string, timeout time
 	intr := interrupt.New(nil, cancel)
 	var result *corev1.Pod
 	err := intr.Run(func() error {
-		ev, err := watchtools.UntilWithSync(ctx, lw, &corev1.Pod{}, preconditionFunc, func(ev watch.Event) (bool, error) {
-			return exitCondition(ev)
-		})
+		ev, err := watchtools.UntilWithSync(ctx, lw, &corev1.Pod{}, nil, exitCondition)
 		if ev != nil {
 			result = ev.Object.(*corev1.Pod)
 		}

From 6f1484256db9579361d218aef7f7960446fd541d Mon Sep 17 00:00:00 2001
From: Maciej Szulik
Date: Fri, 24 Apr 2020 13:04:42 +0200
Subject: [PATCH 2/2] waitForPod should use a different exitCondition depending on restart policy

---
 staging/src/k8s.io/kubectl/pkg/cmd/run/run.go | 32 +++++++++++++++----
 test/e2e/kubectl/kubectl.go                   | 12 +++++--
 2 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/staging/src/k8s.io/kubectl/pkg/cmd/run/run.go b/staging/src/k8s.io/kubectl/pkg/cmd/run/run.go
index 1b370f2b500..62820995ccc 100644
--- a/staging/src/k8s.io/kubectl/pkg/cmd/run/run.go
+++ b/staging/src/k8s.io/kubectl/pkg/cmd/run/run.go
@@ -381,17 +381,21 @@ func (o *RunOptions) Run(f cmdutil.Factory, cmd *cobra.Command, args []string) e
 	}
 
 	var pod *corev1.Pod
-	leaveStdinOpen := o.LeaveStdinOpen
-	waitForExitCode := !leaveStdinOpen && restartPolicy == corev1.RestartPolicyNever
+	waitForExitCode := !o.LeaveStdinOpen && (restartPolicy == corev1.RestartPolicyNever || restartPolicy == corev1.RestartPolicyOnFailure)
 	if waitForExitCode {
-		pod, err = waitForPod(clientset.CoreV1(), attachablePod.Namespace, attachablePod.Name, opts.GetPodTimeout, podCompleted)
+		// we need a different exit condition depending on the restart policy:
+		// with Never the pod can either fail or succeed, but with OnFailure
+		// only success matters
+		exitCondition := podCompleted
+		if restartPolicy == corev1.RestartPolicyOnFailure {
+			exitCondition = podSucceeded
+		}
+		pod, err = waitForPod(clientset.CoreV1(), attachablePod.Namespace, attachablePod.Name, opts.GetPodTimeout, exitCondition)
 		if err != nil {
 			return err
 		}
-	}
-
-	// after removal is done, return successfully if we are not interested in the exit code
-	if !waitForExitCode {
+	} else {
+		// after removal is done, return successfully if we are not interested in the exit code
 		return nil
 	}
 
@@ -691,6 +695,20 @@ func podCompleted(event watch.Event) (bool, error) {
 	return false, nil
 }
 
+// podSucceeded returns true if the pod has successfully run to completion, false if the pod
+// has not yet reached that state, or an error in any other case.
+func podSucceeded(event watch.Event) (bool, error) {
+	switch event.Type {
+	case watch.Deleted:
+		return false, errors.NewNotFound(schema.GroupResource{Resource: "pods"}, "")
+	}
+	switch t := event.Object.(type) {
+	case *corev1.Pod:
+		return t.Status.Phase == corev1.PodSucceeded, nil
+	}
+	return false, nil
+}
+
 // podRunningAndReady returns true if the pod is running and ready, false if the pod has not
 // yet reached those states, returns ErrPodCompleted if the pod has run to completion, or
 // an error in any other case.
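The behavioral difference between the two exit conditions can be checked in isolation. Below is a minimal, self-contained sketch (not part of the patch) that re-declares simplified versions of podCompleted and podSucceeded — the real ones also translate watch.Deleted into a NotFound error — and feeds them a synthetic Failed-phase event. With --restart=Never a failed pod is terminal, so podCompleted ends the wait; with --restart=OnFailure the kubelet restarts the failed container, so podSucceeded keeps waiting:

	package main

	import (
		"fmt"

		corev1 "k8s.io/api/core/v1"
		"k8s.io/apimachinery/pkg/watch"
	)

	// podCompleted mirrors the --restart=Never condition: any terminal
	// phase (Succeeded or Failed) ends the wait.
	func podCompleted(event watch.Event) (bool, error) {
		if pod, ok := event.Object.(*corev1.Pod); ok {
			switch pod.Status.Phase {
			case corev1.PodSucceeded, corev1.PodFailed:
				return true, nil
			}
		}
		return false, nil
	}

	// podSucceeded mirrors the --restart=OnFailure condition: only a
	// successful completion ends the wait, since a failed container
	// will be restarted rather than leaving the pod terminal.
	func podSucceeded(event watch.Event) (bool, error) {
		if pod, ok := event.Object.(*corev1.Pod); ok {
			return pod.Status.Phase == corev1.PodSucceeded, nil
		}
		return false, nil
	}

	func main() {
		failed := watch.Event{
			Type:   watch.Modified,
			Object: &corev1.Pod{Status: corev1.PodStatus{Phase: corev1.PodFailed}},
		}
		neverDone, _ := podCompleted(failed)
		onFailureDone, _ := podSucceeded(failed)
		fmt.Println(neverDone, onFailureDone) // true false
	}

This is also why the e2e expectations below change: an OnFailure pod whose command keeps exiting 42 never satisfies podSucceeded, so kubectl run now waits until its timeout rather than returning success.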
diff --git a/test/e2e/kubectl/kubectl.go b/test/e2e/kubectl/kubectl.go
index f0e1195a312..17c7104fbec 100644
--- a/test/e2e/kubectl/kubectl.go
+++ b/test/e2e/kubectl/kubectl.go
@@ -522,13 +522,21 @@ var _ = SIGDescribe("Kubectl client", func() {
 			_, err = framework.NewKubectlCommand(ns, nsFlag, "run", "-i", "--image="+busyboxImage, "--restart=OnFailure", "failure-2", "--", "/bin/sh", "-c", "cat && exit 42").
 				WithStdinData("abcd1234").
 				Exec()
-			framework.ExpectNoError(err)
+			ee, ok = err.(uexec.ExitError)
+			framework.ExpectEqual(ok, true)
+			if !strings.Contains(ee.String(), "timed out") {
+				framework.Failf("Missing expected 'timed out' error, got: %#v", ee)
+			}
 
 			ginkgo.By("running a failing command without --restart=Never, but with --rm")
 			_, err = framework.NewKubectlCommand(ns, nsFlag, "run", "-i", "--image="+busyboxImage, "--restart=OnFailure", "--rm", "failure-3", "--", "/bin/sh", "-c", "cat && exit 42").
 				WithStdinData("abcd1234").
 				Exec()
-			framework.ExpectNoError(err)
+			ee, ok = err.(uexec.ExitError)
+			framework.ExpectEqual(ok, true)
+			if !strings.Contains(ee.String(), "timed out") {
+				framework.Failf("Missing expected 'timed out' error, got: %#v", ee)
+			}
 			e2epod.WaitForPodToDisappear(f.ClientSet, ns, "failure-3", labels.Everything(), 2*time.Second, wait.ForeverTestTimeout)
 
 			ginkgo.By("running a failing command with --leave-stdin-open")
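For context on the first commit: with the store-based precondition, UntilWithSync returned NotFound as soon as the object was absent from the informer's initial list, which at scale could fire before a just-created pod became visible — the apparent source of the flakes. With a nil precondition, a missing object simply leaves the watch waiting (up to the timeout) for its Added event. A standalone sketch of the resulting call shape, with invented names (waitForPodPhase is not part of the patch) and assuming client-go v0.18+ context-aware clients:

	package podwait

	import (
		"context"
		"time"

		corev1 "k8s.io/api/core/v1"
		apierrors "k8s.io/apimachinery/pkg/api/errors"
		metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
		"k8s.io/apimachinery/pkg/fields"
		"k8s.io/apimachinery/pkg/runtime"
		"k8s.io/apimachinery/pkg/watch"
		"k8s.io/client-go/kubernetes"
		"k8s.io/client-go/tools/cache"
		watchtools "k8s.io/client-go/tools/watch"
	)

	// waitForPodPhase blocks until the named pod reaches the given phase,
	// is deleted, or the context times out.
	func waitForPodPhase(client kubernetes.Interface, ns, name string, phase corev1.PodPhase, timeout time.Duration) error {
		ctx, cancel := context.WithTimeout(context.Background(), timeout)
		defer cancel()

		// Watch a single pod by name, mirroring the ListWatch in the patch.
		fieldSelector := fields.OneTermEqualSelector("metadata.name", name).String()
		lw := &cache.ListWatch{
			ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
				options.FieldSelector = fieldSelector
				return client.CoreV1().Pods(ns).List(ctx, options)
			},
			WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
				options.FieldSelector = fieldSelector
				return client.CoreV1().Pods(ns).Watch(ctx, options)
			},
		}

		// nil precondition: a pod missing from the initial list leaves the
		// watch waiting for its Added event instead of failing immediately.
		_, err := watchtools.UntilWithSync(ctx, lw, &corev1.Pod{}, nil, func(ev watch.Event) (bool, error) {
			if ev.Type == watch.Deleted {
				return false, apierrors.NewNotFound(corev1.Resource("pods"), name)
			}
			pod, ok := ev.Object.(*corev1.Pod)
			return ok && pod.Status.Phase == phase, nil
		})
		return err
	}

The trade-off is that a wait on a pod that genuinely never existed now runs until the timeout instead of failing fast, which is why the e2e tests above assert a "timed out" error for the OnFailure cases.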