diff --git a/test/e2e/node/pods.go b/test/e2e/node/pods.go
index a4cf9f3bf5e..79a4c488647 100644
--- a/test/e2e/node/pods.go
+++ b/test/e2e/node/pods.go
@@ -37,6 +37,7 @@ import (
 	"k8s.io/apimachinery/pkg/util/uuid"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/apimachinery/pkg/watch"
+	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
 	"k8s.io/kubernetes/pkg/kubelet/events"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
@@ -204,274 +205,19 @@ var _ = SIGDescribe("Pods Extended", func() {
 
 		ginkgo.It("should never report success for a pending container", func() {
 			ginkgo.By("creating pods that should always exit 1 and terminating the pod after a random delay")
-
-			var reBug88766 = regexp.MustCompile(`rootfs_linux.*kubernetes\.io~(secret|projected).*no such file or directory`)
-
-			var (
-				lock sync.Mutex
-				errs []error
-
-				wg sync.WaitGroup
+			createAndTestPodRepeatedly(
+				3, 15,
+				podFastDeleteScenario{client: podClient.PodInterface, delayMs: 2000},
+				podClient.PodInterface,
+			)
+		})
+		ginkgo.It("should never report container start when an init container fails", func() {
+			ginkgo.By("creating pods with an init container that always exits 1 and terminating the pod after a random delay")
+			createAndTestPodRepeatedly(
+				3, 15,
+				podFastDeleteScenario{client: podClient.PodInterface, delayMs: 2000, initContainer: true},
+				podClient.PodInterface,
 			)
-
-			r := prometheus.NewRegistry()
-			h := prometheus.NewSummaryVec(prometheus.SummaryOpts{
-				Name: "start_latency",
-				Objectives: map[float64]float64{
-					0.5:  0.05,
-					0.75: 0.025,
-					0.9:  0.01,
-					0.99: 0.001,
-				},
-			}, []string{"node"})
-			r.MustRegister(h)
-
-			const delay = 2000
-			const workers = 3
-			const pods = 15
-			var min, max time.Duration
-			for i := 0; i < workers; i++ {
-				wg.Add(1)
-				go func(i int) {
-					defer ginkgo.GinkgoRecover()
-					defer wg.Done()
-					for retries := 0; retries < pods; retries++ {
-						name := fmt.Sprintf("pod-submit-status-%d-%d", i, retries)
-						value := strconv.Itoa(time.Now().Nanosecond())
-						one := int64(1)
-						pod := &v1.Pod{
-							ObjectMeta: metav1.ObjectMeta{
-								Name: name,
-								Labels: map[string]string{
-									"name": "foo",
-									"time": value,
-								},
-							},
-							Spec: v1.PodSpec{
-								RestartPolicy:                 v1.RestartPolicyNever,
-								TerminationGracePeriodSeconds: &one,
-								Containers: []v1.Container{
-									{
-										Name:  "busybox",
-										Image: imageutils.GetE2EImage(imageutils.BusyBox),
-										Command: []string{
-											"/bin/false",
-										},
-										Resources: v1.ResourceRequirements{
-											Requests: v1.ResourceList{
-												v1.ResourceCPU:    resource.MustParse("5m"),
-												v1.ResourceMemory: resource.MustParse("10Mi"),
-											},
-										},
-									},
-								},
-							},
-						}
-
-						// create the pod, capture the change events, then delete the pod
-						start := time.Now()
-						created := podClient.Create(pod)
-						ch := make(chan []watch.Event)
-						waitForWatch := make(chan struct{})
-						go func() {
-							defer ginkgo.GinkgoRecover()
-							defer close(ch)
-							w, err := podClient.Watch(context.TODO(), metav1.ListOptions{
-								ResourceVersion: created.ResourceVersion,
-								FieldSelector:   fmt.Sprintf("metadata.name=%s", pod.Name),
-							})
-							if err != nil {
-								framework.Logf("Unable to watch pod %s: %v", pod.Name, err)
-								return
-							}
-							defer w.Stop()
-							close(waitForWatch)
-							events := []watch.Event{
-								{Type: watch.Added, Object: created},
-							}
-							for event := range w.ResultChan() {
-								events = append(events, event)
-								if event.Type == watch.Error {
-									framework.Logf("watch error seen for %s: %#v", pod.Name, event.Object)
-								}
-								if event.Type == watch.Deleted {
-									framework.Logf("watch delete seen for %s", pod.Name)
-									break
-								}
-							}
-							ch <- events
-						}()
-
-						select {
-						case <-ch: // in case the goroutine above exits before establishing the watch
-						case <-waitForWatch: // when the watch is established
-						}
-						t := time.Duration(rand.Intn(delay)) * time.Millisecond
-						time.Sleep(t)
-						err := podClient.Delete(context.TODO(), pod.Name, metav1.DeleteOptions{})
-						framework.ExpectNoError(err, "failed to delete pod")
-
-						var (
-							events []watch.Event
-							ok     bool
-						)
-						select {
-						case events, ok = <-ch:
-							if !ok {
-								continue
-							}
-							if len(events) < 2 {
-								framework.Fail("only got a single event")
-							}
-						case <-time.After(5 * time.Minute):
-							framework.Failf("timed out waiting for watch events for %s", pod.Name)
-						}
-
-						end := time.Now()
-
-						// check the returned events for consistency
-						var duration, completeDuration time.Duration
-						var hasContainers, hasTerminated, hasTerminalPhase, hasRunningContainers bool
-						verifyFn := func(event watch.Event) error {
-							var ok bool
-							pod, ok = event.Object.(*v1.Pod)
-							if !ok {
-								framework.Logf("Unexpected event object: %s %#v", event.Type, event.Object)
-								return nil
-							}
-
-							if len(pod.Status.InitContainerStatuses) != 0 {
-								return fmt.Errorf("pod %s on node %s had incorrect init containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.InitContainerStatuses)
-							}
-							if len(pod.Status.ContainerStatuses) == 0 {
-								if hasContainers {
-									return fmt.Errorf("pod %s on node %s had incorrect containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.ContainerStatuses)
-								}
-								return nil
-							}
-							hasContainers = true
-							if len(pod.Status.ContainerStatuses) != 1 {
-								return fmt.Errorf("pod %s on node %s had incorrect containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.ContainerStatuses)
-							}
-							status := pod.Status.ContainerStatuses[0]
-							t := status.State.Terminated
-							if hasTerminated {
-								if status.State.Waiting != nil || status.State.Running != nil {
-									return fmt.Errorf("pod %s on node %s was terminated and then changed state: %#v", pod.Name, pod.Spec.NodeName, status)
-								}
-								if t == nil {
-									return fmt.Errorf("pod %s on node %s was terminated and then had termination cleared: %#v", pod.Name, pod.Spec.NodeName, status)
-								}
-							}
-							var hasNoStartTime bool
-							hasRunningContainers = status.State.Waiting == nil && status.State.Terminated == nil
-							if t != nil {
-								if !t.FinishedAt.Time.IsZero() {
-									if t.StartedAt.IsZero() {
-										hasNoStartTime = true
-									} else {
-										duration = t.FinishedAt.Sub(t.StartedAt.Time)
-									}
-									completeDuration = t.FinishedAt.Sub(pod.CreationTimestamp.Time)
-								}
-
-								defer func() { hasTerminated = true }()
-								switch {
-								case t.ExitCode == 1:
-									// expected
-								case t.ExitCode == 137 && (t.Reason == "ContainerStatusUnknown" || t.Reason == "Error"):
-									// expected, pod was force-killed after grace period
-								case t.ExitCode == 128 && (t.Reason == "StartError" || t.Reason == "ContainerCannotRun") && reBug88766.MatchString(t.Message):
-									// pod volume teardown races with container start in CRI, which reports a failure
-									framework.Logf("pod %s on node %s failed with the symptoms of https://github.com/kubernetes/kubernetes/issues/88766", pod.Name, pod.Spec.NodeName)
-								default:
-									data, _ := json.MarshalIndent(pod.Status, "", "  ")
-									framework.Logf("pod %s on node %s had incorrect final status:\n%s", pod.Name, pod.Spec.NodeName, string(data))
-									return fmt.Errorf("pod %s on node %s container unexpected exit code %d: start=%s end=%s reason=%s message=%s", pod.Name, pod.Spec.NodeName, t.ExitCode, t.StartedAt, t.FinishedAt, t.Reason, t.Message)
-								}
-								switch {
-								case duration > time.Hour:
-									// problem with status reporting
-									return fmt.Errorf("pod %s container %s on node %s had very long duration %s: start=%s end=%s", pod.Name, status.Name, pod.Spec.NodeName, duration, t.StartedAt, t.FinishedAt)
-								case hasNoStartTime:
-									// should never happen
-									return fmt.Errorf("pod %s container %s on node %s had finish time but not start time: end=%s", pod.Name, status.Name, pod.Spec.NodeName, t.FinishedAt)
-								}
-							}
-							if pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded {
-								hasTerminalPhase = true
-							} else {
-								if hasTerminalPhase {
-									return fmt.Errorf("pod %s on node %s was in a terminal phase and then reverted: %#v", pod.Name, pod.Spec.NodeName, pod.Status)
-								}
-							}
-							return nil
-						}
-
-						var eventErr error
-						for _, event := range events[1:] {
-							if err := verifyFn(event); err != nil {
-								eventErr = err
-								break
-							}
-						}
-						func() {
-							lock.Lock()
-							defer lock.Unlock()
-
-							if eventErr != nil {
-								errs = append(errs, eventErr)
-								return
-							}
-
-							if !hasTerminalPhase {
-								var names []string
-								for _, status := range pod.Status.ContainerStatuses {
-									if status.State.Running != nil {
-										names = append(names, status.Name)
-									}
-								}
-								switch {
-								case len(names) > 0:
-									errs = append(errs, fmt.Errorf("pod %s on node %s did not reach a terminal phase before being deleted but had running containers: phase=%s, running-containers=%s", pod.Name, pod.Spec.NodeName, pod.Status.Phase, strings.Join(names, ",")))
-								case pod.Status.Phase != v1.PodPending:
-									errs = append(errs, fmt.Errorf("pod %s on node %s was not Pending but has no running containers: phase=%s", pod.Name, pod.Spec.NodeName, pod.Status.Phase))
-								}
-							}
-							if hasRunningContainers {
-								data, _ := json.MarshalIndent(pod.Status.ContainerStatuses, "", "  ")
-								errs = append(errs, fmt.Errorf("pod %s on node %s had running or unknown container status before being deleted:\n%s", pod.Name, pod.Spec.NodeName, string(data)))
-							}
-						}()
-
-						if duration < min {
-							min = duration
-						}
-						if duration > max || max == 0 {
-							max = duration
-						}
-						h.WithLabelValues(pod.Spec.NodeName).Observe(end.Sub(start).Seconds())
-						framework.Logf("Pod %s on node %s timings total=%s t=%s run=%s execute=%s", pod.Name, pod.Spec.NodeName, end.Sub(start), t, completeDuration, duration)
-					}
-
-				}(i)
-			}
-
-			wg.Wait()
-
-			if len(errs) > 0 {
-				var messages []string
-				for _, err := range errs {
-					messages = append(messages, err.Error())
-				}
-				framework.Failf("%d errors:\n%v", len(errs), strings.Join(messages, "\n"))
-			}
-			values, _ := r.Gather()
-			var buf bytes.Buffer
-			for _, m := range values {
-				expfmt.MetricFamilyToText(&buf, m)
-			}
-			framework.Logf("Summary of latencies:\n%s", buf.String())
 		})
 	})
@@ -552,3 +298,422 @@ var _ = SIGDescribe("Pods Extended", func() {
 		})
 	})
 })
+
+func createAndTestPodRepeatedly(workers, iterations int, scenario podScenario, podClient v1core.PodInterface) {
+	var (
+		lock sync.Mutex
+		errs []error
+
+		wg sync.WaitGroup
+	)
+
+	r := prometheus.NewRegistry()
+	h := prometheus.NewSummaryVec(prometheus.SummaryOpts{
+		Name: "latency",
+		Objectives: map[float64]float64{
+			0.5:  0.05,
+			0.75: 0.025,
+			0.9:  0.01,
+			0.99: 0.001,
+		},
+	}, []string{"node"})
+	r.MustRegister(h)
+
+	for i := 0; i < workers; i++ {
+		wg.Add(1)
+		go func(i int) {
+			defer ginkgo.GinkgoRecover()
+			defer wg.Done()
+			for retries := 0; retries < iterations; retries++ {
+				pod := scenario.Pod(i, retries)
+
+				// create the pod, capture the change events, then delete the pod
+				start := time.Now()
+				created, err := podClient.Create(context.TODO(), pod, metav1.CreateOptions{})
+				framework.ExpectNoError(err, "failed to create pod")
+
+				ch := make(chan []watch.Event)
+				waitForWatch := make(chan struct{})
+				go func() {
+					defer ginkgo.GinkgoRecover()
+					defer close(ch)
+					w, err := podClient.Watch(context.TODO(), metav1.ListOptions{
+						ResourceVersion: created.ResourceVersion,
+						FieldSelector:   fmt.Sprintf("metadata.name=%s", pod.Name),
+					})
+					if err != nil {
+						framework.Logf("Unable to watch pod %s: %v", pod.Name, err)
+						return
+					}
+					defer w.Stop()
+					close(waitForWatch)
+					events := []watch.Event{
+						{Type: watch.Added, Object: created},
+					}
+					for event := range w.ResultChan() {
+						events = append(events, event)
+						if event.Type == watch.Error {
+							framework.Logf("watch error seen for %s: %#v", pod.Name, event.Object)
+						}
+						if scenario.IsLastEvent(event) {
+							framework.Logf("watch last event seen for %s", pod.Name)
+							break
+						}
+					}
+					ch <- events
+				}()
+
+				select {
+				case <-ch: // in case the goroutine above exits before establishing the watch
+				case <-waitForWatch: // when the watch is established
+				}
+
+				verifier, scenario, err := scenario.Action(pod)
+				framework.ExpectNoError(err, "failed to take action")
+
+				var (
+					events []watch.Event
+					ok     bool
+				)
+				select {
+				case events, ok = <-ch:
+					if !ok {
+						continue
+					}
+					if len(events) < 2 {
+						framework.Fail("only got a single event")
+					}
+				case <-time.After(5 * time.Minute):
+					framework.Failf("timed out waiting for watch events for %s", pod.Name)
+				}
+
+				end := time.Now()
+
+				var eventErr error
+				for _, event := range events[1:] {
+					if err := verifier.Verify(event); err != nil {
+						eventErr = err
+						break
+					}
+				}
+
+				total := end.Sub(start)
+
+				var lastPod *v1.Pod = pod
+				func() {
+					lock.Lock()
+					defer lock.Unlock()
+
+					if eventErr != nil {
+						errs = append(errs, eventErr)
+						return
+					}
+					pod, verifyErrs := verifier.VerifyFinal(scenario, total)
+					if pod != nil {
+						lastPod = pod
+					}
+					errs = append(errs, verifyErrs...)
+				}()
+
+				h.WithLabelValues(lastPod.Spec.NodeName).Observe(total.Seconds())
+			}
+		}(i)
+	}
+
+	wg.Wait()
+
+	if len(errs) > 0 {
+		var messages []string
+		for _, err := range errs {
+			messages = append(messages, err.Error())
+		}
+		framework.Failf("%d errors:\n%v", len(errs), strings.Join(messages, "\n"))
+	}
+	values, _ := r.Gather()
+	var buf bytes.Buffer
+	for _, m := range values {
+		expfmt.MetricFamilyToText(&buf, m)
+	}
+	framework.Logf("Summary of latencies:\n%s", buf.String())
+}
+
+type podScenario interface {
+	Pod(worker, attempt int) *v1.Pod
+	Action(*v1.Pod) (podScenarioVerifier, string, error)
+	IsLastEvent(event watch.Event) bool
+}
+
+type podScenarioVerifier interface {
+	Verify(event watch.Event) error
+	VerifyFinal(scenario string, duration time.Duration) (*v1.Pod, []error)
+}
+
+type podFastDeleteScenario struct {
+	client  v1core.PodInterface
+	delayMs int
+
+	initContainer bool
+}
+
+func (s podFastDeleteScenario) Verifier(pod *v1.Pod) podScenarioVerifier {
+	return &podStartVerifier{}
+}
+
+func (s podFastDeleteScenario) IsLastEvent(event watch.Event) bool {
+	if event.Type == watch.Deleted {
+		return true
+	}
+	return false
+}
+
+func (s podFastDeleteScenario) Action(pod *v1.Pod) (podScenarioVerifier, string, error) {
+	t := time.Duration(rand.Intn(s.delayMs)) * time.Millisecond
+	scenario := fmt.Sprintf("t=%s", t)
+	time.Sleep(t)
+	return &podStartVerifier{pod: pod}, scenario, s.client.Delete(context.TODO(), pod.Name, metav1.DeleteOptions{})
+}
+
+func (s podFastDeleteScenario) Pod(worker, attempt int) *v1.Pod {
+	name := fmt.Sprintf("pod-terminate-status-%d-%d", worker, attempt)
+	value := strconv.Itoa(time.Now().Nanosecond())
+	one := int64(1)
+	if s.initContainer {
+		return &v1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: name,
+				Labels: map[string]string{
+					"name": "foo",
+					"time": value,
+				},
+			},
+			Spec: v1.PodSpec{
+				RestartPolicy:                 v1.RestartPolicyNever,
+				TerminationGracePeriodSeconds: &one,
+				InitContainers: []v1.Container{
+					{
+						Name:  "fail",
+						Image: imageutils.GetE2EImage(imageutils.BusyBox),
+						Command: []string{
+							"/bin/false",
+						},
+						Resources: v1.ResourceRequirements{
+							Requests: v1.ResourceList{
+								v1.ResourceCPU:    resource.MustParse("5m"),
+								v1.ResourceMemory: resource.MustParse("10Mi"),
+							},
+						},
+					},
+				},
+				Containers: []v1.Container{
+					{
+						Name:  "blocked",
+						Image: imageutils.GetE2EImage(imageutils.BusyBox),
+						Command: []string{
+							"/bin/true",
+						},
+						Resources: v1.ResourceRequirements{
+							Requests: v1.ResourceList{
+								v1.ResourceCPU:    resource.MustParse("5m"),
+								v1.ResourceMemory: resource.MustParse("10Mi"),
+							},
+						},
+					},
+				},
+			},
+		}
+	}
+	return &v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: name,
+			Labels: map[string]string{
+				"name": "foo",
+				"time": value,
+			},
+		},
+		Spec: v1.PodSpec{
+			RestartPolicy:                 v1.RestartPolicyNever,
+			TerminationGracePeriodSeconds: &one,
+			Containers: []v1.Container{
+				{
+					Name:  "fail",
+					Image: imageutils.GetE2EImage(imageutils.BusyBox),
+					Command: []string{
+						"/bin/false",
+					},
+					Resources: v1.ResourceRequirements{
+						Requests: v1.ResourceList{
+							v1.ResourceCPU:    resource.MustParse("5m"),
+							v1.ResourceMemory: resource.MustParse("10Mi"),
+						},
+					},
+				},
+			},
+		},
+	}
+}
+
+// podStartVerifier checks events for a given pod and looks for unexpected
+// transitions. It assumes one container running to completion.
+type podStartVerifier struct {
+	pod                  *v1.Pod
+	hasInitContainers    bool
+	hasContainers        bool
+	hasTerminated        bool
+	hasRunningContainers bool
+	hasTerminalPhase     bool
+	duration             time.Duration
+	completeDuration     time.Duration
+}
+
+var reBug88766 = regexp.MustCompile(`rootfs_linux.*kubernetes\.io~(secret|projected).*no such file or directory`)
+
+// Verify takes successive watch events for a given pod and returns an error if the status is unexpected.
+// This verifier works for any pod which has at most one init container and a single regular container.
+func (v *podStartVerifier) Verify(event watch.Event) error {
+	var ok bool
+	pod, ok := event.Object.(*v1.Pod)
+	if !ok {
+		framework.Logf("Unexpected event object: %s %#v", event.Type, event.Object)
+		return nil
+	}
+	v.pod = pod
+
+	if len(pod.Spec.InitContainers) > 0 {
+		if len(pod.Status.InitContainerStatuses) == 0 {
+			if v.hasInitContainers {
+				return fmt.Errorf("pod %s on node %s had incorrect init containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.InitContainerStatuses)
+			}
+			return nil
+		}
+		v.hasInitContainers = true
+		if len(pod.Status.InitContainerStatuses) != 1 {
+			return fmt.Errorf("pod %s on node %s had incorrect init containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.InitContainerStatuses)
+		}
+
+	} else {
+		if len(pod.Status.InitContainerStatuses) != 0 {
+			return fmt.Errorf("pod %s on node %s had incorrect init containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.InitContainerStatuses)
+		}
+	}
+
+	if len(pod.Status.ContainerStatuses) == 0 {
+		if v.hasContainers {
+			return fmt.Errorf("pod %s on node %s had incorrect containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.ContainerStatuses)
+		}
+		return nil
+	}
+	v.hasContainers = true
+	if len(pod.Status.ContainerStatuses) != 1 {
+		return fmt.Errorf("pod %s on node %s had incorrect containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.ContainerStatuses)
+	}
+
+	if status := findContainerStatusInPod(pod, "blocked"); status != nil {
+		if (status.Started != nil && *status.Started == true) || status.LastTerminationState.Terminated != nil || status.State.Waiting == nil {
+			return fmt.Errorf("pod %s on node %s should not have started the blocked container: %#v", pod.Name, pod.Spec.NodeName, status)
+		}
+	}
+
+	status := findContainerStatusInPod(pod, "fail")
+	if status == nil {
+		return fmt.Errorf("pod %s on node %s had incorrect containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status)
+	}
+
+	t := status.State.Terminated
+	if v.hasTerminated {
+		if status.State.Waiting != nil || status.State.Running != nil {
+			return fmt.Errorf("pod %s on node %s was terminated and then changed state: %#v", pod.Name, pod.Spec.NodeName, status)
+		}
+		if t == nil {
+			return fmt.Errorf("pod %s on node %s was terminated and then had termination cleared: %#v", pod.Name, pod.Spec.NodeName, status)
+		}
+	}
+	var hasNoStartTime bool
+	v.hasRunningContainers = status.State.Waiting == nil && status.State.Terminated == nil
+	if t != nil {
+		if !t.FinishedAt.Time.IsZero() {
+			if t.StartedAt.IsZero() {
+				hasNoStartTime = true
+			} else {
+				v.duration = t.FinishedAt.Sub(t.StartedAt.Time)
+			}
+			v.completeDuration = t.FinishedAt.Sub(pod.CreationTimestamp.Time)
+		}
+
+		defer func() { v.hasTerminated = true }()
+		switch {
+		case t.ExitCode == 1:
+			// expected
+		case t.ExitCode == 137 && (t.Reason == "ContainerStatusUnknown" || t.Reason == "Error"):
+			// expected, pod was force-killed after grace period
+		case t.ExitCode == 128 && (t.Reason == "StartError" || t.Reason == "ContainerCannotRun") && reBug88766.MatchString(t.Message):
+			// pod volume teardown races with container start in CRI, which reports a failure
+			framework.Logf("pod %s on node %s failed with the symptoms of https://github.com/kubernetes/kubernetes/issues/88766", pod.Name, pod.Spec.NodeName)
+		default:
+			data, _ := json.MarshalIndent(pod.Status, "", "  ")
+			framework.Logf("pod %s on node %s had incorrect final status:\n%s", pod.Name, pod.Spec.NodeName, string(data))
+			return fmt.Errorf("pod %s on node %s container unexpected exit code %d: start=%s end=%s reason=%s message=%s", pod.Name, pod.Spec.NodeName, t.ExitCode, t.StartedAt, t.FinishedAt, t.Reason, t.Message)
+		}
+		switch {
+		case v.duration > time.Hour:
+			// problem with status reporting
+			return fmt.Errorf("pod %s container %s on node %s had very long duration %s: start=%s end=%s", pod.Name, status.Name, pod.Spec.NodeName, v.duration, t.StartedAt, t.FinishedAt)
+		case hasNoStartTime:
+			// should never happen
+			return fmt.Errorf("pod %s container %s on node %s had finish time but not start time: end=%s", pod.Name, status.Name, pod.Spec.NodeName, t.FinishedAt)
+		}
+	}
+	if pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded {
+		v.hasTerminalPhase = true
+	} else {
+		if v.hasTerminalPhase {
+			return fmt.Errorf("pod %s on node %s was in a terminal phase and then reverted: %#v", pod.Name, pod.Spec.NodeName, pod.Status)
+		}
+	}
+	return nil
+}
+
+func (v *podStartVerifier) VerifyFinal(scenario string, total time.Duration) (*v1.Pod, []error) {
+	var errs []error
+	pod := v.pod
+	if !v.hasTerminalPhase {
+		var names []string
+		for _, status := range pod.Status.ContainerStatuses {
+			if status.State.Running != nil {
+				names = append(names, status.Name)
+			}
+		}
+		switch {
+		case len(names) > 0:
+			errs = append(errs, fmt.Errorf("pod %s on node %s did not reach a terminal phase before being deleted but had running containers: phase=%s, running-containers=%s", pod.Name, pod.Spec.NodeName, pod.Status.Phase, strings.Join(names, ",")))
+		case pod.Status.Phase != v1.PodPending:
+			errs = append(errs, fmt.Errorf("pod %s on node %s was not Pending but has no running containers: phase=%s", pod.Name, pod.Spec.NodeName, pod.Status.Phase))
+		}
+	}
+	if v.hasRunningContainers {
+		data, _ := json.MarshalIndent(pod.Status.ContainerStatuses, "", "  ")
+		errs = append(errs, fmt.Errorf("pod %s on node %s had running or unknown container status before being deleted:\n%s", pod.Name, pod.Spec.NodeName, string(data)))
+	}
+
+	framework.Logf("Pod %s on node %s %s total=%s run=%s execute=%s", pod.Name, pod.Spec.NodeName, scenario, total, v.completeDuration, v.duration)
+	return pod, errs
+}
+
+// findContainerStatusInPod finds a container status by its name in the provided pod
+func findContainerStatusInPod(pod *v1.Pod, containerName string) *v1.ContainerStatus {
+	for _, container := range pod.Status.InitContainerStatuses {
+		if container.Name == containerName {
+			return &container
+		}
+	}
+	for _, container := range pod.Status.ContainerStatuses {
+		if container.Name == containerName {
+			return &container
+		}
+	}
+	for _, container := range pod.Status.EphemeralContainerStatuses {
+		if container.Name == containerName {
+			return &container
+		}
+	}
+	return nil
+}