From f8dbdc1695ae636dbb42cbe88fc5a68e7ae060c8 Mon Sep 17 00:00:00 2001
From: gmarek
Date: Tue, 30 Aug 2016 14:07:22 +0200
Subject: [PATCH] AllNodesReady waits for all system pods to be running

---
 test/e2e/framework/util.go       | 77 +++++++++++++++++++++++++++++++-
 test/e2e/scheduler_predicates.go |  1 +
 2 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/test/e2e/framework/util.go b/test/e2e/framework/util.go
index 6ce06bb9f1a..d54cf362b61 100644
--- a/test/e2e/framework/util.go
+++ b/test/e2e/framework/util.go
@@ -167,6 +167,13 @@ var (
 
 	// For parsing Kubectl version for version-skewed testing.
 	gitVersionRegexp = regexp.MustCompile("GitVersion:\"(v.+?)\"")
+
+	// Slice of regexps matching names of pods that have to be running on a Node for it to be considered "healthy".
+	requiredPerNodePods = []*regexp.Regexp{
+		regexp.MustCompile(".*kube-proxy.*"),
+		regexp.MustCompile(".*fluentd-elasticsearch.*"),
+		regexp.MustCompile(".*node-problem-detector.*"),
+	}
 )
 
 // GetServerArchitecture fetches the architecture of the cluster's apiserver.
@@ -4003,7 +4010,10 @@ func WaitForNodeToBe(c *client.Client, name string, conditionType api.NodeCondit
 	return false
 }
 
-// checks whether all registered nodes are ready
+// AllNodesReady checks whether all registered nodes are ready.
+// TODO: we should change the AllNodesReady call in AfterEach to WaitForAllNodesHealthy,
+// and figure out how to do it in a configurable way, as we can't expect all setups to run
+// default test add-ons.
 func AllNodesReady(c *client.Client, timeout time.Duration) error {
 	Logf("Waiting up to %v for all nodes to be ready", timeout)
 
@@ -4033,6 +4043,71 @@ func AllNodesReady(c *client.Client, timeout time.Duration) error {
 	return nil
 }
 
+// WaitForAllNodesHealthy checks whether all registered nodes are ready and all required Pods are running on them.
+func WaitForAllNodesHealthy(c *client.Client, timeout time.Duration) error {
+	Logf("Waiting up to %v for all nodes to be ready and all required system pods to be running", timeout)
+
+	var notReady []api.Node
+	var missingPodsPerNode map[string][]string
+	err := wait.PollImmediate(Poll, timeout, func() (bool, error) {
+		notReady = nil
+		// It should be OK to list unschedulable Nodes here.
+		nodes, err := c.Nodes().List(api.ListOptions{ResourceVersion: "0"})
+		if err != nil {
+			return false, err
+		}
+		for _, node := range nodes.Items {
+			if !IsNodeConditionSetAsExpected(&node, api.NodeReady, true) {
+				notReady = append(notReady, node)
+			}
+		}
+		pods, err := c.Pods(api.NamespaceAll).List(api.ListOptions{ResourceVersion: "0"})
+		if err != nil {
+			return false, err
+		}
+
+		systemPodsPerNode := make(map[string][]string)
+		for _, pod := range pods.Items {
+			if pod.Namespace == api.NamespaceSystem && pod.Status.Phase == api.PodRunning {
+				if pod.Spec.NodeName != "" {
+					systemPodsPerNode[pod.Spec.NodeName] = append(systemPodsPerNode[pod.Spec.NodeName], pod.Name)
+				}
+			}
+		}
+		missingPodsPerNode = make(map[string][]string)
+		for _, node := range nodes.Items {
+			if !system.IsMasterNode(&node) {
+				for _, requiredPod := range requiredPerNodePods {
+					foundRequired := false
+					for _, presentPod := range systemPodsPerNode[node.Name] {
+						if requiredPod.MatchString(presentPod) {
+							foundRequired = true
+							break
+						}
+					}
+					if !foundRequired {
+						missingPodsPerNode[node.Name] = append(missingPodsPerNode[node.Name], requiredPod.String())
+					}
+				}
+			}
+		}
+		return len(notReady) == 0 && len(missingPodsPerNode) == 0, nil
+	})
+
+	if err != nil && err != wait.ErrWaitTimeout {
+		return err
+	}
+
+	if len(notReady) > 0 {
+		return fmt.Errorf("Not ready nodes: %v", notReady)
+	}
+	if len(missingPodsPerNode) > 0 {
+		return fmt.Errorf("Not running system Pods: %v", missingPodsPerNode)
+	}
+	return nil
+
+}
+
 // Filters nodes in NodeList in place, removing nodes that do not
 // satisfy the given condition
 // TODO: consider merging with pkg/client/cache.NodeLister
diff --git a/test/e2e/scheduler_predicates.go b/test/e2e/scheduler_predicates.go
index 843160cb1e8..5747517657f 100644
--- a/test/e2e/scheduler_predicates.go
+++ b/test/e2e/scheduler_predicates.go
@@ -198,6 +198,7 @@ var _ = framework.KubeDescribe("SchedulerPredicates [Serial]", func() {
 		ns = f.Namespace.Name
 		nodeList = &api.NodeList{}
 
+		framework.ExpectNoError(framework.WaitForAllNodesHealthy(c, time.Minute))
 		masterNodes, nodeList = framework.GetMasterAndWorkerNodesOrDie(c)
 
 		err := framework.CheckTestingNSDeletedExcept(c, ns)
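
Reviewer note: the core of the new helper is the per-node matching step. It builds a map from node name to the names of that node's running kube-system pods, then checks each node's pod list against the requiredPerNodePods regexps. Below is a minimal standalone sketch of just that step, with no client-go dependency; the helper name missingRequiredPods and the node/pod names in main are invented for illustration, and unlike the patch (which walks the full node list, so a node with no system pods at all is still flagged) this sketch only inspects nodes present in the map.

package main

import (
	"fmt"
	"regexp"
)

// Mirrors requiredPerNodePods from the patch: a node counts as "healthy"
// only if some running system pod matches each of these patterns.
var requiredPerNodePods = []*regexp.Regexp{
	regexp.MustCompile(".*kube-proxy.*"),
	regexp.MustCompile(".*fluentd-elasticsearch.*"),
	regexp.MustCompile(".*node-problem-detector.*"),
}

// missingRequiredPods returns, per node, the patterns with no matching
// running pod (the same shape as missingPodsPerNode in the patch).
func missingRequiredPods(systemPodsPerNode map[string][]string) map[string][]string {
	missing := make(map[string][]string)
	for node, pods := range systemPodsPerNode {
		for _, required := range requiredPerNodePods {
			found := false
			for _, pod := range pods {
				if required.MatchString(pod) {
					found = true
					break
				}
			}
			if !found {
				missing[node] = append(missing[node], required.String())
			}
		}
	}
	return missing
}

func main() {
	// Hypothetical running kube-system pods, keyed by node name.
	systemPodsPerNode := map[string][]string{
		"node-1": {"kube-proxy-abc12", "fluentd-elasticsearch-4qx", "node-problem-detector-7hn"},
		"node-2": {"kube-proxy-def34"}, // fluentd and node-problem-detector missing
	}
	// Prints: map[node-2:[.*fluentd-elasticsearch.* .*node-problem-detector.*]]
	fmt.Println(missingRequiredPods(systemPodsPerNode))
}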