From eb9d1cb5ccabf2c7f06464bf77fbb07bead27222 Mon Sep 17 00:00:00 2001
From: Kenichi Omichi
Date: Tue, 5 Nov 2019 23:57:47 +0000
Subject: [PATCH] Move functions from e2e/framework/util.go Part-2

This PR moves functions from test/e2e/framework/util.go to make the e2e
core framework small and simple:
- RestartKubeProxy: Moved to e2e network package
- CheckConnectivityToHost: Moved to e2e network package
- RemoveAvoidPodsOffNode: Moved to e2e scheduling package
- AddOrUpdateAvoidPodOnNode: Moved to e2e scheduling package
- UpdateDaemonSetWithRetries: Moved to e2e apps package
- CheckForControllerManagerHealthy: Moved to e2e storage package
- ParseKVLines: Removed because of e9345ae5f05163adc84473ddaa1f819d62fe0927
- AddOrUpdateLabelOnNodeAndReturnOldValue: Removed because of ff7b07c43c9992bb03cbf00beec735133a0c5d90
---
 test/e2e/apps/daemon_set.go                 |  34 ++-
 test/e2e/framework/util.go                  | 232 ------
 test/e2e/network/networking.go              |  58 ++++-
 test/e2e/network/service.go                 |  39 ++-
 test/e2e/scheduling/priorities.go           |  67 ++++-
 .../nfs_persistent_volume-disruptive.go     |  31 ++-
 6 files changed, 221 insertions(+), 240 deletions(-)

diff --git a/test/e2e/apps/daemon_set.go b/test/e2e/apps/daemon_set.go
index 82ffbd7aacf..f7528160962 100644
--- a/test/e2e/apps/daemon_set.go
+++ b/test/e2e/apps/daemon_set.go
@@ -38,6 +38,7 @@ import (
 	schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
+	testutils "k8s.io/kubernetes/test/utils"

 	"github.com/onsi/ginkgo"
 	"github.com/onsi/gomega"
@@ -58,6 +59,35 @@ const (
 // node selectors labels to namespaces
 var NamespaceNodeSelectors = []string{"scheduler.alpha.kubernetes.io/node-selector"}

+type updateDSFunc func(*appsv1.DaemonSet)
+
+// updateDaemonSetWithRetries updates daemonsets with the given applyUpdate func
+// until it succeeds or a timeout expires.
+func updateDaemonSetWithRetries(c clientset.Interface, namespace, name string, applyUpdate updateDSFunc) (ds *appsv1.DaemonSet, err error) {
+	daemonsets := c.AppsV1().DaemonSets(namespace)
+	var updateErr error
+	pollErr := wait.PollImmediate(10*time.Millisecond, 1*time.Minute, func() (bool, error) {
+		if ds, err = daemonsets.Get(name, metav1.GetOptions{}); err != nil {
+			if testutils.IsRetryableAPIError(err) {
+				return false, nil
+			}
+			return false, err
+		}
+		// Apply the update, then attempt to push it to the apiserver.
+		applyUpdate(ds)
+		if ds, err = daemonsets.Update(ds); err == nil {
+			framework.Logf("Updating DaemonSet %s", name)
+			return true, nil
+		}
+		updateErr = err
+		return false, nil
+	})
+	if pollErr == wait.ErrWaitTimeout {
+		pollErr = fmt.Errorf("couldn't apply the provided updated to DaemonSet %q: %v", name, updateErr)
+	}
+	return ds, pollErr
+}
+
 // This test must be run in serial because it assumes the Daemon Set pods will
 // always get scheduled. If we run other tests in parallel, this may not
 // happen.
In the future, running in parallel may work if we have an eviction @@ -399,7 +429,7 @@ var _ = SIGDescribe("Daemon set [Serial]", func() { framework.Logf("Update the DaemonSet to trigger a rollout") // We use a nonexistent image here, so that we make sure it won't finish newImage := "foo:non-existent" - newDS, err := framework.UpdateDaemonSetWithRetries(c, ns, ds.Name, func(update *appsv1.DaemonSet) { + newDS, err := updateDaemonSetWithRetries(c, ns, ds.Name, func(update *appsv1.DaemonSet) { update.Spec.Template.Spec.Containers[0].Image = newImage }) framework.ExpectNoError(err) @@ -432,7 +462,7 @@ var _ = SIGDescribe("Daemon set [Serial]", func() { framework.ExpectNotEqual(len(newPods), 0) framework.Logf("Roll back the DaemonSet before rollout is complete") - rollbackDS, err := framework.UpdateDaemonSetWithRetries(c, ns, ds.Name, func(update *appsv1.DaemonSet) { + rollbackDS, err := updateDaemonSetWithRetries(c, ns, ds.Name, func(update *appsv1.DaemonSet) { update.Spec.Template.Spec.Containers[0].Image = image }) framework.ExpectNoError(err) diff --git a/test/e2e/framework/util.go b/test/e2e/framework/util.go index 8b8c6b0197a..b4ca71ed933 100644 --- a/test/e2e/framework/util.go +++ b/test/e2e/framework/util.go @@ -1379,16 +1379,6 @@ func AddOrUpdateLabelOnNode(c clientset.Interface, nodeName string, labelKey, la ExpectNoError(testutils.AddLabelsToNode(c, nodeName, map[string]string{labelKey: labelValue})) } -// AddOrUpdateLabelOnNodeAndReturnOldValue adds the given label key and value to the given node or updates value and returns the old label value. -func AddOrUpdateLabelOnNodeAndReturnOldValue(c clientset.Interface, nodeName string, labelKey, labelValue string) string { - var oldValue string - node, err := c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{}) - ExpectNoError(err) - oldValue = node.Labels[labelKey] - ExpectNoError(testutils.AddLabelsToNode(c, nodeName, map[string]string{labelKey: labelValue})) - return oldValue -} - // ExpectNodeHasLabel expects that the given node has the given label pair. func ExpectNodeHasLabel(c clientset.Interface, nodeName string, labelKey string, labelValue string) { ginkgo.By("verifying the node has the label " + labelKey + " " + labelValue) @@ -1451,67 +1441,6 @@ func NodeHasTaint(c clientset.Interface, nodeName string, taint *v1.Taint) (bool return true, nil } -// AddOrUpdateAvoidPodOnNode adds avoidPods annotations to node, will override if it exists -func AddOrUpdateAvoidPodOnNode(c clientset.Interface, nodeName string, avoidPods v1.AvoidPods) { - err := wait.PollImmediate(Poll, SingleCallTimeout, func() (bool, error) { - node, err := c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{}) - if err != nil { - if testutils.IsRetryableAPIError(err) { - return false, nil - } - return false, err - } - - taintsData, err := json.Marshal(avoidPods) - ExpectNoError(err) - - if node.Annotations == nil { - node.Annotations = make(map[string]string) - } - node.Annotations[v1.PreferAvoidPodsAnnotationKey] = string(taintsData) - _, err = c.CoreV1().Nodes().Update(node) - if err != nil { - if !apierrs.IsConflict(err) { - ExpectNoError(err) - } else { - Logf("Conflict when trying to add/update avoidPods %v to %v with error %v", avoidPods, nodeName, err) - return false, nil - } - } - return true, nil - }) - ExpectNoError(err) -} - -// RemoveAvoidPodsOffNode removes AvoidPods annotations from the node. It does not fail if no such annotation exists. 
-func RemoveAvoidPodsOffNode(c clientset.Interface, nodeName string) { - err := wait.PollImmediate(Poll, SingleCallTimeout, func() (bool, error) { - node, err := c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{}) - if err != nil { - if testutils.IsRetryableAPIError(err) { - return false, nil - } - return false, err - } - - if node.Annotations == nil { - return true, nil - } - delete(node.Annotations, v1.PreferAvoidPodsAnnotationKey) - _, err = c.CoreV1().Nodes().Update(node) - if err != nil { - if !apierrs.IsConflict(err) { - ExpectNoError(err) - } else { - Logf("Conflict when trying to remove avoidPods to %v", nodeName) - return false, nil - } - } - return true, nil - }) - ExpectNoError(err) -} - // ScaleResource scales resource to the given size. func ScaleResource( clientset clientset.Interface, @@ -1602,35 +1531,6 @@ func DeleteResourceAndWaitForGC(c clientset.Interface, kind schema.GroupKind, ns return nil } -type updateDSFunc func(*appsv1.DaemonSet) - -// UpdateDaemonSetWithRetries updates daemonsets with the given applyUpdate func -// until it succeeds or a timeout expires. -func UpdateDaemonSetWithRetries(c clientset.Interface, namespace, name string, applyUpdate updateDSFunc) (ds *appsv1.DaemonSet, err error) { - daemonsets := c.AppsV1().DaemonSets(namespace) - var updateErr error - pollErr := wait.PollImmediate(10*time.Millisecond, 1*time.Minute, func() (bool, error) { - if ds, err = daemonsets.Get(name, metav1.GetOptions{}); err != nil { - if testutils.IsRetryableAPIError(err) { - return false, nil - } - return false, err - } - // Apply the update, then attempt to push it to the apiserver. - applyUpdate(ds) - if ds, err = daemonsets.Update(ds); err == nil { - Logf("Updating DaemonSet %s", name) - return true, nil - } - updateErr = err - return false, nil - }) - if pollErr == wait.ErrWaitTimeout { - pollErr = fmt.Errorf("couldn't apply the provided updated to DaemonSet %q: %v", name, updateErr) - } - return ds, pollErr -} - // RunHostCmd runs the given cmd in the context of the given pod using `kubectl exec` // inside of a shell. func RunHostCmd(ns, name, cmd string) (string, error) { @@ -1709,61 +1609,6 @@ func AllNodesReady(c clientset.Interface, timeout time.Duration) error { return nil } -// ParseKVLines parses output that looks like lines containing ": " -// and returns if is found. Otherwise, it returns the empty string. -func ParseKVLines(output, key string) string { - delim := ":" - key = key + delim - for _, line := range strings.Split(output, "\n") { - pieces := strings.SplitAfterN(line, delim, 2) - if len(pieces) != 2 { - continue - } - k, v := pieces[0], pieces[1] - if k == key { - return strings.TrimSpace(v) - } - } - return "" -} - -// RestartKubeProxy restarts kube-proxy on the given host. -func RestartKubeProxy(host string) error { - // TODO: Make it work for all providers. 
- if !ProviderIs("gce", "gke", "aws") { - return fmt.Errorf("unsupported provider for RestartKubeProxy: %s", TestContext.Provider) - } - // kubelet will restart the kube-proxy since it's running in a static pod - Logf("Killing kube-proxy on node %v", host) - result, err := e2essh.SSH("sudo pkill kube-proxy", host, TestContext.Provider) - if err != nil || result.Code != 0 { - e2essh.LogResult(result) - return fmt.Errorf("couldn't restart kube-proxy: %v", err) - } - // wait for kube-proxy to come back up - sshCmd := "sudo /bin/sh -c 'pgrep kube-proxy | wc -l'" - err = wait.Poll(5*time.Second, 60*time.Second, func() (bool, error) { - Logf("Waiting for kubeproxy to come back up with %v on %v", sshCmd, host) - result, err := e2essh.SSH(sshCmd, host, TestContext.Provider) - if err != nil { - return false, err - } - if result.Code != 0 { - e2essh.LogResult(result) - return false, fmt.Errorf("failed to run command, exited %d", result.Code) - } - if result.Stdout == "0\n" { - return false, nil - } - Logf("kube-proxy is back up.") - return true, nil - }) - if err != nil { - return fmt.Errorf("kube-proxy didn't recover: %v", err) - } - return nil -} - // RestartKubelet restarts kubelet on the given host. func RestartKubelet(host string) error { // TODO: Make it work for all providers and distros. @@ -1951,33 +1796,6 @@ func WaitForControllerManagerUp() error { return fmt.Errorf("waiting for controller-manager timed out") } -// CheckForControllerManagerHealthy checks that the controller manager does not crash within "duration" -func CheckForControllerManagerHealthy(duration time.Duration) error { - var PID string - cmd := "pidof kube-controller-manager" - for start := time.Now(); time.Since(start) < duration; time.Sleep(5 * time.Second) { - result, err := e2essh.SSH(cmd, net.JoinHostPort(GetMasterHost(), sshPort), TestContext.Provider) - if err != nil { - // We don't necessarily know that it crashed, pipe could just be broken - e2essh.LogResult(result) - return fmt.Errorf("master unreachable after %v", time.Since(start)) - } else if result.Code != 0 { - e2essh.LogResult(result) - return fmt.Errorf("SSH result code not 0. actually: %v after %v", result.Code, time.Since(start)) - } else if result.Stdout != PID { - if PID == "" { - PID = result.Stdout - } else { - //its dead - return fmt.Errorf("controller manager crashed, old PID: %s, new PID: %s", PID, result.Stdout) - } - } else { - Logf("kube-controller-manager still healthy after %v", time.Since(start)) - } - } - return nil -} - // GenerateMasterRegexp returns a regex for matching master node name. func GenerateMasterRegexp(prefix string) string { return prefix + "(-...)?" @@ -2191,56 +2009,6 @@ func UnblockNetwork(from string, to string) { } } -// CheckConnectivityToHost launches a pod to test connectivity to the specified -// host. An error will be returned if the host is not reachable from the pod. -// -// An empty nodeName will use the schedule to choose where the pod is executed. 
-func CheckConnectivityToHost(f *Framework, nodeName, podName, host string, port, timeout int) error { - contName := fmt.Sprintf("%s-container", podName) - - command := []string{ - "nc", - "-vz", - "-w", strconv.Itoa(timeout), - host, - strconv.Itoa(port), - } - - pod := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: podName, - }, - Spec: v1.PodSpec{ - Containers: []v1.Container{ - { - Name: contName, - Image: AgnHostImage, - Command: command, - }, - }, - NodeName: nodeName, - RestartPolicy: v1.RestartPolicyNever, - }, - } - podClient := f.ClientSet.CoreV1().Pods(f.Namespace.Name) - _, err := podClient.Create(pod) - if err != nil { - return err - } - err = e2epod.WaitForPodSuccessInNamespace(f.ClientSet, podName, f.Namespace.Name) - - if err != nil { - logs, logErr := e2epod.GetPodLogs(f.ClientSet, f.Namespace.Name, pod.Name, contName) - if logErr != nil { - Logf("Warning: Failed to get logs from pod %q: %v", pod.Name, logErr) - } else { - Logf("pod %s/%s logs:\n%s", f.Namespace.Name, pod.Name, logs) - } - } - - return err -} - // CoreDump SSHs to the master and all nodes and dumps their logs into dir. // It shells out to cluster/log-dump/log-dump.sh to accomplish this. func CoreDump(dir string) { diff --git a/test/e2e/network/networking.go b/test/e2e/network/networking.go index 0375e9601ae..7d9345165fd 100644 --- a/test/e2e/network/networking.go +++ b/test/e2e/network/networking.go @@ -19,19 +19,73 @@ package network import ( "fmt" "net/http" + "strconv" "strings" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" utilwait "k8s.io/apimachinery/pkg/util/wait" "k8s.io/kubernetes/pkg/master/ports" "k8s.io/kubernetes/test/e2e/framework" e2enetwork "k8s.io/kubernetes/test/e2e/framework/network" + e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2eservice "k8s.io/kubernetes/test/e2e/framework/service" e2essh "k8s.io/kubernetes/test/e2e/framework/ssh" "github.com/onsi/ginkgo" ) +// checkConnectivityToHost launches a pod to test connectivity to the specified +// host. An error will be returned if the host is not reachable from the pod. +// +// An empty nodeName will use the schedule to choose where the pod is executed. 
+func checkConnectivityToHost(f *framework.Framework, nodeName, podName, host string, port, timeout int) error { + contName := fmt.Sprintf("%s-container", podName) + + command := []string{ + "nc", + "-vz", + "-w", strconv.Itoa(timeout), + host, + strconv.Itoa(port), + } + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: contName, + Image: framework.AgnHostImage, + Command: command, + }, + }, + NodeName: nodeName, + RestartPolicy: v1.RestartPolicyNever, + }, + } + podClient := f.ClientSet.CoreV1().Pods(f.Namespace.Name) + _, err := podClient.Create(pod) + if err != nil { + return err + } + err = e2epod.WaitForPodSuccessInNamespace(f.ClientSet, podName, f.Namespace.Name) + + if err != nil { + logs, logErr := e2epod.GetPodLogs(f.ClientSet, f.Namespace.Name, pod.Name, contName) + if logErr != nil { + framework.Logf("Warning: Failed to get logs from pod %q: %v", pod.Name, logErr) + } else { + framework.Logf("pod %s/%s logs:\n%s", f.Namespace.Name, pod.Name, logs) + } + } + + return err +} + var _ = SIGDescribe("Networking", func() { var svcname = "nettest" f := framework.NewDefaultFramework(svcname) @@ -53,7 +107,7 @@ var _ = SIGDescribe("Networking", func() { ginkgo.It("should provide Internet connection for containers [Feature:Networking-IPv4]", func() { ginkgo.By("Running container which tries to connect to 8.8.8.8") framework.ExpectNoError( - framework.CheckConnectivityToHost(f, "", "connectivity-test", "8.8.8.8", 53, 30)) + checkConnectivityToHost(f, "", "connectivity-test", "8.8.8.8", 53, 30)) }) ginkgo.It("should provide Internet connection for containers [Feature:Networking-IPv6][Experimental][LinuxOnly]", func() { @@ -61,7 +115,7 @@ var _ = SIGDescribe("Networking", func() { framework.SkipIfNodeOSDistroIs("windows") ginkgo.By("Running container which tries to connect to 2001:4860:4860::8888") framework.ExpectNoError( - framework.CheckConnectivityToHost(f, "", "connectivity-test", "2001:4860:4860::8888", 53, 30)) + checkConnectivityToHost(f, "", "connectivity-test", "2001:4860:4860::8888", 53, 30)) }) // First test because it has no dependencies on variables created later on. diff --git a/test/e2e/network/service.go b/test/e2e/network/service.go index b390327029a..52a8f0f5b4a 100644 --- a/test/e2e/network/service.go +++ b/test/e2e/network/service.go @@ -83,6 +83,43 @@ func getServeHostnameService(name string) *v1.Service { return svc } +// restartKubeProxy restarts kube-proxy on the given host. +func restartKubeProxy(host string) error { + // TODO: Make it work for all providers. 
+	if !framework.ProviderIs("gce", "gke", "aws") {
+		return fmt.Errorf("unsupported provider for restartKubeProxy: %s", framework.TestContext.Provider)
+	}
+	// kubelet will restart the kube-proxy since it's running in a static pod
+	framework.Logf("Killing kube-proxy on node %v", host)
+	result, err := e2essh.SSH("sudo pkill kube-proxy", host, framework.TestContext.Provider)
+	if err != nil || result.Code != 0 {
+		e2essh.LogResult(result)
+		return fmt.Errorf("couldn't restart kube-proxy: %v", err)
+	}
+	// wait for kube-proxy to come back up
+	sshCmd := "sudo /bin/sh -c 'pgrep kube-proxy | wc -l'"
+	err = wait.Poll(5*time.Second, 60*time.Second, func() (bool, error) {
+		framework.Logf("Waiting for kubeproxy to come back up with %v on %v", sshCmd, host)
+		result, err := e2essh.SSH(sshCmd, host, framework.TestContext.Provider)
+		if err != nil {
+			return false, err
+		}
+		if result.Code != 0 {
+			e2essh.LogResult(result)
+			return false, fmt.Errorf("failed to run command, exited %d", result.Code)
+		}
+		if result.Stdout == "0\n" {
+			return false, nil
+		}
+		framework.Logf("kube-proxy is back up.")
+		return true, nil
+	})
+	if err != nil {
+		return fmt.Errorf("kube-proxy didn't recover: %v", err)
+	}
+	return nil
+}
+
 var _ = SIGDescribe("Services", func() {
 	f := framework.NewDefaultFramework("services")

@@ -466,7 +503,7 @@ var _ = SIGDescribe("Services", func() {
 		framework.ExpectNoError(e2eservice.VerifyServeHostnameServiceUp(cs, ns, host, podNames2, svc2IP, servicePort))

 		ginkgo.By(fmt.Sprintf("Restarting kube-proxy on %v", host))
-		if err := framework.RestartKubeProxy(host); err != nil {
+		if err := restartKubeProxy(host); err != nil {
 			framework.Failf("error restarting kube-proxy: %v", err)
 		}
 		framework.ExpectNoError(e2eservice.VerifyServeHostnameServiceUp(cs, ns, host, podNames1, svc1IP, servicePort))
diff --git a/test/e2e/scheduling/priorities.go b/test/e2e/scheduling/priorities.go
index 0db6c326b93..64ac2ead093 100644
--- a/test/e2e/scheduling/priorities.go
+++ b/test/e2e/scheduling/priorities.go
@@ -28,9 +28,11 @@ import (
 	_ "github.com/stretchr/testify/assert"

 	v1 "k8s.io/api/core/v1"
+	apierrs "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/uuid"
+	"k8s.io/apimachinery/pkg/util/wait"
 	clientset "k8s.io/client-go/kubernetes"
 	v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
 	priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
@@ -61,6 +63,67 @@ var podRequestedResource = &v1.ResourceRequirements{
 	},
 }

+// addOrUpdateAvoidPodOnNode adds avoidPods annotations to node, will override if it exists
+func addOrUpdateAvoidPodOnNode(c clientset.Interface, nodeName string, avoidPods v1.AvoidPods) {
+	err := wait.PollImmediate(framework.Poll, framework.SingleCallTimeout, func() (bool, error) {
+		node, err := c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
+		if err != nil {
+			if testutils.IsRetryableAPIError(err) {
+				return false, nil
+			}
+			return false, err
+		}
+
+		taintsData, err := json.Marshal(avoidPods)
+		framework.ExpectNoError(err)
+
+		if node.Annotations == nil {
+			node.Annotations = make(map[string]string)
+		}
+		node.Annotations[v1.PreferAvoidPodsAnnotationKey] = string(taintsData)
+		_, err = c.CoreV1().Nodes().Update(node)
+		if err != nil {
+			if !apierrs.IsConflict(err) {
+				framework.ExpectNoError(err)
+			} else {
+				framework.Logf("Conflict when trying to add/update avoidPods %v to %v with error %v", avoidPods, nodeName, err)
+				return false, nil
+			}
+		}
+		return true, nil
+	})
+	framework.ExpectNoError(err)
+}
+
+// removeAvoidPodsOffNode removes AvoidPods annotations from the node. It does not fail if no such annotation exists.
+func removeAvoidPodsOffNode(c clientset.Interface, nodeName string) {
+	err := wait.PollImmediate(framework.Poll, framework.SingleCallTimeout, func() (bool, error) {
+		node, err := c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
+		if err != nil {
+			if testutils.IsRetryableAPIError(err) {
+				return false, nil
+			}
+			return false, err
+		}
+
+		if node.Annotations == nil {
+			return true, nil
+		}
+		delete(node.Annotations, v1.PreferAvoidPodsAnnotationKey)
+		_, err = c.CoreV1().Nodes().Update(node)
+		if err != nil {
+			if !apierrs.IsConflict(err) {
+				framework.ExpectNoError(err)
+			} else {
+				framework.Logf("Conflict when trying to remove avoidPods to %v", nodeName)
+				return false, nil
+			}
+		}
+		return true, nil
+	})
+	framework.ExpectNoError(err)
+}
+
 // This test suite is used to verifies scheduler priority functions based on the default provider
 var _ = SIGDescribe("SchedulerPriorities [Serial]", func() {
 	var cs clientset.Interface
@@ -209,7 +272,7 @@ var _ = SIGDescribe("SchedulerPriorities [Serial]", func() {
 			},
 		}
 		action := func() error {
-			framework.AddOrUpdateAvoidPodOnNode(cs, nodeName, avoidPod)
+			addOrUpdateAvoidPodOnNode(cs, nodeName, avoidPod)
 			return nil
 		}
 		predicate := func(node *v1.Node) bool {
@@ -223,7 +286,7 @@ var _ = SIGDescribe("SchedulerPriorities [Serial]", func() {
 		framework.ExpectNoError(err)
 		framework.ExpectEqual(success, true)

-		defer framework.RemoveAvoidPodsOffNode(cs, nodeName)
+		defer removeAvoidPodsOffNode(cs, nodeName)

 		ginkgo.By(fmt.Sprintf("Scale the RC: %s to len(nodeList.Item)-1 : %v.", rc.Name, len(nodeList.Items)-1))
diff --git a/test/e2e/storage/nfs_persistent_volume-disruptive.go b/test/e2e/storage/nfs_persistent_volume-disruptive.go
index 5dfcadd61ca..0ce61d875dd 100644
--- a/test/e2e/storage/nfs_persistent_volume-disruptive.go
+++ b/test/e2e/storage/nfs_persistent_volume-disruptive.go
@@ -18,6 +18,7 @@ package storage

 import (
 	"fmt"
+	"net"
 	"time"

 	"github.com/onsi/ginkgo"
@@ -32,6 +33,7 @@ import (
 	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
 	e2epv "k8s.io/kubernetes/test/e2e/framework/pv"
+	e2essh "k8s.io/kubernetes/test/e2e/framework/ssh"
 	"k8s.io/kubernetes/test/e2e/framework/volume"
 	"k8s.io/kubernetes/test/e2e/storage/utils"
 )
@@ -42,6 +44,33 @@ type disruptiveTest struct {
 	runTest testBody
 }

+// checkForControllerManagerHealthy checks that the controller manager does not crash within "duration"
+func checkForControllerManagerHealthy(duration time.Duration) error {
+	var PID string
+	cmd := "pidof kube-controller-manager"
+	for start := time.Now(); time.Since(start) < duration; time.Sleep(5 * time.Second) {
+		result, err := e2essh.SSH(cmd, net.JoinHostPort(framework.GetMasterHost(), sshPort), framework.TestContext.Provider)
+		if err != nil {
+			// We don't necessarily know that it crashed, pipe could just be broken
+			e2essh.LogResult(result)
+			return fmt.Errorf("master unreachable after %v", time.Since(start))
+		} else if result.Code != 0 {
+			e2essh.LogResult(result)
+			return fmt.Errorf("SSH result code not 0. actually: %v after %v", result.Code, time.Since(start))
+		} else if result.Stdout != PID {
+			if PID == "" {
+				PID = result.Stdout
+			} else {
+				//its dead
+				return fmt.Errorf("controller manager crashed, old PID: %s, new PID: %s", PID, result.Stdout)
+			}
+		} else {
+			framework.Logf("kube-controller-manager still healthy after %v", time.Since(start))
+		}
+	}
+	return nil
+}
+
 var _ = utils.SIGDescribe("NFSPersistentVolumes[Disruptive][Flaky]", func() {

 	f := framework.NewDefaultFramework("disruptive-pv")
@@ -191,7 +220,7 @@ var _ = utils.SIGDescribe("NFSPersistentVolumes[Disruptive][Flaky]", func() {

 		ginkgo.By("Observing the kube-controller-manager healthy for at least 2 minutes")
 		// Continue checking for 2 minutes to make sure kube-controller-manager is healthy
-		err = framework.CheckForControllerManagerHealthy(2 * time.Minute)
+		err = checkForControllerManagerHealthy(2 * time.Minute)
 		framework.ExpectNoError(err)
 	})
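
Note (not part of the patch): the relocated helpers keep the behavior of their framework predecessors; only the package and the unexported naming change. As a minimal sketch of the call pattern the updated tests now use, a test in test/e2e/apps could drive a DaemonSet rollout through the package-local retry helper roughly as follows. The clientset, namespace, and DaemonSet are assumed to come from the test's existing setup, and triggerRollout is a hypothetical wrapper, not code from this patch:

	// triggerRollout is illustrative only. The callback mutates the freshly
	// fetched object; updateDaemonSetWithRetries re-Gets the DaemonSet and
	// retries the Update until it succeeds or its one-minute poll times out,
	// so transient conflicts and retryable API errors are absorbed here.
	func triggerRollout(c clientset.Interface, ns string, ds *appsv1.DaemonSet) error {
		newDS, err := updateDaemonSetWithRetries(c, ns, ds.Name, func(update *appsv1.DaemonSet) {
			// Same nonexistent image the rollback test above uses to start a rollout.
			update.Spec.Template.Spec.Containers[0].Image = "foo:non-existent"
		})
		if err != nil {
			return err
		}
		framework.Logf("DaemonSet %s is rolling out image %s", newDS.Name, newDS.Spec.Template.Spec.Containers[0].Image)
		return nil
	}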