From 37212fa93426e49091f5eddb0c9e9c5c72492de2 Mon Sep 17 00:00:00 2001
From: John Schnake
Date: Wed, 7 Aug 2019 10:24:39 -0500
Subject: [PATCH] Ensure tests which rely on SSH behavior are skipped if unavailable

A number of tests rely on SSH behavior but are not marked with
SkipUnlessSSHKeyPresent(). As a result, those tests run and then hang when
no SSH keys are available. This can be confusing for users because they may
not know why the tests failed. Instead, we should report the known
limitation and skip the test, which is exactly what the
SkipUnlessSSHKeyPresent helper exists for.
---
 test/e2e/apimachinery/etcd_failure.go                    | 1 +
 test/e2e/apps/network_partition.go                       | 9 +++++++++
 test/e2e/autoscaling/cluster_size_autoscaling.go         | 3 +++
 test/e2e/network/service.go                              | 4 ++++
 test/e2e/node/node_problem_detector.go                   | 2 ++
 test/e2e/scalability/density.go                          | 3 +++
 test/e2e/scalability/load.go                             | 4 ++++
 test/e2e/scheduling/taint_based_evictions.go             | 2 ++
 test/e2e/storage/detach_mounted.go                       | 2 ++
 test/e2e/storage/flexvolume_online_resize.go             | 2 ++
 test/e2e/storage/nfs_persistent_volume-disruptive.go     | 2 ++
 test/e2e/storage/vsphere/persistent_volumes-vsphere.go   | 2 ++
 .../e2e/storage/vsphere/vsphere_volume_master_restart.go | 2 ++
 test/e2e/storage/vsphere/vsphere_volume_vpxd_restart.go  | 2 ++
 14 files changed, 40 insertions(+)

diff --git a/test/e2e/apimachinery/etcd_failure.go b/test/e2e/apimachinery/etcd_failure.go
index 286b1136fdb..c6b43eb5143 100644
--- a/test/e2e/apimachinery/etcd_failure.go
+++ b/test/e2e/apimachinery/etcd_failure.go
@@ -44,6 +44,7 @@ var _ = SIGDescribe("Etcd failure [Disruptive]", func() {
 		// ... so the provider check should be identical to the intersection of
 		// providers that provide those capabilities.
 		framework.SkipUnlessProviderIs("gce")
+		framework.SkipUnlessSSHKeyPresent()
 
 		err := framework.RunRC(testutils.RCConfig{
 			Client: f.ClientSet,
diff --git a/test/e2e/apps/network_partition.go b/test/e2e/apps/network_partition.go
index a3f44fc98bd..8b92fcd6c29 100644
--- a/test/e2e/apps/network_partition.go
+++ b/test/e2e/apps/network_partition.go
@@ -130,6 +130,7 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
 	ginkgo.Context("should return to running and ready state after network partition is healed", func() {
 		ginkgo.BeforeEach(func() {
 			framework.SkipUnlessNodeCountIsAtLeast(2)
+			framework.SkipUnlessSSHKeyPresent()
 		})
 
 		// What happens in this test:
@@ -238,6 +239,7 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
 	framework.KubeDescribe("[ReplicationController]", func() {
 		ginkgo.It("should recreate pods scheduled on the unreachable node "+
 			"AND allow scheduling of pods on a node after it rejoins the cluster", func() {
+			framework.SkipUnlessSSHKeyPresent()
 			// Create a replication controller for a service that serves its hostname.
 			// The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
 			name := "my-hostname-net"
@@ -304,6 +306,8 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
 		})
 
 		ginkgo.It("should eagerly create replacement pod during network partition when termination grace is non-zero", func() {
+			framework.SkipUnlessSSHKeyPresent()
+
 			// Create a replication controller for a service that serves its hostname.
 			// The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
 			name := "my-hostname-net"
@@ -394,6 +398,8 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
 		})
 
 		ginkgo.It("should not reschedule stateful pods if there is a network partition [Slow] [Disruptive]", func() {
+			framework.SkipUnlessSSHKeyPresent()
+
 			ps := e2esset.NewStatefulSet(psName, ns, headlessSvcName, 3, []v1.VolumeMount{}, []v1.VolumeMount{}, labels)
 			_, err := c.AppsV1().StatefulSets(ns).Create(ps)
 			framework.ExpectNoError(err)
@@ -425,6 +431,8 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
 
 	framework.KubeDescribe("[Job]", func() {
 		ginkgo.It("should create new pods when node is partitioned", func() {
+			framework.SkipUnlessSSHKeyPresent()
+
 			parallelism := int32(2)
 			completions := int32(4)
 			backoffLimit := int32(6) // default value
@@ -482,6 +490,7 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
 	// 3. After enough time passess all Pods are evicted from the given Node
 	ginkgo.It("[Feature:TaintEviction] All pods on the unreachable node should be marked as NotReady upon the node turn NotReady "+
 		"AND all pods should be evicted after eviction timeout passes", func() {
+		framework.SkipUnlessSSHKeyPresent()
 		ginkgo.By("choose a node - we will block all network traffic on this node")
 		var podOpts metav1.ListOptions
 		nodes := framework.GetReadySchedulableNodesOrDie(c)
diff --git a/test/e2e/autoscaling/cluster_size_autoscaling.go b/test/e2e/autoscaling/cluster_size_autoscaling.go
index f670118c0e1..972dcde5b21 100644
--- a/test/e2e/autoscaling/cluster_size_autoscaling.go
+++ b/test/e2e/autoscaling/cluster_size_autoscaling.go
@@ -685,6 +685,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
 
 	ginkgo.It("should correctly scale down after a node is not needed and one node is broken [Feature:ClusterSizeAutoscalingScaleDown]",
 		func() {
+			framework.SkipUnlessSSHKeyPresent()
 			framework.TestUnderTemporaryNetworkFailure(c, "default", getAnyNode(c), func() { simpleScaleDownTest(1) })
 		})
 
@@ -876,6 +877,8 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
 	})
 
 	ginkgo.It("Shouldn't perform scale up operation and should list unhealthy status if most of the cluster is broken[Feature:ClusterSizeAutoscalingScaleUp]", func() {
+		framework.SkipUnlessSSHKeyPresent()
+
 		clusterSize := nodeCount
 		for clusterSize < unhealthyClusterThreshold+1 {
 			clusterSize = manuallyIncreaseClusterSize(f, originalSizes)
diff --git a/test/e2e/network/service.go b/test/e2e/network/service.go
index 418c7cadbb1..89dfb489695 100644
--- a/test/e2e/network/service.go
+++ b/test/e2e/network/service.go
@@ -404,6 +404,7 @@ var _ = SIGDescribe("Services", func() {
 	ginkgo.It("should work after restarting kube-proxy [Disruptive]", func() {
 		// TODO: use the ServiceTestJig here
 		framework.SkipUnlessProviderIs("gce", "gke")
+		framework.SkipUnlessSSHKeyPresent()
 
 		ns := f.Namespace.Name
 		numPods, servicePort := 3, defaultServeHostnameServicePort
@@ -460,6 +461,7 @@ var _ = SIGDescribe("Services", func() {
 	ginkgo.It("should work after restarting apiserver [Disruptive]", func() {
 		// TODO: use the ServiceTestJig here
 		framework.SkipUnlessProviderIs("gce", "gke")
+		framework.SkipUnlessSSHKeyPresent()
 
 		ns := f.Namespace.Name
 		numPods, servicePort := 3, 80
@@ -1658,6 +1660,8 @@ var _ = SIGDescribe("Services", func() {
 		// This test is for clusters on GCE.
 		// (It restarts kube-controller-manager, which we don't support on GKE)
 		framework.SkipUnlessProviderIs("gce")
+		framework.SkipUnlessSSHKeyPresent()
+
 		clusterID, err := gce.GetClusterID(cs)
 		if err != nil {
 			e2elog.Failf("framework.GetClusterID(cs) = _, %v; want nil", err)
diff --git a/test/e2e/node/node_problem_detector.go b/test/e2e/node/node_problem_detector.go
index 9eaeb12d713..bd2e12b4d25 100644
--- a/test/e2e/node/node_problem_detector.go
+++ b/test/e2e/node/node_problem_detector.go
@@ -56,6 +56,8 @@ var _ = SIGDescribe("NodeProblemDetector [DisabledForLargeClusters]", func() {
 	})
 
 	ginkgo.It("should run without error", func() {
+		framework.SkipUnlessSSHKeyPresent()
+
 		ginkgo.By("Getting all nodes and their SSH-able IP addresses")
 		nodes := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
 		gomega.Expect(len(nodes.Items)).NotTo(gomega.BeZero())
diff --git a/test/e2e/scalability/density.go b/test/e2e/scalability/density.go
index 5bb6ddbf134..aaea3b1a9f0 100644
--- a/test/e2e/scalability/density.go
+++ b/test/e2e/scalability/density.go
@@ -503,6 +503,9 @@ var _ = SIGDescribe("Density", func() {
 	f.NamespaceDeletionTimeout = time.Hour
 
 	ginkgo.BeforeEach(func() {
+		// Gathering the metrics currently uses a path which uses SSH.
+		framework.SkipUnlessSSHKeyPresent()
+
 		var err error
 		c = f.ClientSet
 		ns = f.Namespace.Name
diff --git a/test/e2e/scalability/load.go b/test/e2e/scalability/load.go
index 85e03e5e726..0de7771c41b 100644
--- a/test/e2e/scalability/load.go
+++ b/test/e2e/scalability/load.go
@@ -109,6 +109,10 @@ var _ = SIGDescribe("Load capacity", func() {
 	var testPhaseDurations *timer.TestPhaseTimer
 	var profileGathererStopCh chan struct{}
 
+	ginkgo.BeforeEach(func() {
+		framework.SkipUnlessSSHKeyPresent()
+	})
+
 	// Gathers metrics before teardown
 	// TODO add flag that allows to skip cleanup on failure
 	ginkgo.AfterEach(func() {
diff --git a/test/e2e/scheduling/taint_based_evictions.go b/test/e2e/scheduling/taint_based_evictions.go
index 2e4041f541c..4131635d276 100644
--- a/test/e2e/scheduling/taint_based_evictions.go
+++ b/test/e2e/scheduling/taint_based_evictions.go
@@ -76,6 +76,8 @@ var _ = SIGDescribe("TaintBasedEvictions [Serial]", func() {
 	// 5. node lifecycle manager generate a status change: [NodeReady=true, status=ConditionTrue]
 	// 6. node.kubernetes.io/unreachable=:NoExecute taint is taken off the node
 	ginkgo.It("Checks that the node becomes unreachable", func() {
+		framework.SkipUnlessSSHKeyPresent()
+
 		// find an available node
 		nodeName := GetNodeThatCanRunPod(f)
 		ginkgo.By("Finding an available node " + nodeName)
diff --git a/test/e2e/storage/detach_mounted.go b/test/e2e/storage/detach_mounted.go
index 25d72fc62fe..6081ee629c8 100644
--- a/test/e2e/storage/detach_mounted.go
+++ b/test/e2e/storage/detach_mounted.go
@@ -64,6 +64,8 @@ var _ = utils.SIGDescribe("Detaching volumes", func() {
 	})
 
 	ginkgo.It("should not work when mount is in progress [Slow]", func() {
+		framework.SkipUnlessSSHKeyPresent()
+
 		driver := "attachable-with-long-mount"
 		driverInstallAs := driver + "-" + suffix
 
diff --git a/test/e2e/storage/flexvolume_online_resize.go b/test/e2e/storage/flexvolume_online_resize.go
index 863f5cf8e97..535bad70e35 100644
--- a/test/e2e/storage/flexvolume_online_resize.go
+++ b/test/e2e/storage/flexvolume_online_resize.go
@@ -116,6 +116,8 @@ var _ = utils.SIGDescribe("Mounted flexvolume volume expand [Slow] [Feature:Expa
 	})
 
 	ginkgo.It("should be resizable when mounted", func() {
+		framework.SkipUnlessSSHKeyPresent()
+
 		driver := "dummy-attachable"
 		node := nodeList.Items[0]
 
diff --git a/test/e2e/storage/nfs_persistent_volume-disruptive.go b/test/e2e/storage/nfs_persistent_volume-disruptive.go
index c9ccd4dfdf7..2137245dd14 100644
--- a/test/e2e/storage/nfs_persistent_volume-disruptive.go
+++ b/test/e2e/storage/nfs_persistent_volume-disruptive.go
@@ -175,6 +175,8 @@ var _ = utils.SIGDescribe("NFSPersistentVolumes[Disruptive][Flaky]", func() {
 	})
 
 	ginkgo.It("should delete a bound PVC from a clientPod, restart the kube-control-manager, and ensure the kube-controller-manager does not crash", func() {
+		framework.SkipUnlessSSHKeyPresent()
+
 		ginkgo.By("Deleting PVC for volume 2")
 		err = framework.DeletePersistentVolumeClaim(c, pvc2.Name, ns)
 		framework.ExpectNoError(err)
diff --git a/test/e2e/storage/vsphere/persistent_volumes-vsphere.go b/test/e2e/storage/vsphere/persistent_volumes-vsphere.go
index 051de8e0167..4239f094e47 100644
--- a/test/e2e/storage/vsphere/persistent_volumes-vsphere.go
+++ b/test/e2e/storage/vsphere/persistent_volumes-vsphere.go
@@ -179,6 +179,7 @@ var _ = utils.SIGDescribe("PersistentVolumes:vsphere", func() {
 		3. Verify that written file is accessible after kubelet restart
 	*/
 	ginkgo.It("should test that a file written to the vspehre volume mount before kubelet restart can be read after restart [Disruptive]", func() {
+		framework.SkipUnlessSSHKeyPresent()
 		utils.TestKubeletRestartsAndRestoresMount(c, f, clientPod)
 	})
 
@@ -194,6 +195,7 @@ var _ = utils.SIGDescribe("PersistentVolumes:vsphere", func() {
 		5. Verify that volume mount not to be found.
 	*/
 	ginkgo.It("should test that a vspehre volume mounted to a pod that is deleted while the kubelet is down unmounts when the kubelet returns [Disruptive]", func() {
+		framework.SkipUnlessSSHKeyPresent()
 		utils.TestVolumeUnmountsFromDeletedPod(c, f, clientPod)
 	})
 
diff --git a/test/e2e/storage/vsphere/vsphere_volume_master_restart.go b/test/e2e/storage/vsphere/vsphere_volume_master_restart.go
index 34ac8164593..8b03ee5249d 100644
--- a/test/e2e/storage/vsphere/vsphere_volume_master_restart.go
+++ b/test/e2e/storage/vsphere/vsphere_volume_master_restart.go
@@ -81,6 +81,8 @@ var _ = utils.SIGDescribe("Volume Attach Verify [Feature:vsphere][Serial][Disrup
 	})
 
 	ginkgo.It("verify volume remains attached after master kubelet restart", func() {
+		framework.SkipUnlessSSHKeyPresent()
+
 		// Create pod on each node
 		for i := 0; i < numNodes; i++ {
 			ginkgo.By(fmt.Sprintf("%d: Creating a test vsphere volume", i))
diff --git a/test/e2e/storage/vsphere/vsphere_volume_vpxd_restart.go b/test/e2e/storage/vsphere/vsphere_volume_vpxd_restart.go
index 6d570cee79e..09f28a7a680 100644
--- a/test/e2e/storage/vsphere/vsphere_volume_vpxd_restart.go
+++ b/test/e2e/storage/vsphere/vsphere_volume_vpxd_restart.go
@@ -99,6 +99,8 @@ var _ = utils.SIGDescribe("Verify Volume Attach Through vpxd Restart [Feature:vs
 	})
 
 	ginkgo.It("verify volume remains attached through vpxd restart", func() {
+		framework.SkipUnlessSSHKeyPresent()
+
 		for vcHost, nodes := range vcNodesMap {
 			var (
 				volumePaths []string
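For reference, and not part of the patch itself: a minimal sketch of the guard pattern this change applies, assuming the ginkgo v1 and k8s.io/kubernetes/test/e2e/framework imports these suites already use; the package, suite, and spec names below are invented for illustration only.

package e2esketch

import (
	"github.com/onsi/ginkgo"

	"k8s.io/kubernetes/test/e2e/framework"
)

// Illustrative only: a spec that needs SSH access to nodes guards itself
// up front so it is reported as skipped instead of hanging.
var _ = ginkgo.Describe("Example suite that shells out to nodes [Disruptive]", func() {
	f := framework.NewDefaultFramework("ssh-guard-example")

	ginkgo.It("does work that requires SSH access to nodes", func() {
		// Without this call the spec would run and eventually hang in an
		// environment with no SSH keys; with it, the spec is skipped with a
		// clear reason, matching the calls added throughout this patch.
		framework.SkipUnlessSSHKeyPresent()

		// A real test body would use f.ClientSet and the framework's SSH
		// helpers here; this placeholder just keeps the sketch compilable.
		_ = f.ClientSet
	})
})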