From 37212fa93426e49091f5eddb0c9e9c5c72492de2 Mon Sep 17 00:00:00 2001
From: John Schnake
Date: Wed, 7 Aug 2019 10:24:39 -0500
Subject: [PATCH] Ensure tests which rely on SSH behavior are skipped if unavailable

A number of tests rely on SSH behavior but are not marked with
SkipUnlessSSHKeyPresent(). As a result, those tests run and then hang when
no SSH keys are available. This can be confusing for users because they may
not know why the tests failed. Instead, we should report the known
limitation and skip the test, which is exactly what the
SkipUnlessSSHKeyPresent helper exists for.
---
 test/e2e/apimachinery/etcd_failure.go                    | 1 +
 test/e2e/apps/network_partition.go                       | 9 +++++++++
 test/e2e/autoscaling/cluster_size_autoscaling.go         | 3 +++
 test/e2e/network/service.go                              | 4 ++++
 test/e2e/node/node_problem_detector.go                   | 2 ++
 test/e2e/scalability/density.go                          | 3 +++
 test/e2e/scalability/load.go                             | 4 ++++
 test/e2e/scheduling/taint_based_evictions.go             | 2 ++
 test/e2e/storage/detach_mounted.go                       | 2 ++
 test/e2e/storage/flexvolume_online_resize.go             | 2 ++
 test/e2e/storage/nfs_persistent_volume-disruptive.go     | 2 ++
 test/e2e/storage/vsphere/persistent_volumes-vsphere.go   | 2 ++
 .../e2e/storage/vsphere/vsphere_volume_master_restart.go | 2 ++
 test/e2e/storage/vsphere/vsphere_volume_vpxd_restart.go  | 2 ++
 14 files changed, 40 insertions(+)

diff --git a/test/e2e/apimachinery/etcd_failure.go b/test/e2e/apimachinery/etcd_failure.go
index 286b1136fdb..c6b43eb5143 100644
--- a/test/e2e/apimachinery/etcd_failure.go
+++ b/test/e2e/apimachinery/etcd_failure.go
@@ -44,6 +44,7 @@ var _ = SIGDescribe("Etcd failure [Disruptive]", func() {
 		// ... so the provider check should be identical to the intersection of
 		// providers that provide those capabilities.
 		framework.SkipUnlessProviderIs("gce")
+		framework.SkipUnlessSSHKeyPresent()
 
 		err := framework.RunRC(testutils.RCConfig{
 			Client: f.ClientSet,
diff --git a/test/e2e/apps/network_partition.go b/test/e2e/apps/network_partition.go
index a3f44fc98bd..8b92fcd6c29 100644
--- a/test/e2e/apps/network_partition.go
+++ b/test/e2e/apps/network_partition.go
@@ -130,6 +130,7 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
 	ginkgo.Context("should return to running and ready state after network partition is healed", func() {
 		ginkgo.BeforeEach(func() {
 			framework.SkipUnlessNodeCountIsAtLeast(2)
+			framework.SkipUnlessSSHKeyPresent()
 		})
 
 		// What happens in this test:
@@ -238,6 +239,7 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
 	framework.KubeDescribe("[ReplicationController]", func() {
 		ginkgo.It("should recreate pods scheduled on the unreachable node "+
 			"AND allow scheduling of pods on a node after it rejoins the cluster", func() {
+			framework.SkipUnlessSSHKeyPresent()
 			// Create a replication controller for a service that serves its hostname.
 			// The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
 			name := "my-hostname-net"
@@ -304,6 +306,8 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
 		})
 
 		ginkgo.It("should eagerly create replacement pod during network partition when termination grace is non-zero", func() {
+			framework.SkipUnlessSSHKeyPresent()
+
 			// Create a replication controller for a service that serves its hostname.
 			// The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
 			name := "my-hostname-net"
@@ -394,6 +398,8 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
 		})
 
 		ginkgo.It("should not reschedule stateful pods if there is a network partition [Slow] [Disruptive]", func() {
+			framework.SkipUnlessSSHKeyPresent()
+
 			ps := e2esset.NewStatefulSet(psName, ns, headlessSvcName, 3, []v1.VolumeMount{}, []v1.VolumeMount{}, labels)
 			_, err := c.AppsV1().StatefulSets(ns).Create(ps)
 			framework.ExpectNoError(err)
@@ -425,6 +431,8 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
 
 	framework.KubeDescribe("[Job]", func() {
 		ginkgo.It("should create new pods when node is partitioned", func() {
+			framework.SkipUnlessSSHKeyPresent()
+
 			parallelism := int32(2)
 			completions := int32(4)
 			backoffLimit := int32(6) // default value
@@ -482,6 +490,7 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
 	// 3. After enough time passess all Pods are evicted from the given Node
 	ginkgo.It("[Feature:TaintEviction] All pods on the unreachable node should be marked as NotReady upon the node turn NotReady "+
 		"AND all pods should be evicted after eviction timeout passes", func() {
+		framework.SkipUnlessSSHKeyPresent()
 		ginkgo.By("choose a node - we will block all network traffic on this node")
 		var podOpts metav1.ListOptions
 		nodes := framework.GetReadySchedulableNodesOrDie(c)
diff --git a/test/e2e/autoscaling/cluster_size_autoscaling.go b/test/e2e/autoscaling/cluster_size_autoscaling.go
index f670118c0e1..972dcde5b21 100644
--- a/test/e2e/autoscaling/cluster_size_autoscaling.go
+++ b/test/e2e/autoscaling/cluster_size_autoscaling.go
@@ -685,6 +685,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
 
 	ginkgo.It("should correctly scale down after a node is not needed and one node is broken [Feature:ClusterSizeAutoscalingScaleDown]",
 		func() {
+			framework.SkipUnlessSSHKeyPresent()
 			framework.TestUnderTemporaryNetworkFailure(c, "default", getAnyNode(c), func() { simpleScaleDownTest(1) })
 		})
 
@@ -876,6 +877,8 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
 	})
 
 	ginkgo.It("Shouldn't perform scale up operation and should list unhealthy status if most of the cluster is broken[Feature:ClusterSizeAutoscalingScaleUp]", func() {
+		framework.SkipUnlessSSHKeyPresent()
+
 		clusterSize := nodeCount
 		for clusterSize < unhealthyClusterThreshold+1 {
 			clusterSize = manuallyIncreaseClusterSize(f, originalSizes)
diff --git a/test/e2e/network/service.go b/test/e2e/network/service.go
index 418c7cadbb1..89dfb489695 100644
--- a/test/e2e/network/service.go
+++ b/test/e2e/network/service.go
@@ -404,6 +404,7 @@ var _ = SIGDescribe("Services", func() {
 	ginkgo.It("should work after restarting kube-proxy [Disruptive]", func() {
 		// TODO: use the ServiceTestJig here
 		framework.SkipUnlessProviderIs("gce", "gke")
+		framework.SkipUnlessSSHKeyPresent()
 
 		ns := f.Namespace.Name
 		numPods, servicePort := 3, defaultServeHostnameServicePort
@@ -460,6 +461,7 @@ var _ = SIGDescribe("Services", func() {
 	ginkgo.It("should work after restarting apiserver [Disruptive]", func() {
 		// TODO: use the ServiceTestJig here
 		framework.SkipUnlessProviderIs("gce", "gke")
+		framework.SkipUnlessSSHKeyPresent()
 
 		ns := f.Namespace.Name
 		numPods, servicePort := 3, 80
@@ -1658,6 +1660,8 @@ var _ = SIGDescribe("Services", func() {
 		// This test is for clusters on GCE.
 		// (It restarts kube-controller-manager, which we don't support on GKE)
 		framework.SkipUnlessProviderIs("gce")
+		framework.SkipUnlessSSHKeyPresent()
+
 		clusterID, err := gce.GetClusterID(cs)
 		if err != nil {
 			e2elog.Failf("framework.GetClusterID(cs) = _, %v; want nil", err)
diff --git a/test/e2e/node/node_problem_detector.go b/test/e2e/node/node_problem_detector.go
index 9eaeb12d713..bd2e12b4d25 100644
--- a/test/e2e/node/node_problem_detector.go
+++ b/test/e2e/node/node_problem_detector.go
@@ -56,6 +56,8 @@ var _ = SIGDescribe("NodeProblemDetector [DisabledForLargeClusters]", func() {
 	})
 
 	ginkgo.It("should run without error", func() {
+		framework.SkipUnlessSSHKeyPresent()
+
 		ginkgo.By("Getting all nodes and their SSH-able IP addresses")
 		nodes := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
 		gomega.Expect(len(nodes.Items)).NotTo(gomega.BeZero())
diff --git a/test/e2e/scalability/density.go b/test/e2e/scalability/density.go
index 5bb6ddbf134..aaea3b1a9f0 100644
--- a/test/e2e/scalability/density.go
+++ b/test/e2e/scalability/density.go
@@ -503,6 +503,9 @@ var _ = SIGDescribe("Density", func() {
 	f.NamespaceDeletionTimeout = time.Hour
 
 	ginkgo.BeforeEach(func() {
+		// Gathering the metrics currently uses a path which uses SSH.
+		framework.SkipUnlessSSHKeyPresent()
+
 		var err error
 		c = f.ClientSet
 		ns = f.Namespace.Name
diff --git a/test/e2e/scalability/load.go b/test/e2e/scalability/load.go
index 85e03e5e726..0de7771c41b 100644
--- a/test/e2e/scalability/load.go
+++ b/test/e2e/scalability/load.go
@@ -109,6 +109,10 @@ var _ = SIGDescribe("Load capacity", func() {
 	var testPhaseDurations *timer.TestPhaseTimer
 	var profileGathererStopCh chan struct{}
 
+	ginkgo.BeforeEach(func() {
+		framework.SkipUnlessSSHKeyPresent()
+	})
+
 	// Gathers metrics before teardown
 	// TODO add flag that allows to skip cleanup on failure
 	ginkgo.AfterEach(func() {
diff --git a/test/e2e/scheduling/taint_based_evictions.go b/test/e2e/scheduling/taint_based_evictions.go
index 2e4041f541c..4131635d276 100644
--- a/test/e2e/scheduling/taint_based_evictions.go
+++ b/test/e2e/scheduling/taint_based_evictions.go
@@ -76,6 +76,8 @@ var _ = SIGDescribe("TaintBasedEvictions [Serial]", func() {
 	// 5. node lifecycle manager generate a status change: [NodeReady=true, status=ConditionTrue]
 	// 6. node.kubernetes.io/unreachable=:NoExecute taint is taken off the node
 	ginkgo.It("Checks that the node becomes unreachable", func() {
+		framework.SkipUnlessSSHKeyPresent()
+
 		// find an available node
 		nodeName := GetNodeThatCanRunPod(f)
 		ginkgo.By("Finding an available node " + nodeName)
diff --git a/test/e2e/storage/detach_mounted.go b/test/e2e/storage/detach_mounted.go
index 25d72fc62fe..6081ee629c8 100644
--- a/test/e2e/storage/detach_mounted.go
+++ b/test/e2e/storage/detach_mounted.go
@@ -64,6 +64,8 @@ var _ = utils.SIGDescribe("Detaching volumes", func() {
 	})
 
 	ginkgo.It("should not work when mount is in progress [Slow]", func() {
+		framework.SkipUnlessSSHKeyPresent()
+
 		driver := "attachable-with-long-mount"
 		driverInstallAs := driver + "-" + suffix
 
diff --git a/test/e2e/storage/flexvolume_online_resize.go b/test/e2e/storage/flexvolume_online_resize.go
index 863f5cf8e97..535bad70e35 100644
--- a/test/e2e/storage/flexvolume_online_resize.go
+++ b/test/e2e/storage/flexvolume_online_resize.go
@@ -116,6 +116,8 @@ var _ = utils.SIGDescribe("Mounted flexvolume volume expand [Slow] [Feature:Expa
 	})
 
 	ginkgo.It("should be resizable when mounted", func() {
+		framework.SkipUnlessSSHKeyPresent()
+
 		driver := "dummy-attachable"
 		node := nodeList.Items[0]
 
diff --git a/test/e2e/storage/nfs_persistent_volume-disruptive.go b/test/e2e/storage/nfs_persistent_volume-disruptive.go
index c9ccd4dfdf7..2137245dd14 100644
--- a/test/e2e/storage/nfs_persistent_volume-disruptive.go
+++ b/test/e2e/storage/nfs_persistent_volume-disruptive.go
@@ -175,6 +175,8 @@ var _ = utils.SIGDescribe("NFSPersistentVolumes[Disruptive][Flaky]", func() {
 	})
 
 	ginkgo.It("should delete a bound PVC from a clientPod, restart the kube-control-manager, and ensure the kube-controller-manager does not crash", func() {
+		framework.SkipUnlessSSHKeyPresent()
+
 		ginkgo.By("Deleting PVC for volume 2")
 		err = framework.DeletePersistentVolumeClaim(c, pvc2.Name, ns)
 		framework.ExpectNoError(err)
diff --git a/test/e2e/storage/vsphere/persistent_volumes-vsphere.go b/test/e2e/storage/vsphere/persistent_volumes-vsphere.go
index 051de8e0167..4239f094e47 100644
--- a/test/e2e/storage/vsphere/persistent_volumes-vsphere.go
+++ b/test/e2e/storage/vsphere/persistent_volumes-vsphere.go
@@ -179,6 +179,7 @@ var _ = utils.SIGDescribe("PersistentVolumes:vsphere", func() {
 		3. Verify that written file is accessible after kubelet restart
 	*/
 	ginkgo.It("should test that a file written to the vspehre volume mount before kubelet restart can be read after restart [Disruptive]", func() {
+		framework.SkipUnlessSSHKeyPresent()
 		utils.TestKubeletRestartsAndRestoresMount(c, f, clientPod)
 	})
 
@@ -194,6 +195,7 @@ var _ = utils.SIGDescribe("PersistentVolumes:vsphere", func() {
 		5. Verify that volume mount not to be found.
 	*/
 	ginkgo.It("should test that a vspehre volume mounted to a pod that is deleted while the kubelet is down unmounts when the kubelet returns [Disruptive]", func() {
+		framework.SkipUnlessSSHKeyPresent()
 		utils.TestVolumeUnmountsFromDeletedPod(c, f, clientPod)
 	})
 
diff --git a/test/e2e/storage/vsphere/vsphere_volume_master_restart.go b/test/e2e/storage/vsphere/vsphere_volume_master_restart.go
index 34ac8164593..8b03ee5249d 100644
--- a/test/e2e/storage/vsphere/vsphere_volume_master_restart.go
+++ b/test/e2e/storage/vsphere/vsphere_volume_master_restart.go
@@ -81,6 +81,8 @@ var _ = utils.SIGDescribe("Volume Attach Verify [Feature:vsphere][Serial][Disrup
 	})
 
 	ginkgo.It("verify volume remains attached after master kubelet restart", func() {
+		framework.SkipUnlessSSHKeyPresent()
+
 		// Create pod on each node
 		for i := 0; i < numNodes; i++ {
 			ginkgo.By(fmt.Sprintf("%d: Creating a test vsphere volume", i))
diff --git a/test/e2e/storage/vsphere/vsphere_volume_vpxd_restart.go b/test/e2e/storage/vsphere/vsphere_volume_vpxd_restart.go
index 6d570cee79e..09f28a7a680 100644
--- a/test/e2e/storage/vsphere/vsphere_volume_vpxd_restart.go
+++ b/test/e2e/storage/vsphere/vsphere_volume_vpxd_restart.go
@@ -99,6 +99,8 @@ var _ = utils.SIGDescribe("Verify Volume Attach Through vpxd Restart [Feature:vs
 	})
 
 	ginkgo.It("verify volume remains attached through vpxd restart", func() {
+		framework.SkipUnlessSSHKeyPresent()
+
 		for vcHost, nodes := range vcNodesMap {
 			var (
 				volumePaths []string
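For reference, and not part of the patch itself: a minimal sketch of the guard pattern this change applies, assuming the ginkgo v1 and k8s.io/kubernetes/test/e2e/framework imports these suites already use; the package, suite, and spec names below are invented for illustration only.

package e2esketch

import (
	"github.com/onsi/ginkgo"

	"k8s.io/kubernetes/test/e2e/framework"
)

// Illustrative only: a spec that needs SSH access to nodes guards itself
// up front so it is reported as skipped instead of hanging.
var _ = ginkgo.Describe("Example suite that shells out to nodes [Disruptive]", func() {
	f := framework.NewDefaultFramework("ssh-guard-example")

	ginkgo.It("does work that requires SSH access to nodes", func() {
		// Without this call the spec would run and eventually hang in an
		// environment with no SSH keys; with it, the spec is skipped with a
		// clear reason, matching the calls added throughout this patch.
		framework.SkipUnlessSSHKeyPresent()

		// A real test body would use f.ClientSet and the framework's SSH
		// helpers here; this placeholder just keeps the sketch compilable.
		_ = f.ClientSet
	})
})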