diff --git a/hack/jenkins/e2e.sh b/hack/jenkins/e2e.sh
index 7f18af39fa4..3f6c667e8a0 100755
--- a/hack/jenkins/e2e.sh
+++ b/hack/jenkins/e2e.sh
@@ -65,7 +65,6 @@ function join_regex_no_empty() {
 # $GCE_DEFAULT_SKIP_TESTS
 # $GCE_FLAKY_TESTS
 # $GCE_SLOW_TESTS
-# $GKE_FLAKY_TESTS
 #
 # Args:
 #   $1 old_version: the version to deploy a cluster at, and old e2e tests to run
@@ -99,7 +98,6 @@ function configure_upgrade_step() {
           ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
           ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
           ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
-          ${GKE_FLAKY_TESTS[@]:+${GKE_FLAKY_TESTS[@]}} \
           )"

     if [[ "${KUBERNETES_PROVIDER}" == "gce" ]]; then
@@ -302,11 +300,6 @@ GKE_REQUIRED_SKIP_TESTS=(
     "Deployment"
     )

-# Tests wchich are known to be flaky on GKE
-GKE_FLAKY_TESTS=(
-    "NodeOutOfDisk"
-    )
-
 # Specialized tests which should be skipped by default for GKE.
 GKE_DEFAULT_SKIP_TESTS=(
     "Autoscaling\sSuite"
@@ -330,12 +323,7 @@ DISRUPTIVE_TESTS=(
 # The following tests are known to be flaky, and are thus run only in their own
 # -flaky- build variants.
 GCE_FLAKY_TESTS=(
-    "GCE\sL7\sLoadBalancer\sController"                    # issue: #17518
-    "DaemonRestart\sController\sManager"                   # issue: #17829
-    "Daemon\sset\sshould\srun\sand\sstop\scomplex\sdaemon" # issue: #16623
-    "Resource\susage\sof\ssystem\scontainers"              # issue: #13931
-    "NodeOutOfDisk"                                        # issue: #17687
-    "Cluster\slevel\slogging\susing\sElasticsearch"        # issue: #17873
+    "\[Flaky\]"
    )

 # The following tests are known to be slow running (> 2 min), and are
@@ -349,14 +337,7 @@ GCE_SLOW_TESTS=(
     # make sure the associated project has enough quota. At the time of this
     # writing a GCE project is allowed 3 backend services by default. This
     # test requires at least 5.
-    "GCE\sL7\sLoadBalancer\sController"               # 10 min,       file: ingress.go,              slow by design
-    "SchedulerPredicates\svalidates\sMaxPods\slimit " # 8 min,        file: scheduler_predicates.go, PR:    #13315
-    "Nodes\sResize"                                   # 3 min 30 sec, file: resize_nodes.go,         issue: #13323
-    "resource\susage\stracking"                       # 1 hour,       file: kubelet_perf.go,         slow by design
-    "monotonically\sincreasing\srestart\scount"       # 1.5 to 5 min, file: pods.go,                 slow by design
-    "Garbage\scollector\sshould"                      # 7 min,        file: garbage_collector.go,    slow by design
-    "KubeProxy\sshould\stest\skube-proxy"             # 9 min 30 sec, file: kubeproxy.go,            issue: #14204
-    "cap\sback-off\sat\sMaxContainerBackOff"          # 20 mins       file: manager.go,              PR:    #12648
+    "\[Slow\]"
    )

 # Tests which are not able to be run in parallel.
@@ -367,16 +348,6 @@ GCE_PARALLEL_SKIP_TESTS=(
     "\[Disruptive\]"
     )

-# Tests which are known to be flaky when run in parallel.
-GCE_PARALLEL_FLAKY_TESTS=(
-    "DaemonRestart"
-    "Elasticsearch"
-    "Namespaces.*should\sdelete\sfast"
-    "Pods.*back-off\srestarting.*LivenessProbe" # issue: #18293
-    "ServiceAccounts"
-    "Services.*identically\snamed"              # error waiting for reachability, issue: #16285
-    )
-
 # Tests that should not run on soak cluster.
 GCE_SOAK_CONTINUOUS_SKIP_TESTS=(
     "GCE\sL7\sLoadBalancer\sController" # issue: #17119
@@ -496,7 +467,6 @@ case ${JOB_NAME} in
           ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
           ${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
           ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
-          ${GCE_PARALLEL_FLAKY_TESTS[@]:+${GCE_PARALLEL_FLAKY_TESTS[@]}} \
           ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
           )"}
     : ${KUBE_GCE_INSTANCE_PREFIX:="e2e-gce-${NODE_NAME}-${EXECUTOR_NUMBER}"}
@@ -516,7 +486,6 @@ case ${JOB_NAME} in
           ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
          ${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
           ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
-          ${GCE_PARALLEL_FLAKY_TESTS[@]:+${GCE_PARALLEL_FLAKY_TESTS[@]}} \
           ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
           )"}
     : ${KUBE_GCE_INSTANCE_PREFIX:="e2e-test-parallel"}
@@ -535,7 +504,6 @@ case ${JOB_NAME} in
           ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
           ${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
           ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
-          ${GCE_PARALLEL_FLAKY_TESTS[@]:+${GCE_PARALLEL_FLAKY_TESTS[@]}} \
           ${AWS_REQUIRED_SKIP_TESTS[@]:+${AWS_REQUIRED_SKIP_TESTS[@]}} \
           )"}
     : ${ENABLE_DEPLOYMENTS:=true}
@@ -553,7 +521,6 @@ case ${JOB_NAME} in
           ${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
           ) --ginkgo.focus=$(join_regex_no_empty \
           ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
-          ${GCE_PARALLEL_FLAKY_TESTS[@]:+${GCE_PARALLEL_FLAKY_TESTS[@]}} \
           )"}
     : ${KUBE_GCE_INSTANCE_PREFIX:="parallel-flaky"}
     : ${PROJECT:="k8s-jkns-e2e-gce-prl-flaky"}
@@ -597,7 +564,6 @@ case ${JOB_NAME} in
     : ${E2E_NETWORK:="e2e-gce-flannel"}
     : ${GINKGO_TEST_ARGS:="--ginkgo.skip=$(join_regex_allow_empty \
           ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
-          ${GCE_PARALLEL_FLAKY_TESTS[@]:+${GCE_PARALLEL_FLAKY_TESTS[@]}} \
           ${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
           ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
           ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
@@ -672,7 +638,6 @@ case ${JOB_NAME} in
           ${GKE_DEFAULT_SKIP_TESTS[@]:+${GKE_DEFAULT_SKIP_TESTS[@]}} \
           ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
           ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
-          ${GKE_FLAKY_TESTS[@]:+${GKE_FLAKY_TESTS[@]}} \
           )"}
     ;;
@@ -687,7 +652,6 @@ case ${JOB_NAME} in
           ${GKE_DEFAULT_SKIP_TESTS[@]:+${GKE_DEFAULT_SKIP_TESTS[@]}} \
           ${REBOOT_SKIP_TESTS[@]:+${REBOOT_SKIP_TESTS[@]}} \
           ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
-          ${GKE_FLAKY_TESTS[@]:+${GKE_FLAKY_TESTS[@]}} \
           ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
           )"}
     ;;
@@ -701,7 +665,6 @@ case ${JOB_NAME} in
     : ${GINKGO_TEST_ARGS:="--ginkgo.skip=$(join_regex_allow_empty \
           ${GKE_REQUIRED_SKIP_TESTS[@]:+${GKE_REQUIRED_SKIP_TESTS[@]}}) \
           --ginkgo.focus=$(join_regex_no_empty \
-          ${GKE_FLAKY_TESTS[@]:+${GKE_FLAKY_TESTS[@]}} \
           ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
           )"}
     ;;
@@ -734,7 +697,6 @@ case ${JOB_NAME} in
           ${GKE_REQUIRED_SKIP_TESTS[@]:+${GKE_REQUIRED_SKIP_TESTS[@]}} \
           ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
           ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
-          ${GKE_FLAKY_TESTS[@]:+${GKE_FLAKY_TESTS[@]}} \
           ${GCE_SOAK_CONTINUOUS_SKIP_TESTS[@]:+${GCE_SOAK_CONTINUOUS_SKIP_TESTS[@]}} \
           )"}
     ;;
diff --git a/test/e2e/daemon_restart.go b/test/e2e/daemon_restart.go
index 6bd4ebe685e..7ef21b5fd31 100644
--- a/test/e2e/daemon_restart.go
+++ b/test/e2e/daemon_restart.go
@@ -183,7 +183,8 @@ func getContainerRestarts(c *client.Client, ns string, labelSelector labels.Sele
 	return failedContainers, containerRestartNodes.List()
 }

-var _ = Describe("DaemonRestart [Disruptive]", func() {
+// Flaky issues #17829, #19023
+var _ = Describe("DaemonRestart [Disruptive] [Flaky]", func() {
 	framework := NewFramework("daemonrestart")
 	rcName := "daemonrestart" + strconv.Itoa(numPods) + "-" + string(util.NewUUID())
diff --git a/test/e2e/daemon_set.go b/test/e2e/daemon_set.go
index 8a12b44ba88..64fea07b2b6 100644
--- a/test/e2e/daemon_set.go
+++ b/test/e2e/daemon_set.go
@@ -126,7 +126,8 @@ var _ = Describe("Daemon set", func() {
 	})

-	It("should run and stop complex daemon", func() {
+	// Flaky issue #16623
+	It("should run and stop complex daemon [Flaky]", func() {
 		complexLabel := map[string]string{daemonsetNameLabel: dsName}
 		nodeSelector := map[string]string{daemonsetColorLabel: "blue"}
 		Logf("Creating daemon with a node selector %s", dsName)
diff --git a/test/e2e/es_cluster_logging.go b/test/e2e/es_cluster_logging.go
index b4c5d08eb1f..0e40bdf4f8b 100644
--- a/test/e2e/es_cluster_logging.go
+++ b/test/e2e/es_cluster_logging.go
@@ -30,7 +30,8 @@ import (
 	. "github.com/onsi/gomega"
 )

-var _ = Describe("Cluster level logging using Elasticsearch", func() {
+// Flaky issue #17873
+var _ = Describe("Cluster level logging using Elasticsearch [Flaky]", func() {
 	f := NewFramework("es-logging")

 	BeforeEach(func() {
diff --git a/test/e2e/garbage_collector.go b/test/e2e/garbage_collector.go
index 2fd02e5fce3..e30a5c1cc2f 100644
--- a/test/e2e/garbage_collector.go
+++ b/test/e2e/garbage_collector.go
@@ -27,7 +27,9 @@ import (
 )

 // This test requires that --terminated-pod-gc-threshold=100 be set on the controller manager
-var _ = Describe("Garbage collector", func() {
+//
+// Slow by design (7 min)
+var _ = Describe("Garbage collector [Slow]", func() {
 	f := NewFramework("garbage-collector")
 	It("should handle the creation of 1000 pods", func() {
 		SkipUnlessProviderIs("gce")
diff --git a/test/e2e/ingress.go b/test/e2e/ingress.go
index c304718c0a6..2611786c0a3 100644
--- a/test/e2e/ingress.go
+++ b/test/e2e/ingress.go
@@ -376,7 +376,9 @@ func (cont *IngressController) Cleanup(del bool) error {
 	return fmt.Errorf(errMsg)
 }

-var _ = Describe("GCE L7 LoadBalancer Controller [Serial]", func() {
+// Slow by design (10 min)
+// Flaky issue #17518
+var _ = Describe("GCE L7 LoadBalancer Controller [Serial] [Slow] [Flaky]", func() {
 	// These variables are initialized after framework's beforeEach.
 	var ns string
 	var addonDir string
diff --git a/test/e2e/kubelet_perf.go b/test/e2e/kubelet_perf.go
index d768eaf19c1..568224902f1 100644
--- a/test/e2e/kubelet_perf.go
+++ b/test/e2e/kubelet_perf.go
@@ -138,7 +138,8 @@ func verifyCPULimits(expected containersCPUSummary, actual nodesCPUSummary) {
 	}
 }

-var _ = Describe("Kubelet [Serial]", func() {
+// Slow by design (1 hour)
+var _ = Describe("Kubelet [Serial] [Slow]", func() {
 	var nodeNames sets.String
 	framework := NewFramework("kubelet-perf")
 	var rm *resourceMonitor
diff --git a/test/e2e/kubeproxy.go b/test/e2e/kubeproxy.go
index 934e4d07edf..05fce57d892 100644
--- a/test/e2e/kubeproxy.go
+++ b/test/e2e/kubeproxy.go
@@ -71,7 +71,8 @@ var _ = Describe("KubeProxy", func() {
 		f: f,
 	}

-	It("should test kube-proxy", func() {
+	// Slow issue #14204 (10 min)
+	It("should test kube-proxy [Slow]", func() {
 		By("cleaning up any pre-existing namespaces used by this test")
 		config.cleanup()
diff --git a/test/e2e/monitor_resources.go b/test/e2e/monitor_resources.go
index 780c5e58217..a2f8ccce7da 100644
--- a/test/e2e/monitor_resources.go
+++ b/test/e2e/monitor_resources.go
@@ -72,7 +72,8 @@ func computeAverage(sliceOfUsages []resourceUsagePerContainer) (result resourceU
 // This tests does nothing except checking current resource usage of containers defined in kubelet_stats systemContainers variable.
 // Test fails if an average container resource consumption over datapointAmount tries exceeds amount defined in allowedUsage.
-var _ = Describe("Resource usage of system containers [Serial]", func() {
+// Flaky issue #13931
+var _ = Describe("Resource usage of system containers [Serial] [Flaky]", func() {
 	var c *client.Client
 	BeforeEach(func() {
 		var err error
diff --git a/test/e2e/namespace.go b/test/e2e/namespace.go
index 4861364a4ec..c3c63546a6f 100644
--- a/test/e2e/namespace.go
+++ b/test/e2e/namespace.go
@@ -91,7 +91,9 @@ var _ = Describe("Namespaces", func() {

 	//Confirms that namespace draining is functioning reasonably
 	//at minute intervals.
-	It("should delete fast enough (90 percent of 100 namespaces in 150 seconds)",
+	//
+	// Flaky issue #19026
+	It("should delete fast enough (90 percent of 100 namespaces in 150 seconds) [Flaky]",
 		func() { extinguish(c, 100, 10, 150) })

 	//comprehensive draining ; uncomment after #7372
diff --git a/test/e2e/nodeoutofdisk.go b/test/e2e/nodeoutofdisk.go
index 8e639729a5c..0fc97d83287 100644
--- a/test/e2e/nodeoutofdisk.go
+++ b/test/e2e/nodeoutofdisk.go
@@ -63,7 +63,8 @@ const (
 //     choose that node to be node with index 1.
 // 7. Observe that the pod in pending status schedules on that node.
 //
-var _ = Describe("NodeOutOfDisk [Serial]", func() {
+// Flaky issue #17687
+var _ = Describe("NodeOutOfDisk [Serial] [Flaky]", func() {
 	var c *client.Client
 	var unfilledNodeName, recoveredNodeName string
 	framework := Framework{BaseName: "node-outofdisk"}
diff --git a/test/e2e/pods.go b/test/e2e/pods.go
index 7a3632a3b55..f76aa8dfd31 100644
--- a/test/e2e/pods.go
+++ b/test/e2e/pods.go
@@ -635,7 +635,8 @@ var _ = Describe("Pods", func() {
 		}, 1, defaultObservationTimeout)
 	})

-	It("should have monotonically increasing restart count [Conformance]", func() {
+	// Slow by design (5 min)
+	It("should have monotonically increasing restart count [Conformance] [Slow]", func() {
 		runLivenessTest(framework.Client, framework.Namespace.Name, &api.Pod{
 			ObjectMeta: api.ObjectMeta{
 				Name: "liveness-http",
@@ -896,7 +897,8 @@ var _ = Describe("Pods", func() {
 		}
 	})

-	It("should not back-off restarting a container on LivenessProbe failure", func() {
+	// Flaky issue #18293
+	It("should not back-off restarting a container on LivenessProbe failure [Flaky]", func() {
 		podClient := framework.Client.Pods(framework.Namespace.Name)
 		podName := "pod-back-off-liveness"
 		containerName := "back-off-liveness"
@@ -936,7 +938,8 @@ var _ = Describe("Pods", func() {
 		}
 	})

-	It("should cap back-off at MaxContainerBackOff", func() {
+	// Slow issue #19027 (20 mins)
+	It("should cap back-off at MaxContainerBackOff [Slow]", func() {
 		podClient := framework.Client.Pods(framework.Namespace.Name)
 		podName := "back-off-cap"
 		containerName := "back-off-cap"
diff --git a/test/e2e/resize_nodes.go b/test/e2e/resize_nodes.go
index 6c71b01c824..6c32bf695e7 100644
--- a/test/e2e/resize_nodes.go
+++ b/test/e2e/resize_nodes.go
@@ -402,7 +402,8 @@ var _ = Describe("Nodes [Disruptive]", func() {
 		systemPodsNo = len(systemPods.Items)
 	})

-	Describe("Resize", func() {
+	// Slow issue #13323 (8 min)
+	Describe("Resize [Slow]", func() {
 		var skipped bool

 		BeforeEach(func() {
diff --git a/test/e2e/scheduler_predicates.go b/test/e2e/scheduler_predicates.go
index e286ed8dab9..dbae84a92d3 100644
--- a/test/e2e/scheduler_predicates.go
+++ b/test/e2e/scheduler_predicates.go
@@ -200,7 +200,9 @@ var _ = Describe("SchedulerPredicates [Serial]", func() {
 	// This test verifies that max-pods flag works as advertised. It assumes that cluster add-on pods stay stable
 	// and cannot be run in parallel with any other test that touches Nodes or Pods. It is so because to check
 	// if max-pods is working we need to fully saturate the cluster and keep it in this state for few seconds.
-	It("validates MaxPods limit number of pods that are allowed to run", func() {
+	//
+	// Slow PR #13315 (8 min)
+	It("validates MaxPods limit number of pods that are allowed to run [Slow]", func() {
 		totalPodCapacity = 0

 		for _, node := range nodeList.Items {
diff --git a/test/e2e/service.go b/test/e2e/service.go
index e83f63fe21c..30961950e8c 100644
--- a/test/e2e/service.go
+++ b/test/e2e/service.go
@@ -778,7 +778,8 @@ var _ = Describe("Services", func() {
 	})

 	// This test hits several load-balancer cases because LB turnup is slow.
- It("should serve identically named services in different namespaces on different load-balancers", func() { + // Flaky issue #18952 + It("should serve identically named services in different namespaces on different load-balancers [Flaky]", func() { // requires ExternalLoadBalancer SkipUnlessProviderIs("gce", "gke", "aws") diff --git a/test/e2e/service_accounts.go b/test/e2e/service_accounts.go index 0169e546440..bc958345a89 100644 --- a/test/e2e/service_accounts.go +++ b/test/e2e/service_accounts.go @@ -30,7 +30,8 @@ import ( . "github.com/onsi/ginkgo" ) -var _ = Describe("ServiceAccounts", func() { +// Flaky issue #19024 +var _ = Describe("ServiceAccounts [Flaky]", func() { f := NewFramework("svcaccounts") It("should mount an API token into pods [Conformance]", func() {