From c3e1c129856bc5effd10add7439aa508757c6814 Mon Sep 17 00:00:00 2001
From: Phillip Wittrock <pwittroc@google.com>
Date: Fri, 27 May 2016 12:29:28 -0700
Subject: [PATCH] Kubemark e2e tests should exit 0 for test failures.

Jenkins relies on junit.xml to identify test failures
and non-0 exit codes to identify infrastructure failures.
Test failures in kubemark tests should not cause the test
script to exit non-0.  Infrastructure failures should.

- Add function to dump cluster logs without exiting (refactor)
- Change `test/kubemark/stop-kubemark.sh` to be run regardless of whether tests fail or not
- Overwrite the exit code of failed tests with the exit code of dumping the cluster logs
---
 hack/jenkins/e2e-runner.sh | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/hack/jenkins/e2e-runner.sh b/hack/jenkins/e2e-runner.sh
index aa8855d9221..d6f77e0d529 100755
--- a/hack/jenkins/e2e-runner.sh
+++ b/hack/jenkins/e2e-runner.sh
@@ -140,9 +140,7 @@ function install_google_cloud_sdk_tarball() {
 # bringing the cluster down.
 function dump_cluster_logs_and_exit() {
     local -r exit_status=$?
-    if [[ -x "cluster/log-dump.sh"  ]]; then
-        ./cluster/log-dump.sh "${ARTIFACTS}"
-    fi
+    dump_cluster_logs
     if [[ "${E2E_DOWN,,}" == "true" ]]; then
       # If we tried to bring the cluster up, make a courtesy attempt
       # to bring the cluster down so we're not leaving resources
@@ -154,6 +152,14 @@ function dump_cluster_logs_and_exit() {
     exit ${exit_status}
 }
 
+# Only call after attempting to bring the cluster up. Don't call after
+# bringing the cluster down.
+function dump_cluster_logs() {
+    if [[ -x "cluster/log-dump.sh"  ]]; then
+        ./cluster/log-dump.sh "${ARTIFACTS}"
+    fi
+}
+
 ### Pre Set Up ###
 if running_in_docker; then
     curl -fsSL --retry 3 -o "${WORKSPACE}/google-cloud-sdk.tar.gz" 'https://dl.google.com/dl/cloudsdk/channels/rapid/google-cloud-sdk.tar.gz'
@@ -364,7 +370,11 @@ if [[ "${USE_KUBEMARK:-}" == "true" ]]; then
   # If start-kubemark fails, we trigger empty set of tests that would trigger storing logs from the base cluster.
   ./test/kubemark/start-kubemark.sh || dump_cluster_logs_and_exit
   # Similarly, if tests fail, we trigger empty set of tests that would trigger storing logs from the base cluster.
-  ./test/kubemark/run-e2e-tests.sh --ginkgo.focus="${KUBEMARK_TESTS:-starting\s30\spods}" "${KUBEMARK_TEST_ARGS:-}" || dump_cluster_logs_and_exit
+  # We intentionally overwrite the exit-code from `run-e2e-tests.sh` because we want jenkins to look at the
+  # junit.xml results for test failures and not process the exit code.  This is needed by jenkins to more gracefully
+  # handle blocking the merge queue as a result of test failure flakes.  Infrastructure failures should continue to
+  # exit non-0.
+  ./test/kubemark/run-e2e-tests.sh --ginkgo.focus="${KUBEMARK_TESTS:-starting\s30\spods}" "${KUBEMARK_TEST_ARGS:-}" || dump_cluster_logs
   ./test/kubemark/stop-kubemark.sh
   NUM_NODES=${NUM_NODES_BKP}
   MASTER_SIZE=${MASTER_SIZE_BKP}