From 27369806b5f65a37dad6ea7959c69ee5fc5e3bb6 Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Fri, 19 Feb 2016 14:25:54 -0800 Subject: [PATCH] Move e2e environment variables for most jobs into YAML. --- hack/jenkins/e2e.sh | 346 ------------------ hack/jenkins/job-configs/global.yaml | 53 +++ .../kubernetes-e2e-gce-enormous-startup.yaml | 32 +- hack/jenkins/job-configs/kubernetes-e2e.yaml | 189 +++++++++- .../job-configs/kubernetes-kubemark.yaml | 55 ++- hack/jenkins/job-configs/kubernetes-soak.yaml | 45 ++- 6 files changed, 350 insertions(+), 370 deletions(-) diff --git a/hack/jenkins/e2e.sh b/hack/jenkins/e2e.sh index a2fab68e24f..2e9ac5bb34e 100755 --- a/hack/jenkins/e2e.sh +++ b/hack/jenkins/e2e.sh @@ -52,23 +52,6 @@ elif [[ ${JOB_NAME} =~ ^kubernetes-.*-aws ]]; then export NUM_NODES="3" fi -# Set environment variables based on soak jobs -if [[ ${JOB_NAME} =~ soak-weekly ]]; then - export FAIL_ON_GCP_RESOURCE_LEAK="false" - export E2E_TEST="false" - export E2E_DOWN="false" -elif [[ ${JOB_NAME} =~ soak-continuous ]]; then - export FAIL_ON_GCP_RESOURCE_LEAK="false" - export E2E_UP="false" - export E2E_DOWN="false" - # Clear out any orphaned namespaces in case previous run was interrupted. - export E2E_CLEAN_START="true" - # We should be testing the reliability of a long-running cluster. The - # [Disruptive] tests kill/restart components or nodes in the cluster, - # defeating the purpose of a soak cluster. (#15722) - export GINKGO_TEST_ARGS="--ginkgo.skip=\[Disruptive\]|\[Flaky\]|\[Feature:.+\]" -fi - # Set environment variables based on upgrade jobs if [[ ${JOB_NAME} =~ ^kubernetes-upgrade ]]; then # Upgrade jobs bounce back and forth between versions; just force @@ -134,335 +117,6 @@ case ${JOB_NAME} in export NUM_NODES="6" ;; - # GCE core jobs - - # Runs all non-slow, non-serial, non-flaky, tests on GCE in parallel. - kubernetes-e2e-gce) - # This is the *only* job that should publish the last green version. 
- export E2E_PUBLISH_GREEN_VERSION="true" - # This list should match the list in kubernetes-pull-build-test-e2e-gce. - export GINKGO_TEST_ARGS="--ginkgo.skip=\[Slow\]|\[Serial\]|\[Disruptive\]|\[Flaky\]|\[Feature:.+\]" - export GINKGO_PARALLEL="y" - export PROJECT="k8s-jkns-e2e-gce" - ;; - - # Runs slow tests on GCE, sequentially. - kubernetes-e2e-gce-slow) - export GINKGO_TEST_ARGS="--ginkgo.focus=\[Slow\] \ - --ginkgo.skip=\[Serial\]|\[Disruptive\]|\[Flaky\]|\[Feature:.+\]" - export GINKGO_PARALLEL="y" - export PROJECT="k8s-jkns-e2e-gce-slow" - ;; - - # Runs all non-flaky, non-slow tests on GCE, sequentially, - # and in a multi-zone ("Ubernetes Lite") cluster. - kubernetes-e2e-gce-ubernetes-lite) - export PROJECT="k8s-jkns-e2e-gce-ubelite" - export MULTIZONE="true" - export KUBE_GCE_ZONE="" - export E2E_ZONES="us-central1-a us-central1-b us-central1-f" - ;; - - # Run the [Serial], [Disruptive], and [Feature:Restart] tests on GCE. - kubernetes-e2e-gce-serial) - export GINKGO_TEST_ARGS="--ginkgo.focus=\[Serial\]|\[Disruptive\] \ - --ginkgo.skip=\[Flaky\]|\[Feature:.+\]" - export PROJECT="kubernetes-jkns-e2e-gce-serial" - ;; - - # Runs only the ingress tests on GCE. - kubernetes-e2e-gce-ingress) - # XXX Not a unique project - export E2E_NAME="e2e-ingress" - export GINKGO_TEST_ARGS="--ginkgo.focus=\[Feature:Ingress\]" - # TODO: Move this into a different project. Currently, since this test - # shares resources with various other networking tests, so it's easier - # to zero in on the source of a leak if it's run in isolation. - export PROJECT="kubernetes-flannel" - ;; - - # Runs only the ingress tests on GKE. - kubernetes-e2e-gke-ingress) - # XXX Not a unique project - export E2E_NAME="e2e-gke-ingress" - export GINKGO_TEST_ARGS="--ginkgo.focus=\[Feature:Ingress\]" - # TODO: Move this into a different project. Currently, since this test - # shares resources with various other networking tests, it's easier to - # zero in on the source of a leak if it's run in isolation. 
- export PROJECT="kubernetes-flannel" - ;; - - # Runs the flaky tests on GCE, sequentially. - kubernetes-e2e-gce-flaky) - export GINKGO_TEST_ARGS="--ginkgo.focus=\[Flaky\] \ - --ginkgo.skip=\[Feature:.+\]" - export PROJECT="k8s-jkns-e2e-gce-flaky" - ;; - - # GKE core jobs - - # Runs all non-slow, non-serial, non-flaky, tests on GKE in parallel. - kubernetes-e2e-gke) - export PROJECT="k8s-jkns-e2e-gke-ci" - export GINKGO_TEST_ARGS="--ginkgo.skip=\[Slow\]|\[Serial\]|\[Disruptive\]|\[Flaky\]|\[Feature:.+\]" - export GINKGO_PARALLEL="y" - ;; - - kubernetes-e2e-gke-slow) - export PROJECT="k8s-jkns-e2e-gke-slow" - export GINKGO_TEST_ARGS="--ginkgo.focus=\[Slow\] \ - --ginkgo.skip=\[Serial\]|\[Disruptive\]|\[Flaky\]|\[Feature:.+\]" - export GINKGO_PARALLEL="y" - ;; - - # Run the [Serial], [Disruptive], and [Feature:Restart] tests on GKE. - kubernetes-e2e-gke-serial) - export GINKGO_TEST_ARGS="--ginkgo.focus=\[Serial\]|\[Disruptive\] \ - --ginkgo.skip=\[Flaky\]|\[Feature:.+\]" - export PROJECT="jenkins-gke-e2e-serial" - ;; - - kubernetes-e2e-gke-flaky) - export PROJECT="k8s-jkns-e2e-gke-ci-flaky" - export GINKGO_TEST_ARGS="--ginkgo.focus=\[Flaky\] \ - --ginkgo.skip=\[Feature:.+\]" - ;; - - # AWS core jobs - - # Runs all non-flaky, non-slow tests on AWS, sequentially. - kubernetes-e2e-aws) - export GINKGO_TEST_ARGS="--ginkgo.skip=\[Slow\]|\[Serial\]|\[Disruptive\]|\[Flaky\]|\[Feature:.+\]" - export GINKGO_PARALLEL="y" - export PROJECT="k8s-jkns-e2e-aws" - export AWS_CONFIG_FILE='/var/lib/jenkins/.aws/credentials' - export AWS_SSH_KEY='/var/lib/jenkins/.ssh/kube_aws_rsa' - export KUBE_SSH_USER='ubuntu' - # This is needed to be able to create PD from the e2e test - export AWS_SHARED_CREDENTIALS_FILE='/var/lib/jenkins/.aws/credentials' - ;; - - # Feature jobs - - # Runs only the reboot tests on GCE. 
- kubernetes-e2e-gce-reboot) - export GINKGO_TEST_ARGS="--ginkgo.focus=\[Feature:Reboot\]" - export PROJECT="k8s-jkns-e2e-gce-ci-reboot" - ;; - - kubernetes-e2e-gke-reboot) - export PROJECT="k8s-jkns-e2e-gke-ci-reboot" - export GINKGO_TEST_ARGS="--ginkgo.focus=\[Feature:Reboot\]" - ;; - - # Runs only the examples tests on GCE. - kubernetes-e2e-gce-examples) - export GINKGO_TEST_ARGS="--ginkgo.focus=\[Feature:Example\]" - export PROJECT="k8s-jkns-e2e-examples" - ;; - - # Runs only the elasticsearch logging tests on GCE. - kubernetes-e2e-gce-es-logging) - : ${E2E_CLUSTER_NAME:="jenkins-gce-e2e-es-logging"} - : ${E2E_NETWORK:="e2e-es-logging"} - : ${GINKGO_TEST_ARGS:="--ginkgo.focus=\[Feature:Elasticsearch\]"} - : ${KUBE_GCE_INSTANCE_PREFIX:="e2e-es-logging"} - : ${PROJECT:="kubernetes-es-logging"} - # Enable elasticsearch logging addon - KUBE_LOGGING_DESTINATION="elasticsearch" - ;; - - # Runs only the autoscaling tests on GCE. - kubernetes-e2e-gce-autoscaling) - export GINKGO_TEST_ARGS="--ginkgo.focus=\[Feature:ClusterSizeAutoscaling\]|\[Feature:InitialResources\] \ - --ginkgo.skip=\[Flaky\]" - export PROJECT="k8s-jnks-e2e-gce-autoscaling" - # Override GCE default for cluster size autoscaling purposes. - export KUBE_ENABLE_CLUSTER_MONITORING="googleinfluxdb" - export KUBE_ADMISSION_CONTROL="NamespaceLifecycle,InitialResources,LimitRanger,SecurityContextDeny,ServiceAccount,ResourceQuota" - ;; - - # Runs the performance/scalability tests on GCE. A larger cluster is used. - kubernetes-e2e-gce-scalability) - # XXX Not a unique project - export E2E_NAME="e2e-scalability" - export GINKGO_TEST_ARGS="--ginkgo.focus=\[Feature:Performance\] \ - --gather-resource-usage=true \ - --gather-metrics-at-teardown=true \ - --gather-logs-sizes=true \ - --output-print-type=json" - export PROJECT="kubernetes-jenkins" - export FAIL_ON_GCP_RESOURCE_LEAK="false" - # Override GCE defaults. 
- export MASTER_SIZE="n1-standard-4" - export NODE_SIZE="n1-standard-2" - export NODE_DISK_SIZE="50GB" - export NUM_NODES="100" - # Reduce logs verbosity - export TEST_CLUSTER_LOG_LEVEL="--v=2" - # TODO: Remove when we figure out the reason for occasional failures #19048 - export KUBELET_TEST_LOG_LEVEL="--v=4" - # Increase resync period to simulate production - export TEST_CLUSTER_RESYNC_PERIOD="--min-resync-period=12h" - ;; - - # Runs e2e on GCE with flannel and VXLAN. - kubernetes-e2e-gce-flannel) - # XXX Not a unique project - export E2E_NAME="e2e-flannel" - export PROJECT="kubernetes-flannel" - export FAIL_ON_GCP_RESOURCE_LEAK="false" - # Override GCE defaults. - export NETWORK_PROVIDER="flannel" - ;; - - # Runs the performance/scalability test on huge 1000-node cluster on GCE. - # Flannel is used as network provider. - # Allows a couple of nodes to be NotReady during startup - kubernetes-e2e-gce-enormous-cluster) - # XXX Not a unique project - export E2E_NAME="e2e-enormous-cluster" - # TODO: Currently run only density test. - # Once this is stable, run the whole [Performance] suite. - export GINKGO_TEST_ARGS="--ginkgo.focus=starting\s30\spods\sper\snode" - export PROJECT="kubernetes-scale" - export FAIL_ON_GCP_RESOURCE_LEAK="false" - # Override GCE defaults. - export NETWORK_PROVIDER="flannel" - # Temporarily switch of Heapster, as this will not schedule anywhere. - # TODO: Think of a solution to enable it. 
- export KUBE_ENABLE_CLUSTER_MONITORING="none" - export KUBE_GCE_ZONE="asia-east1-a" - export MASTER_SIZE="n1-standard-32" - export NODE_SIZE="n1-standard-1" - export NODE_DISK_SIZE="50GB" - export NUM_NODES="1000" - export ALLOWED_NOTREADY_NODES="2" - export EXIT_ON_WEAK_ERROR="false" - # Reduce logs verbosity - export TEST_CLUSTER_LOG_LEVEL="--v=1" - # Increase resync period to simulate production - export TEST_CLUSTER_RESYNC_PERIOD="--min-resync-period=12h" - ;; - - # Starts and tears down 1000-node cluster on GCE using flannel networking - # Requires all 1000 nodes to come up. - kubernetes-e2e-gce-enormous-startup) - # XXX Not a unique project - # TODO: increase a quota for networks in kubernetes-scale and move this test to its own network - export E2E_NAME="e2e-enormous-cluster" - export E2E_TEST="false" - export PROJECT="kubernetes-scale" - export FAIL_ON_GCP_RESOURCE_LEAK="false" - # Override GCE defaults. - export NETWORK_PROVIDER="flannel" - # Temporarily switch of Heapster, as this will not schedule anywhere. - # TODO: Think of a solution to enable it. 
- export KUBE_ENABLE_CLUSTER_MONITORING="none" - export KUBE_GCE_ZONE="us-east1-b" - export MASTER_SIZE="n1-standard-32" - export NODE_SIZE="n1-standard-1" - export NODE_DISK_SIZE="50GB" - export NUM_NODES="1000" - # Reduce logs verbosity - export TEST_CLUSTER_LOG_LEVEL="--v=1" - # Increase resync period to simulate production - export TEST_CLUSTER_RESYNC_PERIOD="--min-resync-period=12h" - ;; - - # Run Kubemark test on a fake 100 node cluster to have a comparison - # to the real results from scalability suite - kubernetes-kubemark-gce) - export PROJECT="k8s-jenkins-kubemark" - export E2E_TEST="false" - export USE_KUBEMARK="true" - export KUBEMARK_TESTS="\[Feature:Performance\]" - # Override defaults to be independent from GCE defaults and set kubemark parameters - export NUM_NODES="10" - export MASTER_SIZE="n1-standard-2" - export NODE_SIZE="n1-standard-1" - export KUBE_GCE_ZONE="us-central1-b" - export KUBEMARK_MASTER_SIZE="n1-standard-4" - export KUBEMARK_NUM_NODES="100" - ;; - - # Run Kubemark test on a fake 500 node cluster to test for regressions on - # bigger clusters - kubernetes-kubemark-500-gce) - # XXX Not a unique project - export E2E_NAME="kubemark-500" - export PROJECT="kubernetes-scale" - export E2E_TEST="false" - export USE_KUBEMARK="true" - export KUBEMARK_TESTS="\[Feature:Performance\]" - export FAIL_ON_GCP_RESOURCE_LEAK="false" - # Override defaults to be independent from GCE defaults and set kubemark parameters - export NUM_NODES="6" - export MASTER_SIZE="n1-standard-4" - export NODE_SIZE="n1-standard-8" - export KUBE_GCE_ZONE="us-east1-b" - export KUBEMARK_MASTER_SIZE="n1-standard-16" - export KUBEMARK_NUM_NODES="500" - ;; - - # Run big Kubemark test, this currently means a 1000 node cluster and 16 core master - kubernetes-kubemark-gce-scale) - # XXX Not a unique project - export E2E_NAME="kubemark-1000" - export PROJECT="kubernetes-scale" - export E2E_TEST="false" - export USE_KUBEMARK="true" - export KUBEMARK_TESTS="\[Feature:Performance\]" - 
export FAIL_ON_GCP_RESOURCE_LEAK="false" - # Override defaults to be independent from GCE defaults and set kubemark parameters - # We need 11 so that we won't hit max-pods limit (set to 100). TODO: do it in a nicer way. - export NUM_NODES="11" - export MASTER_SIZE="n1-standard-4" - # Note: can fit about 17 hollow nodes per core so NUM_NODES x - # cores_per_node should be set accordingly. - export NODE_SIZE="n1-standard-8" - export KUBEMARK_MASTER_SIZE="n1-standard-16" - export KUBEMARK_NUM_NODES="1000" - export KUBE_GCE_ZONE="us-east1-b" - ;; - - # Soak jobs - - # Sets up the GCE soak cluster weekly using the latest CI release. - kubernetes-soak-weekly-deploy-gce) - export HAIRPIN_MODE="false" - export PROJECT="k8s-jkns-gce-soak" - ;; - - # Runs tests on GCE soak cluster. - kubernetes-soak-continuous-e2e-gce) - export HAIRPIN_MODE="false" - export PROJECT="k8s-jkns-gce-soak" - ;; - - # Clone of kubernetes-soak-weekly-deploy-gce. Issue #20832. - kubernetes-soak-weekly-deploy-gce-2) - export PROJECT="k8s-jkns-gce-soak-2" - ;; - - # Clone of kubernetes-soak-continuous-e2e-gce. Issue #20832. - kubernetes-soak-continuous-e2e-gce-2) - export PROJECT="k8s-jkns-gce-soak-2" - ;; - - # Sets up the GKE soak cluster weekly using the latest CI release. - kubernetes-soak-weekly-deploy-gke) - export PROJECT="k8s-jkns-gke-soak" - # Need at least n1-standard-2 nodes to run kubelet_perf tests - export MACHINE_TYPE="n1-standard-2" - ;; - - # Runs tests on GKE soak cluster. 
- kubernetes-soak-continuous-e2e-gke) - export PROJECT="k8s-jkns-gke-soak" - export E2E_OPT="--check_version_skew=false" - ;; - # Upgrade jobs # kubernetes-upgrade-gke-1.0-master diff --git a/hack/jenkins/job-configs/global.yaml b/hack/jenkins/job-configs/global.yaml index e80dc2bca6a..2ba24ea43f8 100644 --- a/hack/jenkins/job-configs/global.yaml +++ b/hack/jenkins/job-configs/global.yaml @@ -90,3 +90,56 @@ name: global emails: '$DEFAULT_RECIPIENTS' cron-string: 'H/30 * * * *' + branch: 'master' + job-env: '' + runner: curl -fsS --retry 3 "https://raw.githubusercontent.com/kubernetes/kubernetes/master/hack/jenkins/e2e-runner.sh" | bash - + old-runner-1-1: curl -fsS --retry 3 "https://raw.githubusercontent.com/kubernetes/kubernetes/release-1.1/hack/jenkins/e2e.sh" | bash - + old-runner-1-0: curl -fsS --retry 3 "https://raw.githubusercontent.com/kubernetes/kubernetes/release-1.0/hack/jenkins/e2e.sh" | bash - + provider-env: '' + gce-provider-env: | + export KUBERNETES_PROVIDER="gce" + export E2E_MIN_STARTUP_PODS="1" + export KUBE_GCE_ZONE="us-central1-f" + export FAIL_ON_GCP_RESOURCE_LEAK="true" + gke-provider-env: | + export KUBERNETES_PROVIDER="gke" + export ZONE="us-central1-f" + # By default, GKE tests run against the GKE test endpoint using CI Cloud SDK. + # Release jobs (e.g. prod, staging, and test) override these two variables. 
+ export CLOUDSDK_BUCKET="gs://cloud-sdk-build/testing/staging" + export CLOUDSDK_API_ENDPOINT_OVERRIDES_CONTAINER="https://test-container.sandbox.googleapis.com/" + export FAIL_ON_GCP_RESOURCE_LEAK="true" + aws-provider-env: | + export KUBERNETES_PROVIDER="aws" + export E2E_MIN_STARTUP_PODS="1" + export KUBE_AWS_ZONE="us-west-2a" + export MASTER_SIZE="m3.medium" + export NODE_SIZE="m3.medium" + export NUM_NODES="3" + post-env: | + # Nothing should want Jenkins $HOME + export HOME=${{WORKSPACE}} + + # Assume we're upping, testing, and downing a cluster + export E2E_UP="${{E2E_UP:-true}}" + export E2E_TEST="${{E2E_TEST:-true}}" + export E2E_DOWN="${{E2E_DOWN:-true}}" + + # Skip gcloud update checking + export CLOUDSDK_COMPONENT_MANAGER_DISABLE_UPDATE_CHECK=true + + # AWS variables + export KUBE_AWS_INSTANCE_PREFIX="${{E2E_NAME:-jenkins-e2e}}" + + # GCE variables + export INSTANCE_PREFIX="${{E2E_NAME:-jenkins-e2e}}" + export KUBE_GCE_NETWORK="${{E2E_NAME:-jenkins-e2e}}" + export KUBE_GCE_INSTANCE_PREFIX="${{E2E_NAME:-jenkins-e2e}}" + export GCE_SERVICE_ACCOUNT=$(gcloud auth list 2> /dev/null | grep active | cut -f3 -d' ') + + # GKE variables + export CLUSTER_NAME="${{E2E_NAME:-jenkins-e2e}}" + export KUBE_GKE_NETWORK="${{E2E_NAME:-jenkins-e2e}}" + + # Get golang into our PATH so we can run e2e.go + export PATH="${{PATH}}:/usr/local/go/bin" diff --git a/hack/jenkins/job-configs/kubernetes-e2e-gce-enormous-startup.yaml b/hack/jenkins/job-configs/kubernetes-e2e-gce-enormous-startup.yaml index 25ec28e8e9c..e3d2920f56a 100644 --- a/hack/jenkins/job-configs/kubernetes-e2e-gce-enormous-startup.yaml +++ b/hack/jenkins/job-configs/kubernetes-e2e-gce-enormous-startup.yaml @@ -1,11 +1,34 @@ -- job: +# FIXME: desnowflake this +- job-template: name: 'kubernetes-e2e-gce-enormous-startup' description: 'Starts and deletes empty 1000 node cluster. Does not allow Node failures. 
Test owner: gmarek' logrotate: daysToKeep: 7 builders: - shell: | - curl -fsS --retry 3 "https://raw.githubusercontent.com/kubernetes/kubernetes/master/hack/jenkins/e2e.sh" | bash - + {gce-provider-env} + # XXX Not a unique project + # TODO: increase a quota for networks in kubernetes-scale and move this test to its own network + export E2E_NAME="e2e-enormous-startup" + export E2E_TEST="false" + export PROJECT="kubernetes-scale" + export FAIL_ON_GCP_RESOURCE_LEAK="false" + # Override GCE defaults. + export NETWORK_PROVIDER="flannel" + # Temporarily switch off Heapster, as this will not schedule anywhere. + # TODO: Think of a solution to enable it. + export KUBE_ENABLE_CLUSTER_MONITORING="none" + export KUBE_GCE_ZONE="us-east1-b" + export MASTER_SIZE="n1-standard-32" + export NODE_SIZE="n1-standard-1" + export NODE_DISK_SIZE="50GB" + export NUM_NODES="1000" + # Reduce logs verbosity + export TEST_CLUSTER_LOG_LEVEL="--v=1" + # Increase resync period to simulate production + export TEST_CLUSTER_RESYNC_PERIOD="--min-resync-period=12h" + {post-env} + {runner} properties: - mail-watcher publishers: @@ -27,3 +50,8 @@ fail: true - timestamps - workspace-cleanup + +- project: + name: 'kubernetes-e2e-gce-enormous-startup' + jobs: + - 'kubernetes-e2e-gce-enormous-startup' diff --git a/hack/jenkins/job-configs/kubernetes-e2e.yaml b/hack/jenkins/job-configs/kubernetes-e2e.yaml index 05845765734..5ac81ca0a04 100644 --- a/hack/jenkins/job-configs/kubernetes-e2e.yaml +++ b/hack/jenkins/job-configs/kubernetes-e2e.yaml @@ -18,7 +18,10 @@ disabled: false builders: - shell: | - curl -fsS --retry 3 "https://raw.githubusercontent.com/kubernetes/kubernetes/{branch}/hack/jenkins/e2e.sh" | bash - + {provider-env} + {job-env} + {post-env} + {runner} properties: - mail-watcher wrappers: @@ -65,37 +68,96 @@ name: kubernetes-e2e-gce-master trigger-job: 'kubernetes-build' test-owner: 'Build Cop' - branch: 'master' + provider-env: '{gce-provider-env}' suffix: - 'gce': - description: 'Run E2E 
tests on GCE using the latest successful build.' + description: 'Runs all non-slow, non-serial, non-flaky, tests on GCE in parallel.' timeout: 30 + job-env: | + # This is the *only* job that should publish the last green version. + export E2E_PUBLISH_GREEN_VERSION="true" + # This list should match the list in kubernetes-pull-build-test-e2e-gce. + export GINKGO_TEST_ARGS="--ginkgo.skip=\[Slow\]|\[Serial\]|\[Disruptive\]|\[Flaky\]|\[Feature:.+\]" + export GINKGO_PARALLEL="y" + export PROJECT="k8s-jkns-e2e-gce" - 'gce-slow': - description: 'Run slow E2E tests on GCE using the latest successful build.' + description: 'Runs slow tests on GCE, sequentially.' timeout: 60 + job-env: | + export GINKGO_TEST_ARGS="--ginkgo.focus=\[Slow\] \ + --ginkgo.skip=\[Serial\]|\[Disruptive\]|\[Flaky\]|\[Feature:.+\]" + export GINKGO_PARALLEL="y" + export PROJECT="k8s-jkns-e2e-gce-slow" - 'gce-serial': - description: 'Run [Serial], [Disruptive], and [Feature:Restart] tests on GCE using the latest successful build.' + description: 'Run [Serial], [Disruptive], tests on GCE.' timeout: 300 + job-env: | + export GINKGO_TEST_ARGS="--ginkgo.focus=\[Serial\]|\[Disruptive\] \ + --ginkgo.skip=\[Flaky\]|\[Feature:.+\]" + export PROJECT="kubernetes-jkns-e2e-gce-serial" - 'gce-reboot': - description: 'Run [Feature:Reboot] tests on GCE using the latest successful build.' + description: 'Run [Feature:Reboot] tests on GCE.' timeout: 180 + job-env: | + export GINKGO_TEST_ARGS="--ginkgo.focus=\[Feature:Reboot\]" + export PROJECT="k8s-jkns-e2e-gce-ci-reboot" - 'gce-autoscaling': - description: 'Run autoscaling E2E tests on GCE using the latest successful build.' + description: 'Run autoscaling E2E tests on GCE.' timeout: 210 + job-env: | + export GINKGO_TEST_ARGS="--ginkgo.focus=\[Feature:ClusterSizeAutoscaling\]|\[Feature:InitialResources\] \ + --ginkgo.skip=\[Flaky\]" + export PROJECT="k8s-jnks-e2e-gce-autoscaling" + # Override GCE default for cluster size autoscaling purposes. 
+ export KUBE_ENABLE_CLUSTER_MONITORING="googleinfluxdb" + export KUBE_ADMISSION_CONTROL="NamespaceLifecycle,InitialResources,LimitRanger,SecurityContextDeny,ServiceAccount,ResourceQuota" - 'gce-flaky': - description: 'Run E2E tests on GCE using the latest successful build. Limit to known-flaky tests.' + description: 'Run the flaky tests on GCE, sequentially.' timeout: 180 + job-env: | + export GINKGO_TEST_ARGS="--ginkgo.focus=\[Flaky\] \ + --ginkgo.skip=\[Feature:.+\]" + export PROJECT="k8s-jkns-e2e-gce-flaky" - 'gce-scalability': - description: 'Run scalability E2E tests on GCE using the latest successful build.' + description: 'Run the performance/scalability tests on GCE. A larger cluster is used.' timeout: 120 + job-env: | + # XXX Not a unique project + export E2E_NAME="e2e-scalability" + export GINKGO_TEST_ARGS="--ginkgo.focus=\[Feature:Performance\] \ + --gather-resource-usage=true \ + --gather-metrics-at-teardown=true \ + --gather-logs-sizes=true \ + --output-print-type=json" + export PROJECT="kubernetes-jenkins" + export FAIL_ON_GCP_RESOURCE_LEAK="false" + # Override GCE defaults. + export MASTER_SIZE="n1-standard-4" + export NODE_SIZE="n1-standard-2" + export NODE_DISK_SIZE="50GB" + export NUM_NODES="100" + # Reduce logs verbosity + export TEST_CLUSTER_LOG_LEVEL="--v=2" + # TODO: Remove when we figure out the reason for occasional failures #19048 + export KUBELET_TEST_LOG_LEVEL="--v=4" + # Increase resync period to simulate production + export TEST_CLUSTER_RESYNC_PERIOD="--min-resync-period=12h" - 'gce-examples': - description: 'Run e2e examples test on GCE using the latest successful Kubernetes build.' + description: 'Run E2E examples test on GCE.' timeout: 90 + job-env: | + export GINKGO_TEST_ARGS="--ginkgo.focus=\[Feature:Example\]" + export PROJECT="k8s-jkns-e2e-examples" - 'gce-ubernetes-lite': - description: 'Run E2E tests on GCE across multiple zones using the latest successful build.' 
+ description: 'Run all non-flaky, non-slow tests on GCE, sequentially, and in a multi-zone (Ubernetes-lite) cluster.' timeout: 150 emails: '$DEFAULT_RECIPIENTS, quinton@google.com, justin@fathomdb.com' test-owner: 'quinton' + job-env: | + export PROJECT="k8s-jkns-e2e-gce-ubelite" + export MULTIZONE="true" + export KUBE_GCE_ZONE="" + export E2E_ZONES="us-central1-a us-central1-b us-central1-f" jobs: - 'kubernetes-e2e-{suffix}' @@ -103,21 +165,37 @@ name: kubernetes-e2e-gke-master trigger-job: 'kubernetes-build' test-owner: 'GKE on-call' - branch: 'master' emails: '$DEFAULT_RECIPIENTS, cloud-kubernetes-alerts@google.com' + provider-env: '{gke-provider-env}' suffix: - 'gke': - description: Runs all non-slow, non-serial, non-flaky, tests on GKE in parallel (against GKE test endpoint) + description: 'Runs all non-slow, non-serial, non-flaky, tests on GKE in parallel (against GKE test endpoint)' timeout: 30 + job-env: | + export PROJECT="k8s-jkns-e2e-gke-ci" + export GINKGO_TEST_ARGS="--ginkgo.skip=\[Slow\]|\[Serial\]|\[Disruptive\]|\[Flaky\]|\[Feature:.+\]" + export GINKGO_PARALLEL="y" - 'gke-slow': description: 'Run slow E2E tests on GKE using the latest successful build.' timeout: 60 + job-env: | + export PROJECT="k8s-jkns-e2e-gke-slow" + export GINKGO_TEST_ARGS="--ginkgo.focus=\[Slow\] \ + --ginkgo.skip=\[Serial\]|\[Disruptive\]|\[Flaky\]|\[Feature:.+\]" + export GINKGO_PARALLEL="y" - 'gke-serial': - description: 'Run [Serial], [Disruptive], and [Feature:Restart] tests on GKE using the latest successful build.' + description: 'Run [Serial], [Disruptive] tests on GKE.' timeout: 300 + job-env: | + export GINKGO_TEST_ARGS="--ginkgo.focus=\[Serial\]|\[Disruptive\] \ + --ginkgo.skip=\[Flaky\]|\[Feature:.+\]" + export PROJECT="jenkins-gke-e2e-serial" - 'gke-reboot': description: 'Run [Feature:Reboot] tests on GKE using the latest successful build.' 
timeout: 180 + job-env: | + export PROJECT="k8s-jkns-e2e-gke-ci-reboot" + export GINKGO_TEST_ARGS="--ginkgo.focus=\[Feature:Reboot\]" - 'gke-flaky': description: | Run flaky e2e tests using the following config:
@@ -128,6 +206,10 @@ - cluster (k8s): ci/latest.txt
- tests: ci/latest.txt timeout: 300 + job-env: | + export PROJECT="k8s-jkns-e2e-gke-ci-flaky" + export GINKGO_TEST_ARGS="--ginkgo.focus=\[Flaky\] \ + --ginkgo.skip=\[Feature:.+\]" jobs: - 'kubernetes-e2e-{suffix}' @@ -136,6 +218,8 @@ trigger-job: 'kubernetes-build-1.1' test-owner: 'GKE on-call' branch: 'release-1.1' + runner: '{old-runner-1-1}' + post-env: '' emails: '$DEFAULT_RECIPIENTS, cloud-kubernetes-alerts@google.com' suffix: - 'gke-1.1': @@ -191,6 +275,8 @@ trigger-job: 'kubernetes-build-1.1' test-owner: 'wonderfly@google.com' branch: 'release-1.1' + runner: '{old-runner-1-1}' + post-env: '' emails: 'wonderfly@google.com,qzheng@google.com' suffix: - 'gke-trusty-prod': @@ -231,6 +317,8 @@ trigger-job: 'kubernetes-build-1.1' test-owner: 'Build Cop' branch: 'release-1.1' + runner: '{old-runner-1-1}' + post-env: '' suffix: - 'gce-release-1.1': timeout: 175 @@ -249,6 +337,8 @@ trigger-job: 'kubernetes-build-1.0' test-owner: 'Build Cop' branch: 'release-1.0' + runner: '{old-runner-1-0}' + post-env: '' suffix: - 'gce-release-1.0': timeout: 150 @@ -259,28 +349,58 @@ - project: name: kubernetes-e2e-features trigger-job: 'kubernetes-build' - branch: 'master' suffix: - 'gke-ingress': description: 'Run [Feature:Ingress] tests on GKE using the latest successful build.' timeout: 90 emails: 'beeps@google.com' test-owner: 'beeps' + provider-env: '{gke-provider-env}' + job-env: | + # XXX Not a unique project + export E2E_NAME="e2e-gke-ingress" + export GINKGO_TEST_ARGS="--ginkgo.focus=\[Feature:Ingress\]" + # TODO: Move this into a different project. Currently, since this test + # shares resources with various other networking tests, it's easier to + # zero in on the source of a leak if it's run in isolation. + export PROJECT="kubernetes-flannel" - 'gce-ingress': - description: 'Run [Feature:Ingress] tests on GCE using the latest successful build.' + description: 'Run [Feature:Ingress] tests on GCE.' 
timeout: 90 emails: 'beeps@google.com' test-owner: 'beeps' + provider-env: '{gce-provider-env}' + job-env: | + # XXX Not a unique project + export E2E_NAME="e2e-ingress" + export GINKGO_TEST_ARGS="--ginkgo.focus=\[Feature:Ingress\]" + # TODO: Move this into a different project. Currently, since this test + # shares resources with various other networking tests, it's easier + # to zero in on the source of a leak if it's run in isolation. + export PROJECT="kubernetes-flannel" - 'gce-flannel': description: 'Run E2E tests on GCE using Flannel and the latest successful build. This suite is quarantined in a dedicated project because Flannel integration is experimental.' timeout: 90 emails: 'beeps@google.com' test-owner: 'beeps' + provider-env: '{gce-provider-env}' + job-env: | + # XXX Not a unique project + export E2E_NAME="e2e-flannel" + export PROJECT="kubernetes-flannel" + export FAIL_ON_GCP_RESOURCE_LEAK="false" + # Override GCE defaults. + export NETWORK_PROVIDER="flannel" - 'gce-es-logging': description: 'Run [Feature:Elasticsearch] tests on GCE using the latest successful build.' timeout: 90 emails: '$DEFAULT_RECIPIENTS, mixia@google.com' test-owner: 'mixia' + provider-env: '{gce-provider-env}' + job-env: | + export PROJECT="kubernetes-es-logging" + export GINKGO_TEST_ARGS="--ginkgo.focus=\[Feature:Elasticsearch\]" + export KUBE_LOGGING_DESTINATION="elasticsearch" jobs: - 'kubernetes-e2e-{suffix}' @@ -294,11 +414,22 @@ - 'aws': description: 'Run e2e tests on AWS using the latest successful Kubernetes build.' 
timeout: 240 - branch: 'master' + provider-env: '{aws-provider-env}' + job-env: | + export GINKGO_TEST_ARGS="--ginkgo.skip=\[Slow\]|\[Serial\]|\[Disruptive\]|\[Flaky\]|\[Feature:.+\]" + export GINKGO_PARALLEL="y" + export PROJECT="k8s-jkns-e2e-aws" + export AWS_CONFIG_FILE='/var/lib/jenkins/.aws/credentials' + export AWS_SSH_KEY='/var/lib/jenkins/.ssh/kube_aws_rsa' + export KUBE_SSH_USER='ubuntu' + # This is needed to be able to create PD from the e2e test + export AWS_SHARED_CREDENTIALS_FILE='/var/lib/jenkins/.aws/credentials' - 'aws-release-1.1': description: 'Run e2e tests on AWS using the latest successful 1.1 Kubernetes build.' timeout: 240 branch: 'release-1.1' + runner: '{old-runner-1-1}' + post-env: '' jobs: - 'kubernetes-e2e-{suffix}' @@ -345,6 +476,30 @@ timeout: 480 branch: 'master' suffix: 'gce-enormous-cluster' + job-env: | + # XXX Not a unique project + export E2E_NAME="e2e-enormous-cluster" + # TODO: Currently run only density test. + # Once this is stable, run the whole [Performance] suite. + export GINKGO_TEST_ARGS="--ginkgo.focus=starting\s30\spods\sper\snode" + export PROJECT="kubernetes-scale" + export FAIL_ON_GCP_RESOURCE_LEAK="false" + # Override GCE defaults. + export NETWORK_PROVIDER="flannel" + # Temporarily switch off Heapster, as this will not schedule anywhere. + # TODO: Think of a solution to enable it. 
+ export KUBE_ENABLE_CLUSTER_MONITORING="none" + export KUBE_GCE_ZONE="asia-east1-a" + export MASTER_SIZE="n1-standard-32" + export NODE_SIZE="n1-standard-1" + export NODE_DISK_SIZE="50GB" + export NUM_NODES="1000" + export ALLOWED_NOTREADY_NODES="2" + export EXIT_ON_WEAK_ERROR="false" + # Reduce logs verbosity + export TEST_CLUSTER_LOG_LEVEL="--v=1" + # Increase resync period to simulate production + export TEST_CLUSTER_RESYNC_PERIOD="--min-resync-period=12h" jobs: - 'kubernetes-e2e-{suffix}' diff --git a/hack/jenkins/job-configs/kubernetes-kubemark.yaml b/hack/jenkins/job-configs/kubernetes-kubemark.yaml index 760c56ce904..64d90676895 100644 --- a/hack/jenkins/job-configs/kubernetes-kubemark.yaml +++ b/hack/jenkins/job-configs/kubernetes-kubemark.yaml @@ -5,7 +5,10 @@ daysToKeep: 7 builders: - shell: | - curl -fsS --retry 3 "https://raw.githubusercontent.com/kubernetes/kubernetes/master/hack/jenkins/e2e.sh" | bash - + {provider-env} + {job-env} + {post-env} + {runner} properties: - mail-watcher publishers: @@ -31,19 +34,65 @@ - project: name: kubernetes-kubemark + provider-env: '{gce-provider-env}' suffix: - 'gce': - description: 'Continuously run Density test on Kubemark.' + description: 'Run Kubemark test on a fake 100 node cluster to have a comparison to the real results from scalability suite.' timeout: 120 cron-string: '@hourly' + job-env: | + export PROJECT="k8s-jenkins-kubemark" + export E2E_TEST="false" + export USE_KUBEMARK="true" + export KUBEMARK_TESTS="\[Feature:Performance\]" + # Override defaults to be independent from GCE defaults and set kubemark parameters + export NUM_NODES="10" + export MASTER_SIZE="n1-standard-2" + export NODE_SIZE="n1-standard-1" + export KUBE_GCE_ZONE="us-central1-b" + export KUBEMARK_MASTER_SIZE="n1-standard-4" + export KUBEMARK_NUM_NODES="100" - '500-gce': - description: 'Run Density test on Kubemark in a large cluster that we should be able to handle.' 
+ description: 'Run Kubemark test on a fake 500 node cluster to test for regressions on bigger clusters' timeout: 300 cron-string: '@hourly' + job-env: | + # XXX Not a unique project + export E2E_NAME="kubemark-500" + export PROJECT="kubernetes-scale" + export E2E_TEST="false" + export USE_KUBEMARK="true" + export KUBEMARK_TESTS="\[Feature:Performance\]" + export FAIL_ON_GCP_RESOURCE_LEAK="false" + # Override defaults to be independent from GCE defaults and set kubemark parameters + export NUM_NODES="6" + export MASTER_SIZE="n1-standard-4" + export NODE_SIZE="n1-standard-8" + export KUBE_GCE_ZONE="us-east1-b" + export KUBEMARK_MASTER_SIZE="n1-standard-16" + export KUBEMARK_NUM_NODES="500" - 'gce-scale': description: 'Run Density test on Kubemark in very large cluster. Currently only scheduled to run every 6 hours so as not to waste too many resources.' # 12h - load tests take really, really, really long time. timeout: 720 cron-string: 'H H/8 * * *' + job-env: | + # XXX Not a unique project + export E2E_NAME="kubemark-1000" + export PROJECT="kubernetes-scale" + export E2E_TEST="false" + export USE_KUBEMARK="true" + export KUBEMARK_TESTS="\[Feature:Performance\]" + export FAIL_ON_GCP_RESOURCE_LEAK="false" + # Override defaults to be independent from GCE defaults and set kubemark parameters + # We need 11 so that we won't hit max-pods limit (set to 100). TODO: do it in a nicer way. + export NUM_NODES="11" + export MASTER_SIZE="n1-standard-4" + # Note: can fit about 17 hollow nodes per core so NUM_NODES x + # cores_per_node should be set accordingly. 
+ export NODE_SIZE="n1-standard-8" + export KUBEMARK_MASTER_SIZE="n1-standard-16" + export KUBEMARK_NUM_NODES="1000" + export KUBE_GCE_ZONE="us-east1-b" jobs: - 'kubernetes-kubemark-{suffix}' diff --git a/hack/jenkins/job-configs/kubernetes-soak.yaml b/hack/jenkins/job-configs/kubernetes-soak.yaml index 218e3502387..e2340052328 100644 --- a/hack/jenkins/job-configs/kubernetes-soak.yaml +++ b/hack/jenkins/job-configs/kubernetes-soak.yaml @@ -4,7 +4,12 @@ logrotate: daysToKeep: 14 builders: - - shell: curl -fsS --retry 3 "https://raw.githubusercontent.com/kubernetes/kubernetes/{branch}/hack/jenkins/e2e.sh" | bash - + - shell: | + {provider-env} + {soak-deploy} + {job-env} + {post-env} + {runner} properties: - build-blocker: use-build-blocker: true @@ -30,7 +35,12 @@ logrotate: daysToKeep: 7 builders: - - shell: curl -fsS --retry 3 "https://raw.githubusercontent.com/kubernetes/kubernetes/{branch}/hack/jenkins/e2e.sh" | bash - + - shell: | + {provider-env} + {soak-continuous} + {job-env} + {post-env} + {runner} properties: - build-blocker: use-build-blocker: true @@ -56,6 +66,20 @@ - project: name: soak test-owner: 'Build Cop' + soak-deploy: | + export FAIL_ON_GCP_RESOURCE_LEAK="false" + export E2E_TEST="false" + export E2E_DOWN="false" + soak-continuous: | + export FAIL_ON_GCP_RESOURCE_LEAK="false" + export E2E_UP="false" + export E2E_DOWN="false" + # Clear out any orphaned namespaces in case previous run was interrupted. + export E2E_CLEAN_START="true" + # We should be testing the reliability of a long-running cluster. The + # [Disruptive] tests kill/restart components or nodes in the cluster, + # defeating the purpose of a soak cluster. (#15722) + export GINKGO_TEST_ARGS="--ginkgo.skip=\[Disruptive\]|\[Flaky\]|\[Feature:.+\]" suffix: - 'gce': deploy-description: | @@ -70,10 +94,17 @@ builds will be blocked and remain in the queue until the deployment is complete.
branch: 'master' + provider-env: '{gce-provider-env}' + job-env: | + export HAIRPIN_MODE="false" + export PROJECT="k8s-jkns-gce-soak" - 'gce-2': deploy-description: Clone of kubernetes-soak-weekly-deploy-gce. e2e-description: Clone of kubernetes-soak-continuous-e2e-gce. branch: 'master' + provider-env: '{gce-provider-env}' + job-env: | + export PROJECT="k8s-jkns-gce-soak-2" - 'gce-1.1': deploy-description: | Deploy Kubernetes to soak cluster using the latest successful @@ -88,6 +119,10 @@ builds will be blocked and remain in the queue until the deployment is complete.
branch: 'release-1.1' + runner: '{old-runner-1-1}' + post-env: '' + soak-deploy: '' + soak-continuous: '' - 'gke': deploy-description: | Deploy Kubernetes to a GKE soak cluster using the staging GKE @@ -108,6 +143,12 @@ builds will be blocked and remain in the queue until the deployment is complete.
branch: 'master' + provider-env: '{gke-provider-env}' + job-env: | + export PROJECT="k8s-jkns-gke-soak" + # Need at least n1-standard-2 nodes to run kubelet_perf tests + export MACHINE_TYPE="n1-standard-2" + export E2E_OPT="--check_version_skew=false" jobs: - 'kubernetes-soak-weekly-deploy-{suffix}' - 'kubernetes-soak-continuous-e2e-{suffix}'