Merge pull request #22962 from ihmccreery/split-upgrade-jobs

Add configuration for splitting upgrade jobs instead of using Jenkins steps
This commit is contained in:
Isaac Hollander McCreery 2016-04-05 13:05:25 -07:00
commit 9a7860855c
7 changed files with 172 additions and 54 deletions

View File

@ -107,4 +107,5 @@ export PATH=$(dirname "${e2e_test}"):"${PATH}"
${E2E_CLEAN_START:+"--clean-start=true"} \
${E2E_MIN_STARTUP_PODS:+"--minStartupPods=${E2E_MIN_STARTUP_PODS}"} \
${E2E_REPORT_DIR:+"--report-dir=${E2E_REPORT_DIR}"} \
${E2E_REPORT_PREFIX:+"--report-prefix=${E2E_REPORT_PREFIX}"} \
"${@:-}"

View File

@ -152,6 +152,22 @@ if [[ "${JENKINS_USE_TRUSTY_IMAGES:-}" =~ ^[yY]$ ]]; then
export KUBE_OS_DISTRIBUTION="trusty"
fi
# e2e_test runs the e2e suite against an already-running cluster.
#
# Arguments:
#   $1 - ginkgo arguments forwarded via --test_args (may be empty, in which
#        case --test_args is omitted entirely).
#
# Sets the (non-local) variable `exitcode` to the status of the test run.
function e2e_test() {
    local -r test_args="${1}"

    # Bail out early if the cluster is not up; there is nothing to test.
    go run ./hack/e2e.go ${E2E_OPT:-} -v --isup

    # Test failures are reported to Jenkins through the junit*.xml files, so
    # capture the status of the run rather than letting it abort the script.
    if go run ./hack/e2e.go ${E2E_OPT:-} -v --test \
        ${test_args:+--test_args="${test_args}"}; then
        exitcode=0
    else
        exitcode=$?
    fi

    # Only a fully green run is allowed to publish its version.
    if [[ ${exitcode} == 0 && "${E2E_PUBLISH_GREEN_VERSION:-}" == "true" ]]; then
        # The plaintext version file ships inside kubernetes.tar.gz.
        echo "Publish version to ci/latest-green.txt: $(cat version)"
        gsutil cp ./version gs://kubernetes-release/ci/latest-green.txt
    fi
}
echo "--------------------------------------------------------------------------------"
echo "Test Environment:"
printenv | sort
@ -231,7 +247,7 @@ fi
### Pre Set Up ###
# Install gcloud from a custom path if provided. Used to test GKE with gcloud
# at HEAD, release candidate.
if [[ ! -z "${CLOUDSDK_BUCKET:-}" ]]; then
if [[ -n "${CLOUDSDK_BUCKET:-}" ]]; then
gsutil -mq cp -r "${CLOUDSDK_BUCKET}" ~
rm -rf ~/repo ~/cloudsdk
mv ~/$(basename "${CLOUDSDK_BUCKET}") ~/repo
@ -267,21 +283,28 @@ if [[ "${E2E_UP,,}" == "true" ]]; then
fi
fi
### Run tests ###
# Jenkins will look at the junit*.xml files for test failures, so don't exit
# with a nonzero error code if it was only tests that failed.
if [[ "${E2E_TEST,,}" == "true" ]]; then
# Check to make sure the cluster is up before running tests, and fail if it's not.
go run ./hack/e2e.go ${E2E_OPT:-} -v --isup
go run ./hack/e2e.go ${E2E_OPT:-} -v --test \
${GINKGO_TEST_ARGS:+--test_args="${GINKGO_TEST_ARGS}"} \
&& exitcode=0 || exitcode=$?
if [[ "${E2E_PUBLISH_GREEN_VERSION:-}" == "true" && ${exitcode} == 0 && -n ${build_version:-} ]]; then
echo "Publish build_version to ci/latest-green.txt: ${build_version}"
gsutil cp ./version gs://kubernetes-release/ci/latest-green.txt
# Allow download & unpack of alternate version of tests, for cross-version & upgrade testing.
if [[ -n "${JENKINS_PUBLISHED_TEST_VERSION:-}" ]]; then
cd ..
mv kubernetes kubernetes_old
fetch_published_version_tars "${JENKINS_PUBLISHED_TEST_VERSION}"
cd kubernetes
# Upgrade the cluster before running other tests
if [[ "${E2E_UPGRADE_TEST,,}" == "true" ]]; then
# Add a report prefix for the upgrade e2e tests so that their JUnit reports don't get
# overwritten when we run the rest of the e2es.
E2E_REPORT_PREFIX='upgrade' e2e_test "${GINKGO_UPGRADE_TEST_ARGS:-}"
# If JENKINS_USE_OLD_TESTS is set, back out into the old tests now that we've upgraded.
if [[ "${JENKINS_USE_OLD_TESTS:-}" == "true" ]]; then
cd ../kubernetes_old
fi
fi
fi
if [[ "${E2E_TEST,,}" == "true" ]]; then
e2e_test "${GINKGO_TEST_ARGS:-}"
fi
### Start Kubemark ###
if [[ "${USE_KUBEMARK:-}" == "true" ]]; then
export RUN_FROM_DISTRO=true

View File

@ -572,7 +572,6 @@
jobs:
- 'kubernetes-e2e-{suffix}'
# ==============================================================================
# NOTE: From here on all jobs use Trusty as the image for master and/or nodes.
# Please add templates/groups/projects/jobs that use ContainerVm above/below
# this section (search "End of Trusty jobs" for the ending separator).
@ -842,4 +841,53 @@
export KUBE_OS_DISTRIBUTION="trusty"
jobs:
- 'kubernetes-e2e-{suffix}'
#============================== End of Trusty jobs =============================
# End of Trusty jobs
# Experimental GKE upgrade and version-skew jobs (v1.1 -> v1.2).
# Every suffix sets JENKINS_PUBLISHED_TEST_VERSION="ci/latest-1.2". The upgrade
# suffixes additionally set E2E_UPGRADE_TEST and GINKGO_UPGRADE_TEST_ARGS, and
# those whose description says they run v1.1 tests also set
# JENKINS_USE_OLD_TESTS="true".
- project:
name: kubernetes-e2e-gke-upgrades-experimental
trigger-job: 'kubernetes-build'
test-owner: 'ihmccreery'
emails: 'ihmccreery@google.com'
provider-env: '{gke-provider-env}'
suffix:
- 'gke-kubectl-skew-1.1-1.2':
description: 'Deploys a cluster at v1.1 and runs the v1.2 Kubectl tests.'
timeout: 30
job-env: |
export PROJECT="kube-jks-gke-upg-experimental"
export E2E_NAME="gke-ctl-skew-1-1-1-2"
export JENKINS_PUBLISHED_TEST_VERSION="ci/latest-1.2"
export GINKGO_TEST_ARGS="--ginkgo.focus=Kubectl"
export GINKGO_PARALLEL="y"
- 'gke-upgrade-master-1.1-1.2':
description: 'Deploys a cluster at v1.1, upgrades its master to v1.2, and runs v1.1 tests against it.'
timeout: 30
job-env: |
export PROJECT="kube-jks-gke-upg-experimental"
export E2E_NAME="gke-upg-mas-1-1-1-2"
export JENKINS_PUBLISHED_TEST_VERSION="ci/latest-1.2"
export E2E_UPGRADE_TEST="true"
export GINKGO_UPGRADE_TEST_ARGS="--ginkgo.focus=\[Feature:MasterUpgrade\] --upgrade-target=ci/latest-1.2"
export JENKINS_USE_OLD_TESTS="true"
- 'gke-upgrade-cluster-1.1-1.2':
description: 'Deploys a cluster at v1.1, upgrades the cluster to v1.2, and runs v1.1 tests against it.'
timeout: 30
job-env: |
export PROJECT="kube-jks-gke-upg-experimental"
export E2E_NAME="gke-upg-clu-1-1-1-2"
export JENKINS_PUBLISHED_TEST_VERSION="ci/latest-1.2"
export E2E_UPGRADE_TEST="true"
export GINKGO_UPGRADE_TEST_ARGS="--ginkgo.focus=\[Feature:ClusterUpgrade\] --upgrade-target=ci/latest-1.2"
export JENKINS_USE_OLD_TESTS="true"
- 'gke-upgrade-cluster-new-1.1-1.2':
description: 'Deploys a cluster at v1.1, upgrades the cluster to v1.2, and runs v1.2 tests against it.'
timeout: 30
job-env: |
export PROJECT="kube-jks-gke-upg-experimental"
export E2E_NAME="gke-upg-clu-new-1-1-1-2"
export JENKINS_PUBLISHED_TEST_VERSION="ci/latest-1.2"
export E2E_UPGRADE_TEST="true"
export GINKGO_UPGRADE_TEST_ARGS="--ginkgo.focus=\[Feature:ClusterUpgrade\] --upgrade-target=ci/latest-1.2"
jobs:
- 'kubernetes-e2e-{suffix}'

View File

@ -334,6 +334,7 @@ replicaset-lookup-cache-size
replication-controller-lookup-cache-size
repo-root
report-dir
report-prefix
required-contexts
resolv-conf
resource-container

View File

@ -110,20 +110,52 @@ func nodeUpgradeGCE(rawV string) error {
// TODO(ihmccreery) This code path should be identical to how a user
// would trigger a node update; right now it's very different.
v := "v" + rawV
Logf("Getting the node template before the upgrade")
tmplBefore, err := migTemplate()
if err != nil {
return fmt.Errorf("error getting the node template before the upgrade: %v", err)
}
Logf("Preparing node upgrade by creating new instance template for %q", v)
stdout, _, err := runCmd(path.Join(testContext.RepoRoot, "cluster/gce/upgrade.sh"), "-P", v)
if err != nil {
return err
cleanupNodeUpgradeGCE(tmplBefore)
return fmt.Errorf("error preparing node upgrade: %v", err)
}
tmpl := strings.TrimSpace(stdout)
Logf("Performing a node upgrade to %q; waiting at most %v per node", tmpl, restartPerNodeTimeout)
if err := migRollingUpdate(tmpl, restartPerNodeTimeout); err != nil {
cleanupNodeUpgradeGCE(tmplBefore)
return fmt.Errorf("error doing node upgrade via a migRollingUpdate to %s: %v", tmpl, err)
}
return nil
}
// cleanupNodeUpgradeGCE (GCE-only) deletes the instance template that the
// nodes used before an upgrade, provided the nodes are no longer using it.
//
// tmplBefore is the name of the template that was in use before the upgrade
// started. Cleanup is best-effort: every failure is logged together with its
// cause and a note about the template that may have leaked, and nothing is
// returned to the caller.
func cleanupNodeUpgradeGCE(tmplBefore string) {
	Logf("Cleaning up any unused node templates")
	tmplAfter, err := migTemplate()
	if err != nil {
		// Include the underlying error so a leaked template can be diagnosed
		// from the log alone.
		Logf("Could not get node template post-upgrade (%v); may have leaked template %s", err, tmplBefore)
		return
	}
	if tmplBefore == tmplAfter {
		// The node upgrade failed so there's no need to delete
		// anything.
		Logf("Node template %s is still in use; not cleaning up", tmplBefore)
		return
	}
	Logf("Deleting node template %s", tmplBefore)
	if _, _, err := retryCmd("gcloud", "compute", "instance-templates",
		fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID),
		"delete",
		tmplBefore); err != nil {
		Logf("gcloud compute instance-templates delete %s call failed with err: %v", tmplBefore, err)
		Logf("May have leaked instance template %q", tmplBefore)
	}
}
func nodeUpgradeGKE(v string) error {
Logf("Upgrading nodes to %q", v)
_, _, err := runCmd("gcloud", "container",
@ -204,42 +236,6 @@ var _ = KubeDescribe("Upgrade [Feature:Upgrade]", func() {
})
KubeDescribe("node upgrade", func() {
var tmplBefore, tmplAfter string
BeforeEach(func() {
if providerIs("gce") {
By("Getting the node template before the upgrade")
var err error
tmplBefore, err = migTemplate()
expectNoError(err)
}
})
AfterEach(func() {
if providerIs("gce") {
By("Cleaning up any unused node templates")
var err error
tmplAfter, err = migTemplate()
if err != nil {
Logf("Could not get node template post-upgrade; may have leaked template %s", tmplBefore)
return
}
if tmplBefore == tmplAfter {
// The node upgrade failed so there's no need to delete
// anything.
Logf("Node template %s is still in use; not cleaning up", tmplBefore)
return
}
Logf("Deleting node template %s", tmplBefore)
if _, _, err := retryCmd("gcloud", "compute", "instance-templates",
fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID),
"delete",
tmplBefore); err != nil {
Logf("gcloud compute instance-templates delete %s call failed with err: %v", tmplBefore, err)
Logf("May have leaked instance template %q", tmplBefore)
}
}
})
It("should maintain a functioning cluster [Feature:NodeUpgrade]", func() {
By("Validating cluster before node upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
@ -267,6 +263,53 @@ var _ = KubeDescribe("Upgrade [Feature:Upgrade]", func() {
expectNoError(validate(f, svcName, rcName, ingress, replicas))
})
})
// Cluster upgrade specs: upgrade the master first, then the nodes, validating
// the cluster and checking the reported versions around each phase.
// The two specs differ only in how the node upgrade is driven: the first calls
// nodeUpgrade directly, the second passes it through testUpgrade.
// NOTE(review): testUpgrade presumably checks that services stay responsive
// while the upgrade runs — confirm against its definition.
KubeDescribe("cluster upgrade", func() {
It("should maintain responsive services [Feature:ClusterUpgrade]", func() {
By("Validating cluster before master upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
By("Performing a master upgrade")
testUpgrade(ip, v, masterUpgrade)
By("Checking master version")
expectNoError(checkMasterVersion(f.Client, v))
By("Validating cluster after master upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
By("Validating cluster before node upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
By("Performing a node upgrade")
// Call nodeUpgrade directly instead of going through testUpgrade, since
// services don't necessarily stay up.
Logf("Starting upgrade")
expectNoError(nodeUpgrade(f, replicas, v))
Logf("Upgrade complete")
By("Checking node versions")
expectNoError(checkNodesVersions(f.Client, v))
By("Validating cluster after node upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
})
// Experimental variant: same steps as above, but the node upgrade is also
// run through testUpgrade.
It("should maintain responsive services [Feature:ExperimentalClusterUpgrade]", func() {
By("Validating cluster before master upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
By("Performing a master upgrade")
testUpgrade(ip, v, masterUpgrade)
By("Checking master version")
expectNoError(checkMasterVersion(f.Client, v))
By("Validating cluster after master upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
By("Validating cluster before node upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
By("Performing a node upgrade")
testUpgrade(ip, v, func(v string) error {
return nodeUpgrade(f, replicas, v)
})
By("Checking node versions")
expectNoError(checkNodesVersions(f.Client, v))
By("Validating cluster after node upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
})
})
})
func testUpgrade(ip, v string, upF func(v string) error) {
@ -437,7 +480,7 @@ func migRollingUpdate(tmpl string, nt time.Duration) error {
return nil
}
// migTemplate (GCE/GKE-only) returns the name of the MIG template that the
// migTemplate (GCE-only) returns the name of the MIG template that the
// nodes of the cluster use.
func migTemplate() (string, error) {
var errLast error

View File

@ -71,6 +71,7 @@ func RegisterFlags() {
flag.StringVar(&testContext.KubectlPath, "kubectl-path", "kubectl", "The kubectl binary to use. For development, you might use 'cluster/kubectl.sh' here.")
flag.StringVar(&testContext.OutputDir, "e2e-output-dir", "/tmp", "Output directory for interesting/useful test data, like performance data, benchmarks, and other metrics.")
flag.StringVar(&testContext.ReportDir, "report-dir", "", "Path to the directory where the JUnit XML reports should be saved. Default is empty, which doesn't generate these reports.")
flag.StringVar(&testContext.ReportPrefix, "report-prefix", "", "Optional prefix for JUnit XML reports. Default is empty, which doesn't prepend anything to the default name.")
flag.StringVar(&testContext.prefix, "prefix", "e2e", "A prefix to be added to cloud resources created during testing.")
flag.StringVar(&testContext.OSDistro, "os-distro", "debian", "The OS distribution of cluster VM instances (debian, trusty, or coreos).")
@ -269,7 +270,7 @@ func RunE2ETests(t *testing.T) {
if err := os.MkdirAll(testContext.ReportDir, 0755); err != nil {
glog.Errorf("Failed creating report directory: %v", err)
} else {
r = append(r, reporters.NewJUnitReporter(path.Join(testContext.ReportDir, fmt.Sprintf("junit_%02d.xml", config.GinkgoConfig.ParallelNode))))
r = append(r, reporters.NewJUnitReporter(path.Join(testContext.ReportDir, fmt.Sprintf("junit_%v%02d.xml", testContext.ReportPrefix, config.GinkgoConfig.ParallelNode))))
}
}
glog.Infof("Starting e2e run %q on Ginkgo node %d", runId, config.GinkgoConfig.ParallelNode)

View File

@ -186,6 +186,7 @@ type TestContextType struct {
KubectlPath string
OutputDir string
ReportDir string
ReportPrefix string
prefix string
MinStartupPods int
UpgradeTarget string