Merge pull request #22962 from ihmccreery/split-upgrade-jobs

Add configuration for splitting upgrade jobs instead of using Jenkins steps
This commit is contained in:
Isaac Hollander McCreery 2016-04-05 13:05:25 -07:00
commit 9a7860855c
7 changed files with 172 additions and 54 deletions

View File

@ -107,4 +107,5 @@ export PATH=$(dirname "${e2e_test}"):"${PATH}"
${E2E_CLEAN_START:+"--clean-start=true"} \ ${E2E_CLEAN_START:+"--clean-start=true"} \
${E2E_MIN_STARTUP_PODS:+"--minStartupPods=${E2E_MIN_STARTUP_PODS}"} \ ${E2E_MIN_STARTUP_PODS:+"--minStartupPods=${E2E_MIN_STARTUP_PODS}"} \
${E2E_REPORT_DIR:+"--report-dir=${E2E_REPORT_DIR}"} \ ${E2E_REPORT_DIR:+"--report-dir=${E2E_REPORT_DIR}"} \
${E2E_REPORT_PREFIX:+"--report-prefix=${E2E_REPORT_PREFIX}"} \
"${@:-}" "${@:-}"

View File

@ -152,6 +152,22 @@ if [[ "${JENKINS_USE_TRUSTY_IMAGES:-}" =~ ^[yY]$ ]]; then
export KUBE_OS_DISTRIBUTION="trusty" export KUBE_OS_DISTRIBUTION="trusty"
fi fi
# Runs the e2e suite via hack/e2e.go.
#
# Arguments:
#   $1 - Ginkgo arguments to forward as --test_args; when empty, no
#        --test_args flag is passed at all.
#
# Sets the (non-local) variable `exitcode` to the exit status of the test run.
function e2e_test() {
  local -r test_args="${1}"

  # Check to make sure the cluster is up before running tests, and fail if
  # it's not.
  go run ./hack/e2e.go ${E2E_OPT:-} -v --isup

  # Capture the test run's status rather than letting it propagate: Jenkins
  # looks at the junit*.xml files for test failures, so a failing test must
  # not turn into a nonzero exit code here.
  if go run ./hack/e2e.go ${E2E_OPT:-} -v --test \
      ${test_args:+--test_args="${test_args}"}; then
    exitcode=0
  else
    exitcode=$?
  fi

  # Only a fully passing run may be published as the latest green version.
  if [[ "${E2E_PUBLISH_GREEN_VERSION:-}" == "true" ]] && [[ ${exitcode} == 0 ]]; then
    # Use plaintext version file packaged with kubernetes.tar.gz
    echo "Publish version to ci/latest-green.txt: $(cat version)"
    gsutil cp ./version gs://kubernetes-release/ci/latest-green.txt
  fi
}
echo "--------------------------------------------------------------------------------" echo "--------------------------------------------------------------------------------"
echo "Test Environment:" echo "Test Environment:"
printenv | sort printenv | sort
@ -231,7 +247,7 @@ fi
### Pre Set Up ### ### Pre Set Up ###
# Install gcloud from a custom path if provided. Used to test GKE with gcloud # Install gcloud from a custom path if provided. Used to test GKE with gcloud
# at HEAD, release candidate. # at HEAD, release candidate.
if [[ ! -z "${CLOUDSDK_BUCKET:-}" ]]; then if [[ -n "${CLOUDSDK_BUCKET:-}" ]]; then
gsutil -mq cp -r "${CLOUDSDK_BUCKET}" ~ gsutil -mq cp -r "${CLOUDSDK_BUCKET}" ~
rm -rf ~/repo ~/cloudsdk rm -rf ~/repo ~/cloudsdk
mv ~/$(basename "${CLOUDSDK_BUCKET}") ~/repo mv ~/$(basename "${CLOUDSDK_BUCKET}") ~/repo
@ -267,19 +283,26 @@ if [[ "${E2E_UP,,}" == "true" ]]; then
fi fi
fi fi
### Run tests ### # Allow download & unpack of alternate version of tests, for cross-version & upgrade testing.
# Jenkins will look at the junit*.xml files for test failures, so don't exit if [[ -n "${JENKINS_PUBLISHED_TEST_VERSION:-}" ]]; then
# with a nonzero error code if it was only tests that failed. cd ..
if [[ "${E2E_TEST,,}" == "true" ]]; then mv kubernetes kubernetes_old
# Check to make sure the cluster is up before running tests, and fail if it's not. fetch_published_version_tars "${JENKINS_PUBLISHED_TEST_VERSION}"
go run ./hack/e2e.go ${E2E_OPT:-} -v --isup cd kubernetes
go run ./hack/e2e.go ${E2E_OPT:-} -v --test \ # Upgrade the cluster before running other tests
${GINKGO_TEST_ARGS:+--test_args="${GINKGO_TEST_ARGS}"} \ if [[ "${E2E_UPGRADE_TEST,,}" == "true" ]]; then
&& exitcode=0 || exitcode=$? # Add a report prefix for the e2e tests so that the tests don't get overwritten when we run
if [[ "${E2E_PUBLISH_GREEN_VERSION:-}" == "true" && ${exitcode} == 0 && -n ${build_version:-} ]]; then # the rest of the e2es.
echo "Publish build_version to ci/latest-green.txt: ${build_version}" E2E_REPORT_PREFIX='upgrade' e2e_test "${GINKGO_UPGRADE_TEST_ARGS:-}"
gsutil cp ./version gs://kubernetes-release/ci/latest-green.txt # If JENKINS_USE_OLD_TESTS is set, back out into the old tests now that we've upgraded.
if [[ "${JENKINS_USE_OLD_TESTS:-}" == "true" ]]; then
cd ../kubernetes_old
fi fi
fi
fi
if [[ "${E2E_TEST,,}" == "true" ]]; then
e2e_test "${GINKGO_TEST_ARGS:-}"
fi fi
### Start Kubemark ### ### Start Kubemark ###

View File

@ -572,7 +572,6 @@
jobs: jobs:
- 'kubernetes-e2e-{suffix}' - 'kubernetes-e2e-{suffix}'
# ==============================================================================
# NOTE: From here on all jobs use Trusty as the image for master and/or nodes. # NOTE: From here on all jobs use Trusty as the image for master and/or nodes.
# Please add templates/groups/projects/jobs that use ContainerVm above/below # Please add templates/groups/projects/jobs that use ContainerVm above/below
# this section (search "End of Trusty jobs" for the ending separator). # this section (search "End of Trusty jobs" for the ending separator).
@ -842,4 +841,53 @@
export KUBE_OS_DISTRIBUTION="trusty" export KUBE_OS_DISTRIBUTION="trusty"
jobs: jobs:
- 'kubernetes-e2e-{suffix}' - 'kubernetes-e2e-{suffix}'
#============================== End of Trusty jobs =============================
# End of Trusty jobs
- project:
name: kubernetes-e2e-gke-upgrades-experimental
trigger-job: 'kubernetes-build'
test-owner: 'ihmccreery'
emails: 'ihmccreery@google.com'
provider-env: '{gke-provider-env}'
suffix:
- 'gke-kubectl-skew-1.1-1.2':
description: 'Deploys a cluster at v1.1 and runs the v1.2 Kubectl tests.'
timeout: 30
job-env: |
export PROJECT="kube-jks-gke-upg-experimental"
export E2E_NAME="gke-ctl-skew-1-1-1-2"
export JENKINS_PUBLISHED_TEST_VERSION="ci/latest-1.2"
export GINKGO_TEST_ARGS="--ginkgo.focus=Kubectl"
export GINKGO_PARALLEL="y"
- 'gke-upgrade-master-1.1-1.2':
description: 'Deploys a cluster at v1.1, upgrades its master to v1.2, and runs v1.1 tests against it.'
timeout: 30
job-env: |
export PROJECT="kube-jks-gke-upg-experimental"
export E2E_NAME="gke-upg-mas-1-1-1-2"
export JENKINS_PUBLISHED_TEST_VERSION="ci/latest-1.2"
export E2E_UPGRADE_TEST="true"
export GINKGO_UPGRADE_TEST_ARGS="--ginkgo.focus=\[Feature:MasterUpgrade\] --upgrade-target=ci/latest-1.2"
export JENKINS_USE_OLD_TESTS="true"
- 'gke-upgrade-cluster-1.1-1.2':
description: 'Deploys a cluster at v1.1, upgrades the cluster to v1.2, and runs v1.1 tests against it.'
timeout: 30
job-env: |
export PROJECT="kube-jks-gke-upg-experimental"
export E2E_NAME="gke-upg-clu-1-1-1-2"
export JENKINS_PUBLISHED_TEST_VERSION="ci/latest-1.2"
export E2E_UPGRADE_TEST="true"
export GINKGO_UPGRADE_TEST_ARGS="--ginkgo.focus=\[Feature:ClusterUpgrade\] --upgrade-target=ci/latest-1.2"
export JENKINS_USE_OLD_TESTS="true"
- 'gke-upgrade-cluster-new-1.1-1.2':
description: 'Deploys a cluster at v1.1, upgrades the cluster to v1.2, and runs v1.2 tests against it.'
timeout: 30
job-env: |
export PROJECT="kube-jks-gke-upg-experimental"
export E2E_NAME="gke-upg-clu-new-1-1-1-2"
export JENKINS_PUBLISHED_TEST_VERSION="ci/latest-1.2"
export E2E_UPGRADE_TEST="true"
export GINKGO_UPGRADE_TEST_ARGS="--ginkgo.focus=\[Feature:ClusterUpgrade\] --upgrade-target=ci/latest-1.2"
jobs:
- 'kubernetes-e2e-{suffix}'

View File

@ -334,6 +334,7 @@ replicaset-lookup-cache-size
replication-controller-lookup-cache-size replication-controller-lookup-cache-size
repo-root repo-root
report-dir report-dir
report-prefix
required-contexts required-contexts
resolv-conf resolv-conf
resource-container resource-container

View File

@ -110,20 +110,52 @@ func nodeUpgradeGCE(rawV string) error {
// TODO(ihmccreery) This code path should be identical to how a user // TODO(ihmccreery) This code path should be identical to how a user
// would trigger a node update; right now it's very different. // would trigger a node update; right now it's very different.
v := "v" + rawV v := "v" + rawV
Logf("Getting the node template before the upgrade")
tmplBefore, err := migTemplate()
if err != nil {
return fmt.Errorf("error getting the node template before the upgrade: %v", err)
}
Logf("Preparing node upgrade by creating new instance template for %q", v) Logf("Preparing node upgrade by creating new instance template for %q", v)
stdout, _, err := runCmd(path.Join(testContext.RepoRoot, "cluster/gce/upgrade.sh"), "-P", v) stdout, _, err := runCmd(path.Join(testContext.RepoRoot, "cluster/gce/upgrade.sh"), "-P", v)
if err != nil { if err != nil {
return err cleanupNodeUpgradeGCE(tmplBefore)
return fmt.Errorf("error preparing node upgrade: %v", err)
} }
tmpl := strings.TrimSpace(stdout) tmpl := strings.TrimSpace(stdout)
Logf("Performing a node upgrade to %q; waiting at most %v per node", tmpl, restartPerNodeTimeout) Logf("Performing a node upgrade to %q; waiting at most %v per node", tmpl, restartPerNodeTimeout)
if err := migRollingUpdate(tmpl, restartPerNodeTimeout); err != nil { if err := migRollingUpdate(tmpl, restartPerNodeTimeout); err != nil {
cleanupNodeUpgradeGCE(tmplBefore)
return fmt.Errorf("error doing node upgrade via a migRollingUpdate to %s: %v", tmpl, err) return fmt.Errorf("error doing node upgrade via a migRollingUpdate to %s: %v", tmpl, err)
} }
return nil return nil
} }
// cleanupNodeUpgradeGCE deletes the instance template named tmplBefore once
// the cluster's nodes are no longer using it.
//
// It looks up the template currently in use via migTemplate. If that lookup
// fails, or the current template is still tmplBefore, nothing is deleted.
// Every failure is only logged; the function never returns an error.
func cleanupNodeUpgradeGCE(tmplBefore string) {
	Logf("Cleaning up any unused node templates")

	current, err := migTemplate()
	switch {
	case err != nil:
		Logf("Could not get node template post-upgrade; may have leaked template %s", tmplBefore)
		return
	case current == tmplBefore:
		// The nodes never moved off the old template (the upgrade
		// failed), so there is nothing to delete.
		Logf("Node template %s is still in use; not cleaning up", tmplBefore)
		return
	}

	Logf("Deleting node template %s", tmplBefore)
	projectFlag := fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID)
	_, _, delErr := retryCmd("gcloud", "compute", "instance-templates", projectFlag, "delete", tmplBefore)
	if delErr == nil {
		return
	}
	Logf("gcloud compute instance-templates delete %s call failed with err: %v", tmplBefore, delErr)
	Logf("May have leaked instance template %q", tmplBefore)
}
func nodeUpgradeGKE(v string) error { func nodeUpgradeGKE(v string) error {
Logf("Upgrading nodes to %q", v) Logf("Upgrading nodes to %q", v)
_, _, err := runCmd("gcloud", "container", _, _, err := runCmd("gcloud", "container",
@ -204,42 +236,6 @@ var _ = KubeDescribe("Upgrade [Feature:Upgrade]", func() {
}) })
KubeDescribe("node upgrade", func() { KubeDescribe("node upgrade", func() {
var tmplBefore, tmplAfter string
BeforeEach(func() {
if providerIs("gce") {
By("Getting the node template before the upgrade")
var err error
tmplBefore, err = migTemplate()
expectNoError(err)
}
})
AfterEach(func() {
if providerIs("gce") {
By("Cleaning up any unused node templates")
var err error
tmplAfter, err = migTemplate()
if err != nil {
Logf("Could not get node template post-upgrade; may have leaked template %s", tmplBefore)
return
}
if tmplBefore == tmplAfter {
// The node upgrade failed so there's no need to delete
// anything.
Logf("Node template %s is still in use; not cleaning up", tmplBefore)
return
}
Logf("Deleting node template %s", tmplBefore)
if _, _, err := retryCmd("gcloud", "compute", "instance-templates",
fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID),
"delete",
tmplBefore); err != nil {
Logf("gcloud compute instance-templates delete %s call failed with err: %v", tmplBefore, err)
Logf("May have leaked instance template %q", tmplBefore)
}
}
})
It("should maintain a functioning cluster [Feature:NodeUpgrade]", func() { It("should maintain a functioning cluster [Feature:NodeUpgrade]", func() {
By("Validating cluster before node upgrade") By("Validating cluster before node upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas)) expectNoError(validate(f, svcName, rcName, ingress, replicas))
@ -267,6 +263,53 @@ var _ = KubeDescribe("Upgrade [Feature:Upgrade]", func() {
expectNoError(validate(f, svcName, rcName, ingress, replicas)) expectNoError(validate(f, svcName, rcName, ingress, replicas))
}) })
}) })
KubeDescribe("cluster upgrade", func() {
It("should maintain responsive services [Feature:ClusterUpgrade]", func() {
By("Validating cluster before master upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
By("Performing a master upgrade")
testUpgrade(ip, v, masterUpgrade)
By("Checking master version")
expectNoError(checkMasterVersion(f.Client, v))
By("Validating cluster after master upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
By("Validating cluster before node upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
By("Performing a node upgrade")
// Bypass testUpgrade, since services don't necessarily stay up.
Logf("Starting upgrade")
expectNoError(nodeUpgrade(f, replicas, v))
Logf("Upgrade complete")
By("Checking node versions")
expectNoError(checkNodesVersions(f.Client, v))
By("Validating cluster after node upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
})
It("should maintain responsive services [Feature:ExperimentalClusterUpgrade]", func() {
By("Validating cluster before master upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
By("Performing a master upgrade")
testUpgrade(ip, v, masterUpgrade)
By("Checking master version")
expectNoError(checkMasterVersion(f.Client, v))
By("Validating cluster after master upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
By("Validating cluster before node upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
By("Performing a node upgrade")
testUpgrade(ip, v, func(v string) error {
return nodeUpgrade(f, replicas, v)
})
By("Checking node versions")
expectNoError(checkNodesVersions(f.Client, v))
By("Validating cluster after node upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
})
})
}) })
func testUpgrade(ip, v string, upF func(v string) error) { func testUpgrade(ip, v string, upF func(v string) error) {
@ -437,7 +480,7 @@ func migRollingUpdate(tmpl string, nt time.Duration) error {
return nil return nil
} }
// migTemplate (GCE/GKE-only) returns the name of the MIG template that the // migTemplate (GCE-only) returns the name of the MIG template that the
// nodes of the cluster use. // nodes of the cluster use.
func migTemplate() (string, error) { func migTemplate() (string, error) {
var errLast error var errLast error

View File

@ -71,6 +71,7 @@ func RegisterFlags() {
flag.StringVar(&testContext.KubectlPath, "kubectl-path", "kubectl", "The kubectl binary to use. For development, you might use 'cluster/kubectl.sh' here.") flag.StringVar(&testContext.KubectlPath, "kubectl-path", "kubectl", "The kubectl binary to use. For development, you might use 'cluster/kubectl.sh' here.")
flag.StringVar(&testContext.OutputDir, "e2e-output-dir", "/tmp", "Output directory for interesting/useful test data, like performance data, benchmarks, and other metrics.") flag.StringVar(&testContext.OutputDir, "e2e-output-dir", "/tmp", "Output directory for interesting/useful test data, like performance data, benchmarks, and other metrics.")
flag.StringVar(&testContext.ReportDir, "report-dir", "", "Path to the directory where the JUnit XML reports should be saved. Default is empty, which doesn't generate these reports.") flag.StringVar(&testContext.ReportDir, "report-dir", "", "Path to the directory where the JUnit XML reports should be saved. Default is empty, which doesn't generate these reports.")
flag.StringVar(&testContext.ReportPrefix, "report-prefix", "", "Optional prefix for JUnit XML reports. Default is empty, which doesn't prepend anything to the default name.")
flag.StringVar(&testContext.prefix, "prefix", "e2e", "A prefix to be added to cloud resources created during testing.") flag.StringVar(&testContext.prefix, "prefix", "e2e", "A prefix to be added to cloud resources created during testing.")
flag.StringVar(&testContext.OSDistro, "os-distro", "debian", "The OS distribution of cluster VM instances (debian, trusty, or coreos).") flag.StringVar(&testContext.OSDistro, "os-distro", "debian", "The OS distribution of cluster VM instances (debian, trusty, or coreos).")
@ -269,7 +270,7 @@ func RunE2ETests(t *testing.T) {
if err := os.MkdirAll(testContext.ReportDir, 0755); err != nil { if err := os.MkdirAll(testContext.ReportDir, 0755); err != nil {
glog.Errorf("Failed creating report directory: %v", err) glog.Errorf("Failed creating report directory: %v", err)
} else { } else {
r = append(r, reporters.NewJUnitReporter(path.Join(testContext.ReportDir, fmt.Sprintf("junit_%02d.xml", config.GinkgoConfig.ParallelNode)))) r = append(r, reporters.NewJUnitReporter(path.Join(testContext.ReportDir, fmt.Sprintf("junit_%v%02d.xml", testContext.ReportPrefix, config.GinkgoConfig.ParallelNode))))
} }
} }
glog.Infof("Starting e2e run %q on Ginkgo node %d", runId, config.GinkgoConfig.ParallelNode) glog.Infof("Starting e2e run %q on Ginkgo node %d", runId, config.GinkgoConfig.ParallelNode)

View File

@ -186,6 +186,7 @@ type TestContextType struct {
KubectlPath string KubectlPath string
OutputDir string OutputDir string
ReportDir string ReportDir string
ReportPrefix string
prefix string prefix string
MinStartupPods int MinStartupPods int
UpgradeTarget string UpgradeTarget string