diff --git a/test/e2e/cloud/gcp/cluster_upgrade.go b/test/e2e/cloud/gcp/cluster_upgrade.go index bda0ac5989d..584e89972f9 100644 --- a/test/e2e/cloud/gcp/cluster_upgrade.go +++ b/test/e2e/cloud/gcp/cluster_upgrade.go @@ -17,27 +17,15 @@ limitations under the License. package gcp import ( - "encoding/xml" "fmt" - "os" - "os/exec" "path" - "path/filepath" - "regexp" "strings" - "sync" "time" utilversion "k8s.io/apimachinery/pkg/util/version" - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/apimachinery/pkg/version" "k8s.io/client-go/discovery" - clientset "k8s.io/client-go/kubernetes" - "k8s.io/kubernetes/test/e2e/chaosmonkey" "k8s.io/kubernetes/test/e2e/framework" e2econfig "k8s.io/kubernetes/test/e2e/framework/config" - e2eginkgowrapper "k8s.io/kubernetes/test/e2e/framework/ginkgowrapper" - e2enode "k8s.io/kubernetes/test/e2e/framework/node" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" "k8s.io/kubernetes/test/e2e/upgrades" "k8s.io/kubernetes/test/e2e/upgrades/apps" @@ -97,78 +85,10 @@ var serviceaccountAdmissionControllerMigrationTests = []upgrades.Test{ &auth.ServiceAccountAdmissionControllerMigrationTest{}, } -// masterUpgrade upgrades master node on GCE/GKE. -func masterUpgrade(f *framework.Framework, v string, extraEnvs []string) error { - switch framework.TestContext.Provider { - case "gce": - return masterUpgradeGCE(v, extraEnvs) - case "gke": - return framework.MasterUpgradeGKE(f.Namespace.Name, v) - default: - return fmt.Errorf("masterUpgrade() is not implemented for provider %s", framework.TestContext.Provider) - } -} - func kubeProxyDaemonSetExtraEnvs(enableKubeProxyDaemonSet bool) []string { return []string{fmt.Sprintf("KUBE_PROXY_DAEMONSET=%v", enableKubeProxyDaemonSet)} } -func masterUpgradeGCE(rawV string, extraEnvs []string) error { - env := append(os.Environ(), extraEnvs...) - // TODO: Remove these variables when they're no longer needed for downgrades. - if framework.TestContext.EtcdUpgradeVersion != "" && framework.TestContext.EtcdUpgradeStorage != "" { - env = append(env, - "TEST_ETCD_VERSION="+framework.TestContext.EtcdUpgradeVersion, - "STORAGE_BACKEND="+framework.TestContext.EtcdUpgradeStorage, - "TEST_ETCD_IMAGE="+etcdImage) - } else { - // In e2e tests, we skip the confirmation prompt about - // implicit etcd upgrades to simulate the user entering "y". 
- env = append(env, "TEST_ALLOW_IMPLICIT_ETCD_UPGRADE=true") - } - - v := "v" + rawV - _, _, err := framework.RunCmdEnv(env, framework.GCEUpgradeScript(), "-M", v) - return err -} - -func masterUpgradeFunc(f *framework.Framework, upgCtx *upgrades.UpgradeContext, testCase *junit.TestCase, masterExtraEnvs []string) func() { - return func() { - start := time.Now() - defer finalizeUpgradeTest(start, testCase) - target := upgCtx.Versions[1].Version.String() - framework.ExpectNoError(masterUpgrade(f, target, masterExtraEnvs)) - framework.ExpectNoError(checkMasterVersion(f.ClientSet, target)) - } -} - -func clusterUpgradeFunc(f *framework.Framework, upgCtx *upgrades.UpgradeContext, testCase *junit.TestCase, masterExtraEnvs, nodeExtraEnvs []string) func() { - return func() { - start := time.Now() - defer finalizeUpgradeTest(start, testCase) - target := upgCtx.Versions[1].Version.String() - image := upgCtx.Versions[1].NodeImage - framework.ExpectNoError(masterUpgrade(f, target, masterExtraEnvs)) - framework.ExpectNoError(checkMasterVersion(f.ClientSet, target)) - framework.ExpectNoError(nodeUpgrade(f, target, image, nodeExtraEnvs)) - framework.ExpectNoError(checkNodesVersions(f.ClientSet, target)) - } -} - -func clusterDowngradeFunc(f *framework.Framework, upgCtx *upgrades.UpgradeContext, testCase *junit.TestCase, masterExtraEnvs, nodeExtraEnvs []string) func() { - return func() { - start := time.Now() - defer finalizeUpgradeTest(start, testCase) - target := upgCtx.Versions[1].Version.String() - image := upgCtx.Versions[1].NodeImage - // Yes this really is a downgrade. And nodes must downgrade first. - framework.ExpectNoError(nodeUpgrade(f, target, image, nodeExtraEnvs)) - framework.ExpectNoError(checkNodesVersions(f.ClientSet, target)) - framework.ExpectNoError(masterUpgrade(f, target, masterExtraEnvs)) - framework.ExpectNoError(checkMasterVersion(f.ClientSet, target)) - } -} - // TODO(#98326): Split the test by SIGs, move to appropriate directories and use SIGDescribe. 
var _ = ginkgo.Describe("Upgrade [Feature:Upgrade]", func() { f := framework.NewDefaultFramework("cluster-upgrade") @@ -187,8 +107,8 @@ var _ = ginkgo.Describe("Upgrade [Feature:Upgrade]", func() { } testSuite.TestCases = append(testSuite.TestCases, masterUpgradeTest, nil) - upgradeFunc := masterUpgradeFunc(f, upgCtx, masterUpgradeTest, nil) - runUpgradeSuite(upgCtx, upgradeTests, testSuite, upgrades.MasterUpgrade, upgradeFunc) + upgradeFunc := ControlPlaneUpgradeFunc(f, upgCtx, masterUpgradeTest, nil) + upgrades.RunUpgradeSuite(upgCtx, upgradeTests, testSuite, upgrades.MasterUpgrade, upgradeFunc) }) }) @@ -201,8 +121,8 @@ var _ = ginkgo.Describe("Upgrade [Feature:Upgrade]", func() { clusterUpgradeTest := &junit.TestCase{Name: "[sig-cloud-provider-gcp] cluster-upgrade", Classname: "upgrade_tests"} testSuite.TestCases = append(testSuite.TestCases, clusterUpgradeTest) - upgradeFunc := clusterUpgradeFunc(f, upgCtx, clusterUpgradeTest, nil, nil) - runUpgradeSuite(upgCtx, upgradeTests, testSuite, upgrades.ClusterUpgrade, upgradeFunc) + upgradeFunc := ClusterUpgradeFunc(f, upgCtx, clusterUpgradeTest, nil, nil) + upgrades.RunUpgradeSuite(upgCtx, upgradeTests, testSuite, upgrades.ClusterUpgrade, upgradeFunc) }) }) }) @@ -220,8 +140,8 @@ var _ = ginkgo.Describe("Downgrade [Feature:Downgrade]", func() { clusterDowngradeTest := &junit.TestCase{Name: "[sig-cloud-provider-gcp] cluster-downgrade", Classname: "upgrade_tests"} testSuite.TestCases = append(testSuite.TestCases, clusterDowngradeTest) - upgradeFunc := clusterDowngradeFunc(f, upgCtx, clusterDowngradeTest, nil, nil) - runUpgradeSuite(upgCtx, upgradeTests, testSuite, upgrades.ClusterUpgrade, upgradeFunc) + upgradeFunc := ClusterDowngradeFunc(f, upgCtx, clusterDowngradeTest, nil, nil) + upgrades.RunUpgradeSuite(upgCtx, upgradeTests, testSuite, upgrades.ClusterUpgrade, upgradeFunc) }) }) }) @@ -240,10 +160,10 @@ var _ = ginkgo.Describe("etcd Upgrade [Feature:EtcdUpgrade]", func() { upgradeFunc := func() { start := time.Now() - defer finalizeUpgradeTest(start, etcdTest) + defer upgrades.FinalizeUpgradeTest(start, etcdTest) framework.ExpectNoError(framework.EtcdUpgrade(framework.TestContext.EtcdUpgradeStorage, framework.TestContext.EtcdUpgradeVersion)) } - runUpgradeSuite(upgCtx, upgradeTests, testSuite, upgrades.EtcdUpgrade, upgradeFunc) + upgrades.RunUpgradeSuite(upgCtx, upgradeTests, testSuite, upgrades.EtcdUpgrade, upgradeFunc) }) }) }) @@ -261,8 +181,8 @@ var _ = ginkgo.Describe("gpu Upgrade [Feature:GPUUpgrade]", func() { gpuUpgradeTest := &junit.TestCase{Name: "[sig-node] gpu-master-upgrade", Classname: "upgrade_tests"} testSuite.TestCases = append(testSuite.TestCases, gpuUpgradeTest) - upgradeFunc := masterUpgradeFunc(f, upgCtx, gpuUpgradeTest, nil) - runUpgradeSuite(upgCtx, gpuUpgradeTests, testSuite, upgrades.MasterUpgrade, upgradeFunc) + upgradeFunc := ControlPlaneUpgradeFunc(f, upgCtx, gpuUpgradeTest, nil) + upgrades.RunUpgradeSuite(upgCtx, gpuUpgradeTests, testSuite, upgrades.MasterUpgrade, upgradeFunc) }) }) ginkgo.Describe("cluster upgrade", func() { @@ -274,8 +194,8 @@ var _ = ginkgo.Describe("gpu Upgrade [Feature:GPUUpgrade]", func() { gpuUpgradeTest := &junit.TestCase{Name: "[sig-node] gpu-cluster-upgrade", Classname: "upgrade_tests"} testSuite.TestCases = append(testSuite.TestCases, gpuUpgradeTest) - upgradeFunc := clusterUpgradeFunc(f, upgCtx, gpuUpgradeTest, nil, nil) - runUpgradeSuite(upgCtx, gpuUpgradeTests, testSuite, upgrades.ClusterUpgrade, upgradeFunc) + upgradeFunc := ClusterUpgradeFunc(f, upgCtx, gpuUpgradeTest, nil, 
nil) + upgrades.RunUpgradeSuite(upgCtx, gpuUpgradeTests, testSuite, upgrades.ClusterUpgrade, upgradeFunc) }) }) ginkgo.Describe("cluster downgrade", func() { @@ -287,8 +207,8 @@ var _ = ginkgo.Describe("gpu Upgrade [Feature:GPUUpgrade]", func() { gpuDowngradeTest := &junit.TestCase{Name: "[sig-node] gpu-cluster-downgrade", Classname: "upgrade_tests"} testSuite.TestCases = append(testSuite.TestCases, gpuDowngradeTest) - upgradeFunc := clusterDowngradeFunc(f, upgCtx, gpuDowngradeTest, nil, nil) - runUpgradeSuite(upgCtx, gpuUpgradeTests, testSuite, upgrades.ClusterUpgrade, upgradeFunc) + upgradeFunc := ClusterDowngradeFunc(f, upgCtx, gpuDowngradeTest, nil, nil) + upgrades.RunUpgradeSuite(upgCtx, gpuUpgradeTests, testSuite, upgrades.ClusterUpgrade, upgradeFunc) }) }) }) @@ -306,8 +226,8 @@ var _ = ginkgo.Describe("[sig-apps] stateful Upgrade [Feature:StatefulUpgrade]", statefulUpgradeTest := &junit.TestCase{Name: "[sig-apps] stateful-upgrade", Classname: "upgrade_tests"} testSuite.TestCases = append(testSuite.TestCases, statefulUpgradeTest) - upgradeFunc := clusterUpgradeFunc(f, upgCtx, statefulUpgradeTest, nil, nil) - runUpgradeSuite(upgCtx, statefulsetUpgradeTests, testSuite, upgrades.ClusterUpgrade, upgradeFunc) + upgradeFunc := ClusterUpgradeFunc(f, upgCtx, statefulUpgradeTest, nil, nil) + upgrades.RunUpgradeSuite(upgCtx, statefulsetUpgradeTests, testSuite, upgrades.ClusterUpgrade, upgradeFunc) }) }) }) @@ -333,8 +253,8 @@ var _ = ginkgo.Describe("kube-proxy migration [Feature:KubeProxyDaemonSetMigrati testSuite.TestCases = append(testSuite.TestCases, kubeProxyUpgradeTest) extraEnvs := kubeProxyDaemonSetExtraEnvs(true) - upgradeFunc := clusterUpgradeFunc(f, upgCtx, kubeProxyUpgradeTest, extraEnvs, extraEnvs) - runUpgradeSuite(upgCtx, kubeProxyUpgradeTests, testSuite, upgrades.ClusterUpgrade, upgradeFunc) + upgradeFunc := ClusterUpgradeFunc(f, upgCtx, kubeProxyUpgradeTest, extraEnvs, extraEnvs) + upgrades.RunUpgradeSuite(upgCtx, kubeProxyUpgradeTests, testSuite, upgrades.ClusterUpgrade, upgradeFunc) }) }) @@ -351,8 +271,8 @@ var _ = ginkgo.Describe("kube-proxy migration [Feature:KubeProxyDaemonSetMigrati testSuite.TestCases = append(testSuite.TestCases, kubeProxyDowngradeTest) extraEnvs := kubeProxyDaemonSetExtraEnvs(false) - upgradeFunc := clusterDowngradeFunc(f, upgCtx, kubeProxyDowngradeTest, extraEnvs, extraEnvs) - runUpgradeSuite(upgCtx, kubeProxyDowngradeTests, testSuite, upgrades.ClusterUpgrade, upgradeFunc) + upgradeFunc := ClusterDowngradeFunc(f, upgCtx, kubeProxyDowngradeTest, extraEnvs, extraEnvs) + upgrades.RunUpgradeSuite(upgCtx, kubeProxyDowngradeTests, testSuite, upgrades.ClusterUpgrade, upgradeFunc) }) }) }) @@ -374,125 +294,12 @@ var _ = ginkgo.Describe("[sig-auth] ServiceAccount admission controller migratio testSuite.TestCases = append(testSuite.TestCases, serviceaccountAdmissionControllerMigrationTest) extraEnvs := []string{"KUBE_FEATURE_GATES=BoundServiceAccountTokenVolume=true"} - upgradeFunc := masterUpgradeFunc(f, upgCtx, serviceaccountAdmissionControllerMigrationTest, extraEnvs) - runUpgradeSuite(upgCtx, serviceaccountAdmissionControllerMigrationTests, testSuite, upgrades.MasterUpgrade, upgradeFunc) + upgradeFunc := ControlPlaneUpgradeFunc(f, upgCtx, serviceaccountAdmissionControllerMigrationTest, extraEnvs) + upgrades.RunUpgradeSuite(upgCtx, serviceaccountAdmissionControllerMigrationTests, testSuite, upgrades.MasterUpgrade, upgradeFunc) }) }) }) -type chaosMonkeyAdapter struct { - test upgrades.Test - testReport *junit.TestCase - framework *framework.Framework - 
upgradeType upgrades.UpgradeType - upgCtx upgrades.UpgradeContext -} - -func (cma *chaosMonkeyAdapter) Test(sem *chaosmonkey.Semaphore) { - start := time.Now() - var once sync.Once - ready := func() { - once.Do(func() { - sem.Ready() - }) - } - defer finalizeUpgradeTest(start, cma.testReport) - defer ready() - if skippable, ok := cma.test.(upgrades.Skippable); ok && skippable.Skip(cma.upgCtx) { - ginkgo.By("skipping test " + cma.test.Name()) - cma.testReport.Skipped = "skipping test " + cma.test.Name() - return - } - - defer cma.test.Teardown(cma.framework) - cma.test.Setup(cma.framework) - ready() - cma.test.Test(cma.framework, sem.StopCh, cma.upgradeType) -} - -func finalizeUpgradeTest(start time.Time, tc *junit.TestCase) { - tc.Time = time.Since(start).Seconds() - r := recover() - if r == nil { - return - } - - switch r := r.(type) { - case e2eginkgowrapper.FailurePanic: - tc.Failures = []*junit.Failure{ - { - Message: r.Message, - Type: "Failure", - Value: fmt.Sprintf("%s\n\n%s", r.Message, r.FullStackTrace), - }, - } - case e2eskipper.SkipPanic: - tc.Skipped = fmt.Sprintf("%s:%d %q", r.Filename, r.Line, r.Message) - default: - tc.Errors = []*junit.Error{ - { - Message: fmt.Sprintf("%v", r), - Type: "Panic", - Value: fmt.Sprintf("%v", r), - }, - } - } -} - -func createUpgradeFrameworks(tests []upgrades.Test) map[string]*framework.Framework { - nsFilter := regexp.MustCompile("[^[:word:]-]+") // match anything that's not a word character or hyphen - testFrameworks := map[string]*framework.Framework{} - for _, t := range tests { - ns := nsFilter.ReplaceAllString(t.Name(), "-") // and replace with a single hyphen - ns = strings.Trim(ns, "-") - testFrameworks[t.Name()] = framework.NewDefaultFramework(ns) - } - return testFrameworks -} - -func runUpgradeSuite( - upgCtx *upgrades.UpgradeContext, - tests []upgrades.Test, - testSuite *junit.TestSuite, - upgradeType upgrades.UpgradeType, - upgradeFunc func(), -) { - testFrameworks := createUpgradeFrameworks(tests) - - cm := chaosmonkey.New(upgradeFunc) - for _, t := range tests { - testCase := &junit.TestCase{ - Name: t.Name(), - Classname: "upgrade_tests", - } - testSuite.TestCases = append(testSuite.TestCases, testCase) - cma := chaosMonkeyAdapter{ - test: t, - testReport: testCase, - framework: testFrameworks[t.Name()], - upgradeType: upgradeType, - upgCtx: *upgCtx, - } - cm.Register(cma.Test) - } - - start := time.Now() - defer func() { - testSuite.Update() - testSuite.Time = time.Since(start).Seconds() - if framework.TestContext.ReportDir != "" { - fname := filepath.Join(framework.TestContext.ReportDir, fmt.Sprintf("junit_%supgrades.xml", framework.TestContext.ReportPrefix)) - f, err := os.Create(fname) - if err != nil { - return - } - defer f.Close() - xml.NewEncoder(f).Encode(testSuite) - } - }() - cm.Do() -} - func getUpgradeContext(c discovery.DiscoveryInterface, upgradeTarget, upgradeImage string) (*upgrades.UpgradeContext, error) { current, err := c.ServerVersion() if err != nil { @@ -546,170 +353,3 @@ func realVersion(s string) (string, error) { framework.Logf("Version for %q is %q", s, v) return strings.TrimPrefix(strings.TrimSpace(v), "v"), nil } - -func traceRouteToMaster() { - traceroute, err := exec.LookPath("traceroute") - if err != nil { - framework.Logf("Could not find traceroute program") - return - } - cmd := exec.Command(traceroute, "-I", framework.APIAddress()) - out, err := cmd.Output() - if len(out) != 0 { - framework.Logf(string(out)) - } - if exiterr, ok := err.(*exec.ExitError); err != nil && ok { - 
framework.Logf("Error while running traceroute: %s", exiterr.Stderr) - } -} - -// checkMasterVersion validates the master version -func checkMasterVersion(c clientset.Interface, want string) error { - framework.Logf("Checking master version") - var err error - var v *version.Info - waitErr := wait.PollImmediate(5*time.Second, 2*time.Minute, func() (bool, error) { - v, err = c.Discovery().ServerVersion() - if err != nil { - traceRouteToMaster() - return false, nil - } - return true, nil - }) - if waitErr != nil { - return fmt.Errorf("CheckMasterVersion() couldn't get the master version: %v", err) - } - // We do prefix trimming and then matching because: - // want looks like: 0.19.3-815-g50e67d4 - // got looks like: v0.19.3-815-g50e67d4034e858-dirty - got := strings.TrimPrefix(v.GitVersion, "v") - if !strings.HasPrefix(got, want) { - return fmt.Errorf("master had kube-apiserver version %s which does not start with %s", got, want) - } - framework.Logf("Master is at version %s", want) - return nil -} - -// checkNodesVersions validates the nodes versions -func checkNodesVersions(cs clientset.Interface, want string) error { - l, err := e2enode.GetReadySchedulableNodes(cs) - if err != nil { - return err - } - for _, n := range l.Items { - // We do prefix trimming and then matching because: - // want looks like: 0.19.3-815-g50e67d4 - // kv/kvp look like: v0.19.3-815-g50e67d4034e858-dirty - kv, kpv := strings.TrimPrefix(n.Status.NodeInfo.KubeletVersion, "v"), - strings.TrimPrefix(n.Status.NodeInfo.KubeProxyVersion, "v") - if !strings.HasPrefix(kv, want) { - return fmt.Errorf("node %s had kubelet version %s which does not start with %s", - n.ObjectMeta.Name, kv, want) - } - if !strings.HasPrefix(kpv, want) { - return fmt.Errorf("node %s had kube-proxy version %s which does not start with %s", - n.ObjectMeta.Name, kpv, want) - } - } - return nil -} - -// nodeUpgrade upgrades nodes on GCE/GKE. -func nodeUpgrade(f *framework.Framework, v string, img string, extraEnvs []string) error { - // Perform the upgrade. - var err error - switch framework.TestContext.Provider { - case "gce": - err = nodeUpgradeGCE(v, img, extraEnvs) - case "gke": - err = nodeUpgradeGKE(f.Namespace.Name, v, img) - default: - err = fmt.Errorf("nodeUpgrade() is not implemented for provider %s", framework.TestContext.Provider) - } - if err != nil { - return err - } - return waitForNodesReadyAfterUpgrade(f) -} - -// TODO(mrhohn): Remove 'enableKubeProxyDaemonSet' when kube-proxy is run as a DaemonSet by default. -func nodeUpgradeGCE(rawV, img string, extraEnvs []string) error { - v := "v" + rawV - env := append(os.Environ(), extraEnvs...) 
- if img != "" { - env = append(env, "KUBE_NODE_OS_DISTRIBUTION="+img) - _, _, err := framework.RunCmdEnv(env, framework.GCEUpgradeScript(), "-N", "-o", v) - return err - } - _, _, err := framework.RunCmdEnv(env, framework.GCEUpgradeScript(), "-N", v) - return err -} - -func nodeUpgradeGKE(namespace string, v string, img string) error { - framework.Logf("Upgrading nodes to version %q and image %q", v, img) - nps, err := nodePoolsGKE() - if err != nil { - return err - } - framework.Logf("Found node pools %v", nps) - for _, np := range nps { - args := []string{ - "container", - "clusters", - fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID), - framework.LocationParamGKE(), - "upgrade", - framework.TestContext.CloudConfig.Cluster, - fmt.Sprintf("--node-pool=%s", np), - fmt.Sprintf("--cluster-version=%s", v), - "--quiet", - } - if len(img) > 0 { - args = append(args, fmt.Sprintf("--image-type=%s", img)) - } - _, _, err = framework.RunCmd("gcloud", framework.AppendContainerCommandGroupIfNeeded(args)...) - - if err != nil { - return err - } - - framework.WaitForSSHTunnels(namespace) - } - return nil -} - -func nodePoolsGKE() ([]string, error) { - args := []string{ - "container", - "node-pools", - fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID), - framework.LocationParamGKE(), - "list", - fmt.Sprintf("--cluster=%s", framework.TestContext.CloudConfig.Cluster), - "--format=get(name)", - } - stdout, _, err := framework.RunCmd("gcloud", framework.AppendContainerCommandGroupIfNeeded(args)...) - if err != nil { - return nil, err - } - if len(strings.TrimSpace(stdout)) == 0 { - return []string{}, nil - } - return strings.Fields(stdout), nil -} - -func waitForNodesReadyAfterUpgrade(f *framework.Framework) error { - // Wait for it to complete and validate nodes are healthy. - // - // TODO(ihmccreery) We shouldn't have to wait for nodes to be ready in - // GKE; the operation shouldn't return until they all are. - numNodes, err := e2enode.TotalRegistered(f.ClientSet) - if err != nil { - return fmt.Errorf("couldn't detect number of nodes") - } - framework.Logf("Waiting up to %v for all %d nodes to be ready after the upgrade", framework.RestartNodeReadyAgainTimeout, numNodes) - if _, err := e2enode.CheckReady(f.ClientSet, numNodes, framework.RestartNodeReadyAgainTimeout); err != nil { - return err - } - return nil -} diff --git a/test/e2e/cloud/gcp/upgrade_mechanics.go b/test/e2e/cloud/gcp/upgrade_mechanics.go new file mode 100644 index 00000000000..2c1407d31e1 --- /dev/null +++ b/test/e2e/cloud/gcp/upgrade_mechanics.go @@ -0,0 +1,271 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package gcp + +import ( + "fmt" + "os" + "os/exec" + "strings" + "time" + + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/apimachinery/pkg/version" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/kubernetes/test/e2e/framework" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" + "k8s.io/kubernetes/test/e2e/upgrades" + "k8s.io/kubernetes/test/utils/junit" +) + +// ControlPlaneUpgradeFunc returns a function that performs control plane upgrade. +func ControlPlaneUpgradeFunc(f *framework.Framework, upgCtx *upgrades.UpgradeContext, testCase *junit.TestCase, controlPlaneExtraEnvs []string) func() { + return func() { + start := time.Now() + defer upgrades.FinalizeUpgradeTest(start, testCase) + target := upgCtx.Versions[1].Version.String() + framework.ExpectNoError(controlPlaneUpgrade(f, target, controlPlaneExtraEnvs)) + framework.ExpectNoError(checkControlPlaneVersion(f.ClientSet, target)) + } +} + +// ClusterUpgradeFunc returns a function that performs full cluster upgrade (both control plane and nodes). +func ClusterUpgradeFunc(f *framework.Framework, upgCtx *upgrades.UpgradeContext, testCase *junit.TestCase, controlPlaneExtraEnvs, nodeExtraEnvs []string) func() { + return func() { + start := time.Now() + defer upgrades.FinalizeUpgradeTest(start, testCase) + target := upgCtx.Versions[1].Version.String() + image := upgCtx.Versions[1].NodeImage + framework.ExpectNoError(controlPlaneUpgrade(f, target, controlPlaneExtraEnvs)) + framework.ExpectNoError(checkControlPlaneVersion(f.ClientSet, target)) + framework.ExpectNoError(nodeUpgrade(f, target, image, nodeExtraEnvs)) + framework.ExpectNoError(checkNodesVersions(f.ClientSet, target)) + } +} + +// ClusterDowngradeFunc returns a function that performs full cluster downgrade (both nodes and control plane). +func ClusterDowngradeFunc(f *framework.Framework, upgCtx *upgrades.UpgradeContext, testCase *junit.TestCase, controlPlaneExtraEnvs, nodeExtraEnvs []string) func() { + return func() { + start := time.Now() + defer upgrades.FinalizeUpgradeTest(start, testCase) + target := upgCtx.Versions[1].Version.String() + image := upgCtx.Versions[1].NodeImage + // Yes this really is a downgrade. And nodes must downgrade first. + framework.ExpectNoError(nodeUpgrade(f, target, image, nodeExtraEnvs)) + framework.ExpectNoError(checkNodesVersions(f.ClientSet, target)) + framework.ExpectNoError(controlPlaneUpgrade(f, target, controlPlaneExtraEnvs)) + framework.ExpectNoError(checkControlPlaneVersion(f.ClientSet, target)) + } +} + +// controlPlaneUpgrade upgrades control plane node on GCE/GKE. +func controlPlaneUpgrade(f *framework.Framework, v string, extraEnvs []string) error { + switch framework.TestContext.Provider { + case "gce": + return controlPlaneUpgradeGCE(v, extraEnvs) + case "gke": + return framework.MasterUpgradeGKE(f.Namespace.Name, v) + default: + return fmt.Errorf("controlPlaneUpgrade() is not implemented for provider %s", framework.TestContext.Provider) + } +} + +func controlPlaneUpgradeGCE(rawV string, extraEnvs []string) error { + env := append(os.Environ(), extraEnvs...) + // TODO: Remove these variables when they're no longer needed for downgrades. 
+ if framework.TestContext.EtcdUpgradeVersion != "" && framework.TestContext.EtcdUpgradeStorage != "" { + env = append(env, + "TEST_ETCD_VERSION="+framework.TestContext.EtcdUpgradeVersion, + "STORAGE_BACKEND="+framework.TestContext.EtcdUpgradeStorage, + "TEST_ETCD_IMAGE="+etcdImage) + } else { + // In e2e tests, we skip the confirmation prompt about + // implicit etcd upgrades to simulate the user entering "y". + env = append(env, "TEST_ALLOW_IMPLICIT_ETCD_UPGRADE=true") + } + + v := "v" + rawV + _, _, err := framework.RunCmdEnv(env, framework.GCEUpgradeScript(), "-M", v) + return err +} + +func traceRouteToControlPlane() { + traceroute, err := exec.LookPath("traceroute") + if err != nil { + framework.Logf("Could not find traceroute program") + return + } + cmd := exec.Command(traceroute, "-I", framework.APIAddress()) + out, err := cmd.Output() + if len(out) != 0 { + framework.Logf(string(out)) + } + if exiterr, ok := err.(*exec.ExitError); err != nil && ok { + framework.Logf("Error while running traceroute: %s", exiterr.Stderr) + } +} + +// checkControlPlaneVersion validates the control plane version +func checkControlPlaneVersion(c clientset.Interface, want string) error { + framework.Logf("Checking control plane version") + var err error + var v *version.Info + waitErr := wait.PollImmediate(5*time.Second, 2*time.Minute, func() (bool, error) { + v, err = c.Discovery().ServerVersion() + if err != nil { + traceRouteToControlPlane() + return false, nil + } + return true, nil + }) + if waitErr != nil { + return fmt.Errorf("CheckControlPlane() couldn't get the control plane version: %v", err) + } + // We do prefix trimming and then matching because: + // want looks like: 0.19.3-815-g50e67d4 + // got looks like: v0.19.3-815-g50e67d4034e858-dirty + got := strings.TrimPrefix(v.GitVersion, "v") + if !strings.HasPrefix(got, want) { + return fmt.Errorf("control plane had kube-apiserver version %s which does not start with %s", got, want) + } + framework.Logf("Control plane is at version %s", want) + return nil +} + +// nodeUpgrade upgrades nodes on GCE/GKE. +func nodeUpgrade(f *framework.Framework, v string, img string, extraEnvs []string) error { + // Perform the upgrade. + var err error + switch framework.TestContext.Provider { + case "gce": + err = nodeUpgradeGCE(v, img, extraEnvs) + case "gke": + err = nodeUpgradeGKE(f.Namespace.Name, v, img) + default: + err = fmt.Errorf("nodeUpgrade() is not implemented for provider %s", framework.TestContext.Provider) + } + if err != nil { + return err + } + return waitForNodesReadyAfterUpgrade(f) +} + +// TODO(mrhohn): Remove 'enableKubeProxyDaemonSet' when kube-proxy is run as a DaemonSet by default. +func nodeUpgradeGCE(rawV, img string, extraEnvs []string) error { + v := "v" + rawV + env := append(os.Environ(), extraEnvs...) 
+ if img != "" { + env = append(env, "KUBE_NODE_OS_DISTRIBUTION="+img) + _, _, err := framework.RunCmdEnv(env, framework.GCEUpgradeScript(), "-N", "-o", v) + return err + } + _, _, err := framework.RunCmdEnv(env, framework.GCEUpgradeScript(), "-N", v) + return err +} + +func nodeUpgradeGKE(namespace string, v string, img string) error { + framework.Logf("Upgrading nodes to version %q and image %q", v, img) + nps, err := nodePoolsGKE() + if err != nil { + return err + } + framework.Logf("Found node pools %v", nps) + for _, np := range nps { + args := []string{ + "container", + "clusters", + fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID), + framework.LocationParamGKE(), + "upgrade", + framework.TestContext.CloudConfig.Cluster, + fmt.Sprintf("--node-pool=%s", np), + fmt.Sprintf("--cluster-version=%s", v), + "--quiet", + } + if len(img) > 0 { + args = append(args, fmt.Sprintf("--image-type=%s", img)) + } + _, _, err = framework.RunCmd("gcloud", framework.AppendContainerCommandGroupIfNeeded(args)...) + + if err != nil { + return err + } + + framework.WaitForSSHTunnels(namespace) + } + return nil +} + +func nodePoolsGKE() ([]string, error) { + args := []string{ + "container", + "node-pools", + fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID), + framework.LocationParamGKE(), + "list", + fmt.Sprintf("--cluster=%s", framework.TestContext.CloudConfig.Cluster), + "--format=get(name)", + } + stdout, _, err := framework.RunCmd("gcloud", framework.AppendContainerCommandGroupIfNeeded(args)...) + if err != nil { + return nil, err + } + if len(strings.TrimSpace(stdout)) == 0 { + return []string{}, nil + } + return strings.Fields(stdout), nil +} + +func waitForNodesReadyAfterUpgrade(f *framework.Framework) error { + // Wait for it to complete and validate nodes are healthy. + // + // TODO(ihmccreery) We shouldn't have to wait for nodes to be ready in + // GKE; the operation shouldn't return until they all are. + numNodes, err := e2enode.TotalRegistered(f.ClientSet) + if err != nil { + return fmt.Errorf("couldn't detect number of nodes") + } + framework.Logf("Waiting up to %v for all %d nodes to be ready after the upgrade", framework.RestartNodeReadyAgainTimeout, numNodes) + if _, err := e2enode.CheckReady(f.ClientSet, numNodes, framework.RestartNodeReadyAgainTimeout); err != nil { + return err + } + return nil +} + +// checkNodesVersions validates the nodes versions +func checkNodesVersions(cs clientset.Interface, want string) error { + l, err := e2enode.GetReadySchedulableNodes(cs) + if err != nil { + return err + } + for _, n := range l.Items { + // We do prefix trimming and then matching because: + // want looks like: 0.19.3-815-g50e67d4 + // kv/kvp look like: v0.19.3-815-g50e67d4034e858-dirty + kv, kpv := strings.TrimPrefix(n.Status.NodeInfo.KubeletVersion, "v"), + strings.TrimPrefix(n.Status.NodeInfo.KubeProxyVersion, "v") + if !strings.HasPrefix(kv, want) { + return fmt.Errorf("node %s had kubelet version %s which does not start with %s", + n.ObjectMeta.Name, kv, want) + } + if !strings.HasPrefix(kpv, want) { + return fmt.Errorf("node %s had kube-proxy version %s which does not start with %s", + n.ObjectMeta.Name, kpv, want) + } + } + return nil +} diff --git a/test/e2e/upgrades/upgrade_suite.go b/test/e2e/upgrades/upgrade_suite.go new file mode 100644 index 00000000000..89f1a906842 --- /dev/null +++ b/test/e2e/upgrades/upgrade_suite.go @@ -0,0 +1,151 @@ +/* +Copyright 2021 The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package upgrades + +import ( + "encoding/xml" + "fmt" + "os" + "path/filepath" + "regexp" + "strings" + "sync" + "time" + + "k8s.io/kubernetes/test/e2e/chaosmonkey" + "k8s.io/kubernetes/test/e2e/framework" + e2eginkgowrapper "k8s.io/kubernetes/test/e2e/framework/ginkgowrapper" + e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" + "k8s.io/kubernetes/test/utils/junit" + + "github.com/onsi/ginkgo" +) + +type chaosMonkeyAdapter struct { + test Test + testReport *junit.TestCase + framework *framework.Framework + upgradeType UpgradeType + upgCtx UpgradeContext +} + +func (cma *chaosMonkeyAdapter) Test(sem *chaosmonkey.Semaphore) { + start := time.Now() + var once sync.Once + ready := func() { + once.Do(func() { + sem.Ready() + }) + } + defer FinalizeUpgradeTest(start, cma.testReport) + defer ready() + if skippable, ok := cma.test.(Skippable); ok && skippable.Skip(cma.upgCtx) { + ginkgo.By("skipping test " + cma.test.Name()) + cma.testReport.Skipped = "skipping test " + cma.test.Name() + return + } + + defer cma.test.Teardown(cma.framework) + cma.test.Setup(cma.framework) + ready() + cma.test.Test(cma.framework, sem.StopCh, cma.upgradeType) +} + +// FinalizeUpgradeTest fills the necessary information about junit.TestCase. +func FinalizeUpgradeTest(start time.Time, tc *junit.TestCase) { + tc.Time = time.Since(start).Seconds() + r := recover() + if r == nil { + return + } + + switch r := r.(type) { + case e2eginkgowrapper.FailurePanic: + tc.Failures = []*junit.Failure{ + { + Message: r.Message, + Type: "Failure", + Value: fmt.Sprintf("%s\n\n%s", r.Message, r.FullStackTrace), + }, + } + case e2eskipper.SkipPanic: + tc.Skipped = fmt.Sprintf("%s:%d %q", r.Filename, r.Line, r.Message) + default: + tc.Errors = []*junit.Error{ + { + Message: fmt.Sprintf("%v", r), + Type: "Panic", + Value: fmt.Sprintf("%v", r), + }, + } + } +} + +func createUpgradeFrameworks(tests []Test) map[string]*framework.Framework { + nsFilter := regexp.MustCompile("[^[:word:]-]+") // match anything that's not a word character or hyphen + testFrameworks := map[string]*framework.Framework{} + for _, t := range tests { + ns := nsFilter.ReplaceAllString(t.Name(), "-") // and replace with a single hyphen + ns = strings.Trim(ns, "-") + testFrameworks[t.Name()] = framework.NewDefaultFramework(ns) + } + return testFrameworks +} + +// RunUpgradeSuite runs the actual upgrade tests. 
+func RunUpgradeSuite( + upgCtx *UpgradeContext, + tests []Test, + testSuite *junit.TestSuite, + upgradeType UpgradeType, + upgradeFunc func(), +) { + testFrameworks := createUpgradeFrameworks(tests) + + cm := chaosmonkey.New(upgradeFunc) + for _, t := range tests { + testCase := &junit.TestCase{ + Name: t.Name(), + Classname: "upgrade_tests", + } + testSuite.TestCases = append(testSuite.TestCases, testCase) + cma := chaosMonkeyAdapter{ + test: t, + testReport: testCase, + framework: testFrameworks[t.Name()], + upgradeType: upgradeType, + upgCtx: *upgCtx, + } + cm.Register(cma.Test) + } + + start := time.Now() + defer func() { + testSuite.Update() + testSuite.Time = time.Since(start).Seconds() + if framework.TestContext.ReportDir != "" { + fname := filepath.Join(framework.TestContext.ReportDir, fmt.Sprintf("junit_%supgrades.xml", framework.TestContext.ReportPrefix)) + f, err := os.Create(fname) + if err != nil { + return + } + defer f.Close() + xml.NewEncoder(f).Encode(testSuite) + } + }() + cm.Do() +}
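For reference, the relocated helpers are meant to be wired together exactly as the updated specs above do: a spec builds a `junit.TestSuite` and `junit.TestCase`, asks the gcp package for a disruption closure (`ControlPlaneUpgradeFunc`, `ClusterUpgradeFunc`, or `ClusterDowngradeFunc`), and hands everything to `upgrades.RunUpgradeSuite`, which registers each `upgrades.Test` with chaosmonkey and runs it around the disruption. Below is a minimal sketch of such a consumer; the Describe text, namespace, test list, and the literal upgrade target are illustrative only, and `getUpgradeContext` is the package-local helper retained in cluster_upgrade.go.

```go
// A minimal sketch of a consumer of the exported helpers, assuming the same
// wiring pattern as the specs above. Names marked "illustrative" are
// hypothetical and not part of this change.
package gcp

import (
	"github.com/onsi/ginkgo"

	"k8s.io/kubernetes/test/e2e/framework"
	"k8s.io/kubernetes/test/e2e/upgrades"
	"k8s.io/kubernetes/test/e2e/upgrades/apps"
	"k8s.io/kubernetes/test/utils/junit"
)

// Illustrative test list; any upgrades.Test implementations can be used here.
var exampleUpgradeTests = []upgrades.Test{
	&apps.DeploymentUpgradeTest{},
}

var _ = ginkgo.Describe("example upgrade [Feature:Upgrade]", func() {
	f := framework.NewDefaultFramework("example-upgrade")

	ginkgo.It("should keep workloads healthy across a control plane upgrade", func() {
		// getUpgradeContext is the package-local helper retained in
		// cluster_upgrade.go; the target string here is illustrative (the
		// real specs read it from the suite's upgrade-target flag).
		upgCtx, err := getUpgradeContext(f.ClientSet.Discovery(), "ci/latest", "")
		framework.ExpectNoError(err)

		testSuite := &junit.TestSuite{Name: "Example upgrade"}
		testCase := &junit.TestCase{Name: "[sig-cloud-provider-gcp] example-upgrade", Classname: "upgrade_tests"}
		testSuite.TestCases = append(testSuite.TestCases, testCase)

		// The exported factory produces the disruption closure (control plane
		// upgrade followed by a version check); RunUpgradeSuite registers each
		// test with chaosmonkey and runs it around that disruption.
		upgradeFunc := ControlPlaneUpgradeFunc(f, upgCtx, testCase, nil)
		upgrades.RunUpgradeSuite(upgCtx, exampleUpgradeTests, testSuite, upgrades.MasterUpgrade, upgradeFunc)
	})
})
```

`ClusterUpgradeFunc` and `ClusterDowngradeFunc` plug in the same way; the only differences are the extra node-env parameter and, for downgrades, that nodes are rolled before the control plane.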