diff --git a/test/e2e/cloud/gcp/cluster_upgrade.go b/test/e2e/cloud/gcp/cluster_upgrade.go index bda0ac5989d..bc51f586d8d 100644 --- a/test/e2e/cloud/gcp/cluster_upgrade.go +++ b/test/e2e/cloud/gcp/cluster_upgrade.go @@ -20,7 +20,6 @@ import ( "encoding/xml" "fmt" "os" - "os/exec" "path" "path/filepath" "regexp" @@ -29,15 +28,11 @@ import ( "time" utilversion "k8s.io/apimachinery/pkg/util/version" - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/apimachinery/pkg/version" "k8s.io/client-go/discovery" - clientset "k8s.io/client-go/kubernetes" "k8s.io/kubernetes/test/e2e/chaosmonkey" "k8s.io/kubernetes/test/e2e/framework" e2econfig "k8s.io/kubernetes/test/e2e/framework/config" e2eginkgowrapper "k8s.io/kubernetes/test/e2e/framework/ginkgowrapper" - e2enode "k8s.io/kubernetes/test/e2e/framework/node" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" "k8s.io/kubernetes/test/e2e/upgrades" "k8s.io/kubernetes/test/e2e/upgrades/apps" @@ -97,78 +92,10 @@ var serviceaccountAdmissionControllerMigrationTests = []upgrades.Test{ &auth.ServiceAccountAdmissionControllerMigrationTest{}, } -// masterUpgrade upgrades master node on GCE/GKE. -func masterUpgrade(f *framework.Framework, v string, extraEnvs []string) error { - switch framework.TestContext.Provider { - case "gce": - return masterUpgradeGCE(v, extraEnvs) - case "gke": - return framework.MasterUpgradeGKE(f.Namespace.Name, v) - default: - return fmt.Errorf("masterUpgrade() is not implemented for provider %s", framework.TestContext.Provider) - } -} - func kubeProxyDaemonSetExtraEnvs(enableKubeProxyDaemonSet bool) []string { return []string{fmt.Sprintf("KUBE_PROXY_DAEMONSET=%v", enableKubeProxyDaemonSet)} } -func masterUpgradeGCE(rawV string, extraEnvs []string) error { - env := append(os.Environ(), extraEnvs...) - // TODO: Remove these variables when they're no longer needed for downgrades. - if framework.TestContext.EtcdUpgradeVersion != "" && framework.TestContext.EtcdUpgradeStorage != "" { - env = append(env, - "TEST_ETCD_VERSION="+framework.TestContext.EtcdUpgradeVersion, - "STORAGE_BACKEND="+framework.TestContext.EtcdUpgradeStorage, - "TEST_ETCD_IMAGE="+etcdImage) - } else { - // In e2e tests, we skip the confirmation prompt about - // implicit etcd upgrades to simulate the user entering "y". - env = append(env, "TEST_ALLOW_IMPLICIT_ETCD_UPGRADE=true") - } - - v := "v" + rawV - _, _, err := framework.RunCmdEnv(env, framework.GCEUpgradeScript(), "-M", v) - return err -} - -func masterUpgradeFunc(f *framework.Framework, upgCtx *upgrades.UpgradeContext, testCase *junit.TestCase, masterExtraEnvs []string) func() { - return func() { - start := time.Now() - defer finalizeUpgradeTest(start, testCase) - target := upgCtx.Versions[1].Version.String() - framework.ExpectNoError(masterUpgrade(f, target, masterExtraEnvs)) - framework.ExpectNoError(checkMasterVersion(f.ClientSet, target)) - } -} - -func clusterUpgradeFunc(f *framework.Framework, upgCtx *upgrades.UpgradeContext, testCase *junit.TestCase, masterExtraEnvs, nodeExtraEnvs []string) func() { - return func() { - start := time.Now() - defer finalizeUpgradeTest(start, testCase) - target := upgCtx.Versions[1].Version.String() - image := upgCtx.Versions[1].NodeImage - framework.ExpectNoError(masterUpgrade(f, target, masterExtraEnvs)) - framework.ExpectNoError(checkMasterVersion(f.ClientSet, target)) - framework.ExpectNoError(nodeUpgrade(f, target, image, nodeExtraEnvs)) - framework.ExpectNoError(checkNodesVersions(f.ClientSet, target)) - } -} - -func clusterDowngradeFunc(f *framework.Framework, upgCtx *upgrades.UpgradeContext, testCase *junit.TestCase, masterExtraEnvs, nodeExtraEnvs []string) func() { - return func() { - start := time.Now() - defer finalizeUpgradeTest(start, testCase) - target := upgCtx.Versions[1].Version.String() - image := upgCtx.Versions[1].NodeImage - // Yes this really is a downgrade. And nodes must downgrade first. - framework.ExpectNoError(nodeUpgrade(f, target, image, nodeExtraEnvs)) - framework.ExpectNoError(checkNodesVersions(f.ClientSet, target)) - framework.ExpectNoError(masterUpgrade(f, target, masterExtraEnvs)) - framework.ExpectNoError(checkMasterVersion(f.ClientSet, target)) - } -} - // TODO(#98326): Split the test by SIGs, move to appropriate directories and use SIGDescribe. var _ = ginkgo.Describe("Upgrade [Feature:Upgrade]", func() { f := framework.NewDefaultFramework("cluster-upgrade") @@ -546,170 +473,3 @@ func realVersion(s string) (string, error) { framework.Logf("Version for %q is %q", s, v) return strings.TrimPrefix(strings.TrimSpace(v), "v"), nil } - -func traceRouteToMaster() { - traceroute, err := exec.LookPath("traceroute") - if err != nil { - framework.Logf("Could not find traceroute program") - return - } - cmd := exec.Command(traceroute, "-I", framework.APIAddress()) - out, err := cmd.Output() - if len(out) != 0 { - framework.Logf(string(out)) - } - if exiterr, ok := err.(*exec.ExitError); err != nil && ok { - framework.Logf("Error while running traceroute: %s", exiterr.Stderr) - } -} - -// checkMasterVersion validates the master version -func checkMasterVersion(c clientset.Interface, want string) error { - framework.Logf("Checking master version") - var err error - var v *version.Info - waitErr := wait.PollImmediate(5*time.Second, 2*time.Minute, func() (bool, error) { - v, err = c.Discovery().ServerVersion() - if err != nil { - traceRouteToMaster() - return false, nil - } - return true, nil - }) - if waitErr != nil { - return fmt.Errorf("CheckMasterVersion() couldn't get the master version: %v", err) - } - // We do prefix trimming and then matching because: - // want looks like: 0.19.3-815-g50e67d4 - // got looks like: v0.19.3-815-g50e67d4034e858-dirty - got := strings.TrimPrefix(v.GitVersion, "v") - if !strings.HasPrefix(got, want) { - return fmt.Errorf("master had kube-apiserver version %s which does not start with %s", got, want) - } - framework.Logf("Master is at version %s", want) - return nil -} - -// checkNodesVersions validates the nodes versions -func checkNodesVersions(cs clientset.Interface, want string) error { - l, err := e2enode.GetReadySchedulableNodes(cs) - if err != nil { - return err - } - for _, n := range l.Items { - // We do prefix trimming and then matching because: - // want looks like: 0.19.3-815-g50e67d4 - // kv/kvp look like: v0.19.3-815-g50e67d4034e858-dirty - kv, kpv := strings.TrimPrefix(n.Status.NodeInfo.KubeletVersion, "v"), - strings.TrimPrefix(n.Status.NodeInfo.KubeProxyVersion, "v") - if !strings.HasPrefix(kv, want) { - return fmt.Errorf("node %s had kubelet version %s which does not start with %s", - n.ObjectMeta.Name, kv, want) - } - if !strings.HasPrefix(kpv, want) { - return fmt.Errorf("node %s had kube-proxy version %s which does not start with %s", - n.ObjectMeta.Name, kpv, want) - } - } - return nil -} - -// nodeUpgrade upgrades nodes on GCE/GKE. -func nodeUpgrade(f *framework.Framework, v string, img string, extraEnvs []string) error { - // Perform the upgrade. - var err error - switch framework.TestContext.Provider { - case "gce": - err = nodeUpgradeGCE(v, img, extraEnvs) - case "gke": - err = nodeUpgradeGKE(f.Namespace.Name, v, img) - default: - err = fmt.Errorf("nodeUpgrade() is not implemented for provider %s", framework.TestContext.Provider) - } - if err != nil { - return err - } - return waitForNodesReadyAfterUpgrade(f) -} - -// TODO(mrhohn): Remove 'enableKubeProxyDaemonSet' when kube-proxy is run as a DaemonSet by default. -func nodeUpgradeGCE(rawV, img string, extraEnvs []string) error { - v := "v" + rawV - env := append(os.Environ(), extraEnvs...) - if img != "" { - env = append(env, "KUBE_NODE_OS_DISTRIBUTION="+img) - _, _, err := framework.RunCmdEnv(env, framework.GCEUpgradeScript(), "-N", "-o", v) - return err - } - _, _, err := framework.RunCmdEnv(env, framework.GCEUpgradeScript(), "-N", v) - return err -} - -func nodeUpgradeGKE(namespace string, v string, img string) error { - framework.Logf("Upgrading nodes to version %q and image %q", v, img) - nps, err := nodePoolsGKE() - if err != nil { - return err - } - framework.Logf("Found node pools %v", nps) - for _, np := range nps { - args := []string{ - "container", - "clusters", - fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID), - framework.LocationParamGKE(), - "upgrade", - framework.TestContext.CloudConfig.Cluster, - fmt.Sprintf("--node-pool=%s", np), - fmt.Sprintf("--cluster-version=%s", v), - "--quiet", - } - if len(img) > 0 { - args = append(args, fmt.Sprintf("--image-type=%s", img)) - } - _, _, err = framework.RunCmd("gcloud", framework.AppendContainerCommandGroupIfNeeded(args)...) - - if err != nil { - return err - } - - framework.WaitForSSHTunnels(namespace) - } - return nil -} - -func nodePoolsGKE() ([]string, error) { - args := []string{ - "container", - "node-pools", - fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID), - framework.LocationParamGKE(), - "list", - fmt.Sprintf("--cluster=%s", framework.TestContext.CloudConfig.Cluster), - "--format=get(name)", - } - stdout, _, err := framework.RunCmd("gcloud", framework.AppendContainerCommandGroupIfNeeded(args)...) - if err != nil { - return nil, err - } - if len(strings.TrimSpace(stdout)) == 0 { - return []string{}, nil - } - return strings.Fields(stdout), nil -} - -func waitForNodesReadyAfterUpgrade(f *framework.Framework) error { - // Wait for it to complete and validate nodes are healthy. - // - // TODO(ihmccreery) We shouldn't have to wait for nodes to be ready in - // GKE; the operation shouldn't return until they all are. - numNodes, err := e2enode.TotalRegistered(f.ClientSet) - if err != nil { - return fmt.Errorf("couldn't detect number of nodes") - } - framework.Logf("Waiting up to %v for all %d nodes to be ready after the upgrade", framework.RestartNodeReadyAgainTimeout, numNodes) - if _, err := e2enode.CheckReady(f.ClientSet, numNodes, framework.RestartNodeReadyAgainTimeout); err != nil { - return err - } - return nil -} diff --git a/test/e2e/cloud/gcp/upgrade_mechanics.go b/test/e2e/cloud/gcp/upgrade_mechanics.go new file mode 100644 index 00000000000..14a57d04c83 --- /dev/null +++ b/test/e2e/cloud/gcp/upgrade_mechanics.go @@ -0,0 +1,269 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package gcp + +import ( + "fmt" + "os" + "os/exec" + "strings" + "time" + + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/apimachinery/pkg/version" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/kubernetes/test/e2e/framework" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" + "k8s.io/kubernetes/test/e2e/upgrades" + "k8s.io/kubernetes/test/utils/junit" +) + +func masterUpgradeFunc(f *framework.Framework, upgCtx *upgrades.UpgradeContext, testCase *junit.TestCase, masterExtraEnvs []string) func() { + return func() { + start := time.Now() + defer finalizeUpgradeTest(start, testCase) + target := upgCtx.Versions[1].Version.String() + framework.ExpectNoError(masterUpgrade(f, target, masterExtraEnvs)) + framework.ExpectNoError(checkMasterVersion(f.ClientSet, target)) + } +} + +func clusterUpgradeFunc(f *framework.Framework, upgCtx *upgrades.UpgradeContext, testCase *junit.TestCase, masterExtraEnvs, nodeExtraEnvs []string) func() { + return func() { + start := time.Now() + defer finalizeUpgradeTest(start, testCase) + target := upgCtx.Versions[1].Version.String() + image := upgCtx.Versions[1].NodeImage + framework.ExpectNoError(masterUpgrade(f, target, masterExtraEnvs)) + framework.ExpectNoError(checkMasterVersion(f.ClientSet, target)) + framework.ExpectNoError(nodeUpgrade(f, target, image, nodeExtraEnvs)) + framework.ExpectNoError(checkNodesVersions(f.ClientSet, target)) + } +} + +func clusterDowngradeFunc(f *framework.Framework, upgCtx *upgrades.UpgradeContext, testCase *junit.TestCase, masterExtraEnvs, nodeExtraEnvs []string) func() { + return func() { + start := time.Now() + defer finalizeUpgradeTest(start, testCase) + target := upgCtx.Versions[1].Version.String() + image := upgCtx.Versions[1].NodeImage + // Yes this really is a downgrade. And nodes must downgrade first. + framework.ExpectNoError(nodeUpgrade(f, target, image, nodeExtraEnvs)) + framework.ExpectNoError(checkNodesVersions(f.ClientSet, target)) + framework.ExpectNoError(masterUpgrade(f, target, masterExtraEnvs)) + framework.ExpectNoError(checkMasterVersion(f.ClientSet, target)) + } +} + +// masterUpgrade upgrades master node on GCE/GKE. +func masterUpgrade(f *framework.Framework, v string, extraEnvs []string) error { + switch framework.TestContext.Provider { + case "gce": + return masterUpgradeGCE(v, extraEnvs) + case "gke": + return framework.MasterUpgradeGKE(f.Namespace.Name, v) + default: + return fmt.Errorf("masterUpgrade() is not implemented for provider %s", framework.TestContext.Provider) + } +} + +func masterUpgradeGCE(rawV string, extraEnvs []string) error { + env := append(os.Environ(), extraEnvs...) + // TODO: Remove these variables when they're no longer needed for downgrades. + if framework.TestContext.EtcdUpgradeVersion != "" && framework.TestContext.EtcdUpgradeStorage != "" { + env = append(env, + "TEST_ETCD_VERSION="+framework.TestContext.EtcdUpgradeVersion, + "STORAGE_BACKEND="+framework.TestContext.EtcdUpgradeStorage, + "TEST_ETCD_IMAGE="+etcdImage) + } else { + // In e2e tests, we skip the confirmation prompt about + // implicit etcd upgrades to simulate the user entering "y". + env = append(env, "TEST_ALLOW_IMPLICIT_ETCD_UPGRADE=true") + } + + v := "v" + rawV + _, _, err := framework.RunCmdEnv(env, framework.GCEUpgradeScript(), "-M", v) + return err +} + +func traceRouteToMaster() { + traceroute, err := exec.LookPath("traceroute") + if err != nil { + framework.Logf("Could not find traceroute program") + return + } + cmd := exec.Command(traceroute, "-I", framework.APIAddress()) + out, err := cmd.Output() + if len(out) != 0 { + framework.Logf(string(out)) + } + if exiterr, ok := err.(*exec.ExitError); err != nil && ok { + framework.Logf("Error while running traceroute: %s", exiterr.Stderr) + } +} + +// checkMasterVersion validates the master version +func checkMasterVersion(c clientset.Interface, want string) error { + framework.Logf("Checking master version") + var err error + var v *version.Info + waitErr := wait.PollImmediate(5*time.Second, 2*time.Minute, func() (bool, error) { + v, err = c.Discovery().ServerVersion() + if err != nil { + traceRouteToMaster() + return false, nil + } + return true, nil + }) + if waitErr != nil { + return fmt.Errorf("CheckMasterVersion() couldn't get the master version: %v", err) + } + // We do prefix trimming and then matching because: + // want looks like: 0.19.3-815-g50e67d4 + // got looks like: v0.19.3-815-g50e67d4034e858-dirty + got := strings.TrimPrefix(v.GitVersion, "v") + if !strings.HasPrefix(got, want) { + return fmt.Errorf("master had kube-apiserver version %s which does not start with %s", got, want) + } + framework.Logf("Master is at version %s", want) + return nil +} + +// nodeUpgrade upgrades nodes on GCE/GKE. +func nodeUpgrade(f *framework.Framework, v string, img string, extraEnvs []string) error { + // Perform the upgrade. + var err error + switch framework.TestContext.Provider { + case "gce": + err = nodeUpgradeGCE(v, img, extraEnvs) + case "gke": + err = nodeUpgradeGKE(f.Namespace.Name, v, img) + default: + err = fmt.Errorf("nodeUpgrade() is not implemented for provider %s", framework.TestContext.Provider) + } + if err != nil { + return err + } + return waitForNodesReadyAfterUpgrade(f) +} + +// TODO(mrhohn): Remove 'enableKubeProxyDaemonSet' when kube-proxy is run as a DaemonSet by default. +func nodeUpgradeGCE(rawV, img string, extraEnvs []string) error { + v := "v" + rawV + env := append(os.Environ(), extraEnvs...) + if img != "" { + env = append(env, "KUBE_NODE_OS_DISTRIBUTION="+img) + _, _, err := framework.RunCmdEnv(env, framework.GCEUpgradeScript(), "-N", "-o", v) + return err + } + _, _, err := framework.RunCmdEnv(env, framework.GCEUpgradeScript(), "-N", v) + return err +} + +func nodeUpgradeGKE(namespace string, v string, img string) error { + framework.Logf("Upgrading nodes to version %q and image %q", v, img) + nps, err := nodePoolsGKE() + if err != nil { + return err + } + framework.Logf("Found node pools %v", nps) + for _, np := range nps { + args := []string{ + "container", + "clusters", + fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID), + framework.LocationParamGKE(), + "upgrade", + framework.TestContext.CloudConfig.Cluster, + fmt.Sprintf("--node-pool=%s", np), + fmt.Sprintf("--cluster-version=%s", v), + "--quiet", + } + if len(img) > 0 { + args = append(args, fmt.Sprintf("--image-type=%s", img)) + } + _, _, err = framework.RunCmd("gcloud", framework.AppendContainerCommandGroupIfNeeded(args)...) + + if err != nil { + return err + } + + framework.WaitForSSHTunnels(namespace) + } + return nil +} + +func nodePoolsGKE() ([]string, error) { + args := []string{ + "container", + "node-pools", + fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID), + framework.LocationParamGKE(), + "list", + fmt.Sprintf("--cluster=%s", framework.TestContext.CloudConfig.Cluster), + "--format=get(name)", + } + stdout, _, err := framework.RunCmd("gcloud", framework.AppendContainerCommandGroupIfNeeded(args)...) + if err != nil { + return nil, err + } + if len(strings.TrimSpace(stdout)) == 0 { + return []string{}, nil + } + return strings.Fields(stdout), nil +} + +func waitForNodesReadyAfterUpgrade(f *framework.Framework) error { + // Wait for it to complete and validate nodes are healthy. + // + // TODO(ihmccreery) We shouldn't have to wait for nodes to be ready in + // GKE; the operation shouldn't return until they all are. + numNodes, err := e2enode.TotalRegistered(f.ClientSet) + if err != nil { + return fmt.Errorf("couldn't detect number of nodes") + } + framework.Logf("Waiting up to %v for all %d nodes to be ready after the upgrade", framework.RestartNodeReadyAgainTimeout, numNodes) + if _, err := e2enode.CheckReady(f.ClientSet, numNodes, framework.RestartNodeReadyAgainTimeout); err != nil { + return err + } + return nil +} + +// checkNodesVersions validates the nodes versions +func checkNodesVersions(cs clientset.Interface, want string) error { + l, err := e2enode.GetReadySchedulableNodes(cs) + if err != nil { + return err + } + for _, n := range l.Items { + // We do prefix trimming and then matching because: + // want looks like: 0.19.3-815-g50e67d4 + // kv/kvp look like: v0.19.3-815-g50e67d4034e858-dirty + kv, kpv := strings.TrimPrefix(n.Status.NodeInfo.KubeletVersion, "v"), + strings.TrimPrefix(n.Status.NodeInfo.KubeProxyVersion, "v") + if !strings.HasPrefix(kv, want) { + return fmt.Errorf("node %s had kubelet version %s which does not start with %s", + n.ObjectMeta.Name, kv, want) + } + if !strings.HasPrefix(kpv, want) { + return fmt.Errorf("node %s had kube-proxy version %s which does not start with %s", + n.ObjectMeta.Name, kpv, want) + } + } + return nil +} +