Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-30 15:05:27 +00:00
Merge pull request #26360 from jlowdermilk/skip-audit
Automatic merge from submit-queue

Fix some gce-only tests to run on gke as well

Enable "Services should work after restarting apiserver [Disruptive]" and the DaemonRestart tests, except the two that require master ssh access. Move the restart/upgrade related test helpers into their own file in the framework package.
This commit is contained in commit 5762ebfc63.
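To make the refactor concrete, here is a rough sketch (not part of the diff) of what one of the upgrade tests looks like after this change, calling the helpers through the framework package; the chaosmonkey driver call at the end (cm.Do()) is assumed from the surrounding test code rather than shown in these hunks:

    cm := chaosmonkey.New(func() {
        v, err := realVersion(framework.TestContext.UpgradeTarget)
        framework.ExpectNoError(err)
        // Upgrade helpers now live in the framework package.
        framework.ExpectNoError(framework.MasterUpgrade(v))
        framework.ExpectNoError(checkMasterVersion(f.Client, v))
        framework.ExpectNoError(framework.NodeUpgrade(f, v))
        framework.ExpectNoError(checkNodesVersions(f.Client, v))
    })
    cm.Register(func(sem *chaosmonkey.Semaphore) {
        // Illustrative: keep a service under test while the upgrade runs.
        testServiceUpBeforeAndAfter(f, sem)
    })
    cm.Do() // assumed chaosmonkey entry point; not shown in this diff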
@@ -17,14 +17,9 @@ limitations under the License.
 package e2e
 
 import (
-"bytes"
 "fmt"
-"io"
-"os"
-"os/exec"
 "path"
 "strings"
-"time"
 
 "k8s.io/kubernetes/pkg/api"
 client "k8s.io/kubernetes/pkg/client/unversioned"
@@ -47,7 +42,7 @@ var _ = framework.KubeDescribe("Upgrade [Feature:Upgrade]", func() {
 cm := chaosmonkey.New(func() {
 v, err := realVersion(framework.TestContext.UpgradeTarget)
 framework.ExpectNoError(err)
-framework.ExpectNoError(masterUpgrade(v))
+framework.ExpectNoError(framework.MasterUpgrade(v))
 framework.ExpectNoError(checkMasterVersion(f.Client, v))
 })
 cm.Register(func(sem *chaosmonkey.Semaphore) {
@@ -63,7 +58,7 @@ var _ = framework.KubeDescribe("Upgrade [Feature:Upgrade]", func() {
 cm := chaosmonkey.New(func() {
 v, err := realVersion(framework.TestContext.UpgradeTarget)
 framework.ExpectNoError(err)
-framework.ExpectNoError(nodeUpgrade(f, v))
+framework.ExpectNoError(framework.NodeUpgrade(f, v))
 framework.ExpectNoError(checkNodesVersions(f.Client, v))
 })
 cm.Register(func(sem *chaosmonkey.Semaphore) {
@@ -77,7 +72,7 @@ var _ = framework.KubeDescribe("Upgrade [Feature:Upgrade]", func() {
 cm := chaosmonkey.New(func() {
 v, err := realVersion(framework.TestContext.UpgradeTarget)
 framework.ExpectNoError(err)
-framework.ExpectNoError(nodeUpgrade(f, v))
+framework.ExpectNoError(framework.NodeUpgrade(f, v))
 framework.ExpectNoError(checkNodesVersions(f.Client, v))
 })
 cm.Register(func(sem *chaosmonkey.Semaphore) {
@@ -93,9 +88,9 @@ var _ = framework.KubeDescribe("Upgrade [Feature:Upgrade]", func() {
 cm := chaosmonkey.New(func() {
 v, err := realVersion(framework.TestContext.UpgradeTarget)
 framework.ExpectNoError(err)
-framework.ExpectNoError(masterUpgrade(v))
+framework.ExpectNoError(framework.MasterUpgrade(v))
 framework.ExpectNoError(checkMasterVersion(f.Client, v))
-framework.ExpectNoError(nodeUpgrade(f, v))
+framework.ExpectNoError(framework.NodeUpgrade(f, v))
 framework.ExpectNoError(checkNodesVersions(f.Client, v))
 })
 cm.Register(func(sem *chaosmonkey.Semaphore) {
@@ -109,9 +104,9 @@ var _ = framework.KubeDescribe("Upgrade [Feature:Upgrade]", func() {
 cm := chaosmonkey.New(func() {
 v, err := realVersion(framework.TestContext.UpgradeTarget)
 framework.ExpectNoError(err)
-framework.ExpectNoError(masterUpgrade(v))
+framework.ExpectNoError(framework.MasterUpgrade(v))
 framework.ExpectNoError(checkMasterVersion(f.Client, v))
-framework.ExpectNoError(nodeUpgrade(f, v))
+framework.ExpectNoError(framework.NodeUpgrade(f, v))
 framework.ExpectNoError(checkNodesVersions(f.Client, v))
 })
 cm.Register(func(sem *chaosmonkey.Semaphore) {
@@ -127,7 +122,7 @@ var _ = framework.KubeDescribe("Upgrade [Feature:Upgrade]", func() {
 // GKE. See hack/get-build.sh for more information.
 func realVersion(s string) (string, error) {
 framework.Logf(fmt.Sprintf("Getting real version for %q", s))
-v, _, err := runCmd(path.Join(framework.TestContext.RepoRoot, "hack/get-build.sh"), "-v", s)
+v, _, err := framework.RunCmd(path.Join(framework.TestContext.RepoRoot, "hack/get-build.sh"), "-v", s)
 if err != nil {
 return v, err
 }
@@ -135,128 +130,6 @@ func realVersion(s string) (string, error) {
 return strings.TrimPrefix(strings.TrimSpace(v), "v"), nil
 }
 
-// The following upgrade functions are passed into the framework below and used
-// to do the actual upgrades.
-var masterUpgrade = func(v string) error {
-switch framework.TestContext.Provider {
-case "gce":
-return masterUpgradeGCE(v)
-case "gke":
-return masterUpgradeGKE(v)
-default:
-return fmt.Errorf("masterUpgrade() is not implemented for provider %s", framework.TestContext.Provider)
-}
-}
-
-func masterUpgradeGCE(rawV string) error {
-v := "v" + rawV
-_, _, err := runCmd(path.Join(framework.TestContext.RepoRoot, "cluster/gce/upgrade.sh"), "-M", v)
-return err
-}
-
-func masterUpgradeGKE(v string) error {
-framework.Logf("Upgrading master to %q", v)
-_, _, err := runCmd("gcloud", "container",
-fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID),
-fmt.Sprintf("--zone=%s", framework.TestContext.CloudConfig.Zone),
-"clusters",
-"upgrade",
-framework.TestContext.CloudConfig.Cluster,
-"--master",
-fmt.Sprintf("--cluster-version=%s", v),
-"--quiet")
-return err
-}
-
-var nodeUpgrade = func(f *framework.Framework, v string) error {
-// Perform the upgrade.
-var err error
-switch framework.TestContext.Provider {
-case "gce":
-err = nodeUpgradeGCE(v)
-case "gke":
-err = nodeUpgradeGKE(v)
-default:
-err = fmt.Errorf("nodeUpgrade() is not implemented for provider %s", framework.TestContext.Provider)
-}
-if err != nil {
-return err
-}
-
-// Wait for it to complete and validate nodes are healthy.
-//
-// TODO(ihmccreery) We shouldn't have to wait for nodes to be ready in
-// GKE; the operation shouldn't return until they all are.
-framework.Logf("Waiting up to %v for all nodes to be ready after the upgrade", restartNodeReadyAgainTimeout)
-if _, err := checkNodesReady(f.Client, restartNodeReadyAgainTimeout, framework.TestContext.CloudConfig.NumNodes); err != nil {
-return err
-}
-return nil
-}
-
-func nodeUpgradeGCE(rawV string) error {
-// TODO(ihmccreery) This code path should be identical to how a user
-// would trigger a node update; right now it's very different.
-v := "v" + rawV
-
-framework.Logf("Getting the node template before the upgrade")
-tmplBefore, err := migTemplate()
-if err != nil {
-return fmt.Errorf("error getting the node template before the upgrade: %v", err)
-}
-
-framework.Logf("Preparing node upgrade by creating new instance template for %q", v)
-stdout, _, err := runCmd(path.Join(framework.TestContext.RepoRoot, "cluster/gce/upgrade.sh"), "-P", v)
-if err != nil {
-cleanupNodeUpgradeGCE(tmplBefore)
-return fmt.Errorf("error preparing node upgrade: %v", err)
-}
-tmpl := strings.TrimSpace(stdout)
-
-framework.Logf("Performing a node upgrade to %q; waiting at most %v per node", tmpl, restartPerNodeTimeout)
-if err := migRollingUpdate(tmpl, restartPerNodeTimeout); err != nil {
-cleanupNodeUpgradeGCE(tmplBefore)
-return fmt.Errorf("error doing node upgrade via a migRollingUpdate to %s: %v", tmpl, err)
-}
-return nil
-}
-
-func cleanupNodeUpgradeGCE(tmplBefore string) {
-framework.Logf("Cleaning up any unused node templates")
-tmplAfter, err := migTemplate()
-if err != nil {
-framework.Logf("Could not get node template post-upgrade; may have leaked template %s", tmplBefore)
-return
-}
-if tmplBefore == tmplAfter {
-// The node upgrade failed so there's no need to delete
-// anything.
-framework.Logf("Node template %s is still in use; not cleaning up", tmplBefore)
-return
-}
-framework.Logf("Deleting node template %s", tmplBefore)
-if _, _, err := retryCmd("gcloud", "compute", "instance-templates",
-fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID),
-"delete",
-tmplBefore); err != nil {
-framework.Logf("gcloud compute instance-templates delete %s call failed with err: %v", tmplBefore, err)
-framework.Logf("May have leaked instance template %q", tmplBefore)
-}
-}
-
-func nodeUpgradeGKE(v string) error {
-framework.Logf("Upgrading nodes to %q", v)
-_, _, err := runCmd("gcloud", "container",
-fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID),
-fmt.Sprintf("--zone=%s", framework.TestContext.CloudConfig.Zone),
-"clusters",
-"upgrade",
-framework.TestContext.CloudConfig.Cluster,
-fmt.Sprintf("--cluster-version=%s", v),
-"--quiet")
-return err
-}
-
 func testServiceUpBeforeAndAfter(f *framework.Framework, sem *chaosmonkey.Semaphore) {
 testService(f, sem, false)
 }
@@ -355,193 +228,3 @@ func checkNodesVersions(c *client.Client, want string) error {
 }
 return nil
 }
-
-// retryCmd runs cmd using args and retries it for up to framework.SingleCallTimeout if
-// it returns an error. It returns stdout and stderr.
-func retryCmd(command string, args ...string) (string, string, error) {
-var err error
-stdout, stderr := "", ""
-wait.Poll(framework.Poll, framework.SingleCallTimeout, func() (bool, error) {
-stdout, stderr, err = runCmd(command, args...)
-if err != nil {
-framework.Logf("Got %v", err)
-return false, nil
-}
-return true, nil
-})
-return stdout, stderr, err
-}
-
-// runCmd runs cmd using args and returns its stdout and stderr. It also outputs
-// cmd's stdout and stderr to their respective OS streams.
-//
-// TODO(ihmccreery) This function should either be moved into util.go or
-// removed; other e2e's use bare exe.Command.
-func runCmd(command string, args ...string) (string, string, error) {
-framework.Logf("Running %s %v", command, args)
-var bout, berr bytes.Buffer
-cmd := exec.Command(command, args...)
-// We also output to the OS stdout/stderr to aid in debugging in case cmd
-// hangs and never returns before the test gets killed.
-//
-// This creates some ugly output because gcloud doesn't always provide
-// newlines.
-cmd.Stdout = io.MultiWriter(os.Stdout, &bout)
-cmd.Stderr = io.MultiWriter(os.Stderr, &berr)
-err := cmd.Run()
-stdout, stderr := bout.String(), berr.String()
-if err != nil {
-return "", "", fmt.Errorf("error running %s %v; got error %v, stdout %q, stderr %q",
-command, args, err, stdout, stderr)
-}
-return stdout, stderr, nil
-}
-
-// migRollingUpdate starts a MIG rolling update, upgrading the nodes to a new
-// instance template named tmpl, and waits up to nt times the number of nodes
-// for it to complete.
-func migRollingUpdate(tmpl string, nt time.Duration) error {
-framework.Logf(fmt.Sprintf("starting the MIG rolling update to %s", tmpl))
-id, err := migRollingUpdateStart(tmpl, nt)
-if err != nil {
-return fmt.Errorf("couldn't start the MIG rolling update: %v", err)
-}
-
-framework.Logf(fmt.Sprintf("polling the MIG rolling update (%s) until it completes", id))
-if err := migRollingUpdatePoll(id, nt); err != nil {
-return fmt.Errorf("err waiting until update completed: %v", err)
-}
-
-return nil
-}
-
-// migTemplate (GCE-only) returns the name of the MIG template that the
-// nodes of the cluster use.
-func migTemplate() (string, error) {
-var errLast error
-var templ string
-key := "instanceTemplate"
-if wait.Poll(framework.Poll, framework.SingleCallTimeout, func() (bool, error) {
-// TODO(mikedanese): make this hit the compute API directly instead of
-// shelling out to gcloud.
-// An `instance-groups managed describe` call outputs what we want to stdout.
-output, _, err := retryCmd("gcloud", "compute", "instance-groups", "managed",
-fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID),
-"describe",
-fmt.Sprintf("--zone=%s", framework.TestContext.CloudConfig.Zone),
-framework.TestContext.CloudConfig.NodeInstanceGroup)
-if err != nil {
-errLast = fmt.Errorf("gcloud compute instance-groups managed describe call failed with err: %v", err)
-return false, nil
-}
-
-// The 'describe' call probably succeeded; parse the output and try to
-// find the line that looks like "instanceTemplate: url/to/<templ>" and
-// return <templ>.
-if val := framework.ParseKVLines(output, key); len(val) > 0 {
-url := strings.Split(val, "/")
-templ = url[len(url)-1]
-framework.Logf("MIG group %s using template: %s", framework.TestContext.CloudConfig.NodeInstanceGroup, templ)
-return true, nil
-}
-errLast = fmt.Errorf("couldn't find %s in output to get MIG template. Output: %s", key, output)
-return false, nil
-}) != nil {
-return "", fmt.Errorf("migTemplate() failed with last error: %v", errLast)
-}
-return templ, nil
-}
-
-// migRollingUpdateStart (GCE/GKE-only) starts a MIG rolling update using templ
-// as the new template, waiting up to nt per node, and returns the ID of that
-// update.
-func migRollingUpdateStart(templ string, nt time.Duration) (string, error) {
-var errLast error
-var id string
-prefix, suffix := "Started [", "]."
-if err := wait.Poll(framework.Poll, framework.SingleCallTimeout, func() (bool, error) {
-// TODO(mikedanese): make this hit the compute API directly instead of
-// shelling out to gcloud.
-// NOTE(mikedanese): If you are changing this gcloud command, update
-// cluster/gce/upgrade.sh to match this EXACTLY.
-// A `rolling-updates start` call outputs what we want to stderr.
-_, output, err := retryCmd("gcloud", "alpha", "compute",
-"rolling-updates",
-fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID),
-fmt.Sprintf("--zone=%s", framework.TestContext.CloudConfig.Zone),
-"start",
-// Required args.
-fmt.Sprintf("--group=%s", framework.TestContext.CloudConfig.NodeInstanceGroup),
-fmt.Sprintf("--template=%s", templ),
-// Optional args to fine-tune behavior.
-fmt.Sprintf("--instance-startup-timeout=%ds", int(nt.Seconds())),
-// NOTE: We can speed up this process by increasing
-// --max-num-concurrent-instances.
-fmt.Sprintf("--max-num-concurrent-instances=%d", 1),
-fmt.Sprintf("--max-num-failed-instances=%d", 0),
-fmt.Sprintf("--min-instance-update-time=%ds", 0))
-if err != nil {
-errLast = fmt.Errorf("rolling-updates call failed with err: %v", err)
-return false, nil
-}
-
-// The 'start' call probably succeeded; parse the output and try to find
-// the line that looks like "Started [url/to/<id>]." and return <id>.
-for _, line := range strings.Split(output, "\n") {
-// As a sanity check, ensure the line starts with prefix and ends
-// with suffix.
-if strings.Index(line, prefix) != 0 || strings.Index(line, suffix) != len(line)-len(suffix) {
-continue
-}
-url := strings.Split(strings.TrimSuffix(strings.TrimPrefix(line, prefix), suffix), "/")
-id = url[len(url)-1]
-framework.Logf("Started MIG rolling update; ID: %s", id)
-return true, nil
-}
-errLast = fmt.Errorf("couldn't find line like '%s ... %s' in output to MIG rolling-update start. Output: %s",
-prefix, suffix, output)
-return false, nil
-}); err != nil {
-return "", fmt.Errorf("migRollingUpdateStart() failed with last error: %v", errLast)
-}
-return id, nil
-}
-
-// migRollingUpdatePoll (CKE/GKE-only) polls the progress of the MIG rolling
-// update with ID id until it is complete. It returns an error if this takes
-// longer than nt times the number of nodes.
-func migRollingUpdatePoll(id string, nt time.Duration) error {
-// Two keys and a val.
-status, progress, done := "status", "statusMessage", "ROLLED_OUT"
-start, timeout := time.Now(), nt*time.Duration(framework.TestContext.CloudConfig.NumNodes)
-var errLast error
-framework.Logf("Waiting up to %v for MIG rolling update to complete.", timeout)
-if wait.Poll(restartPoll, timeout, func() (bool, error) {
-// A `rolling-updates describe` call outputs what we want to stdout.
-output, _, err := retryCmd("gcloud", "alpha", "compute",
-"rolling-updates",
-fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID),
-fmt.Sprintf("--zone=%s", framework.TestContext.CloudConfig.Zone),
-"describe",
-id)
-if err != nil {
-errLast = fmt.Errorf("Error calling rolling-updates describe %s: %v", id, err)
-framework.Logf("%v", errLast)
-return false, nil
-}
-
-// The 'describe' call probably succeeded; parse the output and try to
-// find the line that looks like "status: <status>" and see whether it's
-// done.
-framework.Logf("Waiting for MIG rolling update: %s (%v elapsed)",
-framework.ParseKVLines(output, progress), time.Since(start))
-if st := framework.ParseKVLines(output, status); st == done {
-return true, nil
-}
-return false, nil
-}) != nil {
-return fmt.Errorf("timeout waiting %v for MIG rolling update to complete. Last error: %v", timeout, errLast)
-}
-framework.Logf("MIG rolling update complete after %v", time.Since(start))
-return nil
-}
@@ -199,8 +199,7 @@ var _ = framework.KubeDescribe("DaemonRestart [Disruptive]", func() {
 
 BeforeEach(func() {
 // These tests require SSH
-// TODO(11834): Enable this test in GKE once experimental API there is switched on
-framework.SkipUnlessProviderIs("gce", "aws")
+framework.SkipUnlessProviderIs(framework.ProvidersWithSSH...)
 ns = f.Namespace.Name
 
 // All the restart tests need an rc and a watch on pods of the rc.
@@ -252,6 +251,8 @@ var _ = framework.KubeDescribe("DaemonRestart [Disruptive]", func() {
 
 It("Controller Manager should not create/delete replicas across restart", func() {
 
+// Requires master ssh access.
+framework.SkipUnlessProviderIs("gce", "aws")
 restarter := NewRestartConfig(
 framework.GetMasterHost(), "kube-controller", ports.ControllerManagerPort, restartPollInterval, restartTimeout)
 restarter.restart()
@@ -281,6 +282,8 @@ var _ = framework.KubeDescribe("DaemonRestart [Disruptive]", func() {
 
 It("Scheduler should continue assigning pods to nodes across restart", func() {
 
+// Requires master ssh access.
+framework.SkipUnlessProviderIs("gce", "aws")
 restarter := NewRestartConfig(
 framework.GetMasterHost(), "kube-scheduler", ports.SchedulerPort, restartPollInterval, restartTimeout)
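The pattern above is worth calling out: node-level DaemonRestart cases are now gated on any ssh-capable provider, while the two master-component cases keep an explicit provider list. A rough sketch of the resulting structure (assembled from the hunks above, not a verbatim excerpt):

    BeforeEach(func() {
        // These tests require SSH to the nodes.
        framework.SkipUnlessProviderIs(framework.ProvidersWithSSH...)
    })

    It("Controller Manager should not create/delete replicas across restart", func() {
        // Requires master ssh access, so stay on gce/aws only.
        framework.SkipUnlessProviderIs("gce", "aws")
        // ... restart kube-controller and verify replica counts ...
    })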
test/e2e/framework/nodes_util.go (new file, 358 lines)
@@ -0,0 +1,358 @@
/*
Copyright 2014 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package framework

import (
"fmt"
"path"
"strings"
"time"

"k8s.io/kubernetes/pkg/api"
client "k8s.io/kubernetes/pkg/client/unversioned"
"k8s.io/kubernetes/pkg/fields"
"k8s.io/kubernetes/pkg/util/wait"
)

// The following upgrade functions are passed into the framework below and used
// to do the actual upgrades.
var MasterUpgrade = func(v string) error {
switch TestContext.Provider {
case "gce":
return masterUpgradeGCE(v)
case "gke":
return masterUpgradeGKE(v)
default:
return fmt.Errorf("MasterUpgrade() is not implemented for provider %s", TestContext.Provider)
}
}

func masterUpgradeGCE(rawV string) error {
v := "v" + rawV
_, _, err := RunCmd(path.Join(TestContext.RepoRoot, "cluster/gce/upgrade.sh"), "-M", v)
return err
}

func masterUpgradeGKE(v string) error {
Logf("Upgrading master to %q", v)
_, _, err := RunCmd("gcloud", "container",
fmt.Sprintf("--project=%s", TestContext.CloudConfig.ProjectID),
fmt.Sprintf("--zone=%s", TestContext.CloudConfig.Zone),
"clusters",
"upgrade",
TestContext.CloudConfig.Cluster,
"--master",
fmt.Sprintf("--cluster-version=%s", v),
"--quiet")
return err
}

var NodeUpgrade = func(f *Framework, v string) error {
// Perform the upgrade.
var err error
switch TestContext.Provider {
case "gce":
err = nodeUpgradeGCE(v)
case "gke":
err = nodeUpgradeGKE(v)
default:
err = fmt.Errorf("NodeUpgrade() is not implemented for provider %s", TestContext.Provider)
}
if err != nil {
return err
}

// Wait for it to complete and validate nodes are healthy.
//
// TODO(ihmccreery) We shouldn't have to wait for nodes to be ready in
// GKE; the operation shouldn't return until they all are.
Logf("Waiting up to %v for all nodes to be ready after the upgrade", RestartNodeReadyAgainTimeout)
if _, err := CheckNodesReady(f.Client, RestartNodeReadyAgainTimeout, TestContext.CloudConfig.NumNodes); err != nil {
return err
}
return nil
}

func nodeUpgradeGCE(rawV string) error {
// TODO(ihmccreery) This code path should be identical to how a user
// would trigger a node update; right now it's very different.
v := "v" + rawV

Logf("Getting the node template before the upgrade")
tmplBefore, err := MigTemplate()
if err != nil {
return fmt.Errorf("error getting the node template before the upgrade: %v", err)
}

Logf("Preparing node upgrade by creating new instance template for %q", v)
stdout, _, err := RunCmd(path.Join(TestContext.RepoRoot, "cluster/gce/upgrade.sh"), "-P", v)
if err != nil {
cleanupNodeUpgradeGCE(tmplBefore)
return fmt.Errorf("error preparing node upgrade: %v", err)
}
tmpl := strings.TrimSpace(stdout)

Logf("Performing a node upgrade to %q; waiting at most %v per node", tmpl, RestartPerNodeTimeout)
if err := MigRollingUpdate(tmpl, RestartPerNodeTimeout); err != nil {
cleanupNodeUpgradeGCE(tmplBefore)
return fmt.Errorf("error doing node upgrade via a MigRollingUpdate to %s: %v", tmpl, err)
}
return nil
}

// MigRollingUpdate starts a MIG rolling update, upgrading the nodes to a new
// instance template named tmpl, and waits up to nt times the number of nodes
// for it to complete.
func MigRollingUpdate(tmpl string, nt time.Duration) error {
Logf(fmt.Sprintf("starting the MIG rolling update to %s", tmpl))
id, err := migRollingUpdateStart(tmpl, nt)
if err != nil {
return fmt.Errorf("couldn't start the MIG rolling update: %v", err)
}

Logf(fmt.Sprintf("polling the MIG rolling update (%s) until it completes", id))
if err := migRollingUpdatePoll(id, nt); err != nil {
return fmt.Errorf("err waiting until update completed: %v", err)
}

return nil
}

// migRollingUpdateStart (GCE/GKE-only) starts a MIG rolling update using templ
// as the new template, waiting up to nt per node, and returns the ID of that
// update.
func migRollingUpdateStart(templ string, nt time.Duration) (string, error) {
var errLast error
var id string
prefix, suffix := "Started [", "]."
if err := wait.Poll(Poll, SingleCallTimeout, func() (bool, error) {
// TODO(mikedanese): make this hit the compute API directly instead of
// shelling out to gcloud.
// NOTE(mikedanese): If you are changing this gcloud command, update
// cluster/gce/upgrade.sh to match this EXACTLY.
// A `rolling-updates start` call outputs what we want to stderr.
_, output, err := retryCmd("gcloud", "alpha", "compute",
"rolling-updates",
fmt.Sprintf("--project=%s", TestContext.CloudConfig.ProjectID),
fmt.Sprintf("--zone=%s", TestContext.CloudConfig.Zone),
"start",
// Required args.
fmt.Sprintf("--group=%s", TestContext.CloudConfig.NodeInstanceGroup),
fmt.Sprintf("--template=%s", templ),
// Optional args to fine-tune behavior.
fmt.Sprintf("--instance-startup-timeout=%ds", int(nt.Seconds())),
// NOTE: We can speed up this process by increasing
// --max-num-concurrent-instances.
fmt.Sprintf("--max-num-concurrent-instances=%d", 1),
fmt.Sprintf("--max-num-failed-instances=%d", 0),
fmt.Sprintf("--min-instance-update-time=%ds", 0))
if err != nil {
errLast = fmt.Errorf("rolling-updates call failed with err: %v", err)
return false, nil
}

// The 'start' call probably succeeded; parse the output and try to find
// the line that looks like "Started [url/to/<id>]." and return <id>.
for _, line := range strings.Split(output, "\n") {
// As a sanity check, ensure the line starts with prefix and ends
// with suffix.
if strings.Index(line, prefix) != 0 || strings.Index(line, suffix) != len(line)-len(suffix) {
continue
}
url := strings.Split(strings.TrimSuffix(strings.TrimPrefix(line, prefix), suffix), "/")
id = url[len(url)-1]
Logf("Started MIG rolling update; ID: %s", id)
return true, nil
}
errLast = fmt.Errorf("couldn't find line like '%s ... %s' in output to MIG rolling-update start. Output: %s",
prefix, suffix, output)
return false, nil
}); err != nil {
return "", fmt.Errorf("migRollingUpdateStart() failed with last error: %v", errLast)
}
return id, nil
}

// migRollingUpdatePoll (GCE/GKE-only) polls the progress of the MIG rolling
// update with ID id until it is complete. It returns an error if this takes
// longer than nt times the number of nodes.
func migRollingUpdatePoll(id string, nt time.Duration) error {
// Two keys and a val.
status, progress, done := "status", "statusMessage", "ROLLED_OUT"
start, timeout := time.Now(), nt*time.Duration(TestContext.CloudConfig.NumNodes)
var errLast error
Logf("Waiting up to %v for MIG rolling update to complete.", timeout)
if wait.Poll(RestartPoll, timeout, func() (bool, error) {
// A `rolling-updates describe` call outputs what we want to stdout.
output, _, err := retryCmd("gcloud", "alpha", "compute",
"rolling-updates",
fmt.Sprintf("--project=%s", TestContext.CloudConfig.ProjectID),
fmt.Sprintf("--zone=%s", TestContext.CloudConfig.Zone),
"describe",
id)
if err != nil {
errLast = fmt.Errorf("Error calling rolling-updates describe %s: %v", id, err)
Logf("%v", errLast)
return false, nil
}

// The 'describe' call probably succeeded; parse the output and try to
// find the line that looks like "status: <status>" and see whether it's
// done.
Logf("Waiting for MIG rolling update: %s (%v elapsed)",
ParseKVLines(output, progress), time.Since(start))
if st := ParseKVLines(output, status); st == done {
return true, nil
}
return false, nil
}) != nil {
return fmt.Errorf("timeout waiting %v for MIG rolling update to complete. Last error: %v", timeout, errLast)
}
Logf("MIG rolling update complete after %v", time.Since(start))
return nil
}

func cleanupNodeUpgradeGCE(tmplBefore string) {
Logf("Cleaning up any unused node templates")
tmplAfter, err := MigTemplate()
if err != nil {
Logf("Could not get node template post-upgrade; may have leaked template %s", tmplBefore)
return
}
if tmplBefore == tmplAfter {
// The node upgrade failed so there's no need to delete
// anything.
Logf("Node template %s is still in use; not cleaning up", tmplBefore)
return
}
Logf("Deleting node template %s", tmplBefore)
if _, _, err := retryCmd("gcloud", "compute", "instance-templates",
fmt.Sprintf("--project=%s", TestContext.CloudConfig.ProjectID),
"delete",
tmplBefore); err != nil {
Logf("gcloud compute instance-templates delete %s call failed with err: %v", tmplBefore, err)
Logf("May have leaked instance template %q", tmplBefore)
}
}

func nodeUpgradeGKE(v string) error {
Logf("Upgrading nodes to %q", v)
_, _, err := RunCmd("gcloud", "container",
fmt.Sprintf("--project=%s", TestContext.CloudConfig.ProjectID),
fmt.Sprintf("--zone=%s", TestContext.CloudConfig.Zone),
"clusters",
"upgrade",
TestContext.CloudConfig.Cluster,
fmt.Sprintf("--cluster-version=%s", v),
"--quiet")
return err
}

// CheckNodesReady waits up to nt for expect nodes accessed by c to be ready,
// returning an error if this doesn't happen in time. It returns the names of
// nodes it finds.
func CheckNodesReady(c *client.Client, nt time.Duration, expect int) ([]string, error) {
// First, keep getting all of the nodes until we get the number we expect.
var nodeList *api.NodeList
var errLast error
start := time.Now()
found := wait.Poll(Poll, nt, func() (bool, error) {
// A rolling-update (GCE/GKE implementation of restart) can complete before the apiserver
// knows about all of the nodes. Thus, we retry the list nodes call
// until we get the expected number of nodes.
nodeList, errLast = c.Nodes().List(api.ListOptions{
FieldSelector: fields.Set{"spec.unschedulable": "false"}.AsSelector()})
if errLast != nil {
return false, nil
}
if len(nodeList.Items) != expect {
errLast = fmt.Errorf("expected to find %d nodes but found only %d (%v elapsed)",
expect, len(nodeList.Items), time.Since(start))
Logf("%v", errLast)
return false, nil
}
return true, nil
}) == nil
nodeNames := make([]string, len(nodeList.Items))
for i, n := range nodeList.Items {
nodeNames[i] = n.ObjectMeta.Name
}
if !found {
return nodeNames, fmt.Errorf("couldn't find %d nodes within %v; last error: %v",
expect, nt, errLast)
}
Logf("Successfully found %d nodes", expect)

// Next, ensure in parallel that all the nodes are ready. We subtract the
// time we spent waiting above.
timeout := nt - time.Since(start)
result := make(chan bool, len(nodeList.Items))
for _, n := range nodeNames {
n := n
go func() { result <- WaitForNodeToBeReady(c, n, timeout) }()
}
failed := false
// TODO(mbforbes): Change to `for range` syntax once we support only Go
// >= 1.4.
for i := range nodeList.Items {
_ = i
if !<-result {
failed = true
}
}
if failed {
return nodeNames, fmt.Errorf("at least one node failed to be ready")
}
return nodeNames, nil
}

// MigTemplate (GCE-only) returns the name of the MIG template that the
// nodes of the cluster use.
func MigTemplate() (string, error) {
var errLast error
var templ string
key := "instanceTemplate"
if wait.Poll(Poll, SingleCallTimeout, func() (bool, error) {
// TODO(mikedanese): make this hit the compute API directly instead of
// shelling out to gcloud.
// An `instance-groups managed describe` call outputs what we want to stdout.
output, _, err := retryCmd("gcloud", "compute", "instance-groups", "managed",
fmt.Sprintf("--project=%s", TestContext.CloudConfig.ProjectID),
"describe",
fmt.Sprintf("--zone=%s", TestContext.CloudConfig.Zone),
TestContext.CloudConfig.NodeInstanceGroup)
if err != nil {
errLast = fmt.Errorf("gcloud compute instance-groups managed describe call failed with err: %v", err)
return false, nil
}

// The 'describe' call probably succeeded; parse the output and try to
// find the line that looks like "instanceTemplate: url/to/<templ>" and
// return <templ>.
if val := ParseKVLines(output, key); len(val) > 0 {
url := strings.Split(val, "/")
templ = url[len(url)-1]
Logf("MIG group %s using template: %s", TestContext.CloudConfig.NodeInstanceGroup, templ)
return true, nil
}
errLast = fmt.Errorf("couldn't find %s in output to get MIG template. Output: %s", key, output)
return false, nil
}) != nil {
return "", fmt.Errorf("MigTemplate() failed with last error: %v", errLast)
}
return templ, nil
}
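For orientation, a rough sketch (not part of the diff) of how a caller outside the framework package uses the newly exported helpers; the Framework object f and the Ginkgo wiring around it are assumed:

    nn := framework.TestContext.CloudConfig.NumNodes
    nodeNames, err := framework.CheckNodesReady(f.Client, framework.NodeReadyInitialTimeout, nn)
    framework.ExpectNoError(err)
    framework.Logf("Ready nodes before the rolling update: %v", nodeNames)

    tmpl, err := framework.MigTemplate()
    framework.ExpectNoError(err)
    framework.ExpectNoError(framework.MigRollingUpdate(tmpl, framework.RestartPerNodeTimeout))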
@@ -134,6 +134,22 @@ const (
 // When these values are updated, also update cmd/kubelet/app/options/options.go
 currentPodInfraContainerImageName = "gcr.io/google_containers/pause"
 currentPodInfraContainerImageVersion = "3.0"
+
+// How long each node is given during a process that restarts all nodes
+// before the test is considered failed. (Note that the total time to
+// restart all nodes will be this number times the number of nodes.)
+RestartPerNodeTimeout = 5 * time.Minute
+
+// How often to Poll the status of a restart.
+RestartPoll = 20 * time.Second
+
+// How long a node is allowed to become "Ready" after it is restarted before
+// the test is considered failed.
+RestartNodeReadyAgainTimeout = 5 * time.Minute
+
+// How long a pod is allowed to become "running" and "ready" after a node
+// restart before test is considered failed.
+RestartPodReadyAgainTimeout = 5 * time.Minute
 )
 
 // Label allocated to the image puller static pod that runs on each node
@@ -3517,13 +3533,29 @@ func RestartKubeProxy(host string) error {
 return nil
 }
 
-func RestartApiserver() error {
+func RestartApiserver(c *client.Client) error {
 // TODO: Make it work for all providers.
 if !ProviderIs("gce", "gke", "aws") {
 return fmt.Errorf("unsupported provider: %s", TestContext.Provider)
 }
+if ProviderIs("gce", "aws") {
+return sshRestartMaster()
+}
+// GKE doesn't allow ssh access, so use a same-version master
+// upgrade to teardown/recreate master.
+v, err := c.ServerVersion()
+if err != nil {
+return err
+}
+return masterUpgradeGKE(v.GitVersion[1:]) // strip leading 'v'
+}
+
+func sshRestartMaster() error {
+if !ProviderIs("gce", "aws") {
+return fmt.Errorf("unsupported provider: %s", TestContext.Provider)
+}
 var command string
-if ProviderIs("gce", "gke") {
+if ProviderIs("gce") {
 command = "sudo docker ps | grep /kube-apiserver | cut -d ' ' -f 1 | xargs sudo docker kill"
 } else {
 command = "sudo /etc/init.d/kube-apiserver restart"
@@ -4139,3 +4171,41 @@ func GetPodsInNamespace(c *client.Client, ns string, ignoreLabels map[string]str
 }
 return filtered, nil
 }
+
+// RunCmd runs cmd using args and returns its stdout and stderr. It also outputs
+// cmd's stdout and stderr to their respective OS streams.
+func RunCmd(command string, args ...string) (string, string, error) {
+Logf("Running %s %v", command, args)
+var bout, berr bytes.Buffer
+cmd := exec.Command(command, args...)
+// We also output to the OS stdout/stderr to aid in debugging in case cmd
+// hangs and never returns before the test gets killed.
+//
+// This creates some ugly output because gcloud doesn't always provide
+// newlines.
+cmd.Stdout = io.MultiWriter(os.Stdout, &bout)
+cmd.Stderr = io.MultiWriter(os.Stderr, &berr)
+err := cmd.Run()
+stdout, stderr := bout.String(), berr.String()
+if err != nil {
+return "", "", fmt.Errorf("error running %s %v; got error %v, stdout %q, stderr %q",
+command, args, err, stdout, stderr)
+}
+return stdout, stderr, nil
+}
+
+// retryCmd runs cmd using args and retries it for up to SingleCallTimeout if
+// it returns an error. It returns stdout and stderr.
+func retryCmd(command string, args ...string) (string, string, error) {
+var err error
+stdout, stderr := "", ""
+wait.Poll(Poll, SingleCallTimeout, func() (bool, error) {
+stdout, stderr, err = RunCmd(command, args...)
+if err != nil {
+Logf("Got %v", err)
+return false, nil
+}
+return true, nil
+})
+return stdout, stderr, err
+}
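A hedged usage sketch of the relocated command helpers, written from inside the framework package since retryCmd stays unexported; the get-build.sh argument mirrors realVersion above, and target is a placeholder for a version marker string:

    stdout, _, err := RunCmd(path.Join(TestContext.RepoRoot, "hack/get-build.sh"), "-v", target)
    if err != nil {
        Logf("get-build.sh failed: %v", err)
        return
    }
    Logf("resolved build: %s", strings.TrimSpace(stdout))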
@@ -21,7 +21,6 @@ import (
 "time"
 
 "k8s.io/kubernetes/pkg/api"
 client "k8s.io/kubernetes/pkg/client/unversioned"
-"k8s.io/kubernetes/pkg/fields"
 "k8s.io/kubernetes/pkg/labels"
 "k8s.io/kubernetes/pkg/util/wait"
@@ -31,24 +30,6 @@ import (
 . "github.com/onsi/gomega"
 )
 
-const (
-// How long each node is given during a process that restarts all nodes
-// before the test is considered failed. (Note that the total time to
-// restart all nodes will be this number times the number of nodes.)
-restartPerNodeTimeout = 5 * time.Minute
-
-// How often to framework.Poll the statues of a restart.
-restartPoll = 20 * time.Second
-
-// How long a node is allowed to become "Ready" after it is restarted before
-// the test is considered failed.
-restartNodeReadyAgainTimeout = 5 * time.Minute
-
-// How long a pod is allowed to become "running" and "ready" after a node
-// restart before test is considered failed.
-restartPodReadyAgainTimeout = 5 * time.Minute
-)
-
 var _ = framework.KubeDescribe("Restart [Disruptive]", func() {
 f := framework.NewDefaultFramework("restart")
 var ps *framework.PodStore
@@ -71,7 +52,7 @@ var _ = framework.KubeDescribe("Restart [Disruptive]", func() {
 nn := framework.TestContext.CloudConfig.NumNodes
 
 By("ensuring all nodes are ready")
-nodeNamesBefore, err := checkNodesReady(f.Client, framework.NodeReadyInitialTimeout, nn)
+nodeNamesBefore, err := framework.CheckNodesReady(f.Client, framework.NodeReadyInitialTimeout, nn)
 Expect(err).NotTo(HaveOccurred())
 framework.Logf("Got the following nodes before restart: %v", nodeNamesBefore)
 
@@ -87,11 +68,11 @@ var _ = framework.KubeDescribe("Restart [Disruptive]", func() {
 }
 
 By("restarting all of the nodes")
-err = restartNodes(framework.TestContext.Provider, restartPerNodeTimeout)
+err = restartNodes(framework.TestContext.Provider, framework.RestartPerNodeTimeout)
 Expect(err).NotTo(HaveOccurred())
 
 By("ensuring all nodes are ready after the restart")
-nodeNamesAfter, err := checkNodesReady(f.Client, restartNodeReadyAgainTimeout, nn)
+nodeNamesAfter, err := framework.CheckNodesReady(f.Client, framework.RestartNodeReadyAgainTimeout, nn)
 Expect(err).NotTo(HaveOccurred())
 framework.Logf("Got the following nodes after restart: %v", nodeNamesAfter)
 
@@ -108,10 +89,10 @@ var _ = framework.KubeDescribe("Restart [Disruptive]", func() {
 // across node restarts.
 By("ensuring the same number of pods are running and ready after restart")
 podCheckStart := time.Now()
-podNamesAfter, err := waitForNPods(ps, len(podNamesBefore), restartPodReadyAgainTimeout)
+podNamesAfter, err := waitForNPods(ps, len(podNamesBefore), framework.RestartPodReadyAgainTimeout)
 Expect(err).NotTo(HaveOccurred())
-remaining := restartPodReadyAgainTimeout - time.Since(podCheckStart)
-if !framework.CheckPodsRunningReadyOrSucceeded(f.Client, ns, podNamesAfter, remaining) {
+remaining := framework.RestartPodReadyAgainTimeout - time.Since(podCheckStart)
+if !framework.CheckPodsRunningReady(f.Client, ns, podNamesAfter, remaining) {
 framework.Failf("At least one pod wasn't running and ready after the restart.")
 }
 })
@@ -144,64 +125,6 @@ func waitForNPods(ps *framework.PodStore, expect int, timeout time.Duration) ([]
 return podNames, nil
 }
 
-// checkNodesReady waits up to nt for expect nodes accessed by c to be ready,
-// returning an error if this doesn't happen in time. It returns the names of
-// nodes it finds.
-func checkNodesReady(c *client.Client, nt time.Duration, expect int) ([]string, error) {
-// First, keep getting all of the nodes until we get the number we expect.
-var nodeList *api.NodeList
-var errLast error
-start := time.Now()
-found := wait.Poll(framework.Poll, nt, func() (bool, error) {
-// A rolling-update (GCE/GKE implementation of restart) can complete before the apiserver
-// knows about all of the nodes. Thus, we retry the list nodes call
-// until we get the expected number of nodes.
-nodeList, errLast = c.Nodes().List(api.ListOptions{
-FieldSelector: fields.Set{"spec.unschedulable": "false"}.AsSelector()})
-if errLast != nil {
-return false, nil
-}
-if len(nodeList.Items) != expect {
-errLast = fmt.Errorf("expected to find %d nodes but found only %d (%v elapsed)",
-expect, len(nodeList.Items), time.Since(start))
-framework.Logf("%v", errLast)
-return false, nil
-}
-return true, nil
-}) == nil
-nodeNames := make([]string, len(nodeList.Items))
-for i, n := range nodeList.Items {
-nodeNames[i] = n.ObjectMeta.Name
-}
-if !found {
-return nodeNames, fmt.Errorf("couldn't find %d nodes within %v; last error: %v",
-expect, nt, errLast)
-}
-framework.Logf("Successfully found %d nodes", expect)
-
-// Next, ensure in parallel that all the nodes are ready. We subtract the
-// time we spent waiting above.
-timeout := nt - time.Since(start)
-result := make(chan bool, len(nodeList.Items))
-for _, n := range nodeNames {
-n := n
-go func() { result <- framework.WaitForNodeToBeReady(c, n, timeout) }()
-}
-failed := false
-// TODO(mbforbes): Change to `for range` syntax once we support only Go
-// >= 1.4.
-for i := range nodeList.Items {
-_ = i
-if !<-result {
-failed = true
-}
-}
-if failed {
-return nodeNames, fmt.Errorf("at least one node failed to be ready")
-}
-return nodeNames, nil
-}
-
 // restartNodes uses provider to do a restart of all nodes in the cluster,
 // allowing up to nt per node.
 func restartNodes(provider string, nt time.Duration) error {
@@ -235,9 +158,9 @@ func restartNodes(provider string, nt time.Duration) error {
 // done
 func migRollingUpdateSelf(nt time.Duration) error {
 By("getting the name of the template for the managed instance group")
-tmpl, err := migTemplate()
+tmpl, err := framework.MigTemplate()
 if err != nil {
 return fmt.Errorf("couldn't get MIG template name: %v", err)
 }
-return migRollingUpdate(tmpl, nt)
+return framework.MigRollingUpdate(tmpl, nt)
 }
@@ -331,8 +331,7 @@ var _ = framework.KubeDescribe("Services", func() {
 
 It("should work after restarting apiserver [Disruptive]", func() {
 // TODO: use the ServiceTestJig here
-// TODO: framework.RestartApiserver doesn't work in GKE - fix it and reenable this test.
-framework.SkipUnlessProviderIs("gce")
+framework.SkipUnlessProviderIs("gce", "gke")
 
 ns := f.Namespace.Name
 numPods, servicePort := 3, 80
@@ -351,7 +350,7 @@ var _ = framework.KubeDescribe("Services", func() {
 framework.ExpectNoError(verifyServeHostnameServiceUp(c, ns, host, podNames1, svc1IP, servicePort))
 
 // Restart apiserver
-if err := framework.RestartApiserver(); err != nil {
+if err := framework.RestartApiserver(c); err != nil {
 framework.Failf("error restarting apiserver: %v", err)
 }
 if err := framework.WaitForApiserverUp(c); err != nil {