From 4e5e1b8ac5bd08a1c2c0ef0d0765cfc57790e1b6 Mon Sep 17 00:00:00 2001 From: Isaac Hollander McCreery Date: Wed, 6 Apr 2016 15:12:31 -0700 Subject: [PATCH] Create chaosmonkey package, use ServiceTestJig for upgrade tests --- test/e2e/chaosmonkey/chaosmonkey.go | 149 ++++++++ test/e2e/chaosmonkey/chaosmonkey_test.go | 53 +++ test/e2e/cluster_upgrade.go | 432 +++++++++-------------- test/e2e/service.go | 2 + 4 files changed, 362 insertions(+), 274 deletions(-) create mode 100644 test/e2e/chaosmonkey/chaosmonkey.go create mode 100644 test/e2e/chaosmonkey/chaosmonkey_test.go diff --git a/test/e2e/chaosmonkey/chaosmonkey.go b/test/e2e/chaosmonkey/chaosmonkey.go new file mode 100644 index 00000000000..fc7ab287ab2 --- /dev/null +++ b/test/e2e/chaosmonkey/chaosmonkey.go @@ -0,0 +1,149 @@ +/* +Copyright 2016 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package chaosmonkey + +import . "github.com/onsi/ginkgo" + +// Disruption is the type to construct a chaosmonkey with; see Do for more information. +type Disruption func() + +// Test is the type to register with a chaosmonkey. A test will run asynchronously across the +// chaosmonkey's Disruption. A Test takes a Semaphore as an argument. It should call sem.Ready() +// once it's ready for the disruption to start and should then wait until sem.StopCh (which is a +// <-chan struct{}) is closed, which signals that the disruption is over. It should then clean up +// and return. See Do and Semaphore for more information. +type Test func(sem *Semaphore) + +// Interface can be implemented if you prefer to define tests without dealing with a Semaphore. You +// may define a struct that implements Interface's three methods (Setup, Test, and Teardown) and +// RegisterInterface. See RegisterInterface for more information. +type Interface interface { + Setup() + Test(stopCh <-chan struct{}) + Teardown() +} + +type chaosmonkey struct { + disruption Disruption + tests []Test +} + +// New creates and returns a chaosmonkey, with which the caller should register Tests and call Do. +// See Do for more information. +func New(disruption Disruption) *chaosmonkey { + return &chaosmonkey{ + disruption, + []Test{}, + } +} + +// Register registers the given Test with the chaosmonkey, so that the test will run over the +// Disruption. +func (cm *chaosmonkey) Register(test Test) { + cm.tests = append(cm.tests, test) +} + +// RegisterInterface registers the given Interface with the chaosmonkey, so the chaosmonkey will +// call Setup, Test, and Teardown properly. Test can tell that the Disruption is finished when +// stopCh is closed. +func (cm *chaosmonkey) RegisterInterface(in Interface) { + cm.Register(func(sem *Semaphore) { + in.Setup() + sem.Ready() + in.Test(sem.StopCh) + in.Teardown() + }) +} + +// Do performs the Disruption while testing the registered Tests. Once the caller has registered +// all Tests with the chaosmonkey, they call Do. 
Do starts each registered test asynchronously and +// waits for each test to signal that it is ready by calling sem.Ready(). Do will then do the +// Disruption, and when it's complete, close sem.StopCh to signal to the registered Tests that the +// Disruption is over, and wait for all Tests to return. +func (cm *chaosmonkey) Do() { + sems := []*Semaphore{} + // All semaphores have the same StopCh. + stopCh := make(chan struct{}) + + for _, test := range cm.tests { + sem := newSemaphore(stopCh) + sems = append(sems, sem) + go func() { + defer GinkgoRecover() + defer sem.done() + test(sem) + }() + } + + By("Waiting for all async tests to be ready") + for _, sem := range sems { + sem.waitForReadyOrDone() + } + + By("Starting disruption") + cm.disruption() + By("Disruption complete; stopping async validations") + close(stopCh) + By("Waiting for async validations to complete") + for _, sem := range sems { + sem.waitForDone() + } +} + +// Semaphore is taken by a Test and provides: Ready(), for the Test to call when it's ready for the +// disruption to start; and StopCh, the closure of which signals to the Test that the disruption is +// finished. +type Semaphore struct { + readyCh chan struct{} + StopCh <-chan struct{} + doneCh chan struct{} +} + +func newSemaphore(stopCh <-chan struct{}) *Semaphore { + // We don't want to block on Ready() or done() + return &Semaphore { + make(chan struct{}, 1), + stopCh, + make(chan struct{}, 1), + } +} + +// Ready is called by the Test to signal that the Test is ready for the disruption to start. +func (sem *Semaphore) Ready() { + close(sem.readyCh) +} + +// done is an internal method for Go to defer, both to wait for all tests to return, but also to +// sense if a test panicked before calling Ready. See waitForReadyOrDone. +func (sem *Semaphore) done() { + close(sem.doneCh) +} + +// We would like to just check if all tests are ready, but if they fail (which Ginkgo implements as +// a panic), they may not have called Ready(). We check done as well to see if the function has +// already returned; if it has, we don't care if it's ready, and just continue. +func (sem *Semaphore) waitForReadyOrDone() { + select { + case <-sem.readyCh: + case <-sem.doneCh: + } +} + +// waitForDone is an internal method for Go to wait on all Tests returning. +func (sem *Semaphore) waitForDone() { + <-sem.doneCh +} diff --git a/test/e2e/chaosmonkey/chaosmonkey_test.go b/test/e2e/chaosmonkey/chaosmonkey_test.go new file mode 100644 index 00000000000..70c7f70b16c --- /dev/null +++ b/test/e2e/chaosmonkey/chaosmonkey_test.go @@ -0,0 +1,53 @@ +/* +Copyright 2016 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package chaosmonkey + +import ( + "sync/atomic" + "testing" +) + +func TestDoWithPanic(t *testing.T) { + var counter int64 = 0 + cm := New(func() {}) + tests := []Test{ + // No panic + func(sem *Semaphore) { + defer atomic.AddInt64(&counter, 1) + sem.Ready() + }, + // Panic after sem.Ready() + func(sem *Semaphore) { + defer atomic.AddInt64(&counter, 1) + sem.Ready() + panic("Panic after calling sem.Ready()") + }, + // Panic before sem.Ready() + func(sem *Semaphore) { + defer atomic.AddInt64(&counter, 1) + panic("Panic before calling sem.Ready()") + }, + } + for _, test := range tests { + cm.Register(test) + } + cm.Do() + // Check that all funcs in tests were called. + if int(counter) != len(tests) { + t.Errorf("Expected counter to be %v, but it was %v", len(tests), counter) + } +} diff --git a/test/e2e/cluster_upgrade.go b/test/e2e/cluster_upgrade.go index e88e5038d2a..23f6e890b58 100644 --- a/test/e2e/cluster_upgrade.go +++ b/test/e2e/cluster_upgrade.go @@ -1,5 +1,5 @@ /* -Copyright 2015 The Kubernetes Authors All rights reserved. +Copyright 2016 The Kubernetes Authors All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,30 +20,118 @@ import ( "bytes" "fmt" "io" - "net/http" "os" "os/exec" "path" "strings" - "sync" "time" "k8s.io/kubernetes/pkg/api" client "k8s.io/kubernetes/pkg/client/unversioned" "k8s.io/kubernetes/pkg/util/wait" + "k8s.io/kubernetes/test/e2e/chaosmonkey" "k8s.io/kubernetes/test/e2e/framework" . "github.com/onsi/ginkgo" - . "github.com/onsi/gomega" ) +// TODO(mikedanese): Add setup, validate, and teardown for: +// - secrets +// - volumes +// - persistent volumes +var _ = framework.KubeDescribe("Upgrade [Feature:Upgrade]", func() { + f := framework.NewDefaultFramework("cluster-upgrade") + + framework.KubeDescribe("master upgrade", func() { + It("should maintain responsive services [Feature:MasterUpgrade]", func() { + cm := chaosmonkey.New(func() { + v, err := realVersion(framework.TestContext.UpgradeTarget) + framework.ExpectNoError(err) + framework.ExpectNoError(masterUpgrade(v)) + framework.ExpectNoError(checkMasterVersion(f.Client, v)) + }) + cm.Register(func(sem *chaosmonkey.Semaphore) { + // Close over f. + testServiceRemainsUp(f, sem) + }) + cm.Do() + }) + }) + + framework.KubeDescribe("node upgrade", func() { + It("should maintain a functioning cluster [Feature:NodeUpgrade]", func() { + cm := chaosmonkey.New(func() { + v, err := realVersion(framework.TestContext.UpgradeTarget) + framework.ExpectNoError(err) + framework.ExpectNoError(nodeUpgrade(f, v)) + framework.ExpectNoError(checkNodesVersions(f.Client, v)) + }) + cm.Register(func(sem *chaosmonkey.Semaphore) { + // Close over f. + testServiceUpBeforeAndAfter(f, sem) + }) + cm.Do() + }) + + It("should maintain responsive services [Feature:ExperimentalNodeUpgrade]", func() { + cm := chaosmonkey.New(func() { + v, err := realVersion(framework.TestContext.UpgradeTarget) + framework.ExpectNoError(err) + framework.ExpectNoError(nodeUpgrade(f, v)) + framework.ExpectNoError(checkNodesVersions(f.Client, v)) + }) + cm.Register(func(sem *chaosmonkey.Semaphore) { + // Close over f. 
+ testServiceRemainsUp(f, sem) + }) + cm.Do() + }) + }) + + framework.KubeDescribe("cluster upgrade", func() { + It("should maintain a functioning cluster [Feature:ClusterUpgrade]", func() { + cm := chaosmonkey.New(func() { + v, err := realVersion(framework.TestContext.UpgradeTarget) + framework.ExpectNoError(err) + framework.ExpectNoError(masterUpgrade(v)) + framework.ExpectNoError(checkMasterVersion(f.Client, v)) + framework.ExpectNoError(nodeUpgrade(f, v)) + framework.ExpectNoError(checkNodesVersions(f.Client, v)) + }) + cm.Register(func(sem *chaosmonkey.Semaphore) { + // Close over f. + testServiceUpBeforeAndAfter(f, sem) + }) + cm.Do() + }) + + It("should maintain responsive services [Feature:ExperimentalClusterUpgrade]", func() { + cm := chaosmonkey.New(func() { + v, err := realVersion(framework.TestContext.UpgradeTarget) + framework.ExpectNoError(err) + framework.ExpectNoError(masterUpgrade(v)) + framework.ExpectNoError(checkMasterVersion(f.Client, v)) + framework.ExpectNoError(nodeUpgrade(f, v)) + framework.ExpectNoError(checkNodesVersions(f.Client, v)) + }) + cm.Register(func(sem *chaosmonkey.Semaphore) { + // Close over f. + testServiceRemainsUp(f, sem) + }) + cm.Do() + }) + }) +}) + // realVersion turns a version constant s into a version string deployable on // GKE. See hack/get-build.sh for more information. func realVersion(s string) (string, error) { + framework.Logf(fmt.Sprintf("Getting real version for %q", s)) v, _, err := runCmd(path.Join(framework.TestContext.RepoRoot, "hack/get-build.sh"), "-v", s) if err != nil { return v, err } + framework.Logf("Version for %q is %q", s, v) return strings.TrimPrefix(strings.TrimSpace(v), "v"), nil } @@ -80,7 +168,7 @@ func masterUpgradeGKE(v string) error { return err } -var nodeUpgrade = func(f *framework.Framework, replicas int32, v string) error { +var nodeUpgrade = func(f *framework.Framework, v string) error { // Perform the upgrade. var err error switch framework.TestContext.Provider { @@ -95,7 +183,7 @@ var nodeUpgrade = func(f *framework.Framework, replicas int32, v string) error { return err } - // Wait for it to complete and validate nodes and pods are healthy. + // Wait for it to complete and validate nodes are healthy. // // TODO(ihmccreery) We shouldn't have to wait for nodes to be ready in // GKE; the operation shouldn't return until they all are. @@ -103,8 +191,7 @@ var nodeUpgrade = func(f *framework.Framework, replicas int32, v string) error { if _, err := checkNodesReady(f.Client, restartNodeReadyAgainTimeout, framework.TestContext.CloudConfig.NumNodes); err != nil { return err } - framework.Logf("Waiting up to %v for all pods to be running and ready after the upgrade", restartPodReadyAgainTimeout) - return framework.WaitForPodsRunningReady(f.Namespace.Name, replicas, restartPodReadyAgainTimeout) + return nil } func nodeUpgradeGCE(rawV string) error { @@ -170,191 +257,77 @@ func nodeUpgradeGKE(v string) error { return err } -var _ = framework.KubeDescribe("Upgrade [Feature:Upgrade]", func() { +func testServiceUpBeforeAndAfter(f *framework.Framework, sem *chaosmonkey.Semaphore) { + testService(f, sem, false) +} - svcName, replicas := "baz", int32(2) - var rcName, ip, v string - var ingress api.LoadBalancerIngress +func testServiceRemainsUp(f *framework.Framework, sem *chaosmonkey.Semaphore) { + testService(f, sem, true) +} - BeforeEach(func() { - // The version is determined once at the beginning of the test so that - // the master and nodes won't be skewed if the value changes during the - // test. 
- By(fmt.Sprintf("Getting real version for %q", framework.TestContext.UpgradeTarget)) - var err error - v, err = realVersion(framework.TestContext.UpgradeTarget) - framework.ExpectNoError(err) - framework.Logf("Version for %q is %q", framework.TestContext.UpgradeTarget, v) +// testService is a helper for testServiceUpBeforeAndAfter and testServiceRemainsUp with a flag for testDuringUpgrade +// +// TODO(ihmccreery) remove this abstraction once testServiceUpBeforeAndAfter is no longer needed, because node upgrades +// maintain a responsive service. +func testService(f *framework.Framework, sem *chaosmonkey.Semaphore, testDuringUpgrade bool) { + // Setup + serviceName := "service-test" + + jig := NewServiceTestJig(f.Client, serviceName) + // nodeIP := pickNodeIP(jig.Client) // for later + + By("creating a TCP service " + serviceName + " with type=LoadBalancer in namespace " + f.Namespace.Name) + // TODO it's weird that we have to do this and then wait WaitForLoadBalancer which changes + // tcpService. + tcpService := jig.CreateTCPServiceOrFail(f.Namespace.Name, func(s *api.Service) { + s.Spec.Type = api.ServiceTypeLoadBalancer }) + tcpService = jig.WaitForLoadBalancerOrFail(f.Namespace.Name, tcpService.Name) + jig.SanityCheckService(tcpService, api.ServiceTypeLoadBalancer) - f := framework.NewDefaultFramework("cluster-upgrade") - var w *ServiceTestFixture - BeforeEach(func() { - By("Setting up the service, RC, and pods") - w = NewServerTest(f.Client, f.Namespace.Name, svcName) - rc := w.CreateWebserverRC(replicas) - rcName = rc.ObjectMeta.Name - svc := w.BuildServiceSpec() - svc.Spec.Type = api.ServiceTypeLoadBalancer - w.CreateService(svc) + // Get info to hit it with + tcpIngressIP := getIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0]) + svcPort := tcpService.Spec.Ports[0].Port - By("Waiting for the service to become reachable") - result, err := waitForLoadBalancerIngress(f.Client, svcName, f.Namespace.Name) - Expect(err).NotTo(HaveOccurred()) - ingresses := result.Status.LoadBalancer.Ingress - if len(ingresses) != 1 { - framework.Failf("Was expecting only 1 ingress IP but got %d (%v): %v", len(ingresses), ingresses, result) - } - ingress = ingresses[0] - framework.Logf("Got load balancer ingress point %v", ingress) - ip = ingress.IP - if ip == "" { - ip = ingress.Hostname - } - testLoadBalancerReachable(ingress, 80) + By("creating pod to be part of service " + serviceName) + // TODO newRCTemplate only allows for the creation of one replica... that probably won't + // work so well. 
+ jig.RunOrFail(f.Namespace.Name, nil) - // TODO(mikedanese): Add setup, validate, and teardown for: - // - secrets - // - volumes - // - persistent volumes - }) + // Hit it once before considering ourselves ready + By("hitting the pod through the service's LoadBalancer") + jig.TestReachableHTTP(tcpIngressIP, svcPort, loadBalancerLagTimeoutDefault) - AfterEach(func() { - w.Cleanup() - }) + sem.Ready() - framework.KubeDescribe("master upgrade", func() { - It("should maintain responsive services [Feature:MasterUpgrade]", func() { - By("Validating cluster before master upgrade") - framework.ExpectNoError(validate(f, svcName, rcName, ingress, replicas)) - By("Performing a master upgrade") - testUpgrade(ip, v, masterUpgrade) - By("Checking master version") - framework.ExpectNoError(checkMasterVersion(f.Client, v)) - By("Validating cluster after master upgrade") - framework.ExpectNoError(validate(f, svcName, rcName, ingress, replicas)) - }) - }) + if testDuringUpgrade { + // Continuous validation + wait.Until(func() { + By("hitting the pod through the service's LoadBalancer") + // TODO this is way too long of a timeout; make it shorter since we've already + // validated it's working. + jig.TestReachableHTTP(tcpIngressIP, svcPort, loadBalancerLagTimeoutDefault) + }, 3*time.Second, sem.StopCh) + } else { + // Block until chaosmonkey is done + By("waiting for upgrade to finish without checking if service remains up") + <-sem.StopCh + } - framework.KubeDescribe("node upgrade", func() { - It("should maintain a functioning cluster [Feature:NodeUpgrade]", func() { - By("Validating cluster before node upgrade") - framework.ExpectNoError(validate(f, svcName, rcName, ingress, replicas)) - By("Performing a node upgrade") - // Circumnavigate testUpgrade, since services don't necessarily stay up. - framework.Logf("Starting upgrade") - framework.ExpectNoError(nodeUpgrade(f, replicas, v)) - framework.Logf("Upgrade complete") - By("Checking node versions") - framework.ExpectNoError(checkNodesVersions(f.Client, v)) - By("Validating cluster after node upgrade") - framework.ExpectNoError(validate(f, svcName, rcName, ingress, replicas)) - }) + // TODO(ihmccreery) We maybe shouldn't have to wait for the pods to be running again. I + // pulled this over from the NodeUpgrade test, but I'm not sure what the need for it there + // was. + // + // Also 1 is a magic number from newRCTemplate. 
+ framework.Logf("Waiting up to %v for all pods to be running and ready after the upgrade", restartPodReadyAgainTimeout) + framework.ExpectNoError(framework.WaitForPodsRunningReady(f.Namespace.Name, 1, restartPodReadyAgainTimeout)) - It("should maintain responsive services [Feature:ExperimentalNodeUpgrade]", func() { - By("Validating cluster before node upgrade") - framework.ExpectNoError(validate(f, svcName, rcName, ingress, replicas)) - By("Performing a node upgrade") - testUpgrade(ip, v, func(v string) error { - return nodeUpgrade(f, replicas, v) - }) - By("Checking node versions") - framework.ExpectNoError(checkNodesVersions(f.Client, v)) - By("Validating cluster after node upgrade") - framework.ExpectNoError(validate(f, svcName, rcName, ingress, replicas)) - }) - }) - - framework.KubeDescribe("cluster upgrade", func() { - It("should maintain responsive services [Feature:ClusterUpgrade]", func() { - By("Validating cluster before master upgrade") - framework.ExpectNoError(validate(f, svcName, rcName, ingress, replicas)) - By("Performing a master upgrade") - testUpgrade(ip, v, masterUpgrade) - By("Checking master version") - framework.ExpectNoError(checkMasterVersion(f.Client, v)) - By("Validating cluster after master upgrade") - framework.ExpectNoError(validate(f, svcName, rcName, ingress, replicas)) - - By("Validating cluster before node upgrade") - framework.ExpectNoError(validate(f, svcName, rcName, ingress, replicas)) - By("Performing a node upgrade") - // Circumnavigate testUpgrade, since services don't necessarily stay up. - framework.Logf("Starting upgrade") - framework.ExpectNoError(nodeUpgrade(f, replicas, v)) - framework.Logf("Upgrade complete") - By("Checking node versions") - framework.ExpectNoError(checkNodesVersions(f.Client, v)) - By("Validating cluster after node upgrade") - framework.ExpectNoError(validate(f, svcName, rcName, ingress, replicas)) - }) - - It("should maintain responsive services [Feature:ExperimentalClusterUpgrade]", func() { - By("Validating cluster before master upgrade") - framework.ExpectNoError(validate(f, svcName, rcName, ingress, replicas)) - By("Performing a master upgrade") - testUpgrade(ip, v, masterUpgrade) - By("Checking master version") - framework.ExpectNoError(checkMasterVersion(f.Client, v)) - By("Validating cluster after master upgrade") - framework.ExpectNoError(validate(f, svcName, rcName, ingress, replicas)) - - By("Validating cluster before node upgrade") - framework.ExpectNoError(validate(f, svcName, rcName, ingress, replicas)) - By("Performing a node upgrade") - testUpgrade(ip, v, func(v string) error { - return nodeUpgrade(f, replicas, v) - }) - By("Checking node versions") - framework.ExpectNoError(checkNodesVersions(f.Client, v)) - By("Validating cluster after node upgrade") - framework.ExpectNoError(validate(f, svcName, rcName, ingress, replicas)) - }) - }) -}) - -func testUpgrade(ip, v string, upF func(v string) error) { - framework.Logf("Starting async validation") - httpClient := http.Client{Timeout: 2 * time.Second} - done := make(chan struct{}, 1) - // Let's make sure we've finished the heartbeat before shutting things down. 
- var wg sync.WaitGroup - go wait.Until(func() { - defer GinkgoRecover() - wg.Add(1) - defer wg.Done() - - if err := wait.Poll(framework.Poll, framework.SingleCallTimeout, func() (bool, error) { - r, err := httpClient.Get("http://" + ip) - if err != nil { - framework.Logf("Error reaching %s: %v", ip, err) - return false, nil - } - if r.StatusCode < http.StatusOK || r.StatusCode >= http.StatusNotFound { - framework.Logf("Bad response; status: %d, response: %v", r.StatusCode, r) - return false, nil - } - return true, nil - }); err != nil { - // We log the error here because the test will fail at the very end - // because this validation runs in another goroutine. Without this, - // a failure is very confusing to track down because from the logs - // everything looks fine. - msg := fmt.Sprintf("Failed to contact service during upgrade: %v", err) - framework.Logf(msg) - framework.Failf(msg) - } - }, 200*time.Millisecond, done) - - framework.Logf("Starting upgrade") - framework.ExpectNoError(upF(v)) - done <- struct{}{} - framework.Logf("Stopping async validation") - wg.Wait() - framework.Logf("Upgrade complete") + // Validation + jig.SanityCheckService(tcpService, api.ServiceTypeLoadBalancer) } func checkMasterVersion(c *client.Client, want string) error { + framework.Logf("Checking master version") v, err := c.Discovery().ServerVersion() if err != nil { return fmt.Errorf("checkMasterVersion() couldn't get the master version: %v", err) @@ -418,6 +391,9 @@ func runCmd(command string, args ...string) (string, string, error) { cmd := exec.Command(command, args...) // We also output to the OS stdout/stderr to aid in debugging in case cmd // hangs and never returns before the test gets killed. + // + // This creates some ugly output because gcloud doesn't always provide + // newlines. cmd.Stdout = io.MultiWriter(os.Stdout, &bout) cmd.Stderr = io.MultiWriter(os.Stderr, &berr) err := cmd.Run() @@ -429,51 +405,17 @@ func runCmd(command string, args ...string) (string, string, error) { return stdout, stderr, nil } -func validate(f *framework.Framework, svcNameWant, rcNameWant string, ingress api.LoadBalancerIngress, podsWant int32) error { - framework.Logf("Beginning cluster validation") - // Verify RC. - rcs, err := f.Client.ReplicationControllers(f.Namespace.Name).List(api.ListOptions{}) - if err != nil { - return fmt.Errorf("error listing RCs: %v", err) - } - if len(rcs.Items) != 1 { - return fmt.Errorf("wanted 1 RC with name %s, got %d", rcNameWant, len(rcs.Items)) - } - if got := rcs.Items[0].Name; got != rcNameWant { - return fmt.Errorf("wanted RC name %q, got %q", rcNameWant, got) - } - - // Verify pods. - if err := framework.VerifyPods(f.Client, f.Namespace.Name, rcNameWant, false, podsWant); err != nil { - return fmt.Errorf("failed to find %d %q pods: %v", podsWant, rcNameWant, err) - } - - // Verify service. - svc, err := f.Client.Services(f.Namespace.Name).Get(svcNameWant) - if err != nil { - return fmt.Errorf("error getting service %s: %v", svcNameWant, err) - } - if svcNameWant != svc.Name { - return fmt.Errorf("wanted service name %q, got %q", svcNameWant, svc.Name) - } - // TODO(mikedanese): Make testLoadBalancerReachable return an error. - testLoadBalancerReachable(ingress, 80) - - framework.Logf("Cluster validation succeeded") - return nil -} - // migRollingUpdate starts a MIG rolling update, upgrading the nodes to a new // instance template named tmpl, and waits up to nt times the number of nodes // for it to complete. 
func migRollingUpdate(tmpl string, nt time.Duration) error { - By(fmt.Sprintf("starting the MIG rolling update to %s", tmpl)) + framework.Logf(fmt.Sprintf("starting the MIG rolling update to %s", tmpl)) id, err := migRollingUpdateStart(tmpl, nt) if err != nil { return fmt.Errorf("couldn't start the MIG rolling update: %v", err) } - By(fmt.Sprintf("polling the MIG rolling update (%s) until it completes", id)) + framework.Logf(fmt.Sprintf("polling the MIG rolling update (%s) until it completes", id)) if err := migRollingUpdatePoll(id, nt); err != nil { return fmt.Errorf("err waiting until update completed: %v", err) } @@ -611,61 +553,3 @@ func migRollingUpdatePoll(id string, nt time.Duration) error { framework.Logf("MIG rolling update complete after %v", time.Since(start)) return nil } - -func testLoadBalancerReachable(ingress api.LoadBalancerIngress, port int) bool { - loadBalancerLagTimeout := loadBalancerLagTimeoutDefault - if framework.ProviderIs("aws") { - loadBalancerLagTimeout = loadBalancerLagTimeoutAWS - } - return testLoadBalancerReachableInTime(ingress, port, loadBalancerLagTimeout) -} - -func testLoadBalancerReachableInTime(ingress api.LoadBalancerIngress, port int, timeout time.Duration) bool { - ip := ingress.IP - if ip == "" { - ip = ingress.Hostname - } - - return testReachableInTime(conditionFuncDecorator(ip, port, testReachableHTTP, "/", "test-webserver"), timeout) - -} - -func conditionFuncDecorator(ip string, port int, fn func(string, int, string, string) (bool, error), request string, expect string) wait.ConditionFunc { - return func() (bool, error) { - return fn(ip, port, request, expect) - } -} - -func testReachableInTime(testFunc wait.ConditionFunc, timeout time.Duration) bool { - By(fmt.Sprintf("Waiting up to %v", timeout)) - err := wait.PollImmediate(framework.Poll, timeout, testFunc) - if err != nil { - Expect(err).NotTo(HaveOccurred(), "Error waiting") - return false - } - return true -} - -func waitForLoadBalancerIngress(c *client.Client, serviceName, namespace string) (*api.Service, error) { - // TODO: once support ticket 21807001 is resolved, reduce this timeout - // back to something reasonable - const timeout = 20 * time.Minute - var service *api.Service - By(fmt.Sprintf("waiting up to %v for service %s in namespace %s to have a LoadBalancer ingress point", timeout, serviceName, namespace)) - i := 1 - for start := time.Now(); time.Since(start) < timeout; time.Sleep(3 * time.Second) { - service, err := c.Services(namespace).Get(serviceName) - if err != nil { - framework.Logf("Get service failed, ignoring for 5s: %v", err) - continue - } - if len(service.Status.LoadBalancer.Ingress) > 0 { - return service, nil - } - if i%5 == 0 { - framework.Logf("Waiting for service %s in namespace %s to have a LoadBalancer ingress point (%v)", serviceName, namespace, time.Since(start)) - } - i++ - } - return service, fmt.Errorf("service %s in namespace %s doesn't have a LoadBalancer ingress point after %.2f seconds", serviceName, namespace, timeout.Seconds()) -} diff --git a/test/e2e/service.go b/test/e2e/service.go index b7f9ac2c8b0..6254256e53d 100644 --- a/test/e2e/service.go +++ b/test/e2e/service.go @@ -481,6 +481,8 @@ var _ = framework.KubeDescribe("Services", func() { // Change the services to LoadBalancer. + // Here we test that LoadBalancers can receive static IP addresses. This isn't + // necessary, but is an additional feature this monolithic test checks. requestedIP := "" staticIPName := "" if framework.ProviderIs("gce", "gke") {
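
For readers unfamiliar with the new package, here is a minimal usage sketch of the API introduced above (New, Register, RegisterInterface, Semaphore, and Do), mirroring how cluster_upgrade.go drives it. It is illustrative only and not part of the patch: myAsyncValidation, upgradeValidation, and runChaosmonkeyExample are hypothetical names standing in for real disruptions (e.g. masterUpgrade) and validations (e.g. testServiceRemainsUp), and the import assumes the file is built inside the kubernetes tree where the new package path resolves.

    package e2e

    import (
        "time"

        "k8s.io/kubernetes/test/e2e/chaosmonkey"
    )

    // myAsyncValidation is a hypothetical Test registered directly with Register.
    // It signals readiness, then keeps validating until the disruption is over.
    func myAsyncValidation(sem *chaosmonkey.Semaphore) {
        // ... create whatever the validation needs, e.g. a Service and pods ...
        sem.Ready() // tell the chaosmonkey the disruption may begin
        for {
            select {
            case <-sem.StopCh:
                // The disruption is over; clean up and return.
                return
            case <-time.After(3 * time.Second):
                // ... re-check that the resource is still healthy ...
            }
        }
    }

    // upgradeValidation is a hypothetical chaosmonkey.Interface implementation,
    // for callers who prefer Setup/Test/Teardown over handling a Semaphore.
    type upgradeValidation struct{}

    func (v upgradeValidation) Setup() {
        // ... create the resources the validation will watch ...
    }

    func (v upgradeValidation) Test(stopCh <-chan struct{}) {
        // Validate continuously (or simply wait) until the disruption is over.
        <-stopCh
    }

    func (v upgradeValidation) Teardown() {
        // ... delete the resources created in Setup ...
    }

    func runChaosmonkeyExample() {
        // The disruption; in the e2e suite this is a master or node upgrade.
        cm := chaosmonkey.New(func() {
            // ... perform the disruptive operation here ...
        })
        cm.Register(myAsyncValidation)
        cm.RegisterInterface(upgradeValidation{})
        // Do blocks until every registered test has called Ready (or returned),
        // runs the disruption, closes StopCh, and waits for all tests to return.
        cm.Do()
    }

The Semaphore is what keeps Do from starting the disruption before every registered validation has finished its setup, and closing StopCh fans the "disruption finished" signal out to all tests at once, which is why a Test only needs to call sem.Ready() and then watch sem.StopCh.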