Added metrics/debug gathering methods to utils and used them in density #7572

Robert Rati 2015-04-30 11:32:46 -04:00
parent 10339d72b6
commit a89121cb70
4 changed files with 72 additions and 9 deletions

View File

@@ -19,6 +19,7 @@ package e2e
import (
"fmt"
"math"
"os"
"strconv"
"time"
@@ -36,6 +37,26 @@ import (
. "github.com/onsi/gomega"
)
func writePerfData(c *client.Client, dirName string, postfix string) {
defer GinkgoRecover()
hdnl, err := os.Create(fmt.Sprintf("%s/metrics_%s.txt", dirName, postfix))
expectNoError(err)
metrics, err := GetMetrics(c)
expectNoError(err)
_, err = hdnl.WriteString(metrics)
expectNoError(err)
expectNoError(hdnl.Close())
debug, err := GetDebugInfo(c)
for key, value := range debug {
hdnl, err = os.Create(fmt.Sprintf("%s/%s_%s.txt", dirName, key, postfix))
expectNoError(err)
_, err = hdnl.WriteString(value)
expectNoError(err)
expectNoError(hdnl.Close())
}
}
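For orientation, the file layout a single density run produces (inferred from the format strings above plus the pod_states.txt file created further down; the file names are exactly those in the code, the tree itself is illustrative):

    <uuid>/
        metrics_before.txt
        block_before.txt
        goroutine_before.txt
        heap_before.txt
        threadcreate_before.txt
        pod_states.txt
        metrics_after.txt
        block_after.txt
        goroutine_after.txt
        heap_after.txt
        threadcreate_after.txt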
// This test suite can take a long time to run, so by default it is added to
// the ginkgo.skip list (see driver.go).
// To run this suite you must explicitly ask for it by setting the
@@ -45,6 +66,7 @@ var _ = Describe("Density", func() {
var minionCount int
var RCName string
var ns string
var uuid string
BeforeEach(func() {
var err error
@@ -57,6 +79,9 @@ var _ = Describe("Density", func() {
nsForTesting, err := createTestingNS("density", c)
ns = nsForTesting.Name
expectNoError(err)
uuid = string(util.NewUUID())
expectNoError(os.Mkdir(uuid, 0777))
writePerfData(c, uuid, "before")
})
AfterEach(func() {
@@ -82,6 +107,7 @@ var _ = Describe("Density", func() {
highLatencyRequests, err := HighLatencyRequests(c, 10*time.Second, util.NewStringSet("events"))
expectNoError(err)
Expect(highLatencyRequests).NotTo(BeNumerically(">", 0))
writePerfData(c, uuid, "after")
})
// Tests with "Skipped" substring in their name will be skipped when running
@@ -112,8 +138,10 @@ var _ = Describe("Density", func() {
itArg := testArg
It(name, func() {
totalPods := itArg.podsPerMinion * minionCount
nameStr := strconv.Itoa(totalPods) + "-" + string(util.NewUUID())
RCName = "my-hostname-density" + nameStr
RCName = "density" + strconv.Itoa(totalPods) + "-" + uuid
fileHndl, err := os.Create(fmt.Sprintf("%s/pod_states.txt", uuid))
expectNoError(err)
defer fileHndl.Close()
// Create a listener for events.
events := make([](*api.Event), 0)
@@ -139,9 +167,10 @@ var _ = Describe("Density", func() {
// Start the replication controller.
startTime := time.Now()
expectNoError(RunRC(c, RCName, ns, "gcr.io/google_containers/pause:go", totalPods))
expectNoError(RunRC(c, RCName, ns, "gcr.io/google_containers/pause:go", totalPods, fileHndl))
e2eStartupTime := time.Now().Sub(startTime)
Logf("E2E startup time for %d pods: %v", totalPods, e2eStartupTime)
fmt.Fprintf(fileHndl, "E2E startup time for %d pods: %v\n", totalPods, e2eStartupTime)
By("Waiting for all events to be recorded")
last := -1
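Taken together, the density.go changes above amount to the following per-run sequence. This is a condensed, illustrative sketch rather than literal code from the diff: error handling and the surrounding Ginkgo BeforeEach/It/AfterEach blocks are elided, and c, RCName, ns, and totalPods come from the test context.

// Condensed sketch of the flow introduced above (illustrative only).
uuid := string(util.NewUUID())
os.Mkdir(uuid, 0777)                       // per-run output directory
writePerfData(c, uuid, "before")           // apiserver metrics + pprof dumps before the run

fileHndl, _ := os.Create(fmt.Sprintf("%s/pod_states.txt", uuid))
defer fileHndl.Close()

startTime := time.Now()
RunRC(c, RCName, ns, "gcr.io/google_containers/pause:go", totalPods, fileHndl)
fmt.Fprintf(fileHndl, "E2E startup time for %d pods: %v\n", totalPods, time.Now().Sub(startTime))

writePerfData(c, uuid, "after")            // metrics + pprof dumps after the run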

View File

@@ -120,7 +120,7 @@ func playWithRC(c *client.Client, wg *sync.WaitGroup, ns, name string, size int)
// Once every 1-2 minutes perform resize of RC.
for start := time.Now(); time.Since(start) < simulationTime; time.Sleep(time.Duration(60+rand.Intn(60)) * time.Second) {
if !rcExist {
expectNoError(RunRC(c, name, ns, image, size), fmt.Sprintf("creating rc %s in namespace %s", name, ns))
expectNoError(RunRC(c, name, ns, image, size, nil), fmt.Sprintf("creating rc %s in namespace %s", name, ns))
rcExist = true
}
// Resize RC to a random size between 0.5x and 1.5x of the original size.

View File

@@ -107,7 +107,7 @@ var _ = Describe("Scale", func() {
for i := 0; i < itArg.rcsPerThread; i++ {
name := "my-short-lived-pod" + string(util.NewUUID())
n := itArg.podsPerMinion * minionCount
expectNoError(RunRC(c, name, ns, "gcr.io/google_containers/pause:go", n))
expectNoError(RunRC(c, name, ns, "gcr.io/google_containers/pause:go", n, nil))
podsLaunched += n
Logf("Launched %v pods so far...", podsLaunched)
err := DeleteRC(c, ns, name)

View File

@@ -474,7 +474,7 @@ func Diff(oldPods *api.PodList, curPods *api.PodList) PodDiff {
// It will wait for all pods it spawns to become "Running".
// It's the caller's responsibility to clean up externally (i.e. use the
// namespace lifecycle for handling cleanup).
func RunRC(c *client.Client, name string, ns, image string, replicas int) error {
func RunRC(c *client.Client, name string, ns, image string, replicas int, podStatusFile *os.File) error {
var last int
maxContainerFailures := int(math.Max(1.0, float64(replicas)*.01))
@@ -522,7 +522,11 @@ func RunRC(c *client.Client, name string, ns, image string, replicas int) error
current = len(pods.Items)
failCount := 5
for same < failCount && current < replicas {
Logf("Controller %s: Found %d pods out of %d", name, current, replicas)
msg := fmt.Sprintf("Controller %s: Found %d pods out of %d", name, current, replicas)
Logf(msg)
if podStatusFile != nil {
fmt.Fprintf(podStatusFile, "%s: %s\n", time.Now().String(), msg)
}
if last < current {
same = 0
} else if last == current {
@@ -546,7 +550,11 @@ func RunRC(c *client.Client, name string, ns, image string, replicas int) error
if current != replicas {
return fmt.Errorf("Controller %s: Only found %d replicas out of %d", name, current, replicas)
}
Logf("Controller %s in ns %s: Found %d pods out of %d", name, ns, current, replicas)
msg := fmt.Sprintf("Controller %s in ns %s: Found %d pods out of %d", name, ns, current, replicas)
Logf(msg)
if podStatusFile != nil {
fmt.Fprintf(podStatusFile, "%s: %s\n", time.Now().String(), msg)
}
By(fmt.Sprintf("Waiting for all %d replicas to be running with a max container failures of %d", replicas, maxContainerFailures))
same = 0
@@ -587,7 +595,11 @@ func RunRC(c *client.Client, name string, ns, image string, replicas int) error
unknown++
}
}
Logf("Pod States: %d running, %d pending, %d waiting, %d inactive, %d unknown ", current, pending, waiting, inactive, unknown)
msg := fmt.Sprintf("Pod States: %d running, %d pending, %d waiting, %d inactive, %d unknown ", current, pending, waiting, inactive, unknown)
Logf(msg)
if podStatusFile != nil {
fmt.Fprintf(podStatusFile, "%s: %s\n", time.Now().String(), msg)
}
if len(currentPods.Items) != len(pods.Items) {
@@ -936,3 +948,25 @@ func HighLatencyRequests(c *client.Client, threshold time.Duration, ignoredResou
return len(badMetrics), nil
}
// Retrieve metrics information
func GetMetrics(c *client.Client) (string, error) {
body, err := c.Get().AbsPath("/metrics").DoRaw()
if err != nil {
return "", err
}
return string(body), nil
}
// Retrieve debug information
func GetDebugInfo(c *client.Client) (map[string]string, error) {
data := make(map[string]string)
for _, key := range []string{"block", "goroutine", "heap", "threadcreate"} {
body, err := c.Get().AbsPath(fmt.Sprintf("/debug/pprof/%s", key)).DoRaw()
if err != nil {
Logf("Warning: Error trying to fetch %s debug data: %v", key, err)
}
data[key] = string(body)
}
return data, nil
}
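For context, /metrics serves the apiserver's metrics in the Prometheus text exposition format, and the four keys above are the standard runtime profile names exposed by net/http/pprof. A rough standalone equivalent of GetDebugInfo against an apiserver reachable over plain HTTP might look like the sketch below; the address and the absence of authentication are assumptions, whereas the e2e helper goes through the authenticated client.Client.

package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Hypothetical insecure apiserver address; the e2e helper uses
	// client.Get().AbsPath(...) on the authenticated client instead.
	base := "http://localhost:8080"
	for _, profile := range []string{"block", "goroutine", "heap", "threadcreate"} {
		resp, err := http.Get(fmt.Sprintf("%s/debug/pprof/%s", base, profile))
		if err != nil {
			fmt.Printf("warning: could not fetch %s profile: %v\n", profile, err)
			continue
		}
		body, err := io.ReadAll(resp.Body)
		resp.Body.Close()
		if err != nil {
			fmt.Printf("warning: could not read %s profile: %v\n", profile, err)
			continue
		}
		fmt.Printf("%s profile: %d bytes\n", profile, len(body))
	}
}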