mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-01 15:58:37 +00:00
Added metrics/debug gathering methods to utils and used them in density #7572
This commit is contained in:
parent
10339d72b6
commit
a89121cb70
@ -19,6 +19,7 @@ package e2e
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
@ -36,6 +37,26 @@ import (
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
func writePerfData(c *client.Client, dirName string, postfix string) {
|
||||
defer GinkgoRecover()
|
||||
|
||||
hdnl, err := os.Create(fmt.Sprintf("%s/metrics_%s.txt", dirName, postfix))
|
||||
expectNoError(err)
|
||||
metrics, err := GetMetrics(c)
|
||||
expectNoError(err)
|
||||
_, err = hdnl.WriteString(metrics)
|
||||
expectNoError(err)
|
||||
expectNoError(hdnl.Close())
|
||||
debug, err := GetDebugInfo(c)
|
||||
for key, value := range debug {
|
||||
hdnl, err = os.Create(fmt.Sprintf("%s/%s_%s.txt", dirName, key, postfix))
|
||||
expectNoError(err)
|
||||
_, err = hdnl.WriteString(value)
|
||||
expectNoError(err)
|
||||
expectNoError(hdnl.Close())
|
||||
}
|
||||
}
|
||||
|
||||
// This test suite can take a long time to run, so by default it is added to
|
||||
// the ginkgo.skip list (see driver.go).
|
||||
// To run this suite you must explicitly ask for it by setting the
|
||||
@ -45,6 +66,7 @@ var _ = Describe("Density", func() {
|
||||
var minionCount int
|
||||
var RCName string
|
||||
var ns string
|
||||
var uuid string
|
||||
|
||||
BeforeEach(func() {
|
||||
var err error
|
||||
@ -57,6 +79,9 @@ var _ = Describe("Density", func() {
|
||||
nsForTesting, err := createTestingNS("density", c)
|
||||
ns = nsForTesting.Name
|
||||
expectNoError(err)
|
||||
uuid = string(util.NewUUID())
|
||||
expectNoError(os.Mkdir(uuid, 0777))
|
||||
writePerfData(c, uuid, "before")
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
@ -82,6 +107,7 @@ var _ = Describe("Density", func() {
|
||||
highLatencyRequests, err := HighLatencyRequests(c, 10*time.Second, util.NewStringSet("events"))
|
||||
expectNoError(err)
|
||||
Expect(highLatencyRequests).NotTo(BeNumerically(">", 0))
|
||||
writePerfData(c, uuid, "after")
|
||||
})
|
||||
|
||||
// Tests with "Skipped" substring in their name will be skipped when running
|
||||
@ -112,8 +138,10 @@ var _ = Describe("Density", func() {
|
||||
itArg := testArg
|
||||
It(name, func() {
|
||||
totalPods := itArg.podsPerMinion * minionCount
|
||||
nameStr := strconv.Itoa(totalPods) + "-" + string(util.NewUUID())
|
||||
RCName = "my-hostname-density" + nameStr
|
||||
RCName = "density" + strconv.Itoa(totalPods) + "-" + uuid
|
||||
fileHndl, err := os.Create(fmt.Sprintf("%s/pod_states.txt", uuid))
|
||||
expectNoError(err)
|
||||
defer fileHndl.Close()
|
||||
|
||||
// Create a listener for events.
|
||||
events := make([](*api.Event), 0)
|
||||
@ -139,9 +167,10 @@ var _ = Describe("Density", func() {
|
||||
|
||||
// Start the replication controller.
|
||||
startTime := time.Now()
|
||||
expectNoError(RunRC(c, RCName, ns, "gcr.io/google_containers/pause:go", totalPods))
|
||||
expectNoError(RunRC(c, RCName, ns, "gcr.io/google_containers/pause:go", totalPods, fileHndl))
|
||||
e2eStartupTime := time.Now().Sub(startTime)
|
||||
Logf("E2E startup time for %d pods: %v", totalPods, e2eStartupTime)
|
||||
fmt.Fprintf(fileHndl, "E2E startup time for %d pods: %v\n", totalPods, e2eStartupTime)
|
||||
|
||||
By("Waiting for all events to be recorded")
|
||||
last := -1
|
||||
|
@ -120,7 +120,7 @@ func playWithRC(c *client.Client, wg *sync.WaitGroup, ns, name string, size int)
|
||||
// Once every 1-2 minutes perform resize of RC.
|
||||
for start := time.Now(); time.Since(start) < simulationTime; time.Sleep(time.Duration(60+rand.Intn(60)) * time.Second) {
|
||||
if !rcExist {
|
||||
expectNoError(RunRC(c, name, ns, image, size), fmt.Sprintf("creating rc %s in namespace %s", name, ns))
|
||||
expectNoError(RunRC(c, name, ns, image, size, nil), fmt.Sprintf("creating rc %s in namespace %s", name, ns))
|
||||
rcExist = true
|
||||
}
|
||||
// Resize RC to a random size between 0.5x and 1.5x of the original size.
|
||||
|
@ -107,7 +107,7 @@ var _ = Describe("Scale", func() {
|
||||
for i := 0; i < itArg.rcsPerThread; i++ {
|
||||
name := "my-short-lived-pod" + string(util.NewUUID())
|
||||
n := itArg.podsPerMinion * minionCount
|
||||
expectNoError(RunRC(c, name, ns, "gcr.io/google_containers/pause:go", n))
|
||||
expectNoError(RunRC(c, name, ns, "gcr.io/google_containers/pause:go", n, nil))
|
||||
podsLaunched += n
|
||||
Logf("Launched %v pods so far...", podsLaunched)
|
||||
err := DeleteRC(c, ns, name)
|
||||
|
@ -474,7 +474,7 @@ func Diff(oldPods *api.PodList, curPods *api.PodList) PodDiff {
|
||||
// It will wait for all pods it spawns to become "Running".
|
||||
// It's the caller's responsibility to clean up externally (i.e. use the
|
||||
// namespace lifecycle for handling cleanup).
|
||||
func RunRC(c *client.Client, name string, ns, image string, replicas int) error {
|
||||
func RunRC(c *client.Client, name string, ns, image string, replicas int, podStatusFile *os.File) error {
|
||||
var last int
|
||||
|
||||
maxContainerFailures := int(math.Max(1.0, float64(replicas)*.01))
|
||||
@ -522,7 +522,11 @@ func RunRC(c *client.Client, name string, ns, image string, replicas int) error
|
||||
current = len(pods.Items)
|
||||
failCount := 5
|
||||
for same < failCount && current < replicas {
|
||||
Logf("Controller %s: Found %d pods out of %d", name, current, replicas)
|
||||
msg := fmt.Sprintf("Controller %s: Found %d pods out of %d", name, current, replicas)
|
||||
Logf(msg)
|
||||
if podStatusFile != nil {
|
||||
fmt.Fprintf(podStatusFile, "%s: %s\n", time.Now().String(), msg)
|
||||
}
|
||||
if last < current {
|
||||
same = 0
|
||||
} else if last == current {
|
||||
@ -546,7 +550,11 @@ func RunRC(c *client.Client, name string, ns, image string, replicas int) error
|
||||
if current != replicas {
|
||||
return fmt.Errorf("Controller %s: Only found %d replicas out of %d", name, current, replicas)
|
||||
}
|
||||
Logf("Controller %s in ns %s: Found %d pods out of %d", name, ns, current, replicas)
|
||||
msg := fmt.Sprintf("Controller %s in ns %s: Found %d pods out of %d", name, ns, current, replicas)
|
||||
Logf(msg)
|
||||
if podStatusFile != nil {
|
||||
fmt.Fprintf(podStatusFile, "%s: %s\n", time.Now().String(), msg)
|
||||
}
|
||||
|
||||
By(fmt.Sprintf("Waiting for all %d replicas to be running with a max container failures of %d", replicas, maxContainerFailures))
|
||||
same = 0
|
||||
@ -587,7 +595,11 @@ func RunRC(c *client.Client, name string, ns, image string, replicas int) error
|
||||
unknown++
|
||||
}
|
||||
}
|
||||
Logf("Pod States: %d running, %d pending, %d waiting, %d inactive, %d unknown ", current, pending, waiting, inactive, unknown)
|
||||
msg := fmt.Sprintf("Pod States: %d running, %d pending, %d waiting, %d inactive, %d unknown ", current, pending, waiting, inactive, unknown)
|
||||
Logf(msg)
|
||||
if podStatusFile != nil {
|
||||
fmt.Fprintf(podStatusFile, "%s: %s\n", time.Now().String(), msg)
|
||||
}
|
||||
|
||||
if len(currentPods.Items) != len(pods.Items) {
|
||||
|
||||
@ -936,3 +948,25 @@ func HighLatencyRequests(c *client.Client, threshold time.Duration, ignoredResou
|
||||
|
||||
return len(badMetrics), nil
|
||||
}
|
||||
|
||||
// Retrieve metrics information
|
||||
func GetMetrics(c *client.Client) (string, error) {
|
||||
body, err := c.Get().AbsPath("/metrics").DoRaw()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(body), nil
|
||||
}
|
||||
|
||||
// Retrieve debug information
|
||||
func GetDebugInfo(c *client.Client) (map[string]string, error) {
|
||||
data := make(map[string]string)
|
||||
for _, key := range []string{"block", "goroutine", "heap", "threadcreate"} {
|
||||
body, err := c.Get().AbsPath(fmt.Sprintf("/debug/pprof/%s", key)).DoRaw()
|
||||
if err != nil {
|
||||
Logf("Warning: Error trying to fetch %s debug data: %v", key, err)
|
||||
}
|
||||
data[key] = string(body)
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user