mirror of https://github.com/k3s-io/kubernetes.git · synced 2025-08-03 09:22:44 +00:00

Added metrics/debug gathering methods to utils and used them in density #7572

This commit is contained in: parent 10339d72b6 · commit a89121cb70
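Summary of the change, as read from the diff below: two helpers, GetMetrics and GetDebugInfo, are added to the e2e utilities; the density suite gains a writePerfData wrapper that snapshots both into a per-run directory before and after each run; and RunRC grows an optional *os.File parameter for recording pod-state progress, with the load and scale callers passing nil.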
@@ -19,6 +19,7 @@ package e2e
 import (
 	"fmt"
 	"math"
+	"os"
 	"strconv"
 	"time"
 
@@ -36,6 +37,26 @@ import (
 	. "github.com/onsi/gomega"
 )
 
+func writePerfData(c *client.Client, dirName string, postfix string) {
+	defer GinkgoRecover()
+
+	hdnl, err := os.Create(fmt.Sprintf("%s/metrics_%s.txt", dirName, postfix))
+	expectNoError(err)
+	metrics, err := GetMetrics(c)
+	expectNoError(err)
+	_, err = hdnl.WriteString(metrics)
+	expectNoError(err)
+	expectNoError(hdnl.Close())
+	debug, err := GetDebugInfo(c)
+	for key, value := range debug {
+		hdnl, err = os.Create(fmt.Sprintf("%s/%s_%s.txt", dirName, key, postfix))
+		expectNoError(err)
+		_, err = hdnl.WriteString(value)
+		expectNoError(err)
+		expectNoError(hdnl.Close())
+	}
+}
+
 // This test suite can take a long time to run, so by default it is added to
 // the ginkgo.skip list (see driver.go).
 // To run this suite you must explicitly ask for it by setting the
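Taken together with the pod_states handle created further down, a density run under this commit leaves its artifacts in a directory named after the run's UUID, roughly as follows (file names derived from writePerfData above and from GetDebugInfo at the end of this diff):

    <uuid>/metrics_before.txt       apiserver /metrics snapshot from BeforeEach
    <uuid>/block_before.txt         /debug/pprof/block snapshot
    <uuid>/goroutine_before.txt     /debug/pprof/goroutine snapshot
    <uuid>/heap_before.txt          /debug/pprof/heap snapshot
    <uuid>/threadcreate_before.txt  /debug/pprof/threadcreate snapshot
    <uuid>/pod_states.txt           timestamped RunRC progress lines
    <uuid>/*_after.txt              the same metrics/pprof set, written from AfterEach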
@@ -45,6 +66,7 @@ var _ = Describe("Density", func() {
 	var minionCount int
 	var RCName string
 	var ns string
+	var uuid string
 
 	BeforeEach(func() {
 		var err error
@@ -57,6 +79,9 @@ var _ = Describe("Density", func() {
 		nsForTesting, err := createTestingNS("density", c)
 		ns = nsForTesting.Name
 		expectNoError(err)
+		uuid = string(util.NewUUID())
+		expectNoError(os.Mkdir(uuid, 0777))
+		writePerfData(c, uuid, "before")
 	})
 
 	AfterEach(func() {
@@ -82,6 +107,7 @@ var _ = Describe("Density", func() {
 		highLatencyRequests, err := HighLatencyRequests(c, 10*time.Second, util.NewStringSet("events"))
 		expectNoError(err)
 		Expect(highLatencyRequests).NotTo(BeNumerically(">", 0))
+		writePerfData(c, uuid, "after")
 	})
 
 	// Tests with "Skipped" substring in their name will be skipped when running
@@ -112,8 +138,10 @@ var _ = Describe("Density", func() {
 		itArg := testArg
 		It(name, func() {
 			totalPods := itArg.podsPerMinion * minionCount
-			nameStr := strconv.Itoa(totalPods) + "-" + string(util.NewUUID())
-			RCName = "my-hostname-density" + nameStr
+			RCName = "density" + strconv.Itoa(totalPods) + "-" + uuid
+			fileHndl, err := os.Create(fmt.Sprintf("%s/pod_states.txt", uuid))
+			expectNoError(err)
+			defer fileHndl.Close()
 
 			// Create a listener for events.
 			events := make([](*api.Event), 0)
@@ -139,9 +167,10 @@ var _ = Describe("Density", func() {
 
 			// Start the replication controller.
 			startTime := time.Now()
-			expectNoError(RunRC(c, RCName, ns, "gcr.io/google_containers/pause:go", totalPods))
+			expectNoError(RunRC(c, RCName, ns, "gcr.io/google_containers/pause:go", totalPods, fileHndl))
 			e2eStartupTime := time.Now().Sub(startTime)
 			Logf("E2E startup time for %d pods: %v", totalPods, e2eStartupTime)
+			fmt.Fprintf(fileHndl, "E2E startup time for %d pods: %v\n", totalPods, e2eStartupTime)
 
 			By("Waiting for all events to be recorded")
 			last := -1
@@ -120,7 +120,7 @@ func playWithRC(c *client.Client, wg *sync.WaitGroup, ns, name string, size int)
 	// Once every 1-2 minutes perform resize of RC.
 	for start := time.Now(); time.Since(start) < simulationTime; time.Sleep(time.Duration(60+rand.Intn(60)) * time.Second) {
 		if !rcExist {
-			expectNoError(RunRC(c, name, ns, image, size), fmt.Sprintf("creating rc %s in namespace %s", name, ns))
+			expectNoError(RunRC(c, name, ns, image, size, nil), fmt.Sprintf("creating rc %s in namespace %s", name, ns))
 			rcExist = true
 		}
 		// Resize RC to a random size between 0.5x and 1.5x of the original size.
@@ -107,7 +107,7 @@ var _ = Describe("Scale", func() {
 			for i := 0; i < itArg.rcsPerThread; i++ {
 				name := "my-short-lived-pod" + string(util.NewUUID())
 				n := itArg.podsPerMinion * minionCount
-				expectNoError(RunRC(c, name, ns, "gcr.io/google_containers/pause:go", n))
+				expectNoError(RunRC(c, name, ns, "gcr.io/google_containers/pause:go", n, nil))
 				podsLaunched += n
 				Logf("Launched %v pods so far...", podsLaunched)
 				err := DeleteRC(c, ns, name)
@@ -474,7 +474,7 @@ func Diff(oldPods *api.PodList, curPods *api.PodList) PodDiff {
 // It will waits for all pods it spawns to become "Running".
 // It's the caller's responsibility to clean up externally (i.e. use the
 // namespace lifecycle for handling cleanup).
-func RunRC(c *client.Client, name string, ns, image string, replicas int) error {
+func RunRC(c *client.Client, name string, ns, image string, replicas int, podStatusFile *os.File) error {
 	var last int
 
 	maxContainerFailures := int(math.Max(1.0, float64(replicas)*.01))
@@ -522,7 +522,11 @@ func RunRC(c *client.Client, name string, ns, image string, replicas int) error
 	current = len(pods.Items)
 	failCount := 5
 	for same < failCount && current < replicas {
-		Logf("Controller %s: Found %d pods out of %d", name, current, replicas)
+		msg := fmt.Sprintf("Controller %s: Found %d pods out of %d", name, current, replicas)
+		Logf(msg)
+		if podStatusFile != nil {
+			fmt.Fprintf(podStatusFile, "%s: %s\n", time.Now().String(), msg)
+		}
 		if last < current {
 			same = 0
 		} else if last == current {
@@ -546,7 +550,11 @@ func RunRC(c *client.Client, name string, ns, image string, replicas int) error
 	if current != replicas {
 		return fmt.Errorf("Controller %s: Only found %d replicas out of %d", name, current, replicas)
 	}
-	Logf("Controller %s in ns %s: Found %d pods out of %d", name, ns, current, replicas)
+	msg := fmt.Sprintf("Controller %s in ns %s: Found %d pods out of %d", name, ns, current, replicas)
+	Logf(msg)
+	if podStatusFile != nil {
+		fmt.Fprintf(podStatusFile, "%s: %s\n", time.Now().String(), msg)
+	}
 
 	By(fmt.Sprintf("Waiting for all %d replicas to be running with a max container failures of %d", replicas, maxContainerFailures))
 	same = 0
@@ -587,7 +595,11 @@ func RunRC(c *client.Client, name string, ns, image string, replicas int) error
 				unknown++
 			}
 		}
-		Logf("Pod States: %d running, %d pending, %d waiting, %d inactive, %d unknown ", current, pending, waiting, inactive, unknown)
+		msg := fmt.Sprintf("Pod States: %d running, %d pending, %d waiting, %d inactive, %d unknown ", current, pending, waiting, inactive, unknown)
+		Logf(msg)
+		if podStatusFile != nil {
+			fmt.Fprintf(podStatusFile, "%s: %s\n", time.Now().String(), msg)
+		}
 
 		if len(currentPods.Items) != len(pods.Items) {
 
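This is the third occurrence of the same nil-guarded pattern in RunRC: each progress message is built once with fmt.Sprintf, sent to Logf, and, only when podStatusFile is non-nil, appended with a wall-clock timestamp, so pod_states.txt mirrors the test log for density runs while the nil-passing load and scale callers are unaffected.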
@@ -936,3 +948,25 @@ func HighLatencyRequests(c *client.Client, threshold time.Duration, ignoredResou
 
 	return len(badMetrics), nil
 }
+
+// Retrieve metrics information
+func GetMetrics(c *client.Client) (string, error) {
+	body, err := c.Get().AbsPath("/metrics").DoRaw()
+	if err != nil {
+		return "", err
+	}
+	return string(body), nil
+}
+
+// Retrieve debug information
+func GetDebugInfo(c *client.Client) (map[string]string, error) {
+	data := make(map[string]string)
+	for _, key := range []string{"block", "goroutine", "heap", "threadcreate"} {
+		body, err := c.Get().AbsPath(fmt.Sprintf("/debug/pprof/%s", key)).DoRaw()
+		if err != nil {
+			Logf("Warning: Error trying to fetch %s debug data: %v", key, err)
+		}
+		data[key] = string(body)
+	}
+	return data, nil
+}
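For poking at the same data outside the e2e framework, here is a minimal standalone sketch (not part of the commit) that fetches the endpoints GetMetrics and GetDebugInfo read, using plain net/http; the server address and the assumption of an unauthenticated apiserver on a local insecure port are illustrative only:

    package main

    import (
    	"fmt"
    	"io/ioutil"
    	"net/http"
    )

    // Assumed: a local apiserver reachable without auth (illustration only).
    const server = "http://localhost:8080"

    // fetch GETs a path on the apiserver and returns the raw body,
    // much like c.Get().AbsPath(path).DoRaw() in the helpers above.
    func fetch(path string) ([]byte, error) {
    	resp, err := http.Get(server + path)
    	if err != nil {
    		return nil, err
    	}
    	defer resp.Body.Close()
    	return ioutil.ReadAll(resp.Body)
    }

    func main() {
    	// GetMetrics returns the body of /metrics as one string.
    	if body, err := fetch("/metrics"); err != nil {
    		fmt.Println("metrics:", err)
    	} else {
    		fmt.Printf("metrics: %d bytes\n", len(body))
    	}
    	// GetDebugInfo walks exactly these four pprof profiles.
    	for _, key := range []string{"block", "goroutine", "heap", "threadcreate"} {
    		body, err := fetch("/debug/pprof/" + key)
    		if err != nil {
    			fmt.Printf("%s: %v\n", key, err)
    			continue
    		}
    		fmt.Printf("%s profile: %d bytes\n", key, len(body))
    	}
    }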