Merge pull request #10329 from bprashanth/density_logging

Add clarity to density test
Yu-Ju Hong 2015-07-06 10:55:19 -07:00
commit 5e21a040ca
2 changed files with 57 additions and 1 deletion


@@ -20,6 +20,7 @@ import (
"fmt"
"math"
"os"
"os/exec"
"sort"
"strconv"
"sync"
@@ -66,6 +67,20 @@ func printLatencies(latencies []podLatencyData, header string) {
Logf("perc50: %v, perc90: %v, perc99: %v", perc50, perc90, perc99)
}
// List nodes via gcloud. We don't rely on the apiserver because we specifically need the node IPs,
// and the node controller is sometimes slow to populate them (an apiserver-based sketch, for contrast, appears after this file's diff).
func gcloudListNodes() {
Logf("Listing nodes via gcloud:")
output, err := exec.Command("gcloud", "compute", "instances", "list",
"--project="+testContext.CloudConfig.ProjectID, "--zone="+testContext.CloudConfig.Zone).CombinedOutput()
if err != nil {
Logf("Failed to list nodes: %v, %v", err)
return
}
Logf(string(output))
return
}
// This test suite can take a long time to run, so by default it is added to
// the ginkgo.skip list (see driver.go).
// To run this suite you must explicitly ask for it by setting the
@@ -101,6 +116,7 @@ var _ = Describe("Density", func() {
expectNoError(resetMetrics(c))
expectNoError(os.Mkdir(fmt.Sprintf(testContext.OutputDir+"/%s", uuid), 0777))
expectNoError(writePerfData(c, fmt.Sprintf(testContext.OutputDir+"/%s", uuid), "before"))
gcloudListNodes()
})
AfterEach(func() {
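
The comment on gcloudListNodes above explains that node IPs are read from gcloud rather than the apiserver because the node controller can be slow to fill them in. For contrast, here is a minimal sketch of the apiserver-based lookup being avoided, written against today's client-go rather than the client package this commit uses; the package name, helper name, and clientset parameter are illustrative assumptions, not part of this change:

package densitydebug

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// apiserverListNodeIPs is an illustrative helper (not part of this commit): it
// lists nodes through the apiserver and prints whatever addresses the node
// controller has populated so far.
func apiserverListNodeIPs(ctx context.Context, cs kubernetes.Interface) error {
	nodes, err := cs.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
	if err != nil {
		return fmt.Errorf("listing nodes: %v", err)
	}
	for _, n := range nodes.Items {
		// Addresses may still be empty if the node controller is lagging.
		fmt.Printf("node %s addresses: %v\n", n.Name, n.Status.Addresses)
	}
	return nil
}

When the node controller has not caught up, Status.Addresses in a sketch like this simply comes back empty, which is the gap the gcloud call above works around during the density run.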


@@ -1046,7 +1046,7 @@ func RunRC(config RCConfig) error {
oldPods := make([]*api.Pod, 0)
oldRunning := 0
lastChange := time.Now()
for oldRunning != config.Replicas && time.Since(lastChange) < timeout {
for oldRunning != config.Replicas {
time.Sleep(interval)
running := 0
@@ -1104,6 +1104,11 @@ func RunRC(config RCConfig) error {
}
oldPods = pods
oldRunning = running
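// No state change for longer than the timeout: dump debug info for the stuck pods and stop waiting.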
if time.Since(lastChange) > timeout {
dumpPodDebugInfo(config.Client, pods)
break
}
}
if oldRunning != config.Replicas {
@@ -1112,6 +1117,41 @@ func RunRC(config RCConfig) error {
return nil
}
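// dumpPodDebugInfo logs every pod that is not yet Running, collects the nodes those pods
// were assigned to, and dumps kubelet-side debug info for them.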
func dumpPodDebugInfo(c *client.Client, pods []*api.Pod) {
badNodes := util.NewStringSet()
for _, p := range pods {
if p.Status.Phase != api.PodRunning {
if p.Spec.NodeName != "" {
Logf("Pod %v assigned to host %v (IP: %v) in %v", p.Name, p.Spec.NodeName, p.Status.HostIP, p.Status.Phase)
badNodes.Insert(p.Spec.NodeName)
} else {
Logf("Pod %v still unassigned", p.Name)
}
}
}
dumpNodeDebugInfo(c, badNodes.List())
}
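// dumpNodeDebugInfo logs, for each named node, the pods its kubelet reports, their
// container statuses, and any high-latency kubelet operations.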
func dumpNodeDebugInfo(c *client.Client, nodeNames []string) {
for _, n := range nodeNames {
Logf("\nLogging pods the kubelet thinks is on node %v", n)
podList, err := GetKubeletPods(c, n)
if err != nil {
Logf("Unable to retrieve kubelet pods for node %v", n)
continue
}
for _, p := range podList.Items {
Logf("%v started at %v (%d container statuses recorded)", p.Name, p.Status.StartTime, len(p.Status.ContainerStatuses))
for _, c := range p.Status.ContainerStatuses {
Logf("\tContainer %v ready: %v, restart count %v",
c.Name, c.Ready, c.RestartCount)
}
}
HighLatencyKubeletOperations(c, 10*time.Second, n)
// TODO: Log node resource info
}
}
func ScaleRC(c *client.Client, ns, name string, size uint) error {
By(fmt.Sprintf("%v Scaling replication controller %s in namespace %s to %d", time.Now(), name, ns, size))
scaler, err := kubectl.ScalerFor("ReplicationController", kubectl.NewScalerClient(c))