Merge pull request #24434 from gmarek/services

Automatic merge from submit-queue

Create multiple RCs in NC - prerequisite for adding services

WIP because I need to make the logs readable again.
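The shape of the change, in brief: instead of building one framework.RCConfig with Replicas = totalPods, the density test now builds a slice of configs whose replica counts are computed by ceiling division, starts them all concurrently, and waits on a sync.WaitGroup. A minimal stand-alone sketch of that pattern (rcConfig and runRC are simplified stand-ins for framework.RCConfig and framework.RunRC, and the numbers are illustrative):

package main

import (
	"fmt"
	"sync"
)

// rcConfig keeps only the fields the sharding logic needs.
type rcConfig struct {
	Name     string
	Replicas int
}

// runRC stands in for framework.RunRC; here it only reports what it would create.
func runRC(c rcConfig) error {
	fmt.Printf("starting %s with %d replicas\n", c.Name, c.Replicas)
	return nil
}

func main() {
	totalPods := 3000
	numberOfRCs := 4 // the diff starts at 1, with a TODO to raise it

	// Ceiling division: shares are rounded up so they always cover totalPods.
	configs := make([]rcConfig, numberOfRCs)
	for i := range configs {
		configs[i] = rcConfig{
			Name:     fmt.Sprintf("density%d-%d", totalPods, i),
			Replicas: (totalPods + numberOfRCs - 1) / numberOfRCs,
		}
	}

	// Start every RC concurrently and wait for all of them, mirroring
	// the WaitGroup pattern in the diff.
	var wg sync.WaitGroup
	wg.Add(len(configs))
	for i := range configs {
		c := configs[i] // copy for the goroutine (pre-1.22 loop-var semantics)
		go func() {
			defer wg.Done()
			if err := runRC(c); err != nil {
				fmt.Println("error:", err)
			}
		}()
	}
	wg.Wait()
}

The ceiling division (totalPods + n - 1) / n rounds each share up, so n shards always cover totalPods even when it is not evenly divisible by n.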
k8s-merge-robot 2016-04-23 15:19:36 -07:00
commit 0057931f4e
2 changed files with 147 additions and 84 deletions

@@ -84,6 +84,26 @@ func density30AddonResourceVerifier() map[string]framework.ResourceConstraint {
return constraints
}
+ func logPodStartupStatus(c *client.Client, expectedPods int, ns string, observedLabels map[string]string, period time.Duration, stopCh chan struct{}) {
+ label := labels.SelectorFromSet(labels.Set(observedLabels))
+ podStore := framework.NewPodStore(c, ns, label, fields.Everything())
+ defer podStore.Stop()
+ ticker := time.NewTicker(period)
+ for {
+ select {
+ case <-ticker.C:
+ pods := podStore.List()
+ startupStatus := framework.ComputeRCStartupStatus(pods, expectedPods)
+ startupStatus.Print("Density")
+ case <-stopCh:
+ pods := podStore.List()
+ startupStatus := framework.ComputeRCStartupStatus(pods, expectedPods)
+ startupStatus.Print("Density")
+ return
+ }
+ }
+ }
// This test suite can take a long time to run, and can affect or be affected by other tests.
// So by default it is added to the ginkgo.skip list (see driver.go).
// To run this suite you must explicitly ask for it by setting the
@@ -185,7 +205,7 @@ var _ = framework.KubeDescribe("Density", func() {
{podsPerNode: 30, runLatencyTest: true, interval: 10 * time.Second},
{podsPerNode: 50, runLatencyTest: false, interval: 10 * time.Second},
{podsPerNode: 95, runLatencyTest: true, interval: 10 * time.Second},
- {podsPerNode: 100, runLatencyTest: false, interval: 1 * time.Second},
+ {podsPerNode: 100, runLatencyTest: false, interval: 10 * time.Second},
}
for _, testArg := range densityTests {
@@ -201,22 +221,29 @@ var _ = framework.KubeDescribe("Density", func() {
}
itArg := testArg
It(name, func() {
- podsPerNode := itArg.podsPerNode
- totalPods = podsPerNode * nodeCount
- RCName = "density" + strconv.Itoa(totalPods) + "-" + uuid
fileHndl, err := os.Create(fmt.Sprintf(framework.TestContext.OutputDir+"/%s/pod_states.csv", uuid))
framework.ExpectNoError(err)
defer fileHndl.Close()
- config := framework.RCConfig{Client: c,
- Image: "gcr.io/google_containers/pause:2.0",
- Name: RCName,
- Namespace: ns,
- PollInterval: itArg.interval,
- PodStatusFile: fileHndl,
- Replicas: totalPods,
- CpuRequest: nodeCpuCapacity / 100,
- MemRequest: nodeMemCapacity / 100,
- MaxContainerFailures: &MaxContainerFailures,
+ podsPerNode := itArg.podsPerNode
+ totalPods = podsPerNode * nodeCount
+ // TODO: loop to podsPerNode instead of 1 when we're ready.
+ numberOrRCs := 1
+ RCConfigs := make([]framework.RCConfig, numberOrRCs)
+ for i := 0; i < numberOrRCs; i++ {
+ RCName = "density" + strconv.Itoa(totalPods) + "-" + strconv.Itoa(i) + "-" + uuid
+ RCConfigs[i] = framework.RCConfig{Client: c,
+ Image: "gcr.io/google_containers/pause:2.0",
+ Name: RCName,
+ Namespace: ns,
+ Labels: map[string]string{"type": "densityPod"},
+ PollInterval: itArg.interval,
+ PodStatusFile: fileHndl,
+ Replicas: (totalPods + numberOrRCs - 1) / numberOrRCs,
+ CpuRequest: nodeCpuCapacity / 100,
+ MemRequest: nodeMemCapacity / 100,
+ MaxContainerFailures: &MaxContainerFailures,
+ Silent: true,
+ }
+ }
// Create a listener for events.
@@ -249,7 +276,7 @@ var _ = framework.KubeDescribe("Density", func() {
// uLock is a lock that protects updateCount
var uLock sync.Mutex
updateCount := 0
label := labels.SelectorFromSet(labels.Set(map[string]string{"name": RCName}))
label := labels.SelectorFromSet(labels.Set(map[string]string{"type": "densityPod"}))
_, updateController := controllerframework.NewInformer(
&cache.ListWatch{
ListFunc: func(options api.ListOptions) (runtime.Object, error) {
@@ -273,10 +300,22 @@ var _ = framework.KubeDescribe("Density", func() {
)
go updateController.Run(stop)
- // Start the replication controller.
+ // Start all replication controllers.
startTime := time.Now()
- framework.ExpectNoError(framework.RunRC(config))
+ wg := sync.WaitGroup{}
+ wg.Add(len(RCConfigs))
+ for i := range RCConfigs {
+ rcConfig := RCConfigs[i]
+ go func() {
+ framework.ExpectNoError(framework.RunRC(rcConfig))
+ wg.Done()
+ }()
+ }
+ logStopCh := make(chan struct{})
+ go logPodStartupStatus(c, totalPods, ns, map[string]string{"type": "densityPod"}, itArg.interval, logStopCh)
+ wg.Wait()
e2eStartupTime = time.Now().Sub(startTime)
+ close(logStopCh)
framework.Logf("E2E startup time for %d pods: %v", totalPods, e2eStartupTime)
framework.Logf("Throughput (pods/s) during cluster saturation phase: %v", float32(totalPods)/float32(e2eStartupTime/time.Second))
@@ -506,11 +545,14 @@ var _ = framework.KubeDescribe("Density", func() {
By("Deleting ReplicationController")
// We explicitly delete all pods to have the API calls necessary for deletion accounted for in metrics.
- rc, err := c.ReplicationControllers(ns).Get(RCName)
- if err == nil && rc.Spec.Replicas != 0 {
- By("Cleaning up the replication controller")
- err := framework.DeleteRC(c, ns, RCName)
- framework.ExpectNoError(err)
+ for i := range RCConfigs {
+ rcName := RCConfigs[i].Name
+ rc, err := c.ReplicationControllers(ns).Get(rcName)
+ if err == nil && rc.Spec.Replicas != 0 {
+ By("Cleaning up the replication controller")
+ err := framework.DeleteRC(c, ns, rcName)
+ framework.ExpectNoError(err)
+ }
}
By("Removing additional replication controllers if any")

@@ -242,6 +242,9 @@ type RCConfig struct {
// Maximum allowable container failures. If exceeded, RunRC returns an error.
// Defaults to replicas*0.1 if unspecified.
MaxContainerFailures *int
+ // If set to false, starting the RC will print progress; otherwise only errors will be printed.
+ Silent bool
}
type DeploymentConfig struct {
@@ -1934,6 +1937,70 @@ func (config *RCConfig) applyTo(template *api.PodTemplateSpec) {
}
}
+ type RCStartupStatus struct {
+ Expected int
+ Terminating int
+ Running int
+ RunningButNotReady int
+ Waiting int
+ Pending int
+ Unknown int
+ Inactive int
+ FailedContainers int
+ Created []*api.Pod
+ ContainerRestartNodes sets.String
+ }
+ func (s *RCStartupStatus) Print(name string) {
+ Logf("%v Pods: %d out of %d created, %d running, %d pending, %d waiting, %d inactive, %d terminating, %d unknown, %d runningButNotReady ",
+ name, len(s.Created), s.Expected, s.Running, s.Pending, s.Waiting, s.Inactive, s.Terminating, s.Unknown, s.RunningButNotReady)
+ }
+ func ComputeRCStartupStatus(pods []*api.Pod, expected int) RCStartupStatus {
+ startupStatus := RCStartupStatus{
+ Expected: expected,
+ Created: make([]*api.Pod, 0, expected),
+ ContainerRestartNodes: sets.NewString(),
+ }
+ for _, p := range pods {
+ if p.DeletionTimestamp != nil {
+ startupStatus.Terminating++
+ continue
+ }
+ startupStatus.Created = append(startupStatus.Created, p)
+ if p.Status.Phase == api.PodRunning {
+ ready := false
+ for _, c := range p.Status.Conditions {
+ if c.Type == api.PodReady && c.Status == api.ConditionTrue {
+ ready = true
+ break
+ }
+ }
+ if ready {
+ // Only count a pod as running when it is also ready.
+ startupStatus.Running++
+ } else {
+ startupStatus.RunningButNotReady++
+ }
+ for _, v := range FailedContainers(p) {
+ startupStatus.FailedContainers = startupStatus.FailedContainers + v.Restarts
+ startupStatus.ContainerRestartNodes.Insert(p.Spec.NodeName)
+ }
+ } else if p.Status.Phase == api.PodPending {
+ if p.Spec.NodeName == "" {
+ startupStatus.Waiting++
+ } else {
+ startupStatus.Pending++
+ }
+ } else if p.Status.Phase == api.PodSucceeded || p.Status.Phase == api.PodFailed {
+ startupStatus.Inactive++
+ } else if p.Status.Phase == api.PodUnknown {
+ startupStatus.Unknown++
+ }
+ }
+ return startupStatus
+ }
func (config *RCConfig) start() error {
// Don't force tests to fail if they don't care about containers restarting.
var maxContainerFailures int
@@ -1962,74 +2029,28 @@ func (config *RCConfig) start() error {
for oldRunning != config.Replicas {
time.Sleep(interval)
- terminating := 0
- running := 0
- runningButNotReady := 0
- waiting := 0
- pending := 0
- unknown := 0
- inactive := 0
- failedContainers := 0
- containerRestartNodes := sets.NewString()
pods := PodStore.List()
- created := []*api.Pod{}
- for _, p := range pods {
- if p.DeletionTimestamp != nil {
- terminating++
- continue
- }
- created = append(created, p)
- if p.Status.Phase == api.PodRunning {
- ready := false
- for _, c := range p.Status.Conditions {
- if c.Type == api.PodReady && c.Status == api.ConditionTrue {
- ready = true
- break
- }
- }
- if ready {
- // Only count a pod is running when it is also ready.
- running++
- } else {
- runningButNotReady++
- }
- for _, v := range FailedContainers(p) {
- failedContainers = failedContainers + v.Restarts
- containerRestartNodes.Insert(p.Spec.NodeName)
- }
- } else if p.Status.Phase == api.PodPending {
- if p.Spec.NodeName == "" {
- waiting++
- } else {
- pending++
- }
- } else if p.Status.Phase == api.PodSucceeded || p.Status.Phase == api.PodFailed {
- inactive++
- } else if p.Status.Phase == api.PodUnknown {
- unknown++
- }
- }
- pods = created
+ startupStatus := ComputeRCStartupStatus(pods, config.Replicas)
+ pods = startupStatus.Created
if config.CreatedPods != nil {
*config.CreatedPods = pods
}
Logf("%v Pods: %d out of %d created, %d running, %d pending, %d waiting, %d inactive, %d terminating, %d unknown, %d runningButNotReady ",
config.Name, len(pods), config.Replicas, running, pending, waiting, inactive, terminating, unknown, runningButNotReady)
promPushRunningPending(running, pending)
if config.PodStatusFile != nil {
fmt.Fprintf(config.PodStatusFile, "%d, running, %d, pending, %d, waiting, %d, inactive, %d, unknown, %d, runningButNotReady\n", running, pending, waiting, inactive, unknown, runningButNotReady)
+ if !config.Silent {
+ startupStatus.Print(config.Name)
+ }
- if failedContainers > maxContainerFailures {
- DumpNodeDebugInfo(config.Client, containerRestartNodes.List())
+ promPushRunningPending(startupStatus.Running, startupStatus.Pending)
+ if config.PodStatusFile != nil {
+ fmt.Fprintf(config.PodStatusFile, "%d, running, %d, pending, %d, waiting, %d, inactive, %d, unknown, %d, runningButNotReady\n", startupStatus.Running, startupStatus.Pending, startupStatus.Waiting, startupStatus.Inactive, startupStatus.Unknown, startupStatus.RunningButNotReady)
}
+ if startupStatus.FailedContainers > maxContainerFailures {
+ DumpNodeDebugInfo(config.Client, startupStatus.ContainerRestartNodes.List())
// Get the logs from the failed containers to help diagnose what caused them to fail
LogFailedContainers(config.Namespace)
return fmt.Errorf("%d containers failed which is more than allowed %d", failedContainers, maxContainerFailures)
return fmt.Errorf("%d containers failed which is more than allowed %d", startupStatus.FailedContainers, maxContainerFailures)
}
if len(pods) < len(oldPods) || len(pods) > config.Replicas {
// This failure mode includes:
@@ -2043,11 +2064,11 @@ func (config *RCConfig) start() error {
return fmt.Errorf(errorStr)
}
- if len(pods) > len(oldPods) || running > oldRunning {
+ if len(pods) > len(oldPods) || startupStatus.Running > oldRunning {
lastChange = time.Now()
}
oldPods = pods
- oldRunning = running
+ oldRunning = startupStatus.Running
if time.Since(lastChange) > timeout {
dumpPodDebugInfo(config.Client, pods)
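For reference, the extracted helper gives both call sites, the density logger and the start() polling loop, the same per-pod bucketing. A pared-down sketch of that classification, with stand-in types in place of api.Pod and its conditions (field names here are illustrative, not the real API):

package main

import "fmt"

// pod keeps just the fields the bucketing in ComputeRCStartupStatus looks at.
type pod struct {
	Phase       string // "Running", "Pending", "Succeeded", "Failed", "Unknown"
	Ready       bool   // the PodReady condition is True
	NodeName    string // empty until the scheduler binds the pod
	Terminating bool   // DeletionTimestamp != nil
}

type startupStatus struct {
	Running, RunningButNotReady, Waiting, Pending int
	Inactive, Unknown, Terminating                int
}

func compute(pods []pod) startupStatus {
	var s startupStatus
	for _, p := range pods {
		switch {
		case p.Terminating:
			s.Terminating++
		case p.Phase == "Running" && p.Ready:
			s.Running++ // only ready pods count as running
		case p.Phase == "Running":
			s.RunningButNotReady++
		case p.Phase == "Pending" && p.NodeName == "":
			s.Waiting++ // created, not yet scheduled
		case p.Phase == "Pending":
			s.Pending++ // scheduled, containers not running yet
		case p.Phase == "Succeeded" || p.Phase == "Failed":
			s.Inactive++
		case p.Phase == "Unknown":
			s.Unknown++
		}
	}
	return s
}

func main() {
	fmt.Printf("%+v\n", compute([]pod{
		{Phase: "Running", Ready: true},
		{Phase: "Running"},
		{Phase: "Pending"},
		{Phase: "Pending", NodeName: "node-1"},
	}))
}

The Waiting/Pending split mirrors the code above: a pending pod with an empty NodeName has not been scheduled yet, while one with a node assigned is waiting for its containers to start.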