Merge pull request #24434 from gmarek/services
Automatic merge from submit-queue. Create multiple RCs in NC - prerequisite for adding services. WIP because I need to make the logs readable again.
This commit is contained in: commit 0057931f4e
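The heart of the change is splitting the density test's pod count across several ReplicationControllers (ceiling division of the replica count) and starting them concurrently while a separate goroutine logs aggregate startup progress. A rough, self-contained sketch of that split-and-fan-out pattern follows (plain Go; spreadReplicas and the numbers are illustrative only, not part of the commit):

package main

import (
	"fmt"
	"sync"
)

// spreadReplicas splits totalPods across numRCs controllers using the same
// ceiling division as the diff below: (totalPods + numberOrRCs - 1) / numberOrRCs.
// The helper name and the numbers in main are made up for illustration.
func spreadReplicas(totalPods, numRCs int) []int {
	perRC := (totalPods + numRCs - 1) / numRCs
	counts := make([]int, numRCs)
	for i := range counts {
		counts[i] = perRC
	}
	return counts
}

func main() {
	replicas := spreadReplicas(3000, 4) // e.g. 4 RCs, 750 pods each

	var wg sync.WaitGroup
	wg.Add(len(replicas))
	for i, r := range replicas {
		go func(i, r int) {
			defer wg.Done()
			// Stand-in for framework.RunRC(RCConfigs[i]) in the real test.
			fmt.Printf("RC %d: creating %d replicas\n", i, r)
		}(i, r)
	}
	wg.Wait()
}
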
@@ -84,6 +84,26 @@ func density30AddonResourceVerifier() map[string]framework.ResourceConstraint {
	return constraints
}

func logPodStartupStatus(c *client.Client, expectedPods int, ns string, observedLabels map[string]string, period time.Duration, stopCh chan struct{}) {
	label := labels.SelectorFromSet(labels.Set(observedLabels))
	podStore := framework.NewPodStore(c, ns, label, fields.Everything())
	defer podStore.Stop()
	ticker := time.NewTicker(period)
	for {
		select {
		case <-ticker.C:
			pods := podStore.List()
			startupStatus := framework.ComputeRCStartupStatus(pods, expectedPods)
			startupStatus.Print("Density")
		case <-stopCh:
			pods := podStore.List()
			startupStatus := framework.ComputeRCStartupStatus(pods, expectedPods)
			startupStatus.Print("Density")
			return
		}
	}
}

// This test suite can take a long time to run, and can affect or be affected by other tests.
// So by default it is added to the ginkgo.skip list (see driver.go).
// To run this suite you must explicitly ask for it by setting the

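The logPodStartupStatus helper added above is a standard ticker-plus-stop-channel loop: report aggregate progress on every tick, and emit one final snapshot when the stop channel closes. A minimal self-contained sketch of just that pattern, with an illustrative count callback standing in for the framework.PodStore (names here are not from the test):

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

// reportProgress mirrors the shape of logPodStartupStatus: print on every tick,
// print once more and return when stopCh is closed.
func reportProgress(count func() int, expected int, period time.Duration, stopCh chan struct{}) {
	ticker := time.NewTicker(period)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			fmt.Printf("progress: %d out of %d running\n", count(), expected)
		case <-stopCh:
			fmt.Printf("final: %d out of %d running\n", count(), expected)
			return
		}
	}
}

func main() {
	var running int64
	stopCh := make(chan struct{})
	done := make(chan struct{})
	go func() {
		reportProgress(func() int { return int(atomic.LoadInt64(&running)) }, 10, 100*time.Millisecond, stopCh)
		close(done)
	}()
	for i := 0; i < 10; i++ {
		time.Sleep(50 * time.Millisecond)
		atomic.AddInt64(&running, 1) // simulate pods becoming ready
	}
	close(stopCh)
	<-done // wait for the final snapshot to be printed
}
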
@@ -185,7 +205,7 @@ var _ = framework.KubeDescribe("Density", func() {
		{podsPerNode: 30, runLatencyTest: true, interval: 10 * time.Second},
		{podsPerNode: 50, runLatencyTest: false, interval: 10 * time.Second},
		{podsPerNode: 95, runLatencyTest: true, interval: 10 * time.Second},
		{podsPerNode: 100, runLatencyTest: false, interval: 1 * time.Second},
		{podsPerNode: 100, runLatencyTest: false, interval: 10 * time.Second},
	}

	for _, testArg := range densityTests {

@@ -201,22 +221,29 @@ var _ = framework.KubeDescribe("Density", func() {
		}
		itArg := testArg
		It(name, func() {
			podsPerNode := itArg.podsPerNode
			totalPods = podsPerNode * nodeCount
			RCName = "density" + strconv.Itoa(totalPods) + "-" + uuid
			fileHndl, err := os.Create(fmt.Sprintf(framework.TestContext.OutputDir+"/%s/pod_states.csv", uuid))
			framework.ExpectNoError(err)
			defer fileHndl.Close()
			config := framework.RCConfig{Client: c,
				Image:                "gcr.io/google_containers/pause:2.0",
				Name:                 RCName,
				Namespace:            ns,
				PollInterval:         itArg.interval,
				PodStatusFile:        fileHndl,
				Replicas:             totalPods,
				CpuRequest:           nodeCpuCapacity / 100,
				MemRequest:           nodeMemCapacity / 100,
				MaxContainerFailures: &MaxContainerFailures,
			podsPerNode := itArg.podsPerNode
			totalPods = podsPerNode * nodeCount
			// TODO: loop to podsPerNode instead of 1 when we're ready.
			numberOrRCs := 1
			RCConfigs := make([]framework.RCConfig, numberOrRCs)
			for i := 0; i < numberOrRCs; i++ {
				RCName = "density" + strconv.Itoa(totalPods) + "-" + strconv.Itoa(i) + "-" + uuid
				RCConfigs[i] = framework.RCConfig{Client: c,
					Image:                "gcr.io/google_containers/pause:2.0",
					Name:                 RCName,
					Namespace:            ns,
					Labels:               map[string]string{"type": "densityPod"},
					PollInterval:         itArg.interval,
					PodStatusFile:        fileHndl,
					Replicas:             (totalPods + numberOrRCs - 1) / numberOrRCs,
					CpuRequest:           nodeCpuCapacity / 100,
					MemRequest:           nodeMemCapacity / 100,
					MaxContainerFailures: &MaxContainerFailures,
					Silent:               true,
				}
			}

			// Create a listener for events.

@@ -249,7 +276,7 @@ var _ = framework.KubeDescribe("Density", func() {
			// uLock is a lock protects the updateCount
			var uLock sync.Mutex
			updateCount := 0
			label := labels.SelectorFromSet(labels.Set(map[string]string{"name": RCName}))
			label := labels.SelectorFromSet(labels.Set(map[string]string{"type": "densityPod"}))
			_, updateController := controllerframework.NewInformer(
				&cache.ListWatch{
					ListFunc: func(options api.ListOptions) (runtime.Object, error) {

@@ -273,10 +300,22 @@ var _ = framework.KubeDescribe("Density", func() {
			)
			go updateController.Run(stop)

			// Start the replication controller.
			// Start all replication controllers.
			startTime := time.Now()
			framework.ExpectNoError(framework.RunRC(config))
			wg := sync.WaitGroup{}
			wg.Add(len(RCConfigs))
			for i := range RCConfigs {
				rcConfig := RCConfigs[i]
				go func() {
					framework.ExpectNoError(framework.RunRC(rcConfig))
					wg.Done()
				}()
			}
			logStopCh := make(chan struct{})
			go logPodStartupStatus(c, totalPods, ns, map[string]string{"type": "densityPod"}, itArg.interval, logStopCh)
			wg.Wait()
			e2eStartupTime = time.Now().Sub(startTime)
			close(logStopCh)
			framework.Logf("E2E startup time for %d pods: %v", totalPods, e2eStartupTime)
			framework.Logf("Throughput (pods/s) during cluster saturation phase: %v", float32(totalPods)/float32(e2eStartupTime/time.Second))

@@ -506,11 +545,14 @@ var _ = framework.KubeDescribe("Density", func() {

			By("Deleting ReplicationController")
			// We explicitly delete all pods to have API calls necessary for deletion accounted in metrics.
			rc, err := c.ReplicationControllers(ns).Get(RCName)
			if err == nil && rc.Spec.Replicas != 0 {
				By("Cleaning up the replication controller")
				err := framework.DeleteRC(c, ns, RCName)
				framework.ExpectNoError(err)
			for i := range RCConfigs {
				rcName := RCConfigs[i].Name
				rc, err := c.ReplicationControllers(ns).Get(rcName)
				if err == nil && rc.Spec.Replicas != 0 {
					By("Cleaning up the replication controller")
					err := framework.DeleteRC(c, ns, rcName)
					framework.ExpectNoError(err)
				}
			}

			By("Removing additional replication controllers if any")

@@ -242,6 +242,9 @@ type RCConfig struct {
	// Maximum allowable container failures. If exceeded, RunRC returns an error.
	// Defaults to replicas*0.1 if unspecified.
	MaxContainerFailures *int

	// If set to false starting RC will print progress, otherwise only errors will be printed.
	Silent bool
}

type DeploymentConfig struct {

@@ -1934,6 +1937,70 @@ func (config *RCConfig) applyTo(template *api.PodTemplateSpec) {
	}
}

type RCStartupStatus struct {
	Expected              int
	Terminating           int
	Running               int
	RunningButNotReady    int
	Waiting               int
	Pending               int
	Unknown               int
	Inactive              int
	FailedContainers      int
	Created               []*api.Pod
	ContainerRestartNodes sets.String
}

func (s *RCStartupStatus) Print(name string) {
	Logf("%v Pods: %d out of %d created, %d running, %d pending, %d waiting, %d inactive, %d terminating, %d unknown, %d runningButNotReady ",
		name, len(s.Created), s.Expected, s.Running, s.Pending, s.Waiting, s.Inactive, s.Terminating, s.Unknown, s.RunningButNotReady)
}

func ComputeRCStartupStatus(pods []*api.Pod, expected int) RCStartupStatus {
	startupStatus := RCStartupStatus{
		Expected:              expected,
		Created:               make([]*api.Pod, 0, expected),
		ContainerRestartNodes: sets.NewString(),
	}
	for _, p := range pods {
		if p.DeletionTimestamp != nil {
			startupStatus.Terminating++
			continue
		}
		startupStatus.Created = append(startupStatus.Created, p)
		if p.Status.Phase == api.PodRunning {
			ready := false
			for _, c := range p.Status.Conditions {
				if c.Type == api.PodReady && c.Status == api.ConditionTrue {
					ready = true
					break
				}
			}
			if ready {
				// Only count a pod is running when it is also ready.
				startupStatus.Running++
			} else {
				startupStatus.RunningButNotReady++
			}
			for _, v := range FailedContainers(p) {
				startupStatus.FailedContainers = startupStatus.FailedContainers + v.Restarts
				startupStatus.ContainerRestartNodes.Insert(p.Spec.NodeName)
			}
		} else if p.Status.Phase == api.PodPending {
			if p.Spec.NodeName == "" {
				startupStatus.Waiting++
			} else {
				startupStatus.Pending++
			}
		} else if p.Status.Phase == api.PodSucceeded || p.Status.Phase == api.PodFailed {
			startupStatus.Inactive++
		} else if p.Status.Phase == api.PodUnknown {
			startupStatus.Unknown++
		}
	}
	return startupStatus
}

func (config *RCConfig) start() error {
	// Don't force tests to fail if they don't care about containers restarting.
	var maxContainerFailures int

@@ -1962,74 +2029,28 @@ func (config *RCConfig) start() error {
	for oldRunning != config.Replicas {
		time.Sleep(interval)

		terminating := 0

		running := 0
		runningButNotReady := 0
		waiting := 0
		pending := 0
		unknown := 0
		inactive := 0
		failedContainers := 0
		containerRestartNodes := sets.NewString()

		pods := PodStore.List()
		created := []*api.Pod{}
		for _, p := range pods {
			if p.DeletionTimestamp != nil {
				terminating++
				continue
			}
			created = append(created, p)
			if p.Status.Phase == api.PodRunning {
				ready := false
				for _, c := range p.Status.Conditions {
					if c.Type == api.PodReady && c.Status == api.ConditionTrue {
						ready = true
						break
					}
				}
				if ready {
					// Only count a pod is running when it is also ready.
					running++
				} else {
					runningButNotReady++
				}
				for _, v := range FailedContainers(p) {
					failedContainers = failedContainers + v.Restarts
					containerRestartNodes.Insert(p.Spec.NodeName)
				}
			} else if p.Status.Phase == api.PodPending {
				if p.Spec.NodeName == "" {
					waiting++
				} else {
					pending++
				}
			} else if p.Status.Phase == api.PodSucceeded || p.Status.Phase == api.PodFailed {
				inactive++
			} else if p.Status.Phase == api.PodUnknown {
				unknown++
			}
		}
		pods = created
		startupStatus := ComputeRCStartupStatus(pods, config.Replicas)

		pods = startupStatus.Created
		if config.CreatedPods != nil {
			*config.CreatedPods = pods
		}

		Logf("%v Pods: %d out of %d created, %d running, %d pending, %d waiting, %d inactive, %d terminating, %d unknown, %d runningButNotReady ",
			config.Name, len(pods), config.Replicas, running, pending, waiting, inactive, terminating, unknown, runningButNotReady)

		promPushRunningPending(running, pending)

		if config.PodStatusFile != nil {
			fmt.Fprintf(config.PodStatusFile, "%d, running, %d, pending, %d, waiting, %d, inactive, %d, unknown, %d, runningButNotReady\n", running, pending, waiting, inactive, unknown, runningButNotReady)
		if !config.Silent {
			startupStatus.Print(config.Name)
		}

		if failedContainers > maxContainerFailures {
			DumpNodeDebugInfo(config.Client, containerRestartNodes.List())
		promPushRunningPending(startupStatus.Running, startupStatus.Pending)

		if config.PodStatusFile != nil {
			fmt.Fprintf(config.PodStatusFile, "%d, running, %d, pending, %d, waiting, %d, inactive, %d, unknown, %d, runningButNotReady\n", startupStatus.Running, startupStatus.Pending, startupStatus.Waiting, startupStatus.Inactive, startupStatus.Unknown, startupStatus.RunningButNotReady)
		}

		if startupStatus.FailedContainers > maxContainerFailures {
			DumpNodeDebugInfo(config.Client, startupStatus.ContainerRestartNodes.List())
			// Get the logs from the failed containers to help diagnose what caused them to fail
			LogFailedContainers(config.Namespace)
			return fmt.Errorf("%d containers failed which is more than allowed %d", failedContainers, maxContainerFailures)
			return fmt.Errorf("%d containers failed which is more than allowed %d", startupStatus.FailedContainers, maxContainerFailures)
		}
		if len(pods) < len(oldPods) || len(pods) > config.Replicas {
			// This failure mode includes:

@@ -2043,11 +2064,11 @@ func (config *RCConfig) start() error {
			return fmt.Errorf(errorStr)
		}

		if len(pods) > len(oldPods) || running > oldRunning {
		if len(pods) > len(oldPods) || startupStatus.Running > oldRunning {
			lastChange = time.Now()
		}
		oldPods = pods
		oldRunning = running
		oldRunning = startupStatus.Running

		if time.Since(lastChange) > timeout {
			dumpPodDebugInfo(config.Client, pods)