mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 11:50:44 +00:00
Merge pull request #66903 from mborsz/repeat
Automatic merge from submit-queue (batch tested with PRs 61389, 66817, 66903, 66675, 66965). If you want to cherry-pick this change to another branch, please follow the instructions here: https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md

Scalability tests: Increase sample size for pod startup latency measurement

**What this PR does / why we need it**: This PR adds a way to increase the sample size used in the pod startup latency measurement. We found that this phase is flaky and want to check whether increasing the number of samples helps reduce the flakiness.

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:

**Special notes for your reviewer**:

**Release note**:
```release-note
NONE
```
commit b43ca0a037
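The core of the change is how the test sizes the latency-pod sample: instead of a single batch of `nodeCount` pods, it now runs `latencyPodsIterations` batches, with the iteration count picked by ceiling division so that `latencyPodsIterations * nodeCount >= MinPodStartupMeasurements` (500). Below is a minimal standalone sketch of that arithmetic, assuming only the constant and formula from the diff; the function name and the example node counts are illustrative, not part of the test.

```go
package main

import "fmt"

// minPodStartupMeasurements mirrors the MinPodStartupMeasurements constant
// introduced by this PR: the minimum number of latency samples to collect.
const minPodStartupMeasurements = 500

// iterations returns how many batches of latency pods are needed so that
// iterations*nodeCount >= minPodStartupMeasurements. This is the same
// ceiling-division formula as latencyPodsIterations in the diff.
func iterations(nodeCount int) int {
	return (minPodStartupMeasurements + nodeCount - 1) / nodeCount
}

func main() {
	for _, nodes := range []int{5, 100, 500, 5000} {
		it := iterations(nodes)
		fmt.Printf("nodes=%-4d iterations=%-3d total latency pods=%d\n", nodes, it, it*nodes)
	}
}
```

On small clusters this schedules several batches to reach at least 500 samples, while on clusters of 500 or more nodes a single batch already satisfies the minimum, so behaviour there stays effectively the same as before.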
```diff
@@ -56,6 +56,7 @@ const (
 	MinSaturationThreshold = 2 * time.Minute
 	MinPodsPerSecondThroughput = 8
 	DensityPollInterval = 10 * time.Second
+	MinPodStartupMeasurements = 500
 )
 
 // Maximum container failures this test tolerates before failing.
```
```diff
@@ -696,7 +697,10 @@ var _ = SIGDescribe("Density", func() {
 			}
 			e2eStartupTime = runDensityTest(dConfig, testPhaseDurations, &scheduleThroughputs)
 			if itArg.runLatencyTest {
-				By("Scheduling additional Pods to measure startup latencies")
+				// Pick latencyPodsIterations so that:
+				// latencyPodsIterations * nodeCount >= MinPodStartupMeasurements.
+				latencyPodsIterations := (MinPodStartupMeasurements + nodeCount - 1) / nodeCount
+				By(fmt.Sprintf("Scheduling additional %d Pods to measure startup latencies", latencyPodsIterations*nodeCount))
 
 				createTimes := make(map[string]metav1.Time, 0)
 				nodeNames := make(map[string]string, 0)
```
```diff
@@ -775,58 +779,76 @@ var _ = SIGDescribe("Density", func() {
 
 					go controller.Run(stopCh)
 				}
-
-				// Create some additional pods with throughput ~5 pods/sec.
-				latencyPodStartupPhase := testPhaseDurations.StartPhase(800, "latency pods creation")
-				defer latencyPodStartupPhase.End()
-				var wg sync.WaitGroup
-				wg.Add(nodeCount)
-				// Explicitly set requests here.
-				// Thanks to it we trigger increasing priority function by scheduling
-				// a pod to a node, which in turn will result in spreading latency pods
-				// more evenly between nodes.
-				cpuRequest := *resource.NewMilliQuantity(nodeCpuCapacity/5, resource.DecimalSI)
-				memRequest := *resource.NewQuantity(nodeMemCapacity/5, resource.DecimalSI)
-				if podsPerNode > 30 {
-					// This is to make them schedulable on high-density tests
-					// (e.g. 100 pods/node kubemark).
-					cpuRequest = *resource.NewMilliQuantity(0, resource.DecimalSI)
-					memRequest = *resource.NewQuantity(0, resource.DecimalSI)
-				}
-				rcNameToNsMap := map[string]string{}
-				for i := 1; i <= nodeCount; i++ {
-					name := additionalPodsPrefix + "-" + strconv.Itoa(i)
-					nsName := namespaces[i%len(namespaces)].Name
-					rcNameToNsMap[name] = nsName
-					go createRunningPodFromRC(&wg, c, name, nsName, imageutils.GetPauseImageName(), additionalPodsPrefix, cpuRequest, memRequest)
-					time.Sleep(200 * time.Millisecond)
-				}
-				wg.Wait()
-				latencyPodStartupPhase.End()
-
-				latencyMeasurementPhase := testPhaseDurations.StartPhase(810, "pod startup latencies measurement")
-				defer latencyMeasurementPhase.End()
-				By("Waiting for all Pods begin observed by the watch...")
-				waitTimeout := 10 * time.Minute
-				for start := time.Now(); len(watchTimes) < nodeCount; time.Sleep(10 * time.Second) {
-					if time.Since(start) < waitTimeout {
-						framework.Failf("Timeout reached waiting for all Pods being observed by the watch.")
-					}
-				}
-				close(stopCh)
-
-				nodeToLatencyPods := make(map[string]int)
-				for i := range latencyPodStores {
-					for _, item := range latencyPodStores[i].List() {
-						pod := item.(*v1.Pod)
-						nodeToLatencyPods[pod.Spec.NodeName]++
-					}
-					for node, count := range nodeToLatencyPods {
-						if count > 1 {
-							framework.Logf("%d latency pods scheduled on %s", count, node)
-						}
-					}
-				}
+				for latencyPodsIteration := 0; latencyPodsIteration < latencyPodsIterations; latencyPodsIteration++ {
+					podIndexOffset := latencyPodsIteration * nodeCount
+					framework.Logf("Creating %d latency pods in range [%d, %d]", nodeCount, podIndexOffset+1, podIndexOffset+nodeCount)
+
+					watchTimesLen := len(watchTimes)
+
+					// Create some additional pods with throughput ~5 pods/sec.
+					latencyPodStartupPhase := testPhaseDurations.StartPhase(800+latencyPodsIteration*10, "latency pods creation")
+					defer latencyPodStartupPhase.End()
+					var wg sync.WaitGroup
+					wg.Add(nodeCount)
+					// Explicitly set requests here.
+					// Thanks to it we trigger increasing priority function by scheduling
+					// a pod to a node, which in turn will result in spreading latency pods
+					// more evenly between nodes.
+					cpuRequest := *resource.NewMilliQuantity(nodeCpuCapacity/5, resource.DecimalSI)
+					memRequest := *resource.NewQuantity(nodeMemCapacity/5, resource.DecimalSI)
+					if podsPerNode > 30 {
+						// This is to make them schedulable on high-density tests
+						// (e.g. 100 pods/node kubemark).
+						cpuRequest = *resource.NewMilliQuantity(0, resource.DecimalSI)
+						memRequest = *resource.NewQuantity(0, resource.DecimalSI)
+					}
+					rcNameToNsMap := map[string]string{}
+					for i := 1; i <= nodeCount; i++ {
+						name := additionalPodsPrefix + "-" + strconv.Itoa(podIndexOffset+i)
+						nsName := namespaces[i%len(namespaces)].Name
+						rcNameToNsMap[name] = nsName
+						go createRunningPodFromRC(&wg, c, name, nsName, imageutils.GetPauseImageName(), additionalPodsPrefix, cpuRequest, memRequest)
+						time.Sleep(200 * time.Millisecond)
+					}
+					wg.Wait()
+					latencyPodStartupPhase.End()
+
+					latencyMeasurementPhase := testPhaseDurations.StartPhase(801+latencyPodsIteration*10, "pod startup latencies measurement")
+					defer latencyMeasurementPhase.End()
+					By("Waiting for all Pods begin observed by the watch...")
+					waitTimeout := 10 * time.Minute
+					for start := time.Now(); len(watchTimes) < watchTimesLen+nodeCount; time.Sleep(10 * time.Second) {
+						if time.Since(start) < waitTimeout {
+							framework.Failf("Timeout reached waiting for all Pods being observed by the watch.")
+						}
+					}
+
+					nodeToLatencyPods := make(map[string]int)
+					for i := range latencyPodStores {
+						for _, item := range latencyPodStores[i].List() {
+							pod := item.(*v1.Pod)
+							nodeToLatencyPods[pod.Spec.NodeName]++
+						}
+						for node, count := range nodeToLatencyPods {
+							if count > 1 {
+								framework.Logf("%d latency pods scheduled on %s", count, node)
+							}
+						}
+					}
+					latencyMeasurementPhase.End()
+
+					By("Removing additional replication controllers")
+					podDeletionPhase := testPhaseDurations.StartPhase(802+latencyPodsIteration*10, "latency pods deletion")
+					defer podDeletionPhase.End()
+					deleteRC := func(i int) {
+						defer GinkgoRecover()
+						name := additionalPodsPrefix + "-" + strconv.Itoa(podIndexOffset+i+1)
+						framework.ExpectNoError(framework.DeleteRCAndWaitForGC(c, rcNameToNsMap[name], name))
+					}
+					workqueue.Parallelize(25, nodeCount, deleteRC)
+					podDeletionPhase.End()
+				}
+				close(stopCh)
 
 				for i := 0; i < len(namespaces); i++ {
 					nsName := namespaces[i].Name
```
```diff
@@ -914,18 +936,6 @@ var _ = SIGDescribe("Density", func() {
 			framework.ExpectNoError(framework.VerifyLatencyWithinThreshold(podStartupLatencyThreshold, podStartupLatency.E2ELatency, "pod startup"))
 
 			framework.LogSuspiciousLatency(startupLag, e2eLag, nodeCount, c)
-			latencyMeasurementPhase.End()
-
-			By("Removing additional replication controllers")
-			podDeletionPhase := testPhaseDurations.StartPhase(820, "latency pods deletion")
-			defer podDeletionPhase.End()
-			deleteRC := func(i int) {
-				defer GinkgoRecover()
-				name := additionalPodsPrefix + "-" + strconv.Itoa(i+1)
-				framework.ExpectNoError(framework.DeleteRCAndWaitForGC(c, rcNameToNsMap[name], name))
-			}
-			workqueue.Parallelize(25, nodeCount, deleteRC)
-			podDeletionPhase.End()
 		}
 		cleanupDensityTest(dConfig, testPhaseDurations)
 	})
```