Merge pull request #32250 from coufon/increase_qps

Automatic merge from submit-queue

Add node e2e density test using 60 QPS for benchmark

This PR adds a new benchmark node e2e density test that raises the Kubelet API QPS limit from the default of 5 to 60 via ConfigMap.

The latency introduced by the API QPS limit accounts for as much as ~30% of e2e latency when creating a large batch of pods (e.g. 105), so the measured pod startup latency and creation throughput understate the Kubelet's real performance. This test lets us measure the real performance of the Kubelet core.
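For a back-of-envelope sense of scale (illustrative only, not a measurement from this PR), the sketch below assumes the Kubelet's client-side token bucket with the default burst of 10 and a hypothetical ~3 API calls per pod. At 5 QPS the throttling alone puts a floor of roughly a minute under a 105-pod batch; at 60 QPS that floor nearly disappears.

package main

import (
	"fmt"
	"time"
)

// throttleFloor is a rough lower bound on time spent waiting on a client-side
// token bucket: the first `burst` calls pass immediately, and every later call
// waits for a token refilled at `qps` per second.
func throttleFloor(calls, burst int, qps float64) time.Duration {
	if calls <= burst {
		return 0
	}
	return time.Duration(float64(calls-burst) / qps * float64(time.Second))
}

func main() {
	calls := 105 * 3 // 105 pods, assuming ~3 Kubelet API calls per pod (hypothetical figure)
	fmt.Println("floor at 5 QPS: ", throttleFloor(calls, 10, 5))  // ~1m1s
	fmt.Println("floor at 60 QPS:", throttleFloor(calls, 10, 60)) // ~5s
}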
Kubernetes Submit Queue 2016-09-12 20:27:11 -07:00 committed by GitHub
commit 0ca6506850
2 changed files with 94 additions and 25 deletions

@@ -171,6 +171,47 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
}
})
Context("create a batch of pods with higher API QPS", func() {
dTests := []densityTest{
{
podsNr: 105,
interval: 0 * time.Millisecond,
APIQPSLimit: 60,
},
{
podsNr: 105,
interval: 100 * time.Millisecond,
APIQPSLimit: 60,
},
{
podsNr: 105,
interval: 300 * time.Millisecond,
APIQPSLimit: 60,
},
}
for _, testArg := range dTests {
itArg := testArg
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval (QPS %d) [Benchmark]",
itArg.podsNr, itArg.interval, itArg.APIQPSLimit), func() {
itArg.createMethod = "batch"
testName := itArg.getTestName()
// The latency caused by the API QPS limit takes a large portion (up to ~33%) of e2e latency.
// It makes the measured Kubelet pod startup latency (and creation throughput) understate the Kubelet's real performance.
// Here we raise the API QPS limit from the default of 5 to 60 in order to test real Kubelet performance.
// Note that this will cause higher resource usage.
setKubeletAPIQPSLimit(f, int32(itArg.APIQPSLimit))
batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, true)
By("Verifying latency")
logAndVerifyLatency(batchLag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testName, false)
By("Verifying resource")
logAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, testName, false)
})
}
})
Context("create a sequence of pods", func() {
dTests := []densityTest{
{
@@ -252,6 +293,8 @@ type densityTest struct {
interval time.Duration
// create pods in 'batch' or 'sequence'
createMethod string
// API QPS limit
APIQPSLimit int
// performance limits
cpuLimits framework.ContainersCPUSummary
memLimits framework.ResourceUsagePerContainer
@@ -260,7 +303,14 @@ type densityTest struct {
}
func (dt *densityTest) getTestName() string {
return fmt.Sprintf("density_create_%s_%d_%d_%d", dt.createMethod, dt.podsNr, dt.bgPodsNr, dt.interval.Nanoseconds()/1000000)
// The current default API QPS limit is 5
// TODO(coufon): is there any way to not hard code this?
APIQPSLimit := 5
if dt.APIQPSLimit > 0 {
APIQPSLimit = dt.APIQPSLimit
}
return fmt.Sprintf("density_create_%s_%d_%d_%d_%d", dt.createMethod, dt.podsNr, dt.bgPodsNr,
dt.interval.Nanoseconds()/1000000, APIQPSLimit)
}
// runDensityBatchTest runs the density batch pod creation test
@@ -519,3 +569,34 @@ func logAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyD
func logPodCreateThroughput(batchLag time.Duration, e2eLags []framework.PodLatencyData, podsNr int, testName string) {
framework.PrintPerfData(getThroughputPerfData(batchLag, e2eLags, podsNr, testName))
}
// setKubeletAPIQPSLimit sets the Kubelet API QPS via ConfigMap. The Kubelet will restart with the new QPS.
func setKubeletAPIQPSLimit(f *framework.Framework, newAPIQPS int32) {
const restartGap = 40 * time.Second
resp := pollConfigz(2*time.Minute, 5*time.Second)
kubeCfg, err := decodeConfigz(resp)
framework.ExpectNoError(err)
framework.Logf("Old QPS limit is: %d\n", kubeCfg.KubeAPIQPS)
// Set new API QPS limit
kubeCfg.KubeAPIQPS = newAPIQPS
// TODO(coufon): createConfigMap should first check whether the ConfigMap already exists and, if so, use updateConfigMap instead.
// Calling createConfigMap twice results in an error. This is fine for the benchmark test because we only run one test on a new node.
_, err = createConfigMap(f, kubeCfg)
framework.ExpectNoError(err)
// Wait for Kubelet to restart
time.Sleep(restartGap)
// Check new QPS has been set
resp = pollConfigz(2*time.Minute, 5*time.Second)
kubeCfg, err = decodeConfigz(resp)
framework.ExpectNoError(err)
framework.Logf("New QPS limit is: %d\n", kubeCfg.KubeAPIQPS)
// TODO(coufon): check test result to see if we need to retry here
if kubeCfg.KubeAPIQPS != newAPIQPS {
framework.Failf("Failed to set new kubelet API QPS limit.")
}
}
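The TODO above describes a create-or-update pattern for the ConfigMap. Below is a minimal sketch of that pattern, assuming a recent, context-aware client-go clientset; this helper is hypothetical and is not what the PR calls (the PR only uses the framework's createConfigMap).

import (
	"context"

	v1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// createOrUpdateConfigMap tries Create first and falls back to Update when the
// ConfigMap already exists (hypothetical helper, not part of this PR).
func createOrUpdateConfigMap(ctx context.Context, client kubernetes.Interface, cm *v1.ConfigMap) (*v1.ConfigMap, error) {
	created, err := client.CoreV1().ConfigMaps(cm.Namespace).Create(ctx, cm, metav1.CreateOptions{})
	if err == nil {
		return created, nil
	}
	if !apierrors.IsAlreadyExists(err) {
		return nil, err
	}
	return client.CoreV1().ConfigMaps(cm.Namespace).Update(ctx, cm, metav1.UpdateOptions{})
}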

@@ -5,46 +5,34 @@ images:
project: kubernetes-node-e2e-images
machine: n1-standard-1
tests:
- '.*create 35 pods with 0s? interval \[Benchmark\]'
- 'create 35 pods with 0s? interval \[Benchmark\]'
containervm-density2:
image: e2e-node-containervm-v20160321-image
project: kubernetes-node-e2e-images
machine: n1-standard-1
tests:
- '.*create 105 pods with 0s? interval \[Benchmark\]'
- 'create 105 pods with 0s? interval \[Benchmark\]'
containervm-density2-qps60:
image: e2e-node-containervm-v20160321-image
project: kubernetes-node-e2e-images
machine: n1-standard-1
tests:
- 'create 105 pods with 0s? interval \(QPS 60\) \[Benchmark\]'
containervm-density3:
image: e2e-node-containervm-v20160321-image
project: kubernetes-node-e2e-images
machine: n1-standard-2
tests:
- '.*create 105 pods with 0s? interval \[Benchmark\]'
- 'create 105 pods with 0s? interval \[Benchmark\]'
containervm-density4:
image: e2e-node-containervm-v20160321-image
project: kubernetes-node-e2e-images
machine: n1-standard-1
tests:
- '.*create 35 pods with 100ms interval \[Benchmark\]'
containervm-density5:
- 'create 105 pods with 100ms interval \[Benchmark\]'
containervm-resource1:
image: e2e-node-containervm-v20160321-image
project: kubernetes-node-e2e-images
machine: n1-standard-1
tests:
- '.*create 105 pods with 100ms interval \[Benchmark\]'
containervm-density6:
image: e2e-node-containervm-v20160321-image
project: kubernetes-node-e2e-images
machine: n1-standard-2
tests:
- '.*create 105 pods with 100ms interval \[Benchmark\]'
containervm-density7:
image: e2e-node-containervm-v20160321-image
project: kubernetes-node-e2e-images
machine: n1-standard-1
tests:
- '.*create 105 pods with 300ms interval \[Benchmark\]'
containervm-density8:
image: e2e-node-containervm-v20160321-image
project: kubernetes-node-e2e-images
machine: n1-standard-2
tests:
- '.*create 105 pods with 300ms interval \[Benchmark\]'
- 'resource tracking for 105 pods per node \[Benchmark\]'
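The entries above select tests by matching these regexes against the generated It() descriptions. As a quick illustration (not part of the PR), the new containervm-density2-qps60 pattern matches the name produced by the 0 ms / QPS 60 case added in the density test:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Focus pattern from the containervm-density2-qps60 entry above.
	pattern := regexp.MustCompile(`create 105 pods with 0s? interval \(QPS 60\) \[Benchmark\]`)
	// Description generated by the It() string for podsNr=105, interval=0, APIQPSLimit=60.
	name := "latency/resource should be within limit when create 105 pods with 0s interval (QPS 60) [Benchmark]"
	fmt.Println(pattern.MatchString(name)) // true
}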