From b93b4a2c96d3b9f4c143d46651ddefdb0da14ece Mon Sep 17 00:00:00 2001
From: Wei Huang <wei.huang1@ibm.com>
Date: Thu, 25 Feb 2021 13:33:27 -0800
Subject: [PATCH] sched: fix a bug that metrics of init or collected pods are
 re-collected

---
 .../metrics/testutil/metrics.go               | 15 -------
 .../metrics/testutil/metrics_test.go          | 42 -------------------
 .../scheduler_perf/scheduler_perf_test.go     | 11 +++++
 test/integration/scheduler_perf/util.go       |  4 --
 4 files changed, 11 insertions(+), 61 deletions(-)

diff --git a/staging/src/k8s.io/component-base/metrics/testutil/metrics.go b/staging/src/k8s.io/component-base/metrics/testutil/metrics.go
index ac66772c446..60a186483fb 100644
--- a/staging/src/k8s.io/component-base/metrics/testutil/metrics.go
+++ b/staging/src/k8s.io/component-base/metrics/testutil/metrics.go
@@ -281,21 +281,6 @@ func (hist *Histogram) Average() float64 {
 	return hist.GetSampleSum() / float64(hist.GetSampleCount())
 }
 
-// Clear clears all fields of the wrapped histogram
-func (hist *Histogram) Clear() {
-	if hist.SampleCount != nil {
-		*hist.SampleCount = 0
-	}
-	if hist.SampleSum != nil {
-		*hist.SampleSum = 0
-	}
-	for _, b := range hist.Bucket {
-		if b.CumulativeCount != nil {
-			*b.CumulativeCount = 0
-		}
-	}
-}
-
 // Validate makes sure the wrapped histogram has all necessary fields set and with valid values.
 func (hist *Histogram) Validate() error {
 	if hist.SampleCount == nil || hist.GetSampleCount() == 0 {
diff --git a/staging/src/k8s.io/component-base/metrics/testutil/metrics_test.go b/staging/src/k8s.io/component-base/metrics/testutil/metrics_test.go
index 81afe8c8cb9..d3958e3a0b5 100644
--- a/staging/src/k8s.io/component-base/metrics/testutil/metrics_test.go
+++ b/staging/src/k8s.io/component-base/metrics/testutil/metrics_test.go
@@ -121,48 +121,6 @@ func TestHistogramQuantile(t *testing.T) {
 	}
 }
 
-func TestHistogramClear(t *testing.T) {
-	samples := []float64{0.5, 0.5, 0.5, 0.5, 1.5, 1.5, 1.5, 1.5, 3, 3, 3, 3, 6, 6, 6, 6}
-	bounds := []float64{1, 2, 4, 8}
-	h := samples2Histogram(samples, bounds)
-
-	if *h.SampleCount == 0 {
-		t.Errorf("Expected histogram .SampleCount to be non-zero")
-	}
-	if *h.SampleSum == 0 {
-		t.Errorf("Expected histogram .SampleSum to be non-zero")
-	}
-
-	for _, b := range h.Bucket {
-		if b.CumulativeCount != nil {
-			if *b.CumulativeCount == 0 {
-				t.Errorf("Expected histogram bucket to have non-zero comulative count")
-			}
-		}
-	}
-
-	h.Clear()
-
-	if *h.SampleCount != 0 {
-		t.Errorf("Expected histogram .SampleCount to be zero, have %v instead", *h.SampleCount)
-	}
-
-	if *h.SampleSum != 0 {
-		t.Errorf("Expected histogram .SampleSum to be zero, have %v instead", *h.SampleSum)
-	}
-
-	for _, b := range h.Bucket {
-		if b.CumulativeCount != nil {
-			if *b.CumulativeCount != 0 {
-				t.Errorf("Expected histogram bucket to have zero comulative count, have %v instead", *b.CumulativeCount)
-			}
-		}
-		if b.UpperBound != nil {
-			*b.UpperBound = 0
-		}
-	}
-}
-
 func TestHistogramValidate(t *testing.T) {
 	tests := []struct {
 		name string
diff --git a/test/integration/scheduler_perf/scheduler_perf_test.go b/test/integration/scheduler_perf/scheduler_perf_test.go
index 8bd4b41d717..98bf90f1716 100644
--- a/test/integration/scheduler_perf/scheduler_perf_test.go
+++ b/test/integration/scheduler_perf/scheduler_perf_test.go
@@ -42,6 +42,7 @@ import (
 	"k8s.io/client-go/restmapper"
 	"k8s.io/component-base/featuregate"
 	featuregatetesting "k8s.io/component-base/featuregate/testing"
+	"k8s.io/component-base/metrics/legacyregistry"
 	"k8s.io/klog/v2"
 	"k8s.io/kubernetes/test/integration/framework"
 	testutils "k8s.io/kubernetes/test/utils"
@@ -363,6 +364,9 @@ func BenchmarkPerfScheduling(b *testing.B) {
 						defer featuregatetesting.SetFeatureGateDuringTest(b, utilfeature.DefaultFeatureGate, feature, flag)()
 					}
 					dataItems.DataItems = append(dataItems.DataItems, runWorkload(b, tc, w)...)
+					// Reset metrics to prevent metrics generated in current workload gets
+					// carried over to the next workload.
+					legacyregistry.Reset()
 				})
 			}
 		})
@@ -454,6 +458,13 @@ func runWorkload(b *testing.B, tc *testCase, w *workload) []DataItem {
 				mu.Unlock()
 			}
 
+			if !concreteOp.SkipWaitToCompletion {
+				// SkipWaitToCompletion=false indicates this step has waited for the Pods to be scheduled.
+				// So we reset the metrics in global registry; otherwise metrics gathered in this step
+				// will be carried over to next step.
+				legacyregistry.Reset()
+			}
+
 		case *churnOp:
 			var namespace string
 			if concreteOp.Namespace != nil {
diff --git a/test/integration/scheduler_perf/util.go b/test/integration/scheduler_perf/util.go
index 7fdc4e813b1..1ad97969350 100644
--- a/test/integration/scheduler_perf/util.go
+++ b/test/integration/scheduler_perf/util.go
@@ -205,10 +205,6 @@ func collectHistogram(metric string, labels map[string]string) *DataItem {
 	q99 := hist.Quantile(0.95)
 	avg := hist.Average()
 
-	// clear the metrics so that next test always starts with empty prometheus
-	// metrics (since the metrics are shared among all tests run inside the same binary)
-	hist.Clear()
-
 	msFactor := float64(time.Second) / float64(time.Millisecond)
 
 	// Copy labels and add "Metric" label for this metric.