Add Snapshot Controller e2e metric tests

Signed-off-by: Grant Griffiths <ggriffiths@purestorage.com>
This commit is contained in:
Grant Griffiths 2021-01-08 15:54:59 -08:00
parent 9739592e84
commit 564e531aa7
13 changed files with 467 additions and 62 deletions

View File

@ -22,6 +22,8 @@ spec:
serviceAccount: volume-snapshot-controller
containers:
- name: volume-snapshot-controller
image: k8s.gcr.io/sig-storage/snapshot-controller:v3.0.2
image: k8s.gcr.io/sig-storage/snapshot-controller:v4.0.0
args:
- "--v=5"
- "--metrics-path=/metrics"
- "--http-endpoint=:9102"

View File

@ -259,7 +259,7 @@ func verifyRemainingObjects(f *framework.Framework, objects map[string]int) (boo
func gatherMetrics(f *framework.Framework) {
ginkgo.By("Gathering metrics")
var summary framework.TestDataSummary
grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, false, false, true, false, false)
grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, false, false, true, false, false, false)
if err != nil {
framework.Logf("Failed to create MetricsGrabber. Skipping metrics gathering.")
} else {

View File

@ -296,7 +296,7 @@ func (f *Framework) BeforeEach() {
gatherMetricsAfterTest := TestContext.GatherMetricsAfterTest == "true" || TestContext.GatherMetricsAfterTest == "master"
if gatherMetricsAfterTest && TestContext.IncludeClusterAutoscalerMetrics {
grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, !ProviderIs("kubemark"), false, false, false, TestContext.IncludeClusterAutoscalerMetrics)
grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, !ProviderIs("kubemark"), false, false, false, TestContext.IncludeClusterAutoscalerMetrics, false)
if err != nil {
Logf("Failed to create MetricsGrabber (skipping ClusterAutoscaler metrics gathering before test): %v", err)
} else {
@ -449,7 +449,7 @@ func (f *Framework) AfterEach() {
ginkgo.By("Gathering metrics")
// Grab apiserver, scheduler, controller-manager metrics and (optionally) nodes' kubelet metrics.
grabMetricsFromKubelets := TestContext.GatherMetricsAfterTest != "master" && !ProviderIs("kubemark")
grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, grabMetricsFromKubelets, true, true, true, TestContext.IncludeClusterAutoscalerMetrics)
grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, grabMetricsFromKubelets, true, true, true, TestContext.IncludeClusterAutoscalerMetrics, false)
if err != nil {
Logf("Failed to create MetricsGrabber (skipping metrics gathering): %v", err)
} else {

View File

@ -139,7 +139,7 @@ func getKubeletMetricsFromNode(c clientset.Interface, nodeName string) (KubeletM
if c == nil {
return GrabKubeletMetricsWithoutProxy(nodeName, "/metrics")
}
grabber, err := NewMetricsGrabber(c, nil, true, false, false, false, false)
grabber, err := NewMetricsGrabber(c, nil, true, false, false, false, false, false)
if err != nil {
return KubeletMetrics{}, err
}

View File

@ -38,40 +38,48 @@ const (
// kubeControllerManagerPort is the default port for the controller manager status server.
kubeControllerManagerPort = 10257
metricsProxyPod = "metrics-proxy"
// snapshotControllerPort is the port for the snapshot controller
snapshotControllerPort = 9102
)
// Collection is metrics collection of components
type Collection struct {
APIServerMetrics APIServerMetrics
ControllerManagerMetrics ControllerManagerMetrics
KubeletMetrics map[string]KubeletMetrics
SchedulerMetrics SchedulerMetrics
ClusterAutoscalerMetrics ClusterAutoscalerMetrics
APIServerMetrics APIServerMetrics
ControllerManagerMetrics ControllerManagerMetrics
SnapshotControllerMetrics SnapshotControllerMetrics
KubeletMetrics map[string]KubeletMetrics
SchedulerMetrics SchedulerMetrics
ClusterAutoscalerMetrics ClusterAutoscalerMetrics
}
// Grabber provides functions which grab metrics from components
type Grabber struct {
client clientset.Interface
externalClient clientset.Interface
grabFromAPIServer bool
grabFromControllerManager bool
grabFromKubelets bool
grabFromScheduler bool
grabFromClusterAutoscaler bool
kubeScheduler string
waitForSchedulerReadyOnce sync.Once
kubeControllerManager string
waitForControllerManagerReadyOnce sync.Once
client clientset.Interface
externalClient clientset.Interface
grabFromAPIServer bool
grabFromControllerManager bool
grabFromKubelets bool
grabFromScheduler bool
grabFromClusterAutoscaler bool
grabFromSnapshotController bool
kubeScheduler string
waitForSchedulerReadyOnce sync.Once
kubeControllerManager string
waitForControllerManagerReadyOnce sync.Once
snapshotController string
waitForSnapshotControllerReadyOnce sync.Once
}
// NewMetricsGrabber returns new metrics which are initialized.
func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, kubelets bool, scheduler bool, controllers bool, apiServer bool, clusterAutoscaler bool) (*Grabber, error) {
func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, kubelets bool, scheduler bool, controllers bool, apiServer bool, clusterAutoscaler bool, snapshotController bool) (*Grabber, error) {
kubeScheduler := ""
kubeControllerManager := ""
snapshotControllerManager := ""
regKubeScheduler := regexp.MustCompile("kube-scheduler-.*")
regKubeControllerManager := regexp.MustCompile("kube-controller-manager-.*")
regSnapshotController := regexp.MustCompile("volume-snapshot-controller.*")
podList, err := c.CoreV1().Pods(metav1.NamespaceSystem).List(context.TODO(), metav1.ListOptions{})
if err != nil {
@ -87,7 +95,10 @@ func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, kubelets b
if regKubeControllerManager.MatchString(pod.Name) {
kubeControllerManager = pod.Name
}
if kubeScheduler != "" && kubeControllerManager != "" {
if regSnapshotController.MatchString(pod.Name) {
snapshotControllerManager = pod.Name
}
if kubeScheduler != "" && kubeControllerManager != "" && snapshotControllerManager != "" {
break
}
}
@ -99,20 +110,26 @@ func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, kubelets b
controllers = false
klog.Warningf("Can't find kube-controller-manager pod. Grabbing metrics from kube-controller-manager is disabled.")
}
if snapshotControllerManager == "" {
snapshotController = false
klog.Warningf("Can't find snapshot-controller pod. Grabbing metrics from snapshot-controller is disabled.")
}
if ec == nil {
klog.Warningf("Did not receive an external client interface. Grabbing metrics from ClusterAutoscaler is disabled.")
}
return &Grabber{
client: c,
externalClient: ec,
grabFromAPIServer: apiServer,
grabFromControllerManager: controllers,
grabFromKubelets: kubelets,
grabFromScheduler: scheduler,
grabFromClusterAutoscaler: clusterAutoscaler,
kubeScheduler: kubeScheduler,
kubeControllerManager: kubeControllerManager,
client: c,
externalClient: ec,
grabFromAPIServer: apiServer,
grabFromControllerManager: controllers,
grabFromKubelets: kubelets,
grabFromScheduler: scheduler,
grabFromClusterAutoscaler: clusterAutoscaler,
grabFromSnapshotController: snapshotController,
kubeScheduler: kubeScheduler,
kubeControllerManager: kubeControllerManager,
snapshotController: snapshotControllerManager,
}, nil
}
@ -220,6 +237,48 @@ func (g *Grabber) GrabFromControllerManager() (ControllerManagerMetrics, error)
return parseControllerManagerMetrics(output)
}
// GrabFromSnapshotController returns metrics from the snapshot controller.
//
// podName and port select the pod and port to scrape. An empty podName falls
// back to the snapshot controller pod name stored on the Grabber, and a zero
// port falls back to snapshotControllerPort. An error is returned when the
// Grabber has no snapshot controller pod registered, when waiting for the pod
// or its metrics endpoint fails, or when the scraped output cannot be parsed.
func (g *Grabber) GrabFromSnapshotController(podName string, port int) (SnapshotControllerMetrics, error) {
	if g.snapshotController == "" {
		return SnapshotControllerMetrics{}, fmt.Errorf("SnapshotController pod is not registered. Skipping SnapshotController's metrics gathering")
	}

	// Use overrides if provided via test config flags.
	// Otherwise, use the default snapshot controller pod name and port.
	if podName == "" {
		podName = g.snapshotController
	}
	if port == 0 {
		port = snapshotControllerPort
	}

	// The readiness wait is guarded by a sync.Once, so it runs (and can fail)
	// only on the first call made through this Grabber; later calls go
	// straight to scraping.
	var err error
	g.waitForSnapshotControllerReadyOnce.Do(func() {
		if readyErr := e2epod.WaitForPodsReady(g.client, metav1.NamespaceSystem, podName, 0); readyErr != nil {
			err = fmt.Errorf("error waiting for snapshot controller pod to be ready: %w", readyErr)
			return
		}

		// Keep the most recent scrape error so that a poll timeout can report
		// why the endpoint was never reachable.
		var lastMetricsFetchErr error
		if metricsWaitErr := wait.PollImmediate(time.Second, time.Minute, func() (bool, error) {
			_, lastMetricsFetchErr = g.getMetricsFromPod(g.client, podName, metav1.NamespaceSystem, port)
			return lastMetricsFetchErr == nil, nil
		}); metricsWaitErr != nil {
			// Wrap the poll error so callers can still inspect it with errors.Is.
			err = fmt.Errorf("error waiting for snapshot controller pod to expose metrics: %w; %v", metricsWaitErr, lastMetricsFetchErr)
			return
		}
	})
	if err != nil {
		return SnapshotControllerMetrics{}, err
	}

	output, err := g.getMetricsFromPod(g.client, podName, metav1.NamespaceSystem, port)
	if err != nil {
		return SnapshotControllerMetrics{}, err
	}
	return parseSnapshotControllerMetrics(output)
}
// GrabFromAPIServer returns metrics from API server
func (g *Grabber) GrabFromAPIServer() (APIServerMetrics, error) {
output, err := g.getMetricsFromAPIServer()
@ -257,6 +316,14 @@ func (g *Grabber) Grab() (Collection, error) {
result.ControllerManagerMetrics = metrics
}
}
if g.grabFromSnapshotController {
metrics, err := g.GrabFromSnapshotController(g.snapshotController, snapshotControllerPort)
if err != nil {
errs = append(errs, err)
} else {
result.SnapshotControllerMetrics = metrics
}
}
if g.grabFromClusterAutoscaler {
metrics, err := g.GrabFromClusterAutoscaler()
if err != nil {

View File

@ -0,0 +1,40 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import "k8s.io/component-base/metrics/testutil"
// SnapshotControllerMetrics is metrics for the snapshot controller.
type SnapshotControllerMetrics testutil.Metrics
// Equal reports whether m holds the same metrics as o, as decided by the
// underlying testutil.Metrics comparison.
func (m *SnapshotControllerMetrics) Equal(o SnapshotControllerMetrics) bool {
	mine := (*testutil.Metrics)(m)
	theirs := testutil.Metrics(o)
	return mine.Equal(theirs)
}
// newSnapshotControllerMetrics returns a SnapshotControllerMetrics backed by a
// freshly created testutil.Metrics value.
func newSnapshotControllerMetrics() SnapshotControllerMetrics {
	return SnapshotControllerMetrics(testutil.NewMetrics())
}
// parseSnapshotControllerMetrics parses data into a new
// SnapshotControllerMetrics. On a parse error it returns an empty value
// together with that error.
func parseSnapshotControllerMetrics(data string) (SnapshotControllerMetrics, error) {
	parsed := newSnapshotControllerMetrics()
	err := testutil.ParseMetrics(data, (*testutil.Metrics)(&parsed))
	if err != nil {
		return SnapshotControllerMetrics{}, err
	}
	return parsed, nil
}

View File

@ -182,6 +182,12 @@ type TestContextType struct {
// DockerConfigFile is a file that contains credentials which can be used to pull images from certain private registries, needed for a test.
DockerConfigFile string
// SnapshotControllerPodName is the name used for identifying the snapshot controller pod.
SnapshotControllerPodName string
// SnapshotControllerHTTPPort is the port used for communicating with the snapshot controller HTTP endpoint.
SnapshotControllerHTTPPort int
}
// NodeKillerConfig describes configuration of NodeKiller -- a utility to
@ -315,6 +321,9 @@ func RegisterCommonFlags(flags *flag.FlagSet) {
flags.StringVar(&TestContext.ProgressReportURL, "progress-report-url", "", "The URL to POST progress updates to as the suite runs to assist in aiding integrations. If empty, no messages sent.")
flags.StringVar(&TestContext.SpecSummaryOutput, "spec-dump", "", "The file to dump all ginkgo.SpecSummary to after tests run. If empty, no objects are saved/printed.")
flags.StringVar(&TestContext.DockerConfigFile, "docker-config-file", "", "A file that contains credentials which can be used to pull images from certain private registries, needed for a test.")
flags.StringVar(&TestContext.SnapshotControllerPodName, "snapshot-controller-pod-name", "", "The pod name to use for identifying the snapshot controller in the kube-system namespace.")
flags.IntVar(&TestContext.SnapshotControllerHTTPPort, "snapshot-controller-http-port", 0, "The port to use for snapshot controller HTTP communication.")
}
// RegisterClusterFlags registers flags specific to the cluster e2e test suite.

View File

@ -20,19 +20,20 @@ import "time"
const (
// Default timeouts to be used in TimeoutContext
podStartTimeout = 5 * time.Minute
podStartShortTimeout = 2 * time.Minute
podStartSlowTimeout = 15 * time.Minute
podDeleteTimeout = 5 * time.Minute
claimProvisionTimeout = 5 * time.Minute
claimProvisionShortTimeout = 1 * time.Minute
claimBoundTimeout = 3 * time.Minute
pvReclaimTimeout = 3 * time.Minute
pvBoundTimeout = 3 * time.Minute
pvDeleteTimeout = 3 * time.Minute
pvDeleteSlowTimeout = 20 * time.Minute
snapshotCreateTimeout = 5 * time.Minute
snapshotDeleteTimeout = 5 * time.Minute
podStartTimeout = 5 * time.Minute
podStartShortTimeout = 2 * time.Minute
podStartSlowTimeout = 15 * time.Minute
podDeleteTimeout = 5 * time.Minute
claimProvisionTimeout = 5 * time.Minute
claimProvisionShortTimeout = 1 * time.Minute
claimBoundTimeout = 3 * time.Minute
pvReclaimTimeout = 3 * time.Minute
pvBoundTimeout = 3 * time.Minute
pvDeleteTimeout = 3 * time.Minute
pvDeleteSlowTimeout = 20 * time.Minute
snapshotCreateTimeout = 5 * time.Minute
snapshotDeleteTimeout = 5 * time.Minute
snapshotControllerMetricsTimeout = 5 * time.Minute
)
// TimeoutContext contains timeout settings for several actions.
@ -77,23 +78,27 @@ type TimeoutContext struct {
// SnapshotDelete is how long for snapshot to delete snapshotContent.
SnapshotDelete time.Duration
// SnapshotControllerMetrics is how long to wait for snapshot controller metrics.
SnapshotControllerMetrics time.Duration
}
// NewTimeoutContextWithDefaults returns a TimeoutContext with default values.
func NewTimeoutContextWithDefaults() *TimeoutContext {
return &TimeoutContext{
PodStart: podStartTimeout,
PodStartShort: podStartShortTimeout,
PodStartSlow: podStartSlowTimeout,
PodDelete: podDeleteTimeout,
ClaimProvision: claimProvisionTimeout,
ClaimProvisionShort: claimProvisionShortTimeout,
ClaimBound: claimBoundTimeout,
PVReclaim: pvReclaimTimeout,
PVBound: pvBoundTimeout,
PVDelete: pvDeleteTimeout,
PVDeleteSlow: pvDeleteSlowTimeout,
SnapshotCreate: snapshotCreateTimeout,
SnapshotDelete: snapshotDeleteTimeout,
PodStart: podStartTimeout,
PodStartShort: podStartShortTimeout,
PodStartSlow: podStartSlowTimeout,
PodDelete: podDeleteTimeout,
ClaimProvision: claimProvisionTimeout,
ClaimProvisionShort: claimProvisionShortTimeout,
ClaimBound: claimBoundTimeout,
PVReclaim: pvReclaimTimeout,
PVBound: pvBoundTimeout,
PVDelete: pvDeleteTimeout,
PVDeleteSlow: pvDeleteSlowTimeout,
SnapshotCreate: snapshotCreateTimeout,
SnapshotDelete: snapshotDeleteTimeout,
SnapshotControllerMetrics: snapshotControllerMetricsTimeout,
}
}

View File

@ -51,7 +51,7 @@ var _ = instrumentation.SIGDescribe("MetricsGrabber", func() {
}
}
gomega.Eventually(func() error {
grabber, err = e2emetrics.NewMetricsGrabber(c, ec, true, true, true, true, true)
grabber, err = e2emetrics.NewMetricsGrabber(c, ec, true, true, true, true, true, true)
if err != nil {
return fmt.Errorf("failed to create metrics grabber: %v", err)
}

View File

@ -48,6 +48,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/events"
"k8s.io/kubernetes/test/e2e/framework"
e2eevents "k8s.io/kubernetes/test/e2e/framework/events"
e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
e2epv "k8s.io/kubernetes/test/e2e/framework/pv"
e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
@ -1621,6 +1622,107 @@ var _ = utils.SIGDescribe("CSI mock volume", func() {
})
}
})
// Verifies that the snapshot controller's operation counters grow by at least
// one for CreateSnapshot, CreateSnapshotAndReady and DeleteSnapshot when a
// snapshot is created and then deleted, for each snapshot pattern below.
ginkgo.Context("CSI Snapshot Controller metrics [Feature:VolumeSnapshotDataSource]", func() {
	tests := []struct {
		name    string
		pattern storageframework.TestPattern
	}{
		{
			name:    "snapshot controller should emit dynamic CreateSnapshot, CreateSnapshotAndReady, and DeleteSnapshot metrics",
			pattern: storageframework.DynamicSnapshotDelete,
		},
		{
			name:    "snapshot controller should emit pre-provisioned CreateSnapshot, CreateSnapshotAndReady, and DeleteSnapshot metrics",
			pattern: storageframework.PreprovisionedSnapshotDelete,
		},
	}
	for _, test := range tests {
		// Copy the range variable: the spec body runs after this loop has
		// finished, and before Go 1.22 every closure would otherwise observe
		// the last element of tests.
		test := test
		ginkgo.It(test.name, func() {
			init(testParameters{
				disableAttach:  true,
				registerDriver: true,
				enableSnapshot: true,
			})

			sDriver, ok := m.driver.(storageframework.SnapshottableTestDriver)
			if !ok {
				e2eskipper.Skipf("mock driver does not support snapshots -- skipping")
			}
			defer cleanup()

			metricsGrabber, err := e2emetrics.NewMetricsGrabber(m.config.Framework.ClientSet, nil, false, false, false, false, false, true)
			if err != nil {
				framework.Failf("Error creating metrics grabber : %v", err)
			}

			// Grab initial metrics - if this fails, snapshot controller metrics are not setup. Skip in this case.
			_, err = metricsGrabber.GrabFromSnapshotController(framework.TestContext.SnapshotControllerPodName, framework.TestContext.SnapshotControllerHTTPPort)
			if err != nil {
				e2eskipper.Skipf("Snapshot controller metrics not found -- skipping")
			}

			ginkgo.By("getting all initial metric values")
			metricsTestConfig := newSnapshotMetricsTestConfig("snapshot_controller_operation_total_seconds_count",
				"count",
				m.config.GetUniqueDriverName(),
				"CreateSnapshot",
				"success",
				"",
				test.pattern)
			// Errors from the initial reads are deliberately ignored: a metric
			// that has not been emitted yet is reported as an error with a
			// value of 0, which is the baseline wanted here.
			createSnapshotMetrics := newSnapshotControllerMetrics(metricsTestConfig, metricsGrabber)
			originalCreateSnapshotCount, _ := createSnapshotMetrics.getSnapshotControllerMetricValue()
			metricsTestConfig.operationName = "CreateSnapshotAndReady"
			createSnapshotAndReadyMetrics := newSnapshotControllerMetrics(metricsTestConfig, metricsGrabber)
			originalCreateSnapshotAndReadyCount, _ := createSnapshotAndReadyMetrics.getSnapshotControllerMetricValue()
			metricsTestConfig.operationName = "DeleteSnapshot"
			deleteSnapshotMetrics := newSnapshotControllerMetrics(metricsTestConfig, metricsGrabber)
			originalDeleteSnapshotCount, _ := deleteSnapshotMetrics.getSnapshotControllerMetricValue()

			ginkgo.By("Creating storage class")
			var sc *storagev1.StorageClass
			if dDriver, ok := m.driver.(storageframework.DynamicPVTestDriver); ok {
				sc = dDriver.GetDynamicProvisionStorageClass(m.config, "")
			}
			class, err := m.cs.StorageV1().StorageClasses().Create(context.TODO(), sc, metav1.CreateOptions{})
			framework.ExpectNoError(err, "Failed to create storage class: %v", err)
			m.sc[class.Name] = class
			pvc := e2epv.MakePersistentVolumeClaim(e2epv.PersistentVolumeClaimConfig{
				Name:             "snapshot-test-pvc",
				StorageClassName: &(class.Name),
			}, f.Namespace.Name)

			ginkgo.By(fmt.Sprintf("Creating PVC %s/%s", pvc.Namespace, pvc.Name))
			pvc, err = m.cs.CoreV1().PersistentVolumeClaims(f.Namespace.Name).Create(context.TODO(), pvc, metav1.CreateOptions{})
			framework.ExpectNoError(err, "Failed to create claim: %v", err)

			ginkgo.By("Wait for PVC to be Bound")
			_, err = e2epv.WaitForPVClaimBoundPhase(m.cs, []*v1.PersistentVolumeClaim{pvc}, 1*time.Minute)
			framework.ExpectNoError(err, "Failed to wait for claim to be bound: %v", err)

			ginkgo.By("Creating snapshot")
			parameters := map[string]string{}
			// CreateSnapshotResource returns no error value here, so there is
			// nothing to check after this call.
			sr := storageframework.CreateSnapshotResource(sDriver, m.config, test.pattern, pvc.Name, pvc.Namespace, f.Timeouts, parameters)

			ginkgo.By("Checking for CreateSnapshot metrics")
			createSnapshotMetrics.waitForSnapshotControllerMetric(originalCreateSnapshotCount+1.0, f.Timeouts.SnapshotControllerMetrics)

			ginkgo.By("Checking for CreateSnapshotAndReady metrics")
			err = utils.WaitForSnapshotReady(m.config.Framework.DynamicClient, pvc.Namespace, sr.Vs.GetName(), framework.Poll, f.Timeouts.SnapshotCreate)
			framework.ExpectNoError(err, "failed to wait for snapshot ready")
			createSnapshotAndReadyMetrics.waitForSnapshotControllerMetric(originalCreateSnapshotAndReadyCount+1.0, f.Timeouts.SnapshotControllerMetrics)

			// delete the snapshot and check if the snapshot is deleted
			deleteSnapshot(m.cs, m.config, sr.Vs)

			// deleteSnapshotMetrics already holds its own copy of the config
			// with operationName set to "DeleteSnapshot".
			ginkgo.By("check for delete metrics")
			deleteSnapshotMetrics.waitForSnapshotControllerMetric(originalDeleteSnapshotCount+1.0, f.Timeouts.SnapshotControllerMetrics)
		})
	}
})
})
func deleteSnapshot(cs clientset.Interface, config *storageframework.PerTestConfig, snapshot *unstructured.Unstructured) {
@ -2136,3 +2238,183 @@ func createPreHook(method string, callback func(counter int64) error) *drivers.H
}(),
}
}
// snapshotMetricsTestConfig describes the single snapshot controller metric
// sample a test is interested in. metricType selects which result table is
// consulted ("count", "sum" or "bucket"); the remaining fields are joined into
// the lookup key for that table.
type snapshotMetricsTestConfig struct {
	// expected values
	metricName      string
	metricType      string
	driverName      string
	operationName   string
	operationStatus string
	snapshotType    string
	// le is compared against the sample's "le" label; empty when unused.
	le string
}
// snapshotControllerMetrics pairs a metric description with the grabber used
// to fetch it, and caches the most recently grabbed values.
type snapshotControllerMetrics struct {
	// configuration for metric
	cfg            snapshotMetricsTestConfig
	metricsGrabber *e2emetrics.Grabber

	// results, keyed by the string produced by snapshotMetricKey
	countMetrics  map[string]float64
	sumMetrics    map[string]float64
	bucketMetrics map[string]float64
}
// newSnapshotMetricsTestConfig assembles a snapshotMetricsTestConfig from the
// given values. The snapshotType field is derived from pattern.SnapshotType:
// "dynamic" for dynamically created snapshots and "pre-provisioned" for
// pre-provisioned ones. Any other snapshot type fails the test.
func newSnapshotMetricsTestConfig(metricName, metricType, driverName, operationName, operationStatus, le string, pattern storageframework.TestPattern) snapshotMetricsTestConfig {
	cfg := snapshotMetricsTestConfig{
		metricName:      metricName,
		metricType:      metricType,
		driverName:      driverName,
		operationName:   operationName,
		operationStatus: operationStatus,
		le:              le,
	}

	switch pattern.SnapshotType {
	case storageframework.DynamicCreatedSnapshot:
		cfg.snapshotType = "dynamic"
	case storageframework.PreprovisionedCreatedSnapshot:
		cfg.snapshotType = "pre-provisioned"
	default:
		framework.Failf("invalid snapshotType: %v", pattern.SnapshotType)
	}

	return cfg
}
// newSnapshotControllerMetrics returns a snapshotControllerMetrics for cfg that
// fetches through metricsGrabber and starts with empty result tables.
func newSnapshotControllerMetrics(cfg snapshotMetricsTestConfig, metricsGrabber *e2emetrics.Grabber) *snapshotControllerMetrics {
	scm := new(snapshotControllerMetrics)
	scm.cfg = cfg
	scm.metricsGrabber = metricsGrabber
	scm.countMetrics = map[string]float64{}
	scm.sumMetrics = map[string]float64{}
	scm.bucketMetrics = map[string]float64{}
	return scm
}
// waitForSnapshotControllerMetric waits, via utils.WaitUntil with a 10-second
// interval and the given timeout, for the configured metric to be readable and
// not below expectedValue. If that never happens it logs the collected metrics
// and fails the test.
func (scm *snapshotControllerMetrics) waitForSnapshotControllerMetric(expectedValue float64, timeout time.Duration) {
	metricKey := scm.getMetricKey()

	reachedExpected := func() bool {
		current, err := scm.getSnapshotControllerMetricValue()
		if err != nil {
			return false
		}
		// An operation from a previous test may have finished in the
		// meantime, so accept any value that is at least the expected one.
		return !(current < expectedValue)
	}

	if !utils.WaitUntil(10*time.Second, timeout, reachedExpected) {
		scm.showMetricsFailure(metricKey)
		framework.Failf("Unable to get valid snapshot controller metrics after %v", timeout)
	}
}
// getSnapshotControllerMetricValue refreshes the cached metrics and returns the
// value stored under the configured metric key. It returns an error when the
// refresh fails or when the key is absent from the selected table.
func (scm *snapshotControllerMetrics) getSnapshotControllerMetricValue() (float64, error) {
	wantKey := scm.getMetricKey()

	// Pull fresh samples into the result tables before looking anything up.
	if err := scm.grabSnapshotControllerMetrics(); err != nil {
		return 0, err
	}

	if value, found := scm.getMetricsTable()[wantKey]; found {
		return value, nil
	}
	return 0, fmt.Errorf("did not find metric for key %s", wantKey)
}
// getMetricsTable returns the result table matching cfg.metricType ("count",
// "sum" or "bucket"), or nil for any other metric type.
func (scm *snapshotControllerMetrics) getMetricsTable() map[string]float64 {
	switch scm.cfg.metricType {
	case "count":
		return scm.countMetrics
	case "sum":
		return scm.sumMetrics
	case "bucket":
		return scm.bucketMetrics
	}
	return nil
}
// showMetricsFailure logs metricKey followed by every entry of the currently
// selected result table, to help diagnose a missing metric.
func (scm *snapshotControllerMetrics) showMetricsFailure(metricKey string) {
	framework.Logf("failed to find metric key %s inside of the following metrics:", metricKey)
	for name, value := range scm.getMetricsTable() {
		framework.Logf("%s: %v", name, value)
	}
}
// grabSnapshotControllerMetrics fetches the snapshot controller metrics and
// stores every sample of the snapshot_controller_operation_total_seconds
// _count, _sum and _bucket families in the matching result table. Each sample
// is keyed by snapshotMetricKey built from the configured metric name and the
// sample's driver_name, operation_name, operation_status, snapshot_type and le
// labels. Other metric families are ignored. It returns the grabber's error,
// if any.
func (scm *snapshotControllerMetrics) grabSnapshotControllerMetrics() error {
	// pull all metrics
	metrics, err := scm.metricsGrabber.GrabFromSnapshotController(framework.TestContext.SnapshotControllerPodName, framework.TestContext.SnapshotControllerHTTPPort)
	if err != nil {
		return err
	}

	for method, samples := range metrics {
		// Choose the destination table once per metric family.
		var table map[string]float64
		switch method {
		case "snapshot_controller_operation_total_seconds_count":
			table = scm.countMetrics
		case "snapshot_controller_operation_total_seconds_sum":
			table = scm.sumMetrics
		case "snapshot_controller_operation_total_seconds_bucket":
			table = scm.bucketMetrics
		default:
			continue
		}

		for _, sample := range samples {
			operationName := string(sample.Metric["operation_name"])
			driverName := string(sample.Metric["driver_name"])
			operationStatus := string(sample.Metric["operation_status"])
			snapshotType := string(sample.Metric["snapshot_type"])
			le := string(sample.Metric["le"])
			key := snapshotMetricKey(scm.cfg.metricName, driverName, operationName, operationStatus, snapshotType, le)

			// Store this sample's own value under its own key. Looping over
			// all samples again here would overwrite every key with the value
			// of the last sample in the family.
			table[key] = float64(sample.Value)
		}
	}

	return nil
}
// getMetricKey returns the lookup key for the metric described by scm.cfg.
func (scm *snapshotControllerMetrics) getMetricKey() string {
	c := scm.cfg
	return snapshotMetricKey(c.metricName, c.driverName, c.operationName, c.operationStatus, c.snapshotType, c.le)
}
// snapshotMetricKey builds the shorthand key used to store and look up a
// metric sample. The key always starts with driverName (even when empty); each
// non-empty value among metricName, operationName, operationStatus,
// snapshotType and le is then appended in that order, separated by "_".
func snapshotMetricKey(metricName, driverName, operationName, operationStatus, snapshotType, le string) string {
	result := driverName
	parts := [...]string{metricName, operationName, operationStatus, snapshotType, le}
	for i := range parts {
		if parts[i] == "" {
			// Empty components are left out of the key entirely.
			continue
		}
		result = fmt.Sprintf("%s_%s", result, parts[i])
	}
	return result
}

View File

@ -107,7 +107,7 @@ func getVolumeOpCounts(c clientset.Interface, pluginName string) opCounts {
nodeLimit := 25
metricsGrabber, err := e2emetrics.NewMetricsGrabber(c, nil, true, false, true, false, false)
metricsGrabber, err := e2emetrics.NewMetricsGrabber(c, nil, true, false, true, false, false, false)
if err != nil {
framework.ExpectNoError(err, "Error creating metrics grabber: %v", err)

View File

@ -72,7 +72,7 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() {
VolumeMode: &test.VolumeMode,
}, ns)
metricsGrabber, err = e2emetrics.NewMetricsGrabber(c, nil, true, false, true, false, false)
metricsGrabber, err = e2emetrics.NewMetricsGrabber(c, nil, true, false, true, false, false, false)
if err != nil {
framework.Failf("Error creating metrics grabber : %v", err)

View File

@ -61,7 +61,7 @@ func gatherTestSuiteMetrics() error {
}
// Grab metrics for apiserver, scheduler, controller-manager, kubelet (for non-kubemark case) and cluster autoscaler (optionally).
grabber, err := e2emetrics.NewMetricsGrabber(c, nil, !framework.ProviderIs("kubemark"), true, true, true, framework.TestContext.IncludeClusterAutoscalerMetrics)
grabber, err := e2emetrics.NewMetricsGrabber(c, nil, !framework.ProviderIs("kubemark"), true, true, true, framework.TestContext.IncludeClusterAutoscalerMetrics, false)
if err != nil {
return fmt.Errorf("failed to create MetricsGrabber: %v", err)
}