Add status count to the storage_operation_duration_seconds volume metric

This commit is contained in:
Mauricio Poppe 2021-11-22 22:16:25 +00:00
parent a142f86351
commit 04805ca330

View File

@ -19,6 +19,7 @@ package storage
import ( import (
"context" "context"
"fmt" "fmt"
"strings"
"time" "time"
"github.com/onsi/ginkgo" "github.com/onsi/ginkgo"
@ -143,7 +144,6 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() {
err = e2epod.WaitTimeoutForPodRunningInNamespace(c, pod.Name, pod.Namespace, f.Timeouts.PodStart) err = e2epod.WaitTimeoutForPodRunningInNamespace(c, pod.Name, pod.Namespace, f.Timeouts.PodStart)
framework.ExpectNoError(err, "Error starting pod %s", pod.Name) framework.ExpectNoError(err, "Error starting pod %s", pod.Name)
framework.Logf("Deleting pod %q/%q", pod.Namespace, pod.Name)
framework.ExpectNoError(e2epod.DeletePodWithWait(c, pod)) framework.ExpectNoError(e2epod.DeletePodWithWait(c, pod))
updatedStorageMetrics := waitForDetachAndGrabMetrics(storageOpMetrics, metricsGrabber, pluginName) updatedStorageMetrics := waitForDetachAndGrabMetrics(storageOpMetrics, metricsGrabber, pluginName)
@ -151,7 +151,7 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() {
framework.ExpectNotEqual(len(updatedStorageMetrics.latencyMetrics), 0, "Error fetching c-m updated storage metrics") framework.ExpectNotEqual(len(updatedStorageMetrics.latencyMetrics), 0, "Error fetching c-m updated storage metrics")
framework.ExpectNotEqual(len(updatedStorageMetrics.statusMetrics), 0, "Error fetching c-m updated storage metrics") framework.ExpectNotEqual(len(updatedStorageMetrics.statusMetrics), 0, "Error fetching c-m updated storage metrics")
volumeOperations := []string{"volume_provision", "volume_detach", "volume_attach"} volumeOperations := []string{"volume_detach", "volume_attach"}
for _, volumeOp := range volumeOperations { for _, volumeOp := range volumeOperations {
verifyMetricCount(storageOpMetrics, updatedStorageMetrics, volumeOp, false) verifyMetricCount(storageOpMetrics, updatedStorageMetrics, volumeOp, false)
@ -168,12 +168,6 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() {
framework.ExpectNoError(err, "Error getting default storageclass: %v", err) framework.ExpectNoError(err, "Error getting default storageclass: %v", err)
pluginName := defaultClass.Provisioner pluginName := defaultClass.Provisioner
controllerMetrics, err := metricsGrabber.GrabFromControllerManager()
framework.ExpectNoError(err, "Error getting c-m metrics : %v", err)
storageOpMetrics := getControllerStorageMetrics(controllerMetrics, pluginName)
invalidSc = &storagev1.StorageClass{ invalidSc = &storagev1.StorageClass{
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("fail-metrics-invalid-sc-%s", pvc.Namespace), Name: fmt.Sprintf("fail-metrics-invalid-sc-%s", pvc.Namespace),
@ -210,7 +204,6 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() {
updatedStorageMetrics := getControllerStorageMetrics(updatedControllerMetrics, pluginName) updatedStorageMetrics := getControllerStorageMetrics(updatedControllerMetrics, pluginName)
framework.ExpectNotEqual(len(updatedStorageMetrics.statusMetrics), 0, "Error fetching c-m updated storage metrics") framework.ExpectNotEqual(len(updatedStorageMetrics.statusMetrics), 0, "Error fetching c-m updated storage metrics")
verifyMetricCount(storageOpMetrics, updatedStorageMetrics, "volume_provision", true)
} }
filesystemMode := func(isEphemeral bool) { filesystemMode := func(isEphemeral bool) {
@ -465,6 +458,8 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() {
ginkgo.It("should create prometheus metrics for volume provisioning and attach/detach", func() { ginkgo.It("should create prometheus metrics for volume provisioning and attach/detach", func() {
provisioning(isEphemeral) provisioning(isEphemeral)
}) })
// TODO(mauriciopoppe): after CSIMigration is turned on we're no longer reporting
// the volume_provision metric (removed in #106609); the bug is tracked in issue #106773.
ginkgo.It("should create prometheus metrics for volume provisioning errors [Slow]", func() { ginkgo.It("should create prometheus metrics for volume provisioning errors [Slow]", func() {
provisioningError(isEphemeral) provisioningError(isEphemeral)
}) })
@ -744,27 +739,23 @@ func getControllerStorageMetrics(ms e2emetrics.ControllerManagerMetrics, pluginN
for method, samples := range ms { for method, samples := range ms {
switch method { switch method {
// from the base metric name "storage_operation_duration_seconds"
case "storage_operation_duration_seconds_count": case "storage_operation_duration_seconds_count":
for _, sample := range samples { for _, sample := range samples {
count := int64(sample.Value) count := int64(sample.Value)
operation := string(sample.Metric["operation_name"]) operation := string(sample.Metric["operation_name"])
// if the volumes were provisioned with a CSI Driver
// the metric's "volume_plugin" label will be prefixed with
// "kubernetes.io/csi:"
metricPluginName := string(sample.Metric["volume_plugin"]) metricPluginName := string(sample.Metric["volume_plugin"])
if len(pluginName) > 0 && pluginName != metricPluginName {
continue
}
result.latencyMetrics[operation] = count
}
case "storage_operation_status_count":
for _, sample := range samples {
count := int64(sample.Value)
operation := string(sample.Metric["operation_name"])
status := string(sample.Metric["status"]) status := string(sample.Metric["status"])
statusCounts := result.statusMetrics[operation] if strings.Index(metricPluginName, pluginName) < 0 {
metricPluginName := string(sample.Metric["volume_plugin"]) // the metric volume plugin field doesn't match
if len(pluginName) > 0 && pluginName != metricPluginName { // the default storageClass.Provisioner field
continue continue
} }
statusCounts := result.statusMetrics[operation]
switch status { switch status {
case "success": case "success":
statusCounts.successCount = count statusCounts.successCount = count
@ -774,8 +765,8 @@ func getControllerStorageMetrics(ms e2emetrics.ControllerManagerMetrics, pluginN
statusCounts.otherCount = count statusCounts.otherCount = count
} }
result.statusMetrics[operation] = statusCounts result.statusMetrics[operation] = statusCounts
result.latencyMetrics[operation] = count
} }
} }
} }
return result return result