Add status count to the storage_operation_duration_seconds volume metric

This commit is contained in:
Mauricio Poppe 2021-11-22 22:16:25 +00:00
parent a142f86351
commit 04805ca330

View File

@ -19,6 +19,7 @@ package storage
import (
"context"
"fmt"
"strings"
"time"
"github.com/onsi/ginkgo"
@ -143,7 +144,6 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() {
err = e2epod.WaitTimeoutForPodRunningInNamespace(c, pod.Name, pod.Namespace, f.Timeouts.PodStart)
framework.ExpectNoError(err, "Error starting pod %s", pod.Name)
framework.Logf("Deleting pod %q/%q", pod.Namespace, pod.Name)
framework.ExpectNoError(e2epod.DeletePodWithWait(c, pod))
updatedStorageMetrics := waitForDetachAndGrabMetrics(storageOpMetrics, metricsGrabber, pluginName)
@ -151,7 +151,7 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() {
framework.ExpectNotEqual(len(updatedStorageMetrics.latencyMetrics), 0, "Error fetching c-m updated storage metrics")
framework.ExpectNotEqual(len(updatedStorageMetrics.statusMetrics), 0, "Error fetching c-m updated storage metrics")
volumeOperations := []string{"volume_provision", "volume_detach", "volume_attach"}
volumeOperations := []string{"volume_detach", "volume_attach"}
for _, volumeOp := range volumeOperations {
verifyMetricCount(storageOpMetrics, updatedStorageMetrics, volumeOp, false)
@ -168,12 +168,6 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() {
framework.ExpectNoError(err, "Error getting default storageclass: %v", err)
pluginName := defaultClass.Provisioner
controllerMetrics, err := metricsGrabber.GrabFromControllerManager()
framework.ExpectNoError(err, "Error getting c-m metrics : %v", err)
storageOpMetrics := getControllerStorageMetrics(controllerMetrics, pluginName)
invalidSc = &storagev1.StorageClass{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("fail-metrics-invalid-sc-%s", pvc.Namespace),
@ -210,7 +204,6 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() {
updatedStorageMetrics := getControllerStorageMetrics(updatedControllerMetrics, pluginName)
framework.ExpectNotEqual(len(updatedStorageMetrics.statusMetrics), 0, "Error fetching c-m updated storage metrics")
verifyMetricCount(storageOpMetrics, updatedStorageMetrics, "volume_provision", true)
}
filesystemMode := func(isEphemeral bool) {
@ -465,6 +458,8 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() {
ginkgo.It("should create prometheus metrics for volume provisioning and attach/detach", func() {
provisioning(isEphemeral)
})
// TODO(mauriciopoppe): after CSIMigration is turned on we're no longer reporting
// the volume_provision metric (removed in #106609), issue to investigate the bug #106773
ginkgo.It("should create prometheus metrics for volume provisioning errors [Slow]", func() {
provisioningError(isEphemeral)
})
@ -744,27 +739,23 @@ func getControllerStorageMetrics(ms e2emetrics.ControllerManagerMetrics, pluginN
for method, samples := range ms {
switch method {
// from the base metric name "storage_operation_duration_seconds"
case "storage_operation_duration_seconds_count":
for _, sample := range samples {
count := int64(sample.Value)
operation := string(sample.Metric["operation_name"])
// if the volumes were provisioned with a CSI Driver
// the metric operation name will be prefixed with
// "kubernetes.io/csi:"
metricPluginName := string(sample.Metric["volume_plugin"])
if len(pluginName) > 0 && pluginName != metricPluginName {
continue
}
result.latencyMetrics[operation] = count
}
case "storage_operation_status_count":
for _, sample := range samples {
count := int64(sample.Value)
operation := string(sample.Metric["operation_name"])
status := string(sample.Metric["status"])
statusCounts := result.statusMetrics[operation]
metricPluginName := string(sample.Metric["volume_plugin"])
if len(pluginName) > 0 && pluginName != metricPluginName {
if strings.Index(metricPluginName, pluginName) < 0 {
// the metric volume plugin field doesn't match
// the default storageClass.Provisioner field
continue
}
statusCounts := result.statusMetrics[operation]
switch status {
case "success":
statusCounts.successCount = count
@ -774,8 +765,8 @@ func getControllerStorageMetrics(ms e2emetrics.ControllerManagerMetrics, pluginN
statusCounts.otherCount = count
}
result.statusMetrics[operation] = statusCounts
result.latencyMetrics[operation] = count
}
}
}
return result