mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-10 04:27:54 +00:00
Merge pull request #119185 from xing-yang/metrics_attach
Add reason to force detach metric
This commit is contained in:
commit
986171d388
@ -35,6 +35,14 @@ import (
|
|||||||
|
|
||||||
const pluginNameNotAvailable = "N/A"
|
const pluginNameNotAvailable = "N/A"
|
||||||
|
|
||||||
|
const (
|
||||||
|
// ForceDetachReasonTimeout is the force detach reason recorded when maxWaitForUnmountDuration has expired.
|
||||||
|
ForceDetachReasonTimeout = "timeout"
|
||||||
|
// ForceDetachReasonOutOfService is the force detach reason recorded when the node has an out-of-service taint.
|
||||||
|
ForceDetachReasonOutOfService = "out-of-service"
|
||||||
|
attachDetachController = "attach_detach_controller"
|
||||||
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
inUseVolumeMetricDesc = metrics.NewDesc(
|
inUseVolumeMetricDesc = metrics.NewDesc(
|
||||||
metrics.BuildFQName("", "storage_count", "attachable_volumes_in_use"),
|
metrics.BuildFQName("", "storage_count", "attachable_volumes_in_use"),
|
||||||
@ -48,12 +56,15 @@ var (
|
|||||||
[]string{"plugin_name", "state"}, nil,
|
[]string{"plugin_name", "state"}, nil,
|
||||||
metrics.ALPHA, "")
|
metrics.ALPHA, "")
|
||||||
|
|
||||||
forcedDetachMetricCounter = metrics.NewCounter(
|
ForceDetachMetricCounter = metrics.NewCounterVec(
|
||||||
&metrics.CounterOpts{
|
&metrics.CounterOpts{
|
||||||
|
Subsystem: attachDetachController,
|
||||||
Name: "attachdetach_controller_forced_detaches",
|
Name: "attachdetach_controller_forced_detaches",
|
||||||
Help: "Number of times the A/D Controller performed a forced detach",
|
Help: "Number of times the A/D Controller performed a forced detach",
|
||||||
StabilityLevel: metrics.ALPHA,
|
StabilityLevel: metrics.ALPHA,
|
||||||
})
|
},
|
||||||
|
[]string{"reason"},
|
||||||
|
)
|
||||||
)
|
)
|
||||||
var registerMetrics sync.Once
|
var registerMetrics sync.Once
|
||||||
|
|
||||||
@ -75,7 +86,7 @@ func Register(pvcLister corelisters.PersistentVolumeClaimLister,
|
|||||||
pluginMgr,
|
pluginMgr,
|
||||||
csiMigratedPluginManager,
|
csiMigratedPluginManager,
|
||||||
intreeToCSITranslator))
|
intreeToCSITranslator))
|
||||||
legacyregistry.MustRegister(forcedDetachMetricCounter)
|
legacyregistry.MustRegister(ForceDetachMetricCounter)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -209,6 +220,6 @@ func (collector *attachDetachStateCollector) getTotalVolumesCount() volumeCount
|
|||||||
}
|
}
|
||||||
|
|
||||||
// RecordForcedDetachMetric records a forced detach by incrementing the forced detach counter.
|
// RecordForcedDetachMetric records a forced detach by incrementing the forced detach counter.
|
||||||
func RecordForcedDetachMetric() {
|
func RecordForcedDetachMetric(forceDetachReason string) {
|
||||||
forcedDetachMetricCounter.Inc()
|
ForceDetachMetricCounter.WithLabelValues(forceDetachReason).Inc()
|
||||||
}
|
}
|
||||||
|
@ -269,14 +269,21 @@ func (rc *reconciler) reconcile(ctx context.Context) {
|
|||||||
verifySafeToDetach := !(timeout || hasOutOfServiceTaint)
|
verifySafeToDetach := !(timeout || hasOutOfServiceTaint)
|
||||||
err = rc.attacherDetacher.DetachVolume(logger, attachedVolume.AttachedVolume, verifySafeToDetach, rc.actualStateOfWorld)
|
err = rc.attacherDetacher.DetachVolume(logger, attachedVolume.AttachedVolume, verifySafeToDetach, rc.actualStateOfWorld)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
if !timeout {
|
if verifySafeToDetach { // normal detach
|
||||||
logger.Info("attacherDetacher.DetachVolume started", "node", klog.KRef("", string(attachedVolume.NodeName)), "volumeName", attachedVolume.VolumeName)
|
logger.Info("attacherDetacher.DetachVolume started", "node", klog.KRef("", string(attachedVolume.NodeName)), "volumeName", attachedVolume.VolumeName)
|
||||||
} else {
|
} else { // force detach
|
||||||
metrics.RecordForcedDetachMetric()
|
if timeout {
|
||||||
logger.Info("attacherDetacher.DetachVolume started: this volume is not safe to detach, but maxWaitForUnmountDuration expired, force detaching",
|
metrics.RecordForcedDetachMetric(metrics.ForceDetachReasonTimeout)
|
||||||
"duration", rc.maxWaitForUnmountDuration,
|
logger.Info("attacherDetacher.DetachVolume started: this volume is not safe to detach, but maxWaitForUnmountDuration expired, force detaching",
|
||||||
"node", klog.KRef("", string(attachedVolume.NodeName)),
|
"duration", rc.maxWaitForUnmountDuration,
|
||||||
"volumeName", attachedVolume.VolumeName)
|
"node", klog.KRef("", string(attachedVolume.NodeName)),
|
||||||
|
"volumeName", attachedVolume.VolumeName)
|
||||||
|
} else {
|
||||||
|
metrics.RecordForcedDetachMetric(metrics.ForceDetachReasonOutOfService)
|
||||||
|
logger.Info("attacherDetacher.DetachVolume started: node has out-of-service taint, force detaching",
|
||||||
|
"node", klog.KRef("", string(attachedVolume.NodeName)),
|
||||||
|
"volumeName", attachedVolume.VolumeName)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -19,6 +19,7 @@ package reconciler
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"sync"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -30,10 +31,13 @@ import (
|
|||||||
"k8s.io/client-go/informers"
|
"k8s.io/client-go/informers"
|
||||||
"k8s.io/client-go/tools/record"
|
"k8s.io/client-go/tools/record"
|
||||||
featuregatetesting "k8s.io/component-base/featuregate/testing"
|
featuregatetesting "k8s.io/component-base/featuregate/testing"
|
||||||
|
"k8s.io/component-base/metrics/legacyregistry"
|
||||||
|
metricstestutil "k8s.io/component-base/metrics/testutil"
|
||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
"k8s.io/klog/v2/ktesting"
|
"k8s.io/klog/v2/ktesting"
|
||||||
"k8s.io/kubernetes/pkg/controller"
|
"k8s.io/kubernetes/pkg/controller"
|
||||||
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/cache"
|
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/cache"
|
||||||
|
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/metrics"
|
||||||
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/statusupdater"
|
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/statusupdater"
|
||||||
controllervolumetesting "k8s.io/kubernetes/pkg/controller/volume/attachdetach/testing"
|
controllervolumetesting "k8s.io/kubernetes/pkg/controller/volume/attachdetach/testing"
|
||||||
"k8s.io/kubernetes/pkg/features"
|
"k8s.io/kubernetes/pkg/features"
|
||||||
@ -51,6 +55,8 @@ const (
|
|||||||
volumeAttachedCheckTimeout = 5 * time.Second
|
volumeAttachedCheckTimeout = 5 * time.Second
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var registerMetrics sync.Once
|
||||||
|
|
||||||
// Calls Run()
|
// Calls Run()
|
||||||
// Verifies there are no calls to attach or detach.
|
// Verifies there are no calls to attach or detach.
|
||||||
func Test_Run_Positive_DoNothing(t *testing.T) {
|
func Test_Run_Positive_DoNothing(t *testing.T) {
|
||||||
@ -221,6 +227,9 @@ func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithUnmountedVolume(t *te
|
|||||||
// Deletes the node/volume/pod tuple from desiredStateOfWorld cache without first marking the node/volume as unmounted.
|
// Deletes the node/volume/pod tuple from desiredStateOfWorld cache without first marking the node/volume as unmounted.
|
||||||
// Verifies there is one detach call and no (new) attach calls.
|
// Verifies there is one detach call and no (new) attach calls.
|
||||||
func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithMountedVolume(t *testing.T) {
|
func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithMountedVolume(t *testing.T) {
|
||||||
|
registerMetrics.Do(func() {
|
||||||
|
legacyregistry.MustRegister(metrics.ForceDetachMetricCounter)
|
||||||
|
})
|
||||||
// Arrange
|
// Arrange
|
||||||
volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
|
volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
|
||||||
dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
|
dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
|
||||||
@ -287,6 +296,9 @@ func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithMountedVolume(t *test
|
|||||||
waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
|
waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
|
||||||
verifyNewDetacherCallCount(t, false /* expectZeroNewDetacherCallCount */, fakePlugin)
|
verifyNewDetacherCallCount(t, false /* expectZeroNewDetacherCallCount */, fakePlugin)
|
||||||
waitForDetachCallCount(t, 1 /* expectedDetachCallCount */, fakePlugin)
|
waitForDetachCallCount(t, 1 /* expectedDetachCallCount */, fakePlugin)
|
||||||
|
|
||||||
|
// Force detach metric due to timeout
|
||||||
|
testForceDetachMetric(t, 1, metrics.ForceDetachReasonTimeout)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Populates desiredStateOfWorld cache with one node/volume/pod tuple.
|
// Populates desiredStateOfWorld cache with one node/volume/pod tuple.
|
||||||
@ -852,6 +864,9 @@ func Test_Run_OneVolumeAttachAndDetachTimeoutNodesWithReadWriteOnce(t *testing.T
|
|||||||
// Verifies there is one detach call and no (new) attach calls.
|
// Verifies there is one detach call and no (new) attach calls.
|
||||||
func Test_Run_OneVolumeDetachOnOutOfServiceTaintedNode(t *testing.T) {
|
func Test_Run_OneVolumeDetachOnOutOfServiceTaintedNode(t *testing.T) {
|
||||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeOutOfServiceVolumeDetach, true)()
|
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeOutOfServiceVolumeDetach, true)()
|
||||||
|
registerMetrics.Do(func() {
|
||||||
|
legacyregistry.MustRegister(metrics.ForceDetachMetricCounter)
|
||||||
|
})
|
||||||
// Arrange
|
// Arrange
|
||||||
volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
|
volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
|
||||||
dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
|
dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
|
||||||
@ -920,6 +935,9 @@ func Test_Run_OneVolumeDetachOnOutOfServiceTaintedNode(t *testing.T) {
|
|||||||
waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
|
waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
|
||||||
verifyNewDetacherCallCount(t, false /* expectZeroNewDetacherCallCount */, fakePlugin)
|
verifyNewDetacherCallCount(t, false /* expectZeroNewDetacherCallCount */, fakePlugin)
|
||||||
waitForDetachCallCount(t, 1 /* expectedDetachCallCount */, fakePlugin)
|
waitForDetachCallCount(t, 1 /* expectedDetachCallCount */, fakePlugin)
|
||||||
|
|
||||||
|
// Force detach metric due to out-of-service taint
|
||||||
|
testForceDetachMetric(t, 1, metrics.ForceDetachReasonOutOfService)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Populates desiredStateOfWorld cache with one node/volume/pod tuple.
|
// Populates desiredStateOfWorld cache with one node/volume/pod tuple.
|
||||||
@ -1666,3 +1684,16 @@ func retryWithExponentialBackOff(initialDuration time.Duration, fn wait.Conditio
|
|||||||
}
|
}
|
||||||
return wait.ExponentialBackoff(backoff, fn)
|
return wait.ExponentialBackoff(backoff, fn)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// testForceDetachMetric verifies that the force detach counter for the given reason equals the expected count.
|
||||||
|
func testForceDetachMetric(t *testing.T, inputForceDetachMetricCounter int, reason string) {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
actualForceDetachMericCounter, err := metricstestutil.GetCounterMetricValue(metrics.ForceDetachMetricCounter.WithLabelValues(reason))
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Error getting actualForceDetachMericCounter")
|
||||||
|
}
|
||||||
|
if actualForceDetachMericCounter != float64(inputForceDetachMetricCounter) {
|
||||||
|
t.Errorf("Expected desiredForceDetachMericCounter to be %d, got %v", inputForceDetachMetricCounter, actualForceDetachMericCounter)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user