mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-26 05:03:09 +00:00
Add metrics for force cleaned mounts after failed reconstruction
Count the number of force-cleaned mounts, and how many of those cleanups failed, after a volume fails reconstruction.
This commit is contained in:
parent
bd73aee9db
commit
9ca548fcf0
@ -31,8 +31,10 @@ const (
|
|||||||
|
|
||||||
// Metric keys for Volume Manager.
|
// Metric keys for Volume Manager.
|
||||||
volumeManagerTotalVolumes = "volume_manager_total_volumes"
|
volumeManagerTotalVolumes = "volume_manager_total_volumes"
|
||||||
reconstructedVolumesTotal = "reconstructed_volumes_total"
|
reconstructVolumeOperationsTotal = "reconstruct_volume_operations_total"
|
||||||
reconstructedVolumesErrorsTotal = "reconstructed_volumes_errors_total"
|
reconstructVolumeOperationsErrorsTotal = "reconstruct_volume_operations_errors_total"
|
||||||
|
forceCleanedFailedVolumeOperationsTotal = "force_cleaned_failed_volume_operations_total"
|
||||||
|
forceCleanedFailedVolumeOperationsErrorsTotal = "force_cleaned_failed_volume_operation_errors_total"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@ -46,20 +48,35 @@ var (
|
|||||||
metrics.ALPHA, "",
|
metrics.ALPHA, "",
|
||||||
)
|
)
|
||||||
|
|
||||||
ReconstructedVolumesTotal = metrics.NewCounter(
|
ReconstructVolumeOperationsTotal = metrics.NewCounter(
|
||||||
&metrics.CounterOpts{
|
&metrics.CounterOpts{
|
||||||
Name: reconstructedVolumesTotal,
|
Name: reconstructVolumeOperationsTotal,
|
||||||
Help: "The number of volumes that were attempted to be reconstructed from the operating system during kubelet startup. This includes both successful and failed reconstruction.",
|
Help: "The number of volumes that were attempted to be reconstructed from the operating system during kubelet startup. This includes both successful and failed reconstruction.",
|
||||||
StabilityLevel: metrics.ALPHA,
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
ReconstructedVolumesErrorsTotal = metrics.NewCounter(
|
ReconstructVolumeOperationsErrorsTotal = metrics.NewCounter(
|
||||||
&metrics.CounterOpts{
|
&metrics.CounterOpts{
|
||||||
Name: reconstructedVolumesErrorsTotal,
|
Name: reconstructVolumeOperationsErrorsTotal,
|
||||||
Help: "The number of volumes that failed reconstruction from the operating system during kubelet startup.",
|
Help: "The number of volumes that failed reconstruction from the operating system during kubelet startup.",
|
||||||
StabilityLevel: metrics.ALPHA,
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
ForceCleanedFailedVolumeOperationsTotal = metrics.NewCounter(
|
||||||
|
&metrics.CounterOpts{
|
||||||
|
Name: forceCleanedFailedVolumeOperationsTotal,
|
||||||
|
Help: "The number of volumes that were force cleaned after their reconstruction failed during kubelet startup. This includes both successful and failed cleanups.",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
ForceCleanedFailedVolumeOperationsErrorsTotal = metrics.NewCounter(
|
||||||
|
&metrics.CounterOpts{
|
||||||
|
Name: forceCleanedFailedVolumeOperationsErrorsTotal,
|
||||||
|
Help: "The number of volumes that failed force cleanup after their reconstruction failed during kubelet startup.",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
|
},
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
// volumeCount is a map of maps used as a counter.
|
// volumeCount is a map of maps used as a counter.
|
||||||
@ -78,8 +95,10 @@ func (v volumeCount) add(state, plugin string) {
|
|||||||
func Register(asw cache.ActualStateOfWorld, dsw cache.DesiredStateOfWorld, pluginMgr *volume.VolumePluginMgr) {
|
func Register(asw cache.ActualStateOfWorld, dsw cache.DesiredStateOfWorld, pluginMgr *volume.VolumePluginMgr) {
|
||||||
registerMetrics.Do(func() {
|
registerMetrics.Do(func() {
|
||||||
legacyregistry.CustomMustRegister(&totalVolumesCollector{asw: asw, dsw: dsw, pluginMgr: pluginMgr})
|
legacyregistry.CustomMustRegister(&totalVolumesCollector{asw: asw, dsw: dsw, pluginMgr: pluginMgr})
|
||||||
legacyregistry.MustRegister(ReconstructedVolumesTotal)
|
legacyregistry.MustRegister(ReconstructVolumeOperationsTotal)
|
||||||
legacyregistry.MustRegister(ReconstructedVolumesErrorsTotal)
|
legacyregistry.MustRegister(ReconstructVolumeOperationsErrorsTotal)
|
||||||
|
legacyregistry.MustRegister(ForceCleanedFailedVolumeOperationsTotal)
|
||||||
|
legacyregistry.MustRegister(ForceCleanedFailedVolumeOperationsErrorsTotal)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -95,10 +95,12 @@ func (rc *reconciler) cleanupMounts(volume podVolume) {
|
|||||||
PluginName: volume.pluginName,
|
PluginName: volume.pluginName,
|
||||||
PodUID: types.UID(volume.podName),
|
PodUID: types.UID(volume.podName),
|
||||||
}
|
}
|
||||||
|
metrics.ForceCleanedFailedVolumeOperationsTotal.Inc()
|
||||||
// TODO: Currently cleanupMounts only performs the UnmountVolume operation. In the next PR, we will add
|
// TODO: Currently cleanupMounts only performs the UnmountVolume operation. In the next PR, we will add
|
||||||
// support for unmounting both the volume and the device in the same routine.
|
// support for unmounting both the volume and the device in the same routine.
|
||||||
err := rc.operationExecutor.UnmountVolume(mountedVolume, rc.actualStateOfWorld, rc.kubeletPodsDir)
|
err := rc.operationExecutor.UnmountVolume(mountedVolume, rc.actualStateOfWorld, rc.kubeletPodsDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
metrics.ForceCleanedFailedVolumeOperationsErrorsTotal.Inc()
|
||||||
klog.ErrorS(err, mountedVolume.GenerateErrorDetailed("volumeHandler.UnmountVolumeHandler for UnmountVolume failed", err).Error())
|
klog.ErrorS(err, mountedVolume.GenerateErrorDetailed("volumeHandler.UnmountVolumeHandler for UnmountVolume failed", err).Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -179,10 +181,10 @@ func getVolumesFromPodDir(podDir string) ([]podVolume, error) {
|
|||||||
|
|
||||||
// Reconstruct volume data structure by reading the pod's volume directories
|
// Reconstruct volume data structure by reading the pod's volume directories
|
||||||
func (rc *reconciler) reconstructVolume(volume podVolume) (rvolume *reconstructedVolume, rerr error) {
|
func (rc *reconciler) reconstructVolume(volume podVolume) (rvolume *reconstructedVolume, rerr error) {
|
||||||
metrics.ReconstructedVolumesTotal.Inc()
|
metrics.ReconstructVolumeOperationsTotal.Inc()
|
||||||
defer func() {
|
defer func() {
|
||||||
if rerr != nil {
|
if rerr != nil {
|
||||||
metrics.ReconstructedVolumesErrorsTotal.Inc()
|
metrics.ReconstructVolumeOperationsErrorsTotal.Inc()
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user