Merge pull request #108758 from fengzixu/improvement-volume-health

re-push "add volume kubelet_volume_stats_health_abnormal to kubelet #105585"
This commit is contained in:
Kubernetes Prow Robot 2022-03-29 17:35:34 -07:00 committed by GitHub
commit b0254c8a0b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 228 additions and 62 deletions

View File

@ -61,6 +61,12 @@ var (
[]string{"namespace", "persistentvolumeclaim"}, nil,
metrics.ALPHA, "",
)
// volumeStatsHealthAbnormalDesc describes the ALPHA gauge that reports volume
// health, labeled by PVC namespace and name. Per its help text the value is 1
// for an unhealthy volume and 0 for a healthy one.
volumeStatsHealthAbnormalDesc = metrics.NewDesc(
metrics.BuildFQName("", kubeletmetrics.KubeletSubsystem, kubeletmetrics.VolumeStatsHealthStatusAbnormalKey),
"Abnormal volume health status. The count is either 1 or 0. 1 indicates the volume is unhealthy, 0 indicates volume is healthy",
[]string{"namespace", "persistentvolumeclaim"}, nil,
metrics.ALPHA, "")
)
type volumeStatsCollector struct {
@ -85,6 +91,7 @@ func (collector *volumeStatsCollector) DescribeWithStability(ch chan<- *metrics.
ch <- volumeStatsInodesDesc
ch <- volumeStatsInodesFreeDesc
ch <- volumeStatsInodesUsedDesc
ch <- volumeStatsHealthAbnormalDesc
}
// CollectWithStability implements the metrics.StableCollector interface.
@ -95,7 +102,6 @@ func (collector *volumeStatsCollector) CollectWithStability(ch chan<- metrics.Me
}
addGauge := func(desc *metrics.Desc, pvcRef *stats.PVCReference, v float64, lv ...string) {
lv = append([]string{pvcRef.Namespace, pvcRef.Name}, lv...)
ch <- metrics.NewLazyConstMetric(desc, metrics.GaugeValue, v, lv...)
}
allPVCs := sets.String{}
@ -120,7 +126,18 @@ func (collector *volumeStatsCollector) CollectWithStability(ch chan<- metrics.Me
addGauge(volumeStatsInodesDesc, pvcRef, float64(*volumeStat.Inodes))
addGauge(volumeStatsInodesFreeDesc, pvcRef, float64(*volumeStat.InodesFree))
addGauge(volumeStatsInodesUsedDesc, pvcRef, float64(*volumeStat.InodesUsed))
if volumeStat.VolumeHealthStats != nil {
addGauge(volumeStatsHealthAbnormalDesc, pvcRef, convertBoolToFloat64(volumeStat.VolumeHealthStats.Abnormal))
}
allPVCs.Insert(pvcUniqStr)
}
}
}
// convertBoolToFloat64 translates a boolean into a gauge-friendly number:
// true becomes 1 and false becomes 0.
func convertBoolToFloat64(boolVal bool) float64 {
	// The zero value already encodes "false"; only the true case needs work.
	var gauge float64
	if boolVal {
		gauge = 1
	}
	return gauge
}

View File

@ -32,6 +32,126 @@ func newUint64Pointer(i uint64) *uint64 {
}
// TestVolumeStatsCollector feeds the collector pod stats in which two pods
// reference the same PVC (testns/testpvc) with an abnormal health status, plus
// one volume that has no PVC reference, and checks the exposed
// kubelet_volume_stats_* series against the expected text output.
func TestVolumeStatsCollector(t *testing.T) {
// Fixed metadata on type and help text. We prepend this to every expected
// output so we only have to modify a single place when doing adjustments.
const metadata = `
# HELP kubelet_volume_stats_available_bytes [ALPHA] Number of available bytes in the volume
# TYPE kubelet_volume_stats_available_bytes gauge
# HELP kubelet_volume_stats_capacity_bytes [ALPHA] Capacity in bytes of the volume
# TYPE kubelet_volume_stats_capacity_bytes gauge
# HELP kubelet_volume_stats_inodes [ALPHA] Maximum number of inodes in the volume
# TYPE kubelet_volume_stats_inodes gauge
# HELP kubelet_volume_stats_inodes_free [ALPHA] Number of free inodes in the volume
# TYPE kubelet_volume_stats_inodes_free gauge
# HELP kubelet_volume_stats_inodes_used [ALPHA] Number of used inodes in the volume
# TYPE kubelet_volume_stats_inodes_used gauge
# HELP kubelet_volume_stats_used_bytes [ALPHA] Number of used bytes in the volume
# TYPE kubelet_volume_stats_used_bytes gauge
# HELP kubelet_volume_stats_health_status_abnormal [ALPHA] Abnormal volume health status. The count is either 1 or 0. 1 indicates the volume is unhealthy, 0 indicates volume is healthy
# TYPE kubelet_volume_stats_health_status_abnormal gauge
`
var (
podStats = []statsapi.PodStats{
{
PodRef: statsapi.PodReference{Name: "test-pod", Namespace: "test-namespace", UID: "UID_test-pod"},
StartTime: metav1.Now(),
VolumeStats: []statsapi.VolumeStats{
// Volume without a PVC reference; no series for it appears in want.
{
FsStats: statsapi.FsStats{
Time: metav1.Now(),
AvailableBytes: newUint64Pointer(5.663154176e+09),
CapacityBytes: newUint64Pointer(1.0434699264e+10),
UsedBytes: newUint64Pointer(4.21789696e+09),
InodesFree: newUint64Pointer(655344),
Inodes: newUint64Pointer(655360),
InodesUsed: newUint64Pointer(16),
},
Name: "test",
PVCRef: nil,
},
// Volume backed by PVC testns/testpvc and reported as abnormal.
{
FsStats: statsapi.FsStats{
Time: metav1.Now(),
AvailableBytes: newUint64Pointer(5.663154176e+09),
CapacityBytes: newUint64Pointer(1.0434699264e+10),
UsedBytes: newUint64Pointer(4.21789696e+09),
InodesFree: newUint64Pointer(655344),
Inodes: newUint64Pointer(655360),
InodesUsed: newUint64Pointer(16),
},
Name: "test",
PVCRef: &statsapi.PVCReference{
Name: "testpvc",
Namespace: "testns",
},
VolumeHealthStats: &statsapi.VolumeHealthStats{
Abnormal: true,
},
},
},
},
{
// Another pod references the same PVC (testns/testpvc). The expected
// output below still lists each series for that PVC only once.
PodRef: statsapi.PodReference{Name: "test-pod-2", Namespace: "test-namespace", UID: "UID_test-pod"},
StartTime: metav1.Now(),
VolumeStats: []statsapi.VolumeStats{
{
FsStats: statsapi.FsStats{
Time: metav1.Now(),
AvailableBytes: newUint64Pointer(5.663154176e+09),
CapacityBytes: newUint64Pointer(1.0434699264e+10),
UsedBytes: newUint64Pointer(4.21789696e+09),
InodesFree: newUint64Pointer(655344),
Inodes: newUint64Pointer(655360),
InodesUsed: newUint64Pointer(16),
},
Name: "test",
PVCRef: &statsapi.PVCReference{
Name: "testpvc",
Namespace: "testns",
},
VolumeHealthStats: &statsapi.VolumeHealthStats{
Abnormal: true,
},
},
},
},
}
// Expected exposition: one sample per metric for testns/testpvc, with the
// health gauge at 1 because the fixture marks the volume abnormal.
want = metadata + `
kubelet_volume_stats_available_bytes{namespace="testns",persistentvolumeclaim="testpvc"} 5.663154176e+09
kubelet_volume_stats_capacity_bytes{namespace="testns",persistentvolumeclaim="testpvc"} 1.0434699264e+10
kubelet_volume_stats_inodes{namespace="testns",persistentvolumeclaim="testpvc"} 655360
kubelet_volume_stats_inodes_free{namespace="testns",persistentvolumeclaim="testpvc"} 655344
kubelet_volume_stats_inodes_used{namespace="testns",persistentvolumeclaim="testpvc"} 16
kubelet_volume_stats_used_bytes{namespace="testns",persistentvolumeclaim="testpvc"} 4.21789696e+09
kubelet_volume_stats_health_status_abnormal{namespace="testns",persistentvolumeclaim="testpvc"} 1
`
// Metric names passed to the comparison helper below.
metrics = []string{
"kubelet_volume_stats_available_bytes",
"kubelet_volume_stats_capacity_bytes",
"kubelet_volume_stats_inodes",
"kubelet_volume_stats_inodes_free",
"kubelet_volume_stats_inodes_used",
"kubelet_volume_stats_used_bytes",
"kubelet_volume_stats_health_status_abnormal",
}
)
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
mockStatsProvider := statstest.NewMockProvider(mockCtrl)
// Both listing methods return the same fixture and may be called any number
// of times, so the test does not pin which one the collector uses.
mockStatsProvider.EXPECT().ListPodStats().Return(podStats, nil).AnyTimes()
mockStatsProvider.EXPECT().ListPodStatsAndUpdateCPUNanoCoreUsage().Return(podStats, nil).AnyTimes()
if err := testutil.CustomCollectAndCompare(&volumeStatsCollector{statsProvider: mockStatsProvider}, strings.NewReader(want), metrics...); err != nil {
t.Errorf("unexpected collecting result:\n%s", err)
}
}
func TestVolumeStatsCollectorWithNullVolumeStatus(t *testing.T) {
// Fixed metadata on type and help text. We prepend this to every expected
// output so we only have to modify a single place when doing adjustments.
const metadata = `
@ -86,29 +206,6 @@ func TestVolumeStatsCollector(t *testing.T) {
},
},
},
{
// Another pod references the same PVC (test-namespace/testpvc).
PodRef: statsapi.PodReference{Name: "test-pod-2", Namespace: "test-namespace", UID: "UID_test-pod"},
StartTime: metav1.Now(),
VolumeStats: []statsapi.VolumeStats{
{
FsStats: statsapi.FsStats{
Time: metav1.Now(),
AvailableBytes: newUint64Pointer(5.663154176e+09),
CapacityBytes: newUint64Pointer(1.0434699264e+10),
UsedBytes: newUint64Pointer(4.21789696e+09),
InodesFree: newUint64Pointer(655344),
Inodes: newUint64Pointer(655360),
InodesUsed: newUint64Pointer(16),
},
Name: "test",
PVCRef: &statsapi.PVCReference{
Name: "testpvc",
Namespace: "testns",
},
},
},
},
}
want = metadata + `

View File

@ -30,28 +30,29 @@ import (
// This const block defines the metric names for the kubelet metrics.
const (
KubeletSubsystem = "kubelet"
NodeNameKey = "node_name"
NodeLabelKey = "node"
PodWorkerDurationKey = "pod_worker_duration_seconds"
PodStartDurationKey = "pod_start_duration_seconds"
CgroupManagerOperationsKey = "cgroup_manager_duration_seconds"
PodWorkerStartDurationKey = "pod_worker_start_duration_seconds"
PLEGRelistDurationKey = "pleg_relist_duration_seconds"
PLEGDiscardEventsKey = "pleg_discard_events"
PLEGRelistIntervalKey = "pleg_relist_interval_seconds"
PLEGLastSeenKey = "pleg_last_seen_seconds"
EvictionsKey = "evictions"
EvictionStatsAgeKey = "eviction_stats_age_seconds"
PreemptionsKey = "preemptions"
VolumeStatsCapacityBytesKey = "volume_stats_capacity_bytes"
VolumeStatsAvailableBytesKey = "volume_stats_available_bytes"
VolumeStatsUsedBytesKey = "volume_stats_used_bytes"
VolumeStatsInodesKey = "volume_stats_inodes"
VolumeStatsInodesFreeKey = "volume_stats_inodes_free"
VolumeStatsInodesUsedKey = "volume_stats_inodes_used"
RunningPodsKey = "running_pods"
RunningContainersKey = "running_containers"
KubeletSubsystem = "kubelet"
NodeNameKey = "node_name"
NodeLabelKey = "node"
PodWorkerDurationKey = "pod_worker_duration_seconds"
PodStartDurationKey = "pod_start_duration_seconds"
CgroupManagerOperationsKey = "cgroup_manager_duration_seconds"
PodWorkerStartDurationKey = "pod_worker_start_duration_seconds"
PLEGRelistDurationKey = "pleg_relist_duration_seconds"
PLEGDiscardEventsKey = "pleg_discard_events"
PLEGRelistIntervalKey = "pleg_relist_interval_seconds"
PLEGLastSeenKey = "pleg_last_seen_seconds"
EvictionsKey = "evictions"
EvictionStatsAgeKey = "eviction_stats_age_seconds"
PreemptionsKey = "preemptions"
VolumeStatsCapacityBytesKey = "volume_stats_capacity_bytes"
VolumeStatsAvailableBytesKey = "volume_stats_available_bytes"
VolumeStatsUsedBytesKey = "volume_stats_used_bytes"
VolumeStatsInodesKey = "volume_stats_inodes"
VolumeStatsInodesFreeKey = "volume_stats_inodes_free"
VolumeStatsInodesUsedKey = "volume_stats_inodes_used"
VolumeStatsHealthStatusAbnormalKey = "volume_stats_health_status_abnormal"
RunningPodsKey = "running_pods"
RunningContainersKey = "running_containers"
// Metrics keys of remote runtime operations
RuntimeOperationsKey = "runtime_operations_total"
RuntimeOperationsDurationKey = "runtime_operations_duration_seconds"

View File

@ -177,7 +177,10 @@ func (s *volumeStatCalculator) calcAndStoreStats() {
// parsePodVolumeStats converts (internal) volume.Metrics to (external) stats.VolumeStats structures
func (s *volumeStatCalculator) parsePodVolumeStats(podName string, pvcRef *stats.PVCReference, metric *volume.Metrics, volSpec v1.Volume) stats.VolumeStats {
var available, capacity, used, inodes, inodesFree, inodesUsed uint64
var (
available, capacity, used, inodes, inodesFree, inodesUsed uint64
)
if metric.Available != nil {
available = uint64(metric.Available.Value())
}
@ -197,10 +200,18 @@ func (s *volumeStatCalculator) parsePodVolumeStats(podName string, pvcRef *stats
inodesUsed = uint64(metric.InodesUsed.Value())
}
return stats.VolumeStats{
volumeStats := stats.VolumeStats{
Name: podName,
PVCRef: pvcRef,
FsStats: stats.FsStats{Time: metric.Time, AvailableBytes: &available, CapacityBytes: &capacity,
UsedBytes: &used, Inodes: &inodes, InodesFree: &inodesFree, InodesUsed: &inodesUsed},
}
if metric.Abnormal != nil {
volumeStats.VolumeHealthStats = &stats.VolumeHealthStats{
Abnormal: *metric.Abnormal,
}
}
return volumeStats
}

View File

@ -128,8 +128,9 @@ func TestPVCRef(t *testing.T) {
assert.Len(t, append(vs.EphemeralVolumes, vs.PersistentVolumes...), 4)
// Verify 'vol0' doesn't have a PVC reference
assert.Contains(t, append(vs.EphemeralVolumes, vs.PersistentVolumes...), kubestats.VolumeStats{
Name: vol0,
FsStats: expectedFSStats(),
Name: vol0,
FsStats: expectedFSStats(),
VolumeHealthStats: expectedVolumeHealthStats(),
})
// Verify 'vol1' has a PVC reference
assert.Contains(t, append(vs.EphemeralVolumes, vs.PersistentVolumes...), kubestats.VolumeStats{
@ -138,16 +139,18 @@ func TestPVCRef(t *testing.T) {
Name: pvcClaimName0,
Namespace: namespace0,
},
FsStats: expectedFSStats(),
FsStats: expectedFSStats(),
VolumeHealthStats: expectedVolumeHealthStats(),
})
// Verify 'vol2' has a PVC reference
// Verify 'vol2' has a PVC reference
assert.Contains(t, append(vs.EphemeralVolumes, vs.PersistentVolumes...), kubestats.VolumeStats{
Name: vol2,
PVCRef: &kubestats.PVCReference{
Name: pvcClaimName1,
Namespace: namespace0,
},
FsStats: expectedBlockStats(),
FsStats: expectedBlockStats(),
VolumeHealthStats: expectedVolumeHealthStats(),
})
// Verify 'vol3' has a PVC reference
assert.Contains(t, append(vs.EphemeralVolumes, vs.PersistentVolumes...), kubestats.VolumeStats{
@ -156,7 +159,8 @@ func TestPVCRef(t *testing.T) {
Name: pName0 + "-" + vol3,
Namespace: namespace0,
},
FsStats: expectedFSStats(),
FsStats: expectedFSStats(),
VolumeHealthStats: expectedVolumeHealthStats(),
})
}
@ -202,8 +206,10 @@ func TestAbnormalVolumeEvent(t *testing.T) {
}
// Calculate stats for pod
volumeCondition.Message = "The target path of the volume doesn't exist"
volumeCondition.Abnormal = true
if volumeCondition != nil {
volumeCondition.Message = "The target path of the volume doesn't exist"
volumeCondition.Abnormal = true
}
statsCalculator := newVolumeStatCalculator(mockStats, time.Minute, fakePod, &fakeEventRecorder)
statsCalculator.calcAndStoreStats()
@ -233,16 +239,21 @@ func (v *fakeVolume) GetMetrics() (*volume.Metrics, error) {
}
func expectedMetrics() *volume.Metrics {
return &volume.Metrics{
vMetrics := &volume.Metrics{
Available: resource.NewQuantity(available, resource.BinarySI),
Capacity: resource.NewQuantity(capacity, resource.BinarySI),
Used: resource.NewQuantity(available-capacity, resource.BinarySI),
Inodes: resource.NewQuantity(inodesTotal, resource.BinarySI),
InodesFree: resource.NewQuantity(inodesFree, resource.BinarySI),
InodesUsed: resource.NewQuantity(inodesTotal-inodesFree, resource.BinarySI),
Message: &volumeCondition.Message,
Abnormal: &volumeCondition.Abnormal,
}
if volumeCondition != nil {
vMetrics.Message = &volumeCondition.Message
vMetrics.Abnormal = &volumeCondition.Abnormal
}
return vMetrics
}
func expectedFSStats() kubestats.FsStats {
@ -263,6 +274,17 @@ func expectedFSStats() kubestats.FsStats {
}
}
// expectedVolumeHealthStats returns the health stats the tests expect, derived
// from expectedMetrics(). The result is always non-nil; Abnormal stays false
// unless the expected metrics carry an Abnormal value, which is then copied.
func expectedVolumeHealthStats() *kubestats.VolumeHealthStats {
	abnormal := false
	if m := expectedMetrics(); m != nil && m.Abnormal != nil {
		abnormal = *m.Abnormal
	}
	return &kubestats.VolumeHealthStats{Abnormal: abnormal}
}
// Fake block-volume/metrics provider, block-devices have no inodes
var _ volume.BlockVolume = &fakeBlockVolume{}
@ -279,11 +301,17 @@ func (v *fakeBlockVolume) GetMetrics() (*volume.Metrics, error) {
}
func expectedBlockMetrics() *volume.Metrics {
return &volume.Metrics{
vMetrics := &volume.Metrics{
Available: resource.NewQuantity(available, resource.BinarySI),
Capacity: resource.NewQuantity(capacity, resource.BinarySI),
Used: resource.NewQuantity(available-capacity, resource.BinarySI),
}
if volumeCondition != nil {
vMetrics.Abnormal = &volumeCondition.Abnormal
}
return vMetrics
}
func expectedBlockStats() kubestats.FsStats {

View File

@ -271,6 +271,17 @@ type VolumeStats struct {
// Reference to the PVC, if one exists
// +optional
PVCRef *PVCReference `json:"pvcRef,omitempty"`
// VolumeHealthStats contains data about volume health
// +optional
VolumeHealthStats *VolumeHealthStats `json:"volumeHealthStats,omitempty"`
}
// VolumeHealthStats contains data about volume health.
type VolumeHealthStats struct {
// Normal volumes are available for use and operating optimally.
// An abnormal volume does not meet these criteria.
// The JSON tag has no omitempty, so the "abnormal" key is emitted even
// when the value is false.
Abnormal bool `json:"abnormal"`
}
// PVCReference contains enough information to describe the referenced PVC.

View File

@ -230,8 +230,9 @@ var _ = SIGDescribe("Summary API [NodeConformance]", func() {
}),
"VolumeStats": gstruct.MatchAllElements(summaryObjectID, gstruct.Elements{
"test-empty-dir": gstruct.MatchAllFields(gstruct.Fields{
"Name": gomega.Equal("test-empty-dir"),
"PVCRef": gomega.BeNil(),
"Name": gomega.Equal("test-empty-dir"),
"PVCRef": gomega.BeNil(),
"VolumeHealthStats": gomega.BeNil(),
"FsStats": gstruct.MatchAllFields(gstruct.Fields{
"Time": recent(maxStatsAge),
"AvailableBytes": fsCapacityBounds,