From 7d675381f85bca2f05814be95f7d33244d1f6fa0 Mon Sep 17 00:00:00 2001 From: fengzixu Date: Thu, 17 Mar 2022 00:21:12 +0000 Subject: [PATCH 1/3] fix: fix panic bug when volumeHealthStatus is nil --- .../metrics/collectors/volume_stats.go | 3 + .../metrics/collectors/volume_stats_test.go | 87 +++++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/pkg/kubelet/metrics/collectors/volume_stats.go b/pkg/kubelet/metrics/collectors/volume_stats.go index 8ff14a0b2ac..dae751c2dad 100644 --- a/pkg/kubelet/metrics/collectors/volume_stats.go +++ b/pkg/kubelet/metrics/collectors/volume_stats.go @@ -120,6 +120,9 @@ func (collector *volumeStatsCollector) CollectWithStability(ch chan<- metrics.Me addGauge(volumeStatsInodesDesc, pvcRef, float64(*volumeStat.Inodes)) addGauge(volumeStatsInodesFreeDesc, pvcRef, float64(*volumeStat.InodesFree)) addGauge(volumeStatsInodesUsedDesc, pvcRef, float64(*volumeStat.InodesUsed)) + if volumeStat.VolumeHealthStats != nil { + addGauge(volumeStatsHealthAbnormalDesc, pvcRef, convertBoolToFloat64(volumeStat.VolumeHealthStats.Abnormal)) + } allPVCs.Insert(pvcUniqStr) } } diff --git a/pkg/kubelet/metrics/collectors/volume_stats_test.go b/pkg/kubelet/metrics/collectors/volume_stats_test.go index 54f07178c6d..0e335a4d743 100644 --- a/pkg/kubelet/metrics/collectors/volume_stats_test.go +++ b/pkg/kubelet/metrics/collectors/volume_stats_test.go @@ -140,3 +140,90 @@ func TestVolumeStatsCollector(t *testing.T) { t.Errorf("unexpected collecting result:\n%s", err) } } + +func TestVolumeStatsCollectorWithNullVolumeStatus(t *testing.T) { + // Fixed metadata on type and help text. We prepend this to every expected + // output so we only have to modify a single place when doing adjustments. + const metadata = ` + # HELP kubelet_volume_stats_available_bytes [ALPHA] Number of available bytes in the volume + # TYPE kubelet_volume_stats_available_bytes gauge + # HELP kubelet_volume_stats_capacity_bytes [ALPHA] Capacity in bytes of the volume + # TYPE kubelet_volume_stats_capacity_bytes gauge + # HELP kubelet_volume_stats_inodes [ALPHA] Maximum number of inodes in the volume + # TYPE kubelet_volume_stats_inodes gauge + # HELP kubelet_volume_stats_inodes_free [ALPHA] Number of free inodes in the volume + # TYPE kubelet_volume_stats_inodes_free gauge + # HELP kubelet_volume_stats_inodes_used [ALPHA] Number of used inodes in the volume + # TYPE kubelet_volume_stats_inodes_used gauge + # HELP kubelet_volume_stats_used_bytes [ALPHA] Number of used bytes in the volume + # TYPE kubelet_volume_stats_used_bytes gauge + ` + + var ( + podStats = []statsapi.PodStats{ + { + PodRef: statsapi.PodReference{Name: "test-pod", Namespace: "test-namespace", UID: "UID_test-pod"}, + StartTime: metav1.Now(), + VolumeStats: []statsapi.VolumeStats{ + { + FsStats: statsapi.FsStats{ + Time: metav1.Now(), + AvailableBytes: newUint64Pointer(5.663154176e+09), + CapacityBytes: newUint64Pointer(1.0434699264e+10), + UsedBytes: newUint64Pointer(4.21789696e+09), + InodesFree: newUint64Pointer(655344), + Inodes: newUint64Pointer(655360), + InodesUsed: newUint64Pointer(16), + }, + Name: "test", + PVCRef: nil, + }, + { + FsStats: statsapi.FsStats{ + Time: metav1.Now(), + AvailableBytes: newUint64Pointer(5.663154176e+09), + CapacityBytes: newUint64Pointer(1.0434699264e+10), + UsedBytes: newUint64Pointer(4.21789696e+09), + InodesFree: newUint64Pointer(655344), + Inodes: newUint64Pointer(655360), + InodesUsed: newUint64Pointer(16), + }, + Name: "test", + PVCRef: &statsapi.PVCReference{ + Name: "testpvc", + Namespace: "testns", + }, + }, + }, + }, + } + + want = metadata + ` + kubelet_volume_stats_available_bytes{namespace="testns",persistentvolumeclaim="testpvc"} 5.663154176e+09 + kubelet_volume_stats_capacity_bytes{namespace="testns",persistentvolumeclaim="testpvc"} 1.0434699264e+10 + kubelet_volume_stats_inodes{namespace="testns",persistentvolumeclaim="testpvc"} 655360 + kubelet_volume_stats_inodes_free{namespace="testns",persistentvolumeclaim="testpvc"} 655344 + kubelet_volume_stats_inodes_used{namespace="testns",persistentvolumeclaim="testpvc"} 16 + kubelet_volume_stats_used_bytes{namespace="testns",persistentvolumeclaim="testpvc"} 4.21789696e+09 + ` + + metrics = []string{ + "kubelet_volume_stats_available_bytes", + "kubelet_volume_stats_capacity_bytes", + "kubelet_volume_stats_inodes", + "kubelet_volume_stats_inodes_free", + "kubelet_volume_stats_inodes_used", + "kubelet_volume_stats_used_bytes", + } + ) + + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + mockStatsProvider := statstest.NewMockProvider(mockCtrl) + + mockStatsProvider.EXPECT().ListPodStats().Return(podStats, nil).AnyTimes() + mockStatsProvider.EXPECT().ListPodStatsAndUpdateCPUNanoCoreUsage().Return(podStats, nil).AnyTimes() + if err := testutil.CustomCollectAndCompare(&volumeStatsCollector{statsProvider: mockStatsProvider}, strings.NewReader(want), metrics...); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } +} From 5cb6fab8f6336990cc58f7cb6abffc6ad178121b Mon Sep 17 00:00:00 2001 From: Kubernetes Prow Robot Date: Tue, 15 Mar 2022 05:58:11 -0700 Subject: [PATCH 2/3] Merge pull request #105585 from fengzixu/improvement-volume-health add volume kubelet_volume_stats_health_abnormal to kubelet --- .../metrics/collectors/volume_stats.go | 16 ++++++- .../metrics/collectors/volume_stats_test.go | 10 +++++ pkg/kubelet/metrics/metrics.go | 45 ++++++++++--------- .../server/stats/volume_stat_calculator.go | 15 ++++++- .../stats/volume_stat_calculator_test.go | 24 +++++++--- .../kubelet/pkg/apis/stats/v1alpha1/types.go | 11 +++++ test/e2e_node/summary_test.go | 5 ++- 7 files changed, 93 insertions(+), 33 deletions(-) diff --git a/pkg/kubelet/metrics/collectors/volume_stats.go b/pkg/kubelet/metrics/collectors/volume_stats.go index dae751c2dad..b6bf1870c4c 100644 --- a/pkg/kubelet/metrics/collectors/volume_stats.go +++ b/pkg/kubelet/metrics/collectors/volume_stats.go @@ -61,6 +61,12 @@ var ( []string{"namespace", "persistentvolumeclaim"}, nil, metrics.ALPHA, "", ) + + volumeStatsHealthAbnormalDesc = metrics.NewDesc( + metrics.BuildFQName("", kubeletmetrics.KubeletSubsystem, kubeletmetrics.VolumeStatsHealthStatusAbnormalKey), + "Abnormal volume health status. The count is either 1 or 0. 1 indicates the volume is unhealthy, 0 indicates volume is healthy", + []string{"namespace", "persistentvolumeclaim"}, nil, + metrics.ALPHA, "") ) type volumeStatsCollector struct { @@ -85,6 +91,7 @@ func (collector *volumeStatsCollector) DescribeWithStability(ch chan<- *metrics. ch <- volumeStatsInodesDesc ch <- volumeStatsInodesFreeDesc ch <- volumeStatsInodesUsedDesc + ch <- volumeStatsHealthAbnormalDesc } // CollectWithStability implements the metrics.StableCollector interface. @@ -95,7 +102,6 @@ func (collector *volumeStatsCollector) CollectWithStability(ch chan<- metrics.Me } addGauge := func(desc *metrics.Desc, pvcRef *stats.PVCReference, v float64, lv ...string) { lv = append([]string{pvcRef.Namespace, pvcRef.Name}, lv...) - ch <- metrics.NewLazyConstMetric(desc, metrics.GaugeValue, v, lv...) } allPVCs := sets.String{} @@ -127,3 +133,11 @@ func (collector *volumeStatsCollector) CollectWithStability(ch chan<- metrics.Me } } } + +func convertBoolToFloat64(boolVal bool) float64 { + if boolVal { + return 1 + } + + return 0 +} diff --git a/pkg/kubelet/metrics/collectors/volume_stats_test.go b/pkg/kubelet/metrics/collectors/volume_stats_test.go index 0e335a4d743..1d0e825a35b 100644 --- a/pkg/kubelet/metrics/collectors/volume_stats_test.go +++ b/pkg/kubelet/metrics/collectors/volume_stats_test.go @@ -47,6 +47,8 @@ func TestVolumeStatsCollector(t *testing.T) { # TYPE kubelet_volume_stats_inodes_used gauge # HELP kubelet_volume_stats_used_bytes [ALPHA] Number of used bytes in the volume # TYPE kubelet_volume_stats_used_bytes gauge + # HELP kubelet_volume_stats_health_status_abnormal [ALPHA] Abnormal volume health status. The count is either 1 or 0. 1 indicates the volume is unhealthy, 0 indicates volume is healthy + # TYPE kubelet_volume_stats_health_status_abnormal gauge ` var ( @@ -83,6 +85,9 @@ func TestVolumeStatsCollector(t *testing.T) { Name: "testpvc", Namespace: "testns", }, + VolumeHealthStats: &statsapi.VolumeHealthStats{ + Abnormal: true, + }, }, }, }, @@ -106,6 +111,9 @@ func TestVolumeStatsCollector(t *testing.T) { Name: "testpvc", Namespace: "testns", }, + VolumeHealthStats: &statsapi.VolumeHealthStats{ + Abnormal: true, + }, }, }, }, @@ -118,6 +126,7 @@ func TestVolumeStatsCollector(t *testing.T) { kubelet_volume_stats_inodes_free{namespace="testns",persistentvolumeclaim="testpvc"} 655344 kubelet_volume_stats_inodes_used{namespace="testns",persistentvolumeclaim="testpvc"} 16 kubelet_volume_stats_used_bytes{namespace="testns",persistentvolumeclaim="testpvc"} 4.21789696e+09 + kubelet_volume_stats_health_status_abnormal{namespace="testns",persistentvolumeclaim="testpvc"} 1 ` metrics = []string{ @@ -127,6 +136,7 @@ func TestVolumeStatsCollector(t *testing.T) { "kubelet_volume_stats_inodes_free", "kubelet_volume_stats_inodes_used", "kubelet_volume_stats_used_bytes", + "kubelet_volume_stats_health_status_abnormal", } ) diff --git a/pkg/kubelet/metrics/metrics.go b/pkg/kubelet/metrics/metrics.go index 2051ffed448..b7a6f2f0476 100644 --- a/pkg/kubelet/metrics/metrics.go +++ b/pkg/kubelet/metrics/metrics.go @@ -30,28 +30,29 @@ import ( // This const block defines the metric names for the kubelet metrics. const ( - KubeletSubsystem = "kubelet" - NodeNameKey = "node_name" - NodeLabelKey = "node" - PodWorkerDurationKey = "pod_worker_duration_seconds" - PodStartDurationKey = "pod_start_duration_seconds" - CgroupManagerOperationsKey = "cgroup_manager_duration_seconds" - PodWorkerStartDurationKey = "pod_worker_start_duration_seconds" - PLEGRelistDurationKey = "pleg_relist_duration_seconds" - PLEGDiscardEventsKey = "pleg_discard_events" - PLEGRelistIntervalKey = "pleg_relist_interval_seconds" - PLEGLastSeenKey = "pleg_last_seen_seconds" - EvictionsKey = "evictions" - EvictionStatsAgeKey = "eviction_stats_age_seconds" - PreemptionsKey = "preemptions" - VolumeStatsCapacityBytesKey = "volume_stats_capacity_bytes" - VolumeStatsAvailableBytesKey = "volume_stats_available_bytes" - VolumeStatsUsedBytesKey = "volume_stats_used_bytes" - VolumeStatsInodesKey = "volume_stats_inodes" - VolumeStatsInodesFreeKey = "volume_stats_inodes_free" - VolumeStatsInodesUsedKey = "volume_stats_inodes_used" - RunningPodsKey = "running_pods" - RunningContainersKey = "running_containers" + KubeletSubsystem = "kubelet" + NodeNameKey = "node_name" + NodeLabelKey = "node" + PodWorkerDurationKey = "pod_worker_duration_seconds" + PodStartDurationKey = "pod_start_duration_seconds" + CgroupManagerOperationsKey = "cgroup_manager_duration_seconds" + PodWorkerStartDurationKey = "pod_worker_start_duration_seconds" + PLEGRelistDurationKey = "pleg_relist_duration_seconds" + PLEGDiscardEventsKey = "pleg_discard_events" + PLEGRelistIntervalKey = "pleg_relist_interval_seconds" + PLEGLastSeenKey = "pleg_last_seen_seconds" + EvictionsKey = "evictions" + EvictionStatsAgeKey = "eviction_stats_age_seconds" + PreemptionsKey = "preemptions" + VolumeStatsCapacityBytesKey = "volume_stats_capacity_bytes" + VolumeStatsAvailableBytesKey = "volume_stats_available_bytes" + VolumeStatsUsedBytesKey = "volume_stats_used_bytes" + VolumeStatsInodesKey = "volume_stats_inodes" + VolumeStatsInodesFreeKey = "volume_stats_inodes_free" + VolumeStatsInodesUsedKey = "volume_stats_inodes_used" + VolumeStatsHealthStatusAbnormalKey = "volume_stats_health_status_abnormal" + RunningPodsKey = "running_pods" + RunningContainersKey = "running_containers" // Metrics keys of remote runtime operations RuntimeOperationsKey = "runtime_operations_total" RuntimeOperationsDurationKey = "runtime_operations_duration_seconds" diff --git a/pkg/kubelet/server/stats/volume_stat_calculator.go b/pkg/kubelet/server/stats/volume_stat_calculator.go index 63e9b1e7c39..f2580e0e679 100644 --- a/pkg/kubelet/server/stats/volume_stat_calculator.go +++ b/pkg/kubelet/server/stats/volume_stat_calculator.go @@ -177,7 +177,10 @@ func (s *volumeStatCalculator) calcAndStoreStats() { // parsePodVolumeStats converts (internal) volume.Metrics to (external) stats.VolumeStats structures func (s *volumeStatCalculator) parsePodVolumeStats(podName string, pvcRef *stats.PVCReference, metric *volume.Metrics, volSpec v1.Volume) stats.VolumeStats { - var available, capacity, used, inodes, inodesFree, inodesUsed uint64 + var ( + available, capacity, used, inodes, inodesFree, inodesUsed uint64 + ) + if metric.Available != nil { available = uint64(metric.Available.Value()) } @@ -197,10 +200,18 @@ func (s *volumeStatCalculator) parsePodVolumeStats(podName string, pvcRef *stats inodesUsed = uint64(metric.InodesUsed.Value()) } - return stats.VolumeStats{ + volumeStats := stats.VolumeStats{ Name: podName, PVCRef: pvcRef, FsStats: stats.FsStats{Time: metric.Time, AvailableBytes: &available, CapacityBytes: &capacity, UsedBytes: &used, Inodes: &inodes, InodesFree: &inodesFree, InodesUsed: &inodesUsed}, } + + if metric.Abnormal != nil { + volumeStats.VolumeHealthStats = &stats.VolumeHealthStats{ + Abnormal: *metric.Abnormal, + } + } + + return volumeStats } diff --git a/pkg/kubelet/server/stats/volume_stat_calculator_test.go b/pkg/kubelet/server/stats/volume_stat_calculator_test.go index 1edd0c8d7df..a1d3ce4898b 100644 --- a/pkg/kubelet/server/stats/volume_stat_calculator_test.go +++ b/pkg/kubelet/server/stats/volume_stat_calculator_test.go @@ -128,8 +128,9 @@ func TestPVCRef(t *testing.T) { assert.Len(t, append(vs.EphemeralVolumes, vs.PersistentVolumes...), 4) // Verify 'vol0' doesn't have a PVC reference assert.Contains(t, append(vs.EphemeralVolumes, vs.PersistentVolumes...), kubestats.VolumeStats{ - Name: vol0, - FsStats: expectedFSStats(), + Name: vol0, + FsStats: expectedFSStats(), + VolumeHealthStats: expectedVolumeHealthStats(), }) // Verify 'vol1' has a PVC reference assert.Contains(t, append(vs.EphemeralVolumes, vs.PersistentVolumes...), kubestats.VolumeStats{ @@ -138,16 +139,18 @@ func TestPVCRef(t *testing.T) { Name: pvcClaimName0, Namespace: namespace0, }, - FsStats: expectedFSStats(), + FsStats: expectedFSStats(), + VolumeHealthStats: expectedVolumeHealthStats(), }) - // Verify 'vol2' has a PVC reference + // // Verify 'vol2' has a PVC reference assert.Contains(t, append(vs.EphemeralVolumes, vs.PersistentVolumes...), kubestats.VolumeStats{ Name: vol2, PVCRef: &kubestats.PVCReference{ Name: pvcClaimName1, Namespace: namespace0, }, - FsStats: expectedBlockStats(), + FsStats: expectedBlockStats(), + VolumeHealthStats: expectedVolumeHealthStats(), }) // Verify 'vol3' has a PVC reference assert.Contains(t, append(vs.EphemeralVolumes, vs.PersistentVolumes...), kubestats.VolumeStats{ @@ -156,7 +159,8 @@ func TestPVCRef(t *testing.T) { Name: pName0 + "-" + vol3, Namespace: namespace0, }, - FsStats: expectedFSStats(), + FsStats: expectedFSStats(), + VolumeHealthStats: expectedVolumeHealthStats(), }) } @@ -263,6 +267,13 @@ func expectedFSStats() kubestats.FsStats { } } +func expectedVolumeHealthStats() *kubestats.VolumeHealthStats { + metric := expectedMetrics() + return &kubestats.VolumeHealthStats{ + Abnormal: *metric.Abnormal, + } +} + // Fake block-volume/metrics provider, block-devices have no inodes var _ volume.BlockVolume = &fakeBlockVolume{} @@ -283,6 +294,7 @@ func expectedBlockMetrics() *volume.Metrics { Available: resource.NewQuantity(available, resource.BinarySI), Capacity: resource.NewQuantity(capacity, resource.BinarySI), Used: resource.NewQuantity(available-capacity, resource.BinarySI), + Abnormal: &volumeCondition.Abnormal, } } diff --git a/staging/src/k8s.io/kubelet/pkg/apis/stats/v1alpha1/types.go b/staging/src/k8s.io/kubelet/pkg/apis/stats/v1alpha1/types.go index 5afbf452260..5e75fefe53f 100644 --- a/staging/src/k8s.io/kubelet/pkg/apis/stats/v1alpha1/types.go +++ b/staging/src/k8s.io/kubelet/pkg/apis/stats/v1alpha1/types.go @@ -271,6 +271,17 @@ type VolumeStats struct { // Reference to the PVC, if one exists // +optional PVCRef *PVCReference `json:"pvcRef,omitempty"` + + // VolumeHealthStats contains data about volume health + // +optional + VolumeHealthStats *VolumeHealthStats `json:"volumeHealthStats,omitempty"` +} + +// VolumeHealthStats contains data about volume health. +type VolumeHealthStats struct { + // Normal volumes are available for use and operating optimally. + // An abnormal volume does not meet these criteria. + Abnormal bool `json:"abnormal"` } // PVCReference contains enough information to describe the referenced PVC. diff --git a/test/e2e_node/summary_test.go b/test/e2e_node/summary_test.go index 2c520def83e..2cbc990109e 100644 --- a/test/e2e_node/summary_test.go +++ b/test/e2e_node/summary_test.go @@ -228,8 +228,9 @@ var _ = SIGDescribe("Summary API [NodeConformance]", func() { }), "VolumeStats": gstruct.MatchAllElements(summaryObjectID, gstruct.Elements{ "test-empty-dir": gstruct.MatchAllFields(gstruct.Fields{ - "Name": gomega.Equal("test-empty-dir"), - "PVCRef": gomega.BeNil(), + "Name": gomega.Equal("test-empty-dir"), + "PVCRef": gomega.BeNil(), + "VolumeHealthStats": gomega.BeNil(), "FsStats": gstruct.MatchAllFields(gstruct.Fields{ "Time": recent(maxStatsAge), "AvailableBytes": fsCapacityBounds, From 38d8aae408cba997b6c78673ed7481bee45ad680 Mon Sep 17 00:00:00 2001 From: fengzixu Date: Sun, 27 Mar 2022 08:38:20 +0000 Subject: [PATCH 3/3] fix: add nil check --- .../stats/volume_stat_calculator_test.go | 34 ++++++++++++++----- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/pkg/kubelet/server/stats/volume_stat_calculator_test.go b/pkg/kubelet/server/stats/volume_stat_calculator_test.go index a1d3ce4898b..7af179659db 100644 --- a/pkg/kubelet/server/stats/volume_stat_calculator_test.go +++ b/pkg/kubelet/server/stats/volume_stat_calculator_test.go @@ -206,8 +206,10 @@ func TestAbnormalVolumeEvent(t *testing.T) { } // Calculate stats for pod - volumeCondition.Message = "The target path of the volume doesn't exist" - volumeCondition.Abnormal = true + if volumeCondition != nil { + volumeCondition.Message = "The target path of the volume doesn't exist" + volumeCondition.Abnormal = true + } statsCalculator := newVolumeStatCalculator(mockStats, time.Minute, fakePod, &fakeEventRecorder) statsCalculator.calcAndStoreStats() @@ -237,16 +239,21 @@ func (v *fakeVolume) GetMetrics() (*volume.Metrics, error) { } func expectedMetrics() *volume.Metrics { - return &volume.Metrics{ + vMetrics := &volume.Metrics{ Available: resource.NewQuantity(available, resource.BinarySI), Capacity: resource.NewQuantity(capacity, resource.BinarySI), Used: resource.NewQuantity(available-capacity, resource.BinarySI), Inodes: resource.NewQuantity(inodesTotal, resource.BinarySI), InodesFree: resource.NewQuantity(inodesFree, resource.BinarySI), InodesUsed: resource.NewQuantity(inodesTotal-inodesFree, resource.BinarySI), - Message: &volumeCondition.Message, - Abnormal: &volumeCondition.Abnormal, } + + if volumeCondition != nil { + vMetrics.Message = &volumeCondition.Message + vMetrics.Abnormal = &volumeCondition.Abnormal + } + + return vMetrics } func expectedFSStats() kubestats.FsStats { @@ -269,9 +276,13 @@ func expectedFSStats() kubestats.FsStats { func expectedVolumeHealthStats() *kubestats.VolumeHealthStats { metric := expectedMetrics() - return &kubestats.VolumeHealthStats{ - Abnormal: *metric.Abnormal, + hs := &kubestats.VolumeHealthStats{} + + if metric != nil && metric.Abnormal != nil { + hs.Abnormal = *metric.Abnormal } + + return hs } // Fake block-volume/metrics provider, block-devices have no inodes @@ -290,12 +301,17 @@ func (v *fakeBlockVolume) GetMetrics() (*volume.Metrics, error) { } func expectedBlockMetrics() *volume.Metrics { - return &volume.Metrics{ + vMetrics := &volume.Metrics{ Available: resource.NewQuantity(available, resource.BinarySI), Capacity: resource.NewQuantity(capacity, resource.BinarySI), Used: resource.NewQuantity(available-capacity, resource.BinarySI), - Abnormal: &volumeCondition.Abnormal, } + + if volumeCondition != nil { + vMetrics.Abnormal = &volumeCondition.Abnormal + } + + return vMetrics } func expectedBlockStats() kubestats.FsStats {