diff --git a/pkg/kubelet/metrics/collectors/volume_stats.go b/pkg/kubelet/metrics/collectors/volume_stats.go index 8ff14a0b2ac..b6bf1870c4c 100644 --- a/pkg/kubelet/metrics/collectors/volume_stats.go +++ b/pkg/kubelet/metrics/collectors/volume_stats.go @@ -61,6 +61,12 @@ var ( []string{"namespace", "persistentvolumeclaim"}, nil, metrics.ALPHA, "", ) + + volumeStatsHealthAbnormalDesc = metrics.NewDesc( + metrics.BuildFQName("", kubeletmetrics.KubeletSubsystem, kubeletmetrics.VolumeStatsHealthStatusAbnormalKey), + "Abnormal volume health status. The count is either 1 or 0. 1 indicates the volume is unhealthy, 0 indicates volume is healthy", + []string{"namespace", "persistentvolumeclaim"}, nil, + metrics.ALPHA, "") ) type volumeStatsCollector struct { @@ -85,6 +91,7 @@ func (collector *volumeStatsCollector) DescribeWithStability(ch chan<- *metrics. ch <- volumeStatsInodesDesc ch <- volumeStatsInodesFreeDesc ch <- volumeStatsInodesUsedDesc + ch <- volumeStatsHealthAbnormalDesc } // CollectWithStability implements the metrics.StableCollector interface. @@ -95,7 +102,6 @@ func (collector *volumeStatsCollector) CollectWithStability(ch chan<- metrics.Me } addGauge := func(desc *metrics.Desc, pvcRef *stats.PVCReference, v float64, lv ...string) { lv = append([]string{pvcRef.Namespace, pvcRef.Name}, lv...) - ch <- metrics.NewLazyConstMetric(desc, metrics.GaugeValue, v, lv...) } allPVCs := sets.String{} @@ -120,7 +126,18 @@ func (collector *volumeStatsCollector) CollectWithStability(ch chan<- metrics.Me addGauge(volumeStatsInodesDesc, pvcRef, float64(*volumeStat.Inodes)) addGauge(volumeStatsInodesFreeDesc, pvcRef, float64(*volumeStat.InodesFree)) addGauge(volumeStatsInodesUsedDesc, pvcRef, float64(*volumeStat.InodesUsed)) + if volumeStat.VolumeHealthStats != nil { + addGauge(volumeStatsHealthAbnormalDesc, pvcRef, convertBoolToFloat64(volumeStat.VolumeHealthStats.Abnormal)) + } allPVCs.Insert(pvcUniqStr) } } } + +func convertBoolToFloat64(boolVal bool) float64 { + if boolVal { + return 1 + } + + return 0 +} diff --git a/pkg/kubelet/metrics/collectors/volume_stats_test.go b/pkg/kubelet/metrics/collectors/volume_stats_test.go index 54f07178c6d..1d0e825a35b 100644 --- a/pkg/kubelet/metrics/collectors/volume_stats_test.go +++ b/pkg/kubelet/metrics/collectors/volume_stats_test.go @@ -32,6 +32,126 @@ func newUint64Pointer(i uint64) *uint64 { } func TestVolumeStatsCollector(t *testing.T) { + // Fixed metadata on type and help text. We prepend this to every expected + // output so we only have to modify a single place when doing adjustments. + const metadata = ` + # HELP kubelet_volume_stats_available_bytes [ALPHA] Number of available bytes in the volume + # TYPE kubelet_volume_stats_available_bytes gauge + # HELP kubelet_volume_stats_capacity_bytes [ALPHA] Capacity in bytes of the volume + # TYPE kubelet_volume_stats_capacity_bytes gauge + # HELP kubelet_volume_stats_inodes [ALPHA] Maximum number of inodes in the volume + # TYPE kubelet_volume_stats_inodes gauge + # HELP kubelet_volume_stats_inodes_free [ALPHA] Number of free inodes in the volume + # TYPE kubelet_volume_stats_inodes_free gauge + # HELP kubelet_volume_stats_inodes_used [ALPHA] Number of used inodes in the volume + # TYPE kubelet_volume_stats_inodes_used gauge + # HELP kubelet_volume_stats_used_bytes [ALPHA] Number of used bytes in the volume + # TYPE kubelet_volume_stats_used_bytes gauge + # HELP kubelet_volume_stats_health_status_abnormal [ALPHA] Abnormal volume health status. The count is either 1 or 0. 1 indicates the volume is unhealthy, 0 indicates volume is healthy + # TYPE kubelet_volume_stats_health_status_abnormal gauge + ` + + var ( + podStats = []statsapi.PodStats{ + { + PodRef: statsapi.PodReference{Name: "test-pod", Namespace: "test-namespace", UID: "UID_test-pod"}, + StartTime: metav1.Now(), + VolumeStats: []statsapi.VolumeStats{ + { + FsStats: statsapi.FsStats{ + Time: metav1.Now(), + AvailableBytes: newUint64Pointer(5.663154176e+09), + CapacityBytes: newUint64Pointer(1.0434699264e+10), + UsedBytes: newUint64Pointer(4.21789696e+09), + InodesFree: newUint64Pointer(655344), + Inodes: newUint64Pointer(655360), + InodesUsed: newUint64Pointer(16), + }, + Name: "test", + PVCRef: nil, + }, + { + FsStats: statsapi.FsStats{ + Time: metav1.Now(), + AvailableBytes: newUint64Pointer(5.663154176e+09), + CapacityBytes: newUint64Pointer(1.0434699264e+10), + UsedBytes: newUint64Pointer(4.21789696e+09), + InodesFree: newUint64Pointer(655344), + Inodes: newUint64Pointer(655360), + InodesUsed: newUint64Pointer(16), + }, + Name: "test", + PVCRef: &statsapi.PVCReference{ + Name: "testpvc", + Namespace: "testns", + }, + VolumeHealthStats: &statsapi.VolumeHealthStats{ + Abnormal: true, + }, + }, + }, + }, + { + // Another pod references the same PVC (test-namespace/testpvc). + PodRef: statsapi.PodReference{Name: "test-pod-2", Namespace: "test-namespace", UID: "UID_test-pod"}, + StartTime: metav1.Now(), + VolumeStats: []statsapi.VolumeStats{ + { + FsStats: statsapi.FsStats{ + Time: metav1.Now(), + AvailableBytes: newUint64Pointer(5.663154176e+09), + CapacityBytes: newUint64Pointer(1.0434699264e+10), + UsedBytes: newUint64Pointer(4.21789696e+09), + InodesFree: newUint64Pointer(655344), + Inodes: newUint64Pointer(655360), + InodesUsed: newUint64Pointer(16), + }, + Name: "test", + PVCRef: &statsapi.PVCReference{ + Name: "testpvc", + Namespace: "testns", + }, + VolumeHealthStats: &statsapi.VolumeHealthStats{ + Abnormal: true, + }, + }, + }, + }, + } + + want = metadata + ` + kubelet_volume_stats_available_bytes{namespace="testns",persistentvolumeclaim="testpvc"} 5.663154176e+09 + kubelet_volume_stats_capacity_bytes{namespace="testns",persistentvolumeclaim="testpvc"} 1.0434699264e+10 + kubelet_volume_stats_inodes{namespace="testns",persistentvolumeclaim="testpvc"} 655360 + kubelet_volume_stats_inodes_free{namespace="testns",persistentvolumeclaim="testpvc"} 655344 + kubelet_volume_stats_inodes_used{namespace="testns",persistentvolumeclaim="testpvc"} 16 + kubelet_volume_stats_used_bytes{namespace="testns",persistentvolumeclaim="testpvc"} 4.21789696e+09 + kubelet_volume_stats_health_status_abnormal{namespace="testns",persistentvolumeclaim="testpvc"} 1 + ` + + metrics = []string{ + "kubelet_volume_stats_available_bytes", + "kubelet_volume_stats_capacity_bytes", + "kubelet_volume_stats_inodes", + "kubelet_volume_stats_inodes_free", + "kubelet_volume_stats_inodes_used", + "kubelet_volume_stats_used_bytes", + "kubelet_volume_stats_health_status_abnormal", + } + ) + + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + mockStatsProvider := statstest.NewMockProvider(mockCtrl) + + mockStatsProvider.EXPECT().ListPodStats().Return(podStats, nil).AnyTimes() + mockStatsProvider.EXPECT().ListPodStatsAndUpdateCPUNanoCoreUsage().Return(podStats, nil).AnyTimes() + if err := testutil.CustomCollectAndCompare(&volumeStatsCollector{statsProvider: mockStatsProvider}, strings.NewReader(want), metrics...); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } +} + +func TestVolumeStatsCollectorWithNullVolumeStatus(t *testing.T) { // Fixed metadata on type and help text. We prepend this to every expected // output so we only have to modify a single place when doing adjustments. const metadata = ` @@ -86,29 +206,6 @@ func TestVolumeStatsCollector(t *testing.T) { }, }, }, - { - // Another pod references the same PVC (test-namespace/testpvc). - PodRef: statsapi.PodReference{Name: "test-pod-2", Namespace: "test-namespace", UID: "UID_test-pod"}, - StartTime: metav1.Now(), - VolumeStats: []statsapi.VolumeStats{ - { - FsStats: statsapi.FsStats{ - Time: metav1.Now(), - AvailableBytes: newUint64Pointer(5.663154176e+09), - CapacityBytes: newUint64Pointer(1.0434699264e+10), - UsedBytes: newUint64Pointer(4.21789696e+09), - InodesFree: newUint64Pointer(655344), - Inodes: newUint64Pointer(655360), - InodesUsed: newUint64Pointer(16), - }, - Name: "test", - PVCRef: &statsapi.PVCReference{ - Name: "testpvc", - Namespace: "testns", - }, - }, - }, - }, } want = metadata + ` diff --git a/pkg/kubelet/metrics/metrics.go b/pkg/kubelet/metrics/metrics.go index cbfdead20d3..53b04c05897 100644 --- a/pkg/kubelet/metrics/metrics.go +++ b/pkg/kubelet/metrics/metrics.go @@ -30,28 +30,29 @@ import ( // This const block defines the metric names for the kubelet metrics. const ( - KubeletSubsystem = "kubelet" - NodeNameKey = "node_name" - NodeLabelKey = "node" - PodWorkerDurationKey = "pod_worker_duration_seconds" - PodStartDurationKey = "pod_start_duration_seconds" - CgroupManagerOperationsKey = "cgroup_manager_duration_seconds" - PodWorkerStartDurationKey = "pod_worker_start_duration_seconds" - PLEGRelistDurationKey = "pleg_relist_duration_seconds" - PLEGDiscardEventsKey = "pleg_discard_events" - PLEGRelistIntervalKey = "pleg_relist_interval_seconds" - PLEGLastSeenKey = "pleg_last_seen_seconds" - EvictionsKey = "evictions" - EvictionStatsAgeKey = "eviction_stats_age_seconds" - PreemptionsKey = "preemptions" - VolumeStatsCapacityBytesKey = "volume_stats_capacity_bytes" - VolumeStatsAvailableBytesKey = "volume_stats_available_bytes" - VolumeStatsUsedBytesKey = "volume_stats_used_bytes" - VolumeStatsInodesKey = "volume_stats_inodes" - VolumeStatsInodesFreeKey = "volume_stats_inodes_free" - VolumeStatsInodesUsedKey = "volume_stats_inodes_used" - RunningPodsKey = "running_pods" - RunningContainersKey = "running_containers" + KubeletSubsystem = "kubelet" + NodeNameKey = "node_name" + NodeLabelKey = "node" + PodWorkerDurationKey = "pod_worker_duration_seconds" + PodStartDurationKey = "pod_start_duration_seconds" + CgroupManagerOperationsKey = "cgroup_manager_duration_seconds" + PodWorkerStartDurationKey = "pod_worker_start_duration_seconds" + PLEGRelistDurationKey = "pleg_relist_duration_seconds" + PLEGDiscardEventsKey = "pleg_discard_events" + PLEGRelistIntervalKey = "pleg_relist_interval_seconds" + PLEGLastSeenKey = "pleg_last_seen_seconds" + EvictionsKey = "evictions" + EvictionStatsAgeKey = "eviction_stats_age_seconds" + PreemptionsKey = "preemptions" + VolumeStatsCapacityBytesKey = "volume_stats_capacity_bytes" + VolumeStatsAvailableBytesKey = "volume_stats_available_bytes" + VolumeStatsUsedBytesKey = "volume_stats_used_bytes" + VolumeStatsInodesKey = "volume_stats_inodes" + VolumeStatsInodesFreeKey = "volume_stats_inodes_free" + VolumeStatsInodesUsedKey = "volume_stats_inodes_used" + VolumeStatsHealthStatusAbnormalKey = "volume_stats_health_status_abnormal" + RunningPodsKey = "running_pods" + RunningContainersKey = "running_containers" // Metrics keys of remote runtime operations RuntimeOperationsKey = "runtime_operations_total" RuntimeOperationsDurationKey = "runtime_operations_duration_seconds" diff --git a/pkg/kubelet/server/stats/volume_stat_calculator.go b/pkg/kubelet/server/stats/volume_stat_calculator.go index 63e9b1e7c39..f2580e0e679 100644 --- a/pkg/kubelet/server/stats/volume_stat_calculator.go +++ b/pkg/kubelet/server/stats/volume_stat_calculator.go @@ -177,7 +177,10 @@ func (s *volumeStatCalculator) calcAndStoreStats() { // parsePodVolumeStats converts (internal) volume.Metrics to (external) stats.VolumeStats structures func (s *volumeStatCalculator) parsePodVolumeStats(podName string, pvcRef *stats.PVCReference, metric *volume.Metrics, volSpec v1.Volume) stats.VolumeStats { - var available, capacity, used, inodes, inodesFree, inodesUsed uint64 + var ( + available, capacity, used, inodes, inodesFree, inodesUsed uint64 + ) + if metric.Available != nil { available = uint64(metric.Available.Value()) } @@ -197,10 +200,18 @@ func (s *volumeStatCalculator) parsePodVolumeStats(podName string, pvcRef *stats inodesUsed = uint64(metric.InodesUsed.Value()) } - return stats.VolumeStats{ + volumeStats := stats.VolumeStats{ Name: podName, PVCRef: pvcRef, FsStats: stats.FsStats{Time: metric.Time, AvailableBytes: &available, CapacityBytes: &capacity, UsedBytes: &used, Inodes: &inodes, InodesFree: &inodesFree, InodesUsed: &inodesUsed}, } + + if metric.Abnormal != nil { + volumeStats.VolumeHealthStats = &stats.VolumeHealthStats{ + Abnormal: *metric.Abnormal, + } + } + + return volumeStats } diff --git a/pkg/kubelet/server/stats/volume_stat_calculator_test.go b/pkg/kubelet/server/stats/volume_stat_calculator_test.go index 1edd0c8d7df..7af179659db 100644 --- a/pkg/kubelet/server/stats/volume_stat_calculator_test.go +++ b/pkg/kubelet/server/stats/volume_stat_calculator_test.go @@ -128,8 +128,9 @@ func TestPVCRef(t *testing.T) { assert.Len(t, append(vs.EphemeralVolumes, vs.PersistentVolumes...), 4) // Verify 'vol0' doesn't have a PVC reference assert.Contains(t, append(vs.EphemeralVolumes, vs.PersistentVolumes...), kubestats.VolumeStats{ - Name: vol0, - FsStats: expectedFSStats(), + Name: vol0, + FsStats: expectedFSStats(), + VolumeHealthStats: expectedVolumeHealthStats(), }) // Verify 'vol1' has a PVC reference assert.Contains(t, append(vs.EphemeralVolumes, vs.PersistentVolumes...), kubestats.VolumeStats{ @@ -138,16 +139,18 @@ func TestPVCRef(t *testing.T) { Name: pvcClaimName0, Namespace: namespace0, }, - FsStats: expectedFSStats(), + FsStats: expectedFSStats(), + VolumeHealthStats: expectedVolumeHealthStats(), }) - // Verify 'vol2' has a PVC reference + // // Verify 'vol2' has a PVC reference assert.Contains(t, append(vs.EphemeralVolumes, vs.PersistentVolumes...), kubestats.VolumeStats{ Name: vol2, PVCRef: &kubestats.PVCReference{ Name: pvcClaimName1, Namespace: namespace0, }, - FsStats: expectedBlockStats(), + FsStats: expectedBlockStats(), + VolumeHealthStats: expectedVolumeHealthStats(), }) // Verify 'vol3' has a PVC reference assert.Contains(t, append(vs.EphemeralVolumes, vs.PersistentVolumes...), kubestats.VolumeStats{ @@ -156,7 +159,8 @@ func TestPVCRef(t *testing.T) { Name: pName0 + "-" + vol3, Namespace: namespace0, }, - FsStats: expectedFSStats(), + FsStats: expectedFSStats(), + VolumeHealthStats: expectedVolumeHealthStats(), }) } @@ -202,8 +206,10 @@ func TestAbnormalVolumeEvent(t *testing.T) { } // Calculate stats for pod - volumeCondition.Message = "The target path of the volume doesn't exist" - volumeCondition.Abnormal = true + if volumeCondition != nil { + volumeCondition.Message = "The target path of the volume doesn't exist" + volumeCondition.Abnormal = true + } statsCalculator := newVolumeStatCalculator(mockStats, time.Minute, fakePod, &fakeEventRecorder) statsCalculator.calcAndStoreStats() @@ -233,16 +239,21 @@ func (v *fakeVolume) GetMetrics() (*volume.Metrics, error) { } func expectedMetrics() *volume.Metrics { - return &volume.Metrics{ + vMetrics := &volume.Metrics{ Available: resource.NewQuantity(available, resource.BinarySI), Capacity: resource.NewQuantity(capacity, resource.BinarySI), Used: resource.NewQuantity(available-capacity, resource.BinarySI), Inodes: resource.NewQuantity(inodesTotal, resource.BinarySI), InodesFree: resource.NewQuantity(inodesFree, resource.BinarySI), InodesUsed: resource.NewQuantity(inodesTotal-inodesFree, resource.BinarySI), - Message: &volumeCondition.Message, - Abnormal: &volumeCondition.Abnormal, } + + if volumeCondition != nil { + vMetrics.Message = &volumeCondition.Message + vMetrics.Abnormal = &volumeCondition.Abnormal + } + + return vMetrics } func expectedFSStats() kubestats.FsStats { @@ -263,6 +274,17 @@ func expectedFSStats() kubestats.FsStats { } } +func expectedVolumeHealthStats() *kubestats.VolumeHealthStats { + metric := expectedMetrics() + hs := &kubestats.VolumeHealthStats{} + + if metric != nil && metric.Abnormal != nil { + hs.Abnormal = *metric.Abnormal + } + + return hs +} + // Fake block-volume/metrics provider, block-devices have no inodes var _ volume.BlockVolume = &fakeBlockVolume{} @@ -279,11 +301,17 @@ func (v *fakeBlockVolume) GetMetrics() (*volume.Metrics, error) { } func expectedBlockMetrics() *volume.Metrics { - return &volume.Metrics{ + vMetrics := &volume.Metrics{ Available: resource.NewQuantity(available, resource.BinarySI), Capacity: resource.NewQuantity(capacity, resource.BinarySI), Used: resource.NewQuantity(available-capacity, resource.BinarySI), } + + if volumeCondition != nil { + vMetrics.Abnormal = &volumeCondition.Abnormal + } + + return vMetrics } func expectedBlockStats() kubestats.FsStats { diff --git a/staging/src/k8s.io/kubelet/pkg/apis/stats/v1alpha1/types.go b/staging/src/k8s.io/kubelet/pkg/apis/stats/v1alpha1/types.go index 5afbf452260..5e75fefe53f 100644 --- a/staging/src/k8s.io/kubelet/pkg/apis/stats/v1alpha1/types.go +++ b/staging/src/k8s.io/kubelet/pkg/apis/stats/v1alpha1/types.go @@ -271,6 +271,17 @@ type VolumeStats struct { // Reference to the PVC, if one exists // +optional PVCRef *PVCReference `json:"pvcRef,omitempty"` + + // VolumeHealthStats contains data about volume health + // +optional + VolumeHealthStats *VolumeHealthStats `json:"volumeHealthStats,omitempty"` +} + +// VolumeHealthStats contains data about volume health. +type VolumeHealthStats struct { + // Normal volumes are available for use and operating optimally. + // An abnormal volume does not meet these criteria. + Abnormal bool `json:"abnormal"` } // PVCReference contains enough information to describe the referenced PVC. diff --git a/test/e2e_node/summary_test.go b/test/e2e_node/summary_test.go index c0768072bcf..9af721b9193 100644 --- a/test/e2e_node/summary_test.go +++ b/test/e2e_node/summary_test.go @@ -230,8 +230,9 @@ var _ = SIGDescribe("Summary API [NodeConformance]", func() { }), "VolumeStats": gstruct.MatchAllElements(summaryObjectID, gstruct.Elements{ "test-empty-dir": gstruct.MatchAllFields(gstruct.Fields{ - "Name": gomega.Equal("test-empty-dir"), - "PVCRef": gomega.BeNil(), + "Name": gomega.Equal("test-empty-dir"), + "PVCRef": gomega.BeNil(), + "VolumeHealthStats": gomega.BeNil(), "FsStats": gstruct.MatchAllFields(gstruct.Fields{ "Time": recent(maxStatsAge), "AvailableBytes": fsCapacityBounds,