diff --git a/pkg/kubelet/metrics/metrics.go b/pkg/kubelet/metrics/metrics.go
index 8f82c870da1..7dfa7882956 100644
--- a/pkg/kubelet/metrics/metrics.go
+++ b/pkg/kubelet/metrics/metrics.go
@@ -461,6 +461,26 @@ var (
 		},
 		[]string{"runtime_handler"},
 	)
+
+	// RunningPodCount is a gauge that tracks the number of Pods currently running
+	RunningPodCount = metrics.NewGauge(
+		&metrics.GaugeOpts{
+			Subsystem:      KubeletSubsystem,
+			Name:           "running_pod_count",
+			Help:           "Number of pods currently running",
+			StabilityLevel: metrics.ALPHA,
+		},
+	)
+	// RunningContainerCount is a gauge vector that tracks the number of containers, partitioned by container_state
+	RunningContainerCount = metrics.NewGaugeVec(
+		&metrics.GaugeOpts{
+			Subsystem:      KubeletSubsystem,
+			Name:           "running_container_count",
+			Help:           "Number of containers currently running",
+			StabilityLevel: metrics.ALPHA,
+		},
+		[]string{"container_state"},
+	)
 )
 
 var registerMetrics sync.Once
@@ -475,7 +495,6 @@ func Register(containerCache kubecontainer.RuntimeCache, collectors ...prometheu
 		legacyregistry.MustRegister(CgroupManagerDuration)
 		legacyregistry.MustRegister(PodWorkerStartDuration)
 		legacyregistry.MustRegister(ContainersPerPodCount)
-		legacyregistry.RawMustRegister(newPodAndContainerCollector(containerCache))
 		legacyregistry.MustRegister(PLEGRelistDuration)
 		legacyregistry.MustRegister(PLEGDiscardEvents)
 		legacyregistry.MustRegister(PLEGRelistInterval)
@@ -498,6 +517,8 @@ func Register(containerCache kubecontainer.RuntimeCache, collectors ...prometheu
 		legacyregistry.MustRegister(DeprecatedEvictionStatsAge)
 		legacyregistry.MustRegister(DeprecatedDevicePluginRegistrationCount)
 		legacyregistry.MustRegister(DeprecatedDevicePluginAllocationLatency)
+		legacyregistry.MustRegister(RunningContainerCount)
+		legacyregistry.MustRegister(RunningPodCount)
 		if utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) {
 			legacyregistry.MustRegister(AssignedConfig)
 			legacyregistry.MustRegister(ActiveConfig)
@@ -520,60 +541,6 @@ func SinceInSeconds(start time.Time) float64 {
 	return time.Since(start).Seconds()
 }
 
-func newPodAndContainerCollector(containerCache kubecontainer.RuntimeCache) *podAndContainerCollector {
-	return &podAndContainerCollector{
-		containerCache: containerCache,
-	}
-}
-
-// Custom collector for current pod and container counts.
-type podAndContainerCollector struct {
-	// Cache for accessing information about running containers.
-	containerCache kubecontainer.RuntimeCache
-}
-
-// TODO(vmarmol): Split by source?
-var (
-	runningPodCountDesc = prometheus.NewDesc(
-		prometheus.BuildFQName("", KubeletSubsystem, "running_pod_count"),
-		"Number of pods currently running",
-		nil, nil)
-	runningContainerCountDesc = prometheus.NewDesc(
-		prometheus.BuildFQName("", KubeletSubsystem, "running_container_count"),
-		"Number of containers currently running",
-		nil, nil)
-)
-
-// Describe implements Prometheus' Describe method from the Collector interface. It sends all
-// available descriptions to the provided channel and retunrs once the last description has been sent.
-func (pc *podAndContainerCollector) Describe(ch chan<- *prometheus.Desc) {
-	ch <- runningPodCountDesc
-	ch <- runningContainerCountDesc
-}
-
-// Collect implements Prometheus' Collect method from the Collector interface. It's called by the Prometheus
-// registry when collecting metrics.
-func (pc *podAndContainerCollector) Collect(ch chan<- prometheus.Metric) {
-	runningPods, err := pc.containerCache.GetPods()
-	if err != nil {
-		klog.Warningf("Failed to get running container information while collecting metrics: %v", err)
-		return
-	}
-
-	runningContainers := 0
-	for _, p := range runningPods {
-		runningContainers += len(p.Containers)
-	}
-	ch <- prometheus.MustNewConstMetric(
-		runningPodCountDesc,
-		prometheus.GaugeValue,
-		float64(len(runningPods)))
-	ch <- prometheus.MustNewConstMetric(
-		runningContainerCountDesc,
-		prometheus.GaugeValue,
-		float64(runningContainers))
-}
-
 const configMapAPIPathFmt = "/api/v1/namespaces/%s/configmaps/%s"
 
 func configLabels(source *corev1.NodeConfigSource) (map[string]string, error) {
diff --git a/pkg/kubelet/pleg/BUILD b/pkg/kubelet/pleg/BUILD
index 948fd69be20..c6bf885ce8a 100644
--- a/pkg/kubelet/pleg/BUILD
+++ b/pkg/kubelet/pleg/BUILD
@@ -33,9 +33,11 @@ go_test(
     deps = [
         "//pkg/kubelet/container:go_default_library",
         "//pkg/kubelet/container/testing:go_default_library",
+        "//pkg/kubelet/metrics:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/clock:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/diff:go_default_library",
+        "//vendor/github.com/prometheus/client_model/go:go_default_library",
         "//vendor/github.com/stretchr/testify/assert:go_default_library",
     ],
 )
diff --git a/pkg/kubelet/pleg/generic.go b/pkg/kubelet/pleg/generic.go
index c81bef81c67..9fda1be6b48 100644
--- a/pkg/kubelet/pleg/generic.go
+++ b/pkg/kubelet/pleg/generic.go
@@ -209,6 +209,8 @@ func (g *GenericPLEG) relist() {
 	g.updateRelistTime(timestamp)
 
 	pods := kubecontainer.Pods(podList)
+	// update running pod and container count
+	updateRunningPodAndContainerMetrics(pods)
 	g.podRecords.setCurrent(pods)
 
 	// Compare the old and the current pods, and generate events.
@@ -431,6 +433,31 @@ func getContainerState(pod *kubecontainer.Pod, cid *kubecontainer.ContainerID) p
 	return state
 }
 
+// updateRunningPodAndContainerMetrics publishes the pod count and the per-state
+// container counts derived from the pods returned by the latest relist.
+func updateRunningPodAndContainerMetrics(pods []*kubecontainer.Pod) {
+	// NOTE(review): every pod in the relist result is counted, including a pod
+	// whose only container has exited; confirm this matches the "running" name.
+	metrics.RunningPodCount.Set(float64(len(pods)))
+
+	// Seed every known state with zero so that a state which no longer has any
+	// container is reported as 0 instead of keeping the value of an earlier relist.
+	containerStateCount := map[string]int{
+		string(kubecontainer.ContainerStateCreated): 0,
+		string(kubecontainer.ContainerStateRunning): 0,
+		string(kubecontainer.ContainerStateExited):  0,
+		string(kubecontainer.ContainerStateUnknown): 0,
+	}
+	for _, pod := range pods {
+		for _, container := range pod.Containers {
+			containerStateCount[string(container.State)]++
+		}
+	}
+	for state, count := range containerStateCount {
+		metrics.RunningContainerCount.WithLabelValues(state).Set(float64(count))
+	}
+}
+
 func (pr podRecords) getOld(id types.UID) *kubecontainer.Pod {
 	r, ok := pr[id]
 	if !ok {
diff --git a/pkg/kubelet/pleg/generic_test.go b/pkg/kubelet/pleg/generic_test.go
index 7756882eb4d..46da6e39ed7 100644
--- a/pkg/kubelet/pleg/generic_test.go
+++ b/pkg/kubelet/pleg/generic_test.go
@@ -24,12 +24,14 @@ import (
 	"testing"
 	"time"
 
+	dto "github.com/prometheus/client_model/go"
 	"github.com/stretchr/testify/assert"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/clock"
 	"k8s.io/apimachinery/pkg/util/diff"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 	containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
+	"k8s.io/kubernetes/pkg/kubelet/metrics"
 )
 
 const (
@@ -643,3 +645,67 @@ func TestRelistIPChange(t *testing.T) {
 		assert.Exactly(t, []*PodLifecycleEvent{event}, actualEvents)
 	}
 }
+
+func TestRunningPodAndContainerCount(t *testing.T) {
+	fakeRuntime := &containertest.FakeRuntime{}
+	runtimeCache, err := kubecontainer.NewRuntimeCache(fakeRuntime)
+	assert.NoError(t, err)
+	metrics.Register(runtimeCache)
+	testPleg := newTestGenericPLEG()
+	pleg, runtime := testPleg.pleg, testPleg.runtime
+
+	// containerCount reads the gauge exported for one "container_state" label value.
+	containerCount := func(state string) float64 {
+		actual := &dto.Metric{}
+		assert.NoError(t, metrics.RunningContainerCount.WithLabelValues(state).Write(actual))
+		return actual.GetGauge().GetValue()
+	}
+	// podCount reads the running pod gauge.
+	podCount := func() float64 {
+		actual := &dto.Metric{}
+		assert.NoError(t, metrics.RunningPodCount.Write(actual))
+		return actual.GetGauge().GetValue()
+	}
+	running := string(kubecontainer.ContainerStateRunning)
+	exited := string(kubecontainer.ContainerStateExited)
+	unknown := string(kubecontainer.ContainerStateUnknown)
+
+	runtime.AllPodList = []*containertest.FakePod{
+		{Pod: &kubecontainer.Pod{
+			ID: "1234",
+			Containers: []*kubecontainer.Container{
+				createTestContainer("c1", kubecontainer.ContainerStateRunning),
+				createTestContainer("c2", kubecontainer.ContainerStateUnknown),
+				createTestContainer("c3", kubecontainer.ContainerStateUnknown),
+			},
+		}},
+		{Pod: &kubecontainer.Pod{
+			ID: "4567",
+			Containers: []*kubecontainer.Container{
+				createTestContainer("c1", kubecontainer.ContainerStateExited),
+			},
+		}},
+	}
+	pleg.relist()
+
+	assert.Equal(t, float64(1), containerCount(running))
+	assert.Equal(t, float64(2), containerCount(unknown))
+	assert.Equal(t, float64(1), containerCount(exited))
+	assert.Equal(t, float64(2), podCount())
+
+	// A state that no longer has any container must drop back to zero on the next relist.
+	runtime.AllPodList = []*containertest.FakePod{
+		{Pod: &kubecontainer.Pod{
+			ID: "1234",
+			Containers: []*kubecontainer.Container{
+				createTestContainer("c1", kubecontainer.ContainerStateRunning),
+			},
+		}},
+	}
+	pleg.relist()
+
+	assert.Equal(t, float64(1), containerCount(running))
+	assert.Equal(t, float64(0), containerCount(unknown))
+	assert.Equal(t, float64(0), containerCount(exited))
+	assert.Equal(t, float64(1), podCount())
+}