mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-25 20:53:33 +00:00
Update running_pod_count and running_container_count metric
As already mentioned in this issue https://github.com/kubernetes/kubernetes/issues/79286, some metrics like "running_pod_count" and "running_container_count" use non-standard prometheus metrics; this change converts them to be standard prometheus gauges. Minor refactor in kubelet/pleg/generic.go and added some tests for running container and running pod metrics. Fixed issues related to github CI pipeline failure: * Updated bazel for new deps * Add comment for exported metrics variables, RunningContainerCount and RunningPodCount * Specify keys explicitly in Gauge metric instantiation. Fix go lint errors. Replace "+=1" with "++", as reported by go lint. Set container state as a label for the metrics "running_container_count". As per the metrics name "running_container_count" it should "ideally" be showing the number of containers in "running" state, but it was showing the count of all containers, irrespective of the state they are in. This commit adds a new label "container_state" to the metrics "running_container_count", which doesn't change the base metrics but adds the option to query the metrics with "container_state" such as "running"/"unknown"/... Remove unused methods reported by staticcheck. Remove variables while instantiating gauge(vec) which are set to nil by default. Convert kubelet metrics (running_pod_count and running_container_count) to standard gauges and add a label to the running_container_count metric. Currently kubelet metrics (running_pod_count and running_container_count) use non-standard prometheus collectors; this change converts them to standard prometheus gauges. Also this adds a new label (container_state) to running_container_count which does a breakdown of containers tracked by kubelet based on the containers' state (running/unknown/created/exited). Set stability explicitly for running_pod_count and running_container_count and reformat test. Register metrics explicitly in test, so that they don't become no-op.
This commit is contained in:
parent
06dc8cf4cb
commit
c02d49d775
@ -461,6 +461,26 @@ var (
|
|||||||
},
|
},
|
||||||
[]string{"runtime_handler"},
|
[]string{"runtime_handler"},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// RunningPodCount is a gauge that tracks the number of Pods currently running
|
||||||
|
RunningPodCount = metrics.NewGauge(
|
||||||
|
&metrics.GaugeOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: "running_pod_count",
|
||||||
|
Help: "Number of pods currently running",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
// RunningContainerCount is a gauge that tracks the number of containers currently running
|
||||||
|
RunningContainerCount = metrics.NewGaugeVec(
|
||||||
|
&metrics.GaugeOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: "running_container_count",
|
||||||
|
Help: "Number of containers currently running",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
|
},
|
||||||
|
[]string{"container_state"},
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
var registerMetrics sync.Once
|
var registerMetrics sync.Once
|
||||||
@ -475,7 +495,6 @@ func Register(containerCache kubecontainer.RuntimeCache, collectors ...prometheu
|
|||||||
legacyregistry.MustRegister(CgroupManagerDuration)
|
legacyregistry.MustRegister(CgroupManagerDuration)
|
||||||
legacyregistry.MustRegister(PodWorkerStartDuration)
|
legacyregistry.MustRegister(PodWorkerStartDuration)
|
||||||
legacyregistry.MustRegister(ContainersPerPodCount)
|
legacyregistry.MustRegister(ContainersPerPodCount)
|
||||||
legacyregistry.RawMustRegister(newPodAndContainerCollector(containerCache))
|
|
||||||
legacyregistry.MustRegister(PLEGRelistDuration)
|
legacyregistry.MustRegister(PLEGRelistDuration)
|
||||||
legacyregistry.MustRegister(PLEGDiscardEvents)
|
legacyregistry.MustRegister(PLEGDiscardEvents)
|
||||||
legacyregistry.MustRegister(PLEGRelistInterval)
|
legacyregistry.MustRegister(PLEGRelistInterval)
|
||||||
@ -498,6 +517,8 @@ func Register(containerCache kubecontainer.RuntimeCache, collectors ...prometheu
|
|||||||
legacyregistry.MustRegister(DeprecatedEvictionStatsAge)
|
legacyregistry.MustRegister(DeprecatedEvictionStatsAge)
|
||||||
legacyregistry.MustRegister(DeprecatedDevicePluginRegistrationCount)
|
legacyregistry.MustRegister(DeprecatedDevicePluginRegistrationCount)
|
||||||
legacyregistry.MustRegister(DeprecatedDevicePluginAllocationLatency)
|
legacyregistry.MustRegister(DeprecatedDevicePluginAllocationLatency)
|
||||||
|
legacyregistry.MustRegister(RunningContainerCount)
|
||||||
|
legacyregistry.MustRegister(RunningPodCount)
|
||||||
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) {
|
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) {
|
||||||
legacyregistry.MustRegister(AssignedConfig)
|
legacyregistry.MustRegister(AssignedConfig)
|
||||||
legacyregistry.MustRegister(ActiveConfig)
|
legacyregistry.MustRegister(ActiveConfig)
|
||||||
@ -520,60 +541,6 @@ func SinceInSeconds(start time.Time) float64 {
|
|||||||
return time.Since(start).Seconds()
|
return time.Since(start).Seconds()
|
||||||
}
|
}
|
||||||
|
|
||||||
func newPodAndContainerCollector(containerCache kubecontainer.RuntimeCache) *podAndContainerCollector {
|
|
||||||
return &podAndContainerCollector{
|
|
||||||
containerCache: containerCache,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Custom collector for current pod and container counts.
|
|
||||||
type podAndContainerCollector struct {
|
|
||||||
// Cache for accessing information about running containers.
|
|
||||||
containerCache kubecontainer.RuntimeCache
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO(vmarmol): Split by source?
|
|
||||||
var (
|
|
||||||
runningPodCountDesc = prometheus.NewDesc(
|
|
||||||
prometheus.BuildFQName("", KubeletSubsystem, "running_pod_count"),
|
|
||||||
"Number of pods currently running",
|
|
||||||
nil, nil)
|
|
||||||
runningContainerCountDesc = prometheus.NewDesc(
|
|
||||||
prometheus.BuildFQName("", KubeletSubsystem, "running_container_count"),
|
|
||||||
"Number of containers currently running",
|
|
||||||
nil, nil)
|
|
||||||
)
|
|
||||||
|
|
||||||
// Describe implements Prometheus' Describe method from the Collector interface. It sends all
|
|
||||||
// available descriptions to the provided channel and returns once the last description has been sent.
|
|
||||||
func (pc *podAndContainerCollector) Describe(ch chan<- *prometheus.Desc) {
|
|
||||||
ch <- runningPodCountDesc
|
|
||||||
ch <- runningContainerCountDesc
|
|
||||||
}
|
|
||||||
|
|
||||||
// Collect implements Prometheus' Collect method from the Collector interface. It's called by the Prometheus
|
|
||||||
// registry when collecting metrics.
|
|
||||||
func (pc *podAndContainerCollector) Collect(ch chan<- prometheus.Metric) {
|
|
||||||
runningPods, err := pc.containerCache.GetPods()
|
|
||||||
if err != nil {
|
|
||||||
klog.Warningf("Failed to get running container information while collecting metrics: %v", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
runningContainers := 0
|
|
||||||
for _, p := range runningPods {
|
|
||||||
runningContainers += len(p.Containers)
|
|
||||||
}
|
|
||||||
ch <- prometheus.MustNewConstMetric(
|
|
||||||
runningPodCountDesc,
|
|
||||||
prometheus.GaugeValue,
|
|
||||||
float64(len(runningPods)))
|
|
||||||
ch <- prometheus.MustNewConstMetric(
|
|
||||||
runningContainerCountDesc,
|
|
||||||
prometheus.GaugeValue,
|
|
||||||
float64(runningContainers))
|
|
||||||
}
|
|
||||||
|
|
||||||
const configMapAPIPathFmt = "/api/v1/namespaces/%s/configmaps/%s"
|
const configMapAPIPathFmt = "/api/v1/namespaces/%s/configmaps/%s"
|
||||||
|
|
||||||
func configLabels(source *corev1.NodeConfigSource) (map[string]string, error) {
|
func configLabels(source *corev1.NodeConfigSource) (map[string]string, error) {
|
||||||
|
@ -33,9 +33,11 @@ go_test(
|
|||||||
deps = [
|
deps = [
|
||||||
"//pkg/kubelet/container:go_default_library",
|
"//pkg/kubelet/container:go_default_library",
|
||||||
"//pkg/kubelet/container/testing:go_default_library",
|
"//pkg/kubelet/container/testing:go_default_library",
|
||||||
|
"//pkg/kubelet/metrics:go_default_library",
|
||||||
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
|
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
|
||||||
"//staging/src/k8s.io/apimachinery/pkg/util/clock:go_default_library",
|
"//staging/src/k8s.io/apimachinery/pkg/util/clock:go_default_library",
|
||||||
"//staging/src/k8s.io/apimachinery/pkg/util/diff:go_default_library",
|
"//staging/src/k8s.io/apimachinery/pkg/util/diff:go_default_library",
|
||||||
|
"//vendor/github.com/prometheus/client_model/go:go_default_library",
|
||||||
"//vendor/github.com/stretchr/testify/assert:go_default_library",
|
"//vendor/github.com/stretchr/testify/assert:go_default_library",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
@ -209,6 +209,8 @@ func (g *GenericPLEG) relist() {
|
|||||||
g.updateRelistTime(timestamp)
|
g.updateRelistTime(timestamp)
|
||||||
|
|
||||||
pods := kubecontainer.Pods(podList)
|
pods := kubecontainer.Pods(podList)
|
||||||
|
// update running pod and container count
|
||||||
|
updateRunningPodAndContainerMetrics(pods)
|
||||||
g.podRecords.setCurrent(pods)
|
g.podRecords.setCurrent(pods)
|
||||||
|
|
||||||
// Compare the old and the current pods, and generate events.
|
// Compare the old and the current pods, and generate events.
|
||||||
@ -431,6 +433,24 @@ func getContainerState(pod *kubecontainer.Pod, cid *kubecontainer.ContainerID) p
|
|||||||
return state
|
return state
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func updateRunningPodAndContainerMetrics(pods []*kubecontainer.Pod) {
|
||||||
|
// Set the running pod gauge to the number of pods passed in
|
||||||
|
metrics.RunningPodCount.Set(float64(len(pods)))
|
||||||
|
// intermediate map to store the count of each "container_state"
|
||||||
|
containerStateCount := make(map[string]int)
|
||||||
|
|
||||||
|
for _, pod := range pods {
|
||||||
|
containers := pod.Containers
|
||||||
|
for _, container := range containers {
|
||||||
|
// update the corresponding "container_state" in map to set value for the gaugeVec metrics
|
||||||
|
containerStateCount[string(container.State)]++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for key, value := range containerStateCount {
|
||||||
|
metrics.RunningContainerCount.WithLabelValues(key).Set(float64(value))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (pr podRecords) getOld(id types.UID) *kubecontainer.Pod {
|
func (pr podRecords) getOld(id types.UID) *kubecontainer.Pod {
|
||||||
r, ok := pr[id]
|
r, ok := pr[id]
|
||||||
if !ok {
|
if !ok {
|
||||||
|
@ -24,12 +24,14 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
dto "github.com/prometheus/client_model/go"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"k8s.io/apimachinery/pkg/types"
|
"k8s.io/apimachinery/pkg/types"
|
||||||
"k8s.io/apimachinery/pkg/util/clock"
|
"k8s.io/apimachinery/pkg/util/clock"
|
||||||
"k8s.io/apimachinery/pkg/util/diff"
|
"k8s.io/apimachinery/pkg/util/diff"
|
||||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||||
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
|
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -643,3 +645,49 @@ func TestRelistIPChange(t *testing.T) {
|
|||||||
assert.Exactly(t, []*PodLifecycleEvent{event}, actualEvents)
|
assert.Exactly(t, []*PodLifecycleEvent{event}, actualEvents)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRunningPodAndContainerCount(t *testing.T) {
|
||||||
|
fakeRuntime := &containertest.FakeRuntime{}
|
||||||
|
runtimeCache, _ := kubecontainer.NewRuntimeCache(fakeRuntime)
|
||||||
|
metrics.Register(runtimeCache)
|
||||||
|
testPleg := newTestGenericPLEG()
|
||||||
|
pleg, runtime := testPleg.pleg, testPleg.runtime
|
||||||
|
|
||||||
|
runtime.AllPodList = []*containertest.FakePod{
|
||||||
|
{Pod: &kubecontainer.Pod{
|
||||||
|
ID: "1234",
|
||||||
|
Containers: []*kubecontainer.Container{
|
||||||
|
createTestContainer("c1", kubecontainer.ContainerStateRunning),
|
||||||
|
createTestContainer("c2", kubecontainer.ContainerStateUnknown),
|
||||||
|
createTestContainer("c3", kubecontainer.ContainerStateUnknown),
|
||||||
|
},
|
||||||
|
}},
|
||||||
|
{Pod: &kubecontainer.Pod{
|
||||||
|
ID: "4567",
|
||||||
|
Containers: []*kubecontainer.Container{
|
||||||
|
createTestContainer("c1", kubecontainer.ContainerStateExited),
|
||||||
|
},
|
||||||
|
}},
|
||||||
|
}
|
||||||
|
|
||||||
|
pleg.relist()
|
||||||
|
|
||||||
|
// assert for container count with label "running"
|
||||||
|
actualMetricRunningContainerCount := &dto.Metric{}
|
||||||
|
expectedMetricRunningContainerCount := float64(1)
|
||||||
|
metrics.RunningContainerCount.WithLabelValues(string(kubecontainer.ContainerStateRunning)).Write(actualMetricRunningContainerCount)
|
||||||
|
assert.Equal(t, expectedMetricRunningContainerCount, actualMetricRunningContainerCount.GetGauge().GetValue())
|
||||||
|
|
||||||
|
// assert for container count with label "unknown"
|
||||||
|
actualMetricUnknownContainerCount := &dto.Metric{}
|
||||||
|
expectedMetricUnknownContainerCount := float64(2)
|
||||||
|
metrics.RunningContainerCount.WithLabelValues(string(kubecontainer.ContainerStateUnknown)).Write(actualMetricUnknownContainerCount)
|
||||||
|
assert.Equal(t, expectedMetricUnknownContainerCount, actualMetricUnknownContainerCount.GetGauge().GetValue())
|
||||||
|
|
||||||
|
// assert for running pod count
|
||||||
|
actualMetricRunningPodCount := &dto.Metric{}
|
||||||
|
metrics.RunningPodCount.Write(actualMetricRunningPodCount)
|
||||||
|
expectedMetricRunningPodCount := float64(2)
|
||||||
|
assert.Equal(t, expectedMetricRunningPodCount, actualMetricRunningPodCount.GetGauge().GetValue())
|
||||||
|
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user