Merge pull request #81573 from irajdeep/irajdeep/change_runningPod_runningContainer_metrics

Convert kubelet metrics(running_pod_count and running_container_count) from non-standard prometheus collectors to standard gauges
This commit is contained in:
Kubernetes Prow Robot 2019-08-29 18:08:42 -07:00 committed by GitHub
commit 7da563f0f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 92 additions and 55 deletions

View File

@ -461,6 +461,26 @@ var (
},
[]string{"runtime_handler"},
)
// RunningPodCount is a gauge that tracks the number of Pods currently running
RunningPodCount = metrics.NewGauge(
&metrics.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: "running_pod_count",
Help: "Number of pods currently running",
StabilityLevel: metrics.ALPHA,
},
)
// RunningContainerCount is a gauge that tracks the number of containers currently running
RunningContainerCount = metrics.NewGaugeVec(
&metrics.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: "running_container_count",
Help: "Number of containers currently running",
StabilityLevel: metrics.ALPHA,
},
[]string{"container_state"},
)
)
var registerMetrics sync.Once
@ -475,7 +495,6 @@ func Register(containerCache kubecontainer.RuntimeCache, collectors ...prometheu
legacyregistry.MustRegister(CgroupManagerDuration)
legacyregistry.MustRegister(PodWorkerStartDuration)
legacyregistry.MustRegister(ContainersPerPodCount)
legacyregistry.RawMustRegister(newPodAndContainerCollector(containerCache))
legacyregistry.MustRegister(PLEGRelistDuration)
legacyregistry.MustRegister(PLEGDiscardEvents)
legacyregistry.MustRegister(PLEGRelistInterval)
@ -498,6 +517,8 @@ func Register(containerCache kubecontainer.RuntimeCache, collectors ...prometheu
legacyregistry.MustRegister(DeprecatedEvictionStatsAge)
legacyregistry.MustRegister(DeprecatedDevicePluginRegistrationCount)
legacyregistry.MustRegister(DeprecatedDevicePluginAllocationLatency)
legacyregistry.MustRegister(RunningContainerCount)
legacyregistry.MustRegister(RunningPodCount)
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) {
legacyregistry.MustRegister(AssignedConfig)
legacyregistry.MustRegister(ActiveConfig)
@ -520,60 +541,6 @@ func SinceInSeconds(start time.Time) float64 {
return time.Since(start).Seconds()
}
func newPodAndContainerCollector(containerCache kubecontainer.RuntimeCache) *podAndContainerCollector {
return &podAndContainerCollector{
containerCache: containerCache,
}
}
// Custom collector for current pod and container counts.
type podAndContainerCollector struct {
// Cache for accessing information about running containers.
containerCache kubecontainer.RuntimeCache
}
// TODO(vmarmol): Split by source?
var (
runningPodCountDesc = prometheus.NewDesc(
prometheus.BuildFQName("", KubeletSubsystem, "running_pod_count"),
"Number of pods currently running",
nil, nil)
runningContainerCountDesc = prometheus.NewDesc(
prometheus.BuildFQName("", KubeletSubsystem, "running_container_count"),
"Number of containers currently running",
nil, nil)
)
// Describe implements Prometheus' Describe method from the Collector interface. It sends all
// available descriptions to the provided channel and retunrs once the last description has been sent.
func (pc *podAndContainerCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- runningPodCountDesc
ch <- runningContainerCountDesc
}
// Collect implements Prometheus' Collect method from the Collector interface. It's called by the Prometheus
// registry when collecting metrics.
func (pc *podAndContainerCollector) Collect(ch chan<- prometheus.Metric) {
runningPods, err := pc.containerCache.GetPods()
if err != nil {
klog.Warningf("Failed to get running container information while collecting metrics: %v", err)
return
}
runningContainers := 0
for _, p := range runningPods {
runningContainers += len(p.Containers)
}
ch <- prometheus.MustNewConstMetric(
runningPodCountDesc,
prometheus.GaugeValue,
float64(len(runningPods)))
ch <- prometheus.MustNewConstMetric(
runningContainerCountDesc,
prometheus.GaugeValue,
float64(runningContainers))
}
const configMapAPIPathFmt = "/api/v1/namespaces/%s/configmaps/%s"
func configLabels(source *corev1.NodeConfigSource) (map[string]string, error) {

View File

@ -33,9 +33,11 @@ go_test(
deps = [
"//pkg/kubelet/container:go_default_library",
"//pkg/kubelet/container/testing:go_default_library",
"//pkg/kubelet/metrics:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/clock:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/diff:go_default_library",
"//vendor/github.com/prometheus/client_model/go:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
],
)

View File

@ -209,6 +209,8 @@ func (g *GenericPLEG) relist() {
g.updateRelistTime(timestamp)
pods := kubecontainer.Pods(podList)
// update running pod and container count
updateRunningPodAndContainerMetrics(pods)
g.podRecords.setCurrent(pods)
// Compare the old and the current pods, and generate events.
@ -431,6 +433,24 @@ func getContainerState(pod *kubecontainer.Pod, cid *kubecontainer.ContainerID) p
return state
}
func updateRunningPodAndContainerMetrics(pods []*kubecontainer.Pod) {
// Set the number of running pods in the parameter
metrics.RunningPodCount.Set(float64(len(pods)))
// intermediate map to store the count of each "container_state"
containerStateCount := make(map[string]int)
for _, pod := range pods {
containers := pod.Containers
for _, container := range containers {
// update the corresponding "container_state" in map to set value for the gaugeVec metrics
containerStateCount[string(container.State)]++
}
}
for key, value := range containerStateCount {
metrics.RunningContainerCount.WithLabelValues(key).Set(float64(value))
}
}
func (pr podRecords) getOld(id types.UID) *kubecontainer.Pod {
r, ok := pr[id]
if !ok {

View File

@ -24,12 +24,14 @@ import (
"testing"
"time"
dto "github.com/prometheus/client_model/go"
"github.com/stretchr/testify/assert"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/clock"
"k8s.io/apimachinery/pkg/util/diff"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
"k8s.io/kubernetes/pkg/kubelet/metrics"
)
const (
@ -643,3 +645,49 @@ func TestRelistIPChange(t *testing.T) {
assert.Exactly(t, []*PodLifecycleEvent{event}, actualEvents)
}
}
func TestRunningPodAndContainerCount(t *testing.T) {
fakeRuntime := &containertest.FakeRuntime{}
runtimeCache, _ := kubecontainer.NewRuntimeCache(fakeRuntime)
metrics.Register(runtimeCache)
testPleg := newTestGenericPLEG()
pleg, runtime := testPleg.pleg, testPleg.runtime
runtime.AllPodList = []*containertest.FakePod{
{Pod: &kubecontainer.Pod{
ID: "1234",
Containers: []*kubecontainer.Container{
createTestContainer("c1", kubecontainer.ContainerStateRunning),
createTestContainer("c2", kubecontainer.ContainerStateUnknown),
createTestContainer("c3", kubecontainer.ContainerStateUnknown),
},
}},
{Pod: &kubecontainer.Pod{
ID: "4567",
Containers: []*kubecontainer.Container{
createTestContainer("c1", kubecontainer.ContainerStateExited),
},
}},
}
pleg.relist()
// assert for container count with label "running"
actualMetricRunningContainerCount := &dto.Metric{}
expectedMetricRunningContainerCount := float64(1)
metrics.RunningContainerCount.WithLabelValues(string(kubecontainer.ContainerStateRunning)).Write(actualMetricRunningContainerCount)
assert.Equal(t, expectedMetricRunningContainerCount, actualMetricRunningContainerCount.GetGauge().GetValue())
// assert for container count with label "unknown"
actualMetricUnknownContainerCount := &dto.Metric{}
expectedMetricUnknownContainerCount := float64(2)
metrics.RunningContainerCount.WithLabelValues(string(kubecontainer.ContainerStateUnknown)).Write(actualMetricUnknownContainerCount)
assert.Equal(t, expectedMetricUnknownContainerCount, actualMetricUnknownContainerCount.GetGauge().GetValue())
// assert for running pod count
actualMetricRunningPodCount := &dto.Metric{}
metrics.RunningPodCount.Write(actualMetricRunningPodCount)
expectedMetricRunningPodCount := float64(2)
assert.Equal(t, expectedMetricRunningPodCount, actualMetricRunningPodCount.GetGauge().GetValue())
}