Record PLEG pod relist interval and latency

Relisting latency and interval affect how quickly the kubelet discovers changes.
Record both as Prometheus metrics to surface this information.
This commit is contained in:
Yu-Ju Hong 2015-12-22 10:07:19 -08:00
parent 4606171ad0
commit 7d180b337b
3 changed files with 38 additions and 0 deletions

View File

@ -35,6 +35,8 @@ const (
DockerOperationsKey = "docker_operations_latency_microseconds"
DockerErrorsKey = "docker_errors"
PodWorkerStartLatencyKey = "pod_worker_start_latency_microseconds"
PLEGRelistLatencyKey = "pleg_relist_latency_microseconds"
PLEGRelistIntervalKey = "pleg_relist_interval_microseconds"
)
var (
@ -105,6 +107,20 @@ var (
},
[]string{"operation_type"},
)
// PLEGRelistLatency is a Prometheus summary of how long one PLEG relist takes,
// in microseconds. GenericPLEG.relist records an observation from a deferred
// function, so the value covers the time from the start of relist until it
// returns.
PLEGRelistLatency = prometheus.NewSummary(
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: PLEGRelistLatencyKey,
Help: "Latency in microseconds for relisting pods in PLEG.",
},
)
// PLEGRelistInterval is a Prometheus summary of the time, in microseconds,
// elapsed since the previous PLEG relist started, observed at the beginning of
// each relist. No observation is recorded for the first relist, because there
// is no previous relist time yet.
PLEGRelistInterval = prometheus.NewSummary(
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: PLEGRelistIntervalKey,
Help: "Interval in microseconds between relisting in PLEG.",
},
)
)
var registerMetrics sync.Once
@ -123,6 +139,8 @@ func Register(containerCache kubecontainer.RuntimeCache) {
prometheus.MustRegister(ContainersPerPodCount)
prometheus.MustRegister(DockerErrors)
prometheus.MustRegister(newPodAndContainerCollector(containerCache))
prometheus.MustRegister(PLEGRelistLatency)
prometheus.MustRegister(PLEGRelistInterval)
})
}

View File

@ -22,6 +22,7 @@ import (
"github.com/golang/glog"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/kubernetes/pkg/types"
"k8s.io/kubernetes/pkg/util"
)
@ -50,6 +51,8 @@ type GenericPLEG struct {
eventChannel chan *PodLifecycleEvent
// The internal cache for container information.
containers map[string]containerInfo
// Time of the last relisting.
lastRelistTime time.Time
}
type containerInfo struct {
@ -101,6 +104,17 @@ func generateEvent(podID types.UID, cid string, oldState, newState kubecontainer
// with the internal pods/containers, and generates events accordingly.
func (g *GenericPLEG) relist() {
glog.V(5).Infof("GenericPLEG: Relisting")
timestamp := time.Now()
if !g.lastRelistTime.IsZero() {
metrics.PLEGRelistInterval.Observe(metrics.SinceInMicroseconds(g.lastRelistTime))
}
defer func() {
// Update the relist time.
g.lastRelistTime = timestamp
metrics.PLEGRelistLatency.Observe(metrics.SinceInMicroseconds(timestamp))
}()
// Get all the pods.
pods, err := g.runtime.GetPods(true)
if err != nil {

View File

@ -74,6 +74,12 @@ var KnownKubeletMetrics = map[string][]string{
"kubelet_generate_pod_status_latency_microseconds": {"quantile"},
"kubelet_generate_pod_status_latency_microseconds_count": {},
"kubelet_generate_pod_status_latency_microseconds_sum": {},
"kubelet_pleg_relist_latency_microseconds": {"quantile"},
"kubelet_pleg_relist_latency_microseconds_sum": {},
"kubelet_pleg_relist_latency_microseconds_count": {},
"kubelet_pleg_relist_interval_microseconds": {"quantile"},
"kubelet_pleg_relist_interval_microseconds_sum": {},
"kubelet_pleg_relist_interval_microseconds_count": {},
"kubelet_pod_start_latency_microseconds": {"quantile"},
"kubelet_pod_start_latency_microseconds_count": {},
"kubelet_pod_start_latency_microseconds_sum": {},