Merge pull request #9523 from bprashanth/kl_stats

Scrape /metrics of kubelets from e2e tests
This commit is contained in:
Saad Ali
2015-06-17 11:30:28 -07:00
4 changed files with 261 additions and 15 deletions

View File

@@ -35,6 +35,7 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
kubecontainer "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/container"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/lifecycle"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/metrics"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/network"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/prober"
kubeletTypes "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/types"
@@ -722,6 +723,10 @@ func (dm *DockerManager) GetContainers(all bool) ([]*kubecontainer.Container, er
}
func (dm *DockerManager) GetPods(all bool) ([]*kubecontainer.Pod, error) {
start := time.Now()
defer func() {
metrics.ContainerManagerLatency.WithLabelValues("GetPods").Observe(metrics.SinceInMicroseconds(start))
}()
pods := make(map[types.UID]*kubecontainer.Pod)
var result []*kubecontainer.Pod
@@ -1159,6 +1164,11 @@ func (dm *DockerManager) killContainer(containerID types.UID) error {
// Run a single container from a pod. Returns the docker container ID
func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Container, netMode, ipcMode string) (kubeletTypes.DockerID, error) {
start := time.Now()
defer func() {
metrics.ContainerManagerLatency.WithLabelValues("runContainerInPod").Observe(metrics.SinceInMicroseconds(start))
}()
ref, err := kubecontainer.GenerateContainerRef(pod, container)
if err != nil {
glog.Errorf("Couldn't make a ref to pod %v, container %v: '%v'", pod.Name, container.Name, err)
@@ -1224,6 +1234,10 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe
// createPodInfraContainer starts the pod infra container for a pod. Returns the docker container ID of the newly created container.
func (dm *DockerManager) createPodInfraContainer(pod *api.Pod) (kubeletTypes.DockerID, error) {
start := time.Now()
defer func() {
metrics.ContainerManagerLatency.WithLabelValues("createPodInfraContainer").Observe(metrics.SinceInMicroseconds(start))
}()
// Use host networking if specified.
netNamespace := ""
var ports []api.ContainerPort
@@ -1299,6 +1313,11 @@ type PodContainerChangesSpec struct {
}
func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, runningPod kubecontainer.Pod, podStatus api.PodStatus) (PodContainerChangesSpec, error) {
start := time.Now()
defer func() {
metrics.ContainerManagerLatency.WithLabelValues("computePodContainerChanges").Observe(metrics.SinceInMicroseconds(start))
}()
podFullName := kubecontainer.GetPodFullName(pod)
uid := pod.UID
glog.V(4).Infof("Syncing Pod %+v, podFullName: %q, uid: %q", pod, podFullName, uid)
@@ -1445,6 +1464,12 @@ func (dm *DockerManager) pullImage(pod *api.Pod, container *api.Container, pullS
// Sync the running pod to match the specified desired pod.
func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, podStatus api.PodStatus, pullSecrets []api.Secret) error {
start := time.Now()
defer func() {
metrics.ContainerManagerLatency.WithLabelValues("SyncPod").Observe(metrics.SinceInMicroseconds(start))
}()
podFullName := kubecontainer.GetPodFullName(pod)
containerChanges, err := dm.computePodContainerChanges(pod, runningPod, podStatus)
glog.V(3).Infof("Got container changes for pod %q: %+v", podFullName, containerChanges)

View File

@@ -1092,6 +1092,7 @@ func (kl *Kubelet) makePodDataDirs(pod *api.Pod) error {
func (kl *Kubelet) syncPod(pod *api.Pod, mirrorPod *api.Pod, runningPod kubecontainer.Pod, updateType SyncPodType) error {
podFullName := kubecontainer.GetPodFullName(pod)
uid := pod.UID
start := time.Now()
// Before returning, regenerate status and store it in the cache.
defer func() {
@@ -1108,6 +1109,11 @@ func (kl *Kubelet) syncPod(pod *api.Pod, mirrorPod *api.Pod, runningPod kubecont
if mirrorPod != nil {
podToUpdate = mirrorPod
}
existingStatus, ok := kl.statusManager.GetPodStatus(podFullName)
if !ok || existingStatus.Phase == api.PodPending && status.Phase == api.PodRunning {
// TODO: Check the pod annotation instead of using `start`
metrics.PodStartLatency.Observe(metrics.SinceInMicroseconds(start))
}
kl.statusManager.SetPodStatus(podToUpdate, status)
}
}()
@@ -1379,7 +1385,7 @@ func (kl *Kubelet) SyncPods(allPods []*api.Pod, podSyncTypes map[types.UID]SyncP
// Run the sync in an async manifest worker.
kl.podWorkers.UpdatePod(pod, mirrorPods[podFullName], func() {
metrics.SyncPodLatency.WithLabelValues(podSyncTypes[pod.UID].String()).Observe(metrics.SinceInMicroseconds(start))
metrics.PodWorkerLatency.WithLabelValues(podSyncTypes[pod.UID].String()).Observe(metrics.SinceInMicroseconds(start))
})
// Note the number of containers for new pods.
@@ -2094,6 +2100,12 @@ func getPodReadyCondition(spec *api.PodSpec, statuses []api.ContainerStatus) []a
// By passing the pod directly, this method avoids pod lookup, which requires
// grabbing a lock.
func (kl *Kubelet) generatePodStatus(pod *api.Pod) (api.PodStatus, error) {
start := time.Now()
defer func() {
metrics.PodStatusLatency.Observe(metrics.SinceInMicroseconds(start))
}()
podFullName := kubecontainer.GetPodFullName(pod)
glog.V(3).Infof("Generating status for %q", podFullName)

View File

@@ -25,43 +25,74 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
const kubeletSubsystem = "kubelet"
const (
KubeletSubsystem = "kubelet"
PodWorkerLatencyKey = "pod_worker_latency_microseconds"
SyncPodsLatencyKey = "sync_pods_latency_microseconds"
PodStartLatencyKey = "pod_start_latency_microseconds"
PodStatusLatencyKey = "generate_pod_status_latency_microseconds"
ContainerManagerOperationsKey = "container_manager_latency_microseconds"
DockerOperationsKey = "docker_operations_latency_microseconds"
DockerErrorsKey = "docker_errors"
)
var (
ContainersPerPodCount = prometheus.NewSummary(
prometheus.SummaryOpts{
Subsystem: kubeletSubsystem,
Subsystem: KubeletSubsystem,
Name: "containers_per_pod_count",
Help: "The number of containers per pod.",
},
)
SyncPodLatency = prometheus.NewSummaryVec(
PodWorkerLatency = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: kubeletSubsystem,
Name: "sync_pod_latency_microseconds",
Subsystem: KubeletSubsystem,
Name: PodWorkerLatencyKey,
Help: "Latency in microseconds to sync a single pod. Broken down by operation type: create, update, or sync",
},
[]string{"operation_type"},
)
SyncPodsLatency = prometheus.NewSummary(
prometheus.SummaryOpts{
Subsystem: kubeletSubsystem,
Name: "sync_pods_latency_microseconds",
Subsystem: KubeletSubsystem,
Name: SyncPodsLatencyKey,
Help: "Latency in microseconds to sync all pods.",
},
)
PodStartLatency = prometheus.NewSummary(
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: PodStartLatencyKey,
Help: "Latency in microseconds for a single pod to go from pending to running. Broken down by podname.",
},
)
PodStatusLatency = prometheus.NewSummary(
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: PodStatusLatencyKey,
Help: "Latency in microseconds to generate status for a single pod.",
},
)
ContainerManagerLatency = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: ContainerManagerOperationsKey,
Help: "Latency in microseconds for container manager operations. Broken down by method.",
},
[]string{"operation_type"},
)
DockerOperationsLatency = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: kubeletSubsystem,
Name: "docker_operations_latency_microseconds",
Subsystem: KubeletSubsystem,
Name: DockerOperationsKey,
Help: "Latency in microseconds of Docker operations. Broken down by operation type.",
},
[]string{"operation_type"},
)
DockerErrors = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: kubeletSubsystem,
Name: "docker_errors",
Subsystem: KubeletSubsystem,
Name: DockerErrorsKey,
Help: "Cumulative number of Docker errors by operation type.",
},
[]string{"operation_type"},
@@ -74,8 +105,11 @@ var registerMetrics sync.Once
func Register(containerCache kubecontainer.RuntimeCache) {
// Register the metrics.
registerMetrics.Do(func() {
prometheus.MustRegister(SyncPodLatency)
prometheus.MustRegister(PodWorkerLatency)
prometheus.MustRegister(PodStartLatency)
prometheus.MustRegister(PodStatusLatency)
prometheus.MustRegister(DockerOperationsLatency)
prometheus.MustRegister(ContainerManagerLatency)
prometheus.MustRegister(SyncPodsLatency)
prometheus.MustRegister(ContainersPerPodCount)
prometheus.MustRegister(DockerErrors)
@@ -103,11 +137,11 @@ type podAndContainerCollector struct {
// TODO(vmarmol): Split by source?
var (
runningPodCountDesc = prometheus.NewDesc(
prometheus.BuildFQName("", kubeletSubsystem, "running_pod_count"),
prometheus.BuildFQName("", KubeletSubsystem, "running_pod_count"),
"Number of pods currently running",
nil, nil)
runningContainerCountDesc = prometheus.NewDesc(
prometheus.BuildFQName("", kubeletSubsystem, "running_container_count"),
prometheus.BuildFQName("", KubeletSubsystem, "running_container_count"),
"Number of containers currently running",
nil, nil)
)