From 6b0d3d8df0f1e2f5b3bede4084c9f43fd757cb33 Mon Sep 17 00:00:00 2001 From: Victor Marmol Date: Tue, 2 Jun 2015 16:13:35 -0700 Subject: [PATCH] Add DockerErrors metric in the Kubelet. Allows the tracking of errors by Docker operation. --- .../dockertools/instrumented_docker.go | 71 ++++++++++++++----- pkg/kubelet/metrics/metrics.go | 9 +++ 2 files changed, 64 insertions(+), 16 deletions(-) diff --git a/pkg/kubelet/dockertools/instrumented_docker.go b/pkg/kubelet/dockertools/instrumented_docker.go index d55bcc6885d..d8bc844d4c9 100644 --- a/pkg/kubelet/dockertools/instrumented_docker.go +++ b/pkg/kubelet/dockertools/instrumented_docker.go @@ -39,114 +39,153 @@ func recordOperation(operation string, start time.Time) { metrics.DockerOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInMicroseconds(start)) } +// Record error for metric if an error occurred. +func recordError(operation string, err error) { + if err != nil { + metrics.DockerErrors.WithLabelValues(operation).Inc() + } +} + func (in instrumentedDockerInterface) ListContainers(options docker.ListContainersOptions) ([]docker.APIContainers, error) { const operation = "list_containers" defer recordOperation(operation, time.Now()) - return in.client.ListContainers(options) + out, err := in.client.ListContainers(options) + recordError(operation, err) + return out, err } func (in instrumentedDockerInterface) InspectContainer(id string) (*docker.Container, error) { const operation = "inspect_container" defer recordOperation(operation, time.Now()) - return in.client.InspectContainer(id) + out, err := in.client.InspectContainer(id) + recordError(operation, err) + return out, err } func (in instrumentedDockerInterface) CreateContainer(opts docker.CreateContainerOptions) (*docker.Container, error) { const operation = "create_container" defer recordOperation(operation, time.Now()) - return in.client.CreateContainer(opts) + out, err := in.client.CreateContainer(opts) + recordError(operation, err) + return 
out, err } func (in instrumentedDockerInterface) StartContainer(id string, hostConfig *docker.HostConfig) error { const operation = "start_container" defer recordOperation(operation, time.Now()) - return in.client.StartContainer(id, hostConfig) + err := in.client.StartContainer(id, hostConfig) + recordError(operation, err) + return err } func (in instrumentedDockerInterface) StopContainer(id string, timeout uint) error { const operation = "stop_container" defer recordOperation(operation, time.Now()) - return in.client.StopContainer(id, timeout) + err := in.client.StopContainer(id, timeout) + recordError(operation, err) + return err } func (in instrumentedDockerInterface) RemoveContainer(opts docker.RemoveContainerOptions) error { const operation = "remove_container" defer recordOperation(operation, time.Now()) - return in.client.RemoveContainer(opts) + err := in.client.RemoveContainer(opts) + recordError(operation, err) + return err } func (in instrumentedDockerInterface) InspectImage(image string) (*docker.Image, error) { const operation = "inspect_image" defer recordOperation(operation, time.Now()) - return in.client.InspectImage(image) + out, err := in.client.InspectImage(image) + recordError(operation, err) + return out, err } func (in instrumentedDockerInterface) ListImages(opts docker.ListImagesOptions) ([]docker.APIImages, error) { const operation = "list_images" defer recordOperation(operation, time.Now()) - return in.client.ListImages(opts) + out, err := in.client.ListImages(opts) + recordError(operation, err) + return out, err } func (in instrumentedDockerInterface) PullImage(opts docker.PullImageOptions, auth docker.AuthConfiguration) error { const operation = "pull_image" defer recordOperation(operation, time.Now()) - return in.client.PullImage(opts, auth) + err := in.client.PullImage(opts, auth) + recordError(operation, err) + return err } func (in instrumentedDockerInterface) RemoveImage(image string) error { const operation = "remove_image" defer 
recordOperation(operation, time.Now()) - return in.client.RemoveImage(image) + err := in.client.RemoveImage(image) + recordError(operation, err) + return err } func (in instrumentedDockerInterface) Logs(opts docker.LogsOptions) error { const operation = "logs" defer recordOperation(operation, time.Now()) - return in.client.Logs(opts) + err := in.client.Logs(opts) + recordError(operation, err) + return err } func (in instrumentedDockerInterface) Version() (*docker.Env, error) { const operation = "version" defer recordOperation(operation, time.Now()) - return in.client.Version() + out, err := in.client.Version() + recordError(operation, err) + return out, err } func (in instrumentedDockerInterface) Info() (*docker.Env, error) { const operation = "info" defer recordOperation(operation, time.Now()) - return in.client.Info() + out, err := in.client.Info() + recordError(operation, err) + return out, err } func (in instrumentedDockerInterface) CreateExec(opts docker.CreateExecOptions) (*docker.Exec, error) { const operation = "create_exec" defer recordOperation(operation, time.Now()) - return in.client.CreateExec(opts) + out, err := in.client.CreateExec(opts) + recordError(operation, err) + return out, err } func (in instrumentedDockerInterface) StartExec(startExec string, opts docker.StartExecOptions) error { const operation = "start_exec" defer recordOperation(operation, time.Now()) - return in.client.StartExec(startExec, opts) + err := in.client.StartExec(startExec, opts) + recordError(operation, err) + return err } func (in instrumentedDockerInterface) InspectExec(id string) (*docker.ExecInspect, error) { const operation = "inspect_exec" defer recordOperation(operation, time.Now()) - return in.client.InspectExec(id) + out, err := in.client.InspectExec(id) + recordError(operation, err) + return out, err } diff --git a/pkg/kubelet/metrics/metrics.go b/pkg/kubelet/metrics/metrics.go index d4a15c99f2d..20ba153a946 100644 --- a/pkg/kubelet/metrics/metrics.go +++ 
b/pkg/kubelet/metrics/metrics.go @@ -58,6 +58,14 @@ var ( }, []string{"operation_type"}, ) + DockerErrors = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Subsystem: kubeletSubsystem, + Name: "docker_errors", + Help: "Cumulative number of Docker errors by operation type.", + }, + []string{"operation_type"}, + ) ) var registerMetrics sync.Once @@ -70,6 +78,7 @@ func Register(containerCache kubecontainer.RuntimeCache) { prometheus.MustRegister(DockerOperationsLatency) prometheus.MustRegister(SyncPodsLatency) prometheus.MustRegister(ContainersPerPodCount) + prometheus.MustRegister(DockerErrors) prometheus.MustRegister(newPodAndContainerCollector(containerCache)) }) }