diff --git a/pkg/kubelet/prober/prober_manager.go b/pkg/kubelet/prober/prober_manager.go index 62106b95dba..0a88c146ba5 100644 --- a/pkg/kubelet/prober/prober_manager.go +++ b/pkg/kubelet/prober/prober_manager.go @@ -32,12 +32,29 @@ import ( "k8s.io/kubernetes/pkg/kubelet/util/format" ) -// ProberResults stores the results of a probe as prometheus metrics. -var ProberResults = prometheus.NewGaugeVec( +// ProberResults stores the cumulative number of probes, by result, as a prometheus metric. +var ProberResults = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Subsystem: "prober", + Name: "probe_total", + Help: "Cumulative number of a liveness or readiness probe for a container by result.", + }, + []string{"probe_type", + "result", + "container", + "pod", + "namespace", + "pod_uid"}, +) + +// DeprecatedProberResults stores the results of a probe as prometheus metrics. +// This metric is deprecated and will be removed in a future release. +// Please migrate to the counter-type metric (ProberResults) above. +var DeprecatedProberResults = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: "prober", Name: "probe_result", - Help: "The result of a liveness or readiness probe for a container.", + Help: "(Deprecated) The result of a liveness or readiness probe for a container.", }, []string{"probe_type", "container_name", @@ -130,6 +147,10 @@ type probeType int const ( liveness probeType = iota readiness + + probeResultSuccessful string = "successful" + probeResultFailed string = "failed" + probeResultUnknown string = "unknown" ) // For debugging. diff --git a/pkg/kubelet/prober/worker.go b/pkg/kubelet/prober/worker.go index 295f7c40173..6dd0d05d955 100644 --- a/pkg/kubelet/prober/worker.go +++ b/pkg/kubelet/prober/worker.go @@ -69,7 +69,10 @@ type worker struct { // proberResultsMetricLabels holds the labels attached to this worker // for the ProberResults metric. 
- proberResultsMetricLabels prometheus.Labels + proberResultsMetricLabels prometheus.Labels + proberResultsSuccessfulMetricLabels prometheus.Labels + proberResultsFailedMetricLabels prometheus.Labels + proberResultsUnknownMetricLabels prometheus.Labels } // Creates and starts a new probe worker. @@ -98,16 +101,27 @@ func newWorker( w.initialValue = results.Success } - w.proberResultsMetricLabels = prometheus.Labels{ - "probe_type": w.probeType.String(), - "container_name": w.container.Name, - "container": w.container.Name, - "pod_name": w.pod.Name, - "pod": w.pod.Name, - "namespace": w.pod.Namespace, - "pod_uid": string(w.pod.UID), + basicMetricLabels := prometheus.Labels{ + "probe_type": w.probeType.String(), + "container": w.container.Name, + "pod": w.pod.Name, + "namespace": w.pod.Namespace, + "pod_uid": string(w.pod.UID), } + w.proberResultsMetricLabels = deepCopyPrometheusLabels(basicMetricLabels) + w.proberResultsMetricLabels["container_name"] = w.container.Name + w.proberResultsMetricLabels["pod_name"] = w.pod.Name + + w.proberResultsSuccessfulMetricLabels = deepCopyPrometheusLabels(basicMetricLabels) + w.proberResultsSuccessfulMetricLabels["result"] = probeResultSuccessful + + w.proberResultsFailedMetricLabels = deepCopyPrometheusLabels(basicMetricLabels) + w.proberResultsFailedMetricLabels["result"] = probeResultFailed + + w.proberResultsUnknownMetricLabels = deepCopyPrometheusLabels(basicMetricLabels) + w.proberResultsUnknownMetricLabels["result"] = probeResultUnknown + return w } @@ -129,7 +143,10 @@ func (w *worker) run() { } w.probeManager.removeWorker(w.pod.UID, w.container.Name, w.probeType) - ProberResults.Delete(w.proberResultsMetricLabels) + ProberResults.Delete(w.proberResultsSuccessfulMetricLabels) + ProberResults.Delete(w.proberResultsFailedMetricLabels) + ProberResults.Delete(w.proberResultsUnknownMetricLabels) + DeprecatedProberResults.Delete(w.proberResultsMetricLabels) }() probeLoop: @@ -220,6 +237,15 @@ func (w *worker) doProbe() 
(keepGoing bool) { return true } + switch result { + case results.Success: + ProberResults.With(w.proberResultsSuccessfulMetricLabels).Inc() + case results.Failure: + ProberResults.With(w.proberResultsFailedMetricLabels).Inc() + default: + ProberResults.With(w.proberResultsUnknownMetricLabels).Inc() + } + if w.lastResult == result { w.resultRun++ } else { @@ -234,7 +260,7 @@ func (w *worker) doProbe() (keepGoing bool) { } w.resultsManager.Set(w.containerID, result, w.pod) - ProberResults.With(w.proberResultsMetricLabels).Set(result.ToPrometheusType()) + DeprecatedProberResults.With(w.proberResultsMetricLabels).Set(result.ToPrometheusType()) if w.probeType == liveness && result == results.Failure { // The container fails a liveness check, it will need to be restarted. @@ -247,3 +273,11 @@ func (w *worker) doProbe() (keepGoing bool) { return true } + +func deepCopyPrometheusLabels(m prometheus.Labels) prometheus.Labels { + ret := make(prometheus.Labels, len(m)) + for k, v := range m { + ret[k] = v + } + return ret +} diff --git a/pkg/kubelet/server/server.go b/pkg/kubelet/server/server.go index a4f75116558..a384c92a0c0 100644 --- a/pkg/kubelet/server/server.go +++ b/pkg/kubelet/server/server.go @@ -321,6 +321,7 @@ func (s *Server) InstallDefaultHandlers() { // prober metrics are exposed under a different endpoint p := prometheus.NewRegistry() p.MustRegister(prober.ProberResults) + p.MustRegister(prober.DeprecatedProberResults) s.restfulCont.Handle(proberMetricsPath, promhttp.HandlerFor(p, promhttp.HandlerOpts{ErrorHandling: promhttp.ContinueOnError}), )