Merge pull request #104484 from jackfrancis/prober-duration-metrics

add container probe duration metrics
This commit is contained in:
Kubernetes Prow Robot 2022-07-29 13:17:11 -07:00 committed by GitHub
commit 126c07604d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 100 additions and 1 deletions

View File

@ -48,6 +48,20 @@ var ProberResults = metrics.NewCounterVec(
"pod_uid"},
)
// ProberDuration is a histogram vector, exposed as prober_probe_duration_seconds,
// that records how long a probe took, in seconds.
//
// Series are keyed by probe_type, container, pod and namespace only; unlike
// ProberResults there is no "result" (or "pod_uid") label, so observations for
// different probe outcomes of the same container land in the same series.
// The probe worker observes it when a probe result is Success or falls into
// the default (non-Failure) branch; failed probes are not observed.
var ProberDuration = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: "prober",
Name: "probe_duration_seconds",
Help: "Duration in seconds for a probe response.",
StabilityLevel: metrics.ALPHA,
},
// Label names; the order here fixes the order of the label values.
[]string{"probe_type",
"container",
"pod",
"namespace"},
)
// Manager manages pod probing. It creates a probe "worker" for every container that specifies a
// probe (AddPod). The worker periodically probes its assigned container and caches the results. The
// manager use the cached probe results to set the appropriate Ready state in the PodStatus when

View File

@ -17,7 +17,9 @@ limitations under the License.
package prober
import (
"fmt"
"math/rand"
"strings"
"time"
v1 "k8s.io/api/core/v1"
@ -25,6 +27,7 @@ import (
"k8s.io/component-base/metrics"
"k8s.io/klog/v2"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/apis/apps"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/prober/results"
)
@ -74,6 +77,10 @@ type worker struct {
proberResultsSuccessfulMetricLabels metrics.Labels
proberResultsFailedMetricLabels metrics.Labels
proberResultsUnknownMetricLabels metrics.Labels
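// proberDurationSuccessfulMetricLabels and proberDurationUnknownMetricLabels
// hold the labels attached to this worker for the ProberDuration metric, one
// label set for each probe result whose duration is recorded.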
proberDurationSuccessfulMetricLabels metrics.Labels
proberDurationUnknownMetricLabels metrics.Labels
}
// Creates and starts a new probe worker.
@ -107,14 +114,23 @@ func newWorker(
w.initialValue = results.Unknown
}
podName := getPodLabelName(w.pod)
basicMetricLabels := metrics.Labels{
"probe_type": w.probeType.String(),
"container": w.container.Name,
"pod": w.pod.Name,
"pod": podName,
"namespace": w.pod.Namespace,
"pod_uid": string(w.pod.UID),
}
proberDurationLabels := metrics.Labels{
"probe_type": w.probeType.String(),
"container": w.container.Name,
"pod": podName,
"namespace": w.pod.Namespace,
}
w.proberResultsSuccessfulMetricLabels = deepCopyPrometheusLabels(basicMetricLabels)
w.proberResultsSuccessfulMetricLabels["result"] = probeResultSuccessful
@ -124,6 +140,9 @@ func newWorker(
w.proberResultsUnknownMetricLabels = deepCopyPrometheusLabels(basicMetricLabels)
w.proberResultsUnknownMetricLabels["result"] = probeResultUnknown
w.proberDurationSuccessfulMetricLabels = deepCopyPrometheusLabels(proberDurationLabels)
w.proberDurationUnknownMetricLabels = deepCopyPrometheusLabels(proberDurationLabels)
return w
}
@ -151,6 +170,8 @@ func (w *worker) run() {
ProberResults.Delete(w.proberResultsSuccessfulMetricLabels)
ProberResults.Delete(w.proberResultsFailedMetricLabels)
ProberResults.Delete(w.proberResultsUnknownMetricLabels)
ProberDuration.Delete(w.proberDurationSuccessfulMetricLabels)
ProberDuration.Delete(w.proberDurationUnknownMetricLabels)
}()
probeLoop:
@ -181,6 +202,7 @@ func (w *worker) doProbe() (keepGoing bool) {
defer func() { recover() }() // Actually eat panics (HandleCrash takes care of logging)
defer runtime.HandleCrash(func(_ interface{}) { keepGoing = true })
startTime := time.Now()
status, ok := w.probeManager.statusManager.GetPodStatus(w.pod.UID)
if !ok {
// Either the pod has not been created yet, or it was already deleted.
@ -271,10 +293,12 @@ func (w *worker) doProbe() (keepGoing bool) {
switch result {
case results.Success:
ProberResults.With(w.proberResultsSuccessfulMetricLabels).Inc()
ProberDuration.With(w.proberDurationSuccessfulMetricLabels).Observe(time.Since(startTime).Seconds())
case results.Failure:
ProberResults.With(w.proberResultsFailedMetricLabels).Inc()
default:
ProberResults.With(w.proberResultsUnknownMetricLabels).Inc()
ProberDuration.With(w.proberDurationUnknownMetricLabels).Observe(time.Since(startTime).Seconds())
}
if w.lastResult == result {
@ -311,3 +335,15 @@ func deepCopyPrometheusLabels(m metrics.Labels) metrics.Labels {
}
return ret
}
// getPodLabelName returns the value to use for the "pod" metric label.
//
// Pods without a GenerateName are reported under their full name. For pods
// with a GenerateName, the last dash-separated segment of the pod name is
// dropped, and if the pod carries the deployment pod-template-hash label
// (apps.DefaultDeploymentUniqueLabelKey) a trailing "-<hash>" is dropped as
// well, so that all replicas of one workload share a single label value
// (e.g. "coredns-845757d86-ccqpf" -> "coredns").
func getPodLabelName(pod *v1.Pod) string {
	if pod.GenerateName == "" {
		return pod.Name
	}

	podName := pod.Name
	// Strip the final "-<segment>". A name that contains no dash is left
	// untouched instead of collapsing to an empty label value.
	if i := strings.LastIndex(podName, "-"); i >= 0 {
		podName = podName[:i]
	}

	// Skip an empty hash: the suffix would be a bare "-".
	if hash, ok := pod.GetLabels()[apps.DefaultDeploymentUniqueLabelKey]; ok && hash != "" {
		// Only the trailing "-<hash>" is removed; an identical substring
		// earlier in the name belongs to the workload name and is kept.
		podName = strings.TrimSuffix(podName, fmt.Sprintf("-%s", hash))
	}
	return podName
}

View File

@ -25,6 +25,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes/fake"
"k8s.io/kubernetes/pkg/apis/apps"
kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
"k8s.io/kubernetes/pkg/kubelet/prober/results"
"k8s.io/kubernetes/pkg/kubelet/status"
@ -474,3 +475,50 @@ func TestStartupProbeDisabledByStarted(t *testing.T) {
expectContinue(t, w, w.doProbe(), msg)
expectResult(t, w, results.Success, msg)
}
// TestGetPodLabelName checks the "pod" metric label derived by getPodLabelName
// for a pod without a GenerateName, a pod carrying the pod-template-hash
// label, and a generated pod without that label. Each case runs as a named
// subtest so a failure identifies the case that broke.
func TestGetPodLabelName(t *testing.T) {
	testCases := []struct {
		name   string
		pod    *v1.Pod
		result string
	}{
		{
			// No GenerateName: the full pod name is expected unchanged.
			name: "Static pod",
			pod: &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: "kube-controller-manager-k8s-master-21385161-0",
				},
			},
			result: "kube-controller-manager-k8s-master-21385161-0",
		},
		{
			// Both the last name segment and the "-<hash>" suffix are dropped.
			name: "Deployment pod",
			pod: &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:         "coredns-845757d86-ccqpf",
					GenerateName: "coredns-845757d86-",
					Labels: map[string]string{
						apps.DefaultDeploymentUniqueLabelKey: "845757d86",
					},
				},
			},
			result: "coredns",
		},
		{
			// No hash label: only the last name segment is dropped.
			name: "ReplicaSet pod",
			pod: &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:         "kube-proxy-2gmqn",
					GenerateName: "kube-proxy-",
				},
			},
			result: "kube-proxy",
		},
	}

	for _, test := range testCases {
		test := test // per-iteration copy for the closure (pre-Go 1.22 semantics)
		t.Run(test.name, func(t *testing.T) {
			ret := getPodLabelName(test.pod)
			if ret != test.result {
				t.Errorf("Expected %s, got %s", test.result, ret)
			}
		})
	}
}

View File

@ -402,6 +402,7 @@ func (s *Server) InstallDefaultHandlers() {
p := compbasemetrics.NewKubeRegistry()
_ = compbasemetrics.RegisterProcessStartTime(p.Register)
p.MustRegister(prober.ProberResults)
p.MustRegister(prober.ProberDuration)
s.restfulCont.Handle(proberMetricsPath,
compbasemetrics.HandlerFor(p, compbasemetrics.HandlerOpts{ErrorHandling: compbasemetrics.ContinueOnError}),
)