Merge pull request #104484 from jackfrancis/prober-duration-metrics
add container probe duration metrics
This commit is contained in:
commit 126c07604d
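In short: this change adds a new ALPHA histogram, ProberDuration (subsystem "prober", name "probe_duration_seconds"), records a duration observation in the probe worker for successful and unknown probe results, normalizes the histogram's pod label via a new getPodLabelName helper so replicas of the same workload share one label value, and registers the new collector on the kubelet's prober metrics endpoint. The hunks below walk through each piece.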
@@ -48,6 +48,20 @@ var ProberResults = metrics.NewCounterVec(
 		"pod_uid"},
 )
 
+// ProberDuration stores the duration of a successful probe lifecycle by result as prometheus metrics.
+var ProberDuration = metrics.NewHistogramVec(
+	&metrics.HistogramOpts{
+		Subsystem:      "prober",
+		Name:           "probe_duration_seconds",
+		Help:           "Duration in seconds for a probe response.",
+		StabilityLevel: metrics.ALPHA,
+	},
+	[]string{"probe_type",
+		"container",
+		"pod",
+		"namespace"},
+)
+
 // Manager manages pod probing. It creates a probe "worker" for every container that specifies a
 // probe (AddPod). The worker periodically probes its assigned container and caches the results. The
 // manager uses the cached probe results to set the appropriate Ready state in the PodStatus when
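For readers unfamiliar with the k8s.io/component-base/metrics wrappers, here is a minimal, self-contained sketch of the same pattern (define a HistogramVec, register it, observe a duration). It uses only calls that appear elsewhere in this diff; the registry, label values, and sleep are hypothetical stand-ins for illustration, not part of the PR:

package main

import (
	"time"

	compbasemetrics "k8s.io/component-base/metrics"
)

// A throwaway histogram mirroring ProberDuration's shape.
var exampleDuration = compbasemetrics.NewHistogramVec(
	&compbasemetrics.HistogramOpts{
		Subsystem:      "prober",
		Name:           "probe_duration_seconds",
		Help:           "Duration in seconds for a probe response.",
		StabilityLevel: compbasemetrics.ALPHA,
	},
	[]string{"probe_type", "container", "pod", "namespace"},
)

func main() {
	// Register on a fresh KubeRegistry, as the kubelet server does in the last hunk below.
	// Component-base metrics are lazily instantiated, so register before observing.
	r := compbasemetrics.NewKubeRegistry()
	r.MustRegister(exampleDuration)

	start := time.Now()
	time.Sleep(10 * time.Millisecond) // stand-in for a real probe

	// Record the elapsed time against one label combination.
	exampleDuration.With(compbasemetrics.Labels{
		"probe_type": "Readiness",
		"container":  "app",
		"pod":        "app",
		"namespace":  "default",
	}).Observe(time.Since(start).Seconds())
}

The worker wiring below follows exactly this define/observe/delete lifecycle.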
@@ -17,7 +17,9 @@ limitations under the License.
 package prober
 
 import (
+	"fmt"
 	"math/rand"
+	"strings"
 	"time"
 
 	v1 "k8s.io/api/core/v1"

@@ -25,6 +27,7 @@ import (
 	"k8s.io/component-base/metrics"
 	"k8s.io/klog/v2"
 	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
+	"k8s.io/kubernetes/pkg/apis/apps"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 	"k8s.io/kubernetes/pkg/kubelet/prober/results"
 )

@@ -74,6 +77,10 @@ type worker struct {
 	proberResultsSuccessfulMetricLabels metrics.Labels
 	proberResultsFailedMetricLabels     metrics.Labels
 	proberResultsUnknownMetricLabels    metrics.Labels
+	// proberDurationMetricLabels holds the labels attached to this worker
+	// for the ProberDuration metric by result.
+	proberDurationSuccessfulMetricLabels metrics.Labels
+	proberDurationUnknownMetricLabels    metrics.Labels
 }
 
 // Creates and starts a new probe worker.

@@ -107,14 +114,23 @@ func newWorker(
 		w.initialValue = results.Unknown
 	}
 
+	podName := getPodLabelName(w.pod)
+
 	basicMetricLabels := metrics.Labels{
 		"probe_type": w.probeType.String(),
 		"container":  w.container.Name,
-		"pod":        w.pod.Name,
+		"pod":        podName,
 		"namespace":  w.pod.Namespace,
 		"pod_uid":    string(w.pod.UID),
 	}
 
+	proberDurationLabels := metrics.Labels{
+		"probe_type": w.probeType.String(),
+		"container":  w.container.Name,
+		"pod":        podName,
+		"namespace":  w.pod.Namespace,
+	}
+
 	w.proberResultsSuccessfulMetricLabels = deepCopyPrometheusLabels(basicMetricLabels)
 	w.proberResultsSuccessfulMetricLabels["result"] = probeResultSuccessful
 

@@ -124,6 +140,9 @@ func newWorker(
 	w.proberResultsUnknownMetricLabels = deepCopyPrometheusLabels(basicMetricLabels)
 	w.proberResultsUnknownMetricLabels["result"] = probeResultUnknown
 
+	w.proberDurationSuccessfulMetricLabels = deepCopyPrometheusLabels(proberDurationLabels)
+	w.proberDurationUnknownMetricLabels = deepCopyPrometheusLabels(proberDurationLabels)
+
 	return w
 }
 
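Note that proberDurationLabels deliberately carries fewer labels than basicMetricLabels: there is no "result" label (the successful and unknown duration label sets end up identical) and no "pod_uid". Presumably this keeps the histogram's series count down, since every label combination on a histogram multiplies its per-bucket series.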
@@ -151,6 +170,8 @@ func (w *worker) run() {
 		ProberResults.Delete(w.proberResultsSuccessfulMetricLabels)
 		ProberResults.Delete(w.proberResultsFailedMetricLabels)
 		ProberResults.Delete(w.proberResultsUnknownMetricLabels)
+		ProberDuration.Delete(w.proberDurationSuccessfulMetricLabels)
+		ProberDuration.Delete(w.proberDurationUnknownMetricLabels)
 	}()
 
 probeLoop:

@@ -181,6 +202,7 @@ func (w *worker) doProbe() (keepGoing bool) {
 	defer func() { recover() }() // Actually eat panics (HandleCrash takes care of logging)
 	defer runtime.HandleCrash(func(_ interface{}) { keepGoing = true })
 
+	startTime := time.Now()
 	status, ok := w.probeManager.statusManager.GetPodStatus(w.pod.UID)
 	if !ok {
 		// Either the pod has not been created yet, or it was already deleted.

@@ -271,10 +293,12 @@
 	switch result {
 	case results.Success:
 		ProberResults.With(w.proberResultsSuccessfulMetricLabels).Inc()
+		ProberDuration.With(w.proberDurationSuccessfulMetricLabels).Observe(time.Since(startTime).Seconds())
 	case results.Failure:
 		ProberResults.With(w.proberResultsFailedMetricLabels).Inc()
 	default:
 		ProberResults.With(w.proberResultsUnknownMetricLabels).Inc()
+		ProberDuration.With(w.proberDurationUnknownMetricLabels).Observe(time.Since(startTime).Seconds())
 	}
 
 	if w.lastResult == result {
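Only the Success and default (unknown) branches record a duration; failed probes increment ProberResults but never feed ProberDuration, which matches the "successful probe lifecycle" wording in the metric's doc comment. As an illustrative (not authoritative) query, a per-probe-type p95 over the new metric would look like `histogram_quantile(0.95, sum(rate(prober_probe_duration_seconds_bucket[5m])) by (le, probe_type))` in PromQL.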
@@ -311,3 +335,15 @@ func deepCopyPrometheusLabels(m metrics.Labels) metrics.Labels {
 	}
 	return ret
 }
+
+func getPodLabelName(pod *v1.Pod) string {
+	podName := pod.Name
+	if pod.GenerateName != "" {
+		podNameSlice := strings.Split(pod.Name, "-")
+		podName = strings.Join(podNameSlice[:len(podNameSlice)-1], "-")
+		if label, ok := pod.GetLabels()[apps.DefaultDeploymentUniqueLabelKey]; ok {
+			podName = strings.ReplaceAll(podName, fmt.Sprintf("-%s", label), "")
+		}
+	}
+	return podName
+}
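getPodLabelName trims the generated per-instance suffix off a pod name so that all replicas of a workload share one "pod" label value. Since the function is unexported, here is a standalone sketch of the same trimming logic, with hypothetical inputs taken from the test cases below:

package main

import (
	"fmt"
	"strings"
)

// trim mirrors getPodLabelName: drop the trailing generated segment,
// then drop the pod-template-hash segment if the pod carries that label.
func trim(name, templateHash string, generated bool) string {
	if !generated {
		return name // static pods keep their full name
	}
	parts := strings.Split(name, "-")
	out := strings.Join(parts[:len(parts)-1], "-") // "coredns-845757d86-ccqpf" -> "coredns-845757d86"
	if templateHash != "" {
		out = strings.ReplaceAll(out, "-"+templateHash, "") // -> "coredns"
	}
	return out
}

func main() {
	fmt.Println(trim("coredns-845757d86-ccqpf", "845757d86", true)) // coredns
	fmt.Println(trim("kube-proxy-2gmqn", "", true))                 // kube-proxy
	fmt.Println(trim("etcd-k8s-master-0", "", false))               // etcd-k8s-master-0
}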
@@ -25,6 +25,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/kubernetes/fake"
+	"k8s.io/kubernetes/pkg/apis/apps"
 	kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
 	"k8s.io/kubernetes/pkg/kubelet/prober/results"
 	"k8s.io/kubernetes/pkg/kubelet/status"

@@ -474,3 +475,50 @@ func TestStartupProbeDisabledByStarted(t *testing.T) {
 	expectContinue(t, w, w.doProbe(), msg)
 	expectResult(t, w, results.Success, msg)
 }
+
+func TestGetPodLabelName(t *testing.T) {
+	testCases := []struct {
+		name   string
+		pod    *v1.Pod
+		result string
+	}{
+		{
+			name: "Static pod",
+			pod: &v1.Pod{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: "kube-controller-manager-k8s-master-21385161-0",
+				},
+			},
+			result: "kube-controller-manager-k8s-master-21385161-0",
+		},
+		{
+			name: "Deployment pod",
+			pod: &v1.Pod{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:         "coredns-845757d86-ccqpf",
+					GenerateName: "coredns-845757d86-",
+					Labels: map[string]string{
+						apps.DefaultDeploymentUniqueLabelKey: "845757d86",
+					},
+				},
+			},
+			result: "coredns",
+		},
+		{
+			name: "ReplicaSet pod",
+			pod: &v1.Pod{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:         "kube-proxy-2gmqn",
+					GenerateName: "kube-proxy-",
+				},
+			},
+			result: "kube-proxy",
+		},
+	}
+	for _, test := range testCases {
+		ret := getPodLabelName(test.pod)
+		if ret != test.result {
+			t.Errorf("Expected %s, got %s", test.result, ret)
+		}
+	}
+}
@@ -402,6 +402,7 @@ func (s *Server) InstallDefaultHandlers() {
 	p := compbasemetrics.NewKubeRegistry()
 	_ = compbasemetrics.RegisterProcessStartTime(p.Register)
 	p.MustRegister(prober.ProberResults)
+	p.MustRegister(prober.ProberDuration)
 	s.restfulCont.Handle(proberMetricsPath,
 		compbasemetrics.HandlerFor(p, compbasemetrics.HandlerOpts{ErrorHandling: compbasemetrics.ContinueOnError}),
 	)
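With the collector registered here, the new histogram is served alongside the existing ProberResults counter on the kubelet's dedicated prober metrics handler (proberMetricsPath, which the kubelet registers at /metrics/probes), so scraping that existing endpoint is enough to pick it up; no new handler or port is involved.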