kubelet: add metric for the first pod with network latency

The first pod with network latency impact user workloads, however,
it is difficuly to understand where is the problem of this latency,
since it depends on the CNI plugin to be ready at the moment of the
pod creation.

Add a new internal metric in the kubelet that allow developers and cluster
administrator to understand the source of the latency problems on
node startups.

kubelet_first_network_pod_start_sli_duration_seconds

Change-Id: I4cdb55b0df72c96a3a65b78ce2aae404c5195006
This commit is contained in:
Antonio Ojea 2023-11-15 06:09:49 +00:00
parent 3716bd7681
commit b8533f7976
3 changed files with 87 additions and 0 deletions

View File

@ -30,6 +30,7 @@ import (
// This const block defines the metric names for the kubelet metrics. // This const block defines the metric names for the kubelet metrics.
const ( const (
FirstNetworkPodStartSLIDurationKey = "first_network_pod_start_sli_duration_seconds"
KubeletSubsystem = "kubelet" KubeletSubsystem = "kubelet"
NodeNameKey = "node_name" NodeNameKey = "node_name"
NodeLabelKey = "node" NodeLabelKey = "node"
@ -211,6 +212,20 @@ var (
[]string{}, []string{},
) )
// FirstNetworkPodStartSLIDuration is a gauge that tracks the duration (in seconds) it takes for the first network pod to run,
// excluding the time for image pulling. This is an internal and temporary metric required because of the existing limitations of the
// existing networking subsystem and CRI/CNI implementations that will be solved by https://github.com/containernetworking/cni/issues/859
// The metric represents the latency observed by an user to run workloads in a new node.
// ref: https://github.com/kubernetes/community/blob/master/sig-scalability/slos/pod_startup_latency.md
FirstNetworkPodStartSLIDuration = metrics.NewGauge(
&metrics.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: FirstNetworkPodStartSLIDurationKey,
Help: "Duration in seconds to start the first network pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch",
StabilityLevel: metrics.INTERNAL,
},
)
// CgroupManagerDuration is a Histogram that tracks the duration (in seconds) it takes for cgroup manager operations to complete. // CgroupManagerDuration is a Histogram that tracks the duration (in seconds) it takes for cgroup manager operations to complete.
// Broken down by method. // Broken down by method.
CgroupManagerDuration = metrics.NewHistogramVec( CgroupManagerDuration = metrics.NewHistogramVec(
@ -830,6 +845,7 @@ var registerMetrics sync.Once
func Register(collectors ...metrics.StableCollector) { func Register(collectors ...metrics.StableCollector) {
// Register the metrics. // Register the metrics.
registerMetrics.Do(func() { registerMetrics.Do(func() {
legacyregistry.MustRegister(FirstNetworkPodStartSLIDuration)
legacyregistry.MustRegister(NodeName) legacyregistry.MustRegister(NodeName)
legacyregistry.MustRegister(PodWorkerDuration) legacyregistry.MustRegister(PodWorkerDuration)
legacyregistry.MustRegister(PodStartDuration) legacyregistry.MustRegister(PodStartDuration)

View File

@ -41,6 +41,8 @@ type basicPodStartupLatencyTracker struct {
// protect against concurrent read and write on pods map // protect against concurrent read and write on pods map
lock sync.Mutex lock sync.Mutex
pods map[types.UID]*perPodState pods map[types.UID]*perPodState
// metrics for the first network pod only
firstNetworkPodSeen bool
// For testability // For testability
clock clock.Clock clock clock.Clock
} }
@ -112,6 +114,12 @@ func (p *basicPodStartupLatencyTracker) ObservedPodOnWatch(pod *v1.Pod, when tim
metrics.PodStartSLIDuration.WithLabelValues().Observe(podStartSLOduration) metrics.PodStartSLIDuration.WithLabelValues().Observe(podStartSLOduration)
metrics.PodStartTotalDuration.WithLabelValues().Observe(podStartingDuration.Seconds()) metrics.PodStartTotalDuration.WithLabelValues().Observe(podStartingDuration.Seconds())
state.metricRecorded = true state.metricRecorded = true
// if is the first Pod with network track the start values
// these metrics will help to identify problems with the CNI plugin
if !pod.Spec.HostNetwork && !p.firstNetworkPodSeen {
metrics.FirstNetworkPodStartSLIDuration.Set(podStartSLOduration)
p.firstNetworkPodSeen = true
}
} }
} }

View File

@ -275,6 +275,69 @@ kubelet_pod_start_sli_duration_seconds_count 1
}) })
} }
func TestFirstNetworkPodMetrics(t *testing.T) {
t.Run("first network pod; started in 30s, image pulling between 10th and 20th seconds", func(t *testing.T) {
wants := `
# HELP kubelet_first_network_pod_start_sli_duration_seconds [INTERNAL] Duration in seconds to start the first network pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch
# TYPE kubelet_first_network_pod_start_sli_duration_seconds gauge
kubelet_first_network_pod_start_sli_duration_seconds 30
`
fakeClock := testingclock.NewFakeClock(frozenTime)
metrics.Register()
tracker := &basicPodStartupLatencyTracker{
pods: map[types.UID]*perPodState{},
clock: fakeClock,
}
// hostNetwork pods should not be tracked
hostNetworkPodInitializing := buildInitializingPod()
hostNetworkPodInitializing.UID = "11111-22222"
hostNetworkPodInitializing.Spec.HostNetwork = true
tracker.ObservedPodOnWatch(hostNetworkPodInitializing, frozenTime)
hostNetworkPodStarted := buildRunningPod()
hostNetworkPodStarted.UID = "11111-22222"
hostNetworkPodStarted.Spec.HostNetwork = true
tracker.RecordStatusUpdated(hostNetworkPodStarted)
// track only the first pod with network
podInitializing := buildInitializingPod()
tracker.ObservedPodOnWatch(podInitializing, frozenTime)
// pod started
podStarted := buildRunningPod()
tracker.RecordStatusUpdated(podStarted)
// at 30s observe the same pod on watch
tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*30))
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), "kubelet_first_network_pod_start_sli_duration_seconds", "kubelet_first_network_pod_start_total_duration_seconds"); err != nil {
t.Fatal(err)
}
// any new pod observations should not impact the metrics, as the pod should be recorder only once
tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*150))
tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*200))
tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*250))
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), "kubelet_first_network_pod_start_sli_duration_seconds", "kubelet_first_network_pod_start_total_duration_seconds"); err != nil {
t.Fatal(err)
}
// cleanup
tracker.DeletePodStartupState(hostNetworkPodStarted.UID)
tracker.DeletePodStartupState(podStarted.UID)
assert.Empty(t, tracker.pods)
metrics.PodStartSLIDuration.Reset()
})
}
func buildInitializingPod() *corev1.Pod { func buildInitializingPod() *corev1.Pod {
return buildPodWithStatus([]corev1.ContainerStatus{ return buildPodWithStatus([]corev1.ContainerStatus{
{State: corev1.ContainerState{Waiting: &corev1.ContainerStateWaiting{Reason: "PodInitializing"}}}, {State: corev1.ContainerState{Waiting: &corev1.ContainerStateWaiting{Reason: "PodInitializing"}}},