Merge pull request #107896 from smarterclayton/track_pod_sync_latency

kubelet: Record a metric for latency of pod status update
Kubernetes Prow Robot 2022-09-27 14:25:50 -07:00 committed by GitHub
commit 00532e305a
2 changed files with 40 additions and 3 deletions

pkg/kubelet/metrics/metrics.go

@@ -37,6 +37,7 @@ const (
 	PodStartDurationKey           = "pod_start_duration_seconds"
 	CgroupManagerOperationsKey    = "cgroup_manager_duration_seconds"
 	PodWorkerStartDurationKey     = "pod_worker_start_duration_seconds"
+	PodStatusSyncDurationKey      = "pod_status_sync_duration_seconds"
 	PLEGRelistDurationKey         = "pleg_relist_duration_seconds"
 	PLEGDiscardEventsKey          = "pleg_discard_events"
 	PLEGRelistIntervalKey         = "pleg_relist_interval_seconds"
@@ -53,6 +54,7 @@ const (
 	VolumeStatsHealthStatusAbnormalKey = "volume_stats_health_status_abnormal"
 	RunningPodsKey                     = "running_pods"
 	RunningContainersKey               = "running_containers"
+
 	// Metrics keys of remote runtime operations
 	RuntimeOperationsKey         = "runtime_operations_total"
 	RuntimeOperationsDurationKey = "runtime_operations_duration_seconds"
@ -156,6 +158,18 @@ var (
StabilityLevel: metrics.ALPHA,
},
)
// PodStatusSyncDuration is a Histogram that tracks the duration (in seconds) in takes from the time a pod
// status is generated to the time it is synced with the apiserver. If multiple status changes are generated
// on a pod before it is written to the API, the latency is from the first update to the last event.
PodStatusSyncDuration = metrics.NewHistogram(
&metrics.HistogramOpts{
Subsystem: KubeletSubsystem,
Name: PodStatusSyncDurationKey,
Help: "Duration in seconds to sync a pod status update. Measures time from detection of a change to pod status until the API is successfully updated for that pod, even if multiple intevening changes to pod status occur.",
Buckets: []float64{0.010, 0.050, 0.100, 0.500, 1, 5, 10, 20, 30, 45, 60},
StabilityLevel: metrics.ALPHA,
},
)
// PLEGRelistDuration is a Histogram that tracks the duration (in seconds) it takes for relisting pods in the Kubelet's
// Pod Lifecycle Event Generator (PLEG).
PLEGRelistDuration = metrics.NewHistogram(
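
Editor's note: the histogram's buckets run from 10ms to 60s, so a single sync that takes, say, 350ms increments every cumulative bucket with le >= 0.5. A standalone sketch (not part of this commit; it uses upstream client_golang, which k8s.io/component-base/metrics wraps) demonstrating that bucket behavior:

package main

import (
	"fmt"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	dto "github.com/prometheus/client_model/go"
)

func main() {
	// Same bucket boundaries as PodStatusSyncDuration above.
	h := prometheus.NewHistogram(prometheus.HistogramOpts{
		Name:    "pod_status_sync_duration_seconds",
		Buckets: []float64{0.010, 0.050, 0.100, 0.500, 1, 5, 10, 20, 30, 45, 60},
	})
	h.Observe((350 * time.Millisecond).Seconds())

	// Dump the cumulative bucket counts: every bucket with le >= 0.5 reports 1.
	var m dto.Metric
	_ = h.Write(&m)
	for _, b := range m.Histogram.Bucket {
		fmt.Printf("le=%g count=%d\n", b.GetUpperBound(), b.GetCumulativeCount())
	}
}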
@@ -496,6 +510,7 @@ func Register(collectors ...metrics.StableCollector) {
 		legacyregistry.MustRegister(PodStartDuration)
 		legacyregistry.MustRegister(CgroupManagerDuration)
 		legacyregistry.MustRegister(PodWorkerStartDuration)
+		legacyregistry.MustRegister(PodStatusSyncDuration)
 		legacyregistry.MustRegister(ContainersPerPodCount)
 		legacyregistry.MustRegister(PLEGRelistDuration)
 		legacyregistry.MustRegister(PLEGDiscardEvents)
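
Editor's note (illustrative, not from the patch): with Subsystem "kubelet", the exported series name is kubelet_pod_status_sync_duration_seconds. A minimal, runnable sketch wiring an equivalent component-base histogram into the legacy registry and serving it over HTTP; the variable name and port are placeholders:

package main

import (
	"net/http"

	"k8s.io/component-base/metrics"
	"k8s.io/component-base/metrics/legacyregistry"
)

// podStatusSyncDuration is a stand-in for the real PodStatusSyncDuration;
// name and buckets mirror the definition in metrics.go above.
var podStatusSyncDuration = metrics.NewHistogram(
	&metrics.HistogramOpts{
		Subsystem:      "kubelet",
		Name:           "pod_status_sync_duration_seconds",
		Help:           "Duration in seconds to sync a pod status update.",
		Buckets:        []float64{0.010, 0.050, 0.100, 0.500, 1, 5, 10, 20, 30, 45, 60},
		StabilityLevel: metrics.ALPHA,
	},
)

func main() {
	legacyregistry.MustRegister(podStatusSyncDuration)
	podStatusSyncDuration.Observe(0.042) // one synthetic observation

	// Serve the legacy registry the way a component's /metrics endpoint does.
	http.Handle("/metrics", legacyregistry.Handler())
	_ = http.ListenAndServe("127.0.0.1:9999", nil)
}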

pkg/kubelet/status/status_manager.go

@@ -37,6 +37,7 @@ import (
 	"k8s.io/klog/v2"
 	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
+	"k8s.io/kubernetes/pkg/kubelet/metrics"
 	kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
 	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
 	statusutil "k8s.io/kubernetes/pkg/util/pod"
@@ -45,12 +46,15 @@ import (
 // A wrapper around v1.PodStatus that includes a version to enforce that stale pod statuses are
 // not sent to the API server.
 type versionedPodStatus struct {
-	status v1.PodStatus
-	// Monotonically increasing version number (per pod).
+	// version is a monotonically increasing version number (per pod).
 	version uint64
 	// Pod name & namespace, for sending updates to API server.
 	podName      string
 	podNamespace string
+	// at is the time at which the most recent status update was detected
+	at time.Time
+
+	status v1.PodStatus
 }

 type podStatusSyncRequest struct {
@@ -545,6 +549,16 @@ func (m *manager) updateStatusInternal(pod *v1.Pod, status v1.PodStatus, forceUp
 		podName:      pod.Name,
 		podNamespace: pod.Namespace,
 	}
+
+	// Multiple status updates can be generated before we update the API server,
+	// so we track the time from the first status update until we retire it to
+	// the API.
+	if cachedStatus.at.IsZero() {
+		newStatus.at = time.Now()
+	} else {
+		newStatus.at = cachedStatus.at
+	}
+
 	m.podStatuses[pod.UID] = newStatus

 	select {
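
Editor's note: the branch above implements a "first change wins" clock; if a pending (not yet synced) status already carries a detection time, later coalesced updates inherit it. A simplified, self-contained sketch of that rule (the type is reduced to the fields the timing logic touches; nextStatus is a hypothetical helper name):

package main

import (
	"fmt"
	"time"
)

// versionedPodStatus here keeps only the fields relevant to the timing rule.
type versionedPodStatus struct {
	version uint64
	at      time.Time
}

// nextStatus mimics the cachedStatus.at.IsZero() branch in updateStatusInternal.
func nextStatus(cached versionedPodStatus) versionedPodStatus {
	next := versionedPodStatus{version: cached.version + 1}
	if cached.at.IsZero() {
		next.at = time.Now() // no clock running yet: start it now
	} else {
		next.at = cached.at // coalesced change: keep the original detection time
	}
	return next
}

func main() {
	var s versionedPodStatus // zero value: nothing pending, at is the zero time
	s = nextStatus(s)
	first := s.at
	time.Sleep(10 * time.Millisecond)
	s = nextStatus(s)              // a second change lands before any sync
	fmt.Println(s.at.Equal(first)) // true: latency is measured from the first change
}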
@@ -685,7 +699,7 @@ func (m *manager) syncPod(uid types.UID, status versionedPodStatus) {
 	mergedStatus := mergePodStatus(pod.Status, status.status, m.podDeletionSafety.PodCouldHaveRunningContainers(pod))

 	newPod, patchBytes, unchanged, err := statusutil.PatchPodStatus(context.TODO(), m.kubeClient, pod.Namespace, pod.Name, pod.UID, pod.Status, mergedStatus)
-	klog.V(3).InfoS("Patch status for pod", "pod", klog.KObj(pod), "patch", string(patchBytes))
+	klog.V(3).InfoS("Patch status for pod", "pod", klog.KObj(pod), "podUID", uid, "patch", string(patchBytes))

 	if err != nil {
 		klog.InfoS("Failed to update status for pod", "pod", klog.KObj(pod), "err", err)
@@ -698,6 +712,14 @@ func (m *manager) syncPod(uid types.UID, status versionedPodStatus) {
 		pod = newPod
 	}

+	// measure how long the status update took to propagate from generation to update on the server
+	if status.at.IsZero() {
+		klog.V(3).InfoS("Pod had no status time set", "pod", klog.KObj(pod), "podUID", uid, "version", status.version)
+	} else {
+		duration := time.Now().Sub(status.at).Truncate(time.Millisecond)
+		metrics.PodStatusSyncDuration.Observe(duration.Seconds())
+	}
+
 	m.apiStatusVersions[kubetypes.MirrorPodUID(pod.UID)] = status.version

 	// We don't handle graceful deletion of mirror pods.
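
Editor's note: in the else branch, the elapsed time is truncated to whole milliseconds before being converted to seconds for Observe (time.Since(status.at) would be the idiomatic equivalent of time.Now().Sub(status.at)). A tiny sketch of that arithmetic, with a made-up elapsed time:

package main

import (
	"fmt"
	"time"
)

func main() {
	// Pretend the first status change was detected ~1.2345s ago.
	at := time.Now().Add(-1234500 * time.Microsecond)

	// Same math as syncPod: truncate to whole milliseconds, then express in seconds.
	duration := time.Now().Sub(at).Truncate(time.Millisecond)
	fmt.Println(duration.Seconds()) // ≈ 1.234 — the value handed to Observe()
}

On the scrape side, percentiles can then be derived the usual Prometheus way, via histogram_quantile over the resulting _bucket series.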