mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-27 21:47:07 +00:00
Merge pull request #72470 from danielqsj/kl
Change kubelet metrics to conform to metrics guidelines
This commit is contained in:
commit
0b01b9ce51
@ -284,7 +284,8 @@ func (m *cgroupManagerImpl) Exists(name CgroupName) bool {
|
|||||||
func (m *cgroupManagerImpl) Destroy(cgroupConfig *CgroupConfig) error {
|
func (m *cgroupManagerImpl) Destroy(cgroupConfig *CgroupConfig) error {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
defer func() {
|
defer func() {
|
||||||
metrics.CgroupManagerLatency.WithLabelValues("destroy").Observe(metrics.SinceInMicroseconds(start))
|
metrics.CgroupManagerDuration.WithLabelValues("destroy").Observe(metrics.SinceInSeconds(start))
|
||||||
|
metrics.DeprecatedCgroupManagerLatency.WithLabelValues("destroy").Observe(metrics.SinceInMicroseconds(start))
|
||||||
}()
|
}()
|
||||||
|
|
||||||
cgroupPaths := m.buildCgroupPaths(cgroupConfig.Name)
|
cgroupPaths := m.buildCgroupPaths(cgroupConfig.Name)
|
||||||
@ -411,7 +412,8 @@ func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcont
|
|||||||
func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error {
|
func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
defer func() {
|
defer func() {
|
||||||
metrics.CgroupManagerLatency.WithLabelValues("update").Observe(metrics.SinceInMicroseconds(start))
|
metrics.CgroupManagerDuration.WithLabelValues("update").Observe(metrics.SinceInSeconds(start))
|
||||||
|
metrics.DeprecatedCgroupManagerLatency.WithLabelValues("update").Observe(metrics.SinceInMicroseconds(start))
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Extract the cgroup resource parameters
|
// Extract the cgroup resource parameters
|
||||||
@ -446,7 +448,8 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error {
|
|||||||
func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error {
|
func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
defer func() {
|
defer func() {
|
||||||
metrics.CgroupManagerLatency.WithLabelValues("create").Observe(metrics.SinceInMicroseconds(start))
|
metrics.CgroupManagerDuration.WithLabelValues("create").Observe(metrics.SinceInSeconds(start))
|
||||||
|
metrics.DeprecatedCgroupManagerLatency.WithLabelValues("create").Observe(metrics.SinceInMicroseconds(start))
|
||||||
}()
|
}()
|
||||||
|
|
||||||
resources := m.toResources(cgroupConfig.ResourceParameters)
|
resources := m.toResources(cgroupConfig.ResourceParameters)
|
||||||
|
@ -354,6 +354,7 @@ func (m *ManagerImpl) Allocate(node *schedulernodeinfo.NodeInfo, attrs *lifecycl
|
|||||||
func (m *ManagerImpl) Register(ctx context.Context, r *pluginapi.RegisterRequest) (*pluginapi.Empty, error) {
|
func (m *ManagerImpl) Register(ctx context.Context, r *pluginapi.RegisterRequest) (*pluginapi.Empty, error) {
|
||||||
klog.Infof("Got registration request from device plugin with resource name %q", r.ResourceName)
|
klog.Infof("Got registration request from device plugin with resource name %q", r.ResourceName)
|
||||||
metrics.DevicePluginRegistrationCount.WithLabelValues(r.ResourceName).Inc()
|
metrics.DevicePluginRegistrationCount.WithLabelValues(r.ResourceName).Inc()
|
||||||
|
metrics.DeprecatedDevicePluginRegistrationCount.WithLabelValues(r.ResourceName).Inc()
|
||||||
var versionCompatible bool
|
var versionCompatible bool
|
||||||
for _, v := range pluginapi.SupportedVersions {
|
for _, v := range pluginapi.SupportedVersions {
|
||||||
if r.Version == v {
|
if r.Version == v {
|
||||||
@ -696,7 +697,8 @@ func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Cont
|
|||||||
// in a passed in AllocateRequest pointer, and issues a single Allocate call per pod.
|
// in a passed in AllocateRequest pointer, and issues a single Allocate call per pod.
|
||||||
klog.V(3).Infof("Making allocation request for devices %v for device plugin %s", devs, resource)
|
klog.V(3).Infof("Making allocation request for devices %v for device plugin %s", devs, resource)
|
||||||
resp, err := eI.e.allocate(devs)
|
resp, err := eI.e.allocate(devs)
|
||||||
metrics.DevicePluginAllocationLatency.WithLabelValues(resource).Observe(metrics.SinceInMicroseconds(startRPCTime))
|
metrics.DevicePluginAllocationDuration.WithLabelValues(resource).Observe(metrics.SinceInSeconds(startRPCTime))
|
||||||
|
metrics.DeprecatedDevicePluginAllocationLatency.WithLabelValues(resource).Observe(metrics.SinceInMicroseconds(startRPCTime))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// In case of allocation failure, we want to restore m.allocatedDevices
|
// In case of allocation failure, we want to restore m.allocatedDevices
|
||||||
// to the actual allocated state from m.podDevices.
|
// to the actual allocated state from m.podDevices.
|
||||||
|
@ -361,7 +361,8 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
|
|||||||
for _, t := range thresholds {
|
for _, t := range thresholds {
|
||||||
timeObserved := observations[t.Signal].time
|
timeObserved := observations[t.Signal].time
|
||||||
if !timeObserved.IsZero() {
|
if !timeObserved.IsZero() {
|
||||||
metrics.EvictionStatsAge.WithLabelValues(string(t.Signal)).Observe(metrics.SinceInMicroseconds(timeObserved.Time))
|
metrics.EvictionStatsAge.WithLabelValues(string(t.Signal)).Observe(metrics.SinceInSeconds(timeObserved.Time))
|
||||||
|
metrics.DeprecatedEvictionStatsAge.WithLabelValues(string(t.Signal)).Observe(metrics.SinceInMicroseconds(timeObserved.Time))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1500,7 +1500,8 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
|
|||||||
if !firstSeenTime.IsZero() {
|
if !firstSeenTime.IsZero() {
|
||||||
// This is the first time we are syncing the pod. Record the latency
|
// This is the first time we are syncing the pod. Record the latency
|
||||||
// since kubelet first saw the pod if firstSeenTime is set.
|
// since kubelet first saw the pod if firstSeenTime is set.
|
||||||
metrics.PodWorkerStartLatency.Observe(metrics.SinceInMicroseconds(firstSeenTime))
|
metrics.PodWorkerStartDuration.Observe(metrics.SinceInSeconds(firstSeenTime))
|
||||||
|
metrics.DeprecatedPodWorkerStartLatency.Observe(metrics.SinceInMicroseconds(firstSeenTime))
|
||||||
} else {
|
} else {
|
||||||
klog.V(3).Infof("First seen time not recorded for pod %q", pod.UID)
|
klog.V(3).Infof("First seen time not recorded for pod %q", pod.UID)
|
||||||
}
|
}
|
||||||
@ -1517,7 +1518,8 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
|
|||||||
existingStatus, ok := kl.statusManager.GetPodStatus(pod.UID)
|
existingStatus, ok := kl.statusManager.GetPodStatus(pod.UID)
|
||||||
if !ok || existingStatus.Phase == v1.PodPending && apiPodStatus.Phase == v1.PodRunning &&
|
if !ok || existingStatus.Phase == v1.PodPending && apiPodStatus.Phase == v1.PodRunning &&
|
||||||
!firstSeenTime.IsZero() {
|
!firstSeenTime.IsZero() {
|
||||||
metrics.PodStartLatency.Observe(metrics.SinceInMicroseconds(firstSeenTime))
|
metrics.PodStartDuration.Observe(metrics.SinceInSeconds(firstSeenTime))
|
||||||
|
metrics.DeprecatedPodStartLatency.Observe(metrics.SinceInMicroseconds(firstSeenTime))
|
||||||
}
|
}
|
||||||
|
|
||||||
runnable := kl.canRunPod(pod)
|
runnable := kl.canRunPod(pod)
|
||||||
@ -1996,7 +1998,8 @@ func (kl *Kubelet) dispatchWork(pod *v1.Pod, syncType kubetypes.SyncPodType, mir
|
|||||||
UpdateType: syncType,
|
UpdateType: syncType,
|
||||||
OnCompleteFunc: func(err error) {
|
OnCompleteFunc: func(err error) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
metrics.PodWorkerLatency.WithLabelValues(syncType.String()).Observe(metrics.SinceInMicroseconds(start))
|
metrics.PodWorkerDuration.WithLabelValues(syncType.String()).Observe(metrics.SinceInSeconds(start))
|
||||||
|
metrics.DeprecatedPodWorkerLatency.WithLabelValues(syncType.String()).Observe(metrics.SinceInMicroseconds(start))
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
@ -49,13 +49,16 @@ func newInstrumentedImageManagerService(service internalapi.ImageManagerService)
|
|||||||
// recordOperation records the duration of the operation.
|
// recordOperation increments the operation counters and records the duration of the operation.
|
||||||
func recordOperation(operation string, start time.Time) {
|
func recordOperation(operation string, start time.Time) {
|
||||||
metrics.RuntimeOperations.WithLabelValues(operation).Inc()
|
metrics.RuntimeOperations.WithLabelValues(operation).Inc()
|
||||||
metrics.RuntimeOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInMicroseconds(start))
|
metrics.DeprecatedRuntimeOperations.WithLabelValues(operation).Inc()
|
||||||
|
metrics.RuntimeOperationsDuration.WithLabelValues(operation).Observe(metrics.SinceInSeconds(start))
|
||||||
|
metrics.DeprecatedRuntimeOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInMicroseconds(start))
|
||||||
}
|
}
|
||||||
|
|
||||||
// recordError records error for metric if an error occurred.
|
// recordError records error for metric if an error occurred.
|
||||||
func recordError(operation string, err error) {
|
func recordError(operation string, err error) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
metrics.RuntimeOperationsErrors.WithLabelValues(operation).Inc()
|
metrics.RuntimeOperationsErrors.WithLabelValues(operation).Inc()
|
||||||
|
metrics.DeprecatedRuntimeOperationsErrors.WithLabelValues(operation).Inc()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -30,7 +30,7 @@ import (
|
|||||||
|
|
||||||
func TestRecordOperation(t *testing.T) {
|
func TestRecordOperation(t *testing.T) {
|
||||||
prometheus.MustRegister(metrics.RuntimeOperations)
|
prometheus.MustRegister(metrics.RuntimeOperations)
|
||||||
prometheus.MustRegister(metrics.RuntimeOperationsLatency)
|
prometheus.MustRegister(metrics.RuntimeOperationsDuration)
|
||||||
prometheus.MustRegister(metrics.RuntimeOperationsErrors)
|
prometheus.MustRegister(metrics.RuntimeOperationsErrors)
|
||||||
|
|
||||||
temporalServer := "127.0.0.1:1234"
|
temporalServer := "127.0.0.1:1234"
|
||||||
@ -50,8 +50,8 @@ func TestRecordOperation(t *testing.T) {
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
recordOperation("create_container", time.Now())
|
recordOperation("create_container", time.Now())
|
||||||
runtimeOperationsCounterExpected := "kubelet_runtime_operations{operation_type=\"create_container\"} 1"
|
runtimeOperationsCounterExpected := "kubelet_runtime_operations_total{operation_type=\"create_container\"} 1"
|
||||||
runtimeOperationsLatencyExpected := "kubelet_runtime_operations_latency_microseconds_count{operation_type=\"create_container\"} 1"
|
runtimeOperationsDurationExpected := "kubelet_runtime_operations_duration_seconds_count{operation_type=\"create_container\"} 1"
|
||||||
|
|
||||||
assert.HTTPBodyContains(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
assert.HTTPBodyContains(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
mux.ServeHTTP(w, r)
|
mux.ServeHTTP(w, r)
|
||||||
@ -59,7 +59,7 @@ func TestRecordOperation(t *testing.T) {
|
|||||||
|
|
||||||
assert.HTTPBodyContains(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
assert.HTTPBodyContains(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
mux.ServeHTTP(w, r)
|
mux.ServeHTTP(w, r)
|
||||||
}), "GET", prometheusURL, nil, runtimeOperationsLatencyExpected)
|
}), "GET", prometheusURL, nil, runtimeOperationsDurationExpected)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestInstrumentedVersion(t *testing.T) {
|
func TestInstrumentedVersion(t *testing.T) {
|
||||||
|
@ -34,14 +34,21 @@ const (
|
|||||||
KubeletSubsystem = "kubelet"
|
KubeletSubsystem = "kubelet"
|
||||||
NodeNameKey = "node_name"
|
NodeNameKey = "node_name"
|
||||||
NodeLabelKey = "node"
|
NodeLabelKey = "node"
|
||||||
PodWorkerLatencyKey = "pod_worker_latency_microseconds"
|
PodWorkerDurationKey = "pod_worker_duration_seconds"
|
||||||
PodStartLatencyKey = "pod_start_latency_microseconds"
|
PodStartDurationKey = "pod_start_duration_seconds"
|
||||||
CgroupManagerOperationsKey = "cgroup_manager_latency_microseconds"
|
CgroupManagerOperationsKey = "cgroup_manager_duration_seconds"
|
||||||
PodWorkerStartLatencyKey = "pod_worker_start_latency_microseconds"
|
PodWorkerStartDurationKey = "pod_worker_start_duration_seconds"
|
||||||
PLEGRelistLatencyKey = "pleg_relist_latency_microseconds"
|
PLEGRelistDurationKey = "pleg_relist_duration_seconds"
|
||||||
PLEGDiscardEventsKey = "pleg_discard_events"
|
PLEGDiscardEventsKey = "pleg_discard_events"
|
||||||
PLEGRelistIntervalKey = "pleg_relist_interval_microseconds"
|
PLEGRelistIntervalKey = "pleg_relist_interval_seconds"
|
||||||
EvictionStatsAgeKey = "eviction_stats_age_microseconds"
|
EvictionStatsAgeKey = "eviction_stats_age_seconds"
|
||||||
|
DeprecatedPodWorkerLatencyKey = "pod_worker_latency_microseconds"
|
||||||
|
DeprecatedPodStartLatencyKey = "pod_start_latency_microseconds"
|
||||||
|
DeprecatedCgroupManagerOperationsKey = "cgroup_manager_latency_microseconds"
|
||||||
|
DeprecatedPodWorkerStartLatencyKey = "pod_worker_start_latency_microseconds"
|
||||||
|
DeprecatedPLEGRelistLatencyKey = "pleg_relist_latency_microseconds"
|
||||||
|
DeprecatedPLEGRelistIntervalKey = "pleg_relist_interval_microseconds"
|
||||||
|
DeprecatedEvictionStatsAgeKey = "eviction_stats_age_microseconds"
|
||||||
VolumeStatsCapacityBytesKey = "volume_stats_capacity_bytes"
|
VolumeStatsCapacityBytesKey = "volume_stats_capacity_bytes"
|
||||||
VolumeStatsAvailableBytesKey = "volume_stats_available_bytes"
|
VolumeStatsAvailableBytesKey = "volume_stats_available_bytes"
|
||||||
VolumeStatsUsedBytesKey = "volume_stats_used_bytes"
|
VolumeStatsUsedBytesKey = "volume_stats_used_bytes"
|
||||||
@ -49,12 +56,17 @@ const (
|
|||||||
VolumeStatsInodesFreeKey = "volume_stats_inodes_free"
|
VolumeStatsInodesFreeKey = "volume_stats_inodes_free"
|
||||||
VolumeStatsInodesUsedKey = "volume_stats_inodes_used"
|
VolumeStatsInodesUsedKey = "volume_stats_inodes_used"
|
||||||
// Metrics keys of remote runtime operations
|
// Metrics keys of remote runtime operations
|
||||||
RuntimeOperationsKey = "runtime_operations"
|
RuntimeOperationsKey = "runtime_operations_total"
|
||||||
RuntimeOperationsLatencyKey = "runtime_operations_latency_microseconds"
|
RuntimeOperationsDurationKey = "runtime_operations_duration_seconds"
|
||||||
RuntimeOperationsErrorsKey = "runtime_operations_errors"
|
RuntimeOperationsErrorsKey = "runtime_operations_errors_total"
|
||||||
|
DeprecatedRuntimeOperationsKey = "runtime_operations"
|
||||||
|
DeprecatedRuntimeOperationsLatencyKey = "runtime_operations_latency_microseconds"
|
||||||
|
DeprecatedRuntimeOperationsErrorsKey = "runtime_operations_errors"
|
||||||
// Metrics keys of device plugin operations
|
// Metrics keys of device plugin operations
|
||||||
DevicePluginRegistrationCountKey = "device_plugin_registration_count"
|
DevicePluginRegistrationCountKey = "device_plugin_registration_total"
|
||||||
DevicePluginAllocationLatencyKey = "device_plugin_alloc_latency_microseconds"
|
DevicePluginAllocationDurationKey = "device_plugin_alloc_duration_seconds"
|
||||||
|
DeprecatedDevicePluginRegistrationCountKey = "device_plugin_registration_count"
|
||||||
|
DeprecatedDevicePluginAllocationLatencyKey = "device_plugin_alloc_latency_microseconds"
|
||||||
|
|
||||||
// Metric keys for node config
|
// Metric keys for node config
|
||||||
AssignedConfigKey = "node_config_assigned"
|
AssignedConfigKey = "node_config_assigned"
|
||||||
@ -81,48 +93,54 @@ var (
|
|||||||
},
|
},
|
||||||
[]string{NodeLabelKey},
|
[]string{NodeLabelKey},
|
||||||
)
|
)
|
||||||
ContainersPerPodCount = prometheus.NewSummary(
|
ContainersPerPodCount = prometheus.NewHistogram(
|
||||||
prometheus.SummaryOpts{
|
prometheus.HistogramOpts{
|
||||||
Subsystem: KubeletSubsystem,
|
Subsystem: KubeletSubsystem,
|
||||||
Name: "containers_per_pod_count",
|
Name: "containers_per_pod_count",
|
||||||
Help: "The number of containers per pod.",
|
Help: "The number of containers per pod.",
|
||||||
|
Buckets: prometheus.DefBuckets,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
PodWorkerLatency = prometheus.NewSummaryVec(
|
PodWorkerDuration = prometheus.NewHistogramVec(
|
||||||
prometheus.SummaryOpts{
|
prometheus.HistogramOpts{
|
||||||
Subsystem: KubeletSubsystem,
|
Subsystem: KubeletSubsystem,
|
||||||
Name: PodWorkerLatencyKey,
|
Name: PodWorkerDurationKey,
|
||||||
Help: "Latency in microseconds to sync a single pod. Broken down by operation type: create, update, or sync",
|
Help: "Duration in seconds to sync a single pod. Broken down by operation type: create, update, or sync",
|
||||||
|
Buckets: prometheus.DefBuckets,
|
||||||
},
|
},
|
||||||
[]string{"operation_type"},
|
[]string{"operation_type"},
|
||||||
)
|
)
|
||||||
PodStartLatency = prometheus.NewSummary(
|
PodStartDuration = prometheus.NewHistogram(
|
||||||
prometheus.SummaryOpts{
|
prometheus.HistogramOpts{
|
||||||
Subsystem: KubeletSubsystem,
|
Subsystem: KubeletSubsystem,
|
||||||
Name: PodStartLatencyKey,
|
Name: PodStartDurationKey,
|
||||||
Help: "Latency in microseconds for a single pod to go from pending to running.",
|
Help: "Duration in seconds for a single pod to go from pending to running.",
|
||||||
|
Buckets: prometheus.DefBuckets,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
CgroupManagerLatency = prometheus.NewSummaryVec(
|
CgroupManagerDuration = prometheus.NewHistogramVec(
|
||||||
prometheus.SummaryOpts{
|
prometheus.HistogramOpts{
|
||||||
Subsystem: KubeletSubsystem,
|
Subsystem: KubeletSubsystem,
|
||||||
Name: CgroupManagerOperationsKey,
|
Name: CgroupManagerOperationsKey,
|
||||||
Help: "Latency in microseconds for cgroup manager operations. Broken down by method.",
|
Help: "Duration in seconds for cgroup manager operations. Broken down by method.",
|
||||||
|
Buckets: prometheus.DefBuckets,
|
||||||
},
|
},
|
||||||
[]string{"operation_type"},
|
[]string{"operation_type"},
|
||||||
)
|
)
|
||||||
PodWorkerStartLatency = prometheus.NewSummary(
|
PodWorkerStartDuration = prometheus.NewHistogram(
|
||||||
prometheus.SummaryOpts{
|
prometheus.HistogramOpts{
|
||||||
Subsystem: KubeletSubsystem,
|
Subsystem: KubeletSubsystem,
|
||||||
Name: PodWorkerStartLatencyKey,
|
Name: PodWorkerStartDurationKey,
|
||||||
Help: "Latency in microseconds from seeing a pod to starting a worker.",
|
Help: "Duration in seconds from seeing a pod to starting a worker.",
|
||||||
|
Buckets: prometheus.DefBuckets,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
PLEGRelistLatency = prometheus.NewSummary(
|
PLEGRelistDuration = prometheus.NewHistogram(
|
||||||
prometheus.SummaryOpts{
|
prometheus.HistogramOpts{
|
||||||
Subsystem: KubeletSubsystem,
|
Subsystem: KubeletSubsystem,
|
||||||
Name: PLEGRelistLatencyKey,
|
Name: PLEGRelistDurationKey,
|
||||||
Help: "Latency in microseconds for relisting pods in PLEG.",
|
Help: "Duration in seconds for relisting pods in PLEG.",
|
||||||
|
Buckets: prometheus.DefBuckets,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
PLEGDiscardEvents = prometheus.NewCounterVec(
|
PLEGDiscardEvents = prometheus.NewCounterVec(
|
||||||
@ -133,11 +151,12 @@ var (
|
|||||||
},
|
},
|
||||||
[]string{},
|
[]string{},
|
||||||
)
|
)
|
||||||
PLEGRelistInterval = prometheus.NewSummary(
|
PLEGRelistInterval = prometheus.NewHistogram(
|
||||||
prometheus.SummaryOpts{
|
prometheus.HistogramOpts{
|
||||||
Subsystem: KubeletSubsystem,
|
Subsystem: KubeletSubsystem,
|
||||||
Name: PLEGRelistIntervalKey,
|
Name: PLEGRelistIntervalKey,
|
||||||
Help: "Interval in microseconds between relisting in PLEG.",
|
Help: "Interval in seconds between relisting in PLEG.",
|
||||||
|
Buckets: prometheus.DefBuckets,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
// Metrics of remote runtime operations.
|
// Metrics of remote runtime operations.
|
||||||
@ -149,11 +168,12 @@ var (
|
|||||||
},
|
},
|
||||||
[]string{"operation_type"},
|
[]string{"operation_type"},
|
||||||
)
|
)
|
||||||
RuntimeOperationsLatency = prometheus.NewSummaryVec(
|
RuntimeOperationsDuration = prometheus.NewHistogramVec(
|
||||||
prometheus.SummaryOpts{
|
prometheus.HistogramOpts{
|
||||||
Subsystem: KubeletSubsystem,
|
Subsystem: KubeletSubsystem,
|
||||||
Name: RuntimeOperationsLatencyKey,
|
Name: RuntimeOperationsDurationKey,
|
||||||
Help: "Latency in microseconds of runtime operations. Broken down by operation type.",
|
Help: "Duration in seconds of runtime operations. Broken down by operation type.",
|
||||||
|
Buckets: prometheus.DefBuckets,
|
||||||
},
|
},
|
||||||
[]string{"operation_type"},
|
[]string{"operation_type"},
|
||||||
)
|
)
|
||||||
@ -165,11 +185,12 @@ var (
|
|||||||
},
|
},
|
||||||
[]string{"operation_type"},
|
[]string{"operation_type"},
|
||||||
)
|
)
|
||||||
EvictionStatsAge = prometheus.NewSummaryVec(
|
EvictionStatsAge = prometheus.NewHistogramVec(
|
||||||
prometheus.SummaryOpts{
|
prometheus.HistogramOpts{
|
||||||
Subsystem: KubeletSubsystem,
|
Subsystem: KubeletSubsystem,
|
||||||
Name: EvictionStatsAgeKey,
|
Name: EvictionStatsAgeKey,
|
||||||
Help: "Time between when stats are collected, and when pod is evicted based on those stats by eviction signal",
|
Help: "Time between when stats are collected, and when pod is evicted based on those stats by eviction signal",
|
||||||
|
Buckets: prometheus.DefBuckets,
|
||||||
},
|
},
|
||||||
[]string{"eviction_signal"},
|
[]string{"eviction_signal"},
|
||||||
)
|
)
|
||||||
@ -181,11 +202,105 @@ var (
|
|||||||
},
|
},
|
||||||
[]string{"resource_name"},
|
[]string{"resource_name"},
|
||||||
)
|
)
|
||||||
DevicePluginAllocationLatency = prometheus.NewSummaryVec(
|
DevicePluginAllocationDuration = prometheus.NewHistogramVec(
|
||||||
|
prometheus.HistogramOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: DevicePluginAllocationDurationKey,
|
||||||
|
Help: "Duration in seconds to serve a device plugin Allocation request. Broken down by resource name.",
|
||||||
|
Buckets: prometheus.DefBuckets,
|
||||||
|
},
|
||||||
|
[]string{"resource_name"},
|
||||||
|
)
|
||||||
|
|
||||||
|
DeprecatedPodWorkerLatency = prometheus.NewSummaryVec(
|
||||||
prometheus.SummaryOpts{
|
prometheus.SummaryOpts{
|
||||||
Subsystem: KubeletSubsystem,
|
Subsystem: KubeletSubsystem,
|
||||||
Name: DevicePluginAllocationLatencyKey,
|
Name: DeprecatedPodWorkerLatencyKey,
|
||||||
Help: "Latency in microseconds to serve a device plugin Allocation request. Broken down by resource name.",
|
Help: "(Deprecated) Latency in microseconds to sync a single pod. Broken down by operation type: create, update, or sync",
|
||||||
|
},
|
||||||
|
[]string{"operation_type"},
|
||||||
|
)
|
||||||
|
DeprecatedPodStartLatency = prometheus.NewSummary(
|
||||||
|
prometheus.SummaryOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: DeprecatedPodStartLatencyKey,
|
||||||
|
Help: "(Deprecated) Latency in microseconds for a single pod to go from pending to running.",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
DeprecatedCgroupManagerLatency = prometheus.NewSummaryVec(
|
||||||
|
prometheus.SummaryOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: DeprecatedCgroupManagerOperationsKey,
|
||||||
|
Help: "(Deprecated) Latency in microseconds for cgroup manager operations. Broken down by method.",
|
||||||
|
},
|
||||||
|
[]string{"operation_type"},
|
||||||
|
)
|
||||||
|
DeprecatedPodWorkerStartLatency = prometheus.NewSummary(
|
||||||
|
prometheus.SummaryOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: DeprecatedPodWorkerStartLatencyKey,
|
||||||
|
Help: "(Deprecated) Latency in microseconds from seeing a pod to starting a worker.",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
DeprecatedPLEGRelistLatency = prometheus.NewSummary(
|
||||||
|
prometheus.SummaryOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: DeprecatedPLEGRelistLatencyKey,
|
||||||
|
Help: "(Deprecated) Latency in microseconds for relisting pods in PLEG.",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
DeprecatedPLEGRelistInterval = prometheus.NewSummary(
|
||||||
|
prometheus.SummaryOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: DeprecatedPLEGRelistIntervalKey,
|
||||||
|
Help: "(Deprecated) Interval in microseconds between relisting in PLEG.",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
DeprecatedRuntimeOperations = prometheus.NewCounterVec(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: DeprecatedRuntimeOperationsKey,
|
||||||
|
Help: "(Deprecated) Cumulative number of runtime operations by operation type.",
|
||||||
|
},
|
||||||
|
[]string{"operation_type"},
|
||||||
|
)
|
||||||
|
DeprecatedRuntimeOperationsLatency = prometheus.NewSummaryVec(
|
||||||
|
prometheus.SummaryOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: DeprecatedRuntimeOperationsLatencyKey,
|
||||||
|
Help: "(Deprecated) Latency in microseconds of runtime operations. Broken down by operation type.",
|
||||||
|
},
|
||||||
|
[]string{"operation_type"},
|
||||||
|
)
|
||||||
|
DeprecatedRuntimeOperationsErrors = prometheus.NewCounterVec(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: DeprecatedRuntimeOperationsErrorsKey,
|
||||||
|
Help: "(Deprecated) Cumulative number of runtime operation errors by operation type.",
|
||||||
|
},
|
||||||
|
[]string{"operation_type"},
|
||||||
|
)
|
||||||
|
DeprecatedEvictionStatsAge = prometheus.NewSummaryVec(
|
||||||
|
prometheus.SummaryOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: DeprecatedEvictionStatsAgeKey,
|
||||||
|
Help: "(Deprecated) Time between when stats are collected, and when pod is evicted based on those stats by eviction signal",
|
||||||
|
},
|
||||||
|
[]string{"eviction_signal"},
|
||||||
|
)
|
||||||
|
DeprecatedDevicePluginRegistrationCount = prometheus.NewCounterVec(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: DeprecatedDevicePluginRegistrationCountKey,
|
||||||
|
Help: "(Deprecated) Cumulative number of device plugin registrations. Broken down by resource name.",
|
||||||
|
},
|
||||||
|
[]string{"resource_name"},
|
||||||
|
)
|
||||||
|
DeprecatedDevicePluginAllocationLatency = prometheus.NewSummaryVec(
|
||||||
|
prometheus.SummaryOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: DeprecatedDevicePluginAllocationLatencyKey,
|
||||||
|
Help: "(Deprecated) Latency in microseconds to serve a device plugin Allocation request. Broken down by resource name.",
|
||||||
},
|
},
|
||||||
[]string{"resource_name"},
|
[]string{"resource_name"},
|
||||||
)
|
)
|
||||||
@ -248,21 +363,33 @@ func Register(containerCache kubecontainer.RuntimeCache, collectors ...prometheu
|
|||||||
// Register the metrics.
|
// Register the metrics.
|
||||||
registerMetrics.Do(func() {
|
registerMetrics.Do(func() {
|
||||||
prometheus.MustRegister(NodeName)
|
prometheus.MustRegister(NodeName)
|
||||||
prometheus.MustRegister(PodWorkerLatency)
|
prometheus.MustRegister(PodWorkerDuration)
|
||||||
prometheus.MustRegister(PodStartLatency)
|
prometheus.MustRegister(PodStartDuration)
|
||||||
prometheus.MustRegister(CgroupManagerLatency)
|
prometheus.MustRegister(CgroupManagerDuration)
|
||||||
prometheus.MustRegister(PodWorkerStartLatency)
|
prometheus.MustRegister(PodWorkerStartDuration)
|
||||||
prometheus.MustRegister(ContainersPerPodCount)
|
prometheus.MustRegister(ContainersPerPodCount)
|
||||||
prometheus.MustRegister(newPodAndContainerCollector(containerCache))
|
prometheus.MustRegister(newPodAndContainerCollector(containerCache))
|
||||||
prometheus.MustRegister(PLEGRelistLatency)
|
prometheus.MustRegister(PLEGRelistDuration)
|
||||||
prometheus.MustRegister(PLEGDiscardEvents)
|
prometheus.MustRegister(PLEGDiscardEvents)
|
||||||
prometheus.MustRegister(PLEGRelistInterval)
|
prometheus.MustRegister(PLEGRelistInterval)
|
||||||
prometheus.MustRegister(RuntimeOperations)
|
prometheus.MustRegister(RuntimeOperations)
|
||||||
prometheus.MustRegister(RuntimeOperationsLatency)
|
prometheus.MustRegister(RuntimeOperationsDuration)
|
||||||
prometheus.MustRegister(RuntimeOperationsErrors)
|
prometheus.MustRegister(RuntimeOperationsErrors)
|
||||||
prometheus.MustRegister(EvictionStatsAge)
|
prometheus.MustRegister(EvictionStatsAge)
|
||||||
prometheus.MustRegister(DevicePluginRegistrationCount)
|
prometheus.MustRegister(DevicePluginRegistrationCount)
|
||||||
prometheus.MustRegister(DevicePluginAllocationLatency)
|
prometheus.MustRegister(DevicePluginAllocationDuration)
|
||||||
|
prometheus.MustRegister(DeprecatedPodWorkerLatency)
|
||||||
|
prometheus.MustRegister(DeprecatedPodStartLatency)
|
||||||
|
prometheus.MustRegister(DeprecatedCgroupManagerLatency)
|
||||||
|
prometheus.MustRegister(DeprecatedPodWorkerStartLatency)
|
||||||
|
prometheus.MustRegister(DeprecatedPLEGRelistLatency)
|
||||||
|
prometheus.MustRegister(DeprecatedPLEGRelistInterval)
|
||||||
|
prometheus.MustRegister(DeprecatedRuntimeOperations)
|
||||||
|
prometheus.MustRegister(DeprecatedRuntimeOperationsLatency)
|
||||||
|
prometheus.MustRegister(DeprecatedRuntimeOperationsErrors)
|
||||||
|
prometheus.MustRegister(DeprecatedEvictionStatsAge)
|
||||||
|
prometheus.MustRegister(DeprecatedDevicePluginRegistrationCount)
|
||||||
|
prometheus.MustRegister(DeprecatedDevicePluginAllocationLatency)
|
||||||
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) {
|
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) {
|
||||||
prometheus.MustRegister(AssignedConfig)
|
prometheus.MustRegister(AssignedConfig)
|
||||||
prometheus.MustRegister(ActiveConfig)
|
prometheus.MustRegister(ActiveConfig)
|
||||||
@ -280,6 +407,11 @@ func SinceInMicroseconds(start time.Time) float64 {
|
|||||||
return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds())
|
return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SinceInSeconds gets the time since the specified start in seconds.
|
||||||
|
func SinceInSeconds(start time.Time) float64 {
|
||||||
|
return time.Since(start).Seconds()
|
||||||
|
}
|
||||||
|
|
||||||
func newPodAndContainerCollector(containerCache kubecontainer.RuntimeCache) *podAndContainerCollector {
|
func newPodAndContainerCollector(containerCache kubecontainer.RuntimeCache) *podAndContainerCollector {
|
||||||
return &podAndContainerCollector{
|
return &podAndContainerCollector{
|
||||||
containerCache: containerCache,
|
containerCache: containerCache,
|
||||||
|
@ -189,12 +189,14 @@ func (g *GenericPLEG) relist() {
|
|||||||
klog.V(5).Infof("GenericPLEG: Relisting")
|
klog.V(5).Infof("GenericPLEG: Relisting")
|
||||||
|
|
||||||
if lastRelistTime := g.getRelistTime(); !lastRelistTime.IsZero() {
|
if lastRelistTime := g.getRelistTime(); !lastRelistTime.IsZero() {
|
||||||
metrics.PLEGRelistInterval.Observe(metrics.SinceInMicroseconds(lastRelistTime))
|
metrics.PLEGRelistInterval.Observe(metrics.SinceInSeconds(lastRelistTime))
|
||||||
|
metrics.DeprecatedPLEGRelistInterval.Observe(metrics.SinceInMicroseconds(lastRelistTime))
|
||||||
}
|
}
|
||||||
|
|
||||||
timestamp := g.clock.Now()
|
timestamp := g.clock.Now()
|
||||||
defer func() {
|
defer func() {
|
||||||
metrics.PLEGRelistLatency.Observe(metrics.SinceInMicroseconds(timestamp))
|
metrics.PLEGRelistDuration.Observe(metrics.SinceInSeconds(timestamp))
|
||||||
|
metrics.DeprecatedPLEGRelistLatency.Observe(metrics.SinceInMicroseconds(timestamp))
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Get all the pods.
|
// Get all the pods.
|
||||||
|
@ -102,13 +102,13 @@ func getKubeletMetrics(c clientset.Interface, nodeName string) (metrics.KubeletM
|
|||||||
// Note that the KubeletMetrics passed in should not contain subsystem prefix.
|
// Note that the KubeletMetrics passed in should not contain subsystem prefix.
|
||||||
func GetDefaultKubeletLatencyMetrics(ms metrics.KubeletMetrics) KubeletLatencyMetrics {
|
func GetDefaultKubeletLatencyMetrics(ms metrics.KubeletMetrics) KubeletLatencyMetrics {
|
||||||
latencyMetricNames := sets.NewString(
|
latencyMetricNames := sets.NewString(
|
||||||
kubeletmetrics.PodWorkerLatencyKey,
|
kubeletmetrics.PodWorkerDurationKey,
|
||||||
kubeletmetrics.PodWorkerStartLatencyKey,
|
kubeletmetrics.PodWorkerStartDurationKey,
|
||||||
kubeletmetrics.PodStartLatencyKey,
|
kubeletmetrics.PodStartDurationKey,
|
||||||
kubeletmetrics.CgroupManagerOperationsKey,
|
kubeletmetrics.CgroupManagerOperationsKey,
|
||||||
dockermetrics.DockerOperationsLatencyKey,
|
dockermetrics.DockerOperationsLatencyKey,
|
||||||
kubeletmetrics.PodWorkerStartLatencyKey,
|
kubeletmetrics.PodWorkerStartDurationKey,
|
||||||
kubeletmetrics.PLEGRelistLatencyKey,
|
kubeletmetrics.PLEGRelistDurationKey,
|
||||||
)
|
)
|
||||||
return GetKubeletLatencyMetrics(ms, latencyMetricNames)
|
return GetKubeletLatencyMetrics(ms, latencyMetricNames)
|
||||||
}
|
}
|
||||||
|
@ -168,9 +168,9 @@ var InterestingKubeletMetrics = []string{
|
|||||||
"kubelet_docker_errors",
|
"kubelet_docker_errors",
|
||||||
"kubelet_docker_operations_latency_seconds",
|
"kubelet_docker_operations_latency_seconds",
|
||||||
"kubelet_generate_pod_status_latency_microseconds",
|
"kubelet_generate_pod_status_latency_microseconds",
|
||||||
"kubelet_pod_start_latency_microseconds",
|
"kubelet_pod_start_duration_seconds",
|
||||||
"kubelet_pod_worker_latency_microseconds",
|
"kubelet_pod_worker_duration_seconds",
|
||||||
"kubelet_pod_worker_start_latency_microseconds",
|
"kubelet_pod_worker_start_duration_seconds",
|
||||||
"kubelet_sync_pods_latency_microseconds",
|
"kubelet_sync_pods_latency_microseconds",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -459,12 +459,12 @@ func getPodStartLatency(node string) (framework.KubeletLatencyMetrics, error) {
|
|||||||
|
|
||||||
for _, samples := range ms {
|
for _, samples := range ms {
|
||||||
for _, sample := range samples {
|
for _, sample := range samples {
|
||||||
if sample.Metric["__name__"] == kubemetrics.KubeletSubsystem+"_"+kubemetrics.PodStartLatencyKey {
|
if sample.Metric["__name__"] == kubemetrics.KubeletSubsystem+"_"+kubemetrics.PodStartDurationKey {
|
||||||
quantile, _ := strconv.ParseFloat(string(sample.Metric["quantile"]), 64)
|
quantile, _ := strconv.ParseFloat(string(sample.Metric["quantile"]), 64)
|
||||||
latencyMetrics = append(latencyMetrics,
|
latencyMetrics = append(latencyMetrics,
|
||||||
framework.KubeletLatencyMetric{
|
framework.KubeletLatencyMetric{
|
||||||
Quantile: quantile,
|
Quantile: quantile,
|
||||||
Method: kubemetrics.PodStartLatencyKey,
|
Method: kubemetrics.PodStartDurationKey,
|
||||||
Latency: time.Duration(int(sample.Value)) * time.Microsecond})
|
Latency: time.Duration(int(sample.Value)) * time.Microsecond})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -156,7 +156,7 @@ func logDevicePluginMetrics() {
|
|||||||
framework.ExpectNoError(err)
|
framework.ExpectNoError(err)
|
||||||
for msKey, samples := range ms {
|
for msKey, samples := range ms {
|
||||||
switch msKey {
|
switch msKey {
|
||||||
case kubeletmetrics.KubeletSubsystem + "_" + kubeletmetrics.DevicePluginAllocationLatencyKey:
|
case kubeletmetrics.KubeletSubsystem + "_" + kubeletmetrics.DevicePluginAllocationDurationKey:
|
||||||
for _, sample := range samples {
|
for _, sample := range samples {
|
||||||
latency := sample.Value
|
latency := sample.Value
|
||||||
resource := string(sample.Metric["resource_name"])
|
resource := string(sample.Metric["resource_name"])
|
||||||
|
Loading…
Reference in New Issue
Block a user