mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 11:50:44 +00:00
Merge pull request #99000 from verb/1.21-kubelet-metrics
Add kubelet metrics for ephemeral containers
This commit is contained in:
commit
7c84064a4f
@ -48,6 +48,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/images"
|
||||
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
||||
"k8s.io/kubernetes/pkg/kubelet/logs"
|
||||
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||
proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
|
||||
"k8s.io/kubernetes/pkg/kubelet/runtimeclass"
|
||||
"k8s.io/kubernetes/pkg/kubelet/types"
|
||||
@ -784,6 +785,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
|
||||
var err error
|
||||
|
||||
klog.V(4).InfoS("Creating PodSandbox for pod", "pod", klog.KObj(pod))
|
||||
metrics.StartedPodsTotal.Inc()
|
||||
createSandboxResult := kubecontainer.NewSyncResult(kubecontainer.CreatePodSandbox, format.Pod(pod))
|
||||
result.AddSyncResult(createSandboxResult)
|
||||
podSandboxID, msg, err = m.createPodSandbox(pod, podContainerChanges.Attempt)
|
||||
@ -796,6 +798,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
|
||||
klog.V(4).InfoS("Pod was deleted and sandbox failed to be created", "pod", klog.KObj(pod), "podUID", pod.UID)
|
||||
return
|
||||
}
|
||||
metrics.StartedPodsErrorsTotal.WithLabelValues(err.Error()).Inc()
|
||||
createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg)
|
||||
klog.ErrorS(err, "CreatePodSandbox for pod failed", "pod", klog.KObj(pod))
|
||||
ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
|
||||
@ -848,9 +851,11 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
|
||||
}
|
||||
|
||||
// Helper containing boilerplate common to starting all types of containers.
|
||||
// typeName is a label used to describe this type of container in log messages,
|
||||
// typeName is a description used to describe this type of container in log messages,
|
||||
// currently: "container", "init container" or "ephemeral container"
|
||||
start := func(typeName string, spec *startSpec) error {
|
||||
// metricLabel is the label used to describe this type of container in monitoring metrics.
|
||||
// currently: "container", "init_container" or "ephemeral_container"
|
||||
start := func(typeName, metricLabel string, spec *startSpec) error {
|
||||
startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, spec.container.Name)
|
||||
result.AddSyncResult(startContainerResult)
|
||||
|
||||
@ -861,9 +866,13 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
|
||||
return err
|
||||
}
|
||||
|
||||
metrics.StartedContainersTotal.WithLabelValues(metricLabel).Inc()
|
||||
klog.V(4).InfoS("Creating container in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod))
|
||||
// NOTE (aramase) podIPs are populated for single stack and dual stack clusters. Send only podIPs.
|
||||
if msg, err := m.startContainer(podSandboxID, podSandboxConfig, spec, pod, podStatus, pullSecrets, podIP, podIPs); err != nil {
|
||||
// startContainer() returns well-defined error codes that have reasonable cardinality for metrics and are
|
||||
// useful to cluster administrators to distinguish "server errors" from "user errors".
|
||||
metrics.StartedContainersErrorsTotal.WithLabelValues(metricLabel, err.Error()).Inc()
|
||||
startContainerResult.Fail(err, msg)
|
||||
// known errors that are logged in other places are logged at higher levels here to avoid
|
||||
// repetitive log spam
|
||||
@ -885,14 +894,14 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
|
||||
// containers cannot be specified on pod creation.
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.EphemeralContainers) {
|
||||
for _, idx := range podContainerChanges.EphemeralContainersToStart {
|
||||
start("ephemeral container", ephemeralContainerStartSpec(&pod.Spec.EphemeralContainers[idx]))
|
||||
start("ephemeral container", metrics.EphemeralContainer, ephemeralContainerStartSpec(&pod.Spec.EphemeralContainers[idx]))
|
||||
}
|
||||
}
|
||||
|
||||
// Step 6: start the init container.
|
||||
if container := podContainerChanges.NextInitContainerToStart; container != nil {
|
||||
// Start the next init container.
|
||||
if err := start("init container", containerStartSpec(container)); err != nil {
|
||||
if err := start("init container", metrics.InitContainer, containerStartSpec(container)); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
@ -902,7 +911,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
|
||||
|
||||
// Step 7: start containers in podContainerChanges.ContainersToStart.
|
||||
for _, idx := range podContainerChanges.ContainersToStart {
|
||||
start("container", containerStartSpec(&pod.Spec.Containers[idx]))
|
||||
start("container", metrics.Container, containerStartSpec(&pod.Spec.Containers[idx]))
|
||||
}
|
||||
|
||||
return
|
||||
|
@ -83,6 +83,20 @@ const (
|
||||
// Metrics keys for RuntimeClass
|
||||
RunPodSandboxDurationKey = "run_podsandbox_duration_seconds"
|
||||
RunPodSandboxErrorsKey = "run_podsandbox_errors_total"
|
||||
|
||||
// Metrics to keep track of total number of Pods and Containers started
|
||||
StartedPodsTotalKey = "started_pods_total"
|
||||
StartedPodsErrorsTotalKey = "started_pods_errors_total"
|
||||
StartedContainersTotalKey = "started_containers_total"
|
||||
StartedContainersErrorsTotalKey = "started_containers_errors_total"
|
||||
|
||||
// Metrics to track ephemeral container usage by this kubelet
|
||||
ManagedEphemeralContainersKey = "managed_ephemeral_containers"
|
||||
|
||||
// Values used in metric labels
|
||||
Container = "container"
|
||||
InitContainer = "init_container"
|
||||
EphemeralContainer = "ephemeral_container"
|
||||
)
|
||||
|
||||
var (
|
||||
@ -436,6 +450,54 @@ var (
|
||||
},
|
||||
[]string{"container_state"},
|
||||
)
|
||||
// StartedPodsTotal is a counter that tracks pod sandbox creation operations
|
||||
StartedPodsTotal = metrics.NewCounter(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: KubeletSubsystem,
|
||||
Name: StartedPodsTotalKey,
|
||||
Help: "Cumulative number of pods started",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
)
|
||||
// StartedPodsErrorsTotal is a counter that tracks the number of errors creating pod sandboxes
|
||||
StartedPodsErrorsTotal = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: KubeletSubsystem,
|
||||
Name: StartedPodsErrorsTotalKey,
|
||||
Help: "Cumulative number of errors when starting pods",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{"message"},
|
||||
)
|
||||
// StartedContainersTotal is a counter that tracks the number of container creation operations
|
||||
StartedContainersTotal = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: KubeletSubsystem,
|
||||
Name: StartedContainersTotalKey,
|
||||
Help: "Cumulative number of containers started",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{"container_type"},
|
||||
)
|
||||
// StartedContainersErrorsTotal is a counter that tracks the number of errors creating containers
|
||||
StartedContainersErrorsTotal = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: KubeletSubsystem,
|
||||
Name: StartedContainersErrorsTotalKey,
|
||||
Help: "Cumulative number of errors when starting containers",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{"container_type", "code"},
|
||||
)
|
||||
// ManagedEphemeralContainers is a gauge that indicates how many ephemeral containers are managed by this kubelet.
|
||||
ManagedEphemeralContainers = metrics.NewGauge(
|
||||
&metrics.GaugeOpts{
|
||||
Subsystem: KubeletSubsystem,
|
||||
Name: ManagedEphemeralContainersKey,
|
||||
Help: "Current number of ephemeral containers in pods managed by this kubelet. Ephemeral containers will be ignored if disabled by the EphemeralContainers feature gate, and this number will be 0.",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
var registerMetrics sync.Once
|
||||
@ -464,6 +526,11 @@ func Register(collectors ...metrics.StableCollector) {
|
||||
legacyregistry.MustRegister(DevicePluginAllocationDuration)
|
||||
legacyregistry.MustRegister(RunningContainerCount)
|
||||
legacyregistry.MustRegister(RunningPodCount)
|
||||
legacyregistry.MustRegister(ManagedEphemeralContainers)
|
||||
legacyregistry.MustRegister(StartedPodsTotal)
|
||||
legacyregistry.MustRegister(StartedPodsErrorsTotal)
|
||||
legacyregistry.MustRegister(StartedContainersTotal)
|
||||
legacyregistry.MustRegister(StartedContainersErrorsTotal)
|
||||
legacyregistry.MustRegister(RunPodSandboxDuration)
|
||||
legacyregistry.MustRegister(RunPodSandboxErrors)
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) {
|
||||
|
@ -19,10 +19,13 @@ package pod
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/kubelet/configmap"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||
"k8s.io/kubernetes/pkg/kubelet/secret"
|
||||
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
|
||||
)
|
||||
@ -159,6 +162,25 @@ func isPodInTerminatedState(pod *v1.Pod) bool {
|
||||
return pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded
|
||||
}
|
||||
|
||||
// updateMetrics updates the metrics surfaced by the pod manager: it adjusts the
// ManagedEphemeralContainers gauge by the change in ephemeral container count
// between oldPod and newPod.
|
||||
// oldPod or newPod may be nil to signify creation or deletion; a nil pod
// contributes zero containers to the delta.
|
||||
func updateMetrics(oldPod, newPod *v1.Pod) {
|
||||
// Nothing is recorded when the EphemeralContainers feature gate is disabled.
if !utilfeature.DefaultFeatureGate.Enabled(features.EphemeralContainers) {
|
||||
return
|
||||
}
|
||||
|
||||
// numEC accumulates the net change: containers in newPod minus containers in oldPod.
var numEC int
|
||||
if oldPod != nil {
|
||||
numEC -= len(oldPod.Spec.EphemeralContainers)
|
||||
}
|
||||
if newPod != nil {
|
||||
numEC += len(newPod.Spec.EphemeralContainers)
|
||||
}
|
||||
// Only touch the gauge when the count actually changed.
if numEC != 0 {
|
||||
metrics.ManagedEphemeralContainers.Add(float64(numEC))
|
||||
}
|
||||
}
|
||||
|
||||
// updatePodsInternal replaces the given pods in the current state of the
|
||||
// manager, updating the various indices. The caller is assumed to hold the
|
||||
// lock.
|
||||
@ -202,6 +224,7 @@ func (pm *basicManager) updatePodsInternal(pods ...*v1.Pod) {
|
||||
}
|
||||
} else {
|
||||
resolvedPodUID := kubetypes.ResolvedPodUID(pod.UID)
|
||||
updateMetrics(pm.podByUID[resolvedPodUID], pod)
|
||||
pm.podByUID[resolvedPodUID] = pod
|
||||
pm.podByFullName[podFullName] = pod
|
||||
if mirror, ok := pm.mirrorPodByFullName[podFullName]; ok {
|
||||
@ -212,6 +235,7 @@ func (pm *basicManager) updatePodsInternal(pods ...*v1.Pod) {
|
||||
}
|
||||
|
||||
func (pm *basicManager) DeletePod(pod *v1.Pod) {
|
||||
updateMetrics(pod, nil)
|
||||
pm.lock.Lock()
|
||||
defer pm.lock.Unlock()
|
||||
if pm.secretManager != nil {
|
||||
|
Loading…
Reference in New Issue
Block a user