Merge pull request #99000 from verb/1.21-kubelet-metrics

Add kubelet metrics for ephemeral containers
This commit is contained in:
Kubernetes Prow Robot 2021-07-08 14:00:55 -07:00 committed by GitHub
commit 7c84064a4f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 106 additions and 6 deletions

View File

@ -48,6 +48,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/images" "k8s.io/kubernetes/pkg/kubelet/images"
"k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/logs" "k8s.io/kubernetes/pkg/kubelet/logs"
"k8s.io/kubernetes/pkg/kubelet/metrics"
proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results" proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
"k8s.io/kubernetes/pkg/kubelet/runtimeclass" "k8s.io/kubernetes/pkg/kubelet/runtimeclass"
"k8s.io/kubernetes/pkg/kubelet/types" "k8s.io/kubernetes/pkg/kubelet/types"
@ -784,6 +785,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
var err error var err error
klog.V(4).InfoS("Creating PodSandbox for pod", "pod", klog.KObj(pod)) klog.V(4).InfoS("Creating PodSandbox for pod", "pod", klog.KObj(pod))
metrics.StartedPodsTotal.Inc()
createSandboxResult := kubecontainer.NewSyncResult(kubecontainer.CreatePodSandbox, format.Pod(pod)) createSandboxResult := kubecontainer.NewSyncResult(kubecontainer.CreatePodSandbox, format.Pod(pod))
result.AddSyncResult(createSandboxResult) result.AddSyncResult(createSandboxResult)
podSandboxID, msg, err = m.createPodSandbox(pod, podContainerChanges.Attempt) podSandboxID, msg, err = m.createPodSandbox(pod, podContainerChanges.Attempt)
@ -796,6 +798,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
klog.V(4).InfoS("Pod was deleted and sandbox failed to be created", "pod", klog.KObj(pod), "podUID", pod.UID) klog.V(4).InfoS("Pod was deleted and sandbox failed to be created", "pod", klog.KObj(pod), "podUID", pod.UID)
return return
} }
metrics.StartedPodsErrorsTotal.WithLabelValues(err.Error()).Inc()
createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg) createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg)
klog.ErrorS(err, "CreatePodSandbox for pod failed", "pod", klog.KObj(pod)) klog.ErrorS(err, "CreatePodSandbox for pod failed", "pod", klog.KObj(pod))
ref, referr := ref.GetReference(legacyscheme.Scheme, pod) ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
@ -848,9 +851,11 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
} }
// Helper containing boilerplate common to starting all types of containers. // Helper containing boilerplate common to starting all types of containers.
// typeName is a label used to describe this type of container in log messages, // typeName is a description used to describe this type of container in log messages,
// currently: "container", "init container" or "ephemeral container" // currently: "container", "init container" or "ephemeral container"
start := func(typeName string, spec *startSpec) error { // metricLabel is the label used to describe this type of container in monitoring metrics.
// currently: "container", "init_container" or "ephemeral_container"
start := func(typeName, metricLabel string, spec *startSpec) error {
startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, spec.container.Name) startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, spec.container.Name)
result.AddSyncResult(startContainerResult) result.AddSyncResult(startContainerResult)
@ -861,9 +866,13 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
return err return err
} }
metrics.StartedContainersTotal.WithLabelValues(metricLabel).Inc()
klog.V(4).InfoS("Creating container in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod)) klog.V(4).InfoS("Creating container in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod))
// NOTE (aramase) podIPs are populated for single stack and dual stack clusters. Send only podIPs. // NOTE (aramase) podIPs are populated for single stack and dual stack clusters. Send only podIPs.
if msg, err := m.startContainer(podSandboxID, podSandboxConfig, spec, pod, podStatus, pullSecrets, podIP, podIPs); err != nil { if msg, err := m.startContainer(podSandboxID, podSandboxConfig, spec, pod, podStatus, pullSecrets, podIP, podIPs); err != nil {
// startContainer() returns well-defined error codes that have reasonable cardinality for metrics and are
// useful to cluster administrators to distinguish "server errors" from "user errors".
metrics.StartedContainersErrorsTotal.WithLabelValues(metricLabel, err.Error()).Inc()
startContainerResult.Fail(err, msg) startContainerResult.Fail(err, msg)
// known errors that are logged in other places are logged at higher levels here to avoid // known errors that are logged in other places are logged at higher levels here to avoid
// repetitive log spam // repetitive log spam
@ -885,14 +894,14 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
// containers cannot be specified on pod creation. // containers cannot be specified on pod creation.
if utilfeature.DefaultFeatureGate.Enabled(features.EphemeralContainers) { if utilfeature.DefaultFeatureGate.Enabled(features.EphemeralContainers) {
for _, idx := range podContainerChanges.EphemeralContainersToStart { for _, idx := range podContainerChanges.EphemeralContainersToStart {
start("ephemeral container", ephemeralContainerStartSpec(&pod.Spec.EphemeralContainers[idx])) start("ephemeral container", metrics.EphemeralContainer, ephemeralContainerStartSpec(&pod.Spec.EphemeralContainers[idx]))
} }
} }
// Step 6: start the init container. // Step 6: start the init container.
if container := podContainerChanges.NextInitContainerToStart; container != nil { if container := podContainerChanges.NextInitContainerToStart; container != nil {
// Start the next init container. // Start the next init container.
if err := start("init container", containerStartSpec(container)); err != nil { if err := start("init container", metrics.InitContainer, containerStartSpec(container)); err != nil {
return return
} }
@ -902,7 +911,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
// Step 7: start containers in podContainerChanges.ContainersToStart. // Step 7: start containers in podContainerChanges.ContainersToStart.
for _, idx := range podContainerChanges.ContainersToStart { for _, idx := range podContainerChanges.ContainersToStart {
start("container", containerStartSpec(&pod.Spec.Containers[idx])) start("container", metrics.Container, containerStartSpec(&pod.Spec.Containers[idx]))
} }
return return

View File

@ -83,6 +83,20 @@ const (
// Metrics keys for RuntimeClass // Metrics keys for RuntimeClass
RunPodSandboxDurationKey = "run_podsandbox_duration_seconds" RunPodSandboxDurationKey = "run_podsandbox_duration_seconds"
RunPodSandboxErrorsKey = "run_podsandbox_errors_total" RunPodSandboxErrorsKey = "run_podsandbox_errors_total"
// Names of the counters that track how many pods and containers were started,
// and how many errors occurred while starting them.
StartedPodsTotalKey = "started_pods_total"
StartedPodsErrorsTotalKey = "started_pods_errors_total"
StartedContainersTotalKey = "started_containers_total"
StartedContainersErrorsTotalKey = "started_containers_errors_total"
// Name of the gauge that tracks ephemeral container usage by this kubelet.
ManagedEphemeralContainersKey = "managed_ephemeral_containers"
// Values used for the "container_type" label of StartedContainersTotal and
// StartedContainersErrorsTotal.
Container = "container"
InitContainer = "init_container"
EphemeralContainer = "ephemeral_container"
) )
var ( var (
@ -436,6 +450,54 @@ var (
}, },
[]string{"container_state"}, []string{"container_state"},
) )
// StartedPodsTotal is a counter that tracks pod sandbox creation operations.
// SyncPod increments it immediately before calling createPodSandbox, so it
// counts attempts regardless of whether the sandbox is created successfully.
StartedPodsTotal = metrics.NewCounter(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: StartedPodsTotalKey,
Help: "Cumulative number of pods started",
StabilityLevel: metrics.ALPHA,
},
)
// StartedPodsErrorsTotal is a counter that tracks the number of errors creating pod sandboxes.
// The "message" label carries the text of the error returned by createPodSandbox (err.Error()).
// NOTE(review): the label value is free-form error text, so the number of time series
// depends on how varied those messages are; confirm the cardinality is acceptable.
StartedPodsErrorsTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: StartedPodsErrorsTotalKey,
Help: "Cumulative number of errors when starting pods",
StabilityLevel: metrics.ALPHA,
},
[]string{"message"},
)
// StartedContainersTotal is a counter that tracks the number of container creation operations.
// It is partitioned by the "container_type" label (Container, InitContainer or
// EphemeralContainer) and is incremented before startContainer is called.
StartedContainersTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: StartedContainersTotalKey,
Help: "Cumulative number of containers started",
StabilityLevel: metrics.ALPHA,
},
[]string{"container_type"},
)
// StartedContainersErrorsTotal is a counter that tracks the number of errors creating containers.
// Labels: "container_type" is the kind of container being started (Container,
// InitContainer or EphemeralContainer); "code" is the error string returned by
// startContainer (err.Error()).
StartedContainersErrorsTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: StartedContainersErrorsTotalKey,
Help: "Cumulative number of errors when starting containers",
StabilityLevel: metrics.ALPHA,
},
[]string{"container_type", "code"},
)
// ManagedEphemeralContainers is a gauge that indicates how many ephemeral containers are managed by this kubelet.
// The pod manager adjusts it by the change in len(pod.Spec.EphemeralContainers)
// when a pod is added, updated or deleted, and leaves it untouched while the
// EphemeralContainers feature gate is disabled.
ManagedEphemeralContainers = metrics.NewGauge(
&metrics.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: ManagedEphemeralContainersKey,
Help: "Current number of ephemeral containers in pods managed by this kubelet. Ephemeral containers will be ignored if disabled by the EphemeralContainers feature gate, and this number will be 0.",
StabilityLevel: metrics.ALPHA,
},
)
) )
var registerMetrics sync.Once var registerMetrics sync.Once
@ -464,6 +526,11 @@ func Register(collectors ...metrics.StableCollector) {
legacyregistry.MustRegister(DevicePluginAllocationDuration) legacyregistry.MustRegister(DevicePluginAllocationDuration)
legacyregistry.MustRegister(RunningContainerCount) legacyregistry.MustRegister(RunningContainerCount)
legacyregistry.MustRegister(RunningPodCount) legacyregistry.MustRegister(RunningPodCount)
legacyregistry.MustRegister(ManagedEphemeralContainers)
legacyregistry.MustRegister(StartedPodsTotal)
legacyregistry.MustRegister(StartedPodsErrorsTotal)
legacyregistry.MustRegister(StartedContainersTotal)
legacyregistry.MustRegister(StartedContainersErrorsTotal)
legacyregistry.MustRegister(RunPodSandboxDuration) legacyregistry.MustRegister(RunPodSandboxDuration)
legacyregistry.MustRegister(RunPodSandboxErrors) legacyregistry.MustRegister(RunPodSandboxErrors)
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) { if utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) {

View File

@ -19,10 +19,13 @@ package pod
import ( import (
"sync" "sync"
"k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/configmap" "k8s.io/kubernetes/pkg/kubelet/configmap"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/kubernetes/pkg/kubelet/secret" "k8s.io/kubernetes/pkg/kubelet/secret"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types" kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
) )
@ -159,6 +162,25 @@ func isPodInTerminatedState(pod *v1.Pod) bool {
return pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded return pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded
} }
// updateMetrics keeps the ManagedEphemeralContainers gauge in step with a pod
// transition. A nil oldPod signifies that the pod is being created; a nil
// newPod signifies that it is being deleted. Nothing is recorded while the
// EphemeralContainers feature gate is disabled.
func updateMetrics(oldPod, newPod *v1.Pod) {
	if !utilfeature.DefaultFeatureGate.Enabled(features.EphemeralContainers) {
		return
	}

	// Count the ephemeral containers on each side of the transition; a missing
	// pod contributes zero.
	before, after := 0, 0
	if oldPod != nil {
		before = len(oldPod.Spec.EphemeralContainers)
	}
	if newPod != nil {
		after = len(newPod.Spec.EphemeralContainers)
	}

	// Apply only the net change, and skip the gauge entirely when there is none.
	if delta := after - before; delta != 0 {
		metrics.ManagedEphemeralContainers.Add(float64(delta))
	}
}
// updatePodsInternal replaces the given pods in the current state of the // updatePodsInternal replaces the given pods in the current state of the
// manager, updating the various indices. The caller is assumed to hold the // manager, updating the various indices. The caller is assumed to hold the
// lock. // lock.
@ -202,6 +224,7 @@ func (pm *basicManager) updatePodsInternal(pods ...*v1.Pod) {
} }
} else { } else {
resolvedPodUID := kubetypes.ResolvedPodUID(pod.UID) resolvedPodUID := kubetypes.ResolvedPodUID(pod.UID)
updateMetrics(pm.podByUID[resolvedPodUID], pod)
pm.podByUID[resolvedPodUID] = pod pm.podByUID[resolvedPodUID] = pod
pm.podByFullName[podFullName] = pod pm.podByFullName[podFullName] = pod
if mirror, ok := pm.mirrorPodByFullName[podFullName]; ok { if mirror, ok := pm.mirrorPodByFullName[podFullName]; ok {
@ -212,6 +235,7 @@ func (pm *basicManager) updatePodsInternal(pods ...*v1.Pod) {
} }
func (pm *basicManager) DeletePod(pod *v1.Pod) { func (pm *basicManager) DeletePod(pod *v1.Pod) {
updateMetrics(pod, nil)
pm.lock.Lock() pm.lock.Lock()
defer pm.lock.Unlock() defer pm.lock.Unlock()
if pm.secretManager != nil { if pm.secretManager != nil {