Merge pull request #99000 from verb/1.21-kubelet-metrics

Add kubelet metrics for ephemeral containers
This commit is contained in:
Kubernetes Prow Robot 2021-07-08 14:00:55 -07:00 committed by GitHub
commit 7c84064a4f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 106 additions and 6 deletions

View File

@ -48,6 +48,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/images"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/logs"
"k8s.io/kubernetes/pkg/kubelet/metrics"
proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
"k8s.io/kubernetes/pkg/kubelet/runtimeclass"
"k8s.io/kubernetes/pkg/kubelet/types"
@ -784,6 +785,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
var err error
klog.V(4).InfoS("Creating PodSandbox for pod", "pod", klog.KObj(pod))
metrics.StartedPodsTotal.Inc()
createSandboxResult := kubecontainer.NewSyncResult(kubecontainer.CreatePodSandbox, format.Pod(pod))
result.AddSyncResult(createSandboxResult)
podSandboxID, msg, err = m.createPodSandbox(pod, podContainerChanges.Attempt)
@ -796,6 +798,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
klog.V(4).InfoS("Pod was deleted and sandbox failed to be created", "pod", klog.KObj(pod), "podUID", pod.UID)
return
}
metrics.StartedPodsErrorsTotal.WithLabelValues(err.Error()).Inc()
createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg)
klog.ErrorS(err, "CreatePodSandbox for pod failed", "pod", klog.KObj(pod))
ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
@ -848,9 +851,11 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
}
// Helper containing boilerplate common to starting all types of containers.
// typeName is a label used to describe this type of container in log messages,
// typeName is a description used to describe this type of container in log messages,
// currently: "container", "init container" or "ephemeral container"
start := func(typeName string, spec *startSpec) error {
// metricLabel is the label used to describe this type of container in monitoring metrics.
// currently: "container", "init_container" or "ephemeral_container"
start := func(typeName, metricLabel string, spec *startSpec) error {
startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, spec.container.Name)
result.AddSyncResult(startContainerResult)
@ -861,9 +866,13 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
return err
}
metrics.StartedContainersTotal.WithLabelValues(metricLabel).Inc()
klog.V(4).InfoS("Creating container in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod))
// NOTE (aramase) podIPs are populated for single stack and dual stack clusters. Send only podIPs.
if msg, err := m.startContainer(podSandboxID, podSandboxConfig, spec, pod, podStatus, pullSecrets, podIP, podIPs); err != nil {
// startContainer() returns well-defined error codes that have reasonable cardinality for metrics and are
// useful to cluster administrators to distinguish "server errors" from "user errors".
metrics.StartedContainersErrorsTotal.WithLabelValues(metricLabel, err.Error()).Inc()
startContainerResult.Fail(err, msg)
// known errors that are logged in other places are logged at higher levels here to avoid
// repetitive log spam
@ -885,14 +894,14 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
// containers cannot be specified on pod creation.
if utilfeature.DefaultFeatureGate.Enabled(features.EphemeralContainers) {
for _, idx := range podContainerChanges.EphemeralContainersToStart {
start("ephemeral container", ephemeralContainerStartSpec(&pod.Spec.EphemeralContainers[idx]))
start("ephemeral container", metrics.EphemeralContainer, ephemeralContainerStartSpec(&pod.Spec.EphemeralContainers[idx]))
}
}
// Step 6: start the init container.
if container := podContainerChanges.NextInitContainerToStart; container != nil {
// Start the next init container.
if err := start("init container", containerStartSpec(container)); err != nil {
if err := start("init container", metrics.InitContainer, containerStartSpec(container)); err != nil {
return
}
@ -902,7 +911,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
// Step 7: start containers in podContainerChanges.ContainersToStart.
for _, idx := range podContainerChanges.ContainersToStart {
start("container", containerStartSpec(&pod.Spec.Containers[idx]))
start("container", metrics.Container, containerStartSpec(&pod.Spec.Containers[idx]))
}
return

View File

@ -83,6 +83,20 @@ const (
// Metrics keys for RuntimeClass
RunPodSandboxDurationKey = "run_podsandbox_duration_seconds"
RunPodSandboxErrorsKey = "run_podsandbox_errors_total"
// Metrics to keep track of total number of Pods and Containers started
StartedPodsTotalKey = "started_pods_total"
StartedPodsErrorsTotalKey = "started_pods_errors_total"
StartedContainersTotalKey = "started_containers_total"
StartedContainersErrorsTotalKey = "started_containers_errors_total"
// Metrics to track ephemeral container usage by this kubelet
ManagedEphemeralContainersKey = "managed_ephemeral_containers"
// Values used in metric labels
Container = "container"
InitContainer = "init_container"
EphemeralContainer = "ephemeral_container"
)
var (
@ -436,6 +450,54 @@ var (
},
[]string{"container_state"},
)
// StartedPodsTotal is an unlabeled counter that tracks pod sandbox creation
// operations. SyncPod increments it immediately before it attempts to create
// the sandbox, so it counts attempts whether or not the creation succeeds.
StartedPodsTotal = metrics.NewCounter(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: StartedPodsTotalKey,
Help: "Cumulative number of pods started",
StabilityLevel: metrics.ALPHA,
},
)
// StartedPodsErrorsTotal is a counter vector that tracks the number of errors
// creating pod sandboxes, partitioned by the "message" label.
// NOTE(review): SyncPod passes err.Error() as the label value; confirm that the
// set of distinct sandbox-creation error strings is small enough to keep the
// cardinality of this metric bounded.
StartedPodsErrorsTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: StartedPodsErrorsTotalKey,
Help: "Cumulative number of errors when starting pods",
StabilityLevel: metrics.ALPHA,
},
[]string{"message"},
)
// StartedContainersTotal is a counter vector that tracks the number of
// container creation operations, partitioned by the "container_type" label.
// Callers supply one of the Container, InitContainer or EphemeralContainer
// label values.
StartedContainersTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: StartedContainersTotalKey,
Help: "Cumulative number of containers started",
StabilityLevel: metrics.ALPHA,
},
[]string{"container_type"},
)
// StartedContainersErrorsTotal is a counter vector that tracks the number of
// errors creating containers. It is partitioned by "container_type" (the same
// values used for StartedContainersTotal) and by "code", which the caller fills
// with the error string returned when starting the container fails.
StartedContainersErrorsTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: StartedContainersErrorsTotalKey,
Help: "Cumulative number of errors when starting containers",
StabilityLevel: metrics.ALPHA,
},
[]string{"container_type", "code"},
)
// ManagedEphemeralContainers is a gauge that indicates how many ephemeral
// containers exist in the pods managed by this kubelet. Per its help text, the
// value is 0 while the EphemeralContainers feature gate is disabled.
ManagedEphemeralContainers = metrics.NewGauge(
&metrics.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: ManagedEphemeralContainersKey,
Help: "Current number of ephemeral containers in pods managed by this kubelet. Ephemeral containers will be ignored if disabled by the EphemeralContainers feature gate, and this number will be 0.",
StabilityLevel: metrics.ALPHA,
},
)
)
var registerMetrics sync.Once
@ -464,6 +526,11 @@ func Register(collectors ...metrics.StableCollector) {
legacyregistry.MustRegister(DevicePluginAllocationDuration)
legacyregistry.MustRegister(RunningContainerCount)
legacyregistry.MustRegister(RunningPodCount)
legacyregistry.MustRegister(ManagedEphemeralContainers)
legacyregistry.MustRegister(StartedPodsTotal)
legacyregistry.MustRegister(StartedPodsErrorsTotal)
legacyregistry.MustRegister(StartedContainersTotal)
legacyregistry.MustRegister(StartedContainersErrorsTotal)
legacyregistry.MustRegister(RunPodSandboxDuration)
legacyregistry.MustRegister(RunPodSandboxErrors)
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) {

View File

@ -19,10 +19,13 @@ package pod
import (
"sync"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/configmap"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/kubernetes/pkg/kubelet/secret"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)
@ -159,6 +162,25 @@ func isPodInTerminatedState(pod *v1.Pod) bool {
return pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded
}
// updateMetrics adjusts the ManagedEphemeralContainers gauge by the change in
// ephemeral container count between oldPod and newPod. A nil oldPod signifies
// creation and a nil newPod signifies deletion. Nothing is recorded while the
// EphemeralContainers feature gate is disabled.
func updateMetrics(oldPod, newPod *v1.Pod) {
	if !utilfeature.DefaultFeatureGate.Enabled(features.EphemeralContainers) {
		return
	}

	before, after := 0, 0
	if oldPod != nil {
		before = len(oldPod.Spec.EphemeralContainers)
	}
	if newPod != nil {
		after = len(newPod.Spec.EphemeralContainers)
	}

	// Leave the gauge untouched when the count did not change.
	if delta := after - before; delta != 0 {
		metrics.ManagedEphemeralContainers.Add(float64(delta))
	}
}
// updatePodsInternal replaces the given pods in the current state of the
// manager, updating the various indices. The caller is assumed to hold the
// lock.
@ -202,6 +224,7 @@ func (pm *basicManager) updatePodsInternal(pods ...*v1.Pod) {
}
} else {
resolvedPodUID := kubetypes.ResolvedPodUID(pod.UID)
updateMetrics(pm.podByUID[resolvedPodUID], pod)
pm.podByUID[resolvedPodUID] = pod
pm.podByFullName[podFullName] = pod
if mirror, ok := pm.mirrorPodByFullName[podFullName]; ok {
@ -212,6 +235,7 @@ func (pm *basicManager) updatePodsInternal(pods ...*v1.Pod) {
}
func (pm *basicManager) DeletePod(pod *v1.Pod) {
updateMetrics(pod, nil)
pm.lock.Lock()
defer pm.lock.Unlock()
if pm.secretManager != nil {