mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-14 22:33:34 +00:00
fix gc metrics
This commit is contained in:
parent
8ffbbe455b
commit
322da7c1aa
@ -37,6 +37,7 @@ import (
|
||||
"k8s.io/client-go/tools/cache"
|
||||
"k8s.io/client-go/util/workqueue"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/controller/podgc/metrics"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/kubelet/eviction"
|
||||
nodeutil "k8s.io/kubernetes/pkg/util/node"
|
||||
@ -69,11 +70,6 @@ type PodGCController struct {
|
||||
quarantineTime time.Duration
|
||||
}
|
||||
|
||||
func init() {
|
||||
// Register prometheus metrics
|
||||
RegisterMetrics()
|
||||
}
|
||||
|
||||
func NewPodGC(ctx context.Context, kubeClient clientset.Interface, podInformer coreinformers.PodInformer,
|
||||
nodeInformer coreinformers.NodeInformer, terminatedPodThreshold int) *PodGCController {
|
||||
return NewPodGCInternal(ctx, kubeClient, podInformer, nodeInformer, terminatedPodThreshold, gcCheckPeriod, quarantineTime)
|
||||
@ -94,6 +90,8 @@ func NewPodGCInternal(ctx context.Context, kubeClient clientset.Interface, podIn
|
||||
quarantineTime: quarantineTime,
|
||||
}
|
||||
|
||||
// Register prometheus metrics
|
||||
metrics.RegisterMetrics()
|
||||
return gcc
|
||||
}
|
||||
|
||||
@ -179,11 +177,11 @@ func (gcc *PodGCController) gcTerminating(ctx context.Context, pods []*v1.Pod) {
|
||||
wait.Add(1)
|
||||
go func(pod *v1.Pod) {
|
||||
defer wait.Done()
|
||||
deletingPodsTotal.WithLabelValues().Inc()
|
||||
metrics.DeletingPodsTotal.WithLabelValues(pod.Namespace, metrics.PodGCReasonTerminatingOutOfService).Inc()
|
||||
if err := gcc.markFailedAndDeletePod(ctx, pod); err != nil {
|
||||
// ignore not founds
|
||||
utilruntime.HandleError(err)
|
||||
deletingPodsErrorTotal.WithLabelValues().Inc()
|
||||
metrics.DeletingPodsErrorTotal.WithLabelValues(pod.Namespace, metrics.PodGCReasonTerminatingOutOfService).Inc()
|
||||
}
|
||||
}(terminatingPods[i])
|
||||
}
|
||||
@ -216,7 +214,9 @@ func (gcc *PodGCController) gcTerminated(ctx context.Context, pods []*v1.Pod) {
|
||||
if err := gcc.markFailedAndDeletePod(ctx, pod); err != nil {
|
||||
// ignore not founds
|
||||
defer utilruntime.HandleError(err)
|
||||
metrics.DeletingPodsErrorTotal.WithLabelValues(pod.Namespace, metrics.PodGCReasonTerminated).Inc()
|
||||
}
|
||||
metrics.DeletingPodsTotal.WithLabelValues(pod.Namespace, metrics.PodGCReasonTerminated).Inc()
|
||||
}(terminatedPods[i])
|
||||
}
|
||||
wait.Wait()
|
||||
@ -254,9 +254,11 @@ func (gcc *PodGCController) gcOrphaned(ctx context.Context, pods []*v1.Pod, node
|
||||
WithLastTransitionTime(metav1.Now())
|
||||
if err := gcc.markFailedAndDeletePodWithCondition(ctx, pod, condition); err != nil {
|
||||
utilruntime.HandleError(err)
|
||||
metrics.DeletingPodsErrorTotal.WithLabelValues(pod.Namespace, metrics.PodGCReasonOrphaned).Inc()
|
||||
} else {
|
||||
klog.InfoS("Forced deletion of orphaned Pod succeeded", "pod", klog.KObj(pod))
|
||||
}
|
||||
metrics.DeletingPodsTotal.WithLabelValues(pod.Namespace, metrics.PodGCReasonOrphaned).Inc()
|
||||
}
|
||||
}
|
||||
|
||||
@ -303,9 +305,11 @@ func (gcc *PodGCController) gcUnscheduledTerminating(ctx context.Context, pods [
|
||||
klog.V(2).InfoS("Found unscheduled terminating Pod not assigned to any Node, deleting.", "pod", klog.KObj(pod))
|
||||
if err := gcc.markFailedAndDeletePod(ctx, pod); err != nil {
|
||||
utilruntime.HandleError(err)
|
||||
metrics.DeletingPodsErrorTotal.WithLabelValues(pod.Namespace, metrics.PodGCReasonTerminatingUnscheduled).Inc()
|
||||
} else {
|
||||
klog.InfoS("Forced deletion of unscheduled terminating Pod succeeded", "pod", klog.KObj(pod))
|
||||
}
|
||||
metrics.DeletingPodsTotal.WithLabelValues(pod.Namespace, metrics.PodGCReasonTerminatingUnscheduled).Inc()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -37,6 +37,7 @@ import (
|
||||
featuregatetesting "k8s.io/component-base/featuregate/testing"
|
||||
metricstestutil "k8s.io/component-base/metrics/testutil"
|
||||
"k8s.io/kubernetes/pkg/controller"
|
||||
"k8s.io/kubernetes/pkg/controller/podgc/metrics"
|
||||
"k8s.io/kubernetes/pkg/controller/testutil"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/kubelet/eviction"
|
||||
@ -159,7 +160,7 @@ func TestGCTerminated(t *testing.T) {
|
||||
for _, pod := range test.pods {
|
||||
creationTime = creationTime.Add(1 * time.Hour)
|
||||
pods = append(pods, &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: pod.name, CreationTimestamp: metav1.Time{Time: creationTime}},
|
||||
ObjectMeta: metav1.ObjectMeta{Name: pod.name, Namespace: metav1.NamespaceDefault, CreationTimestamp: metav1.Time{Time: creationTime}},
|
||||
Status: v1.PodStatus{Phase: pod.phase, Reason: pod.reason},
|
||||
Spec: v1.PodSpec{NodeName: "node"},
|
||||
})
|
||||
@ -175,12 +176,16 @@ func TestGCTerminated(t *testing.T) {
|
||||
verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
|
||||
})
|
||||
}
|
||||
|
||||
// testDeletingPodsMetrics is 9 in this test
|
||||
testDeletingPodsMetrics(t, 9, metrics.PodGCReasonTerminated)
|
||||
}
|
||||
|
||||
func makePod(name string, nodeName string, phase v1.PodPhase) *v1.Pod {
|
||||
return &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: name,
|
||||
Namespace: metav1.NamespaceDefault,
|
||||
},
|
||||
Spec: v1.PodSpec{NodeName: nodeName},
|
||||
Status: v1.PodStatus{Phase: phase},
|
||||
@ -406,6 +411,9 @@ func TestGCOrphaned(t *testing.T) {
|
||||
verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
|
||||
})
|
||||
}
|
||||
|
||||
// testDeletingPodsMetrics is 10 in this test
|
||||
testDeletingPodsMetrics(t, 10, metrics.PodGCReasonOrphaned)
|
||||
}
|
||||
|
||||
func TestGCUnscheduledTerminating(t *testing.T) {
|
||||
@ -463,7 +471,7 @@ func TestGCUnscheduledTerminating(t *testing.T) {
|
||||
for _, pod := range test.pods {
|
||||
creationTime = creationTime.Add(1 * time.Hour)
|
||||
pods = append(pods, &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: pod.name, CreationTimestamp: metav1.Time{Time: creationTime},
|
||||
ObjectMeta: metav1.ObjectMeta{Name: pod.name, Namespace: metav1.NamespaceDefault, CreationTimestamp: metav1.Time{Time: creationTime},
|
||||
DeletionTimestamp: pod.deletionTimeStamp},
|
||||
Status: v1.PodStatus{Phase: pod.phase},
|
||||
Spec: v1.PodSpec{NodeName: pod.nodeName},
|
||||
@ -486,6 +494,9 @@ func TestGCUnscheduledTerminating(t *testing.T) {
|
||||
verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
|
||||
})
|
||||
}
|
||||
|
||||
// testDeletingPodsMetrics is 6 in this test
|
||||
testDeletingPodsMetrics(t, 6, metrics.PodGCReasonTerminatingUnscheduled)
|
||||
}
|
||||
|
||||
func TestGCTerminating(t *testing.T) {
|
||||
@ -633,7 +644,7 @@ func TestGCTerminating(t *testing.T) {
|
||||
for _, pod := range test.pods {
|
||||
creationTime = creationTime.Add(1 * time.Hour)
|
||||
pods = append(pods, &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: pod.name, CreationTimestamp: metav1.Time{Time: creationTime},
|
||||
ObjectMeta: metav1.ObjectMeta{Name: pod.name, Namespace: metav1.NamespaceDefault, CreationTimestamp: metav1.Time{Time: creationTime},
|
||||
DeletionTimestamp: pod.deletionTimeStamp},
|
||||
Status: v1.PodStatus{Phase: pod.phase},
|
||||
Spec: v1.PodSpec{NodeName: pod.nodeName},
|
||||
@ -653,8 +664,8 @@ func TestGCTerminating(t *testing.T) {
|
||||
verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
|
||||
})
|
||||
}
|
||||
// deletingPodsTotal is 7 in this test
|
||||
testDeletingPodsMetrics(t, 7)
|
||||
// testDeletingPodsMetrics is 7 in this test
|
||||
testDeletingPodsMetrics(t, 7, metrics.PodGCReasonTerminatingOutOfService)
|
||||
}
|
||||
|
||||
func verifyDeletedAndPatchedPods(t *testing.T, client *fake.Clientset, wantDeletedPodNames, wantPatchedPodNames sets.String) {
|
||||
@ -669,18 +680,18 @@ func verifyDeletedAndPatchedPods(t *testing.T, client *fake.Clientset, wantDelet
|
||||
}
|
||||
}
|
||||
|
||||
func testDeletingPodsMetrics(t *testing.T, inputDeletingPodsTotal int) {
|
||||
func testDeletingPodsMetrics(t *testing.T, total int, reason string) {
|
||||
t.Helper()
|
||||
|
||||
actualDeletingPodsTotal, err := metricstestutil.GetCounterMetricValue(deletingPodsTotal.WithLabelValues())
|
||||
actualDeletingPodsTotal, err := metricstestutil.GetCounterMetricValue(metrics.DeletingPodsTotal.WithLabelValues(metav1.NamespaceDefault, reason))
|
||||
if err != nil {
|
||||
t.Errorf("Error getting actualDeletingPodsTotal")
|
||||
}
|
||||
if actualDeletingPodsTotal != float64(inputDeletingPodsTotal) {
|
||||
t.Errorf("Expected desiredDeletingPodsTotal to be %d, got %v", inputDeletingPodsTotal, actualDeletingPodsTotal)
|
||||
if actualDeletingPodsTotal != float64(total) {
|
||||
t.Errorf("Expected desiredDeletingPodsTotal to be %d, got %v", total, actualDeletingPodsTotal)
|
||||
}
|
||||
|
||||
actualDeletingPodsErrorTotal, err := metricstestutil.GetCounterMetricValue(deletingPodsErrorTotal.WithLabelValues())
|
||||
actualDeletingPodsErrorTotal, err := metricstestutil.GetCounterMetricValue(metrics.DeletingPodsErrorTotal.WithLabelValues("", reason))
|
||||
if err != nil {
|
||||
t.Errorf("Error getting actualDeletingPodsErrorTotal")
|
||||
}
|
||||
|
@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package podgc
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"sync"
|
||||
@ -28,32 +28,47 @@ const (
|
||||
)
|
||||
|
||||
var (
|
||||
deletingPodsTotal = metrics.NewCounterVec(
|
||||
DeletingPodsTotal = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: podGCController,
|
||||
Name: "force_delete_pods_total",
|
||||
Help: "Number of pods that are being forcefully deleted since the Pod GC Controller started.",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{},
|
||||
[]string{"namespace", "reason"},
|
||||
)
|
||||
deletingPodsErrorTotal = metrics.NewCounterVec(
|
||||
DeletingPodsErrorTotal = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: podGCController,
|
||||
Name: "force_delete_pod_errors_total",
|
||||
Help: "Number of errors encountered when forcefully deleting the pods since the Pod GC Controller started.",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{},
|
||||
[]string{"namespace", "reason"},
|
||||
)
|
||||
)
|
||||
|
||||
const (
|
||||
// Possible values for the "reason" label in the above metrics.
|
||||
|
||||
// PodGCReasonTerminated is used when the pod is terminated.
|
||||
PodGCReasonTerminated = "terminated"
|
||||
// PodGCReasonCompleted is used when the pod is terminating and the corresponding node
|
||||
// is not ready and has `node.kubernetes.io/out-of-service` taint.
|
||||
PodGCReasonTerminatingOutOfService = "out-of-service"
|
||||
// PodGCReasonOrphaned is used when the pod is orphaned which means the corresponding node
|
||||
// has been deleted.
|
||||
PodGCReasonOrphaned = "orphaned"
|
||||
// PodGCReasonUnscheduled is used when the pod is terminating and unscheduled.
|
||||
PodGCReasonTerminatingUnscheduled = "unscheduled"
|
||||
)
|
||||
|
||||
var registerMetrics sync.Once
|
||||
|
||||
// Register the metrics that are to be monitored.
|
||||
func RegisterMetrics() {
|
||||
registerMetrics.Do(func() {
|
||||
legacyregistry.MustRegister(deletingPodsTotal)
|
||||
legacyregistry.MustRegister(deletingPodsErrorTotal)
|
||||
legacyregistry.MustRegister(DeletingPodsTotal)
|
||||
legacyregistry.MustRegister(DeletingPodsErrorTotal)
|
||||
})
|
||||
}
|
Loading…
Reference in New Issue
Block a user