mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-15 06:43:54 +00:00
fix gc metrics
This commit is contained in:
parent
8ffbbe455b
commit
322da7c1aa
@ -37,6 +37,7 @@ import (
|
|||||||
"k8s.io/client-go/tools/cache"
|
"k8s.io/client-go/tools/cache"
|
||||||
"k8s.io/client-go/util/workqueue"
|
"k8s.io/client-go/util/workqueue"
|
||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
|
"k8s.io/kubernetes/pkg/controller/podgc/metrics"
|
||||||
"k8s.io/kubernetes/pkg/features"
|
"k8s.io/kubernetes/pkg/features"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/eviction"
|
"k8s.io/kubernetes/pkg/kubelet/eviction"
|
||||||
nodeutil "k8s.io/kubernetes/pkg/util/node"
|
nodeutil "k8s.io/kubernetes/pkg/util/node"
|
||||||
@ -69,11 +70,6 @@ type PodGCController struct {
|
|||||||
quarantineTime time.Duration
|
quarantineTime time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
func init() {
|
|
||||||
// Register prometheus metrics
|
|
||||||
RegisterMetrics()
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewPodGC(ctx context.Context, kubeClient clientset.Interface, podInformer coreinformers.PodInformer,
|
func NewPodGC(ctx context.Context, kubeClient clientset.Interface, podInformer coreinformers.PodInformer,
|
||||||
nodeInformer coreinformers.NodeInformer, terminatedPodThreshold int) *PodGCController {
|
nodeInformer coreinformers.NodeInformer, terminatedPodThreshold int) *PodGCController {
|
||||||
return NewPodGCInternal(ctx, kubeClient, podInformer, nodeInformer, terminatedPodThreshold, gcCheckPeriod, quarantineTime)
|
return NewPodGCInternal(ctx, kubeClient, podInformer, nodeInformer, terminatedPodThreshold, gcCheckPeriod, quarantineTime)
|
||||||
@ -94,6 +90,8 @@ func NewPodGCInternal(ctx context.Context, kubeClient clientset.Interface, podIn
|
|||||||
quarantineTime: quarantineTime,
|
quarantineTime: quarantineTime,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Register prometheus metrics
|
||||||
|
metrics.RegisterMetrics()
|
||||||
return gcc
|
return gcc
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -179,11 +177,11 @@ func (gcc *PodGCController) gcTerminating(ctx context.Context, pods []*v1.Pod) {
|
|||||||
wait.Add(1)
|
wait.Add(1)
|
||||||
go func(pod *v1.Pod) {
|
go func(pod *v1.Pod) {
|
||||||
defer wait.Done()
|
defer wait.Done()
|
||||||
deletingPodsTotal.WithLabelValues().Inc()
|
metrics.DeletingPodsTotal.WithLabelValues(pod.Namespace, metrics.PodGCReasonTerminatingOutOfService).Inc()
|
||||||
if err := gcc.markFailedAndDeletePod(ctx, pod); err != nil {
|
if err := gcc.markFailedAndDeletePod(ctx, pod); err != nil {
|
||||||
// ignore not founds
|
// ignore not founds
|
||||||
utilruntime.HandleError(err)
|
utilruntime.HandleError(err)
|
||||||
deletingPodsErrorTotal.WithLabelValues().Inc()
|
metrics.DeletingPodsErrorTotal.WithLabelValues(pod.Namespace, metrics.PodGCReasonTerminatingOutOfService).Inc()
|
||||||
}
|
}
|
||||||
}(terminatingPods[i])
|
}(terminatingPods[i])
|
||||||
}
|
}
|
||||||
@ -216,7 +214,9 @@ func (gcc *PodGCController) gcTerminated(ctx context.Context, pods []*v1.Pod) {
|
|||||||
if err := gcc.markFailedAndDeletePod(ctx, pod); err != nil {
|
if err := gcc.markFailedAndDeletePod(ctx, pod); err != nil {
|
||||||
// ignore not founds
|
// ignore not founds
|
||||||
defer utilruntime.HandleError(err)
|
defer utilruntime.HandleError(err)
|
||||||
|
metrics.DeletingPodsErrorTotal.WithLabelValues(pod.Namespace, metrics.PodGCReasonTerminated).Inc()
|
||||||
}
|
}
|
||||||
|
metrics.DeletingPodsTotal.WithLabelValues(pod.Namespace, metrics.PodGCReasonTerminated).Inc()
|
||||||
}(terminatedPods[i])
|
}(terminatedPods[i])
|
||||||
}
|
}
|
||||||
wait.Wait()
|
wait.Wait()
|
||||||
@ -254,9 +254,11 @@ func (gcc *PodGCController) gcOrphaned(ctx context.Context, pods []*v1.Pod, node
|
|||||||
WithLastTransitionTime(metav1.Now())
|
WithLastTransitionTime(metav1.Now())
|
||||||
if err := gcc.markFailedAndDeletePodWithCondition(ctx, pod, condition); err != nil {
|
if err := gcc.markFailedAndDeletePodWithCondition(ctx, pod, condition); err != nil {
|
||||||
utilruntime.HandleError(err)
|
utilruntime.HandleError(err)
|
||||||
|
metrics.DeletingPodsErrorTotal.WithLabelValues(pod.Namespace, metrics.PodGCReasonOrphaned).Inc()
|
||||||
} else {
|
} else {
|
||||||
klog.InfoS("Forced deletion of orphaned Pod succeeded", "pod", klog.KObj(pod))
|
klog.InfoS("Forced deletion of orphaned Pod succeeded", "pod", klog.KObj(pod))
|
||||||
}
|
}
|
||||||
|
metrics.DeletingPodsTotal.WithLabelValues(pod.Namespace, metrics.PodGCReasonOrphaned).Inc()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -303,9 +305,11 @@ func (gcc *PodGCController) gcUnscheduledTerminating(ctx context.Context, pods [
|
|||||||
klog.V(2).InfoS("Found unscheduled terminating Pod not assigned to any Node, deleting.", "pod", klog.KObj(pod))
|
klog.V(2).InfoS("Found unscheduled terminating Pod not assigned to any Node, deleting.", "pod", klog.KObj(pod))
|
||||||
if err := gcc.markFailedAndDeletePod(ctx, pod); err != nil {
|
if err := gcc.markFailedAndDeletePod(ctx, pod); err != nil {
|
||||||
utilruntime.HandleError(err)
|
utilruntime.HandleError(err)
|
||||||
|
metrics.DeletingPodsErrorTotal.WithLabelValues(pod.Namespace, metrics.PodGCReasonTerminatingUnscheduled).Inc()
|
||||||
} else {
|
} else {
|
||||||
klog.InfoS("Forced deletion of unscheduled terminating Pod succeeded", "pod", klog.KObj(pod))
|
klog.InfoS("Forced deletion of unscheduled terminating Pod succeeded", "pod", klog.KObj(pod))
|
||||||
}
|
}
|
||||||
|
metrics.DeletingPodsTotal.WithLabelValues(pod.Namespace, metrics.PodGCReasonTerminatingUnscheduled).Inc()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -37,6 +37,7 @@ import (
|
|||||||
featuregatetesting "k8s.io/component-base/featuregate/testing"
|
featuregatetesting "k8s.io/component-base/featuregate/testing"
|
||||||
metricstestutil "k8s.io/component-base/metrics/testutil"
|
metricstestutil "k8s.io/component-base/metrics/testutil"
|
||||||
"k8s.io/kubernetes/pkg/controller"
|
"k8s.io/kubernetes/pkg/controller"
|
||||||
|
"k8s.io/kubernetes/pkg/controller/podgc/metrics"
|
||||||
"k8s.io/kubernetes/pkg/controller/testutil"
|
"k8s.io/kubernetes/pkg/controller/testutil"
|
||||||
"k8s.io/kubernetes/pkg/features"
|
"k8s.io/kubernetes/pkg/features"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/eviction"
|
"k8s.io/kubernetes/pkg/kubelet/eviction"
|
||||||
@ -159,7 +160,7 @@ func TestGCTerminated(t *testing.T) {
|
|||||||
for _, pod := range test.pods {
|
for _, pod := range test.pods {
|
||||||
creationTime = creationTime.Add(1 * time.Hour)
|
creationTime = creationTime.Add(1 * time.Hour)
|
||||||
pods = append(pods, &v1.Pod{
|
pods = append(pods, &v1.Pod{
|
||||||
ObjectMeta: metav1.ObjectMeta{Name: pod.name, CreationTimestamp: metav1.Time{Time: creationTime}},
|
ObjectMeta: metav1.ObjectMeta{Name: pod.name, Namespace: metav1.NamespaceDefault, CreationTimestamp: metav1.Time{Time: creationTime}},
|
||||||
Status: v1.PodStatus{Phase: pod.phase, Reason: pod.reason},
|
Status: v1.PodStatus{Phase: pod.phase, Reason: pod.reason},
|
||||||
Spec: v1.PodSpec{NodeName: "node"},
|
Spec: v1.PodSpec{NodeName: "node"},
|
||||||
})
|
})
|
||||||
@ -175,12 +176,16 @@ func TestGCTerminated(t *testing.T) {
|
|||||||
verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
|
verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DeletingPodsTotal is expected to be 9 in this test
|
||||||
|
testDeletingPodsMetrics(t, 9, metrics.PodGCReasonTerminated)
|
||||||
}
|
}
|
||||||
|
|
||||||
func makePod(name string, nodeName string, phase v1.PodPhase) *v1.Pod {
|
func makePod(name string, nodeName string, phase v1.PodPhase) *v1.Pod {
|
||||||
return &v1.Pod{
|
return &v1.Pod{
|
||||||
ObjectMeta: metav1.ObjectMeta{
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
Name: name,
|
Name: name,
|
||||||
|
Namespace: metav1.NamespaceDefault,
|
||||||
},
|
},
|
||||||
Spec: v1.PodSpec{NodeName: nodeName},
|
Spec: v1.PodSpec{NodeName: nodeName},
|
||||||
Status: v1.PodStatus{Phase: phase},
|
Status: v1.PodStatus{Phase: phase},
|
||||||
@ -406,6 +411,9 @@ func TestGCOrphaned(t *testing.T) {
|
|||||||
verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
|
verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DeletingPodsTotal is expected to be 10 in this test
|
||||||
|
testDeletingPodsMetrics(t, 10, metrics.PodGCReasonOrphaned)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGCUnscheduledTerminating(t *testing.T) {
|
func TestGCUnscheduledTerminating(t *testing.T) {
|
||||||
@ -463,7 +471,7 @@ func TestGCUnscheduledTerminating(t *testing.T) {
|
|||||||
for _, pod := range test.pods {
|
for _, pod := range test.pods {
|
||||||
creationTime = creationTime.Add(1 * time.Hour)
|
creationTime = creationTime.Add(1 * time.Hour)
|
||||||
pods = append(pods, &v1.Pod{
|
pods = append(pods, &v1.Pod{
|
||||||
ObjectMeta: metav1.ObjectMeta{Name: pod.name, CreationTimestamp: metav1.Time{Time: creationTime},
|
ObjectMeta: metav1.ObjectMeta{Name: pod.name, Namespace: metav1.NamespaceDefault, CreationTimestamp: metav1.Time{Time: creationTime},
|
||||||
DeletionTimestamp: pod.deletionTimeStamp},
|
DeletionTimestamp: pod.deletionTimeStamp},
|
||||||
Status: v1.PodStatus{Phase: pod.phase},
|
Status: v1.PodStatus{Phase: pod.phase},
|
||||||
Spec: v1.PodSpec{NodeName: pod.nodeName},
|
Spec: v1.PodSpec{NodeName: pod.nodeName},
|
||||||
@ -486,6 +494,9 @@ func TestGCUnscheduledTerminating(t *testing.T) {
|
|||||||
verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
|
verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DeletingPodsTotal is expected to be 6 in this test
|
||||||
|
testDeletingPodsMetrics(t, 6, metrics.PodGCReasonTerminatingUnscheduled)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGCTerminating(t *testing.T) {
|
func TestGCTerminating(t *testing.T) {
|
||||||
@ -633,7 +644,7 @@ func TestGCTerminating(t *testing.T) {
|
|||||||
for _, pod := range test.pods {
|
for _, pod := range test.pods {
|
||||||
creationTime = creationTime.Add(1 * time.Hour)
|
creationTime = creationTime.Add(1 * time.Hour)
|
||||||
pods = append(pods, &v1.Pod{
|
pods = append(pods, &v1.Pod{
|
||||||
ObjectMeta: metav1.ObjectMeta{Name: pod.name, CreationTimestamp: metav1.Time{Time: creationTime},
|
ObjectMeta: metav1.ObjectMeta{Name: pod.name, Namespace: metav1.NamespaceDefault, CreationTimestamp: metav1.Time{Time: creationTime},
|
||||||
DeletionTimestamp: pod.deletionTimeStamp},
|
DeletionTimestamp: pod.deletionTimeStamp},
|
||||||
Status: v1.PodStatus{Phase: pod.phase},
|
Status: v1.PodStatus{Phase: pod.phase},
|
||||||
Spec: v1.PodSpec{NodeName: pod.nodeName},
|
Spec: v1.PodSpec{NodeName: pod.nodeName},
|
||||||
@ -653,8 +664,8 @@ func TestGCTerminating(t *testing.T) {
|
|||||||
verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
|
verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
// deletingPodsTotal is 7 in this test
|
// DeletingPodsTotal is expected to be 7 in this test
|
||||||
testDeletingPodsMetrics(t, 7)
|
testDeletingPodsMetrics(t, 7, metrics.PodGCReasonTerminatingOutOfService)
|
||||||
}
|
}
|
||||||
|
|
||||||
func verifyDeletedAndPatchedPods(t *testing.T, client *fake.Clientset, wantDeletedPodNames, wantPatchedPodNames sets.String) {
|
func verifyDeletedAndPatchedPods(t *testing.T, client *fake.Clientset, wantDeletedPodNames, wantPatchedPodNames sets.String) {
|
||||||
@ -669,18 +680,18 @@ func verifyDeletedAndPatchedPods(t *testing.T, client *fake.Clientset, wantDelet
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func testDeletingPodsMetrics(t *testing.T, inputDeletingPodsTotal int) {
|
func testDeletingPodsMetrics(t *testing.T, total int, reason string) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
|
|
||||||
actualDeletingPodsTotal, err := metricstestutil.GetCounterMetricValue(deletingPodsTotal.WithLabelValues())
|
actualDeletingPodsTotal, err := metricstestutil.GetCounterMetricValue(metrics.DeletingPodsTotal.WithLabelValues(metav1.NamespaceDefault, reason))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("Error getting actualDeletingPodsTotal")
|
t.Errorf("Error getting actualDeletingPodsTotal")
|
||||||
}
|
}
|
||||||
if actualDeletingPodsTotal != float64(inputDeletingPodsTotal) {
|
if actualDeletingPodsTotal != float64(total) {
|
||||||
t.Errorf("Expected desiredDeletingPodsTotal to be %d, got %v", inputDeletingPodsTotal, actualDeletingPodsTotal)
|
t.Errorf("Expected desiredDeletingPodsTotal to be %d, got %v", total, actualDeletingPodsTotal)
|
||||||
}
|
}
|
||||||
|
|
||||||
actualDeletingPodsErrorTotal, err := metricstestutil.GetCounterMetricValue(deletingPodsErrorTotal.WithLabelValues())
|
actualDeletingPodsErrorTotal, err := metricstestutil.GetCounterMetricValue(metrics.DeletingPodsErrorTotal.WithLabelValues("", reason))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("Error getting actualDeletingPodsErrorTotal")
|
t.Errorf("Error getting actualDeletingPodsErrorTotal")
|
||||||
}
|
}
|
||||||
|
@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package podgc
|
package metrics
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"sync"
|
"sync"
|
||||||
@ -28,32 +28,47 @@ const (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
deletingPodsTotal = metrics.NewCounterVec(
|
DeletingPodsTotal = metrics.NewCounterVec(
|
||||||
&metrics.CounterOpts{
|
&metrics.CounterOpts{
|
||||||
Subsystem: podGCController,
|
Subsystem: podGCController,
|
||||||
Name: "force_delete_pods_total",
|
Name: "force_delete_pods_total",
|
||||||
Help: "Number of pods that are being forcefully deleted since the Pod GC Controller started.",
|
Help: "Number of pods that are being forcefully deleted since the Pod GC Controller started.",
|
||||||
StabilityLevel: metrics.ALPHA,
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
[]string{},
|
[]string{"namespace", "reason"},
|
||||||
)
|
)
|
||||||
deletingPodsErrorTotal = metrics.NewCounterVec(
|
DeletingPodsErrorTotal = metrics.NewCounterVec(
|
||||||
&metrics.CounterOpts{
|
&metrics.CounterOpts{
|
||||||
Subsystem: podGCController,
|
Subsystem: podGCController,
|
||||||
Name: "force_delete_pod_errors_total",
|
Name: "force_delete_pod_errors_total",
|
||||||
Help: "Number of errors encountered when forcefully deleting the pods since the Pod GC Controller started.",
|
Help: "Number of errors encountered when forcefully deleting the pods since the Pod GC Controller started.",
|
||||||
StabilityLevel: metrics.ALPHA,
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
[]string{},
|
[]string{"namespace", "reason"},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// Possible values for the "reason" label in the above metrics.
|
||||||
|
|
||||||
|
// PodGCReasonTerminated is used when the pod is terminated.
|
||||||
|
PodGCReasonTerminated = "terminated"
|
||||||
|
// PodGCReasonTerminatingOutOfService is used when the pod is terminating and the corresponding node
|
||||||
|
// is not ready and has `node.kubernetes.io/out-of-service` taint.
|
||||||
|
PodGCReasonTerminatingOutOfService = "out-of-service"
|
||||||
|
// PodGCReasonOrphaned is used when the pod is orphaned which means the corresponding node
|
||||||
|
// has been deleted.
|
||||||
|
PodGCReasonOrphaned = "orphaned"
|
||||||
|
// PodGCReasonTerminatingUnscheduled is used when the pod is terminating and unscheduled.
|
||||||
|
PodGCReasonTerminatingUnscheduled = "unscheduled"
|
||||||
|
)
|
||||||
|
|
||||||
var registerMetrics sync.Once
|
var registerMetrics sync.Once
|
||||||
|
|
||||||
// Register the metrics that are to be monitored.
|
// Register the metrics that are to be monitored.
|
||||||
func RegisterMetrics() {
|
func RegisterMetrics() {
|
||||||
registerMetrics.Do(func() {
|
registerMetrics.Do(func() {
|
||||||
legacyregistry.MustRegister(deletingPodsTotal)
|
legacyregistry.MustRegister(DeletingPodsTotal)
|
||||||
legacyregistry.MustRegister(deletingPodsErrorTotal)
|
legacyregistry.MustRegister(DeletingPodsErrorTotal)
|
||||||
})
|
})
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user