Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-21 10:51:29 +00:00
Merge pull request #111070 from mimowo/retriable-pod-failures-refactor-gc-controller
Refactor gc_controller so that it does not use the deletePod stub
commit 27110bd821
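In short: the injectable deletePod stub field is removed, deletion becomes an ordinary method that takes a context, and the per-category GC helpers accept ctx so the API calls inherit the caller's cancellation. A condensed sketch of the post-refactor shape (names follow the diff below; the controller's other fields, listers, queue and threshold, are elided here):

package podgc

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/klog/v2"
)

// PodGCController is reduced to the one field this sketch needs; the real
// struct keeps its listers, the node queue and terminatedPodThreshold.
type PodGCController struct {
	kubeClient clientset.Interface
}

// deletePod replaces the old injectable stub: a plain method that threads
// the caller's context down to the force-delete API call.
func (gcc *PodGCController) deletePod(ctx context.Context, namespace, name string) error {
	klog.InfoS("PodGC is force deleting Pod", "pod", klog.KRef(namespace, name))
	return gcc.kubeClient.CoreV1().Pods(namespace).Delete(ctx, name, *metav1.NewDeleteOptions(0))
}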
gc_controller.go:

@@ -60,7 +60,6 @@ type PodGCController struct {
 
 	nodeQueue workqueue.DelayingInterface
 
-	deletePod              func(namespace, name string) error
 	terminatedPodThreshold int
 }
 
@@ -77,10 +76,6 @@ func NewPodGC(ctx context.Context, kubeClient clientset.Interface, podInformer c
 		nodeLister:             nodeInformer.Lister(),
 		nodeListerSynced:       nodeInformer.Informer().HasSynced,
 		nodeQueue:              workqueue.NewNamedDelayingQueue("orphaned_pods_nodes"),
-		deletePod: func(namespace, name string) error {
-			klog.InfoS("PodGC is force deleting Pod", "pod", klog.KRef(namespace, name))
-			return kubeClient.CoreV1().Pods(namespace).Delete(ctx, name, *metav1.NewDeleteOptions(0))
-		},
 	}
 
 	return gcc
@@ -114,13 +109,13 @@ func (gcc *PodGCController) gc(ctx context.Context) {
 		return
 	}
 	if gcc.terminatedPodThreshold > 0 {
-		gcc.gcTerminated(pods)
+		gcc.gcTerminated(ctx, pods)
 	}
 	if utilfeature.DefaultFeatureGate.Enabled(features.NodeOutOfServiceVolumeDetach) {
-		gcc.gcTerminating(pods)
+		gcc.gcTerminating(ctx, pods)
 	}
 	gcc.gcOrphaned(ctx, pods, nodes)
-	gcc.gcUnscheduledTerminating(pods)
+	gcc.gcUnscheduledTerminating(ctx, pods)
 }
 
 func isPodTerminated(pod *v1.Pod) bool {
@@ -135,7 +130,7 @@ func isPodTerminating(pod *v1.Pod) bool {
 	return pod.ObjectMeta.DeletionTimestamp != nil
 }
 
-func (gcc *PodGCController) gcTerminating(pods []*v1.Pod) {
+func (gcc *PodGCController) gcTerminating(ctx context.Context, pods []*v1.Pod) {
 	klog.V(4).Info("GC'ing terminating pods that are on out-of-service nodes")
 	terminatingPods := []*v1.Pod{}
 	for _, pod := range pods {
@@ -168,7 +163,7 @@ func (gcc *PodGCController) gcTerminating(pods []*v1.Pod) {
 		wait.Add(1)
 		go func(namespace string, name string) {
 			defer wait.Done()
-			if err := gcc.deletePod(namespace, name); err != nil {
+			if err := gcc.deletePod(ctx, namespace, name); err != nil {
 				// ignore not founds
 				utilruntime.HandleError(err)
 			}
@@ -177,7 +172,7 @@ func (gcc *PodGCController) gcTerminating(pods []*v1.Pod) {
 	wait.Wait()
 }
 
-func (gcc *PodGCController) gcTerminated(pods []*v1.Pod) {
+func (gcc *PodGCController) gcTerminated(ctx context.Context, pods []*v1.Pod) {
 	terminatedPods := []*v1.Pod{}
 	for _, pod := range pods {
 		if isPodTerminated(pod) {
@@ -200,7 +195,7 @@ func (gcc *PodGCController) gcTerminated(pods []*v1.Pod) {
 		wait.Add(1)
 		go func(namespace string, name string) {
 			defer wait.Done()
-			if err := gcc.deletePod(namespace, name); err != nil {
+			if err := gcc.deletePod(ctx, namespace, name); err != nil {
 				// ignore not founds
 				defer utilruntime.HandleError(err)
 			}
@@ -233,7 +228,7 @@ func (gcc *PodGCController) gcOrphaned(ctx context.Context, pods []*v1.Pod, node
 			continue
 		}
 		klog.V(2).InfoS("Found orphaned Pod assigned to the Node, deleting.", "pod", klog.KObj(pod), "node", pod.Spec.NodeName)
-		if err := gcc.deletePod(pod.Namespace, pod.Name); err != nil {
+		if err := gcc.deletePod(ctx, pod.Namespace, pod.Name); err != nil {
 			utilruntime.HandleError(err)
 		} else {
 			klog.V(0).InfoS("Forced deletion of orphaned Pod succeeded", "pod", klog.KObj(pod))
@@ -273,7 +268,7 @@ func (gcc *PodGCController) checkIfNodeExists(ctx context.Context, name string)
 }
 
 // gcUnscheduledTerminating deletes pods that are terminating and haven't been scheduled to a particular node.
-func (gcc *PodGCController) gcUnscheduledTerminating(pods []*v1.Pod) {
+func (gcc *PodGCController) gcUnscheduledTerminating(ctx context.Context, pods []*v1.Pod) {
 	klog.V(4).Infof("GC'ing unscheduled pods which are terminating.")
 
 	for _, pod := range pods {
@@ -282,7 +277,7 @@ func (gcc *PodGCController) gcUnscheduledTerminating(pods []*v1.Pod) {
 		}
 
 		klog.V(2).InfoS("Found unscheduled terminating Pod not assigned to any Node, deleting.", "pod", klog.KObj(pod))
-		if err := gcc.deletePod(pod.Namespace, pod.Name); err != nil {
+		if err := gcc.deletePod(ctx, pod.Namespace, pod.Name); err != nil {
 			utilruntime.HandleError(err)
 		} else {
 			klog.V(0).InfoS("Forced deletion of unscheduled terminating Pod succeeded", "pod", klog.KObj(pod))
@@ -302,3 +297,8 @@ func (o byCreationTimestamp) Less(i, j int) bool {
 	}
 	return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp)
 }
+
+func (gcc *PodGCController) deletePod(ctx context.Context, namespace, name string) error {
+	klog.InfoS("PodGC is force deleting Pod", "pod", klog.KRef(namespace, name))
+	return gcc.kubeClient.CoreV1().Pods(namespace).Delete(ctx, name, *metav1.NewDeleteOptions(0))
+}
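A minimal, hypothetical illustration (not part of the PR) of what threading ctx through the helpers buys: the force-delete request now inherits whatever deadline or cancellation the controller's run loop was given. The pod name and namespace below are made up for the example, and the fake clientset stands in for a real kubeClient.

package main

import (
	"context"
	"fmt"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes/fake"
)

func main() {
	// Hypothetical stand-in for the controller's kubeClient.
	client := fake.NewSimpleClientset()

	// The context handed to gc(ctx) now reaches every Pods().Delete call,
	// so a deadline or cancellation set by the caller bounds the deletions.
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	err := client.CoreV1().Pods("default").Delete(ctx, "orphaned-pod", *metav1.NewDeleteOptions(0))
	fmt.Println(err) // the fake client reports "not found" for a pod that was never created
}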
gc_controller_test.go:

@@ -18,7 +18,6 @@ package podgc
 
 import (
 	"context"
-	"sync"
 	"testing"
 	"time"
 
@@ -32,6 +31,7 @@ import (
 	coreinformers "k8s.io/client-go/informers/core/v1"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/kubernetes/fake"
+	clienttesting "k8s.io/client-go/testing"
 	"k8s.io/client-go/util/workqueue"
 	featuregatetesting "k8s.io/component-base/featuregate/testing"
 	"k8s.io/kubernetes/pkg/controller"
@@ -63,6 +63,17 @@ func compareStringSetToList(set sets.String, list []string) bool {
 	return true
 }
 
+func getDeletedPodNames(client *fake.Clientset) []string {
+	deletedPodNames := make([]string, 0)
+	for _, action := range client.Actions() {
+		if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
+			deleteAction := action.(clienttesting.DeleteAction)
+			deletedPodNames = append(deletedPodNames, deleteAction.GetName())
+		}
+	}
+	return deletedPodNames
+}
+
 func TestGCTerminated(t *testing.T) {
 	type nameToPhase struct {
 		name string
@@ -129,14 +140,6 @@ func TestGCTerminated(t *testing.T) {
 		t.Run(test.name, func(t *testing.T) {
 			client := fake.NewSimpleClientset(&v1.NodeList{Items: []v1.Node{*testutil.NewNode("node")}})
 			gcc, podInformer, _ := NewFromClient(client, test.threshold)
-			deletedPodNames := make([]string, 0)
-			var lock sync.Mutex
-			gcc.deletePod = func(_, name string) error {
-				lock.Lock()
-				defer lock.Unlock()
-				deletedPodNames = append(deletedPodNames, name)
-				return nil
-			}
 
 			creationTime := time.Unix(0, 0)
 			for _, pod := range test.pods {
@@ -150,6 +153,8 @@ func TestGCTerminated(t *testing.T) {
 
 			gcc.gc(context.TODO())
 
+			deletedPodNames := getDeletedPodNames(client)
+
 			if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
 				t.Errorf("[%v]pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v",
 					i, test.deletedPodNames.List(), deletedPodNames)
@@ -329,17 +334,10 @@ func TestGCOrphaned(t *testing.T) {
 			gcc.nodeQueue.ShutDown()
 			gcc.nodeQueue = workqueue.NewDelayingQueueWithCustomClock(fakeClock, "podgc_test_queue")
 
-			deletedPodNames := make([]string, 0)
-			var lock sync.Mutex
-			gcc.deletePod = func(_, name string) error {
-				lock.Lock()
-				defer lock.Unlock()
-				deletedPodNames = append(deletedPodNames, name)
-				return nil
-			}
 			// First GC of orphaned pods
 			gcc.gc(context.TODO())
+			deletedPodNames := getDeletedPodNames(client)
 
 			if len(deletedPodNames) > 0 {
 				t.Errorf("no pods should be deleted at this point.\n\tactual: %v", deletedPodNames)
 			}
@@ -371,6 +369,7 @@ func TestGCOrphaned(t *testing.T) {
 
 			// Actual pod deletion
 			gcc.gc(context.TODO())
+			deletedPodNames = getDeletedPodNames(client)
 
 			if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
 				t.Errorf("pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v",
@@ -417,14 +416,6 @@ func TestGCUnscheduledTerminating(t *testing.T) {
 		t.Run(test.name, func(t *testing.T) {
 			client := fake.NewSimpleClientset()
 			gcc, podInformer, _ := NewFromClient(client, -1)
-			deletedPodNames := make([]string, 0)
-			var lock sync.Mutex
-			gcc.deletePod = func(_, name string) error {
-				lock.Lock()
-				defer lock.Unlock()
-				deletedPodNames = append(deletedPodNames, name)
-				return nil
-			}
 
 			creationTime := time.Unix(0, 0)
 			for _, pod := range test.pods {
@@ -442,7 +433,8 @@ func TestGCUnscheduledTerminating(t *testing.T) {
 				t.Errorf("Error while listing all Pods: %v", err)
 				return
 			}
-			gcc.gcUnscheduledTerminating(pods)
+			gcc.gcUnscheduledTerminating(context.TODO(), pods)
+			deletedPodNames := getDeletedPodNames(client)
 
 			if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
 				t.Errorf("[%v]pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v, test: %v",
@@ -557,14 +549,7 @@ func TestGCTerminating(t *testing.T) {
 		t.Run(test.name, func(t *testing.T) {
 			client := fake.NewSimpleClientset(&v1.NodeList{Items: []v1.Node{*testutil.NewNode("node-a")}})
 			gcc, podInformer, nodeInformer := NewFromClient(client, -1)
-			deletedPodNames := make([]string, 0)
-			var lock sync.Mutex
-			gcc.deletePod = func(_, name string) error {
-				lock.Lock()
-				defer lock.Unlock()
-				deletedPodNames = append(deletedPodNames, name)
-				return nil
-			}
 
 			creationTime := time.Unix(0, 0)
 			for _, node := range test.nodes {
 				creationTime = creationTime.Add(2 * time.Hour)
@@ -595,6 +580,8 @@ func TestGCTerminating(t *testing.T) {
 			}
 
 			gcc.gc(context.TODO())
+			deletedPodNames := getDeletedPodNames(client)
+
 			if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
 				t.Errorf("[%v]pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v",
 					i, test.deletedPodNames.List(), deletedPodNames)
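For reference, the observation pattern the reworked tests rely on can be exercised on its own: instead of overriding a deletePod stub, delete calls are read back from the fake clientset's recorded actions, as getDeletedPodNames does above. This is a standalone sketch; the pod name "pod-a" and namespace "ns" are made up for the example.

package main

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes/fake"
	clienttesting "k8s.io/client-go/testing"
)

func main() {
	// Seed the fake clientset with one pod; seeding does not record actions.
	client := fake.NewSimpleClientset(&v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "pod-a", Namespace: "ns"},
	})

	// Whatever is under test issues its deletes through the client...
	_ = client.CoreV1().Pods("ns").Delete(context.TODO(), "pod-a", *metav1.NewDeleteOptions(0))

	// ...and the test recovers the deleted pod names afterwards from the
	// recorded actions, mirroring getDeletedPodNames in the diff above.
	var deleted []string
	for _, action := range client.Actions() {
		if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
			deleted = append(deleted, action.(clienttesting.DeleteAction).GetName())
		}
	}
	fmt.Println(deleted) // [pod-a]
}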