Merge pull request #111070 from mimowo/retriable-pod-failures-refactor-gc-controller

Refactor gc_controller to not use the deletePod stub
This commit is contained in:
Kubernetes Prow Robot 2022-07-14 06:11:09 -07:00 committed by GitHub
commit 27110bd821
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 37 additions and 50 deletions

View File

@ -60,7 +60,6 @@ type PodGCController struct {
nodeQueue workqueue.DelayingInterface
deletePod func(namespace, name string) error
terminatedPodThreshold int
}
@ -77,10 +76,6 @@ func NewPodGC(ctx context.Context, kubeClient clientset.Interface, podInformer c
nodeLister: nodeInformer.Lister(),
nodeListerSynced: nodeInformer.Informer().HasSynced,
nodeQueue: workqueue.NewNamedDelayingQueue("orphaned_pods_nodes"),
deletePod: func(namespace, name string) error {
klog.InfoS("PodGC is force deleting Pod", "pod", klog.KRef(namespace, name))
return kubeClient.CoreV1().Pods(namespace).Delete(ctx, name, *metav1.NewDeleteOptions(0))
},
}
return gcc
@ -114,13 +109,13 @@ func (gcc *PodGCController) gc(ctx context.Context) {
return
}
if gcc.terminatedPodThreshold > 0 {
gcc.gcTerminated(pods)
gcc.gcTerminated(ctx, pods)
}
if utilfeature.DefaultFeatureGate.Enabled(features.NodeOutOfServiceVolumeDetach) {
gcc.gcTerminating(pods)
gcc.gcTerminating(ctx, pods)
}
gcc.gcOrphaned(ctx, pods, nodes)
gcc.gcUnscheduledTerminating(pods)
gcc.gcUnscheduledTerminating(ctx, pods)
}
func isPodTerminated(pod *v1.Pod) bool {
@ -135,7 +130,7 @@ func isPodTerminating(pod *v1.Pod) bool {
return pod.ObjectMeta.DeletionTimestamp != nil
}
func (gcc *PodGCController) gcTerminating(pods []*v1.Pod) {
func (gcc *PodGCController) gcTerminating(ctx context.Context, pods []*v1.Pod) {
klog.V(4).Info("GC'ing terminating pods that are on out-of-service nodes")
terminatingPods := []*v1.Pod{}
for _, pod := range pods {
@ -168,7 +163,7 @@ func (gcc *PodGCController) gcTerminating(pods []*v1.Pod) {
wait.Add(1)
go func(namespace string, name string) {
defer wait.Done()
if err := gcc.deletePod(namespace, name); err != nil {
if err := gcc.deletePod(ctx, namespace, name); err != nil {
// ignore not founds
utilruntime.HandleError(err)
}
@ -177,7 +172,7 @@ func (gcc *PodGCController) gcTerminating(pods []*v1.Pod) {
wait.Wait()
}
func (gcc *PodGCController) gcTerminated(pods []*v1.Pod) {
func (gcc *PodGCController) gcTerminated(ctx context.Context, pods []*v1.Pod) {
terminatedPods := []*v1.Pod{}
for _, pod := range pods {
if isPodTerminated(pod) {
@ -200,7 +195,7 @@ func (gcc *PodGCController) gcTerminated(pods []*v1.Pod) {
wait.Add(1)
go func(namespace string, name string) {
defer wait.Done()
if err := gcc.deletePod(namespace, name); err != nil {
if err := gcc.deletePod(ctx, namespace, name); err != nil {
// ignore not founds
defer utilruntime.HandleError(err)
}
@ -233,7 +228,7 @@ func (gcc *PodGCController) gcOrphaned(ctx context.Context, pods []*v1.Pod, node
continue
}
klog.V(2).InfoS("Found orphaned Pod assigned to the Node, deleting.", "pod", klog.KObj(pod), "node", pod.Spec.NodeName)
if err := gcc.deletePod(pod.Namespace, pod.Name); err != nil {
if err := gcc.deletePod(ctx, pod.Namespace, pod.Name); err != nil {
utilruntime.HandleError(err)
} else {
klog.V(0).InfoS("Forced deletion of orphaned Pod succeeded", "pod", klog.KObj(pod))
@ -273,7 +268,7 @@ func (gcc *PodGCController) checkIfNodeExists(ctx context.Context, name string)
}
// gcUnscheduledTerminating deletes pods that are terminating and haven't been scheduled to a particular node.
func (gcc *PodGCController) gcUnscheduledTerminating(pods []*v1.Pod) {
func (gcc *PodGCController) gcUnscheduledTerminating(ctx context.Context, pods []*v1.Pod) {
klog.V(4).Infof("GC'ing unscheduled pods which are terminating.")
for _, pod := range pods {
@ -282,7 +277,7 @@ func (gcc *PodGCController) gcUnscheduledTerminating(pods []*v1.Pod) {
}
klog.V(2).InfoS("Found unscheduled terminating Pod not assigned to any Node, deleting.", "pod", klog.KObj(pod))
if err := gcc.deletePod(pod.Namespace, pod.Name); err != nil {
if err := gcc.deletePod(ctx, pod.Namespace, pod.Name); err != nil {
utilruntime.HandleError(err)
} else {
klog.V(0).InfoS("Forced deletion of unscheduled terminating Pod succeeded", "pod", klog.KObj(pod))
@ -302,3 +297,8 @@ func (o byCreationTimestamp) Less(i, j int) bool {
}
return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp)
}
// deletePod force-deletes the named Pod through the API server with a
// zero-second grace period (*metav1.NewDeleteOptions(0)), logging the
// deletion first. Any error from the API call is returned unmodified to
// the caller; callers in this file treat NotFound as ignorable.
func (gcc *PodGCController) deletePod(ctx context.Context, namespace, name string) error {
klog.InfoS("PodGC is force deleting Pod", "pod", klog.KRef(namespace, name))
return gcc.kubeClient.CoreV1().Pods(namespace).Delete(ctx, name, *metav1.NewDeleteOptions(0))
}

View File

@ -18,7 +18,6 @@ package podgc
import (
"context"
"sync"
"testing"
"time"
@ -32,6 +31,7 @@ import (
coreinformers "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/fake"
clienttesting "k8s.io/client-go/testing"
"k8s.io/client-go/util/workqueue"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/kubernetes/pkg/controller"
@ -63,6 +63,17 @@ func compareStringSetToList(set sets.String, list []string) bool {
return true
}
// getDeletedPodNames inspects the fake clientset's recorded actions and
// returns the names of all pods that had a "delete" verb issued against
// the "pods" resource, in the order the actions were recorded. It replaces
// the former deletePod stub/lock bookkeeping in the tests.
func getDeletedPodNames(client *fake.Clientset) []string {
deletedPodNames := make([]string, 0)
for _, action := range client.Actions() {
if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
// Safe: a "delete" action recorded by the fake clientset implements DeleteAction.
deleteAction := action.(clienttesting.DeleteAction)
deletedPodNames = append(deletedPodNames, deleteAction.GetName())
}
}
return deletedPodNames
}
func TestGCTerminated(t *testing.T) {
type nameToPhase struct {
name string
@ -129,14 +140,6 @@ func TestGCTerminated(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
client := fake.NewSimpleClientset(&v1.NodeList{Items: []v1.Node{*testutil.NewNode("node")}})
gcc, podInformer, _ := NewFromClient(client, test.threshold)
deletedPodNames := make([]string, 0)
var lock sync.Mutex
gcc.deletePod = func(_, name string) error {
lock.Lock()
defer lock.Unlock()
deletedPodNames = append(deletedPodNames, name)
return nil
}
creationTime := time.Unix(0, 0)
for _, pod := range test.pods {
@ -150,6 +153,8 @@ func TestGCTerminated(t *testing.T) {
gcc.gc(context.TODO())
deletedPodNames := getDeletedPodNames(client)
if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
t.Errorf("[%v]pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v",
i, test.deletedPodNames.List(), deletedPodNames)
@ -329,17 +334,10 @@ func TestGCOrphaned(t *testing.T) {
gcc.nodeQueue.ShutDown()
gcc.nodeQueue = workqueue.NewDelayingQueueWithCustomClock(fakeClock, "podgc_test_queue")
deletedPodNames := make([]string, 0)
var lock sync.Mutex
gcc.deletePod = func(_, name string) error {
lock.Lock()
defer lock.Unlock()
deletedPodNames = append(deletedPodNames, name)
return nil
}
// First GC of orphaned pods
gcc.gc(context.TODO())
deletedPodNames := getDeletedPodNames(client)
if len(deletedPodNames) > 0 {
t.Errorf("no pods should be deleted at this point.\n\tactual: %v", deletedPodNames)
}
@ -371,6 +369,7 @@ func TestGCOrphaned(t *testing.T) {
// Actual pod deletion
gcc.gc(context.TODO())
deletedPodNames = getDeletedPodNames(client)
if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
t.Errorf("pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v",
@ -417,14 +416,6 @@ func TestGCUnscheduledTerminating(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
client := fake.NewSimpleClientset()
gcc, podInformer, _ := NewFromClient(client, -1)
deletedPodNames := make([]string, 0)
var lock sync.Mutex
gcc.deletePod = func(_, name string) error {
lock.Lock()
defer lock.Unlock()
deletedPodNames = append(deletedPodNames, name)
return nil
}
creationTime := time.Unix(0, 0)
for _, pod := range test.pods {
@ -442,7 +433,8 @@ func TestGCUnscheduledTerminating(t *testing.T) {
t.Errorf("Error while listing all Pods: %v", err)
return
}
gcc.gcUnscheduledTerminating(pods)
gcc.gcUnscheduledTerminating(context.TODO(), pods)
deletedPodNames := getDeletedPodNames(client)
if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
t.Errorf("[%v]pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v, test: %v",
@ -557,14 +549,7 @@ func TestGCTerminating(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
client := fake.NewSimpleClientset(&v1.NodeList{Items: []v1.Node{*testutil.NewNode("node-a")}})
gcc, podInformer, nodeInformer := NewFromClient(client, -1)
deletedPodNames := make([]string, 0)
var lock sync.Mutex
gcc.deletePod = func(_, name string) error {
lock.Lock()
defer lock.Unlock()
deletedPodNames = append(deletedPodNames, name)
return nil
}
creationTime := time.Unix(0, 0)
for _, node := range test.nodes {
creationTime = creationTime.Add(2 * time.Hour)
@ -595,6 +580,8 @@ func TestGCTerminating(t *testing.T) {
}
gcc.gc(context.TODO())
deletedPodNames := getDeletedPodNames(client)
if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
t.Errorf("[%v]pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v",
i, test.deletedPodNames.List(), deletedPodNames)