Merge pull request #111070 from mimowo/retriable-pod-failures-refactor-gc-controller

Refactor gc_controller so that it does not use the deletePod stub

Commit: 27110bd821
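In short: PodGCController previously carried an injectable deletePod func(namespace, name string) error field that NewPodGC populated and the unit tests overwrote with a recording stub. This commit deletes the field, turns deletePod into an ordinary method that takes the caller's context.Context, and has the tests recover deleted pod names from the fake clientset's recorded actions instead. A minimal standalone sketch of the pattern, using hypothetical names rather than the real podgc types:

package main

import "fmt"

// client stands in for the slice of the Kubernetes API the controller needs.
type client interface {
	DeletePod(namespace, name string) error
}

// fakeClient mimics how client-go's fake clientset records every request
// so a test can inspect it after the fact.
type fakeClient struct{ deleted []string }

func (f *fakeClient) DeletePod(namespace, name string) error {
	f.deleted = append(f.deleted, name) // the recorded "action"
	return nil
}

// controller is the post-refactor shape: no injectable deletePod field,
// just a method that talks to its client directly.
type controller struct{ kubeClient client }

func (c *controller) deletePod(namespace, name string) error {
	return c.kubeClient.DeletePod(namespace, name)
}

func main() {
	fc := &fakeClient{}
	c := &controller{kubeClient: fc}
	_ = c.deletePod("default", "pod-a")
	// Tests assert on the fake's recorded deletions rather than
	// stubbing the controller's internals.
	fmt.Println(fc.deleted) // [pod-a]
}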
--- a/pkg/controller/podgc/gc_controller.go
+++ b/pkg/controller/podgc/gc_controller.go
@@ -60,7 +60,6 @@ type PodGCController struct {
 
 	nodeQueue workqueue.DelayingInterface
 
-	deletePod func(namespace, name string) error
 	terminatedPodThreshold int
 }
 
@@ -77,10 +76,6 @@ func NewPodGC(ctx context.Context, kubeClient clientset.Interface, podInformer c
 		nodeLister:       nodeInformer.Lister(),
 		nodeListerSynced: nodeInformer.Informer().HasSynced,
 		nodeQueue:        workqueue.NewNamedDelayingQueue("orphaned_pods_nodes"),
-		deletePod: func(namespace, name string) error {
-			klog.InfoS("PodGC is force deleting Pod", "pod", klog.KRef(namespace, name))
-			return kubeClient.CoreV1().Pods(namespace).Delete(ctx, name, *metav1.NewDeleteOptions(0))
-		},
 	}
 
 	return gcc
@@ -114,13 +109,13 @@ func (gcc *PodGCController) gc(ctx context.Context) {
 		return
 	}
 	if gcc.terminatedPodThreshold > 0 {
-		gcc.gcTerminated(pods)
+		gcc.gcTerminated(ctx, pods)
 	}
 	if utilfeature.DefaultFeatureGate.Enabled(features.NodeOutOfServiceVolumeDetach) {
-		gcc.gcTerminating(pods)
+		gcc.gcTerminating(ctx, pods)
 	}
 	gcc.gcOrphaned(ctx, pods, nodes)
-	gcc.gcUnscheduledTerminating(pods)
+	gcc.gcUnscheduledTerminating(ctx, pods)
 }
 
 func isPodTerminated(pod *v1.Pod) bool {
@@ -135,7 +130,7 @@ func isPodTerminating(pod *v1.Pod) bool {
 	return pod.ObjectMeta.DeletionTimestamp != nil
 }
 
-func (gcc *PodGCController) gcTerminating(pods []*v1.Pod) {
+func (gcc *PodGCController) gcTerminating(ctx context.Context, pods []*v1.Pod) {
 	klog.V(4).Info("GC'ing terminating pods that are on out-of-service nodes")
 	terminatingPods := []*v1.Pod{}
 	for _, pod := range pods {
@@ -168,7 +163,7 @@ func (gcc *PodGCController) gcTerminating(pods []*v1.Pod) {
 		wait.Add(1)
 		go func(namespace string, name string) {
 			defer wait.Done()
-			if err := gcc.deletePod(namespace, name); err != nil {
+			if err := gcc.deletePod(ctx, namespace, name); err != nil {
 				// ignore not founds
 				utilruntime.HandleError(err)
 			}
@@ -177,7 +172,7 @@ func (gcc *PodGCController) gcTerminating(pods []*v1.Pod) {
 	wait.Wait()
 }
 
-func (gcc *PodGCController) gcTerminated(pods []*v1.Pod) {
+func (gcc *PodGCController) gcTerminated(ctx context.Context, pods []*v1.Pod) {
 	terminatedPods := []*v1.Pod{}
 	for _, pod := range pods {
 		if isPodTerminated(pod) {
@@ -200,7 +195,7 @@ func (gcc *PodGCController) gcTerminated(pods []*v1.Pod) {
 		wait.Add(1)
 		go func(namespace string, name string) {
 			defer wait.Done()
-			if err := gcc.deletePod(namespace, name); err != nil {
+			if err := gcc.deletePod(ctx, namespace, name); err != nil {
 				// ignore not founds
 				defer utilruntime.HandleError(err)
 			}
@@ -233,7 +228,7 @@ func (gcc *PodGCController) gcOrphaned(ctx context.Context, pods []*v1.Pod, node
 			continue
 		}
 		klog.V(2).InfoS("Found orphaned Pod assigned to the Node, deleting.", "pod", klog.KObj(pod), "node", pod.Spec.NodeName)
-		if err := gcc.deletePod(pod.Namespace, pod.Name); err != nil {
+		if err := gcc.deletePod(ctx, pod.Namespace, pod.Name); err != nil {
 			utilruntime.HandleError(err)
 		} else {
 			klog.V(0).InfoS("Forced deletion of orphaned Pod succeeded", "pod", klog.KObj(pod))
@@ -273,7 +268,7 @@ func (gcc *PodGCController) checkIfNodeExists(ctx context.Context, name string)
 }
 
 // gcUnscheduledTerminating deletes pods that are terminating and haven't been scheduled to a particular node.
-func (gcc *PodGCController) gcUnscheduledTerminating(pods []*v1.Pod) {
+func (gcc *PodGCController) gcUnscheduledTerminating(ctx context.Context, pods []*v1.Pod) {
 	klog.V(4).Infof("GC'ing unscheduled pods which are terminating.")
 
 	for _, pod := range pods {
@@ -282,7 +277,7 @@ func (gcc *PodGCController) gcUnscheduledTerminating(pods []*v1.Pod) {
 		}
 
 		klog.V(2).InfoS("Found unscheduled terminating Pod not assigned to any Node, deleting.", "pod", klog.KObj(pod))
-		if err := gcc.deletePod(pod.Namespace, pod.Name); err != nil {
+		if err := gcc.deletePod(ctx, pod.Namespace, pod.Name); err != nil {
 			utilruntime.HandleError(err)
 		} else {
 			klog.V(0).InfoS("Forced deletion of unscheduled terminating Pod succeeded", "pod", klog.KObj(pod))
@@ -302,3 +297,8 @@ func (o byCreationTimestamp) Less(i, j int) bool {
 	}
 	return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp)
 }
+
+func (gcc *PodGCController) deletePod(ctx context.Context, namespace, name string) error {
+	klog.InfoS("PodGC is force deleting Pod", "pod", klog.KRef(namespace, name))
+	return gcc.kubeClient.CoreV1().Pods(namespace).Delete(ctx, name, *metav1.NewDeleteOptions(0))
+}
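Note the signature change that runs through the rest of the diff: deletePod and the gc* helpers now accept the caller's context.Context, so every force delete is cancellable along with the controller. A standalone sketch of the behavior this enables; deleteWithCtx is a hypothetical stand-in for a client call such as Pods(namespace).Delete(ctx, name, opts):

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// deleteWithCtx gives up as soon as ctx is done, the way a real
// client-go request would once the context is threaded through.
func deleteWithCtx(ctx context.Context, name string) error {
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-time.After(10 * time.Millisecond): // simulated API round trip
		return nil
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	cancel() // the controller shut down before the delete was issued
	err := deleteWithCtx(ctx, "pod-a")
	fmt.Println(errors.Is(err, context.Canceled)) // true
}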
--- a/pkg/controller/podgc/gc_controller_test.go
+++ b/pkg/controller/podgc/gc_controller_test.go
@@ -18,7 +18,6 @@ package podgc
 
 import (
 	"context"
-	"sync"
 	"testing"
 	"time"
 
@@ -32,6 +31,7 @@ import (
 	coreinformers "k8s.io/client-go/informers/core/v1"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/kubernetes/fake"
+	clienttesting "k8s.io/client-go/testing"
 	"k8s.io/client-go/util/workqueue"
 	featuregatetesting "k8s.io/component-base/featuregate/testing"
 	"k8s.io/kubernetes/pkg/controller"
@@ -63,6 +63,17 @@ func compareStringSetToList(set sets.String, list []string) bool {
 	return true
 }
 
+func getDeletedPodNames(client *fake.Clientset) []string {
+	deletedPodNames := make([]string, 0)
+	for _, action := range client.Actions() {
+		if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
+			deleteAction := action.(clienttesting.DeleteAction)
+			deletedPodNames = append(deletedPodNames, deleteAction.GetName())
+		}
+	}
+	return deletedPodNames
+}
+
 func TestGCTerminated(t *testing.T) {
 	type nameToPhase struct {
 		name string
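The new getDeletedPodNames helper works because the fake clientset records every request as an Action that tests can replay afterwards. An illustrative test of the helper (not part of this PR; imports as in the surrounding file):

func TestGetDeletedPodNamesExample(t *testing.T) {
	pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Namespace: "ns"}}
	client := fake.NewSimpleClientset(pod)
	if err := client.CoreV1().Pods("ns").Delete(context.TODO(), "p1", metav1.DeleteOptions{}); err != nil {
		t.Fatal(err)
	}
	if got := getDeletedPodNames(client); len(got) != 1 || got[0] != "p1" {
		t.Errorf("unexpected deleted pod names: %v", got)
	}
}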
@@ -129,14 +140,6 @@ func TestGCTerminated(t *testing.T) {
 		t.Run(test.name, func(t *testing.T) {
 			client := fake.NewSimpleClientset(&v1.NodeList{Items: []v1.Node{*testutil.NewNode("node")}})
 			gcc, podInformer, _ := NewFromClient(client, test.threshold)
-			deletedPodNames := make([]string, 0)
-			var lock sync.Mutex
-			gcc.deletePod = func(_, name string) error {
-				lock.Lock()
-				defer lock.Unlock()
-				deletedPodNames = append(deletedPodNames, name)
-				return nil
-			}
 
 			creationTime := time.Unix(0, 0)
 			for _, pod := range test.pods {
@@ -150,6 +153,8 @@ func TestGCTerminated(t *testing.T) {
 
 			gcc.gc(context.TODO())
 
+			deletedPodNames := getDeletedPodNames(client)
+
 			if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
 				t.Errorf("[%v]pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v",
 					i, test.deletedPodNames.List(), deletedPodNames)
@@ -329,17 +334,10 @@ func TestGCOrphaned(t *testing.T) {
 			gcc.nodeQueue.ShutDown()
 			gcc.nodeQueue = workqueue.NewDelayingQueueWithCustomClock(fakeClock, "podgc_test_queue")
 
-			deletedPodNames := make([]string, 0)
-			var lock sync.Mutex
-			gcc.deletePod = func(_, name string) error {
-				lock.Lock()
-				defer lock.Unlock()
-				deletedPodNames = append(deletedPodNames, name)
-				return nil
-			}
-
 			// First GC of orphaned pods
 			gcc.gc(context.TODO())
+			deletedPodNames := getDeletedPodNames(client)
 
 			if len(deletedPodNames) > 0 {
 				t.Errorf("no pods should be deleted at this point.\n\tactual: %v", deletedPodNames)
 			}
@@ -371,6 +369,7 @@ func TestGCOrphaned(t *testing.T) {
 
 			// Actual pod deletion
 			gcc.gc(context.TODO())
+			deletedPodNames = getDeletedPodNames(client)
 
 			if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
 				t.Errorf("pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v",
@@ -417,14 +416,6 @@ func TestGCUnscheduledTerminating(t *testing.T) {
 		t.Run(test.name, func(t *testing.T) {
 			client := fake.NewSimpleClientset()
 			gcc, podInformer, _ := NewFromClient(client, -1)
-			deletedPodNames := make([]string, 0)
-			var lock sync.Mutex
-			gcc.deletePod = func(_, name string) error {
-				lock.Lock()
-				defer lock.Unlock()
-				deletedPodNames = append(deletedPodNames, name)
-				return nil
-			}
 
 			creationTime := time.Unix(0, 0)
 			for _, pod := range test.pods {
@@ -442,7 +433,8 @@ func TestGCUnscheduledTerminating(t *testing.T) {
 				t.Errorf("Error while listing all Pods: %v", err)
 				return
 			}
-			gcc.gcUnscheduledTerminating(pods)
+			gcc.gcUnscheduledTerminating(context.TODO(), pods)
+			deletedPodNames := getDeletedPodNames(client)
 
 			if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
 				t.Errorf("[%v]pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v, test: %v",
@@ -557,14 +549,7 @@ func TestGCTerminating(t *testing.T) {
 		t.Run(test.name, func(t *testing.T) {
 			client := fake.NewSimpleClientset(&v1.NodeList{Items: []v1.Node{*testutil.NewNode("node-a")}})
 			gcc, podInformer, nodeInformer := NewFromClient(client, -1)
-			deletedPodNames := make([]string, 0)
-			var lock sync.Mutex
-			gcc.deletePod = func(_, name string) error {
-				lock.Lock()
-				defer lock.Unlock()
-				deletedPodNames = append(deletedPodNames, name)
-				return nil
-			}
+
 			creationTime := time.Unix(0, 0)
 			for _, node := range test.nodes {
 				creationTime = creationTime.Add(2 * time.Hour)
@@ -595,6 +580,8 @@ func TestGCTerminating(t *testing.T) {
 			}
 
 			gcc.gc(context.TODO())
+			deletedPodNames := getDeletedPodNames(client)
+
 			if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
 				t.Errorf("[%v]pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v",
 					i, test.deletedPodNames.List(), deletedPodNames)