From 2f61b6105c1866bf064569ecc88c303d1c721bd1 Mon Sep 17 00:00:00 2001 From: Michal Wozniak Date: Fri, 15 Jul 2022 11:51:50 +0200 Subject: [PATCH] Add integration tests for podgc --- pkg/controller/podgc/gc_controller.go | 14 +- test/integration/podgc/OWNERS | 8 ++ test/integration/podgc/main_test.go | 27 ++++ test/integration/podgc/podgc_test.go | 198 ++++++++++++++++++++++++++ test/integration/util/util.go | 16 +++ 5 files changed, 261 insertions(+), 2 deletions(-) create mode 100644 test/integration/podgc/OWNERS create mode 100644 test/integration/podgc/main_test.go create mode 100644 test/integration/podgc/podgc_test.go diff --git a/pkg/controller/podgc/gc_controller.go b/pkg/controller/podgc/gc_controller.go index 34dfbd760be..a69740d5c0f 100644 --- a/pkg/controller/podgc/gc_controller.go +++ b/pkg/controller/podgc/gc_controller.go @@ -61,10 +61,18 @@ type PodGCController struct { nodeQueue workqueue.DelayingInterface terminatedPodThreshold int + gcCheckPeriod time.Duration + quarantineTime time.Duration } func NewPodGC(ctx context.Context, kubeClient clientset.Interface, podInformer coreinformers.PodInformer, nodeInformer coreinformers.NodeInformer, terminatedPodThreshold int) *PodGCController { + return NewPodGCInternal(ctx, kubeClient, podInformer, nodeInformer, terminatedPodThreshold, gcCheckPeriod, quarantineTime) +} + +// This function is only intended for integration tests +func NewPodGCInternal(ctx context.Context, kubeClient clientset.Interface, podInformer coreinformers.PodInformer, + nodeInformer coreinformers.NodeInformer, terminatedPodThreshold int, gcCheckPeriod, quarantineTime time.Duration) *PodGCController { if kubeClient != nil && kubeClient.CoreV1().RESTClient().GetRateLimiter() != nil { ratelimiter.RegisterMetricAndTrackRateLimiterUsage("gc_controller", kubeClient.CoreV1().RESTClient().GetRateLimiter()) } @@ -76,6 +84,8 @@ func NewPodGC(ctx context.Context, kubeClient clientset.Interface, podInformer c nodeLister: nodeInformer.Lister(), 
nodeListerSynced: nodeInformer.Informer().HasSynced, nodeQueue: workqueue.NewNamedDelayingQueue("orphaned_pods_nodes"), + gcCheckPeriod: gcCheckPeriod, + quarantineTime: quarantineTime, } return gcc @@ -92,7 +102,7 @@ func (gcc *PodGCController) Run(ctx context.Context) { return } - go wait.UntilWithContext(ctx, gcc.gc, gcCheckPeriod) + go wait.UntilWithContext(ctx, gcc.gc, gcc.gcCheckPeriod) <-ctx.Done() } @@ -214,7 +224,7 @@ func (gcc *PodGCController) gcOrphaned(ctx context.Context, pods []*v1.Pod, node // Add newly found unknown nodes to quarantine for _, pod := range pods { if pod.Spec.NodeName != "" && !existingNodeNames.Has(pod.Spec.NodeName) { - gcc.nodeQueue.AddAfter(pod.Spec.NodeName, quarantineTime) + gcc.nodeQueue.AddAfter(pod.Spec.NodeName, gcc.quarantineTime) } } // Check if nodes are still missing after quarantine period diff --git a/test/integration/podgc/OWNERS b/test/integration/podgc/OWNERS new file mode 100644 index 00000000000..c6d302e5ea2 --- /dev/null +++ b/test/integration/podgc/OWNERS @@ -0,0 +1,8 @@ +# See the OWNERS docs at https://go.k8s.io/owners + +approvers: + - sig-apps-approvers +reviewers: + - sig-apps-reviewers +labels: + - sig/apps diff --git a/test/integration/podgc/main_test.go b/test/integration/podgc/main_test.go new file mode 100644 index 00000000000..15d9a3c96b3 --- /dev/null +++ b/test/integration/podgc/main_test.go @@ -0,0 +1,27 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package podgc + +import ( + "testing" + + "k8s.io/kubernetes/test/integration/framework" +) + +func TestMain(m *testing.M) { + framework.EtcdMain(m.Run) +} diff --git a/test/integration/podgc/podgc_test.go b/test/integration/podgc/podgc_test.go new file mode 100644 index 00000000000..5c2ee0c6529 --- /dev/null +++ b/test/integration/podgc/podgc_test.go @@ -0,0 +1,198 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package podgc + +import ( + "testing" + "time" + + v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/client-go/informers" + featuregatetesting "k8s.io/component-base/featuregate/testing" + "k8s.io/klog/v2" + "k8s.io/kubernetes/pkg/controller/podgc" + "k8s.io/kubernetes/pkg/features" + testutils "k8s.io/kubernetes/test/integration/util" + "k8s.io/utils/pointer" +) + +// TestPodGcOrphanedPodsWithFinalizer tests deletion of orphaned pods +func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) { + testCtx := setup(t, "podgc-orphaned") + defer testutils.CleanupTest(t, testCtx) + cs := testCtx.ClientSet + + node := &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node", + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + node, err := 
cs.CoreV1().Nodes().Create(testCtx.Ctx, node, metav1.CreateOptions{}) + if err != nil { + t.Fatalf("Failed to create node '%v', err: %v", node.Name, err) + } + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testpod", + Namespace: testCtx.NS.Name, + Finalizers: []string{"test.k8s.io/finalizer"}, + }, + Spec: v1.PodSpec{ + NodeName: node.Name, + Containers: []v1.Container{ + {Name: "foo", Image: "bar"}, + }, + }, + } + + pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Create(testCtx.Ctx, pod, metav1.CreateOptions{}) + if err != nil { + t.Fatalf("Error %v, while creating pod: %v", err, klog.KObj(pod)) + } + defer testutils.RemovePodFinalizers(testCtx.ClientSet, t, []*v1.Pod{pod}) + pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Get(testCtx.Ctx, pod.Name, metav1.GetOptions{}) + if err != nil { + t.Fatalf("Error: '%v' while updating pod info: '%v'", err, klog.KObj(pod)) + } + + // we delete the node to orphan the pod + err = cs.CoreV1().Nodes().Delete(testCtx.Ctx, pod.Spec.NodeName, metav1.DeleteOptions{}) + if err != nil { + t.Fatalf("Failed to delete node: %v, err: %v", pod.Spec.NodeName, err) + } + + err = wait.PollImmediate(time.Second, time.Second*15, func() (bool, error) { + updatedPod, err := cs.CoreV1().Pods(testCtx.NS.Name).Get(testCtx.Ctx, pod.Name, metav1.GetOptions{}) + if err != nil { + return true, err + } + if updatedPod.ObjectMeta.DeletionTimestamp != nil { + return true, nil + } + return false, nil + }) + if err != nil { + t.Fatalf("Error '%v' while waiting for the pod '%v' to be deleted", err, klog.KObj(pod)) + } +} + +// TestTerminatingOnOutOfServiceNode tests deletion of pods terminating on out-of-service nodes +func TestTerminatingOnOutOfServiceNode(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeOutOfServiceVolumeDetach, true)() + testCtx := setup(t, "podgc-out-of-service") + defer testutils.CleanupTest(t, testCtx) + cs := testCtx.ClientSet + + node := &v1.Node{ + ObjectMeta: 
metav1.ObjectMeta{ + Name: "node", + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionFalse, + }, + }, + }, + } + node, err := cs.CoreV1().Nodes().Create(testCtx.Ctx, node, metav1.CreateOptions{}) + if err != nil { + t.Fatalf("Failed to create node '%v', err: %v", node.Name, err) + } + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testpod", + Namespace: testCtx.NS.Name, + }, + Spec: v1.PodSpec{ + NodeName: node.Name, + Containers: []v1.Container{ + {Name: "foo", Image: "bar"}, + }, + }, + } + + pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Create(testCtx.Ctx, pod, metav1.CreateOptions{}) + if err != nil { + t.Fatalf("Error %v, while creating pod: %v", err, klog.KObj(pod)) + } + + // trigger termination of the pod, but with long grace period so that it is not removed immediately + err = cs.CoreV1().Pods(testCtx.NS.Name).Delete(testCtx.Ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: pointer.Int64(300)}) + if err != nil { + t.Fatalf("Error: '%v' while deleting pod: '%v'", err, klog.KObj(pod)) + } + + // taint the node with the out-of-service taint + err = testutils.AddTaintToNode(cs, pod.Spec.NodeName, v1.Taint{Key: v1.TaintNodeOutOfService, Value: "", Effect: v1.TaintEffectNoExecute}) + if err != nil { + t.Fatalf("Failed to taint node: %v, err: %v", pod.Spec.NodeName, err) + } + + // wait until the pod is deleted + err = wait.PollImmediate(time.Second, time.Second*15, func() (bool, error) { + updatedPod, err := cs.CoreV1().Pods(pod.Namespace).Get(testCtx.Ctx, pod.Name, metav1.GetOptions{}) + if err == nil { + return updatedPod == nil, nil + } + // there was an error + if apierrors.IsNotFound(err) { + return true, nil + } + return false, err + }) + if err != nil { + t.Fatalf("Error '%v' while waiting for the pod '%v' to be deleted", err, klog.KObj(pod)) + } +} + +func setup(t *testing.T, name string) *testutils.TestContext { + testCtx := 
testutils.InitTestAPIServer(t, name, nil) + externalInformers := informers.NewSharedInformerFactory(testCtx.ClientSet, time.Second) + + podgc := podgc.NewPodGCInternal(testCtx.Ctx, + testCtx.ClientSet, + externalInformers.Core().V1().Pods(), + externalInformers.Core().V1().Nodes(), + 0, + 500*time.Millisecond, + time.Second) + + // Waiting for all controllers to sync + externalInformers.Start(testCtx.Ctx.Done()) + externalInformers.WaitForCacheSync(testCtx.Ctx.Done()) + + go podgc.Run(testCtx.Ctx) + return testCtx +} diff --git a/test/integration/util/util.go b/test/integration/util/util.go index 27dfbd37a16..cd01371425c 100644 --- a/test/integration/util/util.go +++ b/test/integration/util/util.go @@ -217,6 +217,22 @@ func CleanupTest(t *testing.T, testCtx *TestContext) { testCtx.CloseFn() } +// RemovePodFinalizers removes pod finalizers for the pods +func RemovePodFinalizers(cs clientset.Interface, t *testing.T, pods []*v1.Pod) { + for _, p := range pods { + pod, err := cs.CoreV1().Pods(p.Namespace).Get(context.TODO(), p.Name, metav1.GetOptions{}) + if err != nil && !apierrors.IsNotFound(err) { + t.Errorf("error while removing pod finalizers for %v: %v", klog.KObj(p), err) + } else if pod != nil { + pod.ObjectMeta.Finalizers = nil + _, err = cs.CoreV1().Pods(pod.Namespace).Update(context.TODO(), pod, metav1.UpdateOptions{}) + if err != nil { + t.Errorf("error while updating pod status for %v: %v", klog.KObj(p), err) + } + } + } +} + // CleanupPods deletes the given pods and waits for them to be actually deleted. func CleanupPods(cs clientset.Interface, t *testing.T, pods []*v1.Pod) { for _, p := range pods {