mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-24 20:24:09 +00:00
Merge pull request #111091 from mimowo/retriable-pod-failures-podgc-integration
Add integration test for podgc
This commit is contained in:
commit
12004ea53d
@ -61,10 +61,18 @@ type PodGCController struct {
|
||||
nodeQueue workqueue.DelayingInterface
|
||||
|
||||
terminatedPodThreshold int
|
||||
gcCheckPeriod time.Duration
|
||||
quarantineTime time.Duration
|
||||
}
|
||||
|
||||
func NewPodGC(ctx context.Context, kubeClient clientset.Interface, podInformer coreinformers.PodInformer,
|
||||
nodeInformer coreinformers.NodeInformer, terminatedPodThreshold int) *PodGCController {
|
||||
return NewPodGCInternal(ctx, kubeClient, podInformer, nodeInformer, terminatedPodThreshold, gcCheckPeriod, quarantineTime)
|
||||
}
|
||||
|
||||
// NewPodGCInternal is like NewPodGC but takes explicit gcCheckPeriod and quarantineTime values; it is only intended for integration tests.
|
||||
func NewPodGCInternal(ctx context.Context, kubeClient clientset.Interface, podInformer coreinformers.PodInformer,
|
||||
nodeInformer coreinformers.NodeInformer, terminatedPodThreshold int, gcCheckPeriod, quarantineTime time.Duration) *PodGCController {
|
||||
if kubeClient != nil && kubeClient.CoreV1().RESTClient().GetRateLimiter() != nil {
|
||||
ratelimiter.RegisterMetricAndTrackRateLimiterUsage("gc_controller", kubeClient.CoreV1().RESTClient().GetRateLimiter())
|
||||
}
|
||||
@ -76,6 +84,8 @@ func NewPodGC(ctx context.Context, kubeClient clientset.Interface, podInformer c
|
||||
nodeLister: nodeInformer.Lister(),
|
||||
nodeListerSynced: nodeInformer.Informer().HasSynced,
|
||||
nodeQueue: workqueue.NewNamedDelayingQueue("orphaned_pods_nodes"),
|
||||
gcCheckPeriod: gcCheckPeriod,
|
||||
quarantineTime: quarantineTime,
|
||||
}
|
||||
|
||||
return gcc
|
||||
@ -92,7 +102,7 @@ func (gcc *PodGCController) Run(ctx context.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
go wait.UntilWithContext(ctx, gcc.gc, gcCheckPeriod)
|
||||
go wait.UntilWithContext(ctx, gcc.gc, gcc.gcCheckPeriod)
|
||||
|
||||
<-ctx.Done()
|
||||
}
|
||||
@ -214,7 +224,7 @@ func (gcc *PodGCController) gcOrphaned(ctx context.Context, pods []*v1.Pod, node
|
||||
// Add newly found unknown nodes to quarantine
|
||||
for _, pod := range pods {
|
||||
if pod.Spec.NodeName != "" && !existingNodeNames.Has(pod.Spec.NodeName) {
|
||||
gcc.nodeQueue.AddAfter(pod.Spec.NodeName, quarantineTime)
|
||||
gcc.nodeQueue.AddAfter(pod.Spec.NodeName, gcc.quarantineTime)
|
||||
}
|
||||
}
|
||||
// Check if nodes are still missing after quarantine period
|
||||
|
8
test/integration/podgc/OWNERS
Normal file
8
test/integration/podgc/OWNERS
Normal file
@ -0,0 +1,8 @@
|
||||
# See the OWNERS docs at https://go.k8s.io/owners
|
||||
|
||||
approvers:
|
||||
- sig-apps-approvers
|
||||
reviewers:
|
||||
- sig-apps-reviewers
|
||||
labels:
|
||||
- sig/apps
|
27
test/integration/podgc/main_test.go
Normal file
27
test/integration/podgc/main_test.go
Normal file
@ -0,0 +1,27 @@
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package podgc
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"k8s.io/kubernetes/test/integration/framework"
|
||||
)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
framework.EtcdMain(m.Run)
|
||||
}
|
198
test/integration/podgc/podgc_test.go
Normal file
198
test/integration/podgc/podgc_test.go
Normal file
@ -0,0 +1,198 @@
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package podgc
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/client-go/informers"
|
||||
featuregatetesting "k8s.io/component-base/featuregate/testing"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/controller/podgc"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
testutils "k8s.io/kubernetes/test/integration/util"
|
||||
"k8s.io/utils/pointer"
|
||||
)
|
||||
|
||||
// TestPodGcOrphanedPodsWithFinalizer tests deletion of orphaned pods
|
||||
func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) {
|
||||
testCtx := setup(t, "podgc-orphaned")
|
||||
defer testutils.CleanupTest(t, testCtx)
|
||||
cs := testCtx.ClientSet
|
||||
|
||||
node := &v1.Node{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "node",
|
||||
},
|
||||
Spec: v1.NodeSpec{},
|
||||
Status: v1.NodeStatus{
|
||||
Conditions: []v1.NodeCondition{
|
||||
{
|
||||
Type: v1.NodeReady,
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
node, err := cs.CoreV1().Nodes().Create(testCtx.Ctx, node, metav1.CreateOptions{})
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create node '%v', err: %v", node.Name, err)
|
||||
}
|
||||
|
||||
pod := &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "testpod",
|
||||
Namespace: testCtx.NS.Name,
|
||||
Finalizers: []string{"test.k8s.io/finalizer"},
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
NodeName: node.Name,
|
||||
Containers: []v1.Container{
|
||||
{Name: "foo", Image: "bar"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Create(testCtx.Ctx, pod, metav1.CreateOptions{})
|
||||
if err != nil {
|
||||
t.Fatalf("Error %v, while creating pod: %v", err, klog.KObj(pod))
|
||||
}
|
||||
defer testutils.RemovePodFinalizers(testCtx.ClientSet, t, []*v1.Pod{pod})
|
||||
pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Get(testCtx.Ctx, pod.Name, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
t.Fatalf("Error: '%v' while updating pod info: '%v'", err, klog.KObj(pod))
|
||||
}
|
||||
|
||||
// we delete the node to orphan the pod
|
||||
err = cs.CoreV1().Nodes().Delete(testCtx.Ctx, pod.Spec.NodeName, metav1.DeleteOptions{})
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to delete node: %v, err: %v", pod.Spec.NodeName, err)
|
||||
}
|
||||
|
||||
err = wait.PollImmediate(time.Second, time.Second*15, func() (bool, error) {
|
||||
updatedPod, err := cs.CoreV1().Pods(testCtx.NS.Name).Get(testCtx.Ctx, pod.Name, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
return true, err
|
||||
}
|
||||
if updatedPod.ObjectMeta.DeletionTimestamp != nil {
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Error '%v' while waiting for the pod '%v' to be deleted", err, klog.KObj(pod))
|
||||
}
|
||||
}
|
||||
|
||||
// TestTerminatingOnOutOfServiceNode tests deletion pods terminating on out-of-service nodes
|
||||
func TestTerminatingOnOutOfServiceNode(t *testing.T) {
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeOutOfServiceVolumeDetach, true)()
|
||||
testCtx := setup(t, "podgc-out-of-service")
|
||||
defer testutils.CleanupTest(t, testCtx)
|
||||
cs := testCtx.ClientSet
|
||||
|
||||
node := &v1.Node{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "node",
|
||||
},
|
||||
Spec: v1.NodeSpec{},
|
||||
Status: v1.NodeStatus{
|
||||
Conditions: []v1.NodeCondition{
|
||||
{
|
||||
Type: v1.NodeReady,
|
||||
Status: v1.ConditionFalse,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
node, err := cs.CoreV1().Nodes().Create(testCtx.Ctx, node, metav1.CreateOptions{})
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create node '%v', err: %v", node.Name, err)
|
||||
}
|
||||
|
||||
pod := &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "testpod",
|
||||
Namespace: testCtx.NS.Name,
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
NodeName: node.Name,
|
||||
Containers: []v1.Container{
|
||||
{Name: "foo", Image: "bar"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Create(testCtx.Ctx, pod, metav1.CreateOptions{})
|
||||
if err != nil {
|
||||
t.Fatalf("Error %v, while creating pod: %v", err, klog.KObj(pod))
|
||||
}
|
||||
|
||||
// trigger termination of the pod, but with long grace period so that it is not removed immediately
|
||||
err = cs.CoreV1().Pods(testCtx.NS.Name).Delete(testCtx.Ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: pointer.Int64(300)})
|
||||
if err != nil {
|
||||
t.Fatalf("Error: '%v' while deleting pod: '%v'", err, klog.KObj(pod))
|
||||
}
|
||||
|
||||
// taint the node with the out-of-service taint
|
||||
err = testutils.AddTaintToNode(cs, pod.Spec.NodeName, v1.Taint{Key: v1.TaintNodeOutOfService, Value: "", Effect: v1.TaintEffectNoExecute})
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to taint node: %v, err: %v", pod.Spec.NodeName, err)
|
||||
}
|
||||
|
||||
// wait until the pod is deleted
|
||||
err = wait.PollImmediate(time.Second, time.Second*15, func() (bool, error) {
|
||||
updatedPod, err := cs.CoreV1().Pods(pod.Namespace).Get(testCtx.Ctx, pod.Name, metav1.GetOptions{})
|
||||
if err == nil {
|
||||
return updatedPod == nil, nil
|
||||
}
|
||||
// there was an error
|
||||
if apierrors.IsNotFound(err) {
|
||||
return true, nil
|
||||
}
|
||||
return false, err
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Error '%v' while waiting for the pod '%v' to be deleted", err, klog.KObj(pod))
|
||||
}
|
||||
}
|
||||
|
||||
func setup(t *testing.T, name string) *testutils.TestContext {
|
||||
testCtx := testutils.InitTestAPIServer(t, name, nil)
|
||||
externalInformers := informers.NewSharedInformerFactory(testCtx.ClientSet, time.Second)
|
||||
|
||||
podgc := podgc.NewPodGCInternal(testCtx.Ctx,
|
||||
testCtx.ClientSet,
|
||||
externalInformers.Core().V1().Pods(),
|
||||
externalInformers.Core().V1().Nodes(),
|
||||
0,
|
||||
500*time.Millisecond,
|
||||
time.Second)
|
||||
|
||||
// Waiting for all controllers to sync
|
||||
externalInformers.Start(testCtx.Ctx.Done())
|
||||
externalInformers.WaitForCacheSync(testCtx.Ctx.Done())
|
||||
|
||||
go podgc.Run(testCtx.Ctx)
|
||||
return testCtx
|
||||
}
|
@ -217,6 +217,22 @@ func CleanupTest(t *testing.T, testCtx *TestContext) {
|
||||
testCtx.CloseFn()
|
||||
}
|
||||
|
||||
// RemovePodFinalizers removes pod finalizers for the pods
|
||||
func RemovePodFinalizers(cs clientset.Interface, t *testing.T, pods []*v1.Pod) {
|
||||
for _, p := range pods {
|
||||
pod, err := cs.CoreV1().Pods(p.Namespace).Get(context.TODO(), p.Name, metav1.GetOptions{})
|
||||
if err != nil && !apierrors.IsNotFound(err) {
|
||||
t.Errorf("error while removing pod finalizers for %v: %v", klog.KObj(p), err)
|
||||
} else if pod != nil {
|
||||
pod.ObjectMeta.Finalizers = nil
|
||||
_, err = cs.CoreV1().Pods(pod.Namespace).Update(context.TODO(), pod, metav1.UpdateOptions{})
|
||||
if err != nil {
|
||||
t.Errorf("error while updating pod status for %v: %v", klog.KObj(p), err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// CleanupPods deletes the given pods and waits for them to be actually deleted.
|
||||
func CleanupPods(cs clientset.Interface, t *testing.T, pods []*v1.Pod) {
|
||||
for _, p := range pods {
|
||||
|
Loading…
Reference in New Issue
Block a user