Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-19 09:52:49 +00:00
Merge pull request #115442 from bobbypage/unknown_pods_test
test: Add e2e node test to check for unknown pods
Commit: 59a7e34052
@@ -31,11 +31,9 @@ import (
    "k8s.io/apimachinery/pkg/api/resource"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/runtime"
    runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
    kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
    "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
    "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
    "k8s.io/kubernetes/pkg/kubelet/types"
    admissionapi "k8s.io/pod-security-admission/api"

    "k8s.io/kubernetes/test/e2e/framework"
@@ -372,28 +370,6 @@ func runTopologyManagerPolicySuiteTests(ctx context.Context, f *framework.Framework) {
    runMultipleGuPods(ctx, f)
}

// waitForAllContainerRemoval waits until all the containers belonging to a given pod are really gone.
// This is needed by the e2e tests which involve exclusive resource allocation (cpu, topology manager, podresources, etc.).
// In these cases, we need to make sure the tests clean up after themselves so that each test runs in
// a pristine environment. The only way known so far to do that is to introduce this wait.
// Worth noting, however, that this makes the test runtime much longer.
func waitForAllContainerRemoval(ctx context.Context, podName, podNS string) {
    rs, _, err := getCRIClient()
    framework.ExpectNoError(err)
    gomega.Eventually(ctx, func(ctx context.Context) bool {
        containers, err := rs.ListContainers(ctx, &runtimeapi.ContainerFilter{
            LabelSelector: map[string]string{
                types.KubernetesPodNameLabel:      podName,
                types.KubernetesPodNamespaceLabel: podNS,
            },
        })
        if err != nil {
            return false
        }
        return len(containers) == 0
    }, 2*time.Minute, 1*time.Second).Should(gomega.BeTrue())
}

func runTopologyManagerPositiveTest(ctx context.Context, f *framework.Framework, numPods int, ctnAttrs, initCtnAttrs []tmCtnAttribute, envInfo *testEnvInfo) {
    podMap := make(map[string]*v1.Pod)
test/e2e_node/unknown_pods_test.go (new file, 173 lines)
@@ -0,0 +1,173 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2enode

import (
    "context"
    "os"

    "github.com/onsi/ginkgo/v2"
    "github.com/onsi/gomega"
    v1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/util/uuid"
    "k8s.io/kubernetes/test/e2e/framework"
    e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    imageutils "k8s.io/kubernetes/test/utils/image"
    admissionapi "k8s.io/pod-security-admission/api"
)

/*
 * Unknown pods are pods which are unknown to the kubelet, but are still
 * running in the container runtime. If the kubelet detects a pod which is not
 * in its config (i.e. not present in the API server or as a static pod), but
 * which is running according to the container runtime, the kubelet should
 * aggressively terminate the pod.
 *
 * This situation can be encountered if a pod is running, the kubelet is
 * stopped, and, while it is stopped, the manifest is deleted (by force
 * deleting the API pod or deleting the static pod manifest), and the kubelet
 * is then restarted. Upon restart, the kubelet will see the pod as running via
 * the container runtime, but it will not be present in the config, making it
 * an "unknown pod". The kubelet should then proceed to terminate these unknown
 * pods.
 */
var _ = SIGDescribe("Unknown Pods [Serial] [Disruptive]", func() {
    f := framework.NewDefaultFramework("unknown-pods")
    f.NamespacePodSecurityEnforceLevel = admissionapi.LevelBaseline

    ginkgo.Context("when creating a mirror pod", func() {
        var ns, podPath, staticPodName, mirrorPodName string
        ginkgo.BeforeEach(func(ctx context.Context) {
            ns = f.Namespace.Name
            staticPodName = "unknown-test-pod-" + string(uuid.NewUUID())
            mirrorPodName = staticPodName + "-" + framework.TestContext.NodeName

            podPath = framework.TestContext.KubeletConfig.StaticPodPath

            framework.Logf("create the static pod %v", staticPodName)
            err := createStaticPodWithGracePeriod(podPath, staticPodName, ns)
            framework.ExpectNoError(err)

            framework.Logf("wait for the mirror pod %v to be running", mirrorPodName)
            gomega.Eventually(ctx, func(ctx context.Context) error {
                return checkMirrorPodRunning(ctx, f.ClientSet, mirrorPodName, ns)
            }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeNil())
        })

        ginkgo.It("the static pod should be terminated and cleaned up due to becoming an unknown pod after being force deleted while the kubelet is not running", func(ctx context.Context) {
            framework.Logf("Stopping the kubelet")
            startKubelet := stopKubelet()

            pod, err := f.ClientSet.CoreV1().Pods(ns).Get(ctx, mirrorPodName, metav1.GetOptions{})
            framework.ExpectNoError(err)

            // wait until the kubelet health check fails
            gomega.Eventually(ctx, func() bool {
                return kubeletHealthCheck(kubeletHealthCheckURL)
            }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse())

            framework.Logf("Delete the static pod manifest while the kubelet is not running")
            file := staticPodPath(podPath, staticPodName, ns)
            framework.Logf("deleting static pod manifest %q", file)
            err = os.Remove(file)
            framework.ExpectNoError(err)

            framework.Logf("Starting the kubelet")
            startKubelet()

            // wait until the kubelet health check succeeds
            gomega.Eventually(ctx, func() bool {
                return kubeletHealthCheck(kubeletHealthCheckURL)
            }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue())

            framework.Logf("wait for the mirror pod %v to disappear", mirrorPodName)
            gomega.Eventually(ctx, func(ctx context.Context) error {
                return checkMirrorPodDisappear(ctx, f.ClientSet, mirrorPodName, ns)
            }, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())

            waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
        })

        ginkgo.AfterEach(func(ctx context.Context) {
            framework.Logf("deleting the static pod %v", staticPodName)
            err := deleteStaticPod(podPath, staticPodName, ns)
            if !os.IsNotExist(err) {
                framework.ExpectNoError(err)
            }

            framework.Logf("wait for the mirror pod to disappear")
            gomega.Eventually(ctx, func(ctx context.Context) error {
                return checkMirrorPodDisappear(ctx, f.ClientSet, mirrorPodName, ns)
            }, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
        })
    })

    ginkgo.Context("when creating an API pod", func() {
        var ns, podName string

        ginkgo.BeforeEach(func(ctx context.Context) {
            ns = f.Namespace.Name
            podName = "unknown-test-pause-pod-" + string(uuid.NewUUID())
            pod := &v1.Pod{
                ObjectMeta: metav1.ObjectMeta{
                    Name: podName,
                },
                Spec: v1.PodSpec{
                    Containers: []v1.Container{
                        {
                            Name:  "pause",
                            Image: imageutils.GetPauseImageName(),
                        },
                    },
                },
            }

            e2epod.NewPodClient(f).CreateSync(ctx, pod)
        })

        ginkgo.It("the API pod should be terminated and cleaned up due to becoming an unknown pod after being force deleted while the kubelet is not running", func(ctx context.Context) {
            framework.Logf("Stopping the kubelet")
            startKubelet := stopKubelet()

            pod, err := f.ClientSet.CoreV1().Pods(ns).Get(ctx, podName, metav1.GetOptions{})
            framework.ExpectNoError(err)

            // wait until the kubelet health check fails
            gomega.Eventually(ctx, func() bool {
                return kubeletHealthCheck(kubeletHealthCheckURL)
            }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse())

            framework.Logf("Delete the pod while the kubelet is not running")
            // deletePodSyncByName will force delete the pod, removing it from the kubelet's config
            deletePodSyncByName(ctx, f, podName)

            framework.Logf("Starting the kubelet")
            startKubelet()

            // wait until the kubelet health check succeeds
            gomega.Eventually(ctx, func() bool {
                return kubeletHealthCheck(kubeletHealthCheckURL)
            }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue())

            framework.Logf("wait for the pod %v to disappear", podName)
            gomega.Eventually(ctx, func(ctx context.Context) error {
                return checkMirrorPodDisappear(ctx, f.ClientSet, podName, ns)
            }, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())

            waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
        })
    })
})
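The comment block at the top of the new test file describes the behavior under test: anything the container runtime reports as running that no config source (API server or static pod manifest) knows about is an unknown pod, and the kubelet is expected to terminate it. As a rough standalone illustration of that decision only, not the kubelet's actual implementation (the function name and the string-based pod identifiers below are made up for the sketch):

package main

import "fmt"

// findUnknownPods is a toy model of the check exercised above: pods the
// runtime reports as running, but which no config source knows about, are
// "unknown" and should be terminated by the kubelet.
func findUnknownPods(runningInRuntime, inConfig []string) []string {
    known := make(map[string]bool, len(inConfig))
    for _, name := range inConfig {
        known[name] = true
    }
    var unknown []string
    for _, name := range runningInRuntime {
        if !known[name] {
            unknown = append(unknown, name)
        }
    }
    return unknown
}

func main() {
    // "orphan" was force deleted from the config while the kubelet was down,
    // but its containers are still running in the container runtime.
    running := []string{"static-pod", "api-pod", "orphan"}
    config := []string{"static-pod", "api-pod"}
    fmt.Println(findUnknownPods(running, config)) // prints [orphan]
}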
@@ -41,6 +41,7 @@ import (
    clientset "k8s.io/client-go/kubernetes"
    "k8s.io/component-base/featuregate"
    internalapi "k8s.io/cri-api/pkg/apis"
    runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
    "k8s.io/klog/v2"
    kubeletpodresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1"
    kubeletpodresourcesv1alpha1 "k8s.io/kubelet/pkg/apis/podresources/v1alpha1"
@@ -51,6 +52,7 @@ import (
    "k8s.io/kubernetes/pkg/kubelet/cm"
    "k8s.io/kubernetes/pkg/kubelet/cri/remote"
    kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
    "k8s.io/kubernetes/pkg/kubelet/types"
    "k8s.io/kubernetes/pkg/kubelet/util"

    "k8s.io/kubernetes/test/e2e/framework"
@@ -437,3 +439,29 @@ func withFeatureGate(feature featuregate.Feature, desired bool) func() {
        utilfeature.DefaultMutableFeatureGate.Set(fmt.Sprintf("%s=%v", string(feature), current))
    }
}

// waitForAllContainerRemoval waits until all the containers belonging to a given pod are really gone.
// This is needed by the e2e tests which involve exclusive resource allocation (cpu, topology manager, podresources, etc.).
// In these cases, we need to make sure the tests clean up after themselves so that each test runs in
// a pristine environment. The only way known so far to do that is to introduce this wait.
// Worth noting, however, that this makes the test runtime much longer.
func waitForAllContainerRemoval(ctx context.Context, podName, podNS string) {
    rs, _, err := getCRIClient()
    framework.ExpectNoError(err)
    gomega.Eventually(ctx, func(ctx context.Context) error {
        containers, err := rs.ListContainers(ctx, &runtimeapi.ContainerFilter{
            LabelSelector: map[string]string{
                types.KubernetesPodNameLabel:      podName,
                types.KubernetesPodNamespaceLabel: podNS,
            },
        })
        if err != nil {
            return fmt.Errorf("got error waiting for all containers to be removed from CRI: %v", err)
        }

        if len(containers) > 0 {
            return fmt.Errorf("expected all containers to be removed from CRI but %v containers still remain. Containers: %+v", len(containers), containers)
        }
        return nil
    }, 2*time.Minute, 1*time.Second).Should(gomega.Succeed())
}
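Compared to the version removed from the topology manager tests above, the relocated helper polls an error-returning function and asserts gomega.Succeed(), so a timeout now reports why the condition is still failing (how many containers remain, or the CRI error) instead of a bare "expected true". A minimal standalone sketch of that gomega pattern, assuming the standard gomega module used outside of ginkgo; the poll body is a stand-in counter, not the CRI call:

package main

import (
    "context"
    "fmt"
    "time"

    "github.com/onsi/gomega"
)

func main() {
    // Outside of ginkgo, gomega needs an explicit fail handler.
    g := gomega.NewGomega(func(message string, _ ...int) {
        fmt.Println("assertion failed:", message)
    })

    remaining := 3 // stand-in for containers still reported by the runtime

    // Error-returning poll: if it never succeeds, the last returned error is
    // surfaced in the failure message, which is the readability gain over
    // polling a bool and asserting BeTrue.
    g.Eventually(context.Background(), func(ctx context.Context) error {
        if remaining > 0 {
            remaining-- // simulate the runtime cleaning containers up over time
            return fmt.Errorf("%d containers still remain", remaining+1)
        }
        return nil
    }, 1*time.Second, 10*time.Millisecond).Should(gomega.Succeed())

    fmt.Println("all containers removed")
}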