Merge pull request #115442 from bobbypage/unknown_pods_test

test: Add e2e node test to check for unknown pods

Commit: 59a7e34052
test/e2e_node/topology_manager_test.go
@@ -31,11 +31,9 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
-	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
 	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
 	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
 	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
-	"k8s.io/kubernetes/pkg/kubelet/types"
 	admissionapi "k8s.io/pod-security-admission/api"
 
 	"k8s.io/kubernetes/test/e2e/framework"
@@ -372,28 +370,6 @@ func runTopologyManagerPolicySuiteTests(ctx context.Context, f *framework.Framework) {
 	runMultipleGuPods(ctx, f)
 }
 
-// waitForAllContainerRemoval waits until all the containers on a given pod are really gone.
-// This is needed by the e2e tests which involve exclusive resource allocation (cpu, topology manager; podresources; etc.)
-// In these cases, we need to make sure the tests clean up after themselves to make sure each test runs in
-// a pristine environment. The only way known so far to do that is to introduce this wait.
-// Worth noting, however, that this makes the test runtime much bigger.
-func waitForAllContainerRemoval(ctx context.Context, podName, podNS string) {
-	rs, _, err := getCRIClient()
-	framework.ExpectNoError(err)
-	gomega.Eventually(ctx, func(ctx context.Context) bool {
-		containers, err := rs.ListContainers(ctx, &runtimeapi.ContainerFilter{
-			LabelSelector: map[string]string{
-				types.KubernetesPodNameLabel:      podName,
-				types.KubernetesPodNamespaceLabel: podNS,
-			},
-		})
-		if err != nil {
-			return false
-		}
-		return len(containers) == 0
-	}, 2*time.Minute, 1*time.Second).Should(gomega.BeTrue())
-}
-
 func runTopologyManagerPositiveTest(ctx context.Context, f *framework.Framework, numPods int, ctnAttrs, initCtnAttrs []tmCtnAttribute, envInfo *testEnvInfo) {
 	podMap := make(map[string]*v1.Pod)
 
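Note: waitForAllContainerRemoval is not deleted outright. The two imports dropped above (runtimeapi and types) were used only by this helper, which is re-added to test/e2e_node/util.go at the end of this diff, rewritten to surface errors on timeout, so the new unknown pods test can reuse it.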
test/e2e_node/unknown_pods_test.go (new file, 173 lines)
@@ -0,0 +1,173 @@
+/*
+Copyright 2023 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2enode
+
+import (
+	"context"
+	"os"
+
+	"github.com/onsi/ginkgo/v2"
+	"github.com/onsi/gomega"
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/uuid"
+	"k8s.io/kubernetes/test/e2e/framework"
+	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
+	imageutils "k8s.io/kubernetes/test/utils/image"
+	admissionapi "k8s.io/pod-security-admission/api"
+)
+
+/*
+ * Unknown pods are pods which are unknown to the kubelet, but are still
+ * running in the container runtime. If kubelet detects a pod which is not in
+ * the config (i.e. not present in API-server or static pod), but running as
+ * detected in container runtime, kubelet should aggressively terminate the pod.
+ *
+ * This situation can be encountered if a pod is running, then kubelet is
+ * stopped, and while stopped, the manifest is deleted (by force deleting the
+ * API pod or deleting the static pod manifest), and then the
+ * kubelet is restarted. Upon restart, kubelet will see the pod as running via
+ * the container runtime, but it will not be present in the config, thus making
+ * the pod an "unknown pod". Kubelet should then proceed to terminate these
+ * unknown pods.
+ */
+var _ = SIGDescribe("Unknown Pods [Serial] [Disruptive]", func() {
+	f := framework.NewDefaultFramework("unknown-pods")
+	f.NamespacePodSecurityEnforceLevel = admissionapi.LevelBaseline
+
+	ginkgo.Context("when creating a mirror pod", func() {
+		var ns, podPath, staticPodName, mirrorPodName string
+		ginkgo.BeforeEach(func(ctx context.Context) {
+			ns = f.Namespace.Name
+			staticPodName = "unknown-test-pod-" + string(uuid.NewUUID())
+			mirrorPodName = staticPodName + "-" + framework.TestContext.NodeName
+
+			podPath = framework.TestContext.KubeletConfig.StaticPodPath
+
+			framework.Logf("create the static pod %v", staticPodName)
+			err := createStaticPodWithGracePeriod(podPath, staticPodName, ns)
+			framework.ExpectNoError(err)
+
+			framework.Logf("wait for the mirror pod %v to be running", mirrorPodName)
+			gomega.Eventually(ctx, func(ctx context.Context) error {
+				return checkMirrorPodRunning(ctx, f.ClientSet, mirrorPodName, ns)
+			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeNil())
+		})
+
+		ginkgo.It("the static pod should be terminated and cleaned up due to becoming a unknown pod due to being force deleted while kubelet is not running", func(ctx context.Context) {
+			framework.Logf("Stopping the kubelet")
+			startKubelet := stopKubelet()
+
+			pod, err := f.ClientSet.CoreV1().Pods(ns).Get(ctx, mirrorPodName, metav1.GetOptions{})
+			framework.ExpectNoError(err)
+
+			// wait until the kubelet health check will fail
+			gomega.Eventually(ctx, func() bool {
+				return kubeletHealthCheck(kubeletHealthCheckURL)
+			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse())
+
+			framework.Logf("Delete the static pod manifest while the kubelet is not running")
+			file := staticPodPath(podPath, staticPodName, ns)
+			framework.Logf("deleting static pod manifest %q", file)
+			err = os.Remove(file)
+			framework.ExpectNoError(err)
+
+			framework.Logf("Starting the kubelet")
+			startKubelet()
+
+			// wait until the kubelet health check will succeed
+			gomega.Eventually(ctx, func() bool {
+				return kubeletHealthCheck(kubeletHealthCheckURL)
+			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue())
+
+			framework.Logf("wait for the mirror pod %v to disappear", mirrorPodName)
+			gomega.Eventually(ctx, func(ctx context.Context) error {
+				return checkMirrorPodDisappear(ctx, f.ClientSet, mirrorPodName, ns)
+			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
+
+			waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
+		})
+
+		ginkgo.AfterEach(func(ctx context.Context) {
+			framework.Logf("deleting the static pod %v", staticPodName)
+			err := deleteStaticPod(podPath, staticPodName, ns)
+			if !os.IsNotExist(err) {
+				framework.ExpectNoError(err)
+			}
+
+			framework.Logf("wait for the mirror pod to disappear")
+			gomega.Eventually(ctx, func(ctx context.Context) error {
+				return checkMirrorPodDisappear(ctx, f.ClientSet, mirrorPodName, ns)
+			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
+		})
+	})
+
+	ginkgo.Context("when creating a API pod", func() {
+		var ns, podName string
+
+		ginkgo.BeforeEach(func(ctx context.Context) {
+			ns = f.Namespace.Name
+			podName = "unknown-test-pause-pod-" + string(uuid.NewUUID())
+			pod := &v1.Pod{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: podName,
+				},
+				Spec: v1.PodSpec{
+					Containers: []v1.Container{
+						{
+							Name:  "pause",
+							Image: imageutils.GetPauseImageName(),
+						},
+					},
+				},
+			}
+
+			e2epod.NewPodClient(f).CreateSync(ctx, pod)
+		})
+
+		ginkgo.It("the api pod should be terminated and cleaned up due to becoming a unknown pod due to being force deleted while kubelet is not running", func(ctx context.Context) {
+			framework.Logf("Stopping the kubelet")
+			startKubelet := stopKubelet()
+
+			pod, err := f.ClientSet.CoreV1().Pods(ns).Get(ctx, podName, metav1.GetOptions{})
+			framework.ExpectNoError(err)
+
+			// wait until the kubelet health check will fail
+			gomega.Eventually(ctx, func() bool {
+				return kubeletHealthCheck(kubeletHealthCheckURL)
+			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse())
+
+			framework.Logf("Delete the pod while the kubelet is not running")
+			// Delete pod sync by name will force delete the pod, removing it from kubelet's config
+			deletePodSyncByName(ctx, f, podName)
+
+			framework.Logf("Starting the kubelet")
+			startKubelet()
+
+			// wait until the kubelet health check will succeed
+			gomega.Eventually(ctx, func() bool {
+				return kubeletHealthCheck(kubeletHealthCheckURL)
+			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue())
+
+			framework.Logf("wait for the pod %v to disappear", podName)
+			gomega.Eventually(ctx, func(ctx context.Context) error {
+				return checkMirrorPodDisappear(ctx, f.ClientSet, podName, ns)
+			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
+
+			waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
+		})
+	})
+})
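The mirror pod test above leans on suite helpers such as createStaticPodWithGracePeriod, staticPodPath, and deleteStaticPod that live elsewhere in test/e2e_node. For readers unfamiliar with static pods: a static pod is just a manifest file dropped into the kubelet's StaticPodPath; the kubelet runs it directly and publishes a read-only mirror pod named <staticPodName>-<nodeName> to the API server, which is why the test computes mirrorPodName that way. A minimal, hypothetical sketch of what writing such a manifest involves; the writeStaticPod helper, file name, and image below are illustrative assumptions, not the PR's actual helper:

package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// writeStaticPod drops a minimal pod manifest into the kubelet's static pod
// directory. The kubelet watches this directory, starts the pod directly,
// and creates a mirror pod for it in the API server.
func writeStaticPod(dir, name, namespace string) error {
	manifest := fmt.Sprintf(`apiVersion: v1
kind: Pod
metadata:
  name: %s
  namespace: %s
spec:
  terminationGracePeriodSeconds: 30
  containers:
  - name: test
    image: registry.k8s.io/pause:3.9
`, name, namespace)
	// The file name is illustrative; any manifest file in the directory works.
	return os.WriteFile(filepath.Join(dir, name+".yaml"), []byte(manifest), 0644)
}

func main() {
	// /etc/kubernetes/manifests is a common StaticPodPath default; the tests
	// read the real value from framework.TestContext.KubeletConfig instead.
	if err := writeStaticPod("/etc/kubernetes/manifests", "unknown-test-pod", "default"); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

Deleting that file while the kubelet is stopped, then restarting the kubelet, reproduces the "unknown pod" scenario the test exercises.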
test/e2e_node/util.go
@@ -41,6 +41,7 @@ import (
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/component-base/featuregate"
 	internalapi "k8s.io/cri-api/pkg/apis"
+	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
 	"k8s.io/klog/v2"
 	kubeletpodresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1"
 	kubeletpodresourcesv1alpha1 "k8s.io/kubelet/pkg/apis/podresources/v1alpha1"
@@ -51,6 +52,7 @@ import (
 	"k8s.io/kubernetes/pkg/kubelet/cm"
 	"k8s.io/kubernetes/pkg/kubelet/cri/remote"
 	kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
+	"k8s.io/kubernetes/pkg/kubelet/types"
 	"k8s.io/kubernetes/pkg/kubelet/util"
 
 	"k8s.io/kubernetes/test/e2e/framework"
@@ -437,3 +439,29 @@ func withFeatureGate(feature featuregate.Feature, desired bool) func() {
 		utilfeature.DefaultMutableFeatureGate.Set(fmt.Sprintf("%s=%v", string(feature), current))
 	}
 }
+
+// waitForAllContainerRemoval waits until all the containers on a given pod are really gone.
+// This is needed by the e2e tests which involve exclusive resource allocation (cpu, topology manager; podresources; etc.)
+// In these cases, we need to make sure the tests clean up after themselves to make sure each test runs in
+// a pristine environment. The only way known so far to do that is to introduce this wait.
+// Worth noting, however, that this makes the test runtime much bigger.
+func waitForAllContainerRemoval(ctx context.Context, podName, podNS string) {
+	rs, _, err := getCRIClient()
+	framework.ExpectNoError(err)
+	gomega.Eventually(ctx, func(ctx context.Context) error {
+		containers, err := rs.ListContainers(ctx, &runtimeapi.ContainerFilter{
+			LabelSelector: map[string]string{
+				types.KubernetesPodNameLabel:      podName,
+				types.KubernetesPodNamespaceLabel: podNS,
+			},
+		})
+		if err != nil {
+			return fmt.Errorf("got error waiting for all containers to be removed from CRI: %v", err)
+		}
+
+		if len(containers) > 0 {
+			return fmt.Errorf("expected all containers to be removed from CRI but %v containers still remain. Containers: %+v", len(containers), containers)
+		}
+		return nil
+	}, 2*time.Minute, 1*time.Second).Should(gomega.Succeed())
+}
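Compared with the bool-returning version removed from topology_manager_test.go, this rewrite polls with an error-returning function, so when the two-minute timeout expires Gomega reports the last error (including the remaining container list) instead of a bare "expected true". A small, self-contained sketch of the difference, not part of the PR; both subtests fail deliberately so the two timeout messages can be compared:

package demo

import (
	"fmt"
	"testing"
	"time"

	"github.com/onsi/gomega"
)

func TestPollingStyles(t *testing.T) {
	t.Run("bool style", func(t *testing.T) {
		g := gomega.NewWithT(t)
		// Times out with only: Expected <bool>: false to be true.
		g.Eventually(func() bool { return false },
			100*time.Millisecond, 10*time.Millisecond).Should(gomega.BeTrue())
	})
	t.Run("error style", func(t *testing.T) {
		g := gomega.NewWithT(t)
		// Times out with the actual cause carried by the last error.
		g.Eventually(func() error {
			return fmt.Errorf("expected all containers to be removed from CRI but 2 containers still remain")
		}, 100*time.Millisecond, 10*time.Millisecond).Should(gomega.Succeed())
	})
}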