Merge pull request #113145 from smarterclayton/zombie_terminating_pods
kubelet: Force deleted pods can fail to move out of terminating
@@ -24,6 +24,8 @@ import (

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
	"github.com/onsi/gomega/gstruct"
	"github.com/prometheus/common/model"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
@@ -131,6 +133,174 @@ var _ = SIGDescribe("MirrorPodWithGracePeriod", func() {
			framework.ExpectEqual(pod.Spec.Containers[0].Image, image)
		})

		ginkgo.Context("and the container runtime is temporarily down during pod termination [NodeConformance] [Serial] [Disruptive]", func() {
			ginkgo.It("the mirror pod should terminate successfully", func(ctx context.Context) {
				ginkgo.By("verifying the pod is described as syncing in metrics")
				gomega.Eventually(ctx, getKubeletMetrics, 5*time.Second, time.Second).Should(gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
					"kubelet_working_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{
						`kubelet_working_pods{config="desired", lifecycle="sync", static=""}`: timelessSample(0),
						`kubelet_working_pods{config="desired", lifecycle="sync", static="true"}`: timelessSample(1),
						`kubelet_working_pods{config="orphan", lifecycle="sync", static=""}`: timelessSample(0),
						`kubelet_working_pods{config="orphan", lifecycle="sync", static="true"}`: timelessSample(0),
						`kubelet_working_pods{config="runtime_only", lifecycle="sync", static="unknown"}`: timelessSample(0),
						`kubelet_working_pods{config="desired", lifecycle="terminating", static=""}`: timelessSample(0),
						`kubelet_working_pods{config="desired", lifecycle="terminating", static="true"}`: timelessSample(0),
						`kubelet_working_pods{config="orphan", lifecycle="terminating", static=""}`: timelessSample(0),
						`kubelet_working_pods{config="orphan", lifecycle="terminating", static="true"}`: timelessSample(0),
						`kubelet_working_pods{config="runtime_only", lifecycle="terminating", static="unknown"}`: timelessSample(0),
						`kubelet_working_pods{config="desired", lifecycle="terminated", static=""}`: timelessSample(0),
						`kubelet_working_pods{config="desired", lifecycle="terminated", static="true"}`: timelessSample(0),
						`kubelet_working_pods{config="orphan", lifecycle="terminated", static=""}`: timelessSample(0),
						`kubelet_working_pods{config="orphan", lifecycle="terminated", static="true"}`: timelessSample(0),
						`kubelet_working_pods{config="runtime_only", lifecycle="terminated", static="unknown"}`: timelessSample(0),
					}),
					"kubelet_mirror_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{
						`kubelet_mirror_pods`: timelessSample(1),
					}),
					"kubelet_active_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{
						`kubelet_active_pods{static=""}`: timelessSample(0),
						`kubelet_active_pods{static="true"}`: timelessSample(1),
					}),
					"kubelet_desired_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{
						`kubelet_desired_pods{static=""}`: timelessSample(0),
						`kubelet_desired_pods{static="true"}`: timelessSample(1),
					}),
				}))

				ginkgo.By("delete the static pod")
				err := deleteStaticPod(podPath, staticPodName, ns)
				framework.ExpectNoError(err)

				// Note: a short delay here is important to reproduce https://issues.k8s.io/113091, which requires a failure in syncTerminatingPod().
				// We wait briefly after the static pod is deleted so that syncTerminatingPod() gets a chance to run before the container runtime is stopped.
				ginkgo.By("sleeping before stopping the container runtime")
				time.Sleep(2 * time.Second)

				ginkgo.By("stop the container runtime")
				err = stopContainerRuntime()
				framework.ExpectNoError(err, "expected no error stopping the container runtime")

				ginkgo.By("waiting for the container runtime to be stopped")
				gomega.Eventually(ctx, func(ctx context.Context) error {
					_, _, err := getCRIClient()
					return err
				}, 2*time.Minute, time.Second*5).ShouldNot(gomega.Succeed())

				ginkgo.By("verifying the mirror pod is running")
				gomega.Consistently(ctx, func(ctx context.Context) error {
					return checkMirrorPodRunning(ctx, f.ClientSet, mirrorPodName, ns)
				}, 19*time.Second, 200*time.Millisecond).Should(gomega.BeNil())

				ginkgo.By("verifying the pod is described as terminating in metrics")
				gomega.Eventually(ctx, getKubeletMetrics, 5*time.Second, time.Second).Should(gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
					"kubelet_working_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{
						`kubelet_working_pods{config="desired", lifecycle="sync", static=""}`: timelessSample(0),
						`kubelet_working_pods{config="desired", lifecycle="sync", static="true"}`: timelessSample(0),
						`kubelet_working_pods{config="orphan", lifecycle="sync", static=""}`: timelessSample(0),
						`kubelet_working_pods{config="orphan", lifecycle="sync", static="true"}`: timelessSample(0),
						`kubelet_working_pods{config="runtime_only", lifecycle="sync", static="unknown"}`: timelessSample(0),
						`kubelet_working_pods{config="desired", lifecycle="terminating", static=""}`: timelessSample(0),
						`kubelet_working_pods{config="desired", lifecycle="terminating", static="true"}`: timelessSample(0),
						`kubelet_working_pods{config="orphan", lifecycle="terminating", static=""}`: timelessSample(0),
						`kubelet_working_pods{config="orphan", lifecycle="terminating", static="true"}`: timelessSample(1),
						`kubelet_working_pods{config="runtime_only", lifecycle="terminating", static="unknown"}`: timelessSample(0),
						`kubelet_working_pods{config="desired", lifecycle="terminated", static=""}`: timelessSample(0),
						`kubelet_working_pods{config="desired", lifecycle="terminated", static="true"}`: timelessSample(0),
						`kubelet_working_pods{config="orphan", lifecycle="terminated", static=""}`: timelessSample(0),
						`kubelet_working_pods{config="orphan", lifecycle="terminated", static="true"}`: timelessSample(0),
						`kubelet_working_pods{config="runtime_only", lifecycle="terminated", static="unknown"}`: timelessSample(0),
					}),
					"kubelet_mirror_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{
						`kubelet_mirror_pods`: timelessSample(1),
					}),
					"kubelet_active_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{
						`kubelet_active_pods{static=""}`: timelessSample(0),
						// TODO: the pod is still running and consuming resources, it should be considered in
						// admission https://github.com/kubernetes/kubernetes/issues/104824 for static pods at
						// least, which means it should be 1
						`kubelet_active_pods{static="true"}`: timelessSample(0),
					}),
					"kubelet_desired_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{
						`kubelet_desired_pods{static=""}`: timelessSample(0),
						`kubelet_desired_pods{static="true"}`: timelessSample(0),
					})}))

				ginkgo.By("start the container runtime")
				err = startContainerRuntime()
				framework.ExpectNoError(err, "expected no error starting the container runtime")
				ginkgo.By("waiting for the container runtime to start")
				gomega.Eventually(ctx, func(ctx context.Context) error {
					r, _, err := getCRIClient()
					if err != nil {
						return fmt.Errorf("error getting CRI client: %w", err)
					}
					status, err := r.Status(ctx, true)
					if err != nil {
						return fmt.Errorf("error checking CRI status: %w", err)
					}
					framework.Logf("Runtime started: %#v", status)
					return nil
				}, 2*time.Minute, time.Second*5).Should(gomega.Succeed())

				ginkgo.By(fmt.Sprintf("verifying that the mirror pod (%s/%s) stops running after about 30s", ns, mirrorPodName))
				// from the time the container runtime starts, it should take a maximum of:
				// 20s (grace period) + 2 sync transitions * 1s + 2s between housekeeping + 3s to detect CRI up +
				// 2s overhead
				// which we calculate here as "about 30s", so we try a bit longer than that but verify that it is
				// tightly bounded by not waiting longer (we want to catch regressions to shutdown)
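				// (20s + 2*1s + 2s + 3s + 2s = 29s, rounded up to "about 30s" for the sleep and bounded check below)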
				time.Sleep(30 * time.Second)
				gomega.Eventually(ctx, func(ctx context.Context) error {
					return checkMirrorPodDisappear(ctx, f.ClientSet, mirrorPodName, ns)
				}, time.Second*3, time.Second).Should(gomega.Succeed())

				ginkgo.By("verifying the pod finishes terminating and is removed from metrics")
				gomega.Eventually(ctx, getKubeletMetrics, 15*time.Second, time.Second).Should(gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
					"kubelet_working_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{
						`kubelet_working_pods{config="desired", lifecycle="sync", static=""}`: timelessSample(0),
						`kubelet_working_pods{config="desired", lifecycle="sync", static="true"}`: timelessSample(0),
						`kubelet_working_pods{config="orphan", lifecycle="sync", static=""}`: timelessSample(0),
						`kubelet_working_pods{config="orphan", lifecycle="sync", static="true"}`: timelessSample(0),
						`kubelet_working_pods{config="runtime_only", lifecycle="sync", static="unknown"}`: timelessSample(0),
						`kubelet_working_pods{config="desired", lifecycle="terminating", static=""}`: timelessSample(0),
						`kubelet_working_pods{config="desired", lifecycle="terminating", static="true"}`: timelessSample(0),
						`kubelet_working_pods{config="orphan", lifecycle="terminating", static=""}`: timelessSample(0),
						`kubelet_working_pods{config="orphan", lifecycle="terminating", static="true"}`: timelessSample(0),
						`kubelet_working_pods{config="runtime_only", lifecycle="terminating", static="unknown"}`: timelessSample(0),
						`kubelet_working_pods{config="desired", lifecycle="terminated", static=""}`: timelessSample(0),
						`kubelet_working_pods{config="desired", lifecycle="terminated", static="true"}`: timelessSample(0),
						`kubelet_working_pods{config="orphan", lifecycle="terminated", static=""}`: timelessSample(0),
						`kubelet_working_pods{config="orphan", lifecycle="terminated", static="true"}`: timelessSample(0),
						`kubelet_working_pods{config="runtime_only", lifecycle="terminated", static="unknown"}`: timelessSample(0),
					}),
					"kubelet_mirror_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{
						`kubelet_mirror_pods`: timelessSample(0),
					}),
					"kubelet_active_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{
						`kubelet_active_pods{static=""}`: timelessSample(0),
						`kubelet_active_pods{static="true"}`: timelessSample(0),
					}),
					"kubelet_desired_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{
						`kubelet_desired_pods{static=""}`: timelessSample(0),
						`kubelet_desired_pods{static="true"}`: timelessSample(0),
					}),
				}))
			})

			ginkgo.AfterEach(func(ctx context.Context) {
				ginkgo.By("starting the container runtime")
				err := startContainerRuntime()
				framework.ExpectNoError(err, "expected no error starting the container runtime")
				ginkgo.By("waiting for the container runtime to start")
				gomega.Eventually(ctx, func(ctx context.Context) error {
					_, _, err := getCRIClient()
					if err != nil {
						return fmt.Errorf("error getting cri client: %v", err)
					}
					return nil
				}, 2*time.Minute, time.Second*5).Should(gomega.Succeed())
			})
		})

		ginkgo.AfterEach(func(ctx context.Context) {
			ginkgo.By("delete the static pod")
			err := deleteStaticPod(podPath, staticPodName, ns)
@@ -197,3 +367,8 @@ func checkMirrorPodRunningWithUID(ctx context.Context, cl clientset.Interface, n
	}
	return nil
}

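// sampleLabelID identifies a metric sample by its full label-set string (for example
// `kubelet_working_pods{config="desired", lifecycle="sync", static=""}`), which is the key used by
// the gstruct.MatchElements assertions above.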
func sampleLabelID(element interface{}) string {
	el := element.(*model.Sample)
	return el.Metric.String()
}
@@ -18,7 +18,6 @@ package e2enode

import (
	"context"
	goerrors "errors"
	"fmt"
	"os"
	"path/filepath"
@@ -40,11 +39,13 @@ import (
	"github.com/google/go-cmp/cmp"
	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
	"k8s.io/cli-runtime/pkg/printers"
	e2evolume "k8s.io/kubernetes/test/e2e/framework/volume"
)

var _ = SIGDescribe("MirrorPod", func() {
	f := framework.NewDefaultFramework("mirror-pod")
	f.NamespacePodSecurityEnforceLevel = admissionapi.LevelBaseline
	f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged
	ginkgo.Context("when create a mirror pod ", func() {
		var ns, podPath, staticPodName, mirrorPodName string
		ginkgo.BeforeEach(func(ctx context.Context) {
@@ -196,8 +197,179 @@ var _ = SIGDescribe("MirrorPod", func() {
			}, 2*time.Minute, time.Second*4).Should(gomega.BeNil())
		})
	})
	ginkgo.Context("when recreating a static pod", func() {
		var ns, podPath, staticPodName, mirrorPodName string
		ginkgo.It("it should launch successfully even if it temporarily failed termination due to volume failing to unmount [NodeConformance] [Serial]", func(ctx context.Context) {
			node := getNodeName(ctx, f)
			ns = f.Namespace.Name
			c := f.ClientSet
			nfsTestConfig, nfsServerPod, nfsServerHost := e2evolume.NewNFSServerWithNodeName(ctx, c, ns, []string{"-G", "777", "/exports"}, node)
			ginkgo.DeferCleanup(func(ctx context.Context) {
				framework.Logf("Cleaning up NFS server pod")
				e2evolume.TestServerCleanup(ctx, f, nfsTestConfig)
			})

			podPath = framework.TestContext.KubeletConfig.StaticPodPath
			staticPodName = "static-pod-nfs-test-pod" + string(uuid.NewUUID())
			mirrorPodName = staticPodName + "-" + framework.TestContext.NodeName

			ginkgo.By(fmt.Sprintf("Creating nfs test pod: %s", staticPodName))

			err := createStaticPodUsingNfs(nfsServerHost, node, "sleep 999999", podPath, staticPodName, ns)
			framework.ExpectNoError(err)
			ginkgo.By(fmt.Sprintf("Waiting for nfs test pod: %s to start running...", staticPodName))
			gomega.Eventually(func() error {
				return checkMirrorPodRunning(ctx, f.ClientSet, mirrorPodName, ns)
			}, 2*time.Minute, time.Second*4).Should(gomega.BeNil())

			mirrorPod, err := c.CoreV1().Pods(ns).Get(ctx, mirrorPodName, metav1.GetOptions{})
			framework.ExpectNoError(err)

			hash, ok := mirrorPod.Annotations[kubetypes.ConfigHashAnnotationKey]
			if !ok || hash == "" {
				framework.Failf("Failed to get hash for mirrorPod")
			}
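			// For file-based static pods the kubelet derives the pod UID from this config hash, so the
			// hash is used below to locate the pod's volume directory on disk.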

			ginkgo.By("Stopping the NFS server")
			stopNfsServer(f, nfsServerPod)

			ginkgo.By("Waiting for NFS server to stop...")
			time.Sleep(30 * time.Second)

			ginkgo.By(fmt.Sprintf("Deleting the static nfs test pod: %s", staticPodName))
			err = deleteStaticPod(podPath, staticPodName, ns)
			framework.ExpectNoError(err)

			// Check for 5 minutes: syncTerminatedPod is expected to keep failing, so the pod volume
			// should not be cleaned up while the NFS server is down.
			gomega.Consistently(func() bool {
				return podVolumeDirectoryExists(types.UID(hash))
			}, 5*time.Minute, 10*time.Second).Should(gomega.BeTrue(), "pod volume should exist while nfs server is stopped")

			ginkgo.By("Start the NFS server")
			restartNfsServer(f, nfsServerPod)

			ginkgo.By("Waiting for the pod volume to be deleted after the NFS server is started")
			gomega.Eventually(func() bool {
				return podVolumeDirectoryExists(types.UID(hash))
			}, 5*time.Minute, 10*time.Second).Should(gomega.BeFalse(), "pod volume should be deleted after nfs server is started")

			// Create the static pod again with the same config and expect it to start running
			err = createStaticPodUsingNfs(nfsServerHost, node, "sleep 999999", podPath, staticPodName, ns)
			framework.ExpectNoError(err)
			ginkgo.By(fmt.Sprintf("Waiting for nfs test pod: %s to start running (after being recreated)", staticPodName))
			gomega.Eventually(func() error {
				return checkMirrorPodRunning(ctx, f.ClientSet, mirrorPodName, ns)
			}, 5*time.Minute, 5*time.Second).Should(gomega.BeNil())
		})

		ginkgo.AfterEach(func(ctx context.Context) {
			ginkgo.By("delete the static pod")
			err := deleteStaticPod(podPath, staticPodName, ns)
			framework.ExpectNoError(err)

			ginkgo.By("wait for the mirror pod to disappear")
			gomega.Eventually(ctx, func(ctx context.Context) error {
				return checkMirrorPodDisappear(ctx, f.ClientSet, mirrorPodName, ns)
			}, 2*time.Minute, time.Second*4).Should(gomega.BeNil())

		})

	})

})

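// podVolumeDirectoryExists reports whether the kubelet still has a volumes directory on disk for the
// pod with the given UID.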
func podVolumeDirectoryExists(uid types.UID) bool {
	podVolumePath := fmt.Sprintf("/var/lib/kubelet/pods/%s/volumes/", uid)
	var podVolumeDirectoryExists bool

	if _, err := os.Stat(podVolumePath); !os.IsNotExist(err) {
		podVolumeDirectoryExists = true
	}

	return podVolumeDirectoryExists
}

// Restart the passed-in nfs-server by issuing a `/usr/sbin/rpc.nfsd 1` command in the
// pod's (only) container. This command changes the number of nfs server threads from
// (presumably) zero back to 1, and therefore allows nfs to open connections again.
func restartNfsServer(f *framework.Framework, serverPod *v1.Pod) {
	const startcmd = "/usr/sbin/rpc.nfsd 1"
	_, _, err := e2evolume.PodExec(f, serverPod, startcmd)
	framework.ExpectNoError(err)
}

// Stop the passed-in nfs-server by issuing a `/usr/sbin/rpc.nfsd 0` command in the
// pod's (only) container. This command changes the number of nfs server threads to 0,
// thus closing all open nfs connections.
func stopNfsServer(f *framework.Framework, serverPod *v1.Pod) {
	const stopcmd = "/usr/sbin/rpc.nfsd 0"
	_, _, err := e2evolume.PodExec(f, serverPod, stopcmd)
	framework.ExpectNoError(err)
}

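// createStaticPodUsingNfs writes a static pod manifest to the kubelet's static pod directory; the
// pod mounts the given NFS export at /mnt and runs the provided command.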
func createStaticPodUsingNfs(nfsIP string, nodeName string, cmd string, dir string, name string, ns string) error {
	ginkgo.By("create pod using nfs volume")

	isPrivileged := true
	cmdLine := []string{"-c", cmd}
	pod := &v1.Pod{
		TypeMeta: metav1.TypeMeta{
			Kind:       "Pod",
			APIVersion: "v1",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: ns,
		},
		Spec: v1.PodSpec{
			NodeName: nodeName,
			Containers: []v1.Container{
				{
					Name:    "pod-nfs-vol",
					Image:   imageutils.GetE2EImage(imageutils.BusyBox),
					Command: []string{"/bin/sh"},
					Args:    cmdLine,
					VolumeMounts: []v1.VolumeMount{
						{
							Name:      "nfs-vol",
							MountPath: "/mnt",
						},
					},
					SecurityContext: &v1.SecurityContext{
						Privileged: &isPrivileged,
					},
				},
			},
			RestartPolicy: v1.RestartPolicyNever, // don't restart pod
			Volumes: []v1.Volume{
				{
					Name: "nfs-vol",
					VolumeSource: v1.VolumeSource{
						NFS: &v1.NFSVolumeSource{
							Server:   nfsIP,
							Path:     "/exports",
							ReadOnly: false,
						},
					},
				},
			},
		},
	}

	file := staticPodPath(dir, name, ns)
	f, err := os.OpenFile(file, os.O_RDWR|os.O_TRUNC|os.O_CREATE, 0666)
	if err != nil {
		return err
	}
	defer f.Close()

	y := printers.YAMLPrinter{}
	return y.PrintObj(pod, f)
}

func staticPodPath(dir, name, namespace string) string {
	return filepath.Join(dir, namespace+"-"+name+".yaml")
}
@@ -238,7 +410,10 @@ func checkMirrorPodDisappear(ctx context.Context, cl clientset.Interface, name,
	if apierrors.IsNotFound(err) {
		return nil
	}
	return goerrors.New("pod not disappear")
	if err == nil {
		return fmt.Errorf("mirror pod %v/%v still exists", namespace, name)
	}
	return fmt.Errorf("expect mirror pod %v/%v to not exist but got error: %w", namespace, name, err)
}

func checkMirrorPodRunning(ctx context.Context, cl clientset.Interface, name, namespace string) error {
@@ -87,15 +87,6 @@ func (n *NodeE2ERemote) SetupTestPackage(tardir, systemSpecName string) error {
	return nil
}

// prependCOSMounterFlag prepends the flag for setting the GCI mounter path to
// args and returns the result.
func prependCOSMounterFlag(args, host, workspace string) (string, error) {
	klog.V(2).Infof("GCI/COS node and GCI/COS mounter both detected, modifying --experimental-mounter-path accordingly")
	mounterPath := filepath.Join(workspace, "mounter")
	args = fmt.Sprintf("--kubelet-flags=--experimental-mounter-path=%s ", mounterPath) + args
	return args, nil
}

// prependMemcgNotificationFlag prepends the flag for enabling memcg
// notification to args and returns the result.
func prependMemcgNotificationFlag(args string) string {
@@ -124,8 +115,7 @@ func osSpecificActions(args, host, workspace string) (string, error) {
		return args, setKubeletSELinuxLabels(host, workspace)
	case strings.Contains(output, "gci"), strings.Contains(output, "cos"):
		args = prependMemcgNotificationFlag(args)
		args = prependGCPCredentialProviderFlag(args, workspace)
		return prependCOSMounterFlag(args, host, workspace)
		return prependGCPCredentialProviderFlag(args, workspace), nil
	case strings.Contains(output, "ubuntu"):
		args = prependGCPCredentialProviderFlag(args, workspace)
		return prependMemcgNotificationFlag(args), nil
@@ -23,11 +23,8 @@ import (
	"bytes"
	"context"
	"fmt"
	"io"
	"log"
	"os"
	"sort"
	"strconv"
	"strings"
	"sync"
	"text/tabwriter"
@@ -39,11 +36,9 @@ import (
	v1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/uuid"
	"k8s.io/apimachinery/pkg/util/wait"
	kubeletstatsv1alpha1 "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
	"k8s.io/kubernetes/pkg/util/procfs"
	"k8s.io/kubernetes/test/e2e/framework"
	e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
@@ -465,38 +460,6 @@ func (r *ResourceCollector) GetResourceTimeSeries() map[string]*perftype.Resourc

const kubeletProcessName = "kubelet"

func getPidsForProcess(name, pidFile string) ([]int, error) {
	if len(pidFile) > 0 {
		pid, err := getPidFromPidFile(pidFile)
		if err == nil {
			return []int{pid}, nil
		}
		// log the error and fall back to pidof
		runtime.HandleError(err)
	}
	return procfs.PidOf(name)
}

func getPidFromPidFile(pidFile string) (int, error) {
	file, err := os.Open(pidFile)
	if err != nil {
		return 0, fmt.Errorf("error opening pid file %s: %w", pidFile, err)
	}
	defer file.Close()

	data, err := io.ReadAll(file)
	if err != nil {
		return 0, fmt.Errorf("error reading pid file %s: %w", pidFile, err)
	}

	pid, err := strconv.Atoi(string(data))
	if err != nil {
		return 0, fmt.Errorf("error parsing %s as a number: %w", string(data), err)
	}

	return pid, nil
}

func getContainerNameForProcess(name, pidFile string) (string, error) {
	pids, err := getPidsForProcess(name, pidFile)
	if err != nil {
@@ -25,17 +25,21 @@ import (
	"io"
	"net"
	"net/http"
	"os"
	"os/exec"
	"regexp"
	"strconv"
	"strings"
	"time"

	"k8s.io/kubernetes/pkg/util/procfs"

	oteltrace "go.opentelemetry.io/otel/trace"

	v1 "k8s.io/api/core/v1"
	apiequality "k8s.io/apimachinery/pkg/api/equality"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/sets"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	clientset "k8s.io/client-go/kubernetes"
@@ -55,6 +59,7 @@ import (
	"k8s.io/kubernetes/pkg/kubelet/types"
	"k8s.io/kubernetes/pkg/kubelet/util"

	"github.com/coreos/go-systemd/v22/dbus"
	"k8s.io/kubernetes/test/e2e/framework"
	e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
@@ -84,12 +89,14 @@ const (

var kubeletHealthCheckURL = fmt.Sprintf("http://127.0.0.1:%d/healthz", ports.KubeletHealthzPort)

var containerRuntimeUnitName = ""

func getNodeSummary(ctx context.Context) (*stats.Summary, error) {
	kubeletConfig, err := getCurrentKubeletConfig(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to get current kubelet config")
	}
	req, err := http.NewRequest("GET", fmt.Sprintf("http://%s/stats/summary", net.JoinHostPort(kubeletConfig.Address, strconv.Itoa(int(kubeletConfig.ReadOnlyPort)))), nil)
	req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("http://%s/stats/summary", net.JoinHostPort(kubeletConfig.Address, strconv.Itoa(int(kubeletConfig.ReadOnlyPort)))), nil)
	if err != nil {
		return nil, fmt.Errorf("failed to build http request: %w", err)
	}
@@ -340,6 +347,71 @@ func findKubeletServiceName(running bool) string {
	return kubeletServiceName
}

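// findContainerRuntimeServiceName resolves the container runtime's PID (from the configured process
// name or pid file) and asks systemd, via D-Bus, for the unit that owns that PID.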
func findContainerRuntimeServiceName() (string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	conn, err := dbus.NewWithContext(ctx)
	framework.ExpectNoError(err, "Failed to setup dbus connection")
	defer conn.Close()

	runtimePids, err := getPidsForProcess(framework.TestContext.ContainerRuntimeProcessName, framework.TestContext.ContainerRuntimePidFile)
	framework.ExpectNoError(err, "failed to get list of container runtime pids")
	framework.ExpectEqual(len(runtimePids), 1, "Unexpected number of container runtime pids. Expected 1 but got %v", len(runtimePids))

	containerRuntimePid := runtimePids[0]

	unitName, err := conn.GetUnitNameByPID(ctx, uint32(containerRuntimePid))
	framework.ExpectNoError(err, "Failed to get container runtime unit name")

	return unitName, nil
}

type containerRuntimeUnitOp int

const (
	startContainerRuntimeUnitOp containerRuntimeUnitOp = iota
	stopContainerRuntimeUnitOp
)

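// performContainerRuntimeUnitOp starts or stops the container runtime's systemd unit over D-Bus and
// waits for the queued job to report completion.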
func performContainerRuntimeUnitOp(op containerRuntimeUnitOp) error {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	conn, err := dbus.NewWithContext(ctx)
	framework.ExpectNoError(err, "Failed to setup dbus connection")
	defer conn.Close()

	if containerRuntimeUnitName == "" {
		containerRuntimeUnitName, err = findContainerRuntimeServiceName()
		framework.ExpectNoError(err, "Failed to find container runtime name")
	}

	reschan := make(chan string)

	switch op {
	case startContainerRuntimeUnitOp:
		conn.StartUnitContext(ctx, containerRuntimeUnitName, "replace", reschan)
	case stopContainerRuntimeUnitOp:
		conn.StopUnitContext(ctx, containerRuntimeUnitName, "replace", reschan)
	default:
		framework.Failf("Unexpected container runtime op: %v", op)
	}

	job := <-reschan
	framework.ExpectEqual(job, "done", "Expected job to complete with done")

	return nil
}

func stopContainerRuntime() error {
	return performContainerRuntimeUnitOp(stopContainerRuntimeUnitOp)
}

func startContainerRuntime() error {
	return performContainerRuntimeUnitOp(startContainerRuntimeUnitOp)
}

// restartKubelet restarts the current kubelet service.
// the "current" kubelet service is the instance managed by the current e2e_node test run.
// If `running` is true, restarts only if the current kubelet is actually running. In some cases,
@@ -465,3 +537,35 @@ func waitForAllContainerRemoval(ctx context.Context, podName, podNS string) {
		return nil
	}, 2*time.Minute, 1*time.Second).Should(gomega.Succeed())
}

func getPidsForProcess(name, pidFile string) ([]int, error) {
	if len(pidFile) > 0 {
		pid, err := getPidFromPidFile(pidFile)
		if err == nil {
			return []int{pid}, nil
		}
		// log the error and fall back to pidof
		runtime.HandleError(err)
	}
	return procfs.PidOf(name)
}

func getPidFromPidFile(pidFile string) (int, error) {
	file, err := os.Open(pidFile)
	if err != nil {
		return 0, fmt.Errorf("error opening pid file %s: %v", pidFile, err)
	}
	defer file.Close()

	data, err := io.ReadAll(file)
	if err != nil {
		return 0, fmt.Errorf("error reading pid file %s: %v", pidFile, err)
	}

	pid, err := strconv.Atoi(string(data))
	if err != nil {
		return 0, fmt.Errorf("error parsing %s as a number: %v", string(data), err)
	}

	return pid, nil
}