kubelet: fix a bug where kubelet drops the QOSClass field of the Pod's status when it rejects a Pod

Co-authored-by: Sergey Kanzhelev <S.Kanzhelev@live.com>
This commit is contained in:
carlory 2024-10-15 17:29:33 +08:00
parent 5b1a4caeda
commit c7e384f9ff
2 changed files with 85 additions and 4 deletions

View File

@ -42,6 +42,7 @@ import (
"k8s.io/client-go/informers"
"k8s.io/mount-utils"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
utilfs "k8s.io/kubernetes/pkg/util/filesystem"
netutils "k8s.io/utils/net"
@ -2276,9 +2277,10 @@ func (kl *Kubelet) deletePod(pod *v1.Pod) error {
// rejectPod emits a warning event on the pod carrying reason and message, then
// passes the status manager a PodFailed status whose Reason is reason and whose
// Message is message prefixed with "Pod was rejected: ".
func (kl *Kubelet) rejectPod(pod *v1.Pod, reason, message string) {
// Surface the rejection as a warning event attached to the pod.
kl.recorder.Eventf(pod, v1.EventTypeWarning, reason, message)
// Report a Failed status that explains why the pod was rejected.
kl.statusManager.SetPodStatus(pod, v1.PodStatus{
Phase: v1.PodFailed,
Reason: reason,
Message: "Pod was rejected: " + message})
QOSClass: v1qos.GetPodQOS(pod), // carry the pod's QoS class into the rejection status instead of dropping it
Phase: v1.PodFailed,
Reason: reason,
Message: "Pod was rejected: " + message})
}
// canAdmitPod determines if a pod can be admitted, and gives a reason if it

View File

@ -20,8 +20,10 @@ import (
"context"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/test/e2e/framework"
@ -36,7 +38,7 @@ var _ = SIGDescribe("PodOSRejection", framework.WithNodeConformance(), func() {
f := framework.NewDefaultFramework("pod-os-rejection")
f.NamespacePodSecurityLevel = admissionapi.LevelBaseline
ginkgo.Context("Kubelet", func() {
ginkgo.It("should reject pod when the node OS doesn't match pod's OS", func(ctx context.Context) {
ginkgo.It("[LinuxOnly] should reject pod when the node OS doesn't match pod's OS", func(ctx context.Context) {
linuxNode, err := findLinuxNode(ctx, f)
framework.ExpectNoError(err)
pod := &v1.Pod{
@ -65,6 +67,83 @@ var _ = SIGDescribe("PodOSRejection", framework.WithNodeConformance(), func() {
})
})
// PodRejectionStatus checks which pod status fields are still present after
// the kubelet rejects a pod that was force-bound to a node it cannot fit on.
var _ = SIGDescribe("PodRejectionStatus", func() {
	f := framework.NewDefaultFramework("pod-rejection-status")
	f.NamespacePodSecurityLevel = admissionapi.LevelBaseline

	ginkgo.Context("Kubelet", func() {
		ginkgo.It("should reject pod when the node didn't have enough resource", func(ctx context.Context) {
			targetNode, err := e2enode.GetRandomReadySchedulableNode(ctx, f.ClientSet)
			framework.ExpectNoError(err, "Failed to get a ready schedulable node")

			// The single container asks for more CPU than any node has, so
			// the pod can never fit anywhere.
			const podName = "pod-out-of-cpu"
			greedyContainer := v1.Container{
				Name:  podName,
				Image: imageutils.GetPauseImageName(),
				Resources: v1.ResourceRequirements{
					Requests: v1.ResourceList{
						v1.ResourceCPU: resource.MustParse("1000000000000"),
					},
				},
			}
			manifest := &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      podName,
					Namespace: f.Namespace.Name,
				},
				Spec: v1.PodSpec{
					Containers: []v1.Container{greedyContainer},
				},
			}
			created := e2epod.NewPodClient(f).Create(ctx, manifest)

			// Let the scheduler mark the pod unschedulable before binding it by hand.
			err = e2epod.WaitForPodNameUnschedulableInNamespace(ctx, f.ClientSet, created.Name, created.Namespace)
			framework.ExpectNoError(err)

			podsAPI := f.ClientSet.CoreV1().Pods(created.Namespace)

			// Snapshot the pod as it stands once the scheduler has given up on
			// it; this status is the baseline for the final comparison.
			unscheduled, err := podsAPI.Get(ctx, created.Name, metav1.GetOptions{})
			framework.ExpectNoError(err)

			// Bind the pod straight to the chosen node so that the kubelet
			// has to run admission on it and report a rejection status.
			forcedBinding := &v1.Binding{
				ObjectMeta: metav1.ObjectMeta{
					Name:      unscheduled.Name,
					Namespace: unscheduled.Namespace,
					UID:       unscheduled.UID,
				},
				Target: v1.ObjectReference{
					Kind: "Node",
					Name: targetNode.Name,
				},
			}
			err = podsAPI.Bind(ctx, forcedBinding, metav1.CreateOptions{})
			framework.ExpectNoError(err)

			// Wait until the pod is reported as failed with the out-of-CPU reason.
			err = e2epod.WaitForPodFailedReason(ctx, f.ClientSet, unscheduled, "OutOfcpu", f.Timeouts.PodStartShort)
			framework.ExpectNoError(err)

			// Re-read the rejected pod to inspect the status it ended up with.
			rejected, err := podsAPI.Get(ctx, unscheduled.Name, metav1.GetOptions{})
			framework.ExpectNoError(err)

			// Start from the pre-binding status and overwrite only the fields
			// that a rejection is allowed to change or clear. Any other field
			// that differs afterwards was dropped by the rejection: it must
			// either be preserved by the kubelet's admission path or be added
			// to the overrides below.
			want := *unscheduled.Status.DeepCopy()
			want.Phase = rejected.Status.Phase
			want.Conditions = nil
			want.Message = rejected.Status.Message
			want.Reason = rejected.Status.Reason
			want.StartTime = rejected.Status.StartTime
			// QOSClass is deliberately not overridden: it has to survive the rejection.
			gomega.Expect(rejected.Status).To(gomega.Equal(want))
		})
	})
})
// findLinuxNode finds a Linux node that is Ready and Schedulable
func findLinuxNode(ctx context.Context, f *framework.Framework) (v1.Node, error) {
selector := labels.Set{"kubernetes.io/os": "linux"}.AsSelector()