diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index ff607a52b46..06e48bfd12d 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -1909,8 +1909,21 @@ func (kl *Kubelet) handleMirrorPod(mirrorPod *v1.Pod, start time.Time) { // a config source. func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) { start := kl.clock.Now() - sort.Sort(sliceutils.PodsByCreationTime(pods)) - for _, pod := range pods { + + // Pass critical pods through admission check first. + var criticalPods []*v1.Pod + var nonCriticalPods []*v1.Pod + for _, p := range pods { + if kubepod.IsCriticalPod(p) { + criticalPods = append(criticalPods, p) + } else { + nonCriticalPods = append(nonCriticalPods, p) + } + } + sort.Sort(sliceutils.PodsByCreationTime(criticalPods)) + sort.Sort(sliceutils.PodsByCreationTime(nonCriticalPods)) + + for _, pod := range append(criticalPods, nonCriticalPods...) { existingPods := kl.podManager.GetPods() // Always add the pod to the pod manager. Kubelet relies on the pod // manager as the source of truth for the desired state. If a pod does diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go index 85b9e95cfa1..351dd69b681 100644 --- a/pkg/kubelet/kubelet_test.go +++ b/pkg/kubelet/kubelet_test.go @@ -461,6 +461,68 @@ func TestHandlePortConflicts(t *testing.T) { require.Equal(t, v1.PodPending, status.Phase) } +// Tests that we sort pods based on criticality. +func TestCriticalPrioritySorting(t *testing.T) { + testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */) + kl := testKubelet.kubelet + nodes := []v1.Node{ + {ObjectMeta: v1.ObjectMeta{Name: testKubeletHostname}, + Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: v1.ResourceList{ + v1.ResourceCPU: *resource.NewMilliQuantity(10, resource.DecimalSI), + v1.ResourceMemory: *resource.NewQuantity(100, resource.BinarySI), + v1.ResourcePods: *resource.NewQuantity(40, resource.DecimalSI), + }}}, + } + kl.nodeLister = testNodeLister{nodes: nodes} + kl.nodeInfo = testNodeInfo{nodes: nodes} + testKubelet.fakeCadvisor.On("MachineInfo").Return(&cadvisorapi.MachineInfo{}, nil) + testKubelet.fakeCadvisor.On("ImagesFsInfo").Return(cadvisorapiv2.FsInfo{}, nil) + testKubelet.fakeCadvisor.On("RootFsInfo").Return(cadvisorapiv2.FsInfo{}, nil) + + spec := v1.PodSpec{NodeName: string(kl.nodeName), + Containers: []v1.Container{{Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + "memory": resource.MustParse("90"), + }, + }}}} + pods := []*v1.Pod{ + podWithUidNameNsSpec("000000000", "newpod", "foo", spec), + podWithUidNameNsSpec("987654321", "oldpod", "foo", spec), + podWithUidNameNsSpec("123456789", "middlepod", "foo", spec), + } + + // Pods are not sorted by creation time. + startTime := time.Now() + pods[0].CreationTimestamp = metav1.NewTime(startTime.Add(10 * time.Second)) + pods[1].CreationTimestamp = metav1.NewTime(startTime) + pods[2].CreationTimestamp = metav1.NewTime(startTime.Add(1 * time.Second)) + + // Make the middle and new pod critical, the middle pod should win + // even though it comes later in the list + critical := map[string]string{kubetypes.CriticalPodAnnotationKey: ""} + pods[0].Annotations = critical + pods[1].Annotations = map[string]string{} + pods[2].Annotations = critical + + // The non-critical pod should be rejected + notfittingPods := []*v1.Pod{pods[0], pods[1]} + fittingPod := pods[2] + + kl.HandlePodAdditions(pods) + // Check pod status stored in the status map. + // notfittingPod should be Failed + for _, p := range notfittingPods { + status, found := kl.statusManager.GetPodStatus(p.UID) + require.True(t, found, "Status of pod %q is not found in the status map", p.UID) + require.Equal(t, v1.PodFailed, status.Phase) + } + + // fittingPod should be Pending + status, found := kl.statusManager.GetPodStatus(fittingPod.UID) + require.True(t, found, "Status of pod %q is not found in the status map", fittingPod.UID) + require.Equal(t, v1.PodPending, status.Phase) +} + // Tests that we handle host name conflicts correctly by setting the failed status in status map. func TestHandleHostNameConflicts(t *testing.T) { testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)