Add pod disruption conditions for kubelet initiated failures
This commit is contained in:
parent c519bc02e8
commit 52cd6755eb
@@ -2433,7 +2433,7 @@ const (
 	PodReasonSchedulingGated = "SchedulingGated"
 	// ContainersReady indicates whether all containers in the pod are ready.
 	ContainersReady PodConditionType = "ContainersReady"
-	// AlphaNoCompatGuaranteeDisruptionTarget indicates the pod is about to be deleted due to a
+	// AlphaNoCompatGuaranteeDisruptionTarget indicates the pod is about to be terminated due to a
 	// disruption (such as preemption, eviction API or garbage-collection).
 	// The constant is to be renamed once the name is accepted within the KEP-3329.
 	AlphaNoCompatGuaranteeDisruptionTarget PodConditionType = "DisruptionTarget"
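For orientation (not part of the diff): a minimal sketch of the condition the kubelet attaches when it initiates a disruption, built from the constants above. The message text mirrors the eviction-manager tests further down and is only an example value.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	// The condition added to pod status when the kubelet terminates the pod
	// due to node-level resource pressure or a graceful node shutdown.
	cond := v1.PodCondition{
		Type:               v1.AlphaNoCompatGuaranteeDisruptionTarget,
		Status:             v1.ConditionTrue,
		Reason:             v1.AlphaNoCompatGuaranteePodReasonTerminationByKubelet,
		Message:            "The node was low on resource: memory. ", // example message
		LastTransitionTime: metav1.Now(),
	}
	fmt.Printf("%s=%s reason=%s\n", cond.Type, cond.Status, cond.Reason)
}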
@@ -999,7 +999,10 @@ func (dc *DisruptionController) nonTerminatingPodHasStaleDisruptionCondition(pod
 		return false, 0
 	}
 	_, cond := apipod.GetPodCondition(&pod.Status, v1.AlphaNoCompatGuaranteeDisruptionTarget)
-	if cond == nil || cond.Status != v1.ConditionTrue {
+	// Pod disruption conditions added by kubelet are never considered stale because the condition might take
+	// arbitrarily long before the pod is terminating (has deletion timestamp). Also, pod conditions present
+	// on pods in terminal phase are not stale to avoid unnecessary status updates.
+	if cond == nil || cond.Status != v1.ConditionTrue || cond.Reason == v1.AlphaNoCompatGuaranteePodReasonTerminationByKubelet || apipod.IsPodPhaseTerminal(pod.Status.Phase) {
 		return false, 0
 	}
 	waitFor := dc.stalePodDisruptionTimeout - dc.clock.Since(cond.LastTransitionTime.Time)
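A small worked example of the requeue delay computed above (a sketch; the two-minute stalePodDisruptionTimeout and the 30-second condition age are assumed values for illustration):

package main

import (
	"fmt"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	// Assumed controller timeout and condition age.
	stalePodDisruptionTimeout := 2 * time.Minute
	lastTransitionTime := metav1.NewTime(time.Now().Add(-30 * time.Second))

	// Mirrors: waitFor := dc.stalePodDisruptionTimeout - dc.clock.Since(cond.LastTransitionTime.Time)
	waitFor := stalePodDisruptionTimeout - time.Since(lastTransitionTime.Time)
	fmt.Printf("re-check the condition in about %s\n", waitFor.Round(time.Second)) // ~1m30s
}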
@ -27,11 +27,14 @@ import (
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/client-go/tools/record"
|
||||
v1helper "k8s.io/component-helpers/scheduling/corev1"
|
||||
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
|
||||
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
|
||||
apiv1resource "k8s.io/kubernetes/pkg/api/v1/resource"
|
||||
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
|
||||
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
||||
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||
@ -386,7 +389,16 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
|
||||
gracePeriodOverride = m.config.MaxPodGracePeriodSeconds
|
||||
}
|
||||
message, annotations := evictionMessage(resourceToReclaim, pod, statsFunc)
|
||||
if m.evictPod(pod, gracePeriodOverride, message, annotations) {
|
||||
var condition *v1.PodCondition
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
|
||||
condition = &v1.PodCondition{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: v1.AlphaNoCompatGuaranteePodReasonTerminationByKubelet,
|
||||
Message: message,
|
||||
}
|
||||
}
|
||||
if m.evictPod(pod, gracePeriodOverride, message, annotations, condition) {
|
||||
metrics.Evictions.WithLabelValues(string(thresholdToReclaim.Signal)).Inc()
|
||||
return []*v1.Pod{pod}
|
||||
}
|
||||
@ -492,7 +504,7 @@ func (m *managerImpl) emptyDirLimitEviction(podStats statsapi.PodStats, pod *v1.
|
||||
used := podVolumeUsed[pod.Spec.Volumes[i].Name]
|
||||
if used != nil && size != nil && size.Sign() == 1 && used.Cmp(*size) > 0 {
|
||||
// the emptyDir usage exceeds the size limit, evict the pod
|
||||
if m.evictPod(pod, 0, fmt.Sprintf(emptyDirMessageFmt, pod.Spec.Volumes[i].Name, size.String()), nil) {
|
||||
if m.evictPod(pod, 0, fmt.Sprintf(emptyDirMessageFmt, pod.Spec.Volumes[i].Name, size.String()), nil, nil) {
|
||||
metrics.Evictions.WithLabelValues(signalEmptyDirFsLimit).Inc()
|
||||
return true
|
||||
}
|
||||
@ -519,7 +531,8 @@ func (m *managerImpl) podEphemeralStorageLimitEviction(podStats statsapi.PodStat
|
||||
podEphemeralStorageLimit := podLimits[v1.ResourceEphemeralStorage]
|
||||
if podEphemeralStorageTotalUsage.Cmp(podEphemeralStorageLimit) > 0 {
|
||||
// the total usage of pod exceeds the total size limit of containers, evict the pod
|
||||
if m.evictPod(pod, 0, fmt.Sprintf(podEphemeralStorageMessageFmt, podEphemeralStorageLimit.String()), nil) {
|
||||
message := fmt.Sprintf(podEphemeralStorageMessageFmt, podEphemeralStorageLimit.String())
|
||||
if m.evictPod(pod, 0, message, nil, nil) {
|
||||
metrics.Evictions.WithLabelValues(signalEphemeralPodFsLimit).Inc()
|
||||
return true
|
||||
}
|
||||
@ -545,7 +558,7 @@ func (m *managerImpl) containerEphemeralStorageLimitEviction(podStats statsapi.P
|
||||
|
||||
if ephemeralStorageThreshold, ok := thresholdsMap[containerStat.Name]; ok {
|
||||
if ephemeralStorageThreshold.Cmp(*containerUsed) < 0 {
|
||||
if m.evictPod(pod, 0, fmt.Sprintf(containerEphemeralStorageMessageFmt, containerStat.Name, ephemeralStorageThreshold.String()), nil) {
|
||||
if m.evictPod(pod, 0, fmt.Sprintf(containerEphemeralStorageMessageFmt, containerStat.Name, ephemeralStorageThreshold.String()), nil, nil) {
|
||||
metrics.Evictions.WithLabelValues(signalEphemeralContainerFsLimit).Inc()
|
||||
return true
|
||||
}
|
||||
@ -556,7 +569,7 @@ func (m *managerImpl) containerEphemeralStorageLimitEviction(podStats statsapi.P
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg string, annotations map[string]string) bool {
|
||||
func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg string, annotations map[string]string, condition *v1.PodCondition) bool {
|
||||
// If the pod is marked as critical and static, and support for critical pod annotations is enabled,
|
||||
// do not evict such pods. Static pods are not re-admitted after evictions.
|
||||
// https://github.com/kubernetes/kubernetes/issues/40573 has more details.
|
||||
@ -572,6 +585,9 @@ func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg
|
||||
status.Phase = v1.PodFailed
|
||||
status.Reason = Reason
|
||||
status.Message = evictMsg
|
||||
if condition != nil {
|
||||
podutil.UpdatePodCondition(status, condition)
|
||||
}
|
||||
})
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Eviction manager: pod failed to evict", "pod", klog.KObj(pod))
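For reference, a standalone sketch of how the podutil.UpdatePodCondition call above behaves, assuming the helper's usual semantics (append the condition if absent, otherwise update it in place and report whether anything changed); the message is an example value.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
)

func main() {
	status := &v1.PodStatus{Phase: v1.PodFailed}
	condition := &v1.PodCondition{
		Type:    v1.AlphaNoCompatGuaranteeDisruptionTarget,
		Status:  v1.ConditionTrue,
		Reason:  v1.AlphaNoCompatGuaranteePodReasonTerminationByKubelet,
		Message: "The node was low on resource: memory. ", // example message
	}
	// First call appends the condition and reports a change.
	fmt.Println(podutil.UpdatePodCondition(status, condition)) // expected: true
	// Repeating the identical condition should be a no-op.
	fmt.Println(podutil.UpdatePodCondition(status, condition)) // expected: false
}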
|
||||
|
@ -23,6 +23,8 @@ import (
|
||||
"time"
|
||||
|
||||
gomock "github.com/golang/mock/gomock"
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/google/go-cmp/cmp/cmpopts"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
@ -185,6 +187,206 @@ type podToMake struct {
|
||||
perLocalVolumeInodesUsed string
|
||||
}
|
||||
|
||||
func TestMemoryPressure_VerifyPodStatus(t *testing.T) {
|
||||
testCases := map[string]struct {
|
||||
wantPodStatus v1.PodStatus
|
||||
}{
|
||||
"eviction due to memory pressure": {
|
||||
wantPodStatus: v1.PodStatus{
|
||||
Phase: v1.PodFailed,
|
||||
Reason: "Evicted",
|
||||
Message: "The node was low on resource: memory. ",
|
||||
},
|
||||
},
|
||||
}
|
||||
for name, tc := range testCases {
|
||||
for _, enablePodDisruptionConditions := range []bool{false, true} {
|
||||
t.Run(fmt.Sprintf("%s;PodDisruptionConditions=%v", name, enablePodDisruptionConditions), func(t *testing.T) {
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, enablePodDisruptionConditions)()
|
||||
|
||||
podMaker := makePodWithMemoryStats
|
||||
summaryStatsMaker := makeMemoryStats
|
||||
podsToMake := []podToMake{
|
||||
{name: "below-requests", requests: newResourceList("", "1Gi", ""), limits: newResourceList("", "1Gi", ""), memoryWorkingSet: "900Mi"},
|
||||
{name: "above-requests", requests: newResourceList("", "100Mi", ""), limits: newResourceList("", "1Gi", ""), memoryWorkingSet: "700Mi"},
|
||||
}
|
||||
pods := []*v1.Pod{}
|
||||
podStats := map[*v1.Pod]statsapi.PodStats{}
|
||||
for _, podToMake := range podsToMake {
|
||||
pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
|
||||
pods = append(pods, pod)
|
||||
podStats[pod] = podStat
|
||||
}
|
||||
activePodsFunc := func() []*v1.Pod {
|
||||
return pods
|
||||
}
|
||||
|
||||
fakeClock := testingclock.NewFakeClock(time.Now())
|
||||
podKiller := &mockPodKiller{}
|
||||
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
|
||||
diskGC := &mockDiskGC{err: nil}
|
||||
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
|
||||
|
||||
config := Config{
|
||||
PressureTransitionPeriod: time.Minute * 5,
|
||||
Thresholds: []evictionapi.Threshold{
|
||||
{
|
||||
Signal: evictionapi.SignalMemoryAvailable,
|
||||
Operator: evictionapi.OpLessThan,
|
||||
Value: evictionapi.ThresholdValue{
|
||||
Quantity: quantityMustParse("2Gi"),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("1500Mi", podStats)}
|
||||
manager := &managerImpl{
|
||||
clock: fakeClock,
|
||||
killPodFunc: podKiller.killPodNow,
|
||||
imageGC: diskGC,
|
||||
containerGC: diskGC,
|
||||
config: config,
|
||||
recorder: &record.FakeRecorder{},
|
||||
summaryProvider: summaryProvider,
|
||||
nodeRef: nodeRef,
|
||||
nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
|
||||
thresholdsFirstObservedAt: thresholdsObservedAt{},
|
||||
}
|
||||
|
||||
// synchronize to detect the memory pressure
|
||||
manager.synchronize(diskInfoProvider, activePodsFunc)
|
||||
|
||||
// verify memory pressure is detected
|
||||
if !manager.IsUnderMemoryPressure() {
|
||||
t.Fatalf("Manager should have detected memory pressure")
|
||||
}
|
||||
|
||||
// verify a pod is selected for eviction
|
||||
if podKiller.pod == nil {
|
||||
t.Fatalf("Manager should have selected a pod for eviction")
|
||||
}
|
||||
|
||||
wantPodStatus := tc.wantPodStatus.DeepCopy()
|
||||
if enablePodDisruptionConditions {
|
||||
wantPodStatus.Conditions = append(wantPodStatus.Conditions, v1.PodCondition{
|
||||
Type: "DisruptionTarget",
|
||||
Status: "True",
|
||||
Reason: "TerminationByKubelet",
|
||||
Message: "The node was low on resource: memory. ",
|
||||
})
|
||||
}
|
||||
|
||||
// verify the pod status after applying the status update function
|
||||
podKiller.statusFn(&podKiller.pod.Status)
|
||||
if diff := cmp.Diff(*wantPodStatus, podKiller.pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" {
|
||||
t.Errorf("Unexpected pod status of the evicted pod (-want,+got):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDiskPressureNodeFs_VerifyPodStatus(t *testing.T) {
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)()
|
||||
|
||||
testCases := map[string]struct {
|
||||
wantPodStatus v1.PodStatus
|
||||
}{
|
||||
"eviction due to disk pressure": {
|
||||
wantPodStatus: v1.PodStatus{
|
||||
Phase: v1.PodFailed,
|
||||
Reason: "Evicted",
|
||||
Message: "The node was low on resource: ephemeral-storage. ",
|
||||
},
|
||||
},
|
||||
}
|
||||
for name, tc := range testCases {
|
||||
for _, enablePodDisruptionConditions := range []bool{false, true} {
|
||||
t.Run(fmt.Sprintf("%s;PodDisruptionConditions=%v", name, enablePodDisruptionConditions), func(t *testing.T) {
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, enablePodDisruptionConditions)()
|
||||
|
||||
podMaker := makePodWithDiskStats
|
||||
summaryStatsMaker := makeDiskStats
|
||||
podsToMake := []podToMake{
|
||||
{name: "below-requests", requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "900Mi"},
|
||||
{name: "above-requests", requests: newResourceList("", "", "100Mi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "700Mi"},
|
||||
}
|
||||
pods := []*v1.Pod{}
|
||||
podStats := map[*v1.Pod]statsapi.PodStats{}
|
||||
for _, podToMake := range podsToMake {
|
||||
pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed)
|
||||
pods = append(pods, pod)
|
||||
podStats[pod] = podStat
|
||||
}
|
||||
activePodsFunc := func() []*v1.Pod {
|
||||
return pods
|
||||
}
|
||||
|
||||
fakeClock := testingclock.NewFakeClock(time.Now())
|
||||
podKiller := &mockPodKiller{}
|
||||
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
|
||||
diskGC := &mockDiskGC{err: nil}
|
||||
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
|
||||
|
||||
config := Config{
|
||||
PressureTransitionPeriod: time.Minute * 5,
|
||||
Thresholds: []evictionapi.Threshold{
|
||||
{
|
||||
Signal: evictionapi.SignalNodeFsAvailable,
|
||||
Operator: evictionapi.OpLessThan,
|
||||
Value: evictionapi.ThresholdValue{
|
||||
Quantity: quantityMustParse("2Gi"),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("1.5Gi", "200Gi", podStats)}
|
||||
manager := &managerImpl{
|
||||
clock: fakeClock,
|
||||
killPodFunc: podKiller.killPodNow,
|
||||
imageGC: diskGC,
|
||||
containerGC: diskGC,
|
||||
config: config,
|
||||
recorder: &record.FakeRecorder{},
|
||||
summaryProvider: summaryProvider,
|
||||
nodeRef: nodeRef,
|
||||
nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
|
||||
thresholdsFirstObservedAt: thresholdsObservedAt{},
|
||||
}
|
||||
|
||||
// synchronize
|
||||
manager.synchronize(diskInfoProvider, activePodsFunc)
|
||||
|
||||
// verify manager detected disk pressure
|
||||
if !manager.IsUnderDiskPressure() {
|
||||
t.Fatalf("Manager should report disk pressure")
|
||||
}
|
||||
|
||||
// verify a pod is selected for eviction
|
||||
if podKiller.pod == nil {
|
||||
t.Fatalf("Manager should have selected a pod for eviction")
|
||||
}
|
||||
|
||||
wantPodStatus := tc.wantPodStatus.DeepCopy()
|
||||
if enablePodDisruptionConditions {
|
||||
wantPodStatus.Conditions = append(wantPodStatus.Conditions, v1.PodCondition{
|
||||
Type: "DisruptionTarget",
|
||||
Status: "True",
|
||||
Reason: "TerminationByKubelet",
|
||||
Message: "The node was low on resource: ephemeral-storage. ",
|
||||
})
|
||||
}
|
||||
|
||||
// verify the pod status after applying the status update function
|
||||
podKiller.statusFn(&podKiller.pod.Status)
|
||||
if diff := cmp.Diff(*wantPodStatus, podKiller.pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" {
|
||||
t.Errorf("Unexpected pod status of the evicted pod (-want,+got):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestMemoryPressure
|
||||
func TestMemoryPressure(t *testing.T) {
|
||||
podMaker := makePodWithMemoryStats
|
||||
|
@ -1510,6 +1510,22 @@ func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.Po
|
||||
s.Conditions = append(s.Conditions, c)
|
||||
}
|
||||
}
|
||||
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
|
||||
// copy over the pod disruption conditions from state which is already
|
||||
// updated during the eviction (due to either node resource pressure or
|
||||
// node graceful shutdown). We do not re-generate the conditions based
|
||||
// on the container statuses as they are added based on one-time events.
|
||||
cType := v1.AlphaNoCompatGuaranteeDisruptionTarget
|
||||
if _, condition := podutil.GetPodConditionFromList(oldPodStatus.Conditions, cType); condition != nil {
|
||||
if i, _ := podutil.GetPodConditionFromList(s.Conditions, cType); i >= 0 {
|
||||
s.Conditions[i] = *condition
|
||||
} else {
|
||||
s.Conditions = append(s.Conditions, *condition)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// set all Kubelet-owned conditions
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.PodHasNetworkCondition) {
|
||||
s.Conditions = append(s.Conditions, status.GeneratePodHasNetworkCondition(pod, podStatus))
|
||||
|
@ -2497,7 +2497,9 @@ func Test_generateAPIPodStatus(t *testing.T) {
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
now := metav1.Now()
|
||||
normalized_now := now.Rfc3339Copy()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
@ -2505,9 +2507,66 @@ func Test_generateAPIPodStatus(t *testing.T) {
|
||||
currentStatus *kubecontainer.PodStatus
|
||||
unreadyContainer []string
|
||||
previousStatus v1.PodStatus
|
||||
enablePodDisruptionConditions bool
|
||||
expected v1.PodStatus
|
||||
expectedPodDisruptionCondition v1.PodCondition
|
||||
expectedPodHasNetworkCondition v1.PodCondition
|
||||
}{
|
||||
{
|
||||
name: "pod disruption condition is copied over; PodDisruptionConditions enabled",
|
||||
pod: &v1.Pod{
|
||||
Spec: desiredState,
|
||||
Status: v1.PodStatus{
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
runningState("containerA"),
|
||||
runningState("containerB"),
|
||||
},
|
||||
Conditions: []v1.PodCondition{{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
LastTransitionTime: normalized_now,
|
||||
}},
|
||||
},
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "my-pod", DeletionTimestamp: &now},
|
||||
},
|
||||
currentStatus: sandboxReadyStatus,
|
||||
previousStatus: v1.PodStatus{
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
runningState("containerA"),
|
||||
runningState("containerB"),
|
||||
},
|
||||
Conditions: []v1.PodCondition{{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
LastTransitionTime: normalized_now,
|
||||
}},
|
||||
},
|
||||
enablePodDisruptionConditions: true,
|
||||
expected: v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
HostIP: "127.0.0.1",
|
||||
QOSClass: v1.PodQOSBestEffort,
|
||||
Conditions: []v1.PodCondition{
|
||||
{Type: v1.PodInitialized, Status: v1.ConditionTrue},
|
||||
{Type: v1.PodReady, Status: v1.ConditionTrue},
|
||||
{Type: v1.ContainersReady, Status: v1.ConditionTrue},
|
||||
{Type: v1.PodScheduled, Status: v1.ConditionTrue},
|
||||
},
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
ready(waitingWithLastTerminationUnknown("containerA", 0)),
|
||||
ready(waitingWithLastTerminationUnknown("containerB", 0)),
|
||||
},
|
||||
},
|
||||
expectedPodDisruptionCondition: v1.PodCondition{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
LastTransitionTime: normalized_now,
|
||||
},
|
||||
expectedPodHasNetworkCondition: v1.PodCondition{
|
||||
Type: kubetypes.PodHasNetwork,
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "current status ready, with previous statuses and deletion",
|
||||
pod: &v1.Pod{
|
||||
@ -2876,6 +2935,7 @@ func Test_generateAPIPodStatus(t *testing.T) {
|
||||
for _, test := range tests {
|
||||
for _, enablePodHasNetworkCondition := range []bool{false, true} {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodHasNetworkCondition, enablePodHasNetworkCondition)()
|
||||
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
|
||||
defer testKubelet.Cleanup()
|
||||
@ -2884,12 +2944,16 @@ func Test_generateAPIPodStatus(t *testing.T) {
|
||||
for _, name := range test.unreadyContainer {
|
||||
kl.readinessManager.Set(kubecontainer.BuildContainerID("", findContainerStatusByName(test.expected, name).ContainerID), results.Failure, test.pod)
|
||||
}
|
||||
expected := test.expected.DeepCopy()
|
||||
actual := kl.generateAPIPodStatus(test.pod, test.currentStatus)
|
||||
if enablePodHasNetworkCondition {
|
||||
test.expected.Conditions = append([]v1.PodCondition{test.expectedPodHasNetworkCondition}, test.expected.Conditions...)
|
||||
expected.Conditions = append([]v1.PodCondition{test.expectedPodHasNetworkCondition}, expected.Conditions...)
|
||||
}
|
||||
if !apiequality.Semantic.DeepEqual(test.expected, actual) {
|
||||
t.Fatalf("Unexpected status: %s", diff.ObjectReflectDiff(actual, test.expected))
|
||||
if test.enablePodDisruptionConditions {
|
||||
expected.Conditions = append([]v1.PodCondition{test.expectedPodDisruptionCondition}, expected.Conditions...)
|
||||
}
|
||||
if !apiequality.Semantic.DeepEqual(*expected, actual) {
|
||||
t.Fatalf("Unexpected status: %s", diff.ObjectReflectDiff(*expected, actual))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
@ -31,6 +31,7 @@ import (
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/client-go/tools/record"
|
||||
"k8s.io/klog/v2"
|
||||
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
|
||||
"k8s.io/kubernetes/pkg/apis/scheduling"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
|
||||
@ -380,6 +381,14 @@ func (m *managerImpl) processShutdownEvent() error {
|
||||
}
|
||||
status.Message = nodeShutdownMessage
|
||||
status.Reason = nodeShutdownReason
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
|
||||
podutil.UpdatePodCondition(status, &v1.PodCondition{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: v1.AlphaNoCompatGuaranteePodReasonTerminationByKubelet,
|
||||
Message: nodeShutdownMessage,
|
||||
})
|
||||
}
|
||||
}); err != nil {
|
||||
m.logger.V(1).Info("Shutdown manager failed killing pod", "pod", klog.KObj(pod), "err", err)
|
||||
} else {
|
||||
|
@ -27,6 +27,8 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/google/go-cmp/cmp/cmpopts"
|
||||
"github.com/stretchr/testify/assert"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
@ -123,10 +125,86 @@ func TestManager(t *testing.T) {
|
||||
shutdownGracePeriodCriticalPods time.Duration
|
||||
systemInhibitDelay time.Duration
|
||||
overrideSystemInhibitDelay time.Duration
|
||||
enablePodDisruptionConditions bool
|
||||
expectedDidOverrideInhibitDelay bool
|
||||
expectedPodToGracePeriodOverride map[string]int64
|
||||
expectedError error
|
||||
expectedPodStatuses map[string]v1.PodStatus
|
||||
}{
|
||||
{
|
||||
desc: "verify pod status; PodDisruptionConditions enabled",
|
||||
activePods: []*v1.Pod{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "running-pod"},
|
||||
Spec: v1.PodSpec{},
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
},
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "failed-pod"},
|
||||
Spec: v1.PodSpec{},
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodFailed,
|
||||
},
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "succeeded-pod"},
|
||||
Spec: v1.PodSpec{},
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodSucceeded,
|
||||
},
|
||||
},
|
||||
},
|
||||
shutdownGracePeriodRequested: time.Duration(30 * time.Second),
|
||||
shutdownGracePeriodCriticalPods: time.Duration(10 * time.Second),
|
||||
systemInhibitDelay: time.Duration(40 * time.Second),
|
||||
overrideSystemInhibitDelay: time.Duration(40 * time.Second),
|
||||
enablePodDisruptionConditions: true,
|
||||
expectedDidOverrideInhibitDelay: false,
|
||||
expectedPodToGracePeriodOverride: map[string]int64{"running-pod": 20, "failed-pod": 20, "succeeded-pod": 20},
|
||||
expectedPodStatuses: map[string]v1.PodStatus{
|
||||
"running-pod": {
|
||||
Phase: v1.PodFailed,
|
||||
Message: "Pod was terminated in response to imminent node shutdown.",
|
||||
Reason: "Terminated",
|
||||
Conditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "TerminationByKubelet",
|
||||
Message: "Pod was terminated in response to imminent node shutdown.",
|
||||
},
|
||||
},
|
||||
},
|
||||
"failed-pod": {
|
||||
Phase: v1.PodFailed,
|
||||
Message: "Pod was terminated in response to imminent node shutdown.",
|
||||
Reason: "Terminated",
|
||||
Conditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "TerminationByKubelet",
|
||||
Message: "Pod was terminated in response to imminent node shutdown.",
|
||||
},
|
||||
},
|
||||
},
|
||||
"succeeded-pod": {
|
||||
Phase: v1.PodSucceeded,
|
||||
Message: "Pod was terminated in response to imminent node shutdown.",
|
||||
Reason: "Terminated",
|
||||
Conditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "TerminationByKubelet",
|
||||
Message: "Pod was terminated in response to imminent node shutdown.",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "no override (total=30s, critical=10s)",
|
||||
activePods: []*v1.Pod{normalPodNoGracePeriod, criticalPodNoGracePeriod},
|
||||
@ -134,8 +212,21 @@ func TestManager(t *testing.T) {
|
||||
shutdownGracePeriodCriticalPods: time.Duration(10 * time.Second),
|
||||
systemInhibitDelay: time.Duration(40 * time.Second),
|
||||
overrideSystemInhibitDelay: time.Duration(40 * time.Second),
|
||||
enablePodDisruptionConditions: false,
|
||||
expectedDidOverrideInhibitDelay: false,
|
||||
expectedPodToGracePeriodOverride: map[string]int64{"normal-pod-nil-grace-period": 20, "critical-pod-nil-grace-period": 10},
|
||||
expectedPodStatuses: map[string]v1.PodStatus{
|
||||
"normal-pod-nil-grace-period": {
|
||||
Phase: v1.PodFailed,
|
||||
Message: "Pod was terminated in response to imminent node shutdown.",
|
||||
Reason: "Terminated",
|
||||
},
|
||||
"critical-pod-nil-grace-period": {
|
||||
Phase: v1.PodFailed,
|
||||
Message: "Pod was terminated in response to imminent node shutdown.",
|
||||
Reason: "Terminated",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "no override (total=30s, critical=10s) pods with terminationGracePeriod and without",
|
||||
@ -228,6 +319,7 @@ func TestManager(t *testing.T) {
|
||||
if gracePeriodOverride != nil {
|
||||
gracePeriod = *gracePeriodOverride
|
||||
}
|
||||
fn(&pod.Status)
|
||||
podKillChan <- PodKillInfo{Name: pod.Name, GracePeriod: gracePeriod}
|
||||
return nil
|
||||
}
|
||||
@ -239,6 +331,7 @@ func TestManager(t *testing.T) {
|
||||
systemDbus = func() (dbusInhibiter, error) {
|
||||
return fakeDbus, nil
|
||||
}
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.PodDisruptionConditions, tc.enablePodDisruptionConditions)()
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.GracefulNodeShutdown, true)()
|
||||
|
||||
proberManager := probetest.FakeManager{}
|
||||
@ -296,6 +389,13 @@ func TestManager(t *testing.T) {
|
||||
assert.Equal(t, manager.Admit(nil).Admit, false)
|
||||
assert.Equal(t, tc.expectedPodToGracePeriodOverride, killedPodsToGracePeriods)
|
||||
assert.Equal(t, tc.expectedDidOverrideInhibitDelay, fakeDbus.didOverrideInhibitDelay, "override system inhibit delay differs")
|
||||
if tc.expectedPodStatuses != nil {
|
||||
for _, pod := range tc.activePods {
|
||||
if diff := cmp.Diff(tc.expectedPodStatuses[pod.Name], pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" {
|
||||
t.Errorf("Unexpected PodStatus: (-want,+got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
@ -34,8 +34,10 @@ import (
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/apimachinery/pkg/util/diff"
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/klog/v2"
|
||||
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||
kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
|
||||
@ -150,7 +152,8 @@ func NewManager(kubeClient clientset.Interface, podManager kubepod.Manager, podD
|
||||
func isPodStatusByKubeletEqual(oldStatus, status *v1.PodStatus) bool {
|
||||
oldCopy := oldStatus.DeepCopy()
|
||||
for _, c := range status.Conditions {
|
||||
if kubetypes.PodConditionByKubelet(c.Type) {
|
||||
// both owned and shared conditions are used for kubelet status equality
|
||||
if kubetypes.PodConditionByKubelet(c.Type) || kubetypes.PodConditionSharedByKubelet(c.Type) {
|
||||
_, oc := podutil.GetPodCondition(oldCopy, c.Type)
|
||||
if oc == nil || oc.Status != c.Status || oc.Message != c.Message || oc.Reason != c.Reason {
|
||||
return false
|
||||
@ -501,6 +504,11 @@ func (m *manager) updateStatusInternal(pod *v1.Pod, status v1.PodStatus, forceUp
|
||||
// Set PodScheduledCondition.LastTransitionTime.
|
||||
updateLastTransitionTime(&status, &oldStatus, v1.PodScheduled)
|
||||
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
|
||||
// Set DisruptionTarget.LastTransitionTime.
|
||||
updateLastTransitionTime(&status, &oldStatus, v1.AlphaNoCompatGuaranteeDisruptionTarget)
|
||||
}
|
||||
|
||||
// ensure that the start time does not change across updates.
|
||||
if oldStatus.StartTime != nil && !oldStatus.StartTime.IsZero() {
|
||||
status.StartTime = oldStatus.StartTime
|
||||
@ -879,9 +887,17 @@ func mergePodStatus(oldPodStatus, newPodStatus v1.PodStatus, couldHaveRunningCon
|
||||
podConditions = append(podConditions, c)
|
||||
}
|
||||
}
|
||||
|
||||
for _, c := range newPodStatus.Conditions {
|
||||
if kubetypes.PodConditionByKubelet(c.Type) {
|
||||
podConditions = append(podConditions, c)
|
||||
} else if kubetypes.PodConditionSharedByKubelet(c.Type) {
|
||||
// for shared conditions we update or append in podConditions
|
||||
if i, _ := podutil.GetPodConditionFromList(podConditions, c.Type); i >= 0 {
|
||||
podConditions[i] = c
|
||||
} else {
|
||||
podConditions = append(podConditions, c)
|
||||
}
|
||||
}
|
||||
}
|
||||
newPodStatus.Conditions = podConditions
|
||||
@ -900,6 +916,16 @@ func mergePodStatus(oldPodStatus, newPodStatus v1.PodStatus, couldHaveRunningCon
|
||||
newPodStatus.Phase = oldPodStatus.Phase
|
||||
newPodStatus.Reason = oldPodStatus.Reason
|
||||
newPodStatus.Message = oldPodStatus.Message
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
|
||||
// revert setting of the pod disruption condition until the pod is terminal in order not to issue
|
||||
// an unnecessary PATCH request
|
||||
revertPodCondition(&oldPodStatus, &newPodStatus, v1.AlphaNoCompatGuaranteeDisruptionTarget)
|
||||
}
|
||||
} else {
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
|
||||
// update the LastTransitionTime when transitioning into the failed state
|
||||
updateLastTransitionTime(&newPodStatus, &oldPodStatus, v1.AlphaNoCompatGuaranteeDisruptionTarget)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -920,6 +946,18 @@ func mergePodStatus(oldPodStatus, newPodStatus v1.PodStatus, couldHaveRunningCon
|
||||
return newPodStatus
|
||||
}
|
||||
|
||||
func revertPodCondition(oldPodStatus, newPodStatus *v1.PodStatus, cType v1.PodConditionType) {
|
||||
if newIndex, newCondition := podutil.GetPodConditionFromList(newPodStatus.Conditions, cType); newCondition != nil {
|
||||
if _, oldCondition := podutil.GetPodConditionFromList(oldPodStatus.Conditions, cType); oldCondition != nil {
|
||||
// revert the new condition to what was before
|
||||
newPodStatus.Conditions[newIndex] = *oldCondition
|
||||
} else {
|
||||
// delete the new condition as it wasn't there before
|
||||
newPodStatus.Conditions = append(newPodStatus.Conditions[:newIndex], newPodStatus.Conditions[newIndex+1:]...)
|
||||
}
|
||||
}
|
||||
}
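To illustrate the helper above, a self-contained sketch: the function body is copied from this hunk, and the sample statuses are assumptions for the example. When the condition was not present in the old status it is dropped from the new one; otherwise it is reverted to the old value, so no unnecessary status PATCH is issued before the pod is terminal.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
)

// revertPodCondition is copied from the hunk above so the example is self-contained.
func revertPodCondition(oldPodStatus, newPodStatus *v1.PodStatus, cType v1.PodConditionType) {
	if newIndex, newCondition := podutil.GetPodConditionFromList(newPodStatus.Conditions, cType); newCondition != nil {
		if _, oldCondition := podutil.GetPodConditionFromList(oldPodStatus.Conditions, cType); oldCondition != nil {
			// revert the new condition to what was before
			newPodStatus.Conditions[newIndex] = *oldCondition
		} else {
			// delete the new condition as it wasn't there before
			newPodStatus.Conditions = append(newPodStatus.Conditions[:newIndex], newPodStatus.Conditions[newIndex+1:]...)
		}
	}
}

func main() {
	oldStatus := &v1.PodStatus{} // the old status does not carry the condition yet
	newStatus := &v1.PodStatus{Conditions: []v1.PodCondition{{
		Type:   v1.AlphaNoCompatGuaranteeDisruptionTarget,
		Status: v1.ConditionTrue,
		Reason: v1.AlphaNoCompatGuaranteePodReasonTerminationByKubelet,
	}}}
	// The pod is not yet terminal, so the freshly added condition is dropped.
	revertPodCondition(oldStatus, newStatus, v1.AlphaNoCompatGuaranteeDisruptionTarget)
	fmt.Println(len(newStatus.Conditions)) // 0
}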
|
||||
|
||||
// NeedToReconcilePodReadiness returns whether the pod "Ready" condition needs to be reconciled
|
||||
func NeedToReconcilePodReadiness(pod *v1.Pod) bool {
|
||||
if len(pod.Spec.ReadinessGates) == 0 {
|
||||
|
@ -34,11 +34,14 @@ import (
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/runtime/schema"
|
||||
"k8s.io/apimachinery/pkg/util/diff"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
core "k8s.io/client-go/testing"
|
||||
featuregatetesting "k8s.io/component-base/featuregate/testing"
|
||||
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
|
||||
api "k8s.io/kubernetes/pkg/apis/core"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
kubeconfigmap "k8s.io/kubernetes/pkg/kubelet/configmap"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
|
||||
@ -1400,22 +1403,204 @@ func deleteAction() core.DeleteAction {
|
||||
|
||||
func TestMergePodStatus(t *testing.T) {
|
||||
useCases := []struct {
|
||||
desc string
|
||||
hasRunningContainers bool
|
||||
oldPodStatus func(input v1.PodStatus) v1.PodStatus
|
||||
newPodStatus func(input v1.PodStatus) v1.PodStatus
|
||||
expectPodStatus v1.PodStatus
|
||||
desc string
|
||||
enablePodDisruptionConditions bool
|
||||
hasRunningContainers bool
|
||||
oldPodStatus func(input v1.PodStatus) v1.PodStatus
|
||||
newPodStatus func(input v1.PodStatus) v1.PodStatus
|
||||
expectPodStatus v1.PodStatus
|
||||
}{
|
||||
{
|
||||
"no change",
|
||||
false,
|
||||
false,
|
||||
func(input v1.PodStatus) v1.PodStatus { return input },
|
||||
func(input v1.PodStatus) v1.PodStatus { return input },
|
||||
getPodStatus(),
|
||||
},
|
||||
{
|
||||
"add DisruptionTarget condition when transitioning into failed phase; PodDisruptionConditions enabled",
|
||||
true,
|
||||
false,
|
||||
func(input v1.PodStatus) v1.PodStatus { return input },
|
||||
func(input v1.PodStatus) v1.PodStatus {
|
||||
input.Phase = v1.PodFailed
|
||||
input.Conditions = append(input.Conditions, v1.PodCondition{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "TerminationByKubelet",
|
||||
})
|
||||
return input
|
||||
},
|
||||
v1.PodStatus{
|
||||
Phase: v1.PodFailed,
|
||||
Conditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "TerminationByKubelet",
|
||||
},
|
||||
{
|
||||
Type: v1.PodReady,
|
||||
Status: v1.ConditionFalse,
|
||||
Reason: "PodFailed",
|
||||
},
|
||||
{
|
||||
Type: v1.PodScheduled,
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
{
|
||||
Type: v1.ContainersReady,
|
||||
Status: v1.ConditionFalse,
|
||||
Reason: "PodFailed",
|
||||
},
|
||||
},
|
||||
Message: "Message",
|
||||
},
|
||||
},
|
||||
{
|
||||
"don't add DisruptionTarget condition when transitioning into failed phase, but there are might still be running containers; PodDisruptionConditions enabled",
|
||||
true,
|
||||
true,
|
||||
func(input v1.PodStatus) v1.PodStatus { return input },
|
||||
func(input v1.PodStatus) v1.PodStatus {
|
||||
input.Phase = v1.PodFailed
|
||||
input.Conditions = append(input.Conditions, v1.PodCondition{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "TerminationByKubelet",
|
||||
})
|
||||
return input
|
||||
},
|
||||
v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
Conditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.PodReady,
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
{
|
||||
Type: v1.PodScheduled,
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
},
|
||||
Message: "Message",
|
||||
},
|
||||
},
|
||||
{
|
||||
"preserve DisruptionTarget condition; PodDisruptionConditions enabled",
|
||||
true,
|
||||
false,
|
||||
func(input v1.PodStatus) v1.PodStatus {
|
||||
input.Conditions = append(input.Conditions, v1.PodCondition{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "TerminationByKubelet",
|
||||
})
|
||||
return input
|
||||
},
|
||||
func(input v1.PodStatus) v1.PodStatus {
|
||||
return input
|
||||
},
|
||||
v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
Conditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.PodReady,
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
{
|
||||
Type: v1.PodScheduled,
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "TerminationByKubelet",
|
||||
},
|
||||
},
|
||||
Message: "Message",
|
||||
},
|
||||
},
|
||||
{
|
||||
"preserve DisruptionTarget condition; PodDisruptionConditions disabled",
|
||||
false,
|
||||
false,
|
||||
func(input v1.PodStatus) v1.PodStatus {
|
||||
input.Conditions = append(input.Conditions, v1.PodCondition{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "TerminationByKubelet",
|
||||
})
|
||||
return input
|
||||
},
|
||||
func(input v1.PodStatus) v1.PodStatus {
|
||||
return input
|
||||
},
|
||||
v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
Conditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.PodReady,
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
{
|
||||
Type: v1.PodScheduled,
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "TerminationByKubelet",
|
||||
},
|
||||
},
|
||||
Message: "Message",
|
||||
},
|
||||
},
|
||||
{
|
||||
"override DisruptionTarget condition; PodDisruptionConditions enabled",
|
||||
true,
|
||||
false,
|
||||
func(input v1.PodStatus) v1.PodStatus {
|
||||
input.Conditions = append(input.Conditions, v1.PodCondition{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "EvictedByEvictionAPI",
|
||||
})
|
||||
return input
|
||||
},
|
||||
func(input v1.PodStatus) v1.PodStatus {
|
||||
input.Conditions = append(input.Conditions, v1.PodCondition{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "TerminationByKubelet",
|
||||
})
|
||||
return input
|
||||
},
|
||||
v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
Conditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.PodReady,
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
{
|
||||
Type: v1.PodScheduled,
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "TerminationByKubelet",
|
||||
},
|
||||
},
|
||||
Message: "Message",
|
||||
},
|
||||
},
|
||||
{
|
||||
"readiness changes",
|
||||
false,
|
||||
false,
|
||||
func(input v1.PodStatus) v1.PodStatus { return input },
|
||||
func(input v1.PodStatus) v1.PodStatus {
|
||||
input.Conditions[0].Status = v1.ConditionFalse
|
||||
@ -1439,6 +1624,7 @@ func TestMergePodStatus(t *testing.T) {
|
||||
{
|
||||
"additional pod condition",
|
||||
false,
|
||||
false,
|
||||
func(input v1.PodStatus) v1.PodStatus {
|
||||
input.Conditions = append(input.Conditions, v1.PodCondition{
|
||||
Type: v1.PodConditionType("example.com/feature"),
|
||||
@ -1469,6 +1655,7 @@ func TestMergePodStatus(t *testing.T) {
|
||||
{
|
||||
"additional pod condition and readiness changes",
|
||||
false,
|
||||
false,
|
||||
func(input v1.PodStatus) v1.PodStatus {
|
||||
input.Conditions = append(input.Conditions, v1.PodCondition{
|
||||
Type: v1.PodConditionType("example.com/feature"),
|
||||
@ -1502,6 +1689,7 @@ func TestMergePodStatus(t *testing.T) {
|
||||
{
|
||||
"additional pod condition changes",
|
||||
false,
|
||||
false,
|
||||
func(input v1.PodStatus) v1.PodStatus {
|
||||
input.Conditions = append(input.Conditions, v1.PodCondition{
|
||||
Type: v1.PodConditionType("example.com/feature"),
|
||||
@ -1538,6 +1726,7 @@ func TestMergePodStatus(t *testing.T) {
|
||||
{
|
||||
"phase is transitioning to failed and no containers running",
|
||||
false,
|
||||
false,
|
||||
func(input v1.PodStatus) v1.PodStatus {
|
||||
input.Phase = v1.PodRunning
|
||||
input.Reason = "Unknown"
|
||||
@ -1556,10 +1745,12 @@ func TestMergePodStatus(t *testing.T) {
|
||||
{
|
||||
Type: v1.PodReady,
|
||||
Status: v1.ConditionFalse,
|
||||
Reason: "PodFailed",
|
||||
},
|
||||
{
|
||||
Type: v1.ContainersReady,
|
||||
Status: v1.ConditionFalse,
|
||||
Reason: "PodFailed",
|
||||
},
|
||||
{
|
||||
Type: v1.PodScheduled,
|
||||
@ -1572,6 +1763,7 @@ func TestMergePodStatus(t *testing.T) {
|
||||
},
|
||||
{
|
||||
"phase is transitioning to failed and containers running",
|
||||
false,
|
||||
true,
|
||||
func(input v1.PodStatus) v1.PodStatus {
|
||||
input.Phase = v1.PodRunning
|
||||
@ -1605,6 +1797,7 @@ func TestMergePodStatus(t *testing.T) {
|
||||
|
||||
for _, tc := range useCases {
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, tc.enablePodDisruptionConditions)()
|
||||
output := mergePodStatus(tc.oldPodStatus(getPodStatus()), tc.newPodStatus(getPodStatus()), tc.hasRunningContainers)
|
||||
if !conditionsEqual(output.Conditions, tc.expectPodStatus.Conditions) || !statusEqual(output, tc.expectPodStatus) {
|
||||
t.Fatalf("unexpected output: %s", cmp.Diff(tc.expectPodStatus, output))
|
||||
@ -1630,7 +1823,7 @@ func conditionsEqual(left, right []v1.PodCondition) bool {
|
||||
for _, r := range right {
|
||||
if l.Type == r.Type {
|
||||
found = true
|
||||
if l.Status != r.Status {
|
||||
if l.Status != r.Status || l.Reason != r.Reason {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
@ -44,3 +44,13 @@ func PodConditionByKubelet(conditionType v1.PodConditionType) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// PodConditionSharedByKubelet returns if the pod condition type is shared by kubelet
|
||||
func PodConditionSharedByKubelet(conditionType v1.PodConditionType) bool {
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
|
||||
if conditionType == v1.AlphaNoCompatGuaranteeDisruptionTarget {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
@ -27,6 +27,8 @@ import (
|
||||
|
||||
func TestPodConditionByKubelet(t *testing.T) {
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodHasNetworkCondition, true)()
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, true)()
|
||||
|
||||
trueCases := []v1.PodConditionType{
|
||||
v1.PodScheduled,
|
||||
v1.PodReady,
|
||||
@ -52,3 +54,28 @@ func TestPodConditionByKubelet(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPodConditionSharedByKubelet(t *testing.T) {
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, true)()
|
||||
|
||||
trueCases := []v1.PodConditionType{
|
||||
v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
}
|
||||
|
||||
for _, tc := range trueCases {
|
||||
if !PodConditionSharedByKubelet(tc) {
|
||||
t.Errorf("Expect %q to be condition shared by kubelet.", tc)
|
||||
}
|
||||
}
|
||||
|
||||
falseCases := []v1.PodConditionType{
|
||||
v1.PodConditionType("abcd"),
|
||||
v1.PodConditionType(v1.PodReasonUnschedulable),
|
||||
}
|
||||
|
||||
for _, tc := range falseCases {
|
||||
if PodConditionSharedByKubelet(tc) {
|
||||
t.Errorf("Expect %q NOT to be condition shared by kubelet.", tc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2654,7 +2654,7 @@ const (
|
||||
PodReady PodConditionType = "Ready"
|
||||
// PodScheduled represents status of the scheduling process for this pod.
|
||||
PodScheduled PodConditionType = "PodScheduled"
|
||||
// AlphaNoCompatGuaranteeDisruptionTarget indicates the pod is about to be deleted due to a
|
||||
// AlphaNoCompatGuaranteeDisruptionTarget indicates the pod is about to be terminated due to a
|
||||
// disruption (such as preemption, eviction API or garbage-collection).
|
||||
// The constant is to be renamed once the name is accepted within the KEP-3329.
|
||||
AlphaNoCompatGuaranteeDisruptionTarget PodConditionType = "DisruptionTarget"
|
||||
@ -2673,6 +2673,10 @@ const (
|
||||
// PodReasonSchedulerError reason in PodScheduled PodCondition means that some internal error happens
|
||||
// during scheduling, for example due to nodeAffinity parsing errors.
|
||||
PodReasonSchedulerError = "SchedulerError"
|
||||
|
||||
// TerminationByKubelet reason in DisruptionTarget pod condition indicates that the termination
|
||||
// is initiated by kubelet
|
||||
AlphaNoCompatGuaranteePodReasonTerminationByKubelet = "TerminationByKubelet"
|
||||
)
|
||||
|
||||
// PodCondition contains details for the current condition of this pod.
|
||||
|
@ -231,3 +231,13 @@ func mixinRestrictedContainerSecurityContext(container *v1.Container) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// FindPodConditionByType loops through all pod conditions in pod status and returns the specified condition.
|
||||
func FindPodConditionByType(podStatus *v1.PodStatus, conditionType v1.PodConditionType) *v1.PodCondition {
|
||||
for _, cond := range podStatus.Conditions {
|
||||
if cond.Type == conditionType {
|
||||
return &cond
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -31,6 +31,7 @@ import (
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/fields"
|
||||
kubeletstatsv1alpha1 "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
|
||||
"k8s.io/kubernetes/pkg/kubelet/eviction"
|
||||
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
|
||||
@ -500,6 +501,28 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering [Slow] [Serial] [Disruptive][No
|
||||
specs[2].pod.Spec.PriorityClassName = highPriorityClassName
|
||||
runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logPidMetrics, specs)
|
||||
})
|
||||
|
||||
ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition)+"; PodDisruptionConditions enabled [NodeFeature:PodDisruptionConditions]", func() {
|
||||
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
|
||||
pidsConsumed := int64(10000)
|
||||
summary := eventuallyGetSummary()
|
||||
availablePids := *(summary.Node.Rlimit.MaxPID) - *(summary.Node.Rlimit.NumOfRunningProcesses)
|
||||
initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalPIDAvailable): fmt.Sprintf("%d", availablePids-pidsConsumed)}
|
||||
initialConfig.EvictionMinimumReclaim = map[string]string{}
|
||||
initialConfig.FeatureGates = map[string]bool{
|
||||
string(features.PodDisruptionConditions): true,
|
||||
}
|
||||
})
|
||||
disruptionTarget := v1.AlphaNoCompatGuaranteeDisruptionTarget
|
||||
specs := []podEvictSpec{
|
||||
{
|
||||
evictionPriority: 1,
|
||||
pod: pidConsumingPod("fork-bomb-container", 30000),
|
||||
wantPodDisruptionCondition: &disruptionTarget,
|
||||
},
|
||||
}
|
||||
runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logPidMetrics, specs)
|
||||
})
|
||||
})
|
||||
|
||||
// Struct used by runEvictionTest that specifies the pod, and when that pod should be evicted, relative to other pods
|
||||
@ -507,8 +530,9 @@ type podEvictSpec struct {
|
||||
// P0 should never be evicted, P1 shouldn't evict before P2, etc.
|
||||
// If two are ranked at P1, either is permitted to fail before the other.
|
||||
// The test ends when all pods other than p0 have been evicted
|
||||
evictionPriority int
|
||||
pod *v1.Pod
|
||||
evictionPriority int
|
||||
pod *v1.Pod
|
||||
wantPodDisruptionCondition *v1.PodConditionType
|
||||
}
|
||||
|
||||
// runEvictionTest sets up a testing environment given the provided pods, and checks a few things:
|
||||
@ -560,6 +584,9 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe
|
||||
return verifyEvictionOrdering(f, testSpecs)
|
||||
}, pressureTimeout, evictionPollInterval).Should(gomega.BeNil())
|
||||
|
||||
ginkgo.By("checking for the expected pod conditions for evicted pods")
|
||||
verifyPodConditions(f, testSpecs)
|
||||
|
||||
// We observe pressure from the API server. The eviction manager observes pressure from the kubelet internal stats.
|
||||
// This means the eviction manager will observe pressure before we will, creating a delay between when the eviction manager
|
||||
// evicts a pod, and when we observe the pressure by querying the API server. Add a delay here to account for this delay
|
||||
@ -725,6 +752,21 @@ func verifyEvictionOrdering(f *framework.Framework, testSpecs []podEvictSpec) er
|
||||
return fmt.Errorf("pods that should be evicted are still running: %#v", pendingPods)
|
||||
}
|
||||
|
||||
func verifyPodConditions(f *framework.Framework, testSpecs []podEvictSpec) {
|
||||
for _, spec := range testSpecs {
|
||||
if spec.wantPodDisruptionCondition != nil {
|
||||
pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(context.TODO(), spec.pod.Name, metav1.GetOptions{})
|
||||
framework.ExpectNoError(err, "Failed to get the recent pod object for name: %q", pod.Name)
|
||||
|
||||
cType := *spec.wantPodDisruptionCondition
|
||||
podDisruptionCondition := e2epod.FindPodConditionByType(&pod.Status, cType)
|
||||
if podDisruptionCondition == nil {
|
||||
framework.Failf("pod %q should have the condition: %q, pod status: %v", pod.Name, cType, pod.Status)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func verifyEvictionEvents(f *framework.Framework, testSpecs []podEvictSpec, expectedStarvedResource v1.ResourceName) {
|
||||
for _, spec := range testSpecs {
|
||||
pod := spec.pod
|
||||
|
@ -55,6 +55,109 @@ import (
|
||||
var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShutdown] [NodeFeature:GracefulNodeShutdownBasedOnPodPriority]", func() {
|
||||
f := framework.NewDefaultFramework("graceful-node-shutdown")
|
||||
f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged
|
||||
|
||||
ginkgo.Context("graceful node shutdown when PodDisruptionConditions are enabled [NodeFeature:PodDisruptionConditions]", func() {
|
||||
|
||||
const (
|
||||
pollInterval = 1 * time.Second
|
||||
podStatusUpdateTimeout = 30 * time.Second
|
||||
nodeStatusUpdateTimeout = 30 * time.Second
|
||||
nodeShutdownGracePeriod = 30 * time.Second
|
||||
)
|
||||
|
||||
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
|
||||
initialConfig.FeatureGates = map[string]bool{
|
||||
string(features.GracefulNodeShutdown): true,
|
||||
string(features.PodDisruptionConditions): true,
|
||||
string(features.GracefulNodeShutdownBasedOnPodPriority): false,
|
||||
}
|
||||
initialConfig.ShutdownGracePeriod = metav1.Duration{Duration: nodeShutdownGracePeriod}
|
||||
})
|
||||
|
||||
ginkgo.BeforeEach(func() {
|
||||
ginkgo.By("Wait for the node to be ready")
|
||||
waitForNodeReady()
|
||||
})
|
||||
|
||||
ginkgo.AfterEach(func() {
|
||||
ginkgo.By("Emitting Shutdown false signal; cancelling the shutdown")
|
||||
err := emitSignalPrepareForShutdown(false)
|
||||
framework.ExpectNoError(err)
|
||||
})
|
||||
|
||||
ginkgo.It("should add the DisruptionTarget pod failure condition to the evicted pods", func() {
|
||||
nodeName := getNodeName(f)
|
||||
nodeSelector := fields.Set{
|
||||
"spec.nodeName": nodeName,
|
||||
}.AsSelector().String()
|
||||
|
||||
// Define test pods
|
||||
pods := []*v1.Pod{
|
||||
getGracePeriodOverrideTestPod("pod-to-evict", nodeName, 5, ""),
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
ginkgo.By("reating batch pods")
|
||||
e2epod.NewPodClient(f).CreateBatch(pods)
|
||||
|
||||
list, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
|
||||
FieldSelector: nodeSelector,
|
||||
})
|
||||
|
||||
framework.ExpectNoError(err)
|
||||
framework.ExpectEqual(len(list.Items), len(pods), "the number of pods is not as expected")
|
||||
|
||||
list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
|
||||
FieldSelector: nodeSelector,
|
||||
})
|
||||
if err != nil {
|
||||
framework.Failf("Failed to start batch pod: %q", err)
|
||||
}
|
||||
framework.ExpectEqual(len(list.Items), len(pods), "the number of pods is not as expected")
|
||||
|
||||
for _, pod := range list.Items {
|
||||
framework.Logf("Pod %q status conditions: %q", pod.Name, &pod.Status.Conditions)
|
||||
}
|
||||
|
||||
ginkgo.By("Verifying batch pods are running")
|
||||
for _, pod := range list.Items {
|
||||
if podReady, err := testutils.PodRunningReady(&pod); err != nil || !podReady {
|
||||
framework.Failf("Failed to start batch pod: %v", pod.Name)
|
||||
}
|
||||
}
|
||||
|
||||
ginkgo.By("Emitting shutdown signal")
|
||||
err = emitSignalPrepareForShutdown(true)
|
||||
framework.ExpectNoError(err)
|
||||
|
||||
ginkgo.By("Verifying that all pods are shutdown")
|
||||
// All pods should be shut down
|
||||
gomega.Eventually(func() error {
|
||||
list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
|
||||
FieldSelector: nodeSelector,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
framework.ExpectEqual(len(list.Items), len(pods), "the number of pods is not as expected")
|
||||
|
||||
for _, pod := range list.Items {
|
||||
if !isPodShutdown(&pod) {
|
||||
framework.Logf("Expecting pod to be shutdown, but it's not currently: Pod: %q, Pod Status %+v", pod.Name, pod.Status)
|
||||
return fmt.Errorf("pod should be shutdown, phase: %s", pod.Status.Phase)
|
||||
}
|
||||
podDisruptionCondition := e2epod.FindPodConditionByType(&pod.Status, v1.AlphaNoCompatGuaranteeDisruptionTarget)
|
||||
if podDisruptionCondition == nil {
|
||||
framework.Failf("pod %q should have the condition: %q, pod status: %v", pod.Name, v1.AlphaNoCompatGuaranteeDisruptionTarget, pod.Status)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}, podStatusUpdateTimeout+(nodeShutdownGracePeriod), pollInterval).Should(gomega.BeNil())
|
||||
})
|
||||
})
|
||||
|
||||
ginkgo.Context("when gracefully shutting down", func() {
|
||||
|
||||
const (
|
||||
|
@ -668,9 +668,12 @@ func TestStalePodDisruption(t *testing.T) {
|
||||
|
||||
cases := map[string]struct {
|
||||
deletePod bool
|
||||
podPhase v1.PodPhase
|
||||
reason string
|
||||
wantConditions []v1.PodCondition
|
||||
}{
|
||||
"stale-condition": {
|
||||
podPhase: v1.PodRunning,
|
||||
wantConditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
@ -679,6 +682,7 @@ func TestStalePodDisruption(t *testing.T) {
|
||||
},
|
||||
},
|
||||
"deleted-pod": {
|
||||
podPhase: v1.PodRunning,
|
||||
deletePod: true,
|
||||
wantConditions: []v1.PodCondition{
|
||||
{
|
||||
@ -687,6 +691,26 @@ func TestStalePodDisruption(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
"disruption-condition-by-kubelet": {
|
||||
podPhase: v1.PodFailed,
|
||||
reason: v1.AlphaNoCompatGuaranteePodReasonTerminationByKubelet,
|
||||
wantConditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: v1.AlphaNoCompatGuaranteePodReasonTerminationByKubelet,
|
||||
},
|
||||
},
|
||||
},
|
||||
"disruption-condition-on-failed-pod": {
|
||||
podPhase: v1.PodFailed,
|
||||
wantConditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for name, tc := range cases {
|
||||
@ -702,10 +726,11 @@ func TestStalePodDisruption(t *testing.T) {
|
||||
t.Fatalf("Failed creating pod: %v", err)
|
||||
}
|
||||
|
||||
pod.Status.Phase = v1.PodRunning
|
||||
pod.Status.Phase = tc.podPhase
|
||||
pod.Status.Conditions = append(pod.Status.Conditions, v1.PodCondition{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: tc.reason,
|
||||
LastTransitionTime: metav1.Now(),
|
||||
})
|
||||
pod, err = clientSet.CoreV1().Pods(nsName).UpdateStatus(ctx, pod, metav1.UpdateOptions{})
|