mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-30 15:05:27 +00:00
Merge pull request #36017 from foxish/kubectl-new-2
Automatic merge from submit-queue Set reason and message on Pod during nodecontroller eviction **What this PR does / why we need it**: Pods which are evicted by the nodecontroller due to network partition, or unresponsive kubelet should be differentiated from termination initiated by other sources. The reason/message are consumed by kubectl to provide a better summary using get/describe. **Which issue this PR fixes** *(optional, in `fixes #<issue number>(, #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #35725 **Release note**: ```release-note Pods that are terminating due to eviction by the nodecontroller (typically due to unresponsive kubelet, or network partition) now surface in `kubectl get` output as being in state "Unknown", along with a longer description in `kubectl describe` output. ```
This commit is contained in:
commit
f2b5600567
@ -41,6 +41,7 @@ go_library(
|
|||||||
"//pkg/runtime:go_default_library",
|
"//pkg/runtime:go_default_library",
|
||||||
"//pkg/types:go_default_library",
|
"//pkg/types:go_default_library",
|
||||||
"//pkg/util/clock:go_default_library",
|
"//pkg/util/clock:go_default_library",
|
||||||
|
"//pkg/util/errors:go_default_library",
|
||||||
"//pkg/util/flowcontrol:go_default_library",
|
"//pkg/util/flowcontrol:go_default_library",
|
||||||
"//pkg/util/metrics:go_default_library",
|
"//pkg/util/metrics:go_default_library",
|
||||||
"//pkg/util/node:go_default_library",
|
"//pkg/util/node:go_default_library",
|
||||||
@ -73,6 +74,7 @@ go_test(
|
|||||||
"//pkg/client/cache:go_default_library",
|
"//pkg/client/cache:go_default_library",
|
||||||
"//pkg/client/clientset_generated/internalclientset:go_default_library",
|
"//pkg/client/clientset_generated/internalclientset:go_default_library",
|
||||||
"//pkg/client/clientset_generated/internalclientset/fake:go_default_library",
|
"//pkg/client/clientset_generated/internalclientset/fake:go_default_library",
|
||||||
|
"//pkg/client/testing/core:go_default_library",
|
||||||
"//pkg/cloudprovider:go_default_library",
|
"//pkg/cloudprovider:go_default_library",
|
||||||
"//pkg/cloudprovider/providers/fake:go_default_library",
|
"//pkg/cloudprovider/providers/fake:go_default_library",
|
||||||
"//pkg/controller:go_default_library",
|
"//pkg/controller:go_default_library",
|
||||||
@ -80,6 +82,7 @@ go_test(
|
|||||||
"//pkg/types:go_default_library",
|
"//pkg/types:go_default_library",
|
||||||
"//pkg/util/diff:go_default_library",
|
"//pkg/util/diff:go_default_library",
|
||||||
"//pkg/util/flowcontrol:go_default_library",
|
"//pkg/util/flowcontrol:go_default_library",
|
||||||
|
"//pkg/util/node:go_default_library",
|
||||||
"//pkg/util/sets:go_default_library",
|
"//pkg/util/sets:go_default_library",
|
||||||
"//pkg/util/wait:go_default_library",
|
"//pkg/util/wait:go_default_library",
|
||||||
"//vendor:github.com/golang/glog",
|
"//vendor:github.com/golang/glog",
|
||||||
|
@ -21,6 +21,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"k8s.io/kubernetes/pkg/api"
|
"k8s.io/kubernetes/pkg/api"
|
||||||
|
"k8s.io/kubernetes/pkg/api/errors"
|
||||||
"k8s.io/kubernetes/pkg/client/cache"
|
"k8s.io/kubernetes/pkg/client/cache"
|
||||||
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
|
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
|
||||||
"k8s.io/kubernetes/pkg/client/record"
|
"k8s.io/kubernetes/pkg/client/record"
|
||||||
@ -28,6 +29,8 @@ import (
|
|||||||
"k8s.io/kubernetes/pkg/fields"
|
"k8s.io/kubernetes/pkg/fields"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/util/format"
|
"k8s.io/kubernetes/pkg/kubelet/util/format"
|
||||||
"k8s.io/kubernetes/pkg/types"
|
"k8s.io/kubernetes/pkg/types"
|
||||||
|
utilerrors "k8s.io/kubernetes/pkg/util/errors"
|
||||||
|
"k8s.io/kubernetes/pkg/util/node"
|
||||||
utilruntime "k8s.io/kubernetes/pkg/util/runtime"
|
utilruntime "k8s.io/kubernetes/pkg/util/runtime"
|
||||||
"k8s.io/kubernetes/pkg/version"
|
"k8s.io/kubernetes/pkg/version"
|
||||||
|
|
||||||
@ -46,6 +49,8 @@ func deletePods(kubeClient clientset.Interface, recorder record.EventRecorder, n
|
|||||||
selector := fields.OneTermEqualSelector(api.PodHostField, nodeName)
|
selector := fields.OneTermEqualSelector(api.PodHostField, nodeName)
|
||||||
options := api.ListOptions{FieldSelector: selector}
|
options := api.ListOptions{FieldSelector: selector}
|
||||||
pods, err := kubeClient.Core().Pods(api.NamespaceAll).List(options)
|
pods, err := kubeClient.Core().Pods(api.NamespaceAll).List(options)
|
||||||
|
var updateErrList []error
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return remaining, err
|
return remaining, err
|
||||||
}
|
}
|
||||||
@ -59,6 +64,15 @@ func deletePods(kubeClient clientset.Interface, recorder record.EventRecorder, n
|
|||||||
if pod.Spec.NodeName != nodeName {
|
if pod.Spec.NodeName != nodeName {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set reason and message in the pod object.
|
||||||
|
if _, err = setPodTerminationReason(kubeClient, &pod, nodeName); err != nil {
|
||||||
|
if errors.IsConflict(err) {
|
||||||
|
updateErrList = append(updateErrList,
|
||||||
|
fmt.Errorf("update status failed for pod %q: %v", format.Pod(&pod), err))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
// if the pod has already been marked for deletion, we still return true that there are remaining pods.
|
// if the pod has already been marked for deletion, we still return true that there are remaining pods.
|
||||||
if pod.DeletionGracePeriodSeconds != nil {
|
if pod.DeletionGracePeriodSeconds != nil {
|
||||||
remaining = true
|
remaining = true
|
||||||
@ -77,9 +91,31 @@ func deletePods(kubeClient clientset.Interface, recorder record.EventRecorder, n
|
|||||||
}
|
}
|
||||||
remaining = true
|
remaining = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if len(updateErrList) > 0 {
|
||||||
|
return false, utilerrors.NewAggregate(updateErrList)
|
||||||
|
}
|
||||||
return remaining, nil
|
return remaining, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// setPodTerminationReason attempts to set a reason and message in the pod status, updates it in the apiserver,
|
||||||
|
// and returns an error if it encounters one.
|
||||||
|
func setPodTerminationReason(kubeClient clientset.Interface, pod *api.Pod, nodeName string) (*api.Pod, error) {
|
||||||
|
if pod.Status.Reason == node.NodeUnreachablePodReason {
|
||||||
|
return pod, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
pod.Status.Reason = node.NodeUnreachablePodReason
|
||||||
|
pod.Status.Message = fmt.Sprintf(node.NodeUnreachablePodMessage, nodeName, pod.Name)
|
||||||
|
|
||||||
|
var updatedPod *api.Pod
|
||||||
|
var err error
|
||||||
|
if updatedPod, err = kubeClient.Core().Pods(pod.Namespace).UpdateStatus(pod); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return updatedPod, nil
|
||||||
|
}
|
||||||
|
|
||||||
func forcefullyDeletePod(c clientset.Interface, pod *api.Pod) error {
|
func forcefullyDeletePod(c clientset.Interface, pod *api.Pod) error {
|
||||||
var zero int64
|
var zero int64
|
||||||
err := c.Core().Pods(pod.Namespace).Delete(pod.Name, &api.DeleteOptions{GracePeriodSeconds: &zero})
|
err := c.Core().Pods(pod.Namespace).Delete(pod.Name, &api.DeleteOptions{GracePeriodSeconds: &zero})
|
||||||
|
@ -29,12 +29,14 @@ import (
|
|||||||
"k8s.io/kubernetes/pkg/client/cache"
|
"k8s.io/kubernetes/pkg/client/cache"
|
||||||
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
|
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
|
||||||
"k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/fake"
|
"k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/fake"
|
||||||
|
testcore "k8s.io/kubernetes/pkg/client/testing/core"
|
||||||
"k8s.io/kubernetes/pkg/cloudprovider"
|
"k8s.io/kubernetes/pkg/cloudprovider"
|
||||||
fakecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/fake"
|
fakecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/fake"
|
||||||
"k8s.io/kubernetes/pkg/controller"
|
"k8s.io/kubernetes/pkg/controller"
|
||||||
"k8s.io/kubernetes/pkg/controller/informers"
|
"k8s.io/kubernetes/pkg/controller/informers"
|
||||||
"k8s.io/kubernetes/pkg/types"
|
"k8s.io/kubernetes/pkg/types"
|
||||||
"k8s.io/kubernetes/pkg/util/diff"
|
"k8s.io/kubernetes/pkg/util/diff"
|
||||||
|
"k8s.io/kubernetes/pkg/util/node"
|
||||||
"k8s.io/kubernetes/pkg/util/wait"
|
"k8s.io/kubernetes/pkg/util/wait"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -538,6 +540,145 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestPodStatusChange(t *testing.T) {
|
||||||
|
fakeNow := unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
|
||||||
|
evictionTimeout := 10 * time.Minute
|
||||||
|
|
||||||
|
// Because of the logic that prevents NC from evicting anything when all Nodes are NotReady
|
||||||
|
// we need second healthy node in tests. Because of how the tests are written we need to update
|
||||||
|
// the status of this Node.
|
||||||
|
healthyNodeNewStatus := api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionTrue,
|
||||||
|
// Node status has just been updated, and is NotReady for 10min.
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 9, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// Node created long time ago, node controller posted Unknown for a long period of time.
|
||||||
|
table := []struct {
|
||||||
|
fakeNodeHandler *FakeNodeHandler
|
||||||
|
daemonSets []extensions.DaemonSet
|
||||||
|
timeToPass time.Duration
|
||||||
|
newNodeStatus api.NodeStatus
|
||||||
|
secondNodeNewStatus api.NodeStatus
|
||||||
|
expectedPodUpdate bool
|
||||||
|
expectedReason string
|
||||||
|
description string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
fakeNodeHandler: &FakeNodeHandler{
|
||||||
|
Existing: []*api.Node{
|
||||||
|
{
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "node0",
|
||||||
|
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
Labels: map[string]string{
|
||||||
|
unversioned.LabelZoneRegion: "region1",
|
||||||
|
unversioned.LabelZoneFailureDomain: "zone1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Status: api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionUnknown,
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "node1",
|
||||||
|
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
Labels: map[string]string{
|
||||||
|
unversioned.LabelZoneRegion: "region1",
|
||||||
|
unversioned.LabelZoneFailureDomain: "zone1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Status: api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionTrue,
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
|
||||||
|
},
|
||||||
|
timeToPass: 60 * time.Minute,
|
||||||
|
newNodeStatus: api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionUnknown,
|
||||||
|
// Node status was updated by nodecontroller 1hr ago
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
secondNodeNewStatus: healthyNodeNewStatus,
|
||||||
|
expectedPodUpdate: true,
|
||||||
|
expectedReason: node.NodeUnreachablePodReason,
|
||||||
|
description: "Node created long time ago, node controller posted Unknown for a " +
|
||||||
|
"long period of time, the pod status must include reason for termination.",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, item := range table {
|
||||||
|
nodeController, _ := NewNodeControllerFromClient(nil, item.fakeNodeHandler,
|
||||||
|
evictionTimeout, testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold, testNodeMonitorGracePeriod,
|
||||||
|
testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false)
|
||||||
|
nodeController.now = func() unversioned.Time { return fakeNow }
|
||||||
|
if err := nodeController.monitorNodeStatus(); err != nil {
|
||||||
|
t.Errorf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if item.timeToPass > 0 {
|
||||||
|
nodeController.now = func() unversioned.Time { return unversioned.Time{Time: fakeNow.Add(item.timeToPass)} }
|
||||||
|
item.fakeNodeHandler.Existing[0].Status = item.newNodeStatus
|
||||||
|
item.fakeNodeHandler.Existing[1].Status = item.secondNodeNewStatus
|
||||||
|
}
|
||||||
|
if err := nodeController.monitorNodeStatus(); err != nil {
|
||||||
|
t.Errorf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
zones := getZones(item.fakeNodeHandler)
|
||||||
|
for _, zone := range zones {
|
||||||
|
nodeController.zonePodEvictor[zone].Try(func(value TimedValue) (bool, time.Duration) {
|
||||||
|
nodeUid, _ := value.UID.(string)
|
||||||
|
deletePods(item.fakeNodeHandler, nodeController.recorder, value.Value, nodeUid, nodeController.daemonSetStore)
|
||||||
|
return true, 0
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
podReasonUpdate := false
|
||||||
|
for _, action := range item.fakeNodeHandler.Actions() {
|
||||||
|
if action.GetVerb() == "update" && action.GetResource().Resource == "pods" {
|
||||||
|
updateReason := action.(testcore.UpdateActionImpl).GetObject().(*api.Pod).Status.Reason
|
||||||
|
podReasonUpdate = true
|
||||||
|
if updateReason != item.expectedReason {
|
||||||
|
t.Errorf("expected pod status reason: %+v, got %+v for %+v", item.expectedReason, updateReason, item.description)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if podReasonUpdate != item.expectedPodUpdate {
|
||||||
|
t.Errorf("expected pod update: %+v, got %+v for %+v", podReasonUpdate, item.expectedPodUpdate, item.description)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) {
|
func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) {
|
||||||
fakeNow := unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
|
fakeNow := unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
|
||||||
evictionTimeout := 10 * time.Minute
|
evictionTimeout := 10 * time.Minute
|
||||||
|
@ -95,6 +95,7 @@ go_library(
|
|||||||
"//pkg/util/integer:go_default_library",
|
"//pkg/util/integer:go_default_library",
|
||||||
"//pkg/util/intstr:go_default_library",
|
"//pkg/util/intstr:go_default_library",
|
||||||
"//pkg/util/jsonpath:go_default_library",
|
"//pkg/util/jsonpath:go_default_library",
|
||||||
|
"//pkg/util/node:go_default_library",
|
||||||
"//pkg/util/sets:go_default_library",
|
"//pkg/util/sets:go_default_library",
|
||||||
"//pkg/util/slice:go_default_library",
|
"//pkg/util/slice:go_default_library",
|
||||||
"//pkg/util/uuid:go_default_library",
|
"//pkg/util/uuid:go_default_library",
|
||||||
|
@ -30,8 +30,6 @@ import (
|
|||||||
"text/template"
|
"text/template"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/ghodss/yaml"
|
|
||||||
"github.com/golang/glog"
|
|
||||||
"k8s.io/kubernetes/federation/apis/federation"
|
"k8s.io/kubernetes/federation/apis/federation"
|
||||||
"k8s.io/kubernetes/pkg/api"
|
"k8s.io/kubernetes/pkg/api"
|
||||||
"k8s.io/kubernetes/pkg/api/events"
|
"k8s.io/kubernetes/pkg/api/events"
|
||||||
@ -49,7 +47,11 @@ import (
|
|||||||
"k8s.io/kubernetes/pkg/runtime"
|
"k8s.io/kubernetes/pkg/runtime"
|
||||||
utilerrors "k8s.io/kubernetes/pkg/util/errors"
|
utilerrors "k8s.io/kubernetes/pkg/util/errors"
|
||||||
"k8s.io/kubernetes/pkg/util/jsonpath"
|
"k8s.io/kubernetes/pkg/util/jsonpath"
|
||||||
|
"k8s.io/kubernetes/pkg/util/node"
|
||||||
"k8s.io/kubernetes/pkg/util/sets"
|
"k8s.io/kubernetes/pkg/util/sets"
|
||||||
|
|
||||||
|
"github.com/ghodss/yaml"
|
||||||
|
"github.com/golang/glog"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -731,7 +733,10 @@ func printPodBase(pod *api.Pod, w io.Writer, options PrintOptions) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if pod.DeletionTimestamp != nil {
|
|
||||||
|
if pod.DeletionTimestamp != nil && pod.Status.Reason == node.NodeUnreachablePodReason {
|
||||||
|
reason = "Unknown"
|
||||||
|
} else if pod.DeletionTimestamp != nil {
|
||||||
reason = "Terminating"
|
reason = "Terminating"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -31,6 +31,13 @@ import (
|
|||||||
"k8s.io/kubernetes/pkg/types"
|
"k8s.io/kubernetes/pkg/types"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// The reason and message set on a pod when its state cannot be confirmed as kubelet is unresponsive
|
||||||
|
// on the node it is (was) running.
|
||||||
|
NodeUnreachablePodReason = "NodeLost"
|
||||||
|
NodeUnreachablePodMessage = "Node %v which was running pod %v is unresponsive"
|
||||||
|
)
|
||||||
|
|
||||||
func GetHostname(hostnameOverride string) string {
|
func GetHostname(hostnameOverride string) string {
|
||||||
var hostname string = hostnameOverride
|
var hostname string = hostnameOverride
|
||||||
if hostname == "" {
|
if hostname == "" {
|
||||||
|
Loading…
Reference in New Issue
Block a user