Merge pull request #17741 from freehan/nodedown

Auto commit by PR queue bot

commit 0834890c42
@@ -35,6 +35,7 @@ import (
     "k8s.io/kubernetes/pkg/controller"
     "k8s.io/kubernetes/pkg/controller/framework"
     "k8s.io/kubernetes/pkg/fields"
+    "k8s.io/kubernetes/pkg/kubelet/util/format"
     "k8s.io/kubernetes/pkg/runtime"
     "k8s.io/kubernetes/pkg/types"
     "k8s.io/kubernetes/pkg/util"
@@ -489,6 +490,9 @@ func (nc *NodeController) monitorNodeStatus() error {
             // Report node event.
             if readyCondition.Status != api.ConditionTrue && lastReadyCondition.Status == api.ConditionTrue {
                 nc.recordNodeStatusChange(node, "NodeNotReady")
+                if err = nc.markAllPodsNotReady(node.Name); err != nil {
+                    util.HandleError(fmt.Errorf("Unable to mark all pods NotReady on node %v: %v", node.Name, err))
+                }
             }

             // Check with the cloud provider to see if the node still exists. If it
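Note that the guard above is edge-triggered: markAllPodsNotReady runs only when the node's Ready condition leaves ConditionTrue while the previous observation was still True, so pods are marked once per Ready-to-NotReady transition rather than on every monitorNodeStatus tick. A minimal standalone sketch of that pattern, using illustrative names rather than the controller's real types:

    package main

    import "fmt"

    type ConditionStatus string

    const (
        ConditionTrue  ConditionStatus = "True"
        ConditionFalse ConditionStatus = "False"
    )

    // transitionedToNotReady reports whether the Ready condition just left
    // True, mirroring the edge-triggered check in monitorNodeStatus.
    func transitionedToNotReady(last, current ConditionStatus) bool {
        return current != ConditionTrue && last == ConditionTrue
    }

    func main() {
        fmt.Println(transitionedToNotReady(ConditionTrue, ConditionFalse))  // true: node just went down
        fmt.Println(transitionedToNotReady(ConditionFalse, ConditionFalse)) // false: already NotReady, no re-trigger
    }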
@@ -832,6 +836,42 @@ func (nc *NodeController) deletePods(nodeName string) (bool, error) {
     return remaining, nil
 }

+// update ready status of all pods running on the given node from master;
+// returns an aggregated error if any update fails
+func (nc *NodeController) markAllPodsNotReady(nodeName string) error {
+    glog.V(2).Infof("Update ready status of pods on node [%v]", nodeName)
+    opts := api.ListOptions{FieldSelector: fields.OneTermEqualSelector(client.PodHost, nodeName)}
+    pods, err := nc.kubeClient.Pods(api.NamespaceAll).List(opts)
+    if err != nil {
+        return err
+    }
+
+    errMsg := []string{}
+    for _, pod := range pods.Items {
+        // Defensive check, also needed for tests.
+        if pod.Spec.NodeName != nodeName {
+            continue
+        }
+
+        for i, cond := range pod.Status.Conditions {
+            if cond.Type == api.PodReady {
+                pod.Status.Conditions[i].Status = api.ConditionFalse
+                glog.V(2).Infof("Updating ready status of pod %v to false", pod.Name)
+                pod, err := nc.kubeClient.Pods(pod.Namespace).UpdateStatus(&pod)
+                if err != nil {
+                    glog.Warningf("Failed to update status for pod %q: %v", format.Pod(pod), err)
+                    errMsg = append(errMsg, fmt.Sprintf("%v", err))
+                }
+                break
+            }
+        }
+    }
+    if len(errMsg) == 0 {
+        return nil
+    }
+    return fmt.Errorf("%v", strings.Join(errMsg, "; "))
+}
+
 // terminatePods will ensure all pods on the given node that are in terminating state are eventually
 // cleaned up. Returns true if the node has no pods in terminating state, a duration that indicates how
 // long before we should check again (the next deadline for a pod to complete), or an error.
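The function above deliberately keeps iterating after a failed UpdateStatus call and only reports the collected failures at the end, so a single bad pod cannot prevent the rest from being marked. A self-contained sketch of that collect-then-join error pattern (markAll and its arguments are hypothetical names for illustration):

    package main

    import (
        "fmt"
        "strings"
    )

    // markAll applies op to every item, collecting failures instead of
    // stopping at the first one, and reports them as a single joined error.
    func markAll(items []string, op func(string) error) error {
        errMsg := []string{}
        for _, item := range items {
            if err := op(item); err != nil {
                errMsg = append(errMsg, fmt.Sprintf("%v", err))
            }
        }
        if len(errMsg) == 0 {
            return nil
        }
        return fmt.Errorf("%v", strings.Join(errMsg, "; "))
    }

    func main() {
        err := markAll([]string{"pod0", "pod1"}, func(name string) error {
            if name == "pod1" {
                return fmt.Errorf("update failed for %s", name)
            }
            return nil
        })
        fmt.Println(err) // update failed for pod1
    }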
@@ -595,6 +595,157 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
     }
 }

+func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
+    fakeNow := unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
+    table := []struct {
+        fakeNodeHandler         *FakeNodeHandler
+        timeToPass              time.Duration
+        newNodeStatus           api.NodeStatus
+        expectedPodStatusUpdate bool
+    }{
+        // Node created recently, without status.
+        // Expect no action from node controller (within startup grace period).
+        {
+            fakeNodeHandler: &FakeNodeHandler{
+                Existing: []*api.Node{
+                    {
+                        ObjectMeta: api.ObjectMeta{
+                            Name:              "node0",
+                            CreationTimestamp: fakeNow,
+                        },
+                    },
+                },
+                Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
+            },
+            expectedPodStatusUpdate: false,
+        },
+        // Node created long time ago, with status updated recently.
+        // Expect no action from node controller (within monitor grace period).
+        {
+            fakeNodeHandler: &FakeNodeHandler{
+                Existing: []*api.Node{
+                    {
+                        ObjectMeta: api.ObjectMeta{
+                            Name:              "node0",
+                            CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
+                        },
+                        Status: api.NodeStatus{
+                            Conditions: []api.NodeCondition{
+                                {
+                                    Type:   api.NodeReady,
+                                    Status: api.ConditionTrue,
+                                    // Node status has just been updated.
+                                    LastHeartbeatTime:  fakeNow,
+                                    LastTransitionTime: fakeNow,
+                                },
+                            },
+                            Capacity: api.ResourceList{
+                                api.ResourceName(api.ResourceCPU):    resource.MustParse("10"),
+                                api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
+                            },
+                        },
+                        Spec: api.NodeSpec{
+                            ExternalID: "node0",
+                        },
+                    },
+                },
+                Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
+            },
+            expectedPodStatusUpdate: false,
+        },
+        // Node created long time ago, with kubelet status updates stopped
+        // long enough to exceed the monitor grace period.
+        // Expect pod statuses updated and Unknown node status posted by node controller.
+        {
+            fakeNodeHandler: &FakeNodeHandler{
+                Existing: []*api.Node{
+                    {
+                        ObjectMeta: api.ObjectMeta{
+                            Name:              "node0",
+                            CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
+                        },
+                        Status: api.NodeStatus{
+                            Conditions: []api.NodeCondition{
+                                {
+                                    Type:   api.NodeReady,
+                                    Status: api.ConditionTrue,
+                                    // Node status hasn't been updated since fakeNow.
+                                    LastHeartbeatTime:  unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+                                    LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+                                },
+                                {
+                                    Type:   api.NodeOutOfDisk,
+                                    Status: api.ConditionFalse,
+                                    // Node status hasn't been updated since fakeNow.
+                                    LastHeartbeatTime:  unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+                                    LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+                                },
+                            },
+                            Capacity: api.ResourceList{
+                                api.ResourceName(api.ResourceCPU):    resource.MustParse("10"),
+                                api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
+                            },
+                        },
+                        Spec: api.NodeSpec{
+                            ExternalID: "node0",
+                        },
+                    },
+                },
+                Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
+            },
+            timeToPass: 1 * time.Minute,
+            newNodeStatus: api.NodeStatus{
+                Conditions: []api.NodeCondition{
+                    {
+                        Type:   api.NodeReady,
+                        Status: api.ConditionTrue,
+                        // Node status hasn't been updated since fakeNow.
+                        LastHeartbeatTime:  unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+                        LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+                    },
+                    {
+                        Type:   api.NodeOutOfDisk,
+                        Status: api.ConditionFalse,
+                        // Node status hasn't been updated since fakeNow.
+                        LastHeartbeatTime:  unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+                        LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+                    },
+                },
+                Capacity: api.ResourceList{
+                    api.ResourceName(api.ResourceCPU):    resource.MustParse("10"),
+                    api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
+                },
+            },
+            expectedPodStatusUpdate: true,
+        },
+    }
+
+    for i, item := range table {
+        nodeController := NewNodeController(nil, item.fakeNodeHandler, 5*time.Minute, util.NewFakeRateLimiter(),
+            util.NewFakeRateLimiter(), testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, false)
+        nodeController.now = func() unversioned.Time { return fakeNow }
+        if err := nodeController.monitorNodeStatus(); err != nil {
+            t.Errorf("Case[%d] unexpected error: %v", i, err)
+        }
+        if item.timeToPass > 0 {
+            nodeController.now = func() unversioned.Time { return unversioned.Time{Time: fakeNow.Add(item.timeToPass)} }
+            item.fakeNodeHandler.Existing[0].Status = item.newNodeStatus
+            if err := nodeController.monitorNodeStatus(); err != nil {
+                t.Errorf("Case[%d] unexpected error: %v", i, err)
+            }
+        }
+
+        podStatusUpdated := false
+        for _, action := range item.fakeNodeHandler.Actions() {
+            if action.GetVerb() == "update" && action.GetResource() == "pods" && action.GetSubresource() == "status" {
+                podStatusUpdated = true
+            }
+        }
+        if podStatusUpdated != item.expectedPodStatusUpdate {
+            t.Errorf("Case[%d] expected pod status updated to be %v, but got %v", i, item.expectedPodStatusUpdate, podStatusUpdated)
+        }
+    }
+}
+
 func TestNodeDeletion(t *testing.T) {
     fakeNow := unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
     fakeNodeHandler := &FakeNodeHandler{
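The test decides pass/fail by replaying the fake client's recorded actions and looking for an update of the pods "status" subresource. A minimal sketch of that record-then-assert style (the action struct here is an illustrative stand-in for the testclient's recorded actions, not its real API):

    package main

    import "fmt"

    // action is an illustrative stand-in for a fake client's recorded call.
    type action struct {
        verb, resource, subresource string
    }

    // sawPodStatusUpdate scans recorded actions for an update to pods/status,
    // mirroring how the test above detects that markAllPodsNotReady ran.
    func sawPodStatusUpdate(actions []action) bool {
        for _, a := range actions {
            if a.verb == "update" && a.resource == "pods" && a.subresource == "status" {
                return true
            }
        }
        return false
    }

    func main() {
        recorded := []action{
            {verb: "list", resource: "pods"},
            {verb: "update", resource: "pods", subresource: "status"},
        }
        fmt.Println(sawPodStatusUpdate(recorded)) // true
    }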
@@ -831,7 +982,8 @@ func newNode(name string) *api.Node {
 }

 func newPod(name, host string) *api.Pod {
-    return &api.Pod{ObjectMeta: api.ObjectMeta{Name: name}, Spec: api.PodSpec{NodeName: host}}
+    return &api.Pod{ObjectMeta: api.ObjectMeta{Name: name}, Spec: api.PodSpec{NodeName: host},
+        Status: api.PodStatus{Conditions: []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue}}}}
 }

 func contains(node *api.Node, nodes []*api.Node) bool {
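The Ready=True condition seeded by newPod is what gives markAllPodsNotReady something to flip: without it, the loop over pod.Status.Conditions finds no api.PodReady entry and no pods/status update is ever issued. A toy sketch of that flip, with plain strings standing in for the api types:

    package main

    import "fmt"

    type condition struct {
        Type, Status string
    }

    // markNotReady flips the first Ready condition to False, as
    // markAllPodsNotReady does for each pod on the down node. It returns
    // false when no Ready condition exists, in which case nothing is updated.
    func markNotReady(conds []condition) bool {
        for i := range conds {
            if conds[i].Type == "Ready" {
                conds[i].Status = "False"
                return true
            }
        }
        return false
    }

    func main() {
        conds := []condition{{Type: "Ready", Status: "True"}}
        fmt.Println(markNotReady(conds), conds) // true [{Ready False}]
    }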