Merge pull request #90737 from jiahuif/node-lifecyecle-controller-shutdown-order
CloudNodeLifecycleController: check node existence before shutdown status
Commit 68cbb35ebc
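The change reorders the two cloud-provider checks inside MonitorNodes: the existence check now runs first, and the shutdown taint is only applied to nodes that still exist in the cloud provider. A rough sketch of the new control flow, using hypothetical stand-in helpers (nodeExists, nodeShutdown, deleteNode, addShutdownTaint) in place of ensureNodeExistsByProviderID, shutdownInCloudProvider, the kubeClient Nodes().Delete call and controller.AddOrUpdateTaintOnNode:

// Sketch only, not the controller's real code: nodeExists, nodeShutdown,
// deleteNode and addShutdownTaint are hypothetical stand-ins for
// ensureNodeExistsByProviderID, shutdownInCloudProvider, the kubeClient
// Nodes().Delete call and controller.AddOrUpdateTaintOnNode.
package main

import "fmt"

func nodeExists(name string) (bool, error)   { return name != "gone", nil }
func nodeShutdown(name string) (bool, error) { return name == "stopped", nil }
func deleteNode(name string)                 { fmt.Println("delete node:", name) }
func addShutdownTaint(name string)           { fmt.Println("taint node:", name) }

func main() {
	for _, node := range []string{"gone", "stopped", "healthy"} {
		// 1. Existence first: a node that is gone from the cloud provider is
		//    deleted outright, so its taints no longer matter.
		exists, err := nodeExists(node)
		if err != nil {
			continue // cannot tell; try again on the next sync
		}
		if !exists {
			deleteNode(node)
			continue
		}
		// 2. Only nodes that still exist are checked for shutdown and tainted.
		if shutdown, err := nodeShutdown(node); err == nil && shutdown {
			addShutdownTaint(node)
		}
	}
}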
@@ -147,24 +147,6 @@ func (c *CloudNodeLifecycleController) MonitorNodes() {
 			continue
 		}
 
-		// we need to check this first to get taint working in similar in all cloudproviders
-		// current problem is that shutdown nodes are not working in similar way ie. all cloudproviders
-		// does not delete node from kubernetes cluster when instance it is shutdown see issue #46442
-		shutdown, err := shutdownInCloudProvider(context.TODO(), c.cloud, node)
-		if err != nil {
-			klog.Errorf("error checking if node %s is shutdown: %v", node.Name, err)
-		}
-
-		if shutdown && err == nil {
-			// if node is shutdown add shutdown taint
-			err = controller.AddOrUpdateTaintOnNode(c.kubeClient, node.Name, ShutdownTaint)
-			if err != nil {
-				klog.Errorf("failed to apply shutdown taint to node %s, it may have been deleted.", node.Name)
-			}
-			// Continue checking the remaining nodes since the current one is shutdown.
-			continue
-		}
-
 		// At this point the node has NotReady status, we need to check if the node has been removed
 		// from the cloud provider. If node cannot be found in cloudprovider, then delete the node
 		exists, err := ensureNodeExistsByProviderID(context.TODO(), instances, node)
@@ -173,26 +155,41 @@ func (c *CloudNodeLifecycleController) MonitorNodes() {
 			continue
 		}
 
-		if exists {
-			// Continue checking the remaining nodes since the current one is fine.
-			continue
-		}
-
-		klog.V(2).Infof("deleting node since it is no longer present in cloud provider: %s", node.Name)
-
-		ref := &v1.ObjectReference{
-			Kind:      "Node",
-			Name:      node.Name,
-			UID:       types.UID(node.UID),
-			Namespace: "",
-		}
-
-		c.recorder.Eventf(ref, v1.EventTypeNormal,
-			fmt.Sprintf("Deleting node %v because it does not exist in the cloud provider", node.Name),
-			"Node %s event: %s", node.Name, deleteNodeEvent)
-
-		if err := c.kubeClient.CoreV1().Nodes().Delete(context.TODO(), node.Name, metav1.DeleteOptions{}); err != nil {
-			klog.Errorf("unable to delete node %q: %v", node.Name, err)
+		if !exists {
+			// Current node does not exist, we should delete it, its taints do not matter anymore
+
+			klog.V(2).Infof("deleting node since it is no longer present in cloud provider: %s", node.Name)
+
+			ref := &v1.ObjectReference{
+				Kind:      "Node",
+				Name:      node.Name,
+				UID:       types.UID(node.UID),
+				Namespace: "",
+			}
+
+			c.recorder.Eventf(ref, v1.EventTypeNormal,
+				fmt.Sprintf("Deleting node %v because it does not exist in the cloud provider", node.Name),
+				"Node %s event: %s", node.Name, deleteNodeEvent)
+
+			if err := c.kubeClient.CoreV1().Nodes().Delete(context.TODO(), node.Name, metav1.DeleteOptions{}); err != nil {
+				klog.Errorf("unable to delete node %q: %v", node.Name, err)
+			}
+		} else {
+			// Node exists. We need to check this to get taint working in similar in all cloudproviders
+			// current problem is that shutdown nodes are not working in similar way ie. all cloudproviders
+			// does not delete node from kubernetes cluster when instance it is shutdown see issue #46442
+			shutdown, err := shutdownInCloudProvider(context.TODO(), c.cloud, node)
+			if err != nil {
+				klog.Errorf("error checking if node %s is shutdown: %v", node.Name, err)
+			}
+
+			if shutdown && err == nil {
+				// if node is shutdown add shutdown taint
+				err = controller.AddOrUpdateTaintOnNode(c.kubeClient, node.Name, ShutdownTaint)
+				if err != nil {
+					klog.Errorf("failed to apply shutdown taint to node %s, it may have been deleted.", node.Name)
+				}
+			}
 		}
 	}
 }
@@ -275,7 +275,7 @@ func Test_NodesShutdown(t *testing.T) {
 		updatedNodes []*v1.Node
 	}{
 		{
-			name: "node is not ready and was shutdown",
+			name: "node is not ready and was shutdown, but exists",
 			fnh: &testutil.FakeNodeHandler{
 				Existing: []*v1.Node{
 					{
@@ -283,6 +283,9 @@ func Test_NodesShutdown(t *testing.T) {
							Name:              "node0",
							CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.Local),
						},
+						Spec: v1.NodeSpec{
+							ProviderID: "node0",
+						},
						Status: v1.NodeStatus{
							Conditions: []v1.NodeCondition{
								{
@@ -300,6 +303,7 @@ func Test_NodesShutdown(t *testing.T) {
			},
			fakeCloud: &fakecloud.Cloud{
				NodeShutdown:            true,
+				ExistsByProviderID:      true,
				ErrShutdownByProviderID: nil,
			},
			updatedNodes: []*v1.Node{
@@ -309,6 +313,7 @@ func Test_NodesShutdown(t *testing.T) {
					CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.Local),
				},
				Spec: v1.NodeSpec{
+					ProviderID: "node0",
					Taints: []v1.Taint{
						*ShutdownTaint,
					},
@@ -416,6 +421,37 @@ func Test_NodesShutdown(t *testing.T) {
			},
			updatedNodes: []*v1.Node{},
		},
+		{
+			name: "node is shutdown but provider says it does not exist",
+			fnh: &testutil.FakeNodeHandler{
+				Existing: []*v1.Node{
+					{
+						ObjectMeta: metav1.ObjectMeta{
+							Name:              "node0",
+							CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.Local),
+						},
+						Status: v1.NodeStatus{
+							Conditions: []v1.NodeCondition{
+								{
+									Type:               v1.NodeReady,
+									Status:             v1.ConditionUnknown,
+									LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.Local),
+									LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.Local),
+								},
+							},
+						},
+					},
+				},
+				Clientset:    fake.NewSimpleClientset(),
+				UpdatedNodes: []*v1.Node{},
+			},
+			fakeCloud: &fakecloud.Cloud{
+				NodeShutdown:            true,
+				ExistsByProviderID:      false,
+				ErrShutdownByProviderID: nil,
+			},
+			updatedNodes: []*v1.Node{}, // should be empty because node does not exist
+		},
	}

	for _, testcase := range testcases {