consolidate node deletion logic between node lifecycle and cloud node controller
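This change drops the cloud-provider checks from the node lifecycle controller: the cloud, nodeExistsInCloudProvider and nodeShutdownInCloudProvider fields, the block in monitorNodeHealth that tainted shut-down nodes and force-deleted nodes the cloud provider no longer reports, and the now-unused markNodeAsNotShutdown helper. Per the title, that deletion logic is consolidated into the cloud node controller instead. For reference, a condensed sketch of the removed flow, assuming the in-tree helper signatures shown in the deleted lines (checkCloudNode itself and the import paths are illustrative, not code from this commit):

// Condensed sketch of the behaviour removed from monitorNodeHealth; not part
// of this commit. Helper calls mirror the deleted lines; checkCloudNode is a
// hypothetical wrapper and the import paths are assumed from the in-tree
// controller utilities of this era.
package nodelifecycle

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	clientset "k8s.io/client-go/kubernetes"
	cloudprovider "k8s.io/cloud-provider"
	"k8s.io/klog"
	"k8s.io/kubernetes/pkg/controller"
	nodeutil "k8s.io/kubernetes/pkg/controller/util/node"
)

// checkCloudNode taints a not-ready node that the cloud provider reports as
// shut down, and force-deletes it once the cloud provider no longer knows it.
func checkCloudNode(kubeClient clientset.Interface, cloud cloudprovider.Interface, node *v1.Node) error {
	shutdown, err := nodeutil.ShutdownInCloudProvider(context.TODO(), cloud, node)
	if err != nil {
		klog.Errorf("Error determining if node %v shutdown in cloud: %v", node.Name, err)
	}
	if shutdown && err == nil {
		// The node still exists but is powered off: keep it and mark it
		// with the shutdown taint instead of deleting it.
		return controller.AddOrUpdateTaintOnNode(kubeClient, node.Name, controller.ShutdownTaint)
	}
	exists, err := nodeutil.ExistsInCloudProvider(cloud, types.NodeName(node.Name))
	if err != nil {
		return fmt.Errorf("error determining if node %v exists in cloud: %v", node.Name, err)
	}
	if !exists {
		// Kubelet is not reporting and the cloud provider says the node is
		// gone: delete it without waiting for any grace period.
		go func(nodeName string) {
			defer utilruntime.HandleCrash()
			if err := nodeutil.ForcefullyDeleteNode(kubeClient, nodeName); err != nil {
				klog.Errorf("Unable to forcefully delete node %q: %v", nodeName, err)
			}
		}(node.Name)
	}
	return nil
}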
@@ -22,7 +22,6 @@ limitations under the License.
 package nodelifecycle
 
 import (
-	"context"
 	"fmt"
 	"hash/fnv"
 	"io"
@@ -37,7 +36,6 @@ import (
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
-	"k8s.io/apimachinery/pkg/types"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/wait"
 	utilfeature "k8s.io/apiserver/pkg/util/feature"
@@ -54,7 +52,6 @@ import (
 	"k8s.io/client-go/tools/record"
 	"k8s.io/client-go/util/flowcontrol"
 	"k8s.io/client-go/util/workqueue"
-	cloudprovider "k8s.io/cloud-provider"
 	v1node "k8s.io/kubernetes/pkg/api/v1/node"
 	"k8s.io/kubernetes/pkg/controller"
 	"k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler"
@@ -150,7 +147,6 @@ type Controller struct {
 	taintManager *scheduler.NoExecuteTaintManager
 
 	podInformerSynced cache.InformerSynced
-	cloud             cloudprovider.Interface
 	kubeClient        clientset.Interface
 
 	// This timestamp is to be used instead of LastProbeTime stored in Condition. We do this
@@ -179,12 +175,10 @@ type Controller struct {
 	daemonSetStore          extensionslisters.DaemonSetLister
 	daemonSetInformerSynced cache.InformerSynced
 
-	leaseLister                 coordlisters.LeaseLister
-	leaseInformerSynced         cache.InformerSynced
-	nodeLister                  corelisters.NodeLister
-	nodeInformerSynced          cache.InformerSynced
-	nodeExistsInCloudProvider   func(types.NodeName) (bool, error)
-	nodeShutdownInCloudProvider func(context.Context, *v1.Node) (bool, error)
+	leaseLister         coordlisters.LeaseLister
+	leaseInformerSynced cache.InformerSynced
+	nodeLister          corelisters.NodeLister
+	nodeInformerSynced  cache.InformerSynced
 
 	recorder record.EventRecorder
 
@@ -247,7 +241,6 @@ func NewNodeLifecycleController(
 	podInformer coreinformers.PodInformer,
 	nodeInformer coreinformers.NodeInformer,
 	daemonSetInformer extensionsinformers.DaemonSetInformer,
-	cloud cloudprovider.Interface,
 	kubeClient clientset.Interface,
 	nodeMonitorPeriod time.Duration,
 	nodeStartupGracePeriod time.Duration,
@@ -280,17 +273,10 @@ func NewNodeLifecycleController(
 	}
 
 	nc := &Controller{
-		cloud:         cloud,
-		kubeClient:    kubeClient,
-		now:           metav1.Now,
-		knownNodeSet:  make(map[string]*v1.Node),
-		nodeHealthMap: make(map[string]*nodeHealthData),
-		nodeExistsInCloudProvider: func(nodeName types.NodeName) (bool, error) {
-			return nodeutil.ExistsInCloudProvider(cloud, nodeName)
-		},
-		nodeShutdownInCloudProvider: func(ctx context.Context, node *v1.Node) (bool, error) {
-			return nodeutil.ShutdownInCloudProvider(ctx, cloud, node)
-		},
+		kubeClient:              kubeClient,
+		now:                     metav1.Now,
+		knownNodeSet:            make(map[string]*v1.Node),
+		nodeHealthMap:           make(map[string]*nodeHealthData),
 		recorder:                recorder,
 		nodeMonitorPeriod:       nodeMonitorPeriod,
 		nodeStartupGracePeriod:  nodeStartupGracePeriod,
@@ -779,11 +765,6 @@ func (nc *Controller) monitorNodeHealth() error {
 						klog.V(2).Infof("Node %s is ready again, cancelled pod eviction", node.Name)
 					}
 				}
-				// remove shutdown taint this is needed always depending do we use taintbased or not
-				err := nc.markNodeAsNotShutdown(node)
-				if err != nil {
-					klog.Errorf("Failed to remove taints from node %v. Will retry in next iteration.", node.Name)
-				}
 			}
 
 			// Report node event.
@@ -793,42 +774,6 @@ func (nc *Controller) monitorNodeHealth() error {
 					utilruntime.HandleError(fmt.Errorf("Unable to mark all pods NotReady on node %v: %v", node.Name, err))
 				}
 			}
-
-			// Check with the cloud provider to see if the node still exists. If it
-			// doesn't, delete the node immediately.
-			if currentReadyCondition.Status != v1.ConditionTrue && nc.cloud != nil {
-				// check is node shutdowned, if yes do not deleted it. Instead add taint
-				shutdown, err := nc.nodeShutdownInCloudProvider(context.TODO(), node)
-				if err != nil {
-					klog.Errorf("Error determining if node %v shutdown in cloud: %v", node.Name, err)
-				}
-				// node shutdown
-				if shutdown && err == nil {
-					err = controller.AddOrUpdateTaintOnNode(nc.kubeClient, node.Name, controller.ShutdownTaint)
-					if err != nil {
-						klog.Errorf("Error patching node taints: %v", err)
-					}
-					continue
-				}
-				exists, err := nc.nodeExistsInCloudProvider(types.NodeName(node.Name))
-				if err != nil {
-					klog.Errorf("Error determining if node %v exists in cloud: %v", node.Name, err)
-					continue
-				}
-				if !exists {
-					klog.V(2).Infof("Deleting node (no longer present in cloud provider): %s", node.Name)
-					nodeutil.RecordNodeEvent(nc.recorder, node.Name, string(node.UID), v1.EventTypeNormal, "DeletingNode", fmt.Sprintf("Deleting Node %v because it's not present according to cloud provider", node.Name))
-					go func(nodeName string) {
-						defer utilruntime.HandleCrash()
-						// Kubelet is not reporting and Cloud Provider says node
-						// is gone. Delete it without worrying about grace
-						// periods.
-						if err := nodeutil.ForcefullyDeleteNode(nc.kubeClient, nodeName); err != nil {
-							klog.Errorf("Unable to forcefully delete node %q: %v", nodeName, err)
-						}
-					}(node.Name)
-				}
-			}
 		}
 	}
 	nc.handleDisruption(zoneToNodeConditions, nodes)
@@ -1268,17 +1213,6 @@ func (nc *Controller) markNodeAsReachable(node *v1.Node) (bool, error) {
 	return nc.zoneNoExecuteTainter[utilnode.GetZoneKey(node)].Remove(node.Name), nil
 }
 
-func (nc *Controller) markNodeAsNotShutdown(node *v1.Node) error {
-	nc.evictorLock.Lock()
-	defer nc.evictorLock.Unlock()
-	err := controller.RemoveTaintOffNode(nc.kubeClient, node.Name, node, controller.ShutdownTaint)
-	if err != nil {
-		klog.Errorf("Failed to remove taint from node %v: %v", node.Name, err)
-		return err
-	}
-	return nil
-}
-
 // ComputeZoneState returns a slice of NodeReadyConditions for all Nodes in a given zone.
 // The zone is considered:
 // - fullyDisrupted if there're no Ready Nodes,