Mirror of https://github.com/k3s-io/kubernetes.git
Revert "Revert "Modify nodes to register directly with the master.""
This reverts commit c53786ab31.
@@ -31,6 +31,7 @@ import (
     "time"
 
     "github.com/GoogleCloudPlatform/kubernetes/pkg/api"
+    apierrors "github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors"
     "github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource"
     "github.com/GoogleCloudPlatform/kubernetes/pkg/api/validation"
     "github.com/GoogleCloudPlatform/kubernetes/pkg/client"
@@ -66,11 +67,6 @@ const (
     // Max amount of time to wait for the container runtime to come up.
     maxWaitForContainerRuntime = 5 * time.Minute
 
-    // Initial node status update frequency and incremental frequency, for faster cluster startup.
-    // The update frequency will be incremented linearly, until it reaches status_update_frequency.
-    initialNodeStatusUpdateFrequency = 100 * time.Millisecond
-    nodeStatusUpdateFrequencyInc     = 500 * time.Millisecond
-
     // nodeStatusUpdateRetry specifies how many times kubelet retries when posting node status failed.
     nodeStatusUpdateRetry = 5
 
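
The constants deleted here drove a linear ramp: right after startup the kubelet posted status frequently, stretching the interval until it reached the steady update frequency. Since nodes now register themselves explicitly before status updates begin (see syncNodeStatus below), the ramp goes away. A minimal, self-contained sketch of the removed mechanism, where steadyUpdateFrequency is an illustrative stand-in for kl.nodeStatusUpdateFrequency:

package main

import (
    "fmt"
    "time"
)

const (
    initialNodeStatusUpdateFrequency = 100 * time.Millisecond
    nodeStatusUpdateFrequencyInc     = 500 * time.Millisecond
    steadyUpdateFrequency            = 2 * time.Second // illustrative stand-in
)

func main() {
    // Post status quickly at first, then back off linearly toward the
    // steady frequency; after this loop a fixed-interval loop takes over.
    for freq := initialNodeStatusUpdateFrequency; freq < steadyUpdateFrequency; freq += nodeStatusUpdateFrequencyInc {
        fmt.Printf("next status update in %v\n", freq)
    }
}
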
@@ -122,6 +118,7 @@ func NewMainKubelet(
     pullBurst int,
     containerGCPolicy ContainerGCPolicy,
     sourcesReady SourcesReadyFn,
+    registerNode bool,
     clusterDomain string,
     clusterDNS net.IP,
     masterServiceNamespace string,
@@ -224,6 +221,7 @@ func NewMainKubelet(
         readinessManager: readinessManager,
         httpClient:       &http.Client{},
         sourcesReady:     sourcesReady,
+        registerNode:     registerNode,
         clusterDomain:    clusterDomain,
         clusterDNS:       clusterDNS,
         serviceLister:    serviceLister,
@@ -373,6 +371,9 @@ type Kubelet struct {
     // cAdvisor used for container information.
     cadvisor cadvisor.Interface
 
+    // Set to true to have the node register itself with the apiserver.
+    registerNode bool
+
     // If non-empty, use this for container DNS search.
     clusterDomain string
 
@@ -657,26 +658,22 @@ func (kl *Kubelet) Run(updates <-chan PodUpdate) {
         glog.Infof("Running in container %q", kl.resourceContainer)
     }
 
-    err := kl.imageManager.Start()
-    if err != nil {
+    if err := kl.imageManager.Start(); err != nil {
         kl.recorder.Eventf(kl.nodeRef, "kubeletSetupFailed", "Failed to start ImageManager %v", err)
         glog.Errorf("Failed to start ImageManager, images may not be garbage collected: %v", err)
     }
 
-    err = kl.cadvisor.Start()
-    if err != nil {
+    if err := kl.cadvisor.Start(); err != nil {
         kl.recorder.Eventf(kl.nodeRef, "kubeletSetupFailed", "Failed to start CAdvisor %v", err)
         glog.Errorf("Failed to start CAdvisor, system may not be properly monitored: %v", err)
     }
 
-    err = kl.containerManager.Start()
-    if err != nil {
+    if err := kl.containerManager.Start(); err != nil {
         kl.recorder.Eventf(kl.nodeRef, "kubeletSetupFailed", "Failed to start ContainerManager %v", err)
         glog.Errorf("Failed to start ContainerManager, system may not be properly isolated: %v", err)
     }
 
-    err = kl.oomWatcher.Start(kl.nodeRef)
-    if err != nil {
+    if err := kl.oomWatcher.Start(kl.nodeRef); err != nil {
         kl.recorder.Eventf(kl.nodeRef, "kubeletSetupFailed", "Failed to start OOM watcher %v", err)
         glog.Errorf("Failed to start OOM watching: %v", err)
     }
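
The four startup calls above switch from a shared err variable to Go's statement-scoped error idiom, so each check stays local to its call and a stale err cannot leak between steps. A minimal illustration:

package main

import (
    "errors"
    "fmt"
)

func startComponent() error { return errors.New("boom") }

func main() {
    // err is scoped to the if statement and invisible afterwards,
    // so the next startup step cannot accidentally reuse it.
    if err := startComponent(); err != nil {
        fmt.Printf("Failed to start component: %v\n", err)
    }
}
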
@@ -688,20 +685,83 @@ func (kl *Kubelet) Run(updates <-chan PodUpdate) {
     kl.syncLoop(updates, kl)
 }
 
+func (kl *Kubelet) initialNodeStatus() (*api.Node, error) {
+    node := &api.Node{
+        ObjectMeta: api.ObjectMeta{
+            Name:   kl.hostname,
+            Labels: map[string]string{"kubernetes.io/hostname": kl.hostname},
+        },
+    }
+    if kl.cloud != nil {
+        instances, ok := kl.cloud.Instances()
+        if !ok {
+            return nil, fmt.Errorf("failed to get instances from cloud provider")
+        }
+        // TODO(roberthbailey): Can we do this without having credentials to talk
+        // to the cloud provider?
+        instanceID, err := instances.ExternalID(kl.hostname)
+        if err != nil {
+            return nil, fmt.Errorf("failed to get instance ID from cloud provider: %v", err)
+        }
+        node.Spec.ExternalID = instanceID
+    } else {
+        node.Spec.ExternalID = kl.hostname
+    }
+    if err := kl.setNodeStatus(node); err != nil {
+        return nil, err
+    }
+    return node, nil
+}
+
+// registerWithApiserver registers the node with the cluster master.
+func (kl *Kubelet) registerWithApiserver() {
+    step := 100 * time.Millisecond
+    for {
+        time.Sleep(step)
+        step = step * 2
+        if step >= 7*time.Second {
+            step = 7 * time.Second
+        }
+
+        node, err := kl.initialNodeStatus()
+        if err != nil {
+            glog.Errorf("Unable to construct api.Node object for kubelet: %v", err)
+            continue
+        }
+        glog.V(2).Infof("Attempting to register node %s", node.Name)
+        if _, err := kl.kubeClient.Nodes().Create(node); err != nil {
+            if apierrors.IsAlreadyExists(err) {
+                currentNode, err := kl.kubeClient.Nodes().Get(kl.hostname)
+                if err != nil {
+                    glog.Errorf("error getting node %q: %v", kl.hostname, err)
+                    continue
+                }
+                if currentNode == nil {
+                    glog.Errorf("no node instance returned for %q", kl.hostname)
+                    continue
+                }
+                if currentNode.Spec.ExternalID == node.Spec.ExternalID {
+                    glog.Infof("Node %s was previously registered", node.Name)
+                    return
+                }
+            }
+            glog.V(2).Infof("Unable to register %s with the apiserver: %v", node.Name, err)
+            continue
+        }
+        glog.Infof("Successfully registered node %s", node.Name)
+        return
+    }
+}
+
 // syncNodeStatus periodically synchronizes node status to master.
 func (kl *Kubelet) syncNodeStatus() {
     if kl.kubeClient == nil {
         return
     }
-    glog.Infof("Starting node status updates")
-    for feq := initialNodeStatusUpdateFrequency; feq < kl.nodeStatusUpdateFrequency; feq += nodeStatusUpdateFrequencyInc {
-        select {
-        case <-time.After(feq):
-            if err := kl.updateNodeStatus(); err != nil {
-                glog.Errorf("Unable to update node status: %v", err)
-            }
-        }
-    }
+    if kl.registerNode {
+        kl.registerWithApiserver()
+    }
+    glog.Infof("Starting node status updates")
     for {
         select {
         case <-time.After(kl.nodeStatusUpdateFrequency):
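
registerWithApiserver loops with a doubling delay capped at 7 seconds until the node is created, returning early if an existing registration has a matching ExternalID. A standalone sketch of the same capped-backoff shape; tryRegister is a hypothetical stand-in for the Nodes().Create call:

package main

import (
    "errors"
    "fmt"
    "time"
)

var attempts int

// tryRegister is a hypothetical stand-in for creating the Node object.
func tryRegister() error {
    attempts++
    if attempts < 4 {
        return errors.New("apiserver not ready")
    }
    return nil
}

func main() {
    // Sleep first, double the step each round, and cap it at 7s so the
    // retry cadence settles instead of growing without bound.
    step := 100 * time.Millisecond
    for {
        time.Sleep(step)
        step = step * 2
        if step >= 7*time.Second {
            step = 7 * time.Second
        }
        if err := tryRegister(); err != nil {
            fmt.Printf("Unable to register, next attempt in %v: %v\n", step, err)
            continue
        }
        fmt.Println("Successfully registered")
        return
    }
}
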
@@ -1707,14 +1767,13 @@ func (kl *Kubelet) reconcileCBR0(podCIDR string) error {
 // updateNodeStatus updates node status to master with retries.
 func (kl *Kubelet) updateNodeStatus() error {
     for i := 0; i < nodeStatusUpdateRetry; i++ {
-        err := kl.tryUpdateNodeStatus()
-        if err != nil {
-            glog.Errorf("error updating node status, will retry: %v", err)
+        if err := kl.tryUpdateNodeStatus(); err != nil {
+            glog.Errorf("Error updating node status, will retry: %v", err)
         } else {
             return nil
         }
     }
-    return fmt.Errorf("Update node status exceeds retry count")
+    return fmt.Errorf("update node status exceeds retry count")
 }
 
 func (kl *Kubelet) recordNodeOnlineEvent() {
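
Two things change in this hunk: the error check is folded into the if statement, and the fmt.Errorf string is lowercased, following the Go convention that error strings are uncapitalized while log messages may start with a capital. A runnable sketch of the bounded-retry pattern, with updateOnce as a hypothetical stand-in for tryUpdateNodeStatus:

package main

import (
    "errors"
    "fmt"
)

const nodeStatusUpdateRetry = 5

var calls int

// updateOnce is a hypothetical stand-in for tryUpdateNodeStatus.
func updateOnce() error {
    calls++
    if calls < 3 {
        return errors.New("apiserver timeout")
    }
    return nil
}

func updateWithRetries() error {
    // Try a bounded number of times, returning nil on the first success.
    for i := 0; i < nodeStatusUpdateRetry; i++ {
        if err := updateOnce(); err != nil {
            fmt.Printf("Error updating node status, will retry: %v\n", err)
        } else {
            return nil
        }
    }
    return fmt.Errorf("update node status exceeds retry count")
}

func main() {
    fmt.Println(updateWithRetries())
}
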
@@ -1738,15 +1797,36 @@ func (kl *Kubelet) recordNodeUnschedulableEvent() {
 // Maintains Node.Spec.Unschedulable value from previous run of tryUpdateNodeStatus()
 var oldNodeUnschedulable bool
 
-// tryUpdateNodeStatus tries to update node status to master. If ReconcileCBR0
-// is set, this function will also confirm that cbr0 is configured correctly.
-func (kl *Kubelet) tryUpdateNodeStatus() error {
-    node, err := kl.kubeClient.Nodes().Get(kl.hostname)
-    if err != nil {
-        return fmt.Errorf("error getting node %q: %v", kl.hostname, err)
-    }
-    if node == nil {
-        return fmt.Errorf("no node instance returned for %q", kl.hostname)
+// setNodeStatus fills in the Status fields of the given Node, overwriting
+// any fields that are currently set.
+func (kl *Kubelet) setNodeStatus(node *api.Node) error {
+    // Set addresses for the node.
+    if kl.cloud != nil {
+        instances, ok := kl.cloud.Instances()
+        if !ok {
+            return fmt.Errorf("failed to get instances from cloud provider")
+        }
+        // TODO(roberthbailey): Can we do this without having credentials to talk
+        // to the cloud provider?
+        nodeAddresses, err := instances.NodeAddresses(kl.hostname)
+        if err != nil {
+            return fmt.Errorf("failed to get node address from cloud provider: %v", err)
+        }
+        node.Status.Addresses = nodeAddresses
+    } else {
+        addr := net.ParseIP(kl.hostname)
+        if addr != nil {
+            node.Status.Addresses = []api.NodeAddress{{Type: api.NodeLegacyHostIP, Address: addr.String()}}
+        } else {
+            addrs, err := net.LookupIP(node.Name)
+            if err != nil {
+                return fmt.Errorf("can't get ip address of node %s: %v", node.Name, err)
+            } else if len(addrs) == 0 {
+                return fmt.Errorf("no ip address for node %v", node.Name)
+            } else {
+                node.Status.Addresses = []api.NodeAddress{{Type: api.NodeLegacyHostIP, Address: addrs[0].String()}}
+            }
+        }
     }
 
     networkConfigured := true
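
Without a cloud provider, setNodeStatus resolves the node address in two steps: treat the hostname as an IP literal, and only if that fails fall back to a DNS lookup, taking the first returned address. The same fallback extracted into a standalone helper; resolveNodeAddress is illustrative, not part of the kubelet:

package main

import (
    "fmt"
    "net"
)

// resolveNodeAddress mirrors the non-cloud branch of setNodeStatus:
// IP literal first, DNS lookup second.
func resolveNodeAddress(hostname string) (string, error) {
    if addr := net.ParseIP(hostname); addr != nil {
        return addr.String(), nil
    }
    addrs, err := net.LookupIP(hostname)
    if err != nil {
        return "", fmt.Errorf("can't get ip address of node %s: %v", hostname, err)
    }
    if len(addrs) == 0 {
        return "", fmt.Errorf("no ip address for node %v", hostname)
    }
    return addrs[0].String(), nil
}

func main() {
    fmt.Println(resolveNodeAddress("127.0.0.1"))
    fmt.Println(resolveNodeAddress("localhost"))
}
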
@@ -1761,7 +1841,13 @@ func (kl *Kubelet) tryUpdateNodeStatus() error {
     // cAdvisor locally, e.g. for test-cmd.sh, and in integration test.
     info, err := kl.GetCachedMachineInfo()
     if err != nil {
-        glog.Errorf("error getting machine info: %v", err)
+        // TODO(roberthbailey): This is required for test-cmd.sh to pass.
+        // See if the test should be updated instead.
+        node.Status.Capacity = api.ResourceList{
+            api.ResourceCPU:    *resource.NewMilliQuantity(0, resource.DecimalSI),
+            api.ResourceMemory: resource.MustParse("0Gi"),
+        }
+        glog.Errorf("Error getting machine info: %v", err)
     } else {
         node.Status.NodeInfo.MachineID = info.MachineID
         node.Status.NodeInfo.SystemUUID = info.SystemUUID
@@ -1780,7 +1866,7 @@ func (kl *Kubelet) tryUpdateNodeStatus() error {
 
     verinfo, err := kl.cadvisor.VersionInfo()
     if err != nil {
-        glog.Errorf("error getting version info: %v", err)
+        glog.Errorf("Error getting version info: %v", err)
     } else {
         node.Status.NodeInfo.KernelVersion = verinfo.KernelVersion
         node.Status.NodeInfo.OsImage = verinfo.ContainerOsVersion
@@ -1848,7 +1934,22 @@ func (kl *Kubelet) tryUpdateNodeStatus() error {
         }
         oldNodeUnschedulable = node.Spec.Unschedulable
     }
     return nil
 }
 
+// tryUpdateNodeStatus tries to update node status to master. If ReconcileCBR0
+// is set, this function will also confirm that cbr0 is configured correctly.
+func (kl *Kubelet) tryUpdateNodeStatus() error {
+    node, err := kl.kubeClient.Nodes().Get(kl.hostname)
+    if err != nil {
+        return fmt.Errorf("error getting node %q: %v", kl.hostname, err)
+    }
+    if node == nil {
+        return fmt.Errorf("no node instance returned for %q", kl.hostname)
+    }
+    if err := kl.setNodeStatus(node); err != nil {
+        return err
+    }
+    // Update the current status on the API server
+    _, err = kl.kubeClient.Nodes().UpdateStatus(node)
+    return err
+}
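
The re-added tryUpdateNodeStatus is now a thin read-modify-write cycle: fetch the Node, let setNodeStatus fill in its fields, then write only the status back with UpdateStatus. A toy sketch of that shape; Node and nodeClient are hypothetical stand-ins for api.Node and kl.kubeClient.Nodes():

package main

import (
    "errors"
    "fmt"
)

// Node and nodeClient are hypothetical stand-ins, not the Kubernetes API.
type Node struct {
    Name   string
    Status string
}

type nodeClient struct{ store map[string]*Node }

func (c *nodeClient) Get(name string) (*Node, error) {
    n, ok := c.store[name]
    if !ok {
        return nil, errors.New("not found")
    }
    return n, nil
}

func (c *nodeClient) UpdateStatus(n *Node) (*Node, error) {
    c.store[n.Name] = n
    return n, nil
}

// syncStatus mirrors the shape of tryUpdateNodeStatus: read the current
// object, fill in its status locally, then write the status back.
func syncStatus(c *nodeClient, name string) error {
    node, err := c.Get(name)
    if err != nil {
        return fmt.Errorf("error getting node %q: %v", name, err)
    }
    node.Status = "Ready"
    _, err = c.UpdateStatus(node)
    return err
}

func main() {
    c := &nodeClient{store: map[string]*Node{"node-1": {Name: "node-1"}}}
    fmt.Println(syncStatus(c, "node-1"), c.store["node-1"].Status)
}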