diff --git a/pkg/cloudprovider/providers/vsphere/nodemanager.go b/pkg/cloudprovider/providers/vsphere/nodemanager.go index 580dbae413d..a30b9135b01 100644 --- a/pkg/cloudprovider/providers/vsphere/nodemanager.go +++ b/pkg/cloudprovider/providers/vsphere/nodemanager.go @@ -251,6 +251,8 @@ func (nm *NodeManager) removeNode(node *v1.Node) { // GetNodeInfo returns a NodeInfo which datacenter, vm and vc server ip address. // This method returns an error if it is unable find node VCs and DCs listed in vSphere.conf // NodeInfo returned may not be updated to reflect current VM location. +// +// This method is a getter but it can cause side-effect of updating NodeInfo object. func (nm *NodeManager) GetNodeInfo(nodeName k8stypes.NodeName) (NodeInfo, error) { getNodeInfo := func(nodeName k8stypes.NodeName) *NodeInfo { nm.nodeInfoLock.RLock() @@ -259,42 +261,57 @@ func (nm *NodeManager) GetNodeInfo(nodeName k8stypes.NodeName) (NodeInfo, error) return nodeInfo } nodeInfo := getNodeInfo(nodeName) + var err error if nodeInfo == nil { - err := nm.RediscoverNode(nodeName) + // Rediscover node if no NodeInfo found. + glog.V(4).Infof("No VM found for node %q. Initiating rediscovery.", convertToString(nodeName)) + err = nm.RediscoverNode(nodeName) if err != nil { - glog.V(4).Infof("error %q node info for node %q not found", err, convertToString(nodeName)) + glog.Errorf("Error %q node info for node %q not found", err, convertToString(nodeName)) return NodeInfo{}, err } nodeInfo = getNodeInfo(nodeName) + } else { + // Renew the found NodeInfo to avoid stale vSphere connection. + glog.V(4).Infof("Renewing NodeInfo %+v for node %q", nodeInfo, convertToString(nodeName)) + nodeInfo, err = nm.renewNodeInfo(nodeInfo, true) + if err != nil { + glog.Errorf("Error %q occurred while renewing NodeInfo for %q", err, convertToString(nodeName)) + return NodeInfo{}, err + } + nm.addNodeInfo(convertToString(nodeName), nodeInfo) } return *nodeInfo, nil } +// GetNodeDetails returns NodeDetails for all the discovered nodes. +// +// This method is a getter but it can cause side-effect of updating NodeInfo objects. func (nm *NodeManager) GetNodeDetails() ([]NodeDetails, error) { nm.nodeInfoLock.RLock() defer nm.nodeInfoLock.RUnlock() var nodeDetails []NodeDetails vsphereSessionRefreshMap := make(map[string]bool) - // Create context - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - for nodeName, nodeInfo := range nm.nodeInfoMap { - nodeDetails = append(nodeDetails, NodeDetails{nodeName, nodeInfo.vm}) + var n *NodeInfo + var err error if vsphereSessionRefreshMap[nodeInfo.vcServer] { - continue + // vSphere connection already refreshed. Just refresh VM and Datacenter. + glog.V(4).Infof("Renewing NodeInfo %+v for node %q. No new connection needed.", nodeInfo, nodeName) + n, err = nm.renewNodeInfo(nodeInfo, false) + } else { + // Refresh vSphere connection, VM and Datacenter. + glog.V(4).Infof("Renewing NodeInfo %+v for node %q with new vSphere connection.", nodeInfo, nodeName) + n, err = nm.renewNodeInfo(nodeInfo, true) + vsphereSessionRefreshMap[nodeInfo.vcServer] = true } - vsphereInstance := nm.vsphereInstanceMap[nodeInfo.vcServer] - if vsphereInstance == nil { - err := fmt.Errorf("vSphereInstance for vc server %q not found while looking for vm %q", nodeInfo.vcServer, nodeInfo.vm) - return nil, err - } - err := vsphereInstance.conn.Connect(ctx) if err != nil { return nil, err } - vsphereSessionRefreshMap[nodeInfo.vcServer] = true + nm.nodeInfoMap[nodeName] = n + glog.V(4).Infof("Updated NodeInfo %q for node %q.", nodeInfo, nodeName) + nodeDetails = append(nodeDetails, NodeDetails{nodeName, n.vm}) } return nodeDetails, nil } @@ -317,3 +334,23 @@ func (nm *NodeManager) GetVSphereInstance(nodeName k8stypes.NodeName) (VSphereIn } return *vsphereInstance, nil } + +// renewNodeInfo renews vSphere connection, VirtualMachine and Datacenter for NodeInfo instance. +func (nm *NodeManager) renewNodeInfo(nodeInfo *NodeInfo, reconnect bool) (*NodeInfo, error) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + vsphereInstance := nm.vsphereInstanceMap[nodeInfo.vcServer] + if vsphereInstance == nil { + err := fmt.Errorf("vSphereInstance for vSphere %q not found while refershing NodeInfo for VM %q", nodeInfo.vcServer, nodeInfo.vm) + return nil, err + } + if reconnect { + err := vsphereInstance.conn.Connect(ctx) + if err != nil { + return nil, err + } + } + vm := nodeInfo.vm.RenewVM(vsphereInstance.conn.GoVmomiClient) + return &NodeInfo{vm: &vm, dataCenter: vm.Datacenter, vcServer: nodeInfo.vcServer}, nil +} diff --git a/pkg/cloudprovider/providers/vsphere/vclib/virtualmachine.go b/pkg/cloudprovider/providers/vsphere/vclib/virtualmachine.go index db45b8e1935..679d827adc3 100644 --- a/pkg/cloudprovider/providers/vsphere/vclib/virtualmachine.go +++ b/pkg/cloudprovider/providers/vsphere/vclib/virtualmachine.go @@ -23,6 +23,7 @@ import ( "time" "github.com/golang/glog" + "github.com/vmware/govmomi" "github.com/vmware/govmomi/object" "github.com/vmware/govmomi/property" "github.com/vmware/govmomi/vim25/mo" @@ -400,3 +401,10 @@ func (vm *VirtualMachine) deleteController(ctx context.Context, controllerDevice } return nil } + +// RenewVM renews this virtual machine with new client connection. +func (vm *VirtualMachine) RenewVM(client *govmomi.Client) VirtualMachine { + dc := Datacenter{Datacenter: object.NewDatacenter(client.Client, vm.Datacenter.Reference())} + newVM := object.NewVirtualMachine(client.Client, vm.VirtualMachine.Reference()) + return VirtualMachine{VirtualMachine: newVM, Datacenter: &dc} +}