Add InstanceExists* methods to cloud provider interface for CCM

This commit is contained in:
Josh Horwitz 2017-08-21 14:55:43 -04:00 committed by Josh Horwitz
parent 73a6ee1dcc
commit 2f1ea47c83
13 changed files with 235 additions and 27 deletions

View File

@ -124,7 +124,7 @@ type Instances interface {
// ProviderID is a unique identifier of the node. This will not be called
// from the node whose nodeaddresses are being queried. i.e. local metadata
// services cannot be used in this method to obtain nodeaddresses
NodeAddressesByProviderID(providerId string) ([]v1.NodeAddress, error)
NodeAddressesByProviderID(providerID string) ([]v1.NodeAddress, error)
// ExternalID returns the cloud provider ID of the node with the specified NodeName.
// Note that if the instance does not exist or is no longer running, we must return ("", cloudprovider.InstanceNotFound)
ExternalID(nodeName types.NodeName) (string, error)
@ -140,6 +140,9 @@ type Instances interface {
// CurrentNodeName returns the name of the node we are currently running on
// On most clouds (e.g. GCE) this is the hostname, so we provide the hostname
CurrentNodeName(hostname string) (types.NodeName, error)
// InstanceExistsByProviderID returns true if the instance for the given provider id still is running.
// If false is returned with no error, the instance will be immediately deleted.
InstanceExistsByProviderID(providerID string) (bool, error)
}
// Route is a representation of an advanced routing rule.

View File

@ -1101,6 +1101,11 @@ func (c *Cloud) ExternalID(nodeName types.NodeName) (string, error) {
return orEmpty(instance.InstanceId), nil
}
// InstanceExistsByProviderID returns true if the instance with the given provider id still exists and is running.
func (c *Cloud) InstanceExistsByProviderID(providerID string) (bool, error) {
return false, errors.New("unimplemented")
}
// InstanceID returns the cloud provider ID of the node with the specified nodeName.
func (c *Cloud) InstanceID(nodeName types.NodeName) (string, error) {
// In the future it is possible to also return an endpoint as:

View File

@ -17,6 +17,7 @@ limitations under the License.
package azure
import (
"errors"
"fmt"
"k8s.io/api/core/v1"
@ -86,6 +87,11 @@ func (az *Cloud) ExternalID(name types.NodeName) (string, error) {
return az.InstanceID(name)
}
// InstanceExistsByProviderID returns true if the instance with the given provider id still exists and is running.
func (az *Cloud) InstanceExistsByProviderID(providerID string) (bool, error) {
return false, errors.New("unimplemented")
}
func (az *Cloud) isCurrentInstance(name types.NodeName) (bool, error) {
nodeName := mapNodeNameToVMName(name)
metadataName, err := az.metadata.Text("instance/compute/name")

View File

@ -47,8 +47,12 @@ type FakeUpdateBalancerCall struct {
// FakeCloud is a test-double implementation of Interface, LoadBalancer, Instances, and Routes. It is useful for testing.
type FakeCloud struct {
Exists bool
Err error
Exists bool
Err error
ExistsByProviderID bool
ErrByProviderID error
Calls []string
Addresses []v1.NodeAddress
ExtID map[types.NodeName]string
@ -234,6 +238,12 @@ func (f *FakeCloud) InstanceTypeByProviderID(providerID string) (string, error)
return f.InstanceTypes[types.NodeName(providerID)], nil
}
// InstanceExistsByProviderID returns true if the instance with the given provider id still exists and is running.
func (f *FakeCloud) InstanceExistsByProviderID(providerID string) (bool, error) {
f.addCall("instance-exists-by-provider-id")
return f.ExistsByProviderID, f.ErrByProviderID
}
// List is a test-spy implementation of Instances.List.
// It adds an entry "list" into the internal method call record.
func (f *FakeCloud) List(filter string) ([]types.NodeName, error) {

View File

@ -17,6 +17,7 @@ limitations under the License.
package gce
import (
"errors"
"fmt"
"net/http"
"strconv"
@ -151,6 +152,11 @@ func (gce *GCECloud) ExternalID(nodeName types.NodeName) (string, error) {
return strconv.FormatUint(inst.ID, 10), nil
}
// InstanceExistsByProviderID returns true if the instance with the given provider id still exists and is running.
func (gce *GCECloud) InstanceExistsByProviderID(providerID string) (bool, error) {
return false, errors.New("unimplemented")
}
// InstanceID returns the cloud provider ID of the node with the specified NodeName.
func (gce *GCECloud) InstanceID(nodeName types.NodeName) (string, error) {
instanceName := mapNodeNameToInstanceName(nodeName)

View File

@ -110,6 +110,11 @@ func (i *Instances) ExternalID(name types.NodeName) (string, error) {
return srv.ID, nil
}
// InstanceExistsByProviderID returns true if the instance with the given provider id still exists and is running.
func (i *Instances) InstanceExistsByProviderID(providerID string) (bool, error) {
return false, errors.New("unimplemented")
}
// InstanceID returns the kubelet's cloud provider ID.
func (os *OpenStack) InstanceID() (string, error) {
if len(os.localInstanceID) == 0 {

View File

@ -211,6 +211,11 @@ func (v *OVirtCloud) ExternalID(nodeName types.NodeName) (string, error) {
return instance.UUID, nil
}
// InstanceExistsByProviderID returns true if the instance with the given provider id still exists and is running.
func (v *OVirtCloud) InstanceExistsByProviderID(providerID string) (bool, error) {
return false, errors.New("unimplemented")
}
// InstanceID returns the cloud provider ID of the node with the specified NodeName.
func (v *OVirtCloud) InstanceID(nodeName types.NodeName) (string, error) {
name := mapNodeNameToInstanceName(nodeName)

View File

@ -470,6 +470,11 @@ func (pc *PCCloud) ExternalID(nodeName k8stypes.NodeName) (string, error) {
}
}
// InstanceExistsByProviderID returns true if the instance with the given provider id still exists and is running.
func (pc *PCCloud) InstanceExistsByProviderID(providerID string) (bool, error) {
return false, errors.New("unimplemented")
}
// InstanceID returns the cloud provider ID of the specified instance.
func (pc *PCCloud) InstanceID(nodeName k8stypes.NodeName) (string, error) {
name := string(nodeName)

View File

@ -436,6 +436,11 @@ func (i *Instances) ExternalID(nodeName types.NodeName) (string, error) {
return probeInstanceID(i.compute, serverName)
}
// InstanceExistsByProviderID returns true if the instance with the given provider id still exists and is running.
func (i *Instances) InstanceExistsByProviderID(providerID string) (bool, error) {
return false, errors.New("unimplemented")
}
// InstanceID returns the cloud provider ID of the kubelet's instance.
func (rs *Rackspace) InstanceID() (string, error) {
return readInstanceID()

View File

@ -376,6 +376,11 @@ func (vs *VSphere) ExternalID(nodeName k8stypes.NodeName) (string, error) {
return vs.InstanceID(nodeName)
}
// InstanceExistsByProviderID returns true if the instance with the given provider id still exists and is running.
func (vs *VSphere) InstanceExistsByProviderID(providerID string) (bool, error) {
return false, errors.New("unimplemented")
}
// InstanceID returns the cloud provider ID of the node with the specified Name.
func (vs *VSphere) InstanceID(nodeName k8stypes.NodeName) (string, error) {
if vs.localInstanceID == nodeNameToVMName(nodeName) {

View File

@ -237,26 +237,36 @@ func (cnc *CloudNodeController) MonitorNode() {
if currentReadyCondition.Status != v1.ConditionTrue {
// Check with the cloud provider to see if the node still exists. If it
// doesn't, delete the node immediately.
if _, err := instances.ExternalID(types.NodeName(node.Name)); err != nil {
if err == cloudprovider.InstanceNotFound {
glog.V(2).Infof("Deleting node no longer present in cloud provider: %s", node.Name)
ref := &v1.ObjectReference{
Kind: "Node",
Name: node.Name,
UID: types.UID(node.UID),
Namespace: "",
}
glog.V(2).Infof("Recording %s event message for node %s", "DeletingNode", node.Name)
cnc.recorder.Eventf(ref, v1.EventTypeNormal, fmt.Sprintf("Deleting Node %v because it's not present according to cloud provider", node.Name), "Node %s event: %s", node.Name, "DeletingNode")
go func(nodeName string) {
defer utilruntime.HandleCrash()
if err := cnc.kubeClient.CoreV1().Nodes().Delete(node.Name, nil); err != nil {
glog.Errorf("unable to delete node %q: %v", node.Name, err)
}
}(node.Name)
}
glog.Errorf("Error getting node data from cloud: %v", err)
exists, err := ensureNodeExistsByProviderIDOrName(instances, node)
if err != nil {
glog.Errorf("Error getting data for node %s from cloud: %v", node.Name, err)
continue
}
if exists {
// Continue checking the remaining nodes since the current one is fine.
continue
}
glog.V(2).Infof("Deleting node since it is no longer present in cloud provider: %s", node.Name)
ref := &v1.ObjectReference{
Kind: "Node",
Name: node.Name,
UID: types.UID(node.UID),
Namespace: "",
}
glog.V(2).Infof("Recording %s event message for node %s", "DeletingNode", node.Name)
cnc.recorder.Eventf(ref, v1.EventTypeNormal, fmt.Sprintf("Deleting Node %v because it's not present according to cloud provider", node.Name), "Node %s event: %s", node.Name, "DeletingNode")
go func(nodeName string) {
defer utilruntime.HandleCrash()
if err := cnc.kubeClient.CoreV1().Nodes().Delete(nodeName, nil); err != nil {
glog.Errorf("unable to delete node %q: %v", nodeName, err)
}
}(node.Name)
}
}
}
@ -372,6 +382,25 @@ func excludeTaintFromList(taints []v1.Taint, toExclude v1.Taint) []v1.Taint {
return newTaints
}
// ensureNodeExistsByProviderIDOrName first checks if the instance exists by the provider id and then by node name
func ensureNodeExistsByProviderIDOrName(instances cloudprovider.Instances, node *v1.Node) (bool, error) {
exists, err := instances.InstanceExistsByProviderID(node.Spec.ProviderID)
if err != nil {
providerIDErr := err
_, err = instances.ExternalID(types.NodeName(node.Name))
if err == nil {
return true, nil
}
if err == cloudprovider.InstanceNotFound {
return false, nil
}
return false, fmt.Errorf("InstanceExistsByProviderID: Error fetching by providerID: %v Error fetching by NodeName: %v", providerIDErr, err)
}
return exists, nil
}
func getNodeAddressesByProviderIDOrName(instances cloudprovider.Instances, node *v1.Node) ([]v1.NodeAddress, error) {
nodeAddresses, err := instances.NodeAddressesByProviderID(node.Spec.ProviderID)
if err != nil {

View File

@ -17,6 +17,8 @@ limitations under the License.
package cloud
import (
"errors"
"reflect"
"testing"
"time"
@ -39,6 +41,119 @@ import (
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
)
func TestEnsureNodeExistsByProviderIDOrNodeName(t *testing.T) {
testCases := []struct {
testName string
node *v1.Node
expectedCalls []string
existsByNodeName bool
existsByProviderID bool
nodeNameErr error
providerIDErr error
}{
{
testName: "node exists by provider id",
existsByProviderID: true,
providerIDErr: nil,
existsByNodeName: false,
nodeNameErr: errors.New("unimplemented"),
expectedCalls: []string{"instance-exists-by-provider-id"},
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
},
Spec: v1.NodeSpec{
ProviderID: "node0",
},
},
},
{
testName: "does not exist by provider id",
existsByProviderID: false,
providerIDErr: nil,
existsByNodeName: false,
nodeNameErr: errors.New("unimplemented"),
expectedCalls: []string{"instance-exists-by-provider-id"},
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
},
Spec: v1.NodeSpec{
ProviderID: "node0",
},
},
},
{
testName: "node exists by node name",
existsByProviderID: false,
providerIDErr: errors.New("unimplemented"),
existsByNodeName: true,
nodeNameErr: nil,
expectedCalls: []string{"instance-exists-by-provider-id", "external-id"},
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
},
Spec: v1.NodeSpec{
ProviderID: "node0",
},
},
},
{
testName: "does not exist by node name",
existsByProviderID: false,
providerIDErr: errors.New("unimplemented"),
existsByNodeName: false,
nodeNameErr: cloudprovider.InstanceNotFound,
expectedCalls: []string{"instance-exists-by-provider-id", "external-id"},
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
},
Spec: v1.NodeSpec{
ProviderID: "node0",
},
},
},
}
for _, tc := range testCases {
t.Run(tc.testName, func(t *testing.T) {
fc := &fakecloud.FakeCloud{
Exists: tc.existsByNodeName,
ExistsByProviderID: tc.existsByProviderID,
Err: tc.nodeNameErr,
ErrByProviderID: tc.providerIDErr,
}
instances, _ := fc.Instances()
exists, err := ensureNodeExistsByProviderIDOrName(instances, tc.node)
if err != nil {
t.Fatal(err)
}
if !reflect.DeepEqual(fc.Calls, tc.expectedCalls) {
t.Fatalf("expected cloud provider methods `%v` to be called but `%v` was called ", tc.expectedCalls, fc.Calls)
}
if tc.existsByProviderID && tc.existsByProviderID != exists {
t.Fatalf("expected exist by provider id to be `%t` but got `%t`", tc.existsByProviderID, exists)
}
if tc.existsByNodeName && tc.existsByNodeName != exists {
t.Fatalf("expected exist by node name to be `%t` but got `%t`", tc.existsByNodeName, exists)
}
if !tc.existsByNodeName && !tc.existsByProviderID && exists {
t.Fatal("node is not supposed to exist")
}
})
}
}
// This test checks that the node is deleted when kubelet stops reporting
// and cloud provider says node is gone
func TestNodeDeleted(t *testing.T) {
@ -105,9 +220,12 @@ func TestNodeDeleted(t *testing.T) {
eventBroadcaster := record.NewBroadcaster()
cloudNodeController := &CloudNodeController{
kubeClient: fnh,
nodeInformer: factory.Core().V1().Nodes(),
cloud: &fakecloud.FakeCloud{Err: cloudprovider.InstanceNotFound},
kubeClient: fnh,
nodeInformer: factory.Core().V1().Nodes(),
cloud: &fakecloud.FakeCloud{
ExistsByProviderID: false,
Err: nil,
},
nodeMonitorPeriod: 1 * time.Second,
recorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "cloud-controller-manager"}),
nodeStatusUpdateFrequency: 1 * time.Second,
@ -520,7 +638,8 @@ func TestNodeAddresses(t *testing.T) {
FailureDomain: "us-west-1a",
Region: "us-west",
},
Err: nil,
ExistsByProviderID: true,
Err: nil,
}
eventBroadcaster := record.NewBroadcaster()
@ -639,7 +758,8 @@ func TestNodeProvidedIPAddresses(t *testing.T) {
FailureDomain: "us-west-1a",
Region: "us-west",
},
Err: nil,
ExistsByProviderID: true,
Err: nil,
}
eventBroadcaster := record.NewBroadcaster()

View File

@ -650,6 +650,10 @@ func (instances *instances) InstanceTypeByProviderID(providerID string) (string,
return "", errors.New("Not implemented")
}
func (instances *instances) InstanceExistsByProviderID(providerID string) (bool, error) {
return false, errors.New("unimplemented")
}
func (instances *instances) List(filter string) ([]types.NodeName, error) {
return []types.NodeName{}, errors.New("Not implemented")
}