Mirror of https://github.com/k3s-io/kubernetes.git
Add node check to vSphere cloud provider
At KCM startup, the vSphere cloud provider builds a cache from NodeAdded events delivered by an informer. Because these events are asynchronous, the cloud provider may need information from the cache before it has been populated, for example when detaching a volume from a node right at KCM startup. If a node is missing from the cache, the cloud provider treats such a detach as successful, which is wrong: the volume stays attached to the node forever.

To prevent this issue:

1. Try the nodeLister before declaring a node as not found. The A/D controller starts only after its node informer has been synced.
2. Read the API server before declaring a node as not found, just in case the informer has stale data.
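In rough terms, the lookup order this change introduces is: in-memory cache first, then the node informer's lister, then the API server. Below is a minimal sketch of that order, condensed from the GetNode change in the diff that follows; the helper name getNodeSketch is hypothetical, and locking, logging, and handling of errors other than NotFound are omitted here (the real GetNode keeps them).

    // Sketch only: assumes the package context of NodeManager, with the
    // registeredNodes map and the nodeLister/nodeGetter fields added by this commit.
    func (nm *NodeManager) getNodeSketch(name string) (*v1.Node, error) {
        // 1. In-memory cache built from informer events (may still be empty at KCM startup).
        if node := nm.registeredNodes[name]; node != nil {
            return node, nil
        }
        // 2. Node informer lister; the A/D controller starts only after it has synced.
        if nm.nodeLister != nil {
            if node, err := nm.nodeLister.Get(name); err == nil {
                return node.DeepCopy(), nil
            }
        }
        // 3. API server, in case the informer has stale data.
        if nm.nodeGetter != nil {
            if node, err := nm.nodeGetter.Nodes().Get(context.TODO(), name, metav1.GetOptions{}); err == nil {
                return node, nil
            }
        }
        // Only now is the node really treated as unknown.
        return nil, vclib.ErrNoVMFound
    }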
parent afcc207feb
commit 8d19c0002b
@@ -27,11 +27,14 @@ import (
 	"github.com/vmware/govmomi/object"
 	"github.com/vmware/govmomi/vim25/mo"
-	"k8s.io/klog/v2"
 
 	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	k8stypes "k8s.io/apimachinery/pkg/types"
+	coreclients "k8s.io/client-go/kubernetes/typed/core/v1"
+	corelisters "k8s.io/client-go/listers/core/v1"
 	cloudprovider "k8s.io/cloud-provider"
+	"k8s.io/klog/v2"
 	"k8s.io/legacy-cloud-providers/vsphere/vclib"
 )
@@ -61,6 +64,9 @@ type NodeManager struct {
 	//CredentialsManager
 	credentialManager *SecretCredentialManager
 
+	nodeLister corelisters.NodeLister
+	nodeGetter coreclients.NodesGetter
+
 	// Mutexes
 	registeredNodesLock sync.RWMutex
 	nodeInfoLock        sync.RWMutex
@@ -271,10 +277,43 @@ func (nm *NodeManager) GetNode(nodeName k8stypes.NodeName) (v1.Node, error) {
 	nm.registeredNodesLock.RLock()
 	node := nm.registeredNodes[convertToString(nodeName)]
 	nm.registeredNodesLock.RUnlock()
-	if node == nil {
-		return v1.Node{}, vclib.ErrNoVMFound
+	if node != nil {
+		klog.V(4).Infof("Node %s found in vSphere cloud provider cache", nodeName)
+		return *node, nil
 	}
-	return *node, nil
+
+	if nm.nodeLister != nil {
+		klog.V(4).Infof("Node %s missing in vSphere cloud provider cache, trying node informer", nodeName)
+		node, err := nm.nodeLister.Get(convertToString(nodeName))
+		if err != nil {
+			if !errors.IsNotFound(err) {
+				return v1.Node{}, err
+			}
+			// Fall through with IsNotFound error and try to get the node from the API server
+		} else {
+			node := node.DeepCopy()
+			nm.addNode(node)
+			klog.V(4).Infof("Node %s found in vSphere cloud provider node informer", nodeName)
+			return *node, nil
+		}
+	}
+
+	if nm.nodeGetter != nil {
+		klog.V(4).Infof("Node %s missing in vSphere cloud provider caches, trying the API server", nodeName)
+		node, err := nm.nodeGetter.Nodes().Get(context.TODO(), convertToString(nodeName), metav1.GetOptions{})
+		if err != nil {
+			if !errors.IsNotFound(err) {
+				return v1.Node{}, err
+			}
+			// Fall through with IsNotFound error to keep the code consistent with the above
+		} else {
+			nm.addNode(node)
+			klog.V(4).Infof("Node %s found in the API server", nodeName)
+			return *node, nil
+		}
+	}
+
+	klog.V(4).Infof("Node %s not found in vSphere cloud provider", nodeName)
+	return v1.Node{}, vclib.ErrNoVMFound
 }
 
 func (nm *NodeManager) getNodes() map[string]*v1.Node {
@@ -515,3 +554,11 @@ func (nm *NodeManager) GetHostsInZone(ctx context.Context, zoneFailureDomain str
 	klog.V(4).Infof("GetHostsInZone %v returning: %v", zoneFailureDomain, hosts)
 	return hosts, nil
 }
+
+func (nm *NodeManager) SetNodeLister(nodeLister corelisters.NodeLister) {
+	nm.nodeLister = nodeLister
+}
+
+func (nm *NodeManager) SetNodeGetter(nodeGetter coreclients.NodesGetter) {
+	nm.nodeGetter = nodeGetter
+}
@@ -0,0 +1,149 @@
+//go:build !providerless
+// +build !providerless
+
+/*
+Copyright 2023 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package vsphere
+
+import (
+	"testing"
+
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/client-go/informers"
+	"k8s.io/client-go/kubernetes/fake"
+	"k8s.io/legacy-cloud-providers/vsphere/vclib"
+)
+
+// Annotation used to distinguish nodes in node cache / informer / API server
+const nodeAnnotation = "test"
+
+func getNode(annotation string) *v1.Node {
+	return &v1.Node{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "node1",
+			Annotations: map[string]string{
+				nodeAnnotation: annotation,
+			},
+		},
+	}
+}
+
+func TestGetNode(t *testing.T) {
+	tests := []struct {
+		name           string
+		cachedNodes    []*v1.Node
+		informerNodes  []*v1.Node // "nil" means that the NodeManager has no nodeLister
+		apiServerNodes []*v1.Node // "nil" means that the NodeManager has no nodeGetter
+
+		expectedNodeAnnotation string
+		expectNotFound         bool
+	}{
+		{
+			name:           "No cached node anywhere",
+			cachedNodes:    []*v1.Node{},
+			informerNodes:  []*v1.Node{},
+			apiServerNodes: []*v1.Node{},
+			expectNotFound: true,
+		},
+		{
+			name:           "No lister & getter",
+			cachedNodes:    []*v1.Node{},
+			informerNodes:  nil,
+			apiServerNodes: nil,
+			expectNotFound: true,
+		},
+		{
+			name:                   "cache is used first",
+			cachedNodes:            []*v1.Node{getNode("cache")},
+			informerNodes:          []*v1.Node{getNode("informer")},
+			apiServerNodes:         []*v1.Node{getNode("apiserver")},
+			expectedNodeAnnotation: "cache",
+		},
+		{
+			name:                   "informer is used second",
+			cachedNodes:            []*v1.Node{},
+			informerNodes:          []*v1.Node{getNode("informer")},
+			apiServerNodes:         []*v1.Node{getNode("apiserver")},
+			expectedNodeAnnotation: "informer",
+		},
+		{
+			name:                   "API server is used last",
+			cachedNodes:            []*v1.Node{},
+			informerNodes:          []*v1.Node{},
+			apiServerNodes:         []*v1.Node{getNode("apiserver")},
+			expectedNodeAnnotation: "apiserver",
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+
+			// local NodeManager cache
+			cache := make(map[string]*v1.Node)
+			for _, node := range test.cachedNodes {
+				cache[node.Name] = node
+			}
+
+			// Client with apiServerNodes
+			objs := []runtime.Object{}
+			for _, node := range test.apiServerNodes {
+				objs = append(objs, node)
+			}
+			client := fake.NewSimpleClientset(objs...)
+			nodeGetter := client.CoreV1()
+
+			// Informer + nodeLister. Although the client already has apiServerNodes, they won't
+			// appear in the nodeLister, because the informer is never started.
+			factory := informers.NewSharedInformerFactory(client, 0 /* no resync */)
+			nodeInformer := factory.Core().V1().Nodes()
+			for _, node := range test.informerNodes {
+				nodeInformer.Informer().GetStore().Add(node)
+			}
+			nodeLister := nodeInformer.Lister()
+
+			nodeManager := NodeManager{
+				registeredNodes: cache,
+			}
+			if test.informerNodes != nil {
+				nodeManager.SetNodeLister(nodeLister)
+			}
+			if test.apiServerNodes != nil {
+				nodeManager.SetNodeGetter(nodeGetter)
+			}
+
+			node, err := nodeManager.GetNode("node1")
+			if test.expectNotFound && err != vclib.ErrNoVMFound {
+				t.Errorf("Expected NotFound error, got: %v", err)
+			}
+			if !test.expectNotFound && err != nil {
+				t.Errorf("Unexpected error: %s", err)
+			}
+
+			if test.expectedNodeAnnotation != "" {
+				if node.Annotations == nil {
+					t.Errorf("Expected node with annotation %q, got nil", test.expectedNodeAnnotation)
+				} else {
+					if ann := node.Annotations[nodeAnnotation]; ann != test.expectedNodeAnnotation {
+						t.Errorf("Expected node with annotation %q, got %q", test.expectedNodeAnnotation, ann)
+					}
+				}
+			}
+		})
+	}
+}
@@ -276,6 +276,7 @@ func init() {
 // Initialize passes a Kubernetes clientBuilder interface to the cloud provider
 func (vs *VSphere) Initialize(clientBuilder cloudprovider.ControllerClientBuilder, stop <-chan struct{}) {
 	vs.kubeClient = clientBuilder.ClientOrDie("vsphere-legacy-cloud-provider")
+	vs.nodeManager.SetNodeGetter(vs.kubeClient.CoreV1())
 }
 
 // Initialize Node Informers
@@ -318,6 +319,9 @@ func (vs *VSphere) SetInformers(informerFactory informers.SharedInformerFactory)
 		cache.ResourceEventHandlerFuncs{UpdateFunc: vs.syncNodeZoneLabels},
 		zoneLabelsResyncPeriod,
 	)
 
+	nodeLister := informerFactory.Core().V1().Nodes().Lister()
+	vs.nodeManager.SetNodeLister(nodeLister)
+
 	klog.V(4).Infof("Node informers in vSphere cloud provider initialized")
 }