Merge pull request #55558 from gnufied/implement-node-taint-for-attaching-volumes

Automatic merge from submit-queue (batch tested with PRs 50457, 55558, 53483, 55731, 52842). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Apply taint when a volume is stuck in attaching state

When a volume is stuck in attaching state for too long on a node, it is best to make node unschedulable so as any other pod may not be scheduled on it.

Fixes https://github.com/kubernetes/kubernetes/issues/55502 

```release-note
AWS: Apply taint to a node if volumes being attached to it are stuck in attaching state
```
This commit is contained in:
Kubernetes Submit Queue 2017-11-18 11:36:16 -08:00 committed by GitHub
commit 1b3d880725
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 55 additions and 1 deletions

View File

@ -49,9 +49,14 @@ go_library(
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
"//vendor/gopkg.in/gcfg.v1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/scheme:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/typed/core/v1:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
],
)

View File

@ -41,13 +41,18 @@ import (
"github.com/aws/aws-sdk-go/service/kms"
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/record"
"path"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes/scheme"
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/kubernetes/pkg/api/v1/service"
"k8s.io/kubernetes/pkg/cloudprovider"
"k8s.io/kubernetes/pkg/controller"
@ -149,6 +154,12 @@ const ServiceAnnotationLoadBalancerBEProtocol = "service.beta.kubernetes.io/aws-
// For example: "Key1=Val1,Key2=Val2,KeyNoVal1=,KeyNoVal2"
const ServiceAnnotationLoadBalancerAdditionalTags = "service.beta.kubernetes.io/aws-load-balancer-additional-resource-tags"
// Event key when a volume is stuck on attaching state when being attached to a volume
const volumeAttachmentStuck = "VolumeAttachmentStuck"
// Indicates that a node has volumes stuck in attaching state and hence it is not fit for scheduling more pods
const nodeWithImpairedVolumes = "NodeWithImpairedVolumes"
const (
// volumeAttachmentConsecutiveErrorLimit is the number of consecutive errors we will ignore when waiting for a volume to attach/detach
volumeAttachmentStatusConsecutiveErrorLimit = 10
@ -405,6 +416,11 @@ type Cloud struct {
instanceCache instanceCache
clientBuilder controller.ControllerClientBuilder
kubeClient clientset.Interface
eventBroadcaster record.EventBroadcaster
eventRecorder record.EventRecorder
// We keep an active list of devices we have assigned but not yet
// attached, to avoid a race condition where we assign a device mapping
// and then get a second request before we attach the volume
@ -964,7 +980,14 @@ func newAWSCloud(config io.Reader, awsServices Services) (*Cloud, error) {
}
// Initialize passes a Kubernetes clientBuilder interface to the cloud provider
func (c *Cloud) Initialize(clientBuilder controller.ControllerClientBuilder) {}
func (c *Cloud) Initialize(clientBuilder controller.ControllerClientBuilder) {
c.clientBuilder = clientBuilder
c.kubeClient = clientBuilder.ClientOrDie("cloud-provider")
c.eventBroadcaster = record.NewBroadcaster()
c.eventBroadcaster.StartLogging(glog.Infof)
c.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(c.kubeClient.CoreV1().RESTClient()).Events("")})
c.eventRecorder = c.eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "aws-cloudprovider"})
}
// Clusters returns the list of clusters.
func (c *Cloud) Clusters() (cloudprovider.Clusters, bool) {
@ -1532,6 +1555,28 @@ func (d *awsDisk) describeVolume() (*ec2.Volume, error) {
return volumes[0], nil
}
// applyUnSchedulableTaint applies a unschedulable taint to a node after verifying
// if node has become unusable because of volumes getting stuck in attaching state.
func (c *Cloud) applyUnSchedulableTaint(nodeName types.NodeName, reason string) {
node, fetchErr := c.kubeClient.CoreV1().Nodes().Get(string(nodeName), metav1.GetOptions{})
if fetchErr != nil {
glog.Errorf("Error fetching node %s with %v", nodeName, fetchErr)
return
}
taint := &v1.Taint{
Key: nodeWithImpairedVolumes,
Value: "true",
Effect: v1.TaintEffectNoSchedule,
}
err := controller.AddOrUpdateTaintOnNode(c.kubeClient, string(nodeName), taint)
if err != nil {
glog.Errorf("Error applying taint to node %s with error %v", nodeName, err)
return
}
c.eventRecorder.Eventf(node, v1.EventTypeWarning, volumeAttachmentStuck, reason)
}
// waitForAttachmentStatus polls until the attachment status is the expected value
// On success, it returns the last attachment state.
func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment, error) {
@ -1751,7 +1796,11 @@ func (c *Cloud) AttachDisk(diskName KubernetesVolumeID, nodeName types.NodeName,
}
attachment, err := disk.waitForAttachmentStatus("attached")
if err != nil {
if err == wait.ErrWaitTimeout {
c.applyUnSchedulableTaint(nodeName, "Volume stuck in attaching state - node needs reboot to fix impaired state.")
}
return "", err
}