Merge pull request #83567 from jsafrane/fix-aws-stale-describe

Fix attachment of just detached AWS volumes
This commit is contained in:
Kubernetes Prow Robot 2019-10-12 05:28:37 -07:00 committed by GitHub
commit 1fcd46c250
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -47,7 +47,7 @@ import (
"github.com/aws/aws-sdk-go/service/kms" "github.com/aws/aws-sdk-go/service/kms"
"github.com/aws/aws-sdk-go/service/sts" "github.com/aws/aws-sdk-go/service/sts"
"gopkg.in/gcfg.v1" "gopkg.in/gcfg.v1"
"k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/klog" "k8s.io/klog"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
@ -63,7 +63,7 @@ import (
"k8s.io/client-go/pkg/version" "k8s.io/client-go/pkg/version"
"k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record" "k8s.io/client-go/tools/record"
"k8s.io/cloud-provider" cloudprovider "k8s.io/cloud-provider"
nodehelpers "k8s.io/cloud-provider/node/helpers" nodehelpers "k8s.io/cloud-provider/node/helpers"
servicehelpers "k8s.io/cloud-provider/service/helpers" servicehelpers "k8s.io/cloud-provider/service/helpers"
cloudvolume "k8s.io/cloud-provider/volume" cloudvolume "k8s.io/cloud-provider/volume"
@ -1861,6 +1861,7 @@ func (c *Cloud) getMountDevice(
assign bool) (assigned mountDevice, alreadyAttached bool, err error) { assign bool) (assigned mountDevice, alreadyAttached bool, err error) {
deviceMappings := map[mountDevice]EBSVolumeID{} deviceMappings := map[mountDevice]EBSVolumeID{}
volumeStatus := map[EBSVolumeID]string{} // for better logging of volume status
for _, blockDevice := range info.BlockDeviceMappings { for _, blockDevice := range info.BlockDeviceMappings {
name := aws.StringValue(blockDevice.DeviceName) name := aws.StringValue(blockDevice.DeviceName)
if strings.HasPrefix(name, "/dev/sd") { if strings.HasPrefix(name, "/dev/sd") {
@ -1872,6 +1873,10 @@ func (c *Cloud) getMountDevice(
if len(name) < 1 || len(name) > 2 { if len(name) < 1 || len(name) > 2 {
klog.Warningf("Unexpected EBS DeviceName: %q", aws.StringValue(blockDevice.DeviceName)) klog.Warningf("Unexpected EBS DeviceName: %q", aws.StringValue(blockDevice.DeviceName))
} }
if blockDevice.Ebs != nil && blockDevice.Ebs.VolumeId != nil {
volumeStatus[EBSVolumeID(*blockDevice.Ebs.VolumeId)] = aws.StringValue(blockDevice.Ebs.Status)
}
deviceMappings[mountDevice(name)] = EBSVolumeID(aws.StringValue(blockDevice.Ebs.VolumeId)) deviceMappings[mountDevice(name)] = EBSVolumeID(aws.StringValue(blockDevice.Ebs.VolumeId))
} }
@ -1889,7 +1894,15 @@ func (c *Cloud) getMountDevice(
for mountDevice, mappingVolumeID := range deviceMappings { for mountDevice, mappingVolumeID := range deviceMappings {
if volumeID == mappingVolumeID { if volumeID == mappingVolumeID {
if assign { if assign {
klog.Warningf("Got assignment call for already-assigned volume: %s@%s", mountDevice, mappingVolumeID) // DescribeInstances shows the volume as attached / detaching, while Kubernetes
// cloud provider thinks it's detached.
// This can happen when the volume has just been detached from the same node
// and AWS API returns stale data in this DescribeInstances ("eventual consistency").
// Fail the attachment and let A/D controller retry in a while, hoping that
// the AWS API returns a consistent result next time (i.e. the volume is detached).
status := volumeStatus[mappingVolumeID]
klog.Warningf("Got assignment call for already-assigned volume: %s@%s, volume status: %s", mountDevice, mappingVolumeID, status)
return mountDevice, false, fmt.Errorf("volume is still being detached from the node")
} }
return mountDevice, true, nil return mountDevice, true, nil
} }