Merge pull request #56846 from zetaab/fixvolumeattached

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

fix cinder detach problems

**What this PR does / why we need it**: We currently have serious problems with Cinder volume detach. This PR fixes those issues.

**Which issue(s) this PR fixes**:
Fixes #50004 
Fixes #57497

**Special notes for your reviewer**: 
**Release note**:

```release-note
Fix OpenStack Cinder volume detach when the Nova instance has been shut down.
```
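For context, the heart of the change is visible in the openstack.go diff below: the provider's server lookup used to filter on `Status: "ACTIVE"`, so a Nova instance that had been shut down was never found and its Cinder volumes could never be detached. The following is a minimal, illustrative sketch of the relaxed lookup, not the PR's code; it assumes an already-authenticated gophercloud compute client, and the function name is invented here:

```go
package example

import (
	"fmt"
	"regexp"

	"github.com/gophercloud/gophercloud"
	"github.com/gophercloud/gophercloud/openstack/compute/v2/servers"
)

// findServers lists Nova servers whose name matches exactly. When
// showOnlyActive is false, SHUTOFF (shut-down) instances are returned as
// well, which is what lets the detach path still find the server.
func findServers(client *gophercloud.ServiceClient, name string, showOnlyActive bool) ([]servers.Server, error) {
	opts := servers.ListOpts{
		Name: fmt.Sprintf("^%s$", regexp.QuoteMeta(name)),
	}
	if showOnlyActive {
		opts.Status = "ACTIVE" // old behaviour: skip shut-down instances
	}
	page, err := servers.List(client, opts).AllPages()
	if err != nil {
		return nil, err
	}
	return servers.ExtractServers(page)
}
```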
Kubernetes Submit Queue 2018-01-11 04:24:09 -08:00 committed by GitHub
commit 53a8ac753b
9 changed files with 183 additions and 52 deletions

@@ -26,6 +26,7 @@ go_library(
"//pkg/controller:go_default_library",
"//pkg/util/mount:go_default_library",
"//pkg/volume:go_default_library",
"//pkg/volume/util:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/github.com/gophercloud/gophercloud:go_default_library",
"//vendor/github.com/gophercloud/gophercloud/openstack:go_default_library",

@@ -319,6 +319,22 @@ func mapNodeNameToServerName(nodeName types.NodeName) string {
return string(nodeName)
}
// GetNodeNameByID maps an instance ID to a types.NodeName
func (os *OpenStack) GetNodeNameByID(instanceID string) (types.NodeName, error) {
client, err := os.NewComputeV2()
var nodeName types.NodeName
if err != nil {
return nodeName, err
}
server, err := servers.Get(client, instanceID).Extract()
if err != nil {
return nodeName, err
}
nodeName = mapServerToNodeName(server)
return nodeName, nil
}
// mapServerToNodeName maps an OpenStack Server to a k8s NodeName
func mapServerToNodeName(server *servers.Server) types.NodeName {
// Node names are always lowercase, and (at least)
@@ -346,11 +362,14 @@ func foreachServer(client *gophercloud.ServiceClient, opts servers.ListOptsBuild
return err
}
func getServerByName(client *gophercloud.ServiceClient, name types.NodeName) (*servers.Server, error) {
func getServerByName(client *gophercloud.ServiceClient, name types.NodeName, showOnlyActive bool) (*servers.Server, error) {
opts := servers.ListOpts{
Name: fmt.Sprintf("^%s$", regexp.QuoteMeta(mapNodeNameToServerName(name))),
Status: "ACTIVE",
Name: fmt.Sprintf("^%s$", regexp.QuoteMeta(mapNodeNameToServerName(name))),
}
if showOnlyActive {
opts.Status = "ACTIVE"
}
pager := servers.List(client, opts)
serverList := make([]servers.Server, 0, 1)
@@ -432,7 +451,7 @@ func nodeAddresses(srv *servers.Server) ([]v1.NodeAddress, error) {
}
func getAddressesByName(client *gophercloud.ServiceClient, name types.NodeName) ([]v1.NodeAddress, error) {
srv, err := getServerByName(client, name)
srv, err := getServerByName(client, name, true)
if err != nil {
return nil, err
}
@@ -582,7 +601,7 @@ func (os *OpenStack) GetZoneByNodeName(nodeName types.NodeName) (cloudprovider.Z
return cloudprovider.Zone{}, err
}
srv, err := getServerByName(compute, nodeName)
srv, err := getServerByName(compute, nodeName, true)
if err != nil {
if err == ErrNotFound {
return cloudprovider.Zone{}, cloudprovider.InstanceNotFound

@@ -103,7 +103,7 @@ func (i *Instances) NodeAddressesByProviderID(providerID string) ([]v1.NodeAddre
// ExternalID returns the cloud provider ID of the specified instance (deprecated).
func (i *Instances) ExternalID(name types.NodeName) (string, error) {
srv, err := getServerByName(i.compute, name)
srv, err := getServerByName(i.compute, name, true)
if err != nil {
if err == ErrNotFound {
return "", cloudprovider.InstanceNotFound
@@ -151,7 +151,7 @@ func (os *OpenStack) InstanceID() (string, error) {
// InstanceID returns the cloud provider ID of the specified instance.
func (i *Instances) InstanceID(name types.NodeName) (string, error) {
srv, err := getServerByName(i.compute, name)
srv, err := getServerByName(i.compute, name, true)
if err != nil {
if err == ErrNotFound {
return "", cloudprovider.InstanceNotFound
@@ -184,7 +184,7 @@ func (i *Instances) InstanceTypeByProviderID(providerID string) (string, error)
// InstanceType returns the type of the specified instance.
func (i *Instances) InstanceType(name types.NodeName) (string, error) {
srv, err := getServerByName(i.compute, name)
srv, err := getServerByName(i.compute, name, true)
if err != nil {
return "", err

@@ -551,7 +551,7 @@ func getNodeSecurityGroupIDForLB(compute *gophercloud.ServiceClient, nodes []*v1
for _, node := range nodes {
nodeName := types.NodeName(node.Name)
srv, err := getServerByName(compute, nodeName)
srv, err := getServerByName(compute, nodeName, true)
if err != nil {
return nodeSecurityGroupIDs.List(), err
}

@@ -288,7 +288,7 @@ func (r *Routes) DeleteRoute(clusterName string, route *cloudprovider.Route) err
}
func getPortIDByIP(compute *gophercloud.ServiceClient, targetNode types.NodeName, ipAddress string) (string, error) {
srv, err := getServerByName(compute, targetNode)
srv, err := getServerByName(compute, targetNode, true)
if err != nil {
return "", err
}

@@ -17,6 +17,7 @@ limitations under the License.
package openstack
import (
"errors"
"fmt"
"io/ioutil"
"path"
@@ -25,7 +26,9 @@ import (
"time"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/types"
k8s_volume "k8s.io/kubernetes/pkg/volume"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
"github.com/gophercloud/gophercloud"
volumeexpand "github.com/gophercloud/gophercloud/openstack/blockstorage/extensions/volumeactions"
@@ -318,7 +321,17 @@ func (os *OpenStack) AttachDisk(instanceID, volumeID string) (string, error) {
glog.V(4).Infof("Disk %s is already attached to instance %s", volumeID, instanceID)
return volume.ID, nil
}
return "", fmt.Errorf("disk %s is attached to a different instance (%s)", volumeID, volume.AttachedServerId)
nodeName, err := os.GetNodeNameByID(volume.AttachedServerId)
attachErr := fmt.Sprintf("disk %s path %s is attached to a different instance (%s)", volumeID, volume.AttachedDevice, volume.AttachedServerId)
if err != nil {
glog.Error(attachErr)
return "", errors.New(attachErr)
}
// using volume.AttachedDevice may cause problems because cinder does not report the device path correctly; see issue #33128
devicePath := volume.AttachedDevice
danglingErr := volumeutil.NewDanglingError(attachErr, nodeName, devicePath)
glog.V(2).Infof("Found dangling volume %s attached to node %s", volumeID, nodeName)
return "", danglingErr
}
startTime := time.Now()
@@ -578,6 +591,9 @@ func (os *OpenStack) GetAttachmentDiskPath(instanceID, volumeID string) (string,
// DiskIsAttached queries if a volume is attached to a compute instance
func (os *OpenStack) DiskIsAttached(instanceID, volumeID string) (bool, error) {
if instanceID == "" {
glog.Warningf("calling DiskIsAttached with empty instanceid: %s %s", instanceID, volumeID)
}
volume, err := os.getVolume(volumeID)
if err != nil {
return false, err
@@ -586,6 +602,29 @@ func (os *OpenStack) DiskIsAttached(instanceID, volumeID string) (bool, error) {
return instanceID == volume.AttachedServerId, nil
}
// DiskIsAttachedByName queries if a volume is attached to a compute instance by name
func (os *OpenStack) DiskIsAttachedByName(nodeName types.NodeName, volumeID string) (bool, string, error) {
cClient, err := os.NewComputeV2()
if err != nil {
return false, "", err
}
srv, err := getServerByName(cClient, nodeName, false)
if err != nil {
if err == ErrNotFound {
// the instance is no longer found in the cloud provider; assume the cinder volume is detached
return false, "", nil
} else {
return false, "", err
}
}
instanceID := "/" + srv.ID
if ind := strings.LastIndex(instanceID, "/"); ind >= 0 {
instanceID = instanceID[(ind + 1):]
}
attached, err := os.DiskIsAttached(instanceID, volumeID)
return attached, instanceID, err
}
// DisksAreAttached queries if a list of volumes are attached to a compute instance
func (os *OpenStack) DisksAreAttached(instanceID string, volumeIDs []string) (map[string]bool, error) {
attached := make(map[string]bool)
@@ -600,6 +639,32 @@ func (os *OpenStack) DisksAreAttached(instanceID string, volumeIDs []string) (ma
return attached, nil
}
// DisksAreAttachedByName queries if a list of volumes are attached to a compute instance by name
func (os *OpenStack) DisksAreAttachedByName(nodeName types.NodeName, volumeIDs []string) (map[string]bool, error) {
attached := make(map[string]bool)
cClient, err := os.NewComputeV2()
if err != nil {
return attached, err
}
srv, err := getServerByName(cClient, nodeName, false)
if err != nil {
if err == ErrNotFound {
// the instance is no longer found; mark all volumes as detached
for _, volumeID := range volumeIDs {
attached[volumeID] = false
}
return attached, nil
} else {
return attached, err
}
}
instanceID := "/" + srv.ID
if ind := strings.LastIndex(instanceID, "/"); ind >= 0 {
instanceID = instanceID[(ind + 1):]
}
return os.DisksAreAttached(instanceID, volumeIDs)
}
// diskIsUsed returns true if a disk is attached to any node.
func (os *OpenStack) diskIsUsed(volumeID string) (bool, error) {
volume, err := os.getVolume(volumeID)
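A note on the contract of the two new `*ByName` helpers above: a server that can no longer be found (`ErrNotFound`) is reported as "nothing attached" rather than as an error, so the controller can finish its detach bookkeeping even after the Nova instance has been shut down or deleted. A hedged caller-side sketch of that contract; the interface and function names below are invented for illustration:

```go
package example

import "k8s.io/apimachinery/pkg/types"

// attachChecker is a deliberately narrow, illustrative interface; the real
// implementation is the OpenStack provider's DiskIsAttachedByName above.
type attachChecker interface {
	DiskIsAttachedByName(nodeName types.NodeName, volumeID string) (bool, string, error)
}

// safeToDetach reports whether detach bookkeeping can proceed. It returns
// true both when the volume is not attached to the named node and when the
// Nova server no longer exists, because DiskIsAttachedByName returns
// (false, "", nil) for ErrNotFound instead of surfacing an error.
func safeToDetach(cloud attachChecker, nodeName types.NodeName, volumeID string) (bool, error) {
	attached, _, err := cloud.DiskIsAttachedByName(nodeName, volumeID)
	if err != nil {
		return false, err // a genuine API failure, not a missing server
	}
	return !attached, nil
}
```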

@@ -27,7 +27,6 @@ import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/pkg/cloudprovider"
"k8s.io/kubernetes/pkg/util/mount"
"k8s.io/kubernetes/pkg/volume"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
@@ -187,23 +186,7 @@ func (attacher *cinderDiskAttacher) VolumesAreAttached(specs []*volume.Spec, nod
volumeSpecMap[volumeSource.VolumeID] = spec
}
instanceID, err := attacher.nodeInstanceID(nodeName)
if err != nil {
if err == cloudprovider.InstanceNotFound {
// If node doesn't exist, OpenStack Nova will assume the volumes are not attached to it.
// Mark the volumes as detached and return false without error.
glog.Warningf("VolumesAreAttached: node %q does not exist.", nodeName)
for spec := range volumesAttachedCheck {
volumesAttachedCheck[spec] = false
}
return volumesAttachedCheck, nil
}
return volumesAttachedCheck, err
}
attachedResult, err := attacher.cinderProvider.DisksAreAttached(instanceID, volumeIDList)
attachedResult, err := attacher.cinderProvider.DisksAreAttachedByName(nodeName, volumeIDList)
if err != nil {
// Log error and continue with attach
glog.Errorf(
@@ -381,20 +364,10 @@ func (detacher *cinderDiskDetacher) waitDiskDetached(instanceID, volumeID string
func (detacher *cinderDiskDetacher) Detach(volumeName string, nodeName types.NodeName) error {
volumeID := path.Base(volumeName)
instances, res := detacher.cinderProvider.Instances()
if !res {
return fmt.Errorf("failed to list openstack instances")
}
instanceID, err := instances.InstanceID(nodeName)
if ind := strings.LastIndex(instanceID, "/"); ind >= 0 {
instanceID = instanceID[(ind + 1):]
}
if err := detacher.waitOperationFinished(volumeID); err != nil {
return err
}
attached, err := detacher.cinderProvider.DiskIsAttached(instanceID, volumeID)
attached, instanceID, err := detacher.cinderProvider.DiskIsAttachedByName(nodeName, volumeID)
if err != nil {
// Log error and continue with detach
glog.Errorf(
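With the provider resolving the node name itself, the detacher no longer needs the `Instances()` lookup at all, and a vanished server is simply reported as "not attached". A rough sketch of the resulting flow; the interface and helper names are invented here, and the real code is `cinderDiskDetacher.Detach` above, which polls for pending Cinder operations and only logs attachment-check failures:

```go
package example

import (
	"path"

	"k8s.io/apimachinery/pkg/types"
)

// detachProvider is an illustrative subset of the CinderProvider interface.
type detachProvider interface {
	OperationPending(diskName string) (bool, string, error)
	DiskIsAttachedByName(nodeName types.NodeName, volumeID string) (bool, string, error)
	DetachDisk(instanceID, volumeID string) error
}

// detachByName mirrors the simplified Detach path: refuse to act while a
// Cinder operation is pending, check attachment by node name, and only then
// ask Nova to detach.
func detachByName(cloud detachProvider, nodeName types.NodeName, volumeName string) error {
	volumeID := path.Base(volumeName)
	if pending, _, err := cloud.OperationPending(volumeID); err != nil {
		return err
	} else if pending {
		return nil // the real detacher waits until the operation finishes
	}
	attached, instanceID, err := cloud.DiskIsAttachedByName(nodeName, volumeID)
	if err != nil {
		return err // the real detacher logs this and continues
	}
	if !attached {
		return nil // already detached, or the server is gone
	}
	return cloud.DetachDisk(instanceID, volumeID)
}
```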

@@ -132,7 +132,7 @@ func TestAttachDetach(t *testing.T) {
name: "Attach_Positive",
instanceID: instanceID,
operationPending: operationPendingCall{volumeID, false, done, nil},
diskIsAttached: diskIsAttachedCall{instanceID, volumeID, false, nil},
diskIsAttached: diskIsAttachedCall{instanceID, nodeName, volumeID, false, nil},
attach: attachCall{instanceID, volumeID, "", nil},
diskPath: diskPathCall{instanceID, volumeID, "/dev/sda", nil},
test: func(testcase *testcase) (string, error) {
@@ -147,7 +147,7 @@ func TestAttachDetach(t *testing.T) {
name: "Attach_Positive_AlreadyAttached",
instanceID: instanceID,
operationPending: operationPendingCall{volumeID, false, done, nil},
diskIsAttached: diskIsAttachedCall{instanceID, volumeID, true, nil},
diskIsAttached: diskIsAttachedCall{instanceID, nodeName, volumeID, true, nil},
diskPath: diskPathCall{instanceID, volumeID, "/dev/sda", nil},
test: func(testcase *testcase) (string, error) {
attacher := newAttacher(testcase)
@@ -173,7 +173,7 @@ func TestAttachDetach(t *testing.T) {
name: "Attach_Negative",
instanceID: instanceID,
operationPending: operationPendingCall{volumeID, false, done, nil},
diskIsAttached: diskIsAttachedCall{instanceID, volumeID, false, diskCheckError},
diskIsAttached: diskIsAttachedCall{instanceID, nodeName, volumeID, false, diskCheckError},
attach: attachCall{instanceID, volumeID, "/dev/sda", attachError},
test: func(testcase *testcase) (string, error) {
attacher := newAttacher(testcase)
@@ -187,7 +187,7 @@ func TestAttachDetach(t *testing.T) {
name: "Attach_Negative_DiskPatchFails",
instanceID: instanceID,
operationPending: operationPendingCall{volumeID, false, done, nil},
diskIsAttached: diskIsAttachedCall{instanceID, volumeID, false, nil},
diskIsAttached: diskIsAttachedCall{instanceID, nodeName, volumeID, false, nil},
attach: attachCall{instanceID, volumeID, "", nil},
diskPath: diskPathCall{instanceID, volumeID, "", diskPathError},
test: func(testcase *testcase) (string, error) {
@@ -201,7 +201,7 @@ func TestAttachDetach(t *testing.T) {
{
name: "VolumesAreAttached_Positive",
instanceID: instanceID,
disksAreAttached: disksAreAttachedCall{instanceID, []string{volumeID}, map[string]bool{volumeID: true}, nil},
disksAreAttached: disksAreAttachedCall{instanceID, nodeName, []string{volumeID}, map[string]bool{volumeID: true}, nil},
test: func(testcase *testcase) (string, error) {
attacher := newAttacher(testcase)
attachments, err := attacher.VolumesAreAttached([]*volume.Spec{spec}, nodeName)
@@ -214,7 +214,7 @@ func TestAttachDetach(t *testing.T) {
{
name: "VolumesAreAttached_Negative",
instanceID: instanceID,
disksAreAttached: disksAreAttachedCall{instanceID, []string{volumeID}, map[string]bool{volumeID: false}, nil},
disksAreAttached: disksAreAttachedCall{instanceID, nodeName, []string{volumeID}, map[string]bool{volumeID: false}, nil},
test: func(testcase *testcase) (string, error) {
attacher := newAttacher(testcase)
attachments, err := attacher.VolumesAreAttached([]*volume.Spec{spec}, nodeName)
@@ -227,7 +227,7 @@ func TestAttachDetach(t *testing.T) {
{
name: "VolumesAreAttached_CinderFailed",
instanceID: instanceID,
disksAreAttached: disksAreAttachedCall{instanceID, []string{volumeID}, nil, disksCheckError},
disksAreAttached: disksAreAttachedCall{instanceID, nodeName, []string{volumeID}, nil, disksCheckError},
test: func(testcase *testcase) (string, error) {
attacher := newAttacher(testcase)
attachments, err := attacher.VolumesAreAttached([]*volume.Spec{spec}, nodeName)
@@ -242,7 +242,7 @@ func TestAttachDetach(t *testing.T) {
name: "Detach_Positive",
instanceID: instanceID,
operationPending: operationPendingCall{volumeID, false, done, nil},
diskIsAttached: diskIsAttachedCall{instanceID, volumeID, true, nil},
diskIsAttached: diskIsAttachedCall{instanceID, nodeName, volumeID, true, nil},
detach: detachCall{instanceID, volumeID, nil},
test: func(testcase *testcase) (string, error) {
detacher := newDetacher(testcase)
@@ -255,7 +255,7 @@ func TestAttachDetach(t *testing.T) {
name: "Detach_Positive_AlreadyDetached",
instanceID: instanceID,
operationPending: operationPendingCall{volumeID, false, done, nil},
diskIsAttached: diskIsAttachedCall{instanceID, volumeID, false, nil},
diskIsAttached: diskIsAttachedCall{instanceID, nodeName, volumeID, false, nil},
test: func(testcase *testcase) (string, error) {
detacher := newDetacher(testcase)
return "", detacher.Detach(volumeID, nodeName)
@@ -267,7 +267,7 @@ func TestAttachDetach(t *testing.T) {
name: "Detach_Positive_CheckFails",
instanceID: instanceID,
operationPending: operationPendingCall{volumeID, false, done, nil},
diskIsAttached: diskIsAttachedCall{instanceID, volumeID, false, diskCheckError},
diskIsAttached: diskIsAttachedCall{instanceID, nodeName, volumeID, false, diskCheckError},
detach: detachCall{instanceID, volumeID, nil},
test: func(testcase *testcase) (string, error) {
detacher := newDetacher(testcase)
@@ -280,7 +280,7 @@ func TestAttachDetach(t *testing.T) {
name: "Detach_Negative",
instanceID: instanceID,
operationPending: operationPendingCall{volumeID, false, done, nil},
diskIsAttached: diskIsAttachedCall{instanceID, volumeID, false, diskCheckError},
diskIsAttached: diskIsAttachedCall{instanceID, nodeName, volumeID, false, diskCheckError},
detach: detachCall{instanceID, volumeID, detachError},
test: func(testcase *testcase) (string, error) {
detacher := newDetacher(testcase)
@@ -426,6 +426,7 @@ type operationPendingCall struct {
type diskIsAttachedCall struct {
instanceID string
nodeName types.NodeName
volumeID string
isAttached bool
ret error
@@ -440,6 +441,7 @@ type diskPathCall struct {
type disksAreAttachedCall struct {
instanceID string
nodeName types.NodeName
volumeIDs []string
areAttached map[string]bool
ret error
@@ -572,6 +574,46 @@ func (testcase *testcase) ShouldTrustDevicePath() bool {
return true
}
func (testcase *testcase) DiskIsAttachedByName(nodeName types.NodeName, volumeID string) (bool, string, error) {
expected := &testcase.diskIsAttached
instanceID := expected.instanceID
// If the testcase calls DetachDisk*, return false
if *testcase.attachOrDetach == detachStatus {
return false, instanceID, nil
}
// If the testcase calls AttachDisk*, return true
if *testcase.attachOrDetach == attachStatus {
return true, instanceID, nil
}
if expected.nodeName != nodeName {
testcase.t.Errorf("Unexpected DiskIsAttachedByName call: expected nodename %s, got %s", expected.nodeName, nodeName)
return false, instanceID, errors.New("Unexpected DiskIsAttachedByName call: wrong nodename")
}
if expected.volumeID == "" && expected.instanceID == "" {
// testcase.diskIsAttached looks uninitialized; the test did not expect a
// call to DiskIsAttachedByName
testcase.t.Errorf("Unexpected DiskIsAttachedByName call!")
return false, instanceID, errors.New("Unexpected DiskIsAttachedByName call!")
}
if expected.volumeID != volumeID {
testcase.t.Errorf("Unexpected DiskIsAttachedByName call: expected volumeID %s, got %s", expected.volumeID, volumeID)
return false, instanceID, errors.New("Unexpected DiskIsAttachedByName call: wrong volumeID")
}
if expected.instanceID != instanceID {
testcase.t.Errorf("Unexpected DiskIsAttachedByName call: expected instanceID %s, got %s", expected.instanceID, instanceID)
return false, instanceID, errors.New("Unexpected DiskIsAttachedByName call: wrong instanceID")
}
glog.V(4).Infof("DiskIsAttachedByName call: %s, %s, returning %v, %v", volumeID, nodeName, expected.isAttached, expected.instanceID, expected.ret)
return expected.isAttached, expected.instanceID, expected.ret
}
func (testcase *testcase) CreateVolume(name string, size int, vtype, availability string, tags *map[string]string) (string, string, bool, error) {
return "", "", false, errors.New("Not implemented")
}
@@ -626,6 +668,36 @@ func (testcase *testcase) DisksAreAttached(instanceID string, volumeIDs []string
return expected.areAttached, expected.ret
}
func (testcase *testcase) DisksAreAttachedByName(nodeName types.NodeName, volumeIDs []string) (map[string]bool, error) {
expected := &testcase.disksAreAttached
areAttached := make(map[string]bool)
instanceID := expected.instanceID
if expected.nodeName != nodeName {
testcase.t.Errorf("Unexpected DisksAreAttachedByName call: expected nodeName %s, got %s", expected.nodeName, nodeName)
return areAttached, errors.New("Unexpected DisksAreAttachedByName call: wrong nodename")
}
if len(expected.volumeIDs) == 0 && expected.instanceID == "" {
// testcase.disksAreAttached looks uninitialized; the test did not expect a call to DisksAreAttachedByName
testcase.t.Errorf("Unexpected DisksAreAttachedByName call!")
return areAttached, errors.New("Unexpected DisksAreAttachedByName call")
}
if !reflect.DeepEqual(expected.volumeIDs, volumeIDs) {
testcase.t.Errorf("Unexpected DisksAreAttachedByName call: expected volumeIDs %v, got %v", expected.volumeIDs, volumeIDs)
return areAttached, errors.New("Unexpected DisksAreAttachedByName call: wrong volumeID")
}
if expected.instanceID != instanceID {
testcase.t.Errorf("Unexpected DisksAreAttachedByName call: expected instanceID %s, got %s", expected.instanceID, instanceID)
return areAttached, errors.New("Unexpected DisksAreAttachedByName call: wrong instanceID")
}
glog.V(4).Infof("DisksAreAttachedByName call: %v, %s, returning %v, %v", volumeIDs, nodeName, expected.areAttached, expected.ret)
return expected.areAttached, expected.ret
}
// Implementation of fake cloudprovider.Instances
type instances struct {
instanceID string

@@ -52,7 +52,8 @@ type CinderProvider interface {
GetAttachmentDiskPath(instanceID, volumeID string) (string, error)
OperationPending(diskName string) (bool, string, error)
DiskIsAttached(instanceID, volumeID string) (bool, error)
DisksAreAttached(instanceID string, volumeIDs []string) (map[string]bool, error)
DiskIsAttachedByName(nodeName types.NodeName, volumeID string) (bool, string, error)
DisksAreAttachedByName(nodeName types.NodeName, volumeIDs []string) (map[string]bool, error)
ShouldTrustDevicePath() bool
Instances() (cloudprovider.Instances, bool)
ExpandVolume(volumeID string, oldSize resource.Quantity, newSize resource.Quantity) (resource.Quantity, error)
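For completeness, a minimal stub satisfying just the two new interface methods might look like the sketch below. The names are invented here; the real test double lives in attacher_test.go above and also validates the expected call arguments:

```go
package example

import "k8s.io/apimachinery/pkg/types"

// stubProvider is a hypothetical, minimal fake for the two new methods.
type stubProvider struct {
	instanceID string            // the instance backing the node under test
	attachedTo map[string]string // volumeID -> instanceID currently attached
}

func (s *stubProvider) DiskIsAttachedByName(nodeName types.NodeName, volumeID string) (bool, string, error) {
	id, ok := s.attachedTo[volumeID]
	return ok && id == s.instanceID, s.instanceID, nil
}

func (s *stubProvider) DisksAreAttachedByName(nodeName types.NodeName, volumeIDs []string) (map[string]bool, error) {
	out := make(map[string]bool, len(volumeIDs))
	for _, v := range volumeIDs {
		attached, _, err := s.DiskIsAttachedByName(nodeName, v)
		if err != nil {
			return out, err
		}
		out[v] = attached
	}
	return out, nil
}
```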