Merge pull request #41597 from rootfs/rbd-fencing2

Automatic merge from submit-queue (batch tested with PRs 41597, 42185, 42075, 42178, 41705)

force rbd image unlock if the image is not used

**What this PR does / why we need it**:
A Ceph RBD image can remain locked if the host that holds the lock goes down. In that case, the image cannot be used by other Pods.

The fix is to detect such orphaned locks and force-unlock the image.
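
For illustration, here is a minimal, hedged sketch of how an orphaned kubelet lock can be identified from the `rbd` CLI output. It mirrors the approach in the diff below but is not the plugin's exact code; the pool/image names are placeholders and the flag handling (monitors, id, keyring) is omitted.

```go
package main

import (
	"fmt"
	"os/exec"
	"regexp"
	"strings"
)

// Locks created by kubelet carry this prefix in their lock id
// (see the kubeLockMagic constant in the diff below).
const kubeLockMagic = "kubelet_lock_magic_"

// orphanedLocks returns kubelet locks on an image that has no watchers,
// i.e. locks left behind by a node that went down while holding them.
func orphanedLocks(pool, image string) ([]string, error) {
	status, err := exec.Command("rbd", "status", image, "--pool", pool).CombinedOutput()
	if err != nil {
		return nil, fmt.Errorf("rbd status failed: %v, output: %s", err, status)
	}
	if strings.Contains(string(status), "watcher=") {
		// Someone still has the image mapped; its locks are not orphaned.
		return nil, nil
	}
	locks, err := exec.Command("rbd", "lock", "list", image, "--pool", pool).CombinedOutput()
	if err != nil {
		return nil, fmt.Errorf("rbd lock list failed: %v, output: %s", err, locks)
	}
	// Each matching line looks like "client.<id> kubelet_lock_magic_<host> ...".
	re := regexp.MustCompile("client.* " + kubeLockMagic + ".*")
	return re.FindAllString(string(locks), -1), nil
}

func main() {
	// "kube" and "foo" are placeholder pool and image names.
	locks, err := orphanedLocks("kube", "foo")
	fmt.Println(locks, err)
}
```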
**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #31790

**Special notes for your reviewer**:

Note: previously, the RBD volume plugin mapped the image, mounted it, and then created a lock on the image. Since the proposed fix uses `rbd status` output to determine whether the image is in use, the sequence has to change to: lock check (via `rbd lock list`), mapping check (via `rbd status`), forced unlock if necessary (via `rbd lock rm`), image lock, image mapping, and mount.
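
To make the new ordering concrete, here is a hedged sketch of the attach path driven through the `rbd` CLI directly. The flag handling (monitors, id, keyring), output parsing, and error details are simplified relative to the plugin code, and the helper and variable names are illustrative.

```go
package main

import (
	"fmt"
	"os/exec"
	"strings"
)

// rbd runs a single rbd CLI command and returns its combined output.
func rbd(args ...string) (string, error) {
	out, err := exec.Command("rbd", args...).CombinedOutput()
	return string(out), err
}

// attach follows the reordered sequence described above: lock check,
// mapping check, forced unlock if needed, lock, map, then mount.
func attach(pool, image, lockID string) error {
	// 1. rbd lock list: see whether our lock is already in place.
	locks, err := rbd("lock", "list", image, "--pool", pool)
	if err != nil {
		return err
	}
	if !strings.Contains(locks, lockID) {
		// 2. rbd status: no "watcher=" means the image is not mapped anywhere.
		status, statusErr := rbd("status", image, "--pool", pool)
		if statusErr == nil && !strings.Contains(status, "watcher=") {
			// 3. Forced unlock of stale kubelet locks would go here:
			//    rbd lock remove <image> <lock id> <locker> --pool <pool>
		}
		// 4. Take our own lock.
		if _, err := rbd("lock", "add", image, lockID, "--pool", pool); err != nil {
			return err
		}
	}
	// 5. Map the image; the resulting device path is printed on stdout.
	dev, err := rbd("map", image, "--pool", pool)
	if err != nil {
		return err
	}
	// 6. Format and mount the device (the plugin uses mounter.FormatAndMount).
	fmt.Println("device:", strings.TrimSpace(dev))
	return nil
}

func main() {
	// Placeholder pool, image, and lock id.
	fmt.Println(attach("kube", "foo", "kubelet_lock_magic_node-1"))
}
```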




**Release note**:

```release-note
force unlock rbd image if the image is not used
```
commit 4e46ae1d3b, authored by Kubernetes Submit Queue on 2017-03-01 00:36:01 -08:00, committed by GitHub


@@ -43,6 +43,7 @@ import (
const (
imageWatcherStr = "watcher="
kubeLockMagic = "kubelet_lock_magic_"
)
// search /sys/bus for rbd device that matches given pool and image
@@ -114,8 +115,15 @@ func (util *RBDUtil) rbdLock(b rbdMounter, lock bool) error {
} else {
secret_opt = []string{"-k", b.Keyring}
}
if len(b.adminId) == 0 {
b.adminId = b.Id
}
if len(b.adminSecret) == 0 {
b.adminSecret = b.Secret
}
// construct lock id using host name and a magic prefix
lock_id := "kubelet_lock_magic_" + node.GetHostname("")
lock_id := kubeLockMagic + node.GetHostname("")
l := len(b.Mon)
// avoid mount storm, pick a host randomly
@@ -130,7 +138,7 @@ func (util *RBDUtil) rbdLock(b rbdMounter, lock bool) error {
cmd, err = b.plugin.execCommand("rbd",
append([]string{"lock", "list", b.Image, "--pool", b.Pool, "--id", b.Id, "-m", mon}, secret_opt...))
output = string(cmd)
glog.Infof("lock list output %q", output)
if err != nil {
continue
}
@@ -142,6 +150,23 @@ func (util *RBDUtil) rbdLock(b rbdMounter, lock bool) error {
glog.V(1).Infof("rbd: lock already held for %s", lock_id)
return nil
}
// clean up orphaned lock if no watcher on the image
used, statusErr := util.rbdStatus(&b)
if statusErr == nil && !used {
re := regexp.MustCompile("client.* " + kubeLockMagic + ".*")
locks := re.FindAllStringSubmatch(output, -1)
for _, v := range locks {
if len(v) > 0 {
lockInfo := strings.Split(v[0], " ")
if len(lockInfo) > 2 {
cmd, err = b.plugin.execCommand("rbd",
append([]string{"lock", "remove", b.Image, lockInfo[1], lockInfo[0], "--pool", b.Pool, "--id", b.Id, "-m", mon}, secret_opt...))
glog.Infof("remove orphaned locker %s from client %s: err %v, output: %s", lockInfo[1], lockInfo[0], err, string(cmd))
}
}
}
}
// hold a lock: rbd lock add
cmd, err = b.plugin.execCommand("rbd",
append([]string{"lock", "add", b.Image, lock_id, "--pool", b.Pool, "--id", b.Id, "-m", mon}, secret_opt...))
@@ -220,6 +245,20 @@ func (util *RBDUtil) AttachDisk(b rbdMounter) error {
var err error
var output []byte
// create mount point
globalPDPath := b.manager.MakeGlobalPDName(*b.rbd)
notMnt, err := b.mounter.IsLikelyNotMountPoint(globalPDPath)
// in the first time, the path shouldn't exist and IsLikelyNotMountPoint is expected to get NotExist
if err != nil && !os.IsNotExist(err) {
return fmt.Errorf("rbd: %s failed to check mountpoint", globalPDPath)
}
if !notMnt {
return nil
}
if err = os.MkdirAll(globalPDPath, 0750); err != nil {
return fmt.Errorf("rbd: failed to mkdir %s, error", globalPDPath)
}
devicePath, found := waitForPath(b.Pool, b.Image, 1)
if !found {
// modprobe
@@ -227,6 +266,18 @@ func (util *RBDUtil) AttachDisk(b rbdMounter) error {
if err != nil {
return fmt.Errorf("rbd: failed to modprobe rbd error:%v", err)
}
// fence off other mappers
if err = util.fencing(b); err != nil {
return fmt.Errorf("rbd: image %s is locked by other nodes", b.Image)
}
// rbd lock remove needs ceph and image config
// but kubelet doesn't get them from apiserver during teardown
// so persist rbd config so upon disk detach, rbd lock can be removed
// since rbd json is persisted in the same local directory that is used as rbd mountpoint later,
// the json file remains invisible during rbd mount and thus won't be removed accidentally.
util.persistRBD(b, globalPDPath)
// rbd map
l := len(b.Mon)
// avoid mount storm, pick a host randomly
@@ -255,34 +306,8 @@ func (util *RBDUtil) AttachDisk(b rbdMounter) error {
return errors.New("Could not map image: Timeout after 10s")
}
}
// mount it
globalPDPath := b.manager.MakeGlobalPDName(*b.rbd)
notMnt, err := b.mounter.IsLikelyNotMountPoint(globalPDPath)
// in the first time, the path shouldn't exist and IsLikelyNotMountPoint is expected to get NotExist
if err != nil && !os.IsNotExist(err) {
return fmt.Errorf("rbd: %s failed to check mountpoint", globalPDPath)
}
if !notMnt {
return nil
}
if err := os.MkdirAll(globalPDPath, 0750); err != nil {
return fmt.Errorf("rbd: failed to mkdir %s, error", globalPDPath)
}
// fence off other mappers
if err := util.fencing(b); err != nil {
// rbd unmap before exit
b.plugin.execCommand("rbd", []string{"unmap", devicePath})
return fmt.Errorf("rbd: image %s is locked by other nodes", b.Image)
}
// rbd lock remove needs ceph and image config
// but kubelet doesn't get them from apiserver during teardown
// so persist rbd config so upon disk detach, rbd lock can be removed
// since rbd json is persisted in the same local directory that is used as rbd mountpoint later,
// the json file remains invisible during rbd mount and thus won't be removed accidentally.
util.persistRBD(b, globalPDPath)
if err = b.mounter.FormatAndMount(devicePath, globalPDPath, b.fsType, nil); err != nil {
err = fmt.Errorf("rbd: failed to mount rbd volume %s [%s] to %s, error %v", devicePath, b.fsType, globalPDPath, err)
}