diff --git a/pkg/volume/gce_pd/BUILD b/pkg/volume/gce_pd/BUILD index 91770ff306d..56ad3673b49 100644 --- a/pkg/volume/gce_pd/BUILD +++ b/pkg/volume/gce_pd/BUILD @@ -21,6 +21,7 @@ go_library( "//pkg/cloudprovider/providers/gce:go_default_library", "//pkg/features:go_default_library", "//pkg/kubelet/apis:go_default_library", + "//pkg/util/file:go_default_library", "//pkg/util/mount:go_default_library", "//pkg/util/strings:go_default_library", "//pkg/volume:go_default_library", @@ -43,6 +44,7 @@ go_test( "attacher_test.go", "gce_pd_block_test.go", "gce_pd_test.go", + "gce_util_test.go", ], embed = [":go_default_library"], deps = [ diff --git a/pkg/volume/gce_pd/attacher.go b/pkg/volume/gce_pd/attacher.go index 8a6256b617f..6b73edbc663 100644 --- a/pkg/volume/gce_pd/attacher.go +++ b/pkg/volume/gce_pd/attacher.go @@ -157,7 +157,7 @@ func (attacher *gcePersistentDiskAttacher) WaitForAttach(spec *volume.Spec, devi select { case <-ticker.C: glog.V(5).Infof("Checking GCE PD %q is attached.", pdName) - path, err := verifyDevicePath(devicePaths, sdBeforeSet) + path, err := verifyDevicePath(devicePaths, sdBeforeSet, pdName) if err != nil { // Log error, if any, and continue checking periodically. See issue #11321 glog.Errorf("Error verifying GCE PD (%q) is attached: %v", pdName, err) diff --git a/pkg/volume/gce_pd/gce_util.go b/pkg/volume/gce_pd/gce_util.go index 8774d77c8fc..ee9dedc8255 100644 --- a/pkg/volume/gce_pd/gce_util.go +++ b/pkg/volume/gce_pd/gce_util.go @@ -20,6 +20,7 @@ import ( "fmt" "path" "path/filepath" + "regexp" "strings" "time" @@ -31,6 +32,7 @@ import ( gcecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/gce" "k8s.io/kubernetes/pkg/features" kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis" + utilfile "k8s.io/kubernetes/pkg/util/file" "k8s.io/kubernetes/pkg/volume" volumeutil "k8s.io/kubernetes/pkg/volume/util" "k8s.io/utils/exec" @@ -50,12 +52,19 @@ const ( // Replication type constants must be lower case. replicationTypeNone = "none" replicationTypeRegionalPD = "regional-pd" + + // scsi_id output should be in the form of: + // 0Google PersistentDisk + scsiPattern = `^0Google\s+PersistentDisk\s+([\S]+)\s*$` ) -// These variables are modified only in unit tests and should be constant -// otherwise. var ( + // errorSleepDuration is modified only in unit tests and should be constant + // otherwise. errorSleepDuration time.Duration = 5 * time.Second + + // regex to parse scsi_id output and extract the serial + scsiRegex = regexp.MustCompile(scsiPattern) ) type GCEDiskUtil struct{} @@ -261,9 +270,11 @@ func createRegionalPD( } // Returns the first path that exists, or empty string if none exist. -func verifyDevicePath(devicePaths []string, sdBeforeSet sets.String) (string, error) { +func verifyDevicePath(devicePaths []string, sdBeforeSet sets.String, diskName string) (string, error) { if err := udevadmChangeToNewDrives(sdBeforeSet); err != nil { - // udevadm errors should not block disk detachment, log and continue + // It's possible udevadm was called on other disks so it should not block this + // call. If it did fail on this disk, then the devicePath will either + // not exist or be wrong. If it's wrong, then the scsi_id check below will fail. glog.Errorf("udevadmChangeToNewDrives failed with: %v", err) } @@ -271,6 +282,22 @@ func verifyDevicePath(devicePaths []string, sdBeforeSet sets.String) (string, er if pathExists, err := volumeutil.PathExists(path); err != nil { return "", fmt.Errorf("Error checking if path exists: %v", err) } else if pathExists { + // validate that the path actually resolves to the correct disk + serial, err := getScsiSerial(path, diskName) + if err != nil { + return "", fmt.Errorf("failed to get scsi serial %v", err) + } + if serial != diskName { + // The device link is not pointing to the correct device + // Trigger udev on this device to try to fix the link + if udevErr := udevadmChangeToDrive(path); udevErr != nil { + glog.Errorf("udevadmChangeToDrive %q failed with: %v", path, err) + } + + // Return error to retry WaitForAttach and verifyDevicePath + return "", fmt.Errorf("scsi_id serial %q for device %q doesn't match disk %q", serial, path, diskName) + } + // The device link is correct return path, nil } } @@ -278,22 +305,38 @@ func verifyDevicePath(devicePaths []string, sdBeforeSet sets.String) (string, er return "", nil } -// Returns the first path that exists, or empty string if none exist. -func verifyAllPathsRemoved(devicePaths []string) (bool, error) { - allPathsRemoved := true - for _, path := range devicePaths { - if err := udevadmChangeToDrive(path); err != nil { - // udevadm errors should not block disk detachment, log and continue - glog.Errorf("%v", err) - } - if exists, err := volumeutil.PathExists(path); err != nil { - return false, fmt.Errorf("Error checking if path exists: %v", err) - } else { - allPathsRemoved = allPathsRemoved && !exists - } +// Calls scsi_id on the given devicePath to get the serial number reported by that device. +func getScsiSerial(devicePath, diskName string) (string, error) { + exists, err := utilfile.FileExists("/lib/udev/scsi_id") + if err != nil { + return "", fmt.Errorf("failed to check scsi_id existence: %v", err) } - return allPathsRemoved, nil + if !exists { + glog.V(6).Infof("scsi_id doesn't exist; skipping check for %v", devicePath) + return diskName, nil + } + + out, err := exec.New().Command( + "/lib/udev/scsi_id", + "--page=0x83", + "--whitelisted", + fmt.Sprintf("--device=%v", devicePath)).CombinedOutput() + if err != nil { + return "", fmt.Errorf("scsi_id failed for device %q with %v.", devicePath, err) + } + + return parseScsiSerial(string(out)) +} + +// Parse the output returned by scsi_id and extract the serial number +func parseScsiSerial(output string) (string, error) { + substrings := scsiRegex.FindStringSubmatch(output) + if substrings == nil { + return "", fmt.Errorf("scsi_id output cannot be parsed: %q", output) + } + + return substrings[1], nil } // Returns list of all /dev/disk/by-id/* paths for given PD. diff --git a/pkg/volume/gce_pd/gce_util_test.go b/pkg/volume/gce_pd/gce_util_test.go new file mode 100644 index 00000000000..91b33cd2a75 --- /dev/null +++ b/pkg/volume/gce_pd/gce_util_test.go @@ -0,0 +1,62 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package gce_pd + +import "testing" + +func TestParseScsiSerial(t *testing.T) { + cases := []struct { + name string + output string + diskName string + expectErr bool + }{ + { + name: "valid", + output: "0Google PersistentDisk test-disk", + diskName: "test-disk", + }, + { + name: "valid with newline", + output: "0Google PersistentDisk test-disk\n", + diskName: "test-disk", + }, + { + name: "invalid prefix", + output: "00Google PersistentDisk test-disk", + expectErr: true, + }, + { + name: "invalid suffix", + output: "0Google PersistentDisk test-disk more", + expectErr: true, + }, + } + + for _, test := range cases { + serial, err := parseScsiSerial(test.output) + if err != nil && !test.expectErr { + t.Errorf("test %v failed: %v", test.name, err) + } + if err == nil && test.expectErr { + t.Errorf("test %q failed: got success", test.name) + } + if serial != test.diskName { + t.Errorf("test %v failed: expected serial %q, got %q", test.name, test.diskName, serial) + } + } +} diff --git a/test/e2e/storage/persistent_volumes.go b/test/e2e/storage/persistent_volumes.go index 36e166d6c26..acf126d4eff 100644 --- a/test/e2e/storage/persistent_volumes.go +++ b/test/e2e/storage/persistent_volumes.go @@ -300,4 +300,53 @@ var _ = utils.SIGDescribe("PersistentVolumes", func() { }) }) }) + + Describe("Default StorageClass", func() { + Context("pods that use multiple volumes", func() { + It("should be reschedulable", func() { + // Only run on providers with default storageclass + framework.SkipUnlessProviderIs("openstack", "gce", "gke", "vsphere", "azure") + + numVols := 4 + pvcs := []*v1.PersistentVolumeClaim{} + + By("Creating PVCs") + for i := 0; i < numVols; i++ { + pvc = framework.MakePersistentVolumeClaim(framework.PersistentVolumeClaimConfig{}, ns) + pvc, err = framework.CreatePVC(c, ns, pvc) + Expect(err).NotTo(HaveOccurred()) + pvcs = append(pvcs, pvc) + } + + By("Waiting for PVCs to be bound") + for _, pvc := range pvcs { + framework.Logf("Created PVC %q", pvc.Name) + framework.ExpectNoError(framework.WaitForPersistentVolumeClaimPhase(v1.ClaimBound, c, ns, pvc.Name, framework.Poll, framework.ClaimProvisionTimeout)) + } + + By("Creating a pod and initializing data") + writeCmd := "true" + for i, pvc := range pvcs { + // mountPath is /mnt/volume + writeCmd += fmt.Sprintf("&& touch /mnt/volume%v/%v", i+1, pvc.Name) + } + pod := framework.MakePod(ns, nil, pvcs, false, writeCmd) + pod, err = c.CoreV1().Pods(ns).Create(pod) + Expect(err).NotTo(HaveOccurred()) + framework.ExpectNoError(framework.WaitForPodSuccessInNamespace(c, pod.Name, ns)) + + By("Recreating the pod and validating the data") + framework.ExpectNoError(framework.DeletePodWithWait(f, c, pod)) + validateCmd := "true" + for i, pvc := range pvcs { + // mountPath is /mnt/volume + validateCmd += fmt.Sprintf("&& test -f /mnt/volume%v/%v", i+1, pvc.Name) + } + pod = framework.MakePod(ns, nil, pvcs, false, validateCmd) + pod, err = c.CoreV1().Pods(ns).Create(pod) + Expect(err).NotTo(HaveOccurred()) + framework.ExpectNoError(framework.WaitForPodSuccessInNamespace(c, pod.Name, ns)) + }) + }) + }) })