diff --git a/pkg/controller/statefulset/stateful_set_utils.go b/pkg/controller/statefulset/stateful_set_utils.go index 69e13c6c4b8..59811c1e6d4 100644 --- a/pkg/controller/statefulset/stateful_set_utils.go +++ b/pkg/controller/statefulset/stateful_set_utils.go @@ -91,6 +91,7 @@ func getPodName(set *apps.StatefulSet, ordinal int) string { // getPersistentVolumeClaimName getsthe name of PersistentVolumeClaim for a Pod with an ordinal index of ordinal. claim // must be a PersistentVolumeClaim from set's VolumeClaims template. func getPersistentVolumeClaimName(set *apps.StatefulSet, claim *v1.PersistentVolumeClaim, ordinal int) string { + // NOTE: This name format is used by the heuristics for zone spreading in ChooseZoneForVolume return fmt.Sprintf("%s-%s-%d", claim.Name, set.Name, ordinal) } diff --git a/pkg/volume/BUILD b/pkg/volume/BUILD index 11fd1df09be..3ed82d67d74 100644 --- a/pkg/volume/BUILD +++ b/pkg/volume/BUILD @@ -62,6 +62,7 @@ go_test( "//vendor:k8s.io/apimachinery/pkg/api/resource", "//vendor:k8s.io/apimachinery/pkg/apis/meta/v1", "//vendor:k8s.io/apimachinery/pkg/types", + "//vendor:k8s.io/apimachinery/pkg/util/sets", "//vendor:k8s.io/apimachinery/pkg/watch", "//vendor:k8s.io/client-go/util/testing", ], diff --git a/pkg/volume/util.go b/pkg/volume/util.go index d3a061e1713..adc18adfd66 100644 --- a/pkg/volume/util.go +++ b/pkg/volume/util.go @@ -300,16 +300,34 @@ func ChooseZoneForVolume(zones sets.String, pvcName string) string { // Heuristic to make sure that volumes in a StatefulSet are spread across zones // StatefulSet PVCs are (currently) named ClaimName-StatefulSetName-Id, - // where Id is an integer index + // where Id is an integer index. + // Note though that if a StatefulSet pod has multiple claims, we need them to be + // in the same zone, because otherwise the pod will be unable to mount both volumes, + // and will be unschedulable. So we hash _only_ the "StatefulSetName" portion when + // it looks like `ClaimName-StatefulSetName-Id`. + // We continue to round-robin volume names that look like `Name-Id` also; this is a useful + // feature for users that are creating statefulset-like functionality without using statefulsets. lastDash := strings.LastIndexByte(pvcName, '-') if lastDash != -1 { - petIDString := pvcName[lastDash+1:] - petID, err := strconv.ParseUint(petIDString, 10, 32) + statefulsetIDString := pvcName[lastDash+1:] + statefulsetID, err := strconv.ParseUint(statefulsetIDString, 10, 32) if err == nil { - // Offset by the pet id, so we round-robin across zones - index = uint32(petID) - // We still hash the volume name, but only the base + // Offset by the statefulsetID, so we round-robin across zones + index = uint32(statefulsetID) + // We still hash the volume name, but only the prefix hashString = pvcName[:lastDash] + + // In the special case where it looks like `ClaimName-StatefulSetName-Id`, + // hash only the StatefulSetName, so that different claims on the same StatefulSet + // member end up in the same zone. + // Note that StatefulSetName (and ClaimName) might themselves both have dashes. + // We actually just take the portion after the final - of ClaimName-StatefulSetName. + // For our purposes it doesn't much matter (just suboptimal spreading). + lastDash := strings.LastIndexByte(hashString, '-') + if lastDash != -1 { + hashString = hashString[lastDash+1:] + } + glog.V(2).Infof("Detected StatefulSet-style volume name %q; index=%d", pvcName, index) } } diff --git a/pkg/volume/util_test.go b/pkg/volume/util_test.go index 924a5e0151d..0f85b3fad00 100644 --- a/pkg/volume/util_test.go +++ b/pkg/volume/util_test.go @@ -18,12 +18,14 @@ package volume import ( "fmt" + "hash/fnv" "strings" "testing" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/watch" "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/api/v1" @@ -304,3 +306,151 @@ func TestGenerateVolumeName(t *testing.T) { } } + +func checkFnv32(t *testing.T, s string, expected int) { + h := fnv.New32() + h.Write([]byte(s)) + h.Sum32() + + if int(h.Sum32()) != expected { + t.Fatalf("hash of %q was %v, expected %v", s, h.Sum32(), expected) + } +} + +func TestChooseZoneForVolume(t *testing.T) { + checkFnv32(t, "henley", 1180403676) + // 1180403676 mod 3 == 0, so the offset from "henley" is 0, which makes it easier to verify this by inspection + + // A few others + checkFnv32(t, "henley-", 2652299129) + checkFnv32(t, "henley-a", 1459735322) + checkFnv32(t, "", 2166136261) + + tests := []struct { + Zones []string + VolumeName string + Expected string + }{ + // Test for PVC names that don't have a dash + { + Zones: []string{"a", "b", "c"}, + VolumeName: "henley", + Expected: "a", // hash("henley") == 0 + }, + // Tests for PVC names that end in - number, but don't look like statefulset PVCs + { + Zones: []string{"a", "b", "c"}, + VolumeName: "henley-0", + Expected: "a", // hash("henley") == 0 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "henley-1", + Expected: "b", // hash("henley") + 1 == 1 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "henley-2", + Expected: "c", // hash("henley") + 2 == 2 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "henley-3", + Expected: "a", // hash("henley") + 3 == 3 === 0 mod 3 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "henley-4", + Expected: "b", // hash("henley") + 4 == 4 === 1 mod 3 + }, + // Tests for PVC names that are edge cases + { + Zones: []string{"a", "b", "c"}, + VolumeName: "henley-", + Expected: "c", // hash("henley-") = 2652299129 === 2 mod 3 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "henley-a", + Expected: "c", // hash("henley-a") = 1459735322 === 2 mod 3 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium--1", + Expected: "c", // hash("") + 1 == 2166136261 + 1 === 2 mod 3 + }, + // Tests for PVC names for simple StatefulSet cases + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-henley-1", + Expected: "b", // hash("henley") + 1 == 1 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "loud-henley-1", + Expected: "b", // hash("henley") + 1 == 1 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "quiet-henley-2", + Expected: "c", // hash("henley") + 2 == 2 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-henley-2", + Expected: "c", // hash("henley") + 2 == 2 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-henley-3", + Expected: "a", // hash("henley") + 3 == 3 === 0 mod 3 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-henley-4", + Expected: "b", // hash("henley") + 4 == 4 === 1 mod 3 + }, + // Tests for statefulsets (or claims) with dashes in the names + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-alpha-henley-2", + Expected: "c", // hash("henley") + 2 == 2 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-beta-henley-3", + Expected: "a", // hash("henley") + 3 == 3 === 0 mod 3 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-gamma-henley-4", + Expected: "b", // hash("henley") + 4 == 4 === 1 mod 3 + }, + // Tests for statefulsets name ending in - + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-henley--2", + Expected: "a", // hash("") + 2 == 0 mod 3 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-henley--3", + Expected: "b", // hash("") + 3 == 1 mod 3 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-henley--4", + Expected: "c", // hash("") + 4 == 2 mod 3 + }, + } + + for _, test := range tests { + zonesSet := sets.NewString(test.Zones...) + + actual := ChooseZoneForVolume(zonesSet, test.VolumeName) + + for actual != test.Expected { + t.Errorf("Test %v failed, expected zone %q, actual %q", test, test.Expected, actual) + } + } +}