From bba343d066dd37b3ee525965ceb50b63d7b425e3 Mon Sep 17 00:00:00 2001 From: Justin Santa Barbara Date: Fri, 3 Feb 2017 00:57:20 -0500 Subject: [PATCH] Allow multiple mounts in StatefulSet volume zone placement We have some heuristics that ensure that volumes (and hence stateful set pods) are spread out across zones. Sadly they forgot to account for multiple mounts. This PR updates the heuristic to ignore the mount name when we see something that looks like a statefulset volume, thus ensuring that multiple mounts end up in the same AZ. Fix #35695 --- .../statefulset/stateful_set_utils.go | 1 + pkg/volume/BUILD | 1 + pkg/volume/util.go | 30 +++- pkg/volume/util_test.go | 150 ++++++++++++++++++ 4 files changed, 176 insertions(+), 6 deletions(-) diff --git a/pkg/controller/statefulset/stateful_set_utils.go b/pkg/controller/statefulset/stateful_set_utils.go index c8c3ec1ed90..170f10780df 100644 --- a/pkg/controller/statefulset/stateful_set_utils.go +++ b/pkg/controller/statefulset/stateful_set_utils.go @@ -91,6 +91,7 @@ func getPodName(set *apps.StatefulSet, ordinal int) string { // getPersistentVolumeClaimName getsthe name of PersistentVolumeClaim for a Pod with an ordinal index of ordinal. claim // must be a PersistentVolumeClaim from set's VolumeClaims template. func getPersistentVolumeClaimName(set *apps.StatefulSet, claim *v1.PersistentVolumeClaim, ordinal int) string { + // NOTE: This name format is used by the heuristics for zone spreading in ChooseZoneForVolume return fmt.Sprintf("%s-%s-%d", claim.Name, set.Name, ordinal) } diff --git a/pkg/volume/BUILD b/pkg/volume/BUILD index 9b114c49f19..c9cb3a01808 100644 --- a/pkg/volume/BUILD +++ b/pkg/volume/BUILD @@ -62,6 +62,7 @@ go_test( "//vendor:k8s.io/apimachinery/pkg/api/resource", "//vendor:k8s.io/apimachinery/pkg/apis/meta/v1", "//vendor:k8s.io/apimachinery/pkg/types", + "//vendor:k8s.io/apimachinery/pkg/util/sets", "//vendor:k8s.io/apimachinery/pkg/watch", "//vendor:k8s.io/client-go/util/testing", ], diff --git a/pkg/volume/util.go b/pkg/volume/util.go index d3a061e1713..adc18adfd66 100644 --- a/pkg/volume/util.go +++ b/pkg/volume/util.go @@ -300,16 +300,34 @@ func ChooseZoneForVolume(zones sets.String, pvcName string) string { // Heuristic to make sure that volumes in a StatefulSet are spread across zones // StatefulSet PVCs are (currently) named ClaimName-StatefulSetName-Id, - // where Id is an integer index + // where Id is an integer index. + // Note though that if a StatefulSet pod has multiple claims, we need them to be + // in the same zone, because otherwise the pod will be unable to mount both volumes, + // and will be unschedulable. So we hash _only_ the "StatefulSetName" portion when + // it looks like `ClaimName-StatefulSetName-Id`. + // We continue to round-robin volume names that look like `Name-Id` also; this is a useful + // feature for users that are creating statefulset-like functionality without using statefulsets. lastDash := strings.LastIndexByte(pvcName, '-') if lastDash != -1 { - petIDString := pvcName[lastDash+1:] - petID, err := strconv.ParseUint(petIDString, 10, 32) + statefulsetIDString := pvcName[lastDash+1:] + statefulsetID, err := strconv.ParseUint(statefulsetIDString, 10, 32) if err == nil { - // Offset by the pet id, so we round-robin across zones - index = uint32(petID) - // We still hash the volume name, but only the base + // Offset by the statefulsetID, so we round-robin across zones + index = uint32(statefulsetID) + // We still hash the volume name, but only the prefix hashString = pvcName[:lastDash] + + // In the special case where it looks like `ClaimName-StatefulSetName-Id`, + // hash only the StatefulSetName, so that different claims on the same StatefulSet + // member end up in the same zone. + // Note that StatefulSetName (and ClaimName) might themselves both have dashes. + // We actually just take the portion after the final - of ClaimName-StatefulSetName. + // For our purposes it doesn't much matter (just suboptimal spreading). + lastDash := strings.LastIndexByte(hashString, '-') + if lastDash != -1 { + hashString = hashString[lastDash+1:] + } + glog.V(2).Infof("Detected StatefulSet-style volume name %q; index=%d", pvcName, index) } } diff --git a/pkg/volume/util_test.go b/pkg/volume/util_test.go index 924a5e0151d..0f85b3fad00 100644 --- a/pkg/volume/util_test.go +++ b/pkg/volume/util_test.go @@ -18,12 +18,14 @@ package volume import ( "fmt" + "hash/fnv" "strings" "testing" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/watch" "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/api/v1" @@ -304,3 +306,151 @@ func TestGenerateVolumeName(t *testing.T) { } } + +func checkFnv32(t *testing.T, s string, expected int) { + h := fnv.New32() + h.Write([]byte(s)) + h.Sum32() + + if int(h.Sum32()) != expected { + t.Fatalf("hash of %q was %v, expected %v", s, h.Sum32(), expected) + } +} + +func TestChooseZoneForVolume(t *testing.T) { + checkFnv32(t, "henley", 1180403676) + // 1180403676 mod 3 == 0, so the offset from "henley" is 0, which makes it easier to verify this by inspection + + // A few others + checkFnv32(t, "henley-", 2652299129) + checkFnv32(t, "henley-a", 1459735322) + checkFnv32(t, "", 2166136261) + + tests := []struct { + Zones []string + VolumeName string + Expected string + }{ + // Test for PVC names that don't have a dash + { + Zones: []string{"a", "b", "c"}, + VolumeName: "henley", + Expected: "a", // hash("henley") == 0 + }, + // Tests for PVC names that end in - number, but don't look like statefulset PVCs + { + Zones: []string{"a", "b", "c"}, + VolumeName: "henley-0", + Expected: "a", // hash("henley") == 0 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "henley-1", + Expected: "b", // hash("henley") + 1 == 1 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "henley-2", + Expected: "c", // hash("henley") + 2 == 2 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "henley-3", + Expected: "a", // hash("henley") + 3 == 3 === 0 mod 3 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "henley-4", + Expected: "b", // hash("henley") + 4 == 4 === 1 mod 3 + }, + // Tests for PVC names that are edge cases + { + Zones: []string{"a", "b", "c"}, + VolumeName: "henley-", + Expected: "c", // hash("henley-") = 2652299129 === 2 mod 3 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "henley-a", + Expected: "c", // hash("henley-a") = 1459735322 === 2 mod 3 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium--1", + Expected: "c", // hash("") + 1 == 2166136261 + 1 === 2 mod 3 + }, + // Tests for PVC names for simple StatefulSet cases + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-henley-1", + Expected: "b", // hash("henley") + 1 == 1 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "loud-henley-1", + Expected: "b", // hash("henley") + 1 == 1 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "quiet-henley-2", + Expected: "c", // hash("henley") + 2 == 2 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-henley-2", + Expected: "c", // hash("henley") + 2 == 2 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-henley-3", + Expected: "a", // hash("henley") + 3 == 3 === 0 mod 3 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-henley-4", + Expected: "b", // hash("henley") + 4 == 4 === 1 mod 3 + }, + // Tests for statefulsets (or claims) with dashes in the names + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-alpha-henley-2", + Expected: "c", // hash("henley") + 2 == 2 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-beta-henley-3", + Expected: "a", // hash("henley") + 3 == 3 === 0 mod 3 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-gamma-henley-4", + Expected: "b", // hash("henley") + 4 == 4 === 1 mod 3 + }, + // Tests for statefulsets name ending in - + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-henley--2", + Expected: "a", // hash("") + 2 == 0 mod 3 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-henley--3", + Expected: "b", // hash("") + 3 == 1 mod 3 + }, + { + Zones: []string{"a", "b", "c"}, + VolumeName: "medium-henley--4", + Expected: "c", // hash("") + 4 == 2 mod 3 + }, + } + + for _, test := range tests { + zonesSet := sets.NewString(test.Zones...) + + actual := ChooseZoneForVolume(zonesSet, test.VolumeName) + + for actual != test.Expected { + t.Errorf("Test %v failed, expected zone %q, actual %q", test, test.Expected, actual) + } + } +}