Merge pull request #40910 from justinsb/fix_35695

Automatic merge from submit-queue (batch tested with PRs 41667, 41820, 40910, 41645, 41361)

Allow multiple mounts in StatefulSet volume zone placement

We have some heuristics that ensure that volumes (and hence stateful set
pods) are spread out across zones.  Sadly they forgot to account for
multiple mounts.  This PR updates the heuristic to ignore the mount name
when we see something that looks like a statefulset volume, thus
ensuring that multiple mounts end up in the same AZ.

Fix #35695

```release-note
Fix zone placement heuristics so that multiple mounts in a StatefulSet pod are created in the same zone
```
This commit is contained in:
Kubernetes Submit Queue 2017-02-23 20:57:29 -08:00 committed by GitHub
commit b5d010d6a3
4 changed files with 176 additions and 6 deletions

View File

@ -91,6 +91,7 @@ func getPodName(set *apps.StatefulSet, ordinal int) string {
// getPersistentVolumeClaimName getsthe name of PersistentVolumeClaim for a Pod with an ordinal index of ordinal. claim
// must be a PersistentVolumeClaim from set's VolumeClaims template.
func getPersistentVolumeClaimName(set *apps.StatefulSet, claim *v1.PersistentVolumeClaim, ordinal int) string {
// NOTE: This name format is used by the heuristics for zone spreading in ChooseZoneForVolume
return fmt.Sprintf("%s-%s-%d", claim.Name, set.Name, ordinal)
}

View File

@ -62,6 +62,7 @@ go_test(
"//vendor:k8s.io/apimachinery/pkg/api/resource",
"//vendor:k8s.io/apimachinery/pkg/apis/meta/v1",
"//vendor:k8s.io/apimachinery/pkg/types",
"//vendor:k8s.io/apimachinery/pkg/util/sets",
"//vendor:k8s.io/apimachinery/pkg/watch",
"//vendor:k8s.io/client-go/util/testing",
],

View File

@ -300,16 +300,34 @@ func ChooseZoneForVolume(zones sets.String, pvcName string) string {
// Heuristic to make sure that volumes in a StatefulSet are spread across zones
// StatefulSet PVCs are (currently) named ClaimName-StatefulSetName-Id,
// where Id is an integer index
// where Id is an integer index.
// Note though that if a StatefulSet pod has multiple claims, we need them to be
// in the same zone, because otherwise the pod will be unable to mount both volumes,
// and will be unschedulable. So we hash _only_ the "StatefulSetName" portion when
// it looks like `ClaimName-StatefulSetName-Id`.
// We continue to round-robin volume names that look like `Name-Id` also; this is a useful
// feature for users that are creating statefulset-like functionality without using statefulsets.
lastDash := strings.LastIndexByte(pvcName, '-')
if lastDash != -1 {
petIDString := pvcName[lastDash+1:]
petID, err := strconv.ParseUint(petIDString, 10, 32)
statefulsetIDString := pvcName[lastDash+1:]
statefulsetID, err := strconv.ParseUint(statefulsetIDString, 10, 32)
if err == nil {
// Offset by the pet id, so we round-robin across zones
index = uint32(petID)
// We still hash the volume name, but only the base
// Offset by the statefulsetID, so we round-robin across zones
index = uint32(statefulsetID)
// We still hash the volume name, but only the prefix
hashString = pvcName[:lastDash]
// In the special case where it looks like `ClaimName-StatefulSetName-Id`,
// hash only the StatefulSetName, so that different claims on the same StatefulSet
// member end up in the same zone.
// Note that StatefulSetName (and ClaimName) might themselves both have dashes.
// We actually just take the portion after the final - of ClaimName-StatefulSetName.
// For our purposes it doesn't much matter (just suboptimal spreading).
lastDash := strings.LastIndexByte(hashString, '-')
if lastDash != -1 {
hashString = hashString[lastDash+1:]
}
glog.V(2).Infof("Detected StatefulSet-style volume name %q; index=%d", pvcName, index)
}
}

View File

@ -18,12 +18,14 @@ package volume
import (
"fmt"
"hash/fnv"
"strings"
"testing"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/v1"
@ -304,3 +306,151 @@ func TestGenerateVolumeName(t *testing.T) {
}
}
func checkFnv32(t *testing.T, s string, expected int) {
h := fnv.New32()
h.Write([]byte(s))
h.Sum32()
if int(h.Sum32()) != expected {
t.Fatalf("hash of %q was %v, expected %v", s, h.Sum32(), expected)
}
}
func TestChooseZoneForVolume(t *testing.T) {
checkFnv32(t, "henley", 1180403676)
// 1180403676 mod 3 == 0, so the offset from "henley" is 0, which makes it easier to verify this by inspection
// A few others
checkFnv32(t, "henley-", 2652299129)
checkFnv32(t, "henley-a", 1459735322)
checkFnv32(t, "", 2166136261)
tests := []struct {
Zones []string
VolumeName string
Expected string
}{
// Test for PVC names that don't have a dash
{
Zones: []string{"a", "b", "c"},
VolumeName: "henley",
Expected: "a", // hash("henley") == 0
},
// Tests for PVC names that end in - number, but don't look like statefulset PVCs
{
Zones: []string{"a", "b", "c"},
VolumeName: "henley-0",
Expected: "a", // hash("henley") == 0
},
{
Zones: []string{"a", "b", "c"},
VolumeName: "henley-1",
Expected: "b", // hash("henley") + 1 == 1
},
{
Zones: []string{"a", "b", "c"},
VolumeName: "henley-2",
Expected: "c", // hash("henley") + 2 == 2
},
{
Zones: []string{"a", "b", "c"},
VolumeName: "henley-3",
Expected: "a", // hash("henley") + 3 == 3 === 0 mod 3
},
{
Zones: []string{"a", "b", "c"},
VolumeName: "henley-4",
Expected: "b", // hash("henley") + 4 == 4 === 1 mod 3
},
// Tests for PVC names that are edge cases
{
Zones: []string{"a", "b", "c"},
VolumeName: "henley-",
Expected: "c", // hash("henley-") = 2652299129 === 2 mod 3
},
{
Zones: []string{"a", "b", "c"},
VolumeName: "henley-a",
Expected: "c", // hash("henley-a") = 1459735322 === 2 mod 3
},
{
Zones: []string{"a", "b", "c"},
VolumeName: "medium--1",
Expected: "c", // hash("") + 1 == 2166136261 + 1 === 2 mod 3
},
// Tests for PVC names for simple StatefulSet cases
{
Zones: []string{"a", "b", "c"},
VolumeName: "medium-henley-1",
Expected: "b", // hash("henley") + 1 == 1
},
{
Zones: []string{"a", "b", "c"},
VolumeName: "loud-henley-1",
Expected: "b", // hash("henley") + 1 == 1
},
{
Zones: []string{"a", "b", "c"},
VolumeName: "quiet-henley-2",
Expected: "c", // hash("henley") + 2 == 2
},
{
Zones: []string{"a", "b", "c"},
VolumeName: "medium-henley-2",
Expected: "c", // hash("henley") + 2 == 2
},
{
Zones: []string{"a", "b", "c"},
VolumeName: "medium-henley-3",
Expected: "a", // hash("henley") + 3 == 3 === 0 mod 3
},
{
Zones: []string{"a", "b", "c"},
VolumeName: "medium-henley-4",
Expected: "b", // hash("henley") + 4 == 4 === 1 mod 3
},
// Tests for statefulsets (or claims) with dashes in the names
{
Zones: []string{"a", "b", "c"},
VolumeName: "medium-alpha-henley-2",
Expected: "c", // hash("henley") + 2 == 2
},
{
Zones: []string{"a", "b", "c"},
VolumeName: "medium-beta-henley-3",
Expected: "a", // hash("henley") + 3 == 3 === 0 mod 3
},
{
Zones: []string{"a", "b", "c"},
VolumeName: "medium-gamma-henley-4",
Expected: "b", // hash("henley") + 4 == 4 === 1 mod 3
},
// Tests for statefulsets name ending in -
{
Zones: []string{"a", "b", "c"},
VolumeName: "medium-henley--2",
Expected: "a", // hash("") + 2 == 0 mod 3
},
{
Zones: []string{"a", "b", "c"},
VolumeName: "medium-henley--3",
Expected: "b", // hash("") + 3 == 1 mod 3
},
{
Zones: []string{"a", "b", "c"},
VolumeName: "medium-henley--4",
Expected: "c", // hash("") + 4 == 2 mod 3
},
}
for _, test := range tests {
zonesSet := sets.NewString(test.Zones...)
actual := ChooseZoneForVolume(zonesSet, test.VolumeName)
for actual != test.Expected {
t.Errorf("Test %v failed, expected zone %q, actual %q", test, test.Expected, actual)
}
}
}