Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-22 11:21:47 +00:00
Add comments & misc review fixes
Lots of comments describing the heuristics, how they fit together, and their limitations. In particular, we can't guarantee correct volume placement if the set of zones changes while volumes are being allocated.
parent 9c2566572d
commit dd94997619
@@ -919,7 +919,10 @@ func (aws *AWSCloud) List(filter string) ([]string, error) {
 // getAllZones retrieves a list of all the zones in which nodes are running
 // It currently involves querying all instances
 func (c *AWSCloud) getAllZones() (sets.String, error) {
-    // TODO: Caching, although we currently only use this in volume creation
+    // We don't currently cache this; it is currently used only in volume
+    // creation which is expected to be a comparatively rare occurrence.

+    // TODO: Caching / expose api.Nodes to the cloud provider?
+    // TODO: We could also query for subnets, I think

     filters := []*ec2.Filter{newEc2Filter("instance-state-name", "running")}
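The comments above describe the approach: list all running instances and collect the distinct zones they occupy. The sketch below illustrates only that shape; `instance` and `describeRunningInstances` are hypothetical stand-ins for the EC2 DescribeInstances call filtered on instance-state-name=running, not the actual cloud-provider code.

package main

import (
	"fmt"
	"sort"
)

// instance is a hypothetical, minimal stand-in for an EC2 instance record.
type instance struct {
	Name string
	Zone string // availability zone the instance runs in
}

// describeRunningInstances stands in for the filtered DescribeInstances call.
func describeRunningInstances() ([]instance, error) {
	return []instance{
		{Name: "node-a", Zone: "us-east-1a"},
		{Name: "node-b", Zone: "us-east-1b"},
		{Name: "node-c", Zone: "us-east-1a"},
	}, nil
}

// getAllZones returns the distinct zones that currently host running nodes.
func getAllZones() ([]string, error) {
	instances, err := describeRunningInstances()
	if err != nil {
		return nil, err
	}
	zoneSet := map[string]struct{}{}
	for _, inst := range instances {
		zoneSet[inst.Zone] = struct{}{}
	}
	zones := make([]string, 0, len(zoneSet))
	for z := range zoneSet {
		zones = append(zones, z)
	}
	sort.Strings(zones) // consistent order, like sets.String.List()
	return zones, nil
}

func main() {
	zones, _ := getAllZones()
	fmt.Println(zones) // [us-east-1a us-east-1b]
}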
@@ -2070,6 +2070,7 @@ func (gce *GCECloud) List(filter string) ([]string, error) {

+// GetAllZones returns all the zones in which nodes are running
 func (gce *GCECloud) GetAllZones() (sets.String, error) {
     // Fast-path for non-multizone
     if len(gce.managedZones) == 1 {
         return sets.NewString(gce.managedZones...), nil
     }
@@ -2084,6 +2085,15 @@ func (gce *GCECloud) GetAllZones() (sets.String, error) {
         listCall := gce.service.Instances.List(gce.projectID, zone)

+        // No filter: We assume that a zone is either used or unused
+        // We could only consider running nodes (like we do in List above),
+        // but probably if instances are starting we still want to consider them.
+        // I think we should wait until we have a reason to make the
+        // call one way or the other; we generally can't guarantee correct
+        // volume spreading if the set of zones is changing
+        // (and volume spreading is currently only a heuristic).
+        // Long term we want to replace GetAllZones (which primarily supports volume
+        // spreading) with a scheduler policy that is able to see the global state of
+        // volumes and the health of zones.

         // Just a minimal set of fields - we only care about existence
         listCall = listCall.Fields("items(name)")
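Together with the fast-path added above, GetAllZones reduces to: if only one zone is managed, return it; otherwise list instances per managed zone (requesting minimal fields) and treat a zone as in use if it has any instances at all. A rough sketch under those assumptions; `listInstanceNamesInZone` is a hypothetical stand-in for the per-zone Instances.List call, not the real GCE provider code.

package main

import (
	"fmt"
)

// listInstanceNamesInZone stands in for the per-zone Instances.List call
// with Fields("items(name)") — only existence matters here.
func listInstanceNamesInZone(zone string) ([]string, error) {
	fake := map[string][]string{
		"us-central1-a": {"node-1", "node-2"},
		"us-central1-b": {"node-3"},
		"us-central1-c": {}, // managed, but currently empty
	}
	return fake[zone], nil
}

// getAllZones returns the managed zones that currently contain instances.
func getAllZones(managedZones []string) (map[string]bool, error) {
	zones := map[string]bool{}

	// Fast-path for non-multizone clusters: the single managed zone is the answer.
	if len(managedZones) == 1 {
		zones[managedZones[0]] = true
		return zones, nil
	}

	for _, zone := range managedZones {
		names, err := listInstanceNamesInZone(zone)
		if err != nil {
			return nil, err
		}
		// A zone is either used or unused: any instance at all marks it as used.
		if len(names) > 0 {
			zones[zone] = true
		}
	}
	return zones, nil
}

func main() {
	zones, _ := getAllZones([]string{"us-central1-a", "us-central1-b", "us-central1-c"})
	fmt.Println(zones) // map[us-central1-a:true us-central1-b:true]
}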
@@ -2258,6 +2268,12 @@ func (gce *GCECloud) GetAutoLabelsForPD(name string, zone string) (map[string]st
     var disk *gceDisk
     var err error
     if zone == "" {
+        // We would like as far as possible to avoid this case,
+        // because GCE doesn't guarantee that volumes are uniquely named per region,
+        // just per zone. However, creation of GCE PDs was originally done only
+        // by name, so we have to continue to support that.
+        // However, wherever possible the zone should be passed (and it is passed
+        // for most cases that we can control, e.g. dynamic volume provisioning)
         disk, err = gce.getDiskByNameUnknownZone(name)
         if err != nil {
             return nil, err
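The comments explain why the zone should be passed whenever possible: PD names are only guaranteed unique per zone, so resolving a disk by name alone means searching every zone and dealing with possible duplicates. A rough sketch of that lookup-by-name-only path; `disk`, `findDiskInZone`, and the ambiguity check are hypothetical illustrations, not the real getDiskByNameUnknownZone.

package main

import (
	"errors"
	"fmt"
)

// disk is a hypothetical, minimal PD record.
type disk struct {
	Name string
	Zone string
}

// findDiskInZone stands in for a per-zone disk lookup; nil means "not found here".
func findDiskInZone(zone, name string) *disk {
	fake := map[string][]string{
		"us-central1-a": {"pd-data-1"},
		"us-central1-b": {"pd-data-2"},
	}
	for _, n := range fake[zone] {
		if n == name {
			return &disk{Name: n, Zone: zone}
		}
	}
	return nil
}

// getDiskByNameUnknownZone searches all zones for a disk with the given name.
// Because names are only unique per zone, two matches are treated as an error here.
func getDiskByNameUnknownZone(zones []string, name string) (*disk, error) {
	var found *disk
	for _, zone := range zones {
		if d := findDiskInZone(zone, name); d != nil {
			if found != nil {
				return nil, fmt.Errorf("disk %q found in multiple zones (%s, %s)", name, found.Zone, d.Zone)
			}
			found = d
		}
	}
	if found == nil {
		return nil, errors.New("disk not found: " + name)
	}
	return found, nil
}

func main() {
	zones := []string{"us-central1-a", "us-central1-b"}
	d, err := getDiskByNameUnknownZone(zones, "pd-data-2")
	fmt.Println(d, err) // &{pd-data-2 us-central1-b} <nil>
}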
@@ -49,7 +49,7 @@ type VolumeOptions struct {
     // PV.Name of the appropriate PersistentVolume. Used to generate cloud
     // volume name.
     PVName string
-    // PVC.Name of the PersistentVolumeClaim, if we are auto-provisioning.
+    // PVC.Name of the PersistentVolumeClaim; only set during dynamic provisioning.
     PVCName string
     // Unique name of Kubernetes cluster.
     ClusterName string
@@ -194,6 +194,10 @@ func GenerateVolumeName(clusterName, pvName string, maxLength int) string {
 }

+// ChooseZoneForVolume implements our heuristics for choosing a zone for volume creation based on the volume name
+// Volumes are generally round-robin-ed across all active zones, using the hash of the PVC Name.
+// However, if the PVCName ends with `-<integer>`, we will hash the prefix, and then add the integer to the hash.
+// This means that a PetSet's volumes (`claimname-petsetname-id`) will spread across available zones,
+// assuming the id values are consecutive.
 func ChooseZoneForVolume(zones sets.String, pvcName string) string {
     // We create the volume in a zone determined by the name
     // Eventually the scheduler will coordinate placement into an available zone
@@ -202,7 +206,7 @@ func ChooseZoneForVolume(zones sets.String, pvcName string) string {

     if pvcName == "" {
         // We should always be called with a name; this shouldn't happen
-        glog.Warningf("No Name defined during volume create; choosing random zone")
+        glog.Warningf("No name defined during volume create; choosing random zone")

         hash = rand.Uint32()
     } else {
@@ -230,6 +234,14 @@ func ChooseZoneForVolume(zones sets.String, pvcName string) string {
         hash = h.Sum32()
     }

+    // Zones.List returns zones in a consistent order (sorted)
+    // We do have a potential failure case where volumes will not be properly spread,
+    // if the set of zones changes during PetSet volume creation. However, this is
+    // probably relatively unlikely because we expect the set of zones to be essentially
+    // static for clusters.
+    // Hopefully we can address this problem if/when we do full scheduler integration of
+    // PVC placement (which could also e.g. avoid putting volumes in overloaded or
+    // unhealthy zones)
     zoneSlice := zones.List()
     zone := zoneSlice[(hash+index)%uint32(len(zoneSlice))]
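Putting the ChooseZoneForVolume pieces from the hunks above together: hash the PVC name with FNV-32; if the name ends in `-<integer>` (the PetSet claim pattern), hash only the prefix and use the integer as an offset; then index into the sorted zone list with (hash+index) mod len. The sketch below is a simplified standalone version of that heuristic, not the upstream function; the suffix parsing in particular is condensed.

package main

import (
	"fmt"
	"hash/fnv"
	"math/rand"
	"sort"
	"strconv"
	"strings"
)

// chooseZoneForVolume picks a zone from the sorted zone list using a hash of
// the PVC name, treating a trailing "-<integer>" as an offset so that a
// PetSet's consecutively numbered claims land in consecutive zones.
func chooseZoneForVolume(zones []string, pvcName string) string {
	var hash, index uint32

	if pvcName == "" {
		// We should always be called with a name; fall back to a random zone.
		hash = rand.Uint32()
	} else {
		name := pvcName
		if dash := strings.LastIndex(name, "-"); dash != -1 {
			if v, err := strconv.ParseUint(name[dash+1:], 10, 32); err == nil {
				// Name ends in -<integer>: hash the prefix, use the integer as offset.
				index = uint32(v)
				name = name[:dash]
			}
		}
		h := fnv.New32()
		h.Write([]byte(name))
		hash = h.Sum32()
	}

	// A sorted copy stands in for zones.List(): the consistent order is what
	// makes the round-robin deterministic across callers.
	sorted := append([]string(nil), zones...)
	sort.Strings(sorted)
	return sorted[(hash+index)%uint32(len(sorted))]
}

func main() {
	zones := []string{"us-east1-d", "us-east1-b", "us-east1-c"}
	for i := 0; i < 4; i++ {
		name := fmt.Sprintf("www-web-%d", i)
		fmt.Println(name, "->", chooseZoneForVolume(zones, name))
	}
}

With consecutive ids the hash stays fixed while the index increments, so successive claims cycle through the sorted zones; if the zone set changes mid-creation the modulus changes and the spread is no longer guaranteed, which is exactly the limitation the commit message calls out.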