Scheduler support for hugepages

Derek Carr 2017-08-17 14:53:10 -04:00
parent 1ec2a69d9a
commit 41a4e2ccad
6 changed files with 111 additions and 20 deletions

View File

@@ -10,6 +10,7 @@ go_library(
srcs = ["qos.go"],
deps = [
"//pkg/api:go_default_library",
+ "//pkg/api/helper:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
],

View File

@@ -22,11 +22,16 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/api"
+ "k8s.io/kubernetes/pkg/api/helper"
)
// supportedQoSComputeResources is the list of compute resources for which QoS is supported.
var supportedQoSComputeResources = sets.NewString(string(api.ResourceCPU), string(api.ResourceMemory))
+
+ func isSupportedQoSComputeResource(name api.ResourceName) bool {
+ return supportedQoSComputeResources.Has(string(name)) || helper.IsHugePageResourceName(name)
+ }
// GetPodQOS returns the QoS class of a pod.
// A pod is besteffort if none of its containers have specified any requests or limits.
// A pod is guaranteed only when requests and limits are specified for all the containers and they are equal.
@@ -39,7 +44,7 @@ func GetPodQOS(pod *api.Pod) api.PodQOSClass {
for _, container := range pod.Spec.Containers {
// process requests
for name, quantity := range container.Resources.Requests {
- if !supportedQoSComputeResources.Has(string(name)) {
+ if !isSupportedQoSComputeResource(name) {
continue
}
if quantity.Cmp(zeroQuantity) == 1 {
@@ -55,7 +60,7 @@ func GetPodQOS(pod *api.Pod) api.PodQOSClass {
// process limits
qosLimitsFound := sets.NewString()
for name, quantity := range container.Resources.Limits {
- if !supportedQoSComputeResources.Has(string(name)) {
+ if !isSupportedQoSComputeResource(name) {
continue
}
if quantity.Cmp(zeroQuantity) == 1 {

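The change above routes every QoS request/limit check through a new predicate, so hugepage resources (`hugepages-2Mi`, `hugepages-1Gi`, ...) now count toward QoS classification alongside cpu and memory. The `helper.IsHugePageResourceName` call itself is not shown in this diff; here is a minimal standalone sketch, assuming it simply matches the `hugepages-` name prefix:

```go
package main

import (
	"fmt"
	"strings"
)

// isHugePageResourceName is a hypothetical stand-in for
// helper.IsHugePageResourceName: hugepage resources are assumed to follow
// the "hugepages-<pagesize>" naming convention.
func isHugePageResourceName(name string) bool {
	return strings.HasPrefix(name, "hugepages-")
}

func main() {
	for _, n := range []string{"cpu", "memory", "hugepages-2Mi", "example.com/foo"} {
		fmt.Printf("%-16s hugepage? %v\n", n, isHugePageResourceName(n))
	}
}
```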
View File

@@ -132,7 +132,7 @@ func TestGetPodQOS(t *testing.T) {
},
{
pod: newPod("burstable-hugepages", []v1.Container{
- newContainer("burstable", getResourceList("0", "0"), addResource("hugepages-2Mi", "1Gi", getResourceList("0", "0"))),
+ newContainer("burstable", addResource("hugepages-2Mi", "1Gi", getResourceList("0", "0")), addResource("hugepages-2Mi", "1Gi", getResourceList("0", "0"))),
}),
expected: v1.PodQOSBurstable,
},
@@ -147,7 +147,7 @@ func TestGetPodQOS(t *testing.T) {
k8sv1.Convert_v1_Pod_To_api_Pod(testCase.pod, &pod, nil)
if actual := qos.GetPodQOS(&pod); api.PodQOSClass(testCase.expected) != actual {
- t.Errorf("[%d]: invalid qos pod %s, expected: %s, actual: %s", id, testCase.pod.Name, testCase.expected, actual)
+ t.Errorf("[%d]: conversion invalid qos pod %s, expected: %s, actual: %s", id, testCase.pod.Name, testCase.expected, actual)
}
}
}

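The burstable test case now carries `hugepages-2Mi` in the requests list as well as the limits list. A hedged sketch of what that case is assumed to construct, using the current `k8s.io/api/core/v1` types for illustration (the `newContainer`/`addResource`/`getResourceList` helpers are test-local and not shown in this diff):

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Assumed shape of the updated burstable-hugepages container resources:
	// hugepages-2Mi present in both Requests and Limits, cpu/memory at "0".
	rr := v1.ResourceRequirements{
		Requests: v1.ResourceList{
			v1.ResourceCPU:    resource.MustParse("0"),
			v1.ResourceMemory: resource.MustParse("0"),
			"hugepages-2Mi":   resource.MustParse("1Gi"),
		},
		Limits: v1.ResourceList{
			v1.ResourceCPU:    resource.MustParse("0"),
			v1.ResourceMemory: resource.MustParse("0"),
			"hugepages-2Mi":   resource.MustParse("1Gi"),
		},
	}
	q := rr.Requests["hugepages-2Mi"]
	fmt.Println(q.String()) // 1Gi
}
```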
View File

@@ -509,6 +509,12 @@ func GetResourceRequest(pod *v1.Pod) *schedulercache.Resource {
result.SetExtended(rName, value)
}
}
+ if v1helper.IsHugePageResourceName(rName) {
+ value := rQuantity.Value()
+ if value > result.HugePages[rName] {
+ result.SetHugePages(rName, value)
+ }
+ }
}
}
}
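The new branch follows the usual effective-request rule: app container requests are summed, and this init-container pass only raises the per-size figure when a single init container wants more (init containers run sequentially, so only their maximum must fit). A minimal sketch of that rule in isolation:

```go
package main

import "fmt"

// effectiveRequest computes a pod's effective demand for one hugepage size:
// the sum over regular containers, raised to any larger single init
// container request.
func effectiveRequest(containers, initContainers []int64) int64 {
	var total int64
	for _, v := range containers {
		total += v
	}
	for _, v := range initContainers {
		if v > total {
			total = v
		}
	}
	return total
}

func main() {
	fmt.Println(effectiveRequest([]int64{3, 3}, nil))      // 6
	fmt.Println(effectiveRequest([]int64{1}, []int64{10})) // 10: init container dominates
}
```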
@@ -542,7 +548,12 @@ func PodFitsResources(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.No
// We couldn't parse metadata - fallback to computing it.
podRequest = GetResourceRequest(pod)
}
- if podRequest.MilliCPU == 0 && podRequest.Memory == 0 && podRequest.NvidiaGPU == 0 && podRequest.EphemeralStorage == 0 && len(podRequest.ExtendedResources) == 0 {
+ if podRequest.MilliCPU == 0 &&
+ podRequest.Memory == 0 &&
+ podRequest.NvidiaGPU == 0 &&
+ podRequest.EphemeralStorage == 0 &&
+ len(podRequest.ExtendedResources) == 0 &&
+ len(podRequest.HugePages) == 0 {
return len(predicateFails) == 0, predicateFails, nil
}
@@ -567,6 +578,12 @@ func PodFitsResources(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.No
}
}
+ for rName, rQuant := range podRequest.HugePages {
+ if allocatable.HugePages[rName] < rQuant+nodeInfo.RequestedResource().HugePages[rName] {
+ predicateFails = append(predicateFails, NewInsufficientResourceError(rName, podRequest.HugePages[rName], nodeInfo.RequestedResource().HugePages[rName], allocatable.HugePages[rName]))
+ }
+ }
if glog.V(10) {
if len(predicateFails) == 0 {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is

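The added loop applies the same admission inequality as the other resource dimensions: for each hugepage size, the pod fits only when its request plus what the node has already accounted stays within allocatable. A standalone sketch, using the same figures (requested 6, used 2, capacity 5) as the multi-container test case added later in this diff:

```go
package main

import "fmt"

// fitsHugePages mirrors the loop in PodFitsResources: reject if, for any
// hugepage size, request + already-requested-on-node exceeds allocatable.
func fitsHugePages(request, used, allocatable map[string]int64) bool {
	for name, want := range request {
		if allocatable[name] < want+used[name] {
			return false
		}
	}
	return true
}

func main() {
	alloc := map[string]int64{"hugepages-2Mi": 5}
	used := map[string]int64{"hugepages-2Mi": 2}
	fmt.Println(fitsHugePages(map[string]int64{"hugepages-2Mi": 3}, used, alloc)) // true: 3+2 <= 5
	fmt.Println(fitsHugePages(map[string]int64{"hugepages-2Mi": 6}, used, alloc)) // false: 6+2 > 5
}
```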
View File

@@ -73,11 +73,12 @@ func (pvs FakePersistentVolumeInfo) GetPersistentVolumeInfo(pvID string) (*v1.Pe
}
var (
- opaqueResourceA = v1helper.OpaqueIntResourceName("AAA")
- opaqueResourceB = v1helper.OpaqueIntResourceName("BBB")
+ opaqueResourceA   = v1helper.OpaqueIntResourceName("AAA")
+ opaqueResourceB   = v1helper.OpaqueIntResourceName("BBB")
+ hugePageResourceA = v1helper.HugePageResourceName(resource.MustParse("2Mi"))
)
- func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage int64) v1.NodeResources {
+ func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage, hugePageA int64) v1.NodeResources {
return v1.NodeResources{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
@@ -86,11 +87,12 @@ func makeResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage int64) v
v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
opaqueResourceA: *resource.NewQuantity(opaqueA, resource.DecimalSI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(storage, resource.BinarySI),
+ hugePageResourceA: *resource.NewQuantity(hugePageA, resource.BinarySI),
},
}
}
- func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage int64) v1.ResourceList {
+ func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, storage, hugePageA int64) v1.ResourceList {
return v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
@@ -98,6 +100,7 @@ func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, opaqueA, stora
v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
opaqueResourceA: *resource.NewQuantity(opaqueA, resource.DecimalSI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(storage, resource.BinarySI),
+ hugePageResourceA: *resource.NewQuantity(hugePageA, resource.BinarySI),
}
}
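`hugePageResourceA` is derived from a page size rather than written out by hand. A hypothetical stand-in for `v1helper.HugePageResourceName`, assuming it joins the `hugepages-` prefix with the canonical string form of the quantity:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// hugePageResourceName is a hypothetical sketch of
// v1helper.HugePageResourceName: prefix plus the canonical page size.
func hugePageResourceName(pageSize resource.Quantity) string {
	return "hugepages-" + pageSize.String()
}

func main() {
	fmt.Println(hugePageResourceName(resource.MustParse("2Mi"))) // hugepages-2Mi
}
```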
@@ -348,10 +351,38 @@ func TestPodFitsResources(t *testing.T) {
test: "opaque resource allocatable enforced for unknown resource for init container",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(opaqueResourceB, 1, 0, 0)},
},
+ {
+ pod: newResourcePod(
+ schedulercache.Resource{MilliCPU: 1, Memory: 1, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 10}}),
+ nodeInfo: schedulercache.NewNodeInfo(
+ newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 0}})),
+ fits: false,
+ test: "hugepages resource capacity enforced",
+ reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(hugePageResourceA, 10, 0, 5)},
+ },
+ {
+ pod: newResourceInitPod(newResourcePod(schedulercache.Resource{}),
+ schedulercache.Resource{MilliCPU: 1, Memory: 1, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 10}}),
+ nodeInfo: schedulercache.NewNodeInfo(
+ newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 0}})),
+ fits: false,
+ test: "hugepages resource capacity enforced for init container",
+ reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(hugePageResourceA, 10, 0, 5)},
+ },
+ {
+ pod: newResourcePod(
+ schedulercache.Resource{MilliCPU: 1, Memory: 1, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 3}},
+ schedulercache.Resource{MilliCPU: 1, Memory: 1, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 3}}),
+ nodeInfo: schedulercache.NewNodeInfo(
+ newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0, HugePages: map[v1.ResourceName]int64{hugePageResourceA: 2}})),
+ fits: false,
+ test: "hugepages resource allocatable enforced for multiple containers",
+ reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(hugePageResourceA, 6, 2, 5)},
+ },
}
for _, test := range enoughPodsTests {
- node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20)}}
+ node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20, 5)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@@ -406,7 +437,7 @@ func TestPodFitsResources(t *testing.T) {
},
}
for _, test := range notEnoughPodsTests {
- node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1, 0, 0)}}
+ node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1, 0, 0, 0)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@@ -464,7 +495,7 @@ func TestPodFitsResources(t *testing.T) {
}
for _, test := range storagePodsTests {
- node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20)}}
+ node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20, 5)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@@ -1889,7 +1920,7 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
- Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
+ Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
},
fits: true,
wErr: nil,
@@ -1901,7 +1932,7 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 19})),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
- Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
+ Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
},
fits: false,
wErr: nil,
@@ -1915,7 +1946,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: &v1.Pod{},
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
- node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
+ node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
fits: true,
wErr: nil,
test: "no resources/port/host requested always fits on GPU machine",
@@ -1924,7 +1955,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 1})),
- node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
+ node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
fits: false,
wErr: nil,
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(v1.ResourceNvidiaGPU, 1, 1, 1)},
@@ -1934,7 +1965,7 @@ func TestRunGeneralPredicates(t *testing.T) {
pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 0})),
- node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0)}},
+ node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
fits: true,
wErr: nil,
test: "enough GPU resource",
@@ -1948,7 +1979,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
- Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
+ Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
},
fits: false,
wErr: nil,
@@ -1960,7 +1991,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(newPodWithPort(123)),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
- Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0)},
+ Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
},
fits: false,
wErr: nil,
@@ -3252,7 +3283,7 @@ func TestPodSchedulesOnNodeWithMemoryPressureCondition(t *testing.T) {
ImagePullPolicy: "Always",
// at least one requirement -> burstable pod
Resources: v1.ResourceRequirements{
- Requests: makeAllocatableResources(100, 100, 100, 100, 0, 0),
+ Requests: makeAllocatableResources(100, 100, 100, 100, 0, 0, 0),
},
},
},

View File

@@ -72,6 +72,7 @@ type Resource struct {
// explicitly as int, to avoid conversions and improve performance.
AllowedPodNumber int
ExtendedResources map[v1.ResourceName]int64
+ HugePages map[v1.ResourceName]int64
}
// New creates a Resource from ResourceList
@@ -103,6 +104,9 @@ func (r *Resource) Add(rl v1.ResourceList) {
if v1helper.IsExtendedResourceName(rName) {
r.AddExtended(rName, rQuant.Value())
}
+ if v1helper.IsHugePageResourceName(rName) {
+ r.AddHugePages(rName, rQuant.Value())
+ }
}
}
}
@@ -118,6 +122,9 @@ func (r *Resource) ResourceList() v1.ResourceList {
for rName, rQuant := range r.ExtendedResources {
result[rName] = *resource.NewQuantity(rQuant, resource.DecimalSI)
}
+ for rName, rQuant := range r.HugePages {
+ result[rName] = *resource.NewQuantity(rQuant, resource.BinarySI)
+ }
return result
}
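Note that the round-trip above formats hugepages with `resource.BinarySI` (unlike `DecimalSI` for extended resources): hugepage totals are byte counts, so binary suffixes read naturally. A small illustration:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// 256 * 1024 * 1024 bytes with BinarySI canonicalizes to "256Mi".
	q := resource.NewQuantity(268435456, resource.BinarySI)
	fmt.Println(q.String()) // 256Mi
	d := resource.NewQuantity(268435456, resource.DecimalSI)
	fmt.Println(d.String()) // 268435456 (no binary suffix)
}
```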
@@ -135,6 +142,12 @@ func (r *Resource) Clone() *Resource {
res.ExtendedResources[k] = v
}
}
+ if r.HugePages != nil {
+ res.HugePages = make(map[v1.ResourceName]int64)
+ for k, v := range r.HugePages {
+ res.HugePages[k] = v
+ }
+ }
return res
}
@@ -150,6 +163,18 @@ func (r *Resource) SetExtended(name v1.ResourceName, quantity int64) {
r.ExtendedResources[name] = quantity
}
+
+ func (r *Resource) AddHugePages(name v1.ResourceName, quantity int64) {
+ r.SetHugePages(name, r.HugePages[name]+quantity)
+ }
+
+ func (r *Resource) SetHugePages(name v1.ResourceName, quantity int64) {
+ // Lazily allocate hugepages resource map.
+ if r.HugePages == nil {
+ r.HugePages = map[v1.ResourceName]int64{}
+ }
+ r.HugePages[name] = quantity
+ }
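`SetHugePages` allocates the map lazily so that `Resource` values which never see hugepages pay no cost, and `AddHugePages` layers accumulation on top of it. The same pattern in self-contained form:

```go
package main

import "fmt"

type res struct {
	hugePages map[string]int64
}

func (r *res) setHugePages(name string, quantity int64) {
	if r.hugePages == nil { // lazily allocate, as SetHugePages does above
		r.hugePages = map[string]int64{}
	}
	r.hugePages[name] = quantity
}

func (r *res) addHugePages(name string, quantity int64) {
	// Reading a nil map safely yields the zero value, so this works even
	// before the first allocation.
	r.setHugePages(name, r.hugePages[name]+quantity)
}

func main() {
	var r res
	r.addHugePages("hugepages-2Mi", 3)
	r.addHugePages("hugepages-2Mi", 3)
	fmt.Println(r.hugePages["hugepages-2Mi"]) // 6
}
```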
// NewNodeInfo returns a ready to use empty NodeInfo object.
// If any pods are given in arguments, their information will be aggregated in
// the returned object.
@@ -307,6 +332,12 @@ func (n *NodeInfo) addPod(pod *v1.Pod) {
for rName, rQuant := range res.ExtendedResources {
n.requestedResource.ExtendedResources[rName] += rQuant
}
+ if n.requestedResource.HugePages == nil && len(res.HugePages) > 0 {
+ n.requestedResource.HugePages = map[v1.ResourceName]int64{}
+ }
+ for rName, rQuant := range res.HugePages {
+ n.requestedResource.HugePages[rName] += rQuant
+ }
n.nonzeroRequest.MilliCPU += non0_cpu
n.nonzeroRequest.Memory += non0_mem
n.pods = append(n.pods, pod)
@@ -362,6 +393,12 @@ func (n *NodeInfo) removePod(pod *v1.Pod) error {
for rName, rQuant := range res.ExtendedResources {
n.requestedResource.ExtendedResources[rName] -= rQuant
}
+ if len(res.HugePages) > 0 && n.requestedResource.HugePages == nil {
+ n.requestedResource.HugePages = map[v1.ResourceName]int64{}
+ }
+ for rName, rQuant := range res.HugePages {
+ n.requestedResource.HugePages[rName] -= rQuant
+ }
n.nonzeroRequest.MilliCPU -= non0_cpu
n.nonzeroRequest.Memory -= non0_mem
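Both `addPod` and `removePod` guard the lazily allocated `requestedResource.HugePages` map before their `+=`/`-=` loops. The guard is load-bearing in Go: reading a nil map yields the zero value, but a compound assignment to a map entry is a write, and writes to a nil map panic:

```go
package main

import "fmt"

func main() {
	var m map[string]int64          // nil map, as requestedResource.HugePages may be
	fmt.Println(m["hugepages-2Mi"]) // reading a nil map is safe: prints 0
	// m["hugepages-2Mi"] -= 1      // writing would panic: assignment to entry in nil map
	m = map[string]int64{} // the guard above allocates before the loop writes
	m["hugepages-2Mi"] -= 1
	fmt.Println(m["hugepages-2Mi"]) // -1
}
```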