Update kubelet to use per-pod QoS policy.

Signed-off-by: Vishnu kannan <vishnuk@google.com>
This commit is contained in:
Vishnu kannan 2016-05-03 17:30:09 -07:00
parent f48c83600c
commit f884180deb
5 changed files with 228 additions and 165 deletions

View File

@ -1412,7 +1412,7 @@ func containerAndPodFromLabels(inspect *dockertypes.ContainerJSON) (pod *api.Pod
return
}
func (dm *DockerManager) applyOOMScoreAdj(container *api.Container, containerInfo *dockertypes.ContainerJSON) error {
func (dm *DockerManager) applyOOMScoreAdj(pod *api.Pod, container *api.Container, containerInfo *dockertypes.ContainerJSON) error {
if containerInfo.State.Pid == 0 {
// Container exited. We cannot do anything about it. Ignore this error.
glog.V(2).Infof("Failed to apply OOM score adj on container %q with ID %q. Init process does not exist.", containerInfo.Name, containerInfo.ID)
@ -1428,7 +1428,7 @@ func (dm *DockerManager) applyOOMScoreAdj(container *api.Container, containerInf
}
return err
}
oomScoreAdj := dm.calculateOomScoreAdj(container)
oomScoreAdj := dm.calculateOomScoreAdj(pod, container)
if err = dm.oomAdjuster.ApplyOOMScoreAdjContainer(cgroupName, oomScoreAdj, 5); err != nil {
if err == os.ErrNotExist {
// Container exited. We cannot do anything about it. Ignore this error.
@ -1464,7 +1464,7 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe
utsMode = namespaceModeHost
}
oomScoreAdj := dm.calculateOomScoreAdj(container)
oomScoreAdj := dm.calculateOomScoreAdj(pod, container)
id, err := dm.runContainer(pod, container, opts, ref, netMode, ipcMode, utsMode, pidMode, restartCount, oomScoreAdj)
if err != nil {
@ -1503,7 +1503,7 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe
// Check if current docker version is higher than 1.10. Otherwise, we have to apply OOMScoreAdj instead of using docker API.
// TODO: Remove this logic after we stop supporting docker version < 1.10.
if err := dm.applyOOMScoreAdjIfNeeded(container, containerInfo); err != nil {
if err = dm.applyOOMScoreAdjIfNeeded(pod, container, containerInfo); err != nil {
return kubecontainer.ContainerID{}, err
}
@ -1521,7 +1521,7 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe
return id, err
}
func (dm *DockerManager) applyOOMScoreAdjIfNeeded(container *api.Container, containerInfo *dockertypes.ContainerJSON) error {
func (dm *DockerManager) applyOOMScoreAdjIfNeeded(pod *api.Pod, container *api.Container, containerInfo *dockertypes.ContainerJSON) error {
// Compare current API version with expected api version.
result, err := dm.checkDockerAPIVersion(dockerv110APIVersion)
if err != nil {
@ -1529,7 +1529,7 @@ func (dm *DockerManager) applyOOMScoreAdjIfNeeded(container *api.Container, cont
}
// If the current API version is older than the version that supports OOMScoreAdj, use the old way.
if result < 0 {
if err := dm.applyOOMScoreAdj(container, containerInfo); err != nil {
if err := dm.applyOOMScoreAdj(pod, container, containerInfo); err != nil {
return fmt.Errorf("Failed to apply oom-score-adj to container %q- %v", err, containerInfo.Name)
}
}
@ -1537,7 +1537,7 @@ func (dm *DockerManager) applyOOMScoreAdjIfNeeded(container *api.Container, cont
return nil
}
func (dm *DockerManager) calculateOomScoreAdj(container *api.Container) int {
func (dm *DockerManager) calculateOomScoreAdj(pod *api.Pod, container *api.Container) int {
// Set OOM score of the container based on the priority of the container.
// Processes in lower-priority pods should be killed first if the system runs out of memory.
// The main pod infrastructure container is considered high priority, since if it is killed the
@ -1546,7 +1546,7 @@ func (dm *DockerManager) calculateOomScoreAdj(container *api.Container) int {
if container.Name == PodInfraContainerName {
oomScoreAdj = qos.PodInfraOOMAdj
} else {
oomScoreAdj = qos.GetContainerOOMScoreAdjust(container, int64(dm.machineInfo.MemoryCapacity))
oomScoreAdj = qos.GetContainerOOMScoreAdjust(pod, container, int64(dm.machineInfo.MemoryCapacity))
}

View File

@ -18,53 +18,32 @@ package qos
import (
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/kubelet/qos/util"
)
const (
PodInfraOOMAdj int = -999
KubeletOOMScoreAdj int = -999
KubeProxyOOMScoreAdj int = -999
guaranteedOOMScoreAdj int = -998
besteffortOOMScoreAdj int = 1000
)
// isBestEffort returns true if the container's resource requirements are best-effort.
func isBestEffort(container *api.Container) bool {
// A container is best-effort if any of its resource requests is unspecified or 0.
if container.Resources.Requests.Memory().Value() == 0 ||
container.Resources.Requests.Cpu().Value() == 0 {
return true
}
return false
}
// isGuaranteed returns true if the container's resource requirements are Guaranteed.
func isGuaranteed(container *api.Container) bool {
// A container is guaranteed if all its request == limit.
memoryRequest := container.Resources.Requests.Memory().Value()
memoryLimit := container.Resources.Limits.Memory().Value()
cpuRequest := container.Resources.Requests.Cpu().Value()
cpuLimit := container.Resources.Limits.Cpu().Value()
if memoryRequest != 0 &&
cpuRequest != 0 &&
cpuRequest == cpuLimit &&
memoryRequest == memoryLimit {
return true
}
return false
}
// GetContainerOOMAdjust returns the amount by which the OOM score of all processes in the
// container should be adjusted. The OOM score of a process is the percentage of memory it consumes
// container should be adjusted.
// The OOM score of a process is the percentage of memory it consumes
// multiplied by 10 (barring exceptional cases) + a configurable quantity which is between -1000
// and 1000. Containers with higher OOM scores are killed if the system runs out of memory.
// See https://lwn.net/Articles/391222/ for more information.
func GetContainerOOMScoreAdjust(container *api.Container, memoryCapacity int64) int {
if isGuaranteed(container) {
func GetContainerOOMScoreAdjust(pod *api.Pod, container *api.Container, memoryCapacity int64) int {
switch util.GetPodQos(pod) {
case util.Guaranteed:
// Guaranteed containers should be the last to get killed.
return -999
} else if isBestEffort(container) {
// Best-effort containers should be the first to be killed.
return 1000
} else {
return guaranteedOOMScoreAdj
case util.BestEffort:
return besteffortOOMScoreAdj
}
// Burstable containers are a middle tier, between Guaranteed and Best-Effort. Ideally,
// we want to protect Burstable containers that consume less memory than requested.
// The formula below is a heuristic. A container requesting for 10% of a system's
@ -75,11 +54,14 @@ func GetContainerOOMScoreAdjust(container *api.Container, memoryCapacity int64)
// Note that this is a heuristic, it won't work if a container has many small processes.
memoryRequest := container.Resources.Requests.Memory().Value()
oomScoreAdjust := 1000 - (1000*memoryRequest)/memoryCapacity
// A guaranteed container using 100% of memory can have an OOM score of 1. Ensure
// that burstable containers have a higher OOM score.
// A guaranteed pod using 100% of memory can have an OOM score of 1. Ensure
// that burstable pods have a higher OOM score adjustment.
if oomScoreAdjust < 2 {
return 2
}
// Give burstable pods a higher chance of survival over besteffort pods.
if int(oomScoreAdjust) == besteffortOOMScoreAdj {
return int(oomScoreAdjust - 1)
}
return int(oomScoreAdjust)
}
}

View File

@ -29,15 +29,24 @@ const (
)
var (
zeroRequestBestEffort = api.Container{
cpuLimit = api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Resources: api.ResourceRequirements{
Limits: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
},
},
},
},
},
}
edgeBestEffort = api.Container{
memoryLimitCPURequest = api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("0"),
@ -46,19 +55,39 @@ var (
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
},
},
},
},
}
noRequestBestEffort = api.Container{
zeroMemoryLimit = api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Resources: api.ResourceRequirements{
Limits: api.ResourceList{
api.ResourceName(api.ResourceMemory): resource.MustParse("0"),
},
},
},
},
},
}
noLimitBestEffort = api.Container{}
noRequestLimit = api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Resources: api.ResourceRequirements{},
},
},
},
}
guaranteed = api.Container{
equalRequestLimitCPUMemory = api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
@ -69,9 +98,15 @@ var (
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
},
},
},
},
}
burstable = api.Container{
cpuUnlimitedMemoryLimitedWithRequests = api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName(api.ResourceMemory): resource.MustParse(strconv.Itoa(standardMemoryAmount / 2)),
@ -81,50 +116,29 @@ var (
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
},
},
},
},
}
burstableNoLimit = api.Container{
requestNoLimit = api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName(api.ResourceMemory): resource.MustParse(strconv.Itoa(standardMemoryAmount - 1)),
api.ResourceName(api.ResourceCPU): resource.MustParse("5m"),
},
},
},
},
},
}
)
func TestIsBestEffort(t *testing.T) {
validCases := []api.Container{zeroRequestBestEffort, noRequestBestEffort, noLimitBestEffort, edgeBestEffort}
for _, container := range validCases {
if !isBestEffort(&container) {
t.Errorf("container %+v is best-effort", container)
}
}
invalidCases := []api.Container{guaranteed, burstable}
for _, container := range invalidCases {
if isBestEffort(&container) {
t.Errorf("container %+v is not best-effort", container)
}
}
}
func TestIsGuaranteed(t *testing.T) {
validCases := []api.Container{guaranteed}
for _, container := range validCases {
if !isGuaranteed(&container) {
t.Errorf("container %+v is guaranteed", container)
}
}
invalidCases := []api.Container{zeroRequestBestEffort, noRequestBestEffort, noLimitBestEffort, edgeBestEffort, burstable}
for _, container := range invalidCases {
if isGuaranteed(&container) {
t.Errorf("container %+v is not guaranteed", container)
}
}
}
type oomTest struct {
container *api.Container
pod *api.Pod
memoryCapacity int64
lowOOMScoreAdj int // The max oom_score_adj score the container should be assigned.
highOOMScoreAdj int // The min oom_score_adj score the container should be assigned.
@ -133,50 +147,50 @@ type oomTest struct {
func TestGetContainerOOMScoreAdjust(t *testing.T) {
oomTests := []oomTest{
{
container: &zeroRequestBestEffort,
pod: &cpuLimit,
memoryCapacity: 4000000000,
lowOOMScoreAdj: 1000,
highOOMScoreAdj: 1000,
lowOOMScoreAdj: 999,
highOOMScoreAdj: 999,
},
{
container: &edgeBestEffort,
pod: &memoryLimitCPURequest,
memoryCapacity: 8000000000,
lowOOMScoreAdj: 1000,
highOOMScoreAdj: 1000,
lowOOMScoreAdj: 999,
highOOMScoreAdj: 999,
},
{
container: &noRequestBestEffort,
pod: &zeroMemoryLimit,
memoryCapacity: 7230457451,
lowOOMScoreAdj: 1000,
highOOMScoreAdj: 1000,
},
{
container: &noLimitBestEffort,
pod: &noRequestLimit,
memoryCapacity: 4000000000,
lowOOMScoreAdj: 1000,
highOOMScoreAdj: 1000,
},
{
container: &guaranteed,
pod: &equalRequestLimitCPUMemory,
memoryCapacity: 123456789,
lowOOMScoreAdj: -999,
highOOMScoreAdj: -999,
lowOOMScoreAdj: -998,
highOOMScoreAdj: -998,
},
{
container: &burstable,
pod: &cpuUnlimitedMemoryLimitedWithRequests,
memoryCapacity: standardMemoryAmount,
lowOOMScoreAdj: 495,
highOOMScoreAdj: 505,
},
{
container: &burstableNoLimit,
pod: &requestNoLimit,
memoryCapacity: standardMemoryAmount,
lowOOMScoreAdj: 2,
highOOMScoreAdj: 2,
},
}
for _, test := range oomTests {
oomScoreAdj := GetContainerOOMScoreAdjust(test.container, test.memoryCapacity)
oomScoreAdj := GetContainerOOMScoreAdjust(test.pod, &test.pod.Spec.Containers[0], test.memoryCapacity)
if oomScoreAdj < test.lowOOMScoreAdj || oomScoreAdj > test.highOOMScoreAdj {
t.Errorf("oom_score_adj should be between %d and %d, but was %d", test.lowOOMScoreAdj, test.highOOMScoreAdj, oomScoreAdj)
}

View File

@ -18,7 +18,7 @@ package util
import (
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/util/sets"
"k8s.io/kubernetes/pkg/api/resource"
)
const (
@ -48,23 +48,62 @@ func isResourceBestEffort(container *api.Container, resource api.ResourceName) b
}
// GetPodQos returns the QoS class of a pod.
// The QoS class of a pod is the lowest QoS class for each resource in each container.
// A pod is besteffort if none of its containers have specified any requests or limits.
// A pod is guaranteed only when requests and limits are specified for all the containers and they are equal.
// A pod is burstable if limits and requests do not match across all containers.
func GetPodQos(pod *api.Pod) string {
qosValues := sets.NewString()
requests := api.ResourceList{}
limits := api.ResourceList{}
zeroQuantity := resource.MustParse("0")
isGuaranteed := true
for _, container := range pod.Spec.Containers {
qosPerResource := GetQoS(&container)
for _, qosValue := range qosPerResource {
qosValues.Insert(qosValue)
// process requests
for name, quantity := range container.Resources.Requests {
if quantity.Cmp(zeroQuantity) == 1 {
delta := quantity.Copy()
if _, exists := requests[name]; !exists {
requests[name] = *delta
} else {
delta.Add(requests[name])
requests[name] = *delta
}
}
if qosValues.Has(BestEffort) {
}
// process limits
for name, quantity := range container.Resources.Limits {
if quantity.Cmp(zeroQuantity) == 1 {
delta := quantity.Copy()
if _, exists := limits[name]; !exists {
limits[name] = *delta
} else {
delta.Add(limits[name])
limits[name] = *delta
}
}
}
if len(container.Resources.Limits) != len(supportedComputeResources) {
isGuaranteed = false
}
}
if len(requests) == 0 && len(limits) == 0 {
return BestEffort
}
if qosValues.Has(Burstable) {
return Burstable
// Check if requests match limits for all resources.
if isGuaranteed {
for name, req := range requests {
if lim, exists := limits[name]; !exists || lim.Cmp(req) != 0 {
isGuaranteed = false
break
}
}
}
if isGuaranteed &&
len(requests) == len(limits) &&
len(limits) == len(supportedComputeResources) {
return Guaranteed
}
return Burstable
}
// GetQos returns a mapping of resource name to QoS class of a container
func GetQoS(container *api.Container) map[api.ResourceName]string {

View File

@ -64,24 +64,58 @@ func TestGetPodQos(t *testing.T) {
pod *api.Pod
expected string
}{
{
pod: newPod("guaranteed", []api.Container{
newContainer("guaranteed", getResourceList("100m", "100Mi"), getResourceList("100m", "100Mi")),
}),
expected: Guaranteed,
},
{
pod: newPod("guaranteed-guaranteed", []api.Container{
newContainer("guaranteed", getResourceList("100m", "100Mi"), getResourceList("100m", "100Mi")),
newContainer("guaranteed", getResourceList("100m", "100Mi"), getResourceList("100m", "100Mi")),
}),
expected: Guaranteed,
},
{
pod: newPod("best-effort-best-effort", []api.Container{
newContainer("best-effort", getResourceList("", ""), getResourceList("", "")),
newContainer("best-effort", getResourceList("", ""), getResourceList("", "")),
}),
expected: BestEffort,
},
{
pod: newPod("best-effort", []api.Container{
newContainer("best-effort", getResourceList("", ""), getResourceList("", "")),
}),
expected: BestEffort,
},
{
pod: newPod("best-effort-burstable", []api.Container{
newContainer("best-effort", getResourceList("", ""), getResourceList("", "")),
newContainer("burstable", getResourceList("1", ""), getResourceList("2", "")),
}),
expected: Burstable,
},
{
pod: newPod("best-effort-guaranteed", []api.Container{
newContainer("best-effort", getResourceList("", ""), getResourceList("", "")),
newContainer("guaranteed", getResourceList("10m", "100Mi"), getResourceList("10m", "100Mi")),
}),
expected: BestEffort,
expected: Burstable,
},
{
pod: newPod("best-effort-cpu-guaranteed-memory", []api.Container{
newContainer("best-effort", getResourceList("", "100Mi"), getResourceList("", "100Mi")),
pod: newPod("burstable-cpu-guaranteed-memory", []api.Container{
newContainer("burstable", getResourceList("", "100Mi"), getResourceList("", "100Mi")),
}),
expected: BestEffort,
expected: Burstable,
},
{
pod: newPod("burstable-guaranteed", []api.Container{
newContainer("burstable", getResourceList("1", "100Mi"), getResourceList("2", "100Mi")),
newContainer("guaranteed", getResourceList("100m", "100Mi"), getResourceList("100m", "100Mi")),
}),
expected: Burstable,
},
{
pod: newPod("burstable", []api.Container{
@ -89,12 +123,6 @@ func TestGetPodQos(t *testing.T) {
}),
expected: Burstable,
},
{
pod: newPod("guaranteed", []api.Container{
newContainer("guaranteed", getResourceList("100m", "100Mi"), getResourceList("100m", "100Mi")),
}),
expected: Guaranteed,
},
}
for _, testCase := range testCases {
if actual := GetPodQos(testCase.pod); testCase.expected != actual {