mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-13 22:05:59 +00:00
Merge pull request #21741 from resouer/oomscore
Automatic merge from submit-queue. Use OomScoreAdj in kubelet for newer docker api. Fixes: #20121. Related: client side PR [pull 454](https://github.com/fsouza/go-dockerclient/pull/454). Godeps has already been updated to `0099401a7342ad77e71ca9f9a57c5e72fb80f6b2`, which includes the client-side modification. But it seems too aggressive to upgrade the docker api version of kubelet.
This commit is contained in:
commit
df21974730
@ -55,8 +55,10 @@ type Runtime interface {
|
|||||||
|
|
||||||
// Version returns the version information of the container runtime.
|
// Version returns the version information of the container runtime.
|
||||||
Version() (Version, error)
|
Version() (Version, error)
|
||||||
// APIVersion returns the API version information of the container
|
|
||||||
// runtime. This may be different from the runtime engine's version.
|
// APIVersion returns the cached API version information of the container
|
||||||
|
// runtime. Implementation is expected to update this cache periodically.
|
||||||
|
// This may be different from the runtime engine's version.
|
||||||
// TODO(random-liu): We should fold this into Version()
|
// TODO(random-liu): We should fold this into Version()
|
||||||
APIVersion() (Version, error)
|
APIVersion() (Version, error)
|
||||||
// Status returns error if the runtime is unhealthy; nil otherwise.
|
// Status returns error if the runtime is unhealthy; nil otherwise.
|
||||||
|
@ -402,6 +402,8 @@ func (f *FakeDockerClient) PullImage(opts docker.PullImageOptions, auth docker.A
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (f *FakeDockerClient) Version() (*docker.Env, error) {
|
func (f *FakeDockerClient) Version() (*docker.Env, error) {
|
||||||
|
f.Lock()
|
||||||
|
defer f.Unlock()
|
||||||
return &f.VersionInfo, f.popError("version")
|
return &f.VersionInfo, f.popError("version")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -24,6 +24,7 @@ import (
|
|||||||
"k8s.io/kubernetes/pkg/kubelet/network"
|
"k8s.io/kubernetes/pkg/kubelet/network"
|
||||||
proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
|
proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
|
||||||
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
|
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/util/cache"
|
||||||
"k8s.io/kubernetes/pkg/types"
|
"k8s.io/kubernetes/pkg/types"
|
||||||
"k8s.io/kubernetes/pkg/util/flowcontrol"
|
"k8s.io/kubernetes/pkg/util/flowcontrol"
|
||||||
"k8s.io/kubernetes/pkg/util/oom"
|
"k8s.io/kubernetes/pkg/util/oom"
|
||||||
@ -52,6 +53,9 @@ func NewFakeDockerManager(
|
|||||||
burst, containerLogsDir, osInterface, networkPlugin, runtimeHelper, httpClient, &NativeExecHandler{},
|
burst, containerLogsDir, osInterface, networkPlugin, runtimeHelper, httpClient, &NativeExecHandler{},
|
||||||
fakeOOMAdjuster, fakeProcFs, false, imageBackOff, false, false, true)
|
fakeOOMAdjuster, fakeProcFs, false, imageBackOff, false, false, true)
|
||||||
dm.dockerPuller = &FakeDockerPuller{}
|
dm.dockerPuller = &FakeDockerPuller{}
|
||||||
|
dm.versionCache = cache.NewVersionCache(func() (kubecontainer.Version, kubecontainer.Version, error) {
|
||||||
|
return dm.getVersionInfo()
|
||||||
|
})
|
||||||
return dm
|
return dm
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -46,6 +46,7 @@ import (
|
|||||||
proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
|
proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/qos"
|
"k8s.io/kubernetes/pkg/kubelet/qos"
|
||||||
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
|
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/util/cache"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/util/format"
|
"k8s.io/kubernetes/pkg/kubelet/util/format"
|
||||||
"k8s.io/kubernetes/pkg/runtime"
|
"k8s.io/kubernetes/pkg/runtime"
|
||||||
"k8s.io/kubernetes/pkg/securitycontext"
|
"k8s.io/kubernetes/pkg/securitycontext"
|
||||||
@ -62,6 +63,8 @@ const (
|
|||||||
|
|
||||||
minimumDockerAPIVersion = "1.20"
|
minimumDockerAPIVersion = "1.20"
|
||||||
|
|
||||||
|
// Remote API version spoken by docker daemon v1.10. Docker 1.9 speaks 1.21,
// so comparing against 1.21 would wrongly treat a 1.9 daemon as able to take
// OomScoreAdj through HostConfig.
dockerv110APIVersion = "1.22"
|
||||||
|
|
||||||
// ndots specifies the minimum number of dots that a domain name must contain for the resolver to consider it as FQDN (fully-qualified)
|
// ndots specifies the minimum number of dots that a domain name must contain for the resolver to consider it as FQDN (fully-qualified)
|
||||||
// we want to able to consider SRV lookup names like _dns._udp.kube-dns.default.svc to be considered relative.
|
// we want to able to consider SRV lookup names like _dns._udp.kube-dns.default.svc to be considered relative.
|
||||||
// hence, setting ndots to be 5.
|
// hence, setting ndots to be 5.
|
||||||
@ -156,6 +159,9 @@ type DockerManager struct {
|
|||||||
// A false value means the kubelet just backs off from setting it,
|
// A false value means the kubelet just backs off from setting it,
|
||||||
// it might already be true.
|
// it might already be true.
|
||||||
configureHairpinMode bool
|
configureHairpinMode bool
|
||||||
|
|
||||||
|
// The api version cache of docker daemon.
|
||||||
|
versionCache *cache.VersionCache
|
||||||
}
|
}
|
||||||
|
|
||||||
// A subset of the pod.Manager interface extracted for testing purposes.
|
// A subset of the pod.Manager interface extracted for testing purposes.
|
||||||
@ -247,6 +253,15 @@ func NewDockerManager(
|
|||||||
optf(dm)
|
optf(dm)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// initialize versionCache with an updater
|
||||||
|
dm.versionCache = cache.NewVersionCache(func() (kubecontainer.Version, kubecontainer.Version, error) {
|
||||||
|
return dm.getVersionInfo()
|
||||||
|
})
|
||||||
|
// update version cache periodically.
|
||||||
|
if dm.machineInfo != nil {
|
||||||
|
dm.versionCache.UpdateCachePeriodly(dm.machineInfo.MachineID)
|
||||||
|
}
|
||||||
|
|
||||||
return dm
|
return dm
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -503,7 +518,8 @@ func (dm *DockerManager) runContainer(
|
|||||||
ipcMode string,
|
ipcMode string,
|
||||||
utsMode string,
|
utsMode string,
|
||||||
pidMode string,
|
pidMode string,
|
||||||
restartCount int) (kubecontainer.ContainerID, error) {
|
restartCount int,
|
||||||
|
oomScoreAdj int) (kubecontainer.ContainerID, error) {
|
||||||
|
|
||||||
dockerName := KubeletContainerName{
|
dockerName := KubeletContainerName{
|
||||||
PodFullName: kubecontainer.GetPodFullName(pod),
|
PodFullName: kubecontainer.GetPodFullName(pod),
|
||||||
@ -584,6 +600,14 @@ func (dm *DockerManager) runContainer(
|
|||||||
SecurityOpt: securityOpts,
|
SecurityOpt: securityOpts,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If the current API version is at least the one introduced with docker 1.10, set OomScoreAdj in HostConfig
|
||||||
|
result, err := dm.checkDockerAPIVersion(dockerv110APIVersion)
|
||||||
|
if err != nil {
|
||||||
|
glog.Errorf("Failed to check docker api version: %v", err)
|
||||||
|
} else if result >= 0 {
|
||||||
|
hc.OomScoreAdj = oomScoreAdj
|
||||||
|
}
|
||||||
|
|
||||||
if dm.cpuCFSQuota {
|
if dm.cpuCFSQuota {
|
||||||
// if cpuLimit.Amount is nil, then the appropriate default value is returned to allow full usage of cpu resource.
|
// if cpuLimit.Amount is nil, then the appropriate default value is returned to allow full usage of cpu resource.
|
||||||
cpuQuota, cpuPeriod := milliCPUToQuota(cpuLimit.MilliValue())
|
cpuQuota, cpuPeriod := milliCPUToQuota(cpuLimit.MilliValue())
|
||||||
@ -1429,17 +1453,7 @@ func (dm *DockerManager) applyOOMScoreAdj(container *api.Container, containerInf
|
|||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// Set OOM score of the container based on the priority of the container.
|
oomScoreAdj := dm.calculateOomScoreAdj(container)
|
||||||
// Processes in lower-priority pods should be killed first if the system runs out of memory.
|
|
||||||
// The main pod infrastructure container is considered high priority, since if it is killed the
|
|
||||||
// whole pod will die.
|
|
||||||
// TODO: Cache this value.
|
|
||||||
var oomScoreAdj int
|
|
||||||
if containerInfo.Name == PodInfraContainerName {
|
|
||||||
oomScoreAdj = qos.PodInfraOOMAdj
|
|
||||||
} else {
|
|
||||||
oomScoreAdj = qos.GetContainerOOMScoreAdjust(container, int64(dm.machineInfo.MemoryCapacity))
|
|
||||||
}
|
|
||||||
if err = dm.oomAdjuster.ApplyOOMScoreAdjContainer(cgroupName, oomScoreAdj, 5); err != nil {
|
if err = dm.oomAdjuster.ApplyOOMScoreAdjContainer(cgroupName, oomScoreAdj, 5); err != nil {
|
||||||
if err == os.ErrNotExist {
|
if err == os.ErrNotExist {
|
||||||
// Container exited. We cannot do anything about it. Ignore this error.
|
// Container exited. We cannot do anything about it. Ignore this error.
|
||||||
@ -1473,7 +1487,10 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe
|
|||||||
if usesHostNetwork(pod) {
|
if usesHostNetwork(pod) {
|
||||||
utsMode = namespaceModeHost
|
utsMode = namespaceModeHost
|
||||||
}
|
}
|
||||||
id, err := dm.runContainer(pod, container, opts, ref, netMode, ipcMode, utsMode, pidMode, restartCount)
|
|
||||||
|
oomScoreAdj := dm.calculateOomScoreAdj(container)
|
||||||
|
|
||||||
|
id, err := dm.runContainer(pod, container, opts, ref, netMode, ipcMode, utsMode, pidMode, restartCount, oomScoreAdj)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return kubecontainer.ContainerID{}, fmt.Errorf("runContainer: %v", err)
|
return kubecontainer.ContainerID{}, fmt.Errorf("runContainer: %v", err)
|
||||||
}
|
}
|
||||||
@ -1512,9 +1529,12 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe
|
|||||||
return kubecontainer.ContainerID{}, fmt.Errorf("can't get init PID for container %q", id)
|
return kubecontainer.ContainerID{}, fmt.Errorf("can't get init PID for container %q", id)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := dm.applyOOMScoreAdj(container, containerInfo); err != nil {
|
// Check if current docker version is higher than 1.10. Otherwise, we have to apply OOMScoreAdj instead of using docker API.
|
||||||
return kubecontainer.ContainerID{}, fmt.Errorf("failed to apply oom-score-adj to container %q- %v", err, containerInfo.Name)
|
err = dm.applyOOMScoreAdjIfNeeded(container, containerInfo)
|
||||||
|
if err != nil {
|
||||||
|
return kubecontainer.ContainerID{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// The addNDotsOption call appends the ndots option to the resolv.conf file generated by docker.
|
// The addNDotsOption call appends the ndots option to the resolv.conf file generated by docker.
|
||||||
// This resolv.conf file is shared by all containers of the same pod, and needs to be modified only once per pod.
|
// This resolv.conf file is shared by all containers of the same pod, and needs to be modified only once per pod.
|
||||||
// we modify it when the pause container is created since it is the first container created in the pod since it holds
|
// we modify it when the pause container is created since it is the first container created in the pod since it holds
|
||||||
@ -1529,6 +1549,69 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe
|
|||||||
return id, err
|
return id, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (dm *DockerManager) applyOOMScoreAdjIfNeeded(container *api.Container, containerInfo *docker.Container) error {
|
||||||
|
// Compare current API version with expected api version.
|
||||||
|
result, err := dm.checkDockerAPIVersion(dockerv110APIVersion)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("Failed to check docker api version: %v", err)
|
||||||
|
}
|
||||||
|
// If current api version is older than OOMScoreAdj requested, use the old way.
|
||||||
|
if result < 0 {
|
||||||
|
if err := dm.applyOOMScoreAdj(container, containerInfo); err != nil {
|
||||||
|
return fmt.Errorf("Failed to apply oom-score-adj to container %q- %v", err, containerInfo.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (dm *DockerManager) calculateOomScoreAdj(container *api.Container) int {
|
||||||
|
// Set OOM score of the container based on the priority of the container.
|
||||||
|
// Processes in lower-priority pods should be killed first if the system runs out of memory.
|
||||||
|
// The main pod infrastructure container is considered high priority, since if it is killed the
|
||||||
|
// whole pod will die.
|
||||||
|
var oomScoreAdj int
|
||||||
|
if container.Name == PodInfraContainerName {
|
||||||
|
oomScoreAdj = qos.PodInfraOOMAdj
|
||||||
|
} else {
|
||||||
|
oomScoreAdj = qos.GetContainerOOMScoreAdjust(container, int64(dm.machineInfo.MemoryCapacity))
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return oomScoreAdj
|
||||||
|
}
|
||||||
|
|
||||||
|
// getCachedVersionInfo gets cached version info of docker runtime.
|
||||||
|
func (dm *DockerManager) getCachedVersionInfo() (kubecontainer.Version, kubecontainer.Version, error) {
|
||||||
|
apiVersion, daemonVersion, err := dm.versionCache.Get(dm.machineInfo.MachineID)
|
||||||
|
if err != nil {
|
||||||
|
glog.Errorf("Failed to get cached docker api version %v ", err)
|
||||||
|
}
|
||||||
|
// If we got nil versions, try to update version info.
|
||||||
|
if apiVersion == nil || daemonVersion == nil {
|
||||||
|
dm.versionCache.Update(dm.machineInfo.MachineID)
|
||||||
|
}
|
||||||
|
return apiVersion, daemonVersion, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// checkDockerAPIVersion checks current docker API version against expected version.
|
||||||
|
// Return:
|
||||||
|
// 1 : newer than expected version
|
||||||
|
// -1: older than expected version
|
||||||
|
// 0 : same version
|
||||||
|
func (dm *DockerManager) checkDockerAPIVersion(expectedVersion string) (int, error) {
|
||||||
|
apiVersion, _, err := dm.getCachedVersionInfo()
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
result, err := apiVersion.Compare(expectedVersion)
|
||||||
|
if err != nil {
|
||||||
|
return 0, fmt.Errorf("Failed to compare current docker api version %v with OOMScoreAdj supported Docker version %q - %v",
|
||||||
|
apiVersion, expectedVersion, err)
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
func addNDotsOption(resolvFilePath string) error {
|
func addNDotsOption(resolvFilePath string) error {
|
||||||
if len(resolvFilePath) == 0 {
|
if len(resolvFilePath) == 0 {
|
||||||
glog.Errorf("ResolvConfPath is empty.")
|
glog.Errorf("ResolvConfPath is empty.")
|
||||||
@ -2112,3 +2195,17 @@ func (dm *DockerManager) GetPodStatus(uid types.UID, name, namespace string) (*k
|
|||||||
podStatus.ContainerStatuses = containerStatuses
|
podStatus.ContainerStatuses = containerStatuses
|
||||||
return podStatus, nil
|
return podStatus, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getVersionInfo returns apiVersion & daemonVersion of docker runtime
|
||||||
|
func (dm *DockerManager) getVersionInfo() (kubecontainer.Version, kubecontainer.Version, error) {
|
||||||
|
apiVersion, err := dm.APIVersion()
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
daemonVersion, err := dm.Version()
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return apiVersion, daemonVersion, nil
|
||||||
|
}
|
||||||
|
@ -1933,7 +1933,7 @@ func TestGetPodStatusNoSuchContainer(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}})
|
}})
|
||||||
|
|
||||||
fakeDocker.Errors = map[string]error{"inspect": &docker.NoSuchContainer{}}
|
fakeDocker.InjectErrors(map[string]error{"inspect": &docker.NoSuchContainer{}})
|
||||||
runSyncPod(t, dm, fakeDocker, pod, nil, false)
|
runSyncPod(t, dm, fakeDocker, pod, nil, false)
|
||||||
|
|
||||||
// Verify that we will try to start new contrainers even if the inspections
|
// Verify that we will try to start new contrainers even if the inspections
|
||||||
|
80
pkg/kubelet/util/cache/version_cache.go
vendored
Normal file
80
pkg/kubelet/util/cache/version_cache.go
vendored
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2016 The Kubernetes Authors All rights reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package cache
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/golang/glog"
|
||||||
|
|
||||||
|
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||||
|
"k8s.io/kubernetes/pkg/util/wait"
|
||||||
|
)
|
||||||
|
|
||||||
|
type VersionCache struct {
|
||||||
|
lock sync.RWMutex
|
||||||
|
cache map[string]versionInfo
|
||||||
|
updater func() (kubecontainer.Version, kubecontainer.Version, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
// versionInfo caches api version and daemon version.
|
||||||
|
type versionInfo struct {
|
||||||
|
apiVersion kubecontainer.Version
|
||||||
|
version kubecontainer.Version
|
||||||
|
}
|
||||||
|
|
||||||
|
const maxVersionCacheEntries = 1000
|
||||||
|
|
||||||
|
func NewVersionCache(f func() (kubecontainer.Version, kubecontainer.Version, error)) *VersionCache {
|
||||||
|
return &VersionCache{
|
||||||
|
cache: map[string]versionInfo{},
|
||||||
|
updater: f,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update updates cached versionInfo by using a unique string (e.g. machineInfo) as the key.
|
||||||
|
func (c *VersionCache) Update(key string) {
|
||||||
|
apiVersion, daemonVersion, err := c.updater()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
glog.Errorf("Fail to get version info from container runtime: %v", err)
|
||||||
|
} else {
|
||||||
|
c.lock.Lock()
|
||||||
|
defer c.lock.Unlock()
|
||||||
|
c.cache[key] = versionInfo{apiVersion, daemonVersion}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get gets cached versionInfo by using a unique string (e.g. machineInfo) as the key.
|
||||||
|
// It returns apiVersion first and followed by daemon version.
|
||||||
|
func (c *VersionCache) Get(key string) (kubecontainer.Version, kubecontainer.Version, error) {
|
||||||
|
c.lock.RLock()
|
||||||
|
defer c.lock.RUnlock()
|
||||||
|
value, ok := c.cache[key]
|
||||||
|
if !ok {
|
||||||
|
return nil, nil, fmt.Errorf("Failed to get version info from cache by key: ", key)
|
||||||
|
}
|
||||||
|
return value.apiVersion, value.version, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *VersionCache) UpdateCachePeriodly(key string) {
|
||||||
|
go wait.Until(func() {
|
||||||
|
c.Update(key)
|
||||||
|
}, 1*time.Minute, wait.NeverStop)
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user