Merge pull request #41234 from vishh/nodeaphase2

Automatic merge from submit-queue

Enforce Node Allocatable via cgroups

This PR enforces Node Allocatable across all pods using a top-level cgroup, as described in https://github.com/kubernetes/community/pull/348

This PR also provides an option to enforce `kubeReserved` and `systemReserved` on user-specified cgroups.

By default, this PR makes the kubelet create the top-level cgroups even if `kubeReserved` and `systemReserved` are not specified, in which case `Allocatable = Capacity`.
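
As a back-of-the-envelope illustration (not this PR's actual code path, and using made-up reservation values), the effective Node Allocatable that the top-level pods cgroup is limited to is simply capacity minus the reservations and hard eviction thresholds:

```go
package main

import "fmt"

// allocatableMemoryMi sketches the Node Allocatable arithmetic enforced via the
// top-level pods cgroup: Capacity - KubeReserved - SystemReserved - HardEviction.
// All values are hypothetical MiB figures used only for illustration.
func allocatableMemoryMi(capacity, kubeReserved, systemReserved, hardEviction int64) int64 {
	return capacity - kubeReserved - systemReserved - hardEviction
}

func main() {
	// Example: a 16Gi node with --kube-reserved=memory=1Gi,
	// --system-reserved=memory=512Mi and a memory.available<100Mi hard eviction threshold.
	fmt.Printf("pods cgroup memory limit: %dMi\n", allocatableMemoryMi(16*1024, 1024, 512, 100))

	// With no reservations (and eviction thresholds ignored via
	// --experimental-allocatable-ignore-eviction), Allocatable equals Capacity.
	fmt.Printf("no reservations: %dMi\n", allocatableMemoryMi(16*1024, 0, 0, 0))
}
```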

```release-note
A new Kubelet flag `--enforce-node-allocatable`, with a default value of `pods`, is added; it makes the kubelet create a top-level cgroup for all pods in order to enforce Node Allocatable. Optionally, `system-reserved` and `kube-reserved` can be added to the comma-separated list to enforce Node Allocatable on the cgroups specified via `--system-reserved-cgroup` and `--kube-reserved-cgroup` respectively; note that the default value of the latter two flags is `""`.
This feature requires a **node drain** prior to upgrade; otherwise, pods will be restarted if possible, or terminated if they have a `RestartPolicy` of `Never`.
```
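
For readers tracing how the new flags are wired, below is a hedged sketch of populating `cm.NodeAllocatableConfig` in the spirit of the kubelet wiring in this diff; the field and constant names come from the diff, while the cgroup names and resource quantities are example values only:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/kubernetes/pkg/api/v1"
	"k8s.io/kubernetes/pkg/kubelet/cm"
)

func main() {
	// Example only: roughly equivalent to
	//   --enforce-node-allocatable=pods,kube-reserved,system-reserved
	//   --kube-reserved=cpu=200m,memory=1Gi --kube-reserved-cgroup=/kube-reserved
	//   --system-reserved=cpu=100m,memory=512Mi --system-reserved-cgroup=/system-reserved
	cfg := cm.NodeAllocatableConfig{
		KubeReservedCgroupName:   "/kube-reserved",
		SystemReservedCgroupName: "/system-reserved",
		EnforceNodeAllocatable: sets.NewString(
			cm.NodeAllocatableEnforcementKey, // "pods"
			cm.KubeReservedEnforcementKey,    // "kube-reserved"
			cm.SystemReservedEnforcementKey,  // "system-reserved"
		),
		KubeReserved: v1.ResourceList{
			v1.ResourceCPU:    resource.MustParse("200m"),
			v1.ResourceMemory: resource.MustParse("1Gi"),
		},
		SystemReserved: v1.ResourceList{
			v1.ResourceCPU:    resource.MustParse("100m"),
			v1.ResourceMemory: resource.MustParse("512Mi"),
		},
		// HardEvictionThresholds is filled from --eviction-hard unless
		// --experimental-allocatable-ignore-eviction is set.
	}
	fmt.Printf("enforcing: %v\n", cfg.EnforceNodeAllocatable.List())
}
```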

cc @kubernetes/sig-node-pr-reviews @kubernetes/sig-node-feature-requests 

TODO:

- [x] Adjust effective Node Allocatable to subtract hard eviction thresholds
- [x] Add unit tests
- [x] Complete pending e2e tests
- [x] Manual testing
- [x] Get the proposal merged

@dashpole is working on adding eviction support so that Node Allocatable can be enforced more gracefully. That work will land in a subsequent PR for v1.6.
Kubernetes Submit Queue committed via GitHub on 2017-02-27 23:55:46 -08:00
48 changed files with 1824 additions and 681 deletions

View File

@@ -18,7 +18,6 @@ go_test(
tags = ["automanaged"],
deps = [
"//pkg/apis/componentconfig:go_default_library",
"//pkg/kubelet:go_default_library",
"//vendor:k8s.io/apimachinery/pkg/util/diff",
"//vendor:k8s.io/client-go/rest",
],
@@ -56,6 +55,8 @@ go_library(
"//pkg/kubelet/config:go_default_library",
"//pkg/kubelet/container:go_default_library",
"//pkg/kubelet/dockertools:go_default_library",
"//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/eviction/api:go_default_library",
"//pkg/kubelet/network:go_default_library",
"//pkg/kubelet/network/cni:go_default_library",
"//pkg/kubelet/network/kubenet:go_default_library",
@@ -98,10 +99,12 @@ go_library(
"//vendor:github.com/spf13/cobra",
"//vendor:github.com/spf13/pflag",
"//vendor:golang.org/x/exp/inotify",
"//vendor:k8s.io/apimachinery/pkg/api/resource",
"//vendor:k8s.io/apimachinery/pkg/apis/meta/v1",
"//vendor:k8s.io/apimachinery/pkg/runtime",
"//vendor:k8s.io/apimachinery/pkg/types",
"//vendor:k8s.io/apimachinery/pkg/util/runtime",
"//vendor:k8s.io/apimachinery/pkg/util/sets",
"//vendor:k8s.io/apimachinery/pkg/util/wait",
"//vendor:k8s.io/apiserver/pkg/authentication/authenticator",
"//vendor:k8s.io/apiserver/pkg/authentication/authenticatorfactory",

View File

@@ -225,8 +225,6 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.Float64Var(&s.ChaosChance, "chaos-chance", s.ChaosChance, "If > 0.0, introduce random client errors and latency. Intended for testing. [default=0.0]")
fs.BoolVar(&s.Containerized, "containerized", s.Containerized, "Experimental support for running kubelet in a container. Intended for testing. [default=false]")
fs.Int64Var(&s.MaxOpenFiles, "max-open-files", s.MaxOpenFiles, "Number of files that can be opened by Kubelet process. [default=1000000]")
fs.Var(&s.SystemReserved, "system-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
fs.Var(&s.KubeReserved, "kube-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
fs.BoolVar(&s.RegisterSchedulable, "register-schedulable", s.RegisterSchedulable, "Register the node as schedulable. Won't have any effect if register-node is false. [default=true]")
fs.MarkDeprecated("register-schedulable", "will be removed in a future version")
fs.Var(utiltaints.NewTaintsVar(&s.RegisterWithTaints), "register-with-taints", "Register the node with the given list of taints (comma seperated \"<key>=<value>:<effect>\"). No-op if register-node is false.")
@@ -264,4 +262,12 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&s.RemoteImageEndpoint, "image-service-endpoint", s.RemoteImageEndpoint, "[Experimental] The unix socket endpoint of remote image service. If not specified, it will be the same with container-runtime-endpoint by default. The endpoint is used only when CRI integration is enabled (--enable-cri)")
fs.BoolVar(&s.ExperimentalCheckNodeCapabilitiesBeforeMount, "experimental-check-node-capabilities-before-mount", s.ExperimentalCheckNodeCapabilitiesBeforeMount, "[Experimental] if set true, the kubelet will check the underlying node for required componenets (binaries, etc.) before performing the mount")
// Node Allocatable Flags
fs.Var(&s.SystemReserved, "system-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
fs.Var(&s.KubeReserved, "kube-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
fs.StringSliceVar(&s.EnforceNodeAllocatable, "enforce-node-allocatable", s.EnforceNodeAllocatable, "A comma separated list of levels of node allocatable enforcement to be enforced by kubelet. Acceptable options are 'pods', 'system-reserved' & 'kube-reserved'. If the latter two options are specified, '--system-reserved-cgroup' & '--kube-reserved-cgroup' must also be set respectively. See https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md for more details. [default='']")
fs.StringVar(&s.SystemReservedCgroup, "system-reserved-cgroup", s.SystemReservedCgroup, "Absolute name of the top level cgroup that is used to manage non-kubernetes components for which compute resources were reserved via '--system-reserved' flag. Ex. '/system-reserved'. [default='']")
fs.StringVar(&s.KubeReservedCgroup, "kube-reserved-cgroup", s.KubeReservedCgroup, "Absolute name of the top level cgroup that is used to manage kubernetes components for which compute resources were reserved via '--kube-reserved' flag. Ex. '/kube-reserved'. [default='']")
fs.BoolVar(&s.ExperimentalNodeAllocatableIgnoreEvictionThreshold, "experimental-allocatable-ignore-eviction", s.ExperimentalNodeAllocatableIgnoreEvictionThreshold, "When set to 'true', Hard Eviction Thresholds will be ignored while calculating Node Allocatable. See https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md for more details. [default=false]")
}

View File

@@ -36,10 +36,12 @@ import (
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apiserver/pkg/server/healthz"
utilfeature "k8s.io/apiserver/pkg/util/feature"
@@ -69,6 +71,8 @@ import (
"k8s.io/kubernetes/pkg/kubelet/config"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/dockertools"
"k8s.io/kubernetes/pkg/kubelet/eviction"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/kubelet/server"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/util/configz"
@@ -81,12 +85,17 @@ import (
"k8s.io/kubernetes/pkg/version"
)
const (
// Kubelet component name
componentKubelet = "kubelet"
)
// NewKubeletCommand creates a *cobra.Command object with default parameters
func NewKubeletCommand() *cobra.Command {
s := options.NewKubeletServer()
s.AddFlags(pflag.CommandLine)
cmd := &cobra.Command{
Use: "kubelet",
Use: componentKubelet,
Long: `The kubelet is the primary "node agent" that runs on each
node. The kubelet works in terms of a PodSpec. A PodSpec is a YAML or JSON object
that describes a pod. The kubelet takes a set of PodSpecs that are provided through
@@ -305,6 +314,44 @@ func initConfigz(kc *componentconfig.KubeletConfiguration) (*configz.Config, err
return cz, err
}
// validateConfig validates configuration of Kubelet and returns an error if the input configuration is invalid.
func validateConfig(s *options.KubeletServer) error {
if !s.CgroupsPerQOS && len(s.EnforceNodeAllocatable) > 0 {
return fmt.Errorf("Node Allocatable enforcement is not supported unless Cgroups Per QOS feature is turned on")
}
if s.SystemCgroups != "" && s.CgroupRoot == "" {
return fmt.Errorf("invalid configuration: system container was specified and cgroup root was not specified")
}
for _, val := range s.EnforceNodeAllocatable {
switch val {
case cm.NodeAllocatableEnforcementKey:
case cm.SystemReservedEnforcementKey:
case cm.KubeReservedEnforcementKey:
continue
default:
return fmt.Errorf("invalid option %q specified for EnforceNodeAllocatable setting. Valid options are %q, %q or %q", val, cm.NodeAllocatableEnforcementKey, cm.SystemReservedEnforcementKey, cm.KubeReservedEnforcementKey)
}
}
return nil
}
// makeEventRecorder sets up kubeDeps.Recorder if it's nil. It's a no-op otherwise.
func makeEventRecorder(s *componentconfig.KubeletConfiguration, kubeDeps *kubelet.KubeletDeps, nodeName types.NodeName) {
if kubeDeps.Recorder != nil {
return
}
eventBroadcaster := record.NewBroadcaster()
kubeDeps.Recorder = eventBroadcaster.NewRecorder(api.Scheme, clientv1.EventSource{Component: componentKubelet, Host: string(nodeName)})
eventBroadcaster.StartLogging(glog.V(3).Infof)
if kubeDeps.EventClient != nil {
glog.V(4).Infof("Sending events to api server.")
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeDeps.EventClient.Events("")})
} else {
glog.Warning("No api server defined - no events will be sent to API server.")
}
}
func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) {
// TODO: this should be replaced by a --standalone flag
standaloneMode := (len(s.APIServerList) == 0 && !s.RequireKubeConfig)
@@ -362,6 +409,11 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) {
}
}
// Validate configuration.
if err := validateConfig(s); err != nil {
return err
}
if kubeDeps == nil {
var kubeClient clientset.Interface
var eventClient v1core.EventsGetter
@@ -380,11 +432,12 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) {
}
}
nodeName, err := getNodeName(cloud, nodeutil.GetHostname(s.HostnameOverride))
if err != nil {
return err
}
if s.BootstrapKubeconfig != "" {
nodeName, err := getNodeName(cloud, nodeutil.GetHostname(s.HostnameOverride))
if err != nil {
return err
}
if err := bootstrapClientCert(s.KubeConfig.Value(), s.BootstrapKubeconfig, s.CertDirectory, nodeName); err != nil {
return err
}
@@ -428,12 +481,12 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) {
kubeDeps.EventClient = eventClient
}
if kubeDeps.Auth == nil {
nodeName, err := getNodeName(kubeDeps.Cloud, nodeutil.GetHostname(s.HostnameOverride))
if err != nil {
return err
}
nodeName, err := getNodeName(kubeDeps.Cloud, nodeutil.GetHostname(s.HostnameOverride))
if err != nil {
return err
}
if kubeDeps.Auth == nil {
auth, err := buildAuth(nodeName, kubeDeps.ExternalKubeClient, s.KubeletConfiguration)
if err != nil {
return err
@@ -448,14 +501,30 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) {
}
}
// Setup event recorder if required.
makeEventRecorder(&s.KubeletConfiguration, kubeDeps, nodeName)
if kubeDeps.ContainerManager == nil {
if s.SystemCgroups != "" && s.CgroupRoot == "" {
return fmt.Errorf("invalid configuration: system container was specified and cgroup root was not specified")
}
if s.CgroupsPerQOS && s.CgroupRoot == "" {
glog.Infof("--cgroups-per-qos enabled, but --cgroup-root was not specified. defaulting to /")
s.CgroupRoot = "/"
}
kubeReserved, err := parseResourceList(s.KubeReserved)
if err != nil {
return err
}
systemReserved, err := parseResourceList(s.SystemReserved)
if err != nil {
return err
}
var hardEvictionThresholds []evictionapi.Threshold
// If the user requested to ignore eviction thresholds, then do not set valid values for hardEvictionThresholds here.
if !s.ExperimentalNodeAllocatableIgnoreEvictionThreshold {
hardEvictionThresholds, err = eviction.ParseThresholdConfig(s.EvictionHard, "", "", "")
if err != nil {
return err
}
}
kubeDeps.ContainerManager, err = cm.NewContainerManager(
kubeDeps.Mounter,
kubeDeps.CAdvisorInterface,
@@ -469,8 +538,17 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) {
CgroupDriver: s.CgroupDriver,
ProtectKernelDefaults: s.ProtectKernelDefaults,
EnableCRI: s.EnableCRI,
NodeAllocatableConfig: cm.NodeAllocatableConfig{
KubeReservedCgroupName: s.KubeReservedCgroup,
SystemReservedCgroupName: s.SystemReservedCgroup,
EnforceNodeAllocatable: sets.NewString(s.EnforceNodeAllocatable...),
KubeReserved: kubeReserved,
SystemReserved: systemReserved,
HardEvictionThresholds: hardEvictionThresholds,
},
},
s.ExperimentalFailSwapOn)
s.ExperimentalFailSwapOn,
kubeDeps.Recorder)
if err != nil {
return err
@@ -685,16 +763,8 @@ func RunKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *kubelet
if err != nil {
return err
}
eventBroadcaster := record.NewBroadcaster()
kubeDeps.Recorder = eventBroadcaster.NewRecorder(api.Scheme, clientv1.EventSource{Component: "kubelet", Host: string(nodeName)})
eventBroadcaster.StartLogging(glog.V(3).Infof)
if kubeDeps.EventClient != nil {
glog.V(4).Infof("Sending events to api server.")
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeDeps.EventClient.Events("")})
} else {
glog.Warning("No api server defined - no events will be sent to API server.")
}
// Setup event recorder if required.
makeEventRecorder(kubeCfg, kubeDeps, nodeName)
// TODO(mtaufen): I moved the validation of these fields here, from UnsecuredKubeletConfig,
// so that I could remove the associated fields from KubeletConfig. I would
@@ -828,3 +898,29 @@ func CreateAndInitKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDep
return k, nil
}
// parseResourceList parses the given configuration map into an API
// ResourceList or returns an error.
func parseResourceList(m componentconfig.ConfigurationMap) (v1.ResourceList, error) {
if len(m) == 0 {
return nil, nil
}
rl := make(v1.ResourceList)
for k, v := range m {
switch v1.ResourceName(k) {
// Only CPU and memory resources are supported.
case v1.ResourceCPU, v1.ResourceMemory:
q, err := resource.ParseQuantity(v)
if err != nil {
return nil, err
}
if q.Sign() == -1 {
return nil, fmt.Errorf("resource quantity for %q cannot be negative: %v", k, v)
}
rl[v1.ResourceName(k)] = q
default:
return nil, fmt.Errorf("cannot reserve %q resource", k)
}
}
return rl, nil
}

View File

@@ -20,7 +20,6 @@ import (
"testing"
"k8s.io/kubernetes/pkg/apis/componentconfig"
"k8s.io/kubernetes/pkg/kubelet"
)
func TestValueOfAllocatableResources(t *testing.T) {
@@ -32,13 +31,13 @@ func TestValueOfAllocatableResources(t *testing.T) {
}{
{
kubeReserved: "cpu=200m,memory=-150G",
systemReserved: "cpu=200m,memory=150G",
systemReserved: "cpu=200m,memory=15Ki",
errorExpected: true,
name: "negative quantity value",
},
{
kubeReserved: "cpu=200m,memory=150GG",
systemReserved: "cpu=200m,memory=150G",
kubeReserved: "cpu=200m,memory=150Gi",
systemReserved: "cpu=200m,memory=15Ky",
errorExpected: true,
name: "invalid quantity unit",
},
@@ -57,17 +56,15 @@ func TestValueOfAllocatableResources(t *testing.T) {
kubeReservedCM.Set(test.kubeReserved)
systemReservedCM.Set(test.systemReserved)
_, err := kubelet.ParseReservation(kubeReservedCM, systemReservedCM)
if err != nil {
t.Logf("%s: error returned: %v", test.name, err)
}
_, err1 := parseResourceList(kubeReservedCM)
_, err2 := parseResourceList(systemReservedCM)
if test.errorExpected {
if err == nil {
if err1 == nil && err2 == nil {
t.Errorf("%s: error expected", test.name)
}
} else {
if err != nil {
t.Errorf("%s: unexpected error: %v", test.name, err)
if err1 != nil || err2 != nil {
t.Errorf("%s: unexpected error: %v, %v", test.name, err1, err2)
}
}
}

View File

@@ -173,6 +173,7 @@ pkg/kubelet/api
pkg/kubelet/container
pkg/kubelet/envvars
pkg/kubelet/eviction
pkg/kubelet/eviction/api
pkg/kubelet/util/csr
pkg/kubelet/util/format
pkg/kubelet/util/ioutils

View File

@@ -29,6 +29,7 @@ RUNTIME_CONFIG=${RUNTIME_CONFIG:-""}
KUBELET_AUTHORIZATION_WEBHOOK=${KUBELET_AUTHORIZATION_WEBHOOK:-""}
KUBELET_AUTHENTICATION_WEBHOOK=${KUBELET_AUTHENTICATION_WEBHOOK:-""}
POD_MANIFEST_PATH=${POD_MANIFEST_PATH:-"/var/run/kubernetes/static-pods"}
KUBELET_FLAGS=${KUBELET_FLAGS:-""}
# Name of the network plugin, eg: "kubenet"
NET_PLUGIN=${NET_PLUGIN:-""}
# Place the binaries required by NET_PLUGIN in this directory, eg: "/home/kubernetes/bin".
@@ -603,7 +604,8 @@ function start_kubelet {
${net_plugin_args} \
${container_runtime_endpoint_args} \
${image_service_endpoint_args} \
--port="$KUBELET_PORT" >"${KUBELET_LOG}" 2>&1 &
--port="$KUBELET_PORT" \
${KUBELET_FLAGS} >"${KUBELET_LOG}" 2>&1 &
KUBELET_PID=$!
# Quick check that kubelet is running.
if ps -p $KUBELET_PID > /dev/null ; then

View File

@@ -14,7 +14,6 @@ cluster/gce/configure-vm.sh: cloud_config: ${CLOUD_CONFIG}
cluster/gce/configure-vm.sh: env-to-grains "feature_gates"
cluster/gce/configure-vm.sh: env-to-grains "runtime_config"
cluster/gce/configure-vm.sh: kubelet_api_servers: '${KUBELET_APISERVER}'
cluster/gce/configure-vm.sh: local -r client_ca_file="/srv/salt-overlay/salt/kubelet/ca.crt"
cluster/gce/container-linux/configure-helper.sh: authorization_mode+=",ABAC"
cluster/gce/container-linux/configure-helper.sh: authorization_mode+=",Webhook"
cluster/gce/container-linux/configure-helper.sh: grep -o "{{ *pillar\.get('storage_backend', '\(.*\)') *}}" | \
@@ -40,7 +39,6 @@ cluster/gce/trusty/configure-helper.sh: sed -i -e "s@{{ *pillar\.get('storage
cluster/gce/trusty/configure-helper.sh: sed -i -e "s@{{pillar\['allow_privileged'\]}}@true@g" "${src_file}"
cluster/gce/util.sh: local node_ip=$(gcloud compute instances describe --project "${PROJECT}" --zone "${ZONE}" \
cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py: context['pillar'] = {'num_nodes': get_node_count()}
cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py: msg = "Cannot change {0} to {1}".format(service_cidr(),
cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py: ca_cert_path = layer_options.get('ca_certificate_path')
cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py: cluster_dns.set_dns_info(53, hookenv.config('dns_domain'), dns_ip)
cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py: ip = service_cidr().split('/')[0]
@@ -171,6 +169,8 @@ test/e2e_node/container_manager_test.go: return fmt.Errorf("expected pid %d's o
test/e2e_node/container_manager_test.go: return fmt.Errorf("failed to get oom_score_adj for %d", pid)
test/e2e_node/container_manager_test.go: return fmt.Errorf("failed to get oom_score_adj for %d: %v", pid, err)
test/e2e_node/container_manager_test.go: procfsPath := path.Join("/proc", strconv.Itoa(pid), "oom_score_adj")
test/e2e_node/node_container_manager_test.go: kubeReservedCgroup = "/kube_reserved"
test/e2e_node/node_container_manager_test.go: systemReservedCgroup = "/system_reserved"
test/images/mount-tester/mt.go: flag.BoolVar(&breakOnExpectedContent, "break_on_expected_content", true, "Break out of loop on expected content, (use with --file_content_in_loop flag only)")
test/images/mount-tester/mt.go: flag.IntVar(&retryDuration, "retry_time", 180, "Retry time during the loop")
test/images/mount-tester/mt.go: flag.StringVar(&readFileContentInLoopPath, "file_content_in_loop", "", "Path to read the file content in loop from")

View File

@@ -7,9 +7,9 @@ advertised-address
algorithm-provider
all-namespaces
allocate-node-cidrs
allowed-not-ready-nodes
allow-missing-template-keys
allow-privileged
allowed-not-ready-nodes
anonymous-auth
api-advertise-addresses
api-burst
@@ -18,23 +18,24 @@ api-port
api-prefix
api-rate
api-server-advertise-address
api-server-port
api-server-service-type
api-servers
api-token
api-version
apiserver-arg-overrides
apiserver-arg-overrides
apiserver-count
apiserver-count
apiserver-count
apiserver-count
api-server-port
api-server-port
api-servers
api-servers
api-server-service-type
api-token
api-version
attach-detach-reconcile-sync-period
audit-log-maxage
audit-log-maxbackup
audit-log-maxsize
audit-log-path
auth-provider
auth-provider
auth-provider-arg
auth-provider-arg
authentication-kubeconfig
authentication-token-webhook
authentication-token-webhook-cache-ttl
@@ -46,6 +47,10 @@ authorization-rbac-super-user
authorization-webhook-cache-authorized-ttl
authorization-webhook-cache-unauthorized-ttl
authorization-webhook-config-file
auth-provider
auth-provider
auth-provider-arg
auth-provider-arg
azure-container-registry-config
babysit-daemons
basic-auth-file
@@ -101,8 +106,8 @@ concurrent-gc-syncs
concurrent-namespace-syncs
concurrent-replicaset-syncs
concurrent-resource-quota-syncs
concurrent-service-syncs
concurrent-serviceaccount-token-syncs
concurrent-service-syncs
config-map
config-map-namespace
config-sync-period
@@ -115,13 +120,13 @@ conntrack-tcp-timeout-established
consumer-port
consumer-service-name
consumer-service-namespace
contain-pod-resources
container-port
container-runtime
container-runtime-endpoint
contain-pod-resources
contention-profiling
controller-start-interval
controllermanager-arg-overrides
controller-start-interval
core-kubeconfig
cors-allowed-origins
cpu-cfs-quota
@@ -155,13 +160,13 @@ dns-port
dns-provider
dns-provider-config
dns-zone-name
dockercfg-path
docker-email
docker-endpoint
docker-exec-handler
docker-password
docker-server
docker-username
dockercfg-path
driver-port
drop-embedded-fields
dry-run
@@ -169,10 +174,6 @@ dump-logs-on-failure
duration-sec
e2e-output-dir
e2e-verify-service-account
etcd-metrics-scrape-uri
etcd-upgrade-storage
etcd-upgrade-version
etcd-version-scrape-uri
enable-controller-attach-detach
enable-cri
enable-custom-metrics
@@ -185,12 +186,14 @@ enable-hostpath-provisioner
enable-server
enable-swagger-ui
enable-taint-manager
enforce-node-allocatable
etcd-address
etcd-cafile
etcd-certfile
etcd-config
etcd-keyfile
etcd-metrics-scrape-uri
etcd-metrics-scrape-uri
etcd-mutation-timeout
etcd-persistent-storage
etcd-prefix
@@ -199,6 +202,9 @@ etcd-quorum-read
etcd-server
etcd-servers
etcd-servers-overrides
etcd-upgrade-storage
etcd-upgrade-version
etcd-version-scrape-uri
etcd-version-scrape-uri
event-burst
event-qps
@@ -214,16 +220,17 @@ executor-logv
executor-path
executor-suicide-timeout
exit-on-lock-contention
experimental-allocatable-ignore-eviction
experimental-allowed-unsafe-sysctls
experimental-bootstrap-kubeconfig
experimental-bootstrap-token-auth
experimental-keystone-url
experimental-check-node-capabilities-before-mount
experimental-cri
experimental-fail-swap-on
experimental-kernel-memcg-notification
experimental-keystone-ca-file
experimental-keystone-url
experimental-keystone-url
experimental-mounter-path
experimental-nvidia-gpus
experimental-prefix
@@ -245,8 +252,8 @@ federated-kube-context
federation-name
federation-system-namespace
file-check-frequency
file-suffix
file_content_in_loop
file-suffix
flex-volume-plugin-dir
forward-services
framework-name
@@ -282,11 +289,11 @@ heapster-service
horizontal-pod-autoscaler-sync-period
host-cluster-context
host-ipc-sources
hostname-override
host-network-sources
host-pid-sources
host-port-endpoints
host-system-namespace
hostname-override
http-check-frequency
http-port
ignore-daemonsets
@@ -298,9 +305,9 @@ image-project
image-pull-policy
image-pull-progress-deadline
image-service-endpoint
include-extended-apis
include-extended-apis
included-types-overrides
include-extended-apis
include-extended-apis
initial-sync-timeout
input-base
input-dirs
@@ -339,15 +346,13 @@ kops-ssh-key
kops-state
kops-up-timeout
kops-zones
kubeadm-cmd-skip
kubeadm-cmd-skip
kubeadm-path
kubeadm-path
kube-api-burst
kube-api-content-type
kube-api-qps
kube-master
kube-master
kube-master-url
kube-reserved
kubeadm-cmd-skip
kubeadm-path
kubecfg-file
kubectl-path
kubelet-address
@@ -371,6 +376,15 @@ kubelet-read-only-port
kubelet-root-dir
kubelet-sync-frequency
kubelet-timeout
kube-master
kube-master
kube-master
kube-master
kube-master-url
kube-master-url
kube-reserved
kube-reserved
kube-reserved-cgroup
kubernetes-anywhere-cluster
kubernetes-anywhere-path
kubernetes-anywhere-phase2-provider
@@ -404,6 +418,8 @@ master-os-distro
master-service-namespace
max-concurrency
max-connection-bytes-per-sec
maximum-dead-containers
maximum-dead-containers-per-container
max-log-age
max-log-backups
max-log-size
@@ -413,8 +429,6 @@ max-outgoing-burst
max-outgoing-qps
max-pods
max-requests-inflight
maximum-dead-containers
maximum-dead-containers-per-container
mesos-authentication-principal
mesos-authentication-provider
mesos-authentication-secret-file
@@ -430,23 +444,19 @@ mesos-sandbox-overlay
mesos-user
metrics-path
min-available
min-pr-number
min-request-timeout
min-resync-period
minimum-container-ttl-duration
minimum-image-ttl-duration
minion-max-log-age
minion-max-log-backups
minion-max-log-size
minion-path-override
min-pr-number
min-request-timeout
min-resync-period
namespace-sync-period
network-plugin
network-plugin-dir
network-plugin-mtu
no-headers
no-headers
no-suggestions
no-suggestions
node-cidr-mask-size
node-eviction-rate
node-instance-group
@@ -465,7 +475,11 @@ node-schedulable-timeout
node-startup-grace-period
node-status-update-frequency
node-sync-period
no-headers
no-headers
non-masquerade-cidr
no-suggestions
no-suggestions
num-nodes
oidc-ca-file
oidc-client-id
@@ -474,7 +488,6 @@ oidc-issuer-url
oidc-username-claim
only-idl
oom-score-adj
out-version
outofdisk-transition-frequency
output-base
output-directory
@@ -482,6 +495,7 @@ output-file-base
output-package
output-print-type
output-version
out-version
path-override
pod-cidr
pod-eviction-timeout
@@ -506,6 +520,8 @@ proxy-logv
proxy-mode
proxy-port-range
public-address-override
pvclaimbinder-sync-period
pvclaimbinder-sync-period
pv-recycler-increment-timeout-nfs
pv-recycler-maximum-retry
pv-recycler-minimum-timeout-hostpath
@@ -513,7 +529,6 @@ pv-recycler-minimum-timeout-nfs
pv-recycler-pod-template-filepath-hostpath
pv-recycler-pod-template-filepath-nfs
pv-recycler-timeout-increment-hostpath
pvclaimbinder-sync-period
quiet
read-only-port
really-crash-for-testing
@@ -540,8 +555,8 @@ requestheader-client-ca-file
requestheader-extra-headers-prefix
requestheader-group-headers
requestheader-username-headers
require-kubeconfig
required-contexts
require-kubeconfig
resolv-conf
resource
resource-container
@@ -624,6 +639,7 @@ sync-frequency
system-cgroups
system-pods-startup-timeout
system-reserved
system-reserved-cgroup
system-validate-mode
target-port
target-ram-mb
@@ -635,8 +651,9 @@ tls-ca-file
tls-cert-file
tls-private-key-file
tls-sni-cert-key
to-version
token-auth-file
to-version
to-version
ttl-keys-prefix
ttl-secs
type-src
@@ -648,10 +665,11 @@ update-period
upgrade-image
upgrade-target
use-kubernetes-cluster-service
use-service-account-credentials
use-kubernetes-version
use-taint-based-evictions
user-whitelist
use-service-account-credentials
use-service-account-credentials
use-taint-based-evictions
verb
verify-only
versioned-clientset-package

View File

@@ -442,16 +442,6 @@ type KubeletConfiguration struct {
// manage attachment/detachment of volumes scheduled to this node, and
// disables kubelet from executing any attach/detach operations
EnableControllerAttachDetach bool
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// that describe resources reserved for non-kubernetes components.
// Currently only cpu and memory are supported. [default=none]
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
SystemReserved ConfigurationMap
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// that describe resources reserved for kubernetes system components.
// Currently only cpu and memory are supported. [default=none]
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
KubeReserved ConfigurationMap
// Default behaviour for kernel tuning
ProtectKernelDefaults bool
// If true, Kubelet ensures a set of iptables rules are present on host.
@@ -485,6 +475,32 @@ type KubeletConfiguration struct {
// This flag, if set, instructs the kubelet to keep volumes from terminated pods mounted to the node.
// This can be useful for debugging volume related issues.
KeepTerminatedPodVolumes bool
/* following flags are meant for Node Allocatable */
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// that describe resources reserved for non-kubernetes components.
// Currently only cpu and memory are supported. [default=none]
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
SystemReserved ConfigurationMap
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// that describe resources reserved for kubernetes system components.
// Currently only cpu and memory are supported. [default=none]
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
KubeReserved ConfigurationMap
// This flag helps kubelet identify absolute name of top level cgroup used to enforce `SystemReserved` compute resource reservation for OS system daemons.
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
SystemReservedCgroup string
// This flag helps kubelet identify absolute name of top level cgroup used to enforce `KubeReserved` compute resource reservation for Kubernetes node system daemons.
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
KubeReservedCgroup string
// This flag specifies the various Node Allocatable enforcements that Kubelet needs to perform.
// This flag accepts a list of options. Acceptable options are `pods`, `system-reserved` & `kube-reserved`.
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
EnforceNodeAllocatable []string
// This flag, if set, will avoid including `EvictionHard` limits while computing Node Allocatable.
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
ExperimentalNodeAllocatableIgnoreEvictionThreshold bool
}
type KubeletAuthorizationMode string

View File

@@ -48,7 +48,12 @@ const (
defaultIPTablesDropBit = 15
)
var zeroDuration = metav1.Duration{}
var (
zeroDuration = metav1.Duration{}
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
// TODO: Set the default to "pods" once cgroups per qos is turned on by default.
defaultNodeAllocatableEnforcement = []string{}
)
func addDefaultingFuncs(scheme *kruntime.Scheme) error {
RegisterDefaults(scheme)
@@ -401,6 +406,9 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
if obj.CgroupDriver == "" {
obj.CgroupDriver = "cgroupfs"
}
if obj.EnforceNodeAllocatable == nil {
obj.EnforceNodeAllocatable = defaultNodeAllocatableEnforcement
}
if obj.EnableCRI == nil {
obj.EnableCRI = boolVar(true)
}

View File

@@ -478,16 +478,6 @@ type KubeletConfiguration struct {
// manage attachment/detachment of volumes scheduled to this node, and
// disables kubelet from executing any attach/detach operations
EnableControllerAttachDetach *bool `json:"enableControllerAttachDetach"`
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// that describe resources reserved for non-kubernetes components.
// Currently only cpu and memory are supported. [default=none]
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
SystemReserved map[string]string `json:"systemReserved"`
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// that describe resources reserved for kubernetes system components.
// Currently only cpu and memory are supported. [default=none]
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
KubeReserved map[string]string `json:"kubeReserved"`
// Default behaviour for kernel tuning
ProtectKernelDefaults bool `json:"protectKernelDefaults"`
// If true, Kubelet ensures a set of iptables rules are present on host.
@@ -522,6 +512,33 @@ type KubeletConfiguration struct {
// This flag, if set, instructs the kubelet to keep volumes from terminated pods mounted to the node.
// This can be useful for debugging volume related issues.
KeepTerminatedPodVolumes bool `json:"keepTerminatedPodVolumes,omitempty"`
/* following flags are meant for Node Allocatable */
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// that describe resources reserved for non-kubernetes components.
// Currently only cpu and memory are supported. [default=none]
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
SystemReserved map[string]string `json:"systemReserved"`
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// that describe resources reserved for kubernetes system components.
// Currently only cpu and memory are supported. [default=none]
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
KubeReserved map[string]string `json:"kubeReserved"`
// This flag helps kubelet identify absolute name of top level cgroup used to enforce `SystemReserved` compute resource reservation for OS system daemons.
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
SystemReservedCgroup string `json:"systemReservedCgroup,omitempty"`
// This flag helps kubelet identify absolute name of top level cgroup used to enforce `KubeReserved` compute resource reservation for Kubernetes node system daemons.
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
KubeReservedCgroup string `json:"kubeReservedCgroup,omitempty"`
// This flag specifies the various Node Allocatable enforcements that Kubelet needs to perform.
// This flag accepts a list of options. Acceptable options are `pods`, `system-reserved` & `kube-reserved`.
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
EnforceNodeAllocatable []string `json:"enforceNodeAllocatable,omitempty"`
// This flag, if set, will avoid including `EvictionHard` limits while computing Node Allocatable.
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
ExperimentalNodeAllocatableIgnoreEvictionThreshold bool `json:"experimentalNodeAllocatableIgnoreEvictionThreshold,omitempty"`
}
type KubeletAuthorizationMode string

View File

@@ -396,8 +396,6 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu
if err := v1.Convert_Pointer_bool_To_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil {
return err
}
out.SystemReserved = *(*componentconfig.ConfigurationMap)(unsafe.Pointer(&in.SystemReserved))
out.KubeReserved = *(*componentconfig.ConfigurationMap)(unsafe.Pointer(&in.KubeReserved))
out.ProtectKernelDefaults = in.ProtectKernelDefaults
if err := v1.Convert_Pointer_bool_To_bool(&in.MakeIPTablesUtilChains, &out.MakeIPTablesUtilChains, s); err != nil {
return err
@@ -416,6 +414,12 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu
out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn
out.ExperimentalCheckNodeCapabilitiesBeforeMount = in.ExperimentalCheckNodeCapabilitiesBeforeMount
out.KeepTerminatedPodVolumes = in.KeepTerminatedPodVolumes
out.SystemReserved = *(*componentconfig.ConfigurationMap)(unsafe.Pointer(&in.SystemReserved))
out.KubeReserved = *(*componentconfig.ConfigurationMap)(unsafe.Pointer(&in.KubeReserved))
out.SystemReservedCgroup = in.SystemReservedCgroup
out.KubeReservedCgroup = in.KubeReservedCgroup
out.EnforceNodeAllocatable = *(*[]string)(unsafe.Pointer(&in.EnforceNodeAllocatable))
out.ExperimentalNodeAllocatableIgnoreEvictionThreshold = in.ExperimentalNodeAllocatableIgnoreEvictionThreshold
return nil
}
@@ -570,8 +574,6 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu
if err := v1.Convert_bool_To_Pointer_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil {
return err
}
out.SystemReserved = *(*map[string]string)(unsafe.Pointer(&in.SystemReserved))
out.KubeReserved = *(*map[string]string)(unsafe.Pointer(&in.KubeReserved))
out.ProtectKernelDefaults = in.ProtectKernelDefaults
if err := v1.Convert_bool_To_Pointer_bool(&in.MakeIPTablesUtilChains, &out.MakeIPTablesUtilChains, s); err != nil {
return err
@@ -590,6 +592,12 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu
out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn
out.ExperimentalCheckNodeCapabilitiesBeforeMount = in.ExperimentalCheckNodeCapabilitiesBeforeMount
out.KeepTerminatedPodVolumes = in.KeepTerminatedPodVolumes
out.SystemReserved = *(*map[string]string)(unsafe.Pointer(&in.SystemReserved))
out.KubeReserved = *(*map[string]string)(unsafe.Pointer(&in.KubeReserved))
out.SystemReservedCgroup = in.SystemReservedCgroup
out.KubeReservedCgroup = in.KubeReservedCgroup
out.EnforceNodeAllocatable = *(*[]string)(unsafe.Pointer(&in.EnforceNodeAllocatable))
out.ExperimentalNodeAllocatableIgnoreEvictionThreshold = in.ExperimentalNodeAllocatableIgnoreEvictionThreshold
return nil
}

View File

@@ -266,20 +266,6 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c *
*out = new(bool)
**out = **in
}
if in.SystemReserved != nil {
in, out := &in.SystemReserved, &out.SystemReserved
*out = make(map[string]string)
for key, val := range *in {
(*out)[key] = val
}
}
if in.KubeReserved != nil {
in, out := &in.KubeReserved, &out.KubeReserved
*out = make(map[string]string)
for key, val := range *in {
(*out)[key] = val
}
}
if in.MakeIPTablesUtilChains != nil {
in, out := &in.MakeIPTablesUtilChains, &out.MakeIPTablesUtilChains
*out = new(bool)
@@ -305,6 +291,25 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c *
*out = new(bool)
**out = **in
}
if in.SystemReserved != nil {
in, out := &in.SystemReserved, &out.SystemReserved
*out = make(map[string]string)
for key, val := range *in {
(*out)[key] = val
}
}
if in.KubeReserved != nil {
in, out := &in.KubeReserved, &out.KubeReserved
*out = make(map[string]string)
for key, val := range *in {
(*out)[key] = val
}
}
if in.EnforceNodeAllocatable != nil {
in, out := &in.EnforceNodeAllocatable, &out.EnforceNodeAllocatable
*out = make([]string, len(*in))
copy(*out, *in)
}
return nil
}
}

View File

@@ -177,6 +177,11 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface
(*out)[key] = val
}
}
if in.AllowedUnsafeSysctls != nil {
in, out := &in.AllowedUnsafeSysctls, &out.AllowedUnsafeSysctls
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.SystemReserved != nil {
in, out := &in.SystemReserved, &out.SystemReserved
*out = make(ConfigurationMap)
@@ -191,8 +196,8 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface
(*out)[key] = val
}
}
if in.AllowedUnsafeSysctls != nil {
in, out := &in.AllowedUnsafeSysctls, &out.AllowedUnsafeSysctls
if in.EnforceNodeAllocatable != nil {
in, out := &in.EnforceNodeAllocatable, &out.EnforceNodeAllocatable
*out = make([]string, len(*in))
copy(*out, *in)
}

View File

@@ -189,16 +189,16 @@ func TestTryOrdering(t *testing.T) {
switch value.Value {
case "first":
if !value.AddedAt.Equal(time.Unix(0, time.Millisecond.Nanoseconds())) {
t.Fatalf("added time for %s is %d", value.Value, value.AddedAt)
t.Fatalf("added time for %s is %v", value.Value, value.AddedAt)
}
case "second":
if !value.AddedAt.Equal(time.Unix(0, 2*time.Millisecond.Nanoseconds())) {
t.Fatalf("added time for %s is %d", value.Value, value.AddedAt)
t.Fatalf("added time for %s is %v", value.Value, value.AddedAt)
}
if hasQueued {
if !value.ProcessAt.Equal(time.Unix(0, 6*time.Millisecond.Nanoseconds())) {
t.Fatalf("process time for %s is %d", value.Value, value.ProcessAt)
t.Fatalf("process time for %s is %v", value.Value, value.ProcessAt)
}
break
}
@@ -209,7 +209,7 @@ func TestTryOrdering(t *testing.T) {
case "third":
if !value.AddedAt.Equal(time.Unix(0, 3*time.Millisecond.Nanoseconds())) {
t.Fatalf("added time for %s is %d", value.Value, value.AddedAt)
t.Fatalf("added time for %s is %v", value.Value, value.AddedAt)
}
}
order = append(order, value.Value)

View File

@@ -13283,34 +13283,6 @@ func GetOpenAPIDefinitions(ref openapi.ReferenceCallback) map[string]openapi.Ope
Format: "",
},
},
"systemReserved": {
SchemaProps: spec.SchemaProps{
Description: "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. [default=none] See http://kubernetes.io/docs/user-guide/compute-resources for more detail.",
Type: []string{"object"},
AdditionalProperties: &spec.SchemaOrBool{
Schema: &spec.Schema{
SchemaProps: spec.SchemaProps{
Type: []string{"string"},
Format: "",
},
},
},
},
},
"kubeReserved": {
SchemaProps: spec.SchemaProps{
Description: "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. [default=none] See http://kubernetes.io/docs/user-guide/compute-resources for more detail.",
Type: []string{"object"},
AdditionalProperties: &spec.SchemaOrBool{
Schema: &spec.Schema{
SchemaProps: spec.SchemaProps{
Type: []string{"string"},
Format: "",
},
},
},
},
},
"protectKernelDefaults": {
SchemaProps: spec.SchemaProps{
Description: "Default behaviour for kernel tuning",
@@ -13388,8 +13360,71 @@ func GetOpenAPIDefinitions(ref openapi.ReferenceCallback) map[string]openapi.Ope
Format: "",
},
},
"systemReserved": {
SchemaProps: spec.SchemaProps{
Description: "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. [default=none] See http://kubernetes.io/docs/user-guide/compute-resources for more detail.",
Type: []string{"object"},
AdditionalProperties: &spec.SchemaOrBool{
Schema: &spec.Schema{
SchemaProps: spec.SchemaProps{
Type: []string{"string"},
Format: "",
},
},
},
},
},
"kubeReserved": {
SchemaProps: spec.SchemaProps{
Description: "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. [default=none] See http://kubernetes.io/docs/user-guide/compute-resources for more detail.",
Type: []string{"object"},
AdditionalProperties: &spec.SchemaOrBool{
Schema: &spec.Schema{
SchemaProps: spec.SchemaProps{
Type: []string{"string"},
Format: "",
},
},
},
},
},
"systemReservedCgroup": {
SchemaProps: spec.SchemaProps{
Description: "This flag helps kubelet identify absolute name of top level cgroup used to enforce `SystemReserved` compute resource reservation for OS system daemons. Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.",
Type: []string{"string"},
Format: "",
},
},
"kubeReservedCgroup": {
SchemaProps: spec.SchemaProps{
Description: "This flag helps kubelet identify absolute name of top level cgroup used to enforce `KubeReserved` compute resource reservation for Kubernetes node system daemons. Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.",
Type: []string{"string"},
Format: "",
},
},
"enforceNodeAllocatable": {
SchemaProps: spec.SchemaProps{
Description: "This flag specifies the various Node Allocatable enforcements that Kubelet needs to perform. This flag accepts a list of options. Acceptible options are `pods`, `system-reserved` & `kube-reserved`. Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.",
Type: []string{"array"},
Items: &spec.SchemaOrArray{
Schema: &spec.Schema{
SchemaProps: spec.SchemaProps{
Type: []string{"string"},
Format: "",
},
},
},
},
},
"experimentalNodeAllocatableIgnoreEvictionThreshold": {
SchemaProps: spec.SchemaProps{
Description: "This flag, if set, will avoid including `EvictionHard` limits while computing Node Allocatable. Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.",
Type: []string{"boolean"},
Format: "",
},
},
},
Required: []string{"podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "experimentalKernelMemcgNotification", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit"},
Required: []string{"podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "experimentalKernelMemcgNotification", "podsPerCore", "enableControllerAttachDetach", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit", "systemReserved", "kubeReserved"},
},
},
Dependencies: []string{

View File

@@ -16,6 +16,7 @@ go_library(
"container_manager_linux.go",
"container_manager_stub.go",
"helpers_linux.go",
"node_container_manager.go",
"pod_container_manager_linux.go",
"pod_container_manager_stub.go",
"types.go",
@@ -25,6 +26,8 @@ go_library(
"//pkg/api/v1:go_default_library",
"//pkg/kubelet/cadvisor:go_default_library",
"//pkg/kubelet/cm/util:go_default_library",
"//pkg/kubelet/events:go_default_library",
"//pkg/kubelet/eviction/api:go_default_library",
"//pkg/kubelet/qos:go_default_library",
"//pkg/util:go_default_library",
"//pkg/util/mount:go_default_library",
@@ -43,6 +46,7 @@ go_library(
"//vendor:k8s.io/apimachinery/pkg/util/runtime",
"//vendor:k8s.io/apimachinery/pkg/util/sets",
"//vendor:k8s.io/apimachinery/pkg/util/wait",
"//vendor:k8s.io/client-go/tools/record",
],
)
@@ -52,11 +56,13 @@ go_test(
"cgroup_manager_linux_test.go",
"container_manager_linux_test.go",
"helpers_linux_test.go",
"node_container_manager_test.go",
],
library = ":go_default_library",
tags = ["automanaged"],
deps = [
"//pkg/api/v1:go_default_library",
"//pkg/kubelet/eviction/api:go_default_library",
"//pkg/util/mount:go_default_library",
"//vendor:github.com/stretchr/testify/assert",
"//vendor:github.com/stretchr/testify/require",

View File

@@ -147,6 +147,7 @@ func (l *libcontainerAdapter) revertName(name string) CgroupName {
panic(err)
}
driverName = strings.TrimSuffix(driverName, ".slice")
driverName = strings.Replace(driverName, "-", "/", -1)
driverName = strings.Replace(driverName, "_", "-", -1)
return CgroupName(driverName)
}

View File

@@ -16,7 +16,12 @@ limitations under the License.
package cm
import "k8s.io/kubernetes/pkg/api/v1"
import (
"k8s.io/apimachinery/pkg/util/sets"
// TODO: Migrate kubelet to either use its own internal objects or client library.
"k8s.io/kubernetes/pkg/api/v1"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)
// Manages the containers running on a machine.
type ContainerManager interface {
@@ -44,6 +49,9 @@ type ContainerManager interface {
// GetQOSContainersInfo returns the names of top level QoS containers
GetQOSContainersInfo() QOSContainersInfo
// GetNodeAllocatableReservation returns the amount of compute resources that have to be reserved from scheduling.
GetNodeAllocatableReservation() v1.ResourceList
}
type NodeConfig struct {
@@ -56,9 +64,26 @@ type NodeConfig struct {
CgroupDriver string
ProtectKernelDefaults bool
EnableCRI bool
NodeAllocatableConfig
}
type NodeAllocatableConfig struct {
KubeReservedCgroupName string
SystemReservedCgroupName string
EnforceNodeAllocatable sets.String
KubeReserved v1.ResourceList
SystemReserved v1.ResourceList
HardEvictionThresholds []evictionapi.Threshold
}
type Status struct {
// Any soft requirements that were unsatisfied.
SoftRequirements error
}
const (
// User visible keys for managing node allocatable enforcement on the node.
NodeAllocatableEnforcementKey = "pods"
SystemReservedEnforcementKey = "system-reserved"
KubeReservedEnforcementKey = "kube-reserved"
)

View File

@@ -38,6 +38,7 @@ import (
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
cmutil "k8s.io/kubernetes/pkg/kubelet/cm/util"
@@ -101,10 +102,20 @@ type containerManagerImpl struct {
// External containers being managed.
systemContainers []*systemContainer
qosContainers QOSContainersInfo
periodicTasks []func()
// Tasks that are run periodically
periodicTasks []func()
// holds all the mounted cgroup subsystems
subsystems *CgroupSubsystems
nodeInfo *v1.Node
// Interface for cgroup management
cgroupManager CgroupManager
// Capacity of this node.
capacity v1.ResourceList
// Absolute cgroupfs path to a cgroup that Kubelet needs to place all pods under.
// This path includes a top level container for enforcing Node Allocatable.
cgroupRoot string
// Event recorder interface.
recorder record.EventRecorder
}
type features struct {
@@ -167,7 +178,7 @@ func validateSystemRequirements(mountUtil mount.Interface) (features, error) {
// TODO(vmarmol): Add limits to the system containers.
// Takes the absolute name of the specified containers.
// Empty container name disables use of the specified container.
func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig, failSwapOn bool) (ContainerManager, error) {
func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig, failSwapOn bool, recorder record.EventRecorder) (ContainerManager, error) {
subsystems, err := GetCgroupSubsystems()
if err != nil {
return nil, fmt.Errorf("failed to get mounted cgroup subsystems: %v", err)
@@ -204,7 +215,17 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
"This will be a fatal error by default starting in K8s v1.6! " +
"In the meantime, you can opt-in to making this a fatal error by enabling --experimental-fail-swap-on.")
}
var capacity = v1.ResourceList{}
// It is safe to invoke `MachineInfo` on cAdvisor before logically initializing cAdvisor here because
// machine info is computed and cached once as part of cAdvisor object creation.
if info, err := cadvisorInterface.MachineInfo(); err == nil {
capacity = cadvisor.CapacityFromMachineInfo(info)
} else {
return nil, err
}
cgroupRoot := nodeConfig.CgroupRoot
cgroupManager := NewCgroupManager(subsystems, nodeConfig.CgroupDriver)
// Check if Cgroup-root actually exists on the node
if nodeConfig.CgroupsPerQOS {
// this does default to / when enabled, but this tests against regressions.
@@ -216,17 +237,24 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
// of note, we always use the cgroupfs driver when performing this check since
// the input is provided in that format.
// this is important because we do not want any name conversion to occur.
cgroupManager := NewCgroupManager(subsystems, "cgroupfs")
if !cgroupManager.Exists(CgroupName(nodeConfig.CgroupRoot)) {
return nil, fmt.Errorf("invalid configuration: cgroup-root doesn't exist: %v", err)
if !cgroupManager.Exists(CgroupName(cgroupRoot)) {
return nil, fmt.Errorf("invalid configuration: cgroup-root %q doesn't exist: %v", cgroupRoot, err)
}
glog.Infof("container manager verified cgroup-root exists: %v", nodeConfig.CgroupRoot)
glog.Infof("container manager verified user specified cgroup-root exists: %v", cgroupRoot)
// Include the top level cgroup for enforcing node allocatable into cgroup-root.
// This way, all sub modules can avoid having to understand the concept of node allocatable.
cgroupRoot = path.Join(cgroupRoot, defaultNodeAllocatableCgroupName)
}
glog.Infof("Creating Container Manager object based on Node Config: %+v", nodeConfig)
return &containerManagerImpl{
cadvisorInterface: cadvisorInterface,
mountUtil: mountUtil,
NodeConfig: nodeConfig,
subsystems: subsystems,
cgroupManager: cgroupManager,
capacity: capacity,
cgroupRoot: cgroupRoot,
recorder: recorder,
}, nil
}
@@ -239,11 +267,11 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager {
qosContainersInfo: cm.qosContainers,
nodeInfo: cm.nodeInfo,
subsystems: cm.subsystems,
cgroupManager: NewCgroupManager(cm.subsystems, cm.NodeConfig.CgroupDriver),
cgroupManager: cm.cgroupManager,
}
}
return &podContainerManagerNoop{
cgroupRoot: CgroupName(cm.NodeConfig.CgroupRoot),
cgroupRoot: CgroupName(cm.cgroupRoot),
}
}
@@ -373,13 +401,21 @@ func (cm *containerManagerImpl) setupNode() error {
// Setup top level qos containers only if CgroupsPerQOS flag is specified as true
if cm.NodeConfig.CgroupsPerQOS {
qosContainersInfo, err := InitQOS(cm.NodeConfig.CgroupDriver, cm.NodeConfig.CgroupRoot, cm.subsystems)
if err := cm.createNodeAllocatableCgroups(); err != nil {
return err
}
qosContainersInfo, err := InitQOS(cm.NodeConfig.CgroupDriver, cm.cgroupRoot, cm.subsystems)
if err != nil {
return fmt.Errorf("failed to initialise top level QOS containers: %v", err)
}
cm.qosContainers = qosContainersInfo
}
// Enforce Node Allocatable (if required)
if err := cm.enforceNodeAllocatableCgroups(); err != nil {
return err
}
systemContainers := []*systemContainer{}
if cm.ContainerRuntime == "docker" {
dockerVersion := getDockerVersion(cm.cadvisorInterface)
@@ -405,11 +441,7 @@ func (cm *containerManagerImpl) setupNode() error {
})
} else if cm.RuntimeCgroupsName != "" {
cont := newSystemCgroups(cm.RuntimeCgroupsName)
var capacity = v1.ResourceList{}
if info, err := cm.cadvisorInterface.MachineInfo(); err == nil {
capacity = cadvisor.CapacityFromMachineInfo(info)
}
memoryLimit := (int64(capacity.Memory().Value() * DockerMemoryLimitThresholdPercent / 100))
memoryLimit := (int64(cm.capacity.Memory().Value() * DockerMemoryLimitThresholdPercent / 100))
if memoryLimit < MinDockerMemoryLimit {
glog.Warningf("Memory limit %d for container %s is too small, reset it to %d", memoryLimit, cm.RuntimeCgroupsName, MinDockerMemoryLimit)
memoryLimit = MinDockerMemoryLimit
@@ -544,6 +576,10 @@ func (cm *containerManagerImpl) Start(node *v1.Node) error {
if err := cm.setupNode(); err != nil {
return err
}
// Ensure that node allocatable configuration is valid.
if err := cm.validateNodeAllocatable(); err != nil {
return err
}
// Don't run a background thread if there are no ensureStateFuncs.
hasEnsureStateFuncs := false
for _, cont := range cm.systemContainers {
@@ -823,3 +859,7 @@ func getDockerVersion(cadvisor cadvisor.Interface) *utilversion.Version {
}
return dockerVersion
}
func (m *containerManagerImpl) GetCapacity() v1.ResourceList {
return m.capacity
}
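
As a quick illustration of the hierarchy change above: when `CgroupsPerQOS` is enabled, the cgroup root used by the rest of the kubelet is no longer the user-specified cgroup-root itself, but that root joined with the node allocatable container. A minimal sketch, assuming the default root of `/` and the `kubepods` name introduced by this PR:

```go
package main

import (
	"fmt"
	"path"
)

// defaultNodeAllocatableCgroupName mirrors the constant added in this PR.
const defaultNodeAllocatableCgroupName = "kubepods"

func main() {
	// The kubelet's default cgroup-root.
	cgroupRoot := "/"
	// All QOS tier and per-pod cgroups end up parented under this path,
	// so the rest of the kubelet never has to reason about node allocatable.
	cgroupRoot = path.Join(cgroupRoot, defaultNodeAllocatableCgroupName)
	fmt.Println(cgroupRoot) // "/kubepods"
}
```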

View File

@@ -50,6 +50,10 @@ func (cm *containerManagerStub) Status() Status {
return Status{}
}
func (cm *containerManagerStub) GetNodeAllocatableReservation() v1.ResourceList {
return nil
}
func (cm *containerManagerStub) NewPodContainerManager() PodContainerManager {
return &podContainerManagerStub{}
}

View File

@@ -21,6 +21,7 @@ package cm
import (
"fmt"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
"k8s.io/kubernetes/pkg/util/mount"
@@ -55,10 +56,14 @@ func (cm *unsupportedContainerManager) Status() Status {
return Status{}
}
func (cm *unsupportedContainerManager) GetNodeAllocatableReservation() v1.ResourceList {
return nil
}
func (cm *unsupportedContainerManager) NewPodContainerManager() PodContainerManager {
return &unsupportedPodContainerManager{}
}
func NewContainerManager(_ mount.Interface, _ cadvisor.Interface, _ NodeConfig, failSwapOn bool) (ContainerManager, error) {
func NewContainerManager(_ mount.Interface, _ cadvisor.Interface, _ NodeConfig, failSwapOn bool, recorder record.EventRecorder) (ContainerManager, error) {
return &unsupportedContainerManager{}, nil
}

View File

@@ -21,6 +21,7 @@ package cm
import (
"github.com/golang/glog"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
"k8s.io/kubernetes/pkg/util/mount"
@@ -37,6 +38,6 @@ func (cm *containerManagerImpl) Start(_ *v1.Node) error {
return nil
}
func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig, failSwapOn bool) (ContainerManager, error) {
func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig, failSwapOn bool, recorder record.EventRecorder) (ContainerManager, error) {
return &containerManagerImpl{}, nil
}

View File

@@ -0,0 +1,229 @@
// +build linux
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cm
import (
"fmt"
"strings"
"time"
"github.com/golang/glog"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/kubelet/events"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)
const (
defaultNodeAllocatableCgroupName = "kubepods"
)
func (cm *containerManagerImpl) createNodeAllocatableCgroups() error {
cgroupConfig := &CgroupConfig{
Name: CgroupName(cm.cgroupRoot),
// The default limits for cpu shares can be very low, which can lead to CPU starvation for pods.
ResourceParameters: getCgroupConfig(cm.capacity),
}
if cm.cgroupManager.Exists(cgroupConfig.Name) {
return nil
}
if err := cm.cgroupManager.Create(cgroupConfig); err != nil {
glog.Errorf("Failed to create %q cgroup", cm.cgroupRoot)
return err
}
return nil
}
// Enforce Node Allocatable Cgroup settings.
func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
nc := cm.NodeConfig.NodeAllocatableConfig
// We need to update limits on the node allocatable cgroup no matter what because
// default cpu shares on cgroups are low and can cause cpu starvation.
nodeAllocatable := cm.capacity
// Use Node Allocatable limits instead of capacity if the user requested enforcing node allocatable.
if cm.CgroupsPerQOS && nc.EnforceNodeAllocatable.Has(NodeAllocatableEnforcementKey) {
nodeAllocatable = cm.getNodeAllocatableAbsolute()
}
glog.V(4).Infof("Attempting to enforce Node Allocatable with config: %+v", nc)
cgroupConfig := &CgroupConfig{
Name: CgroupName(cm.cgroupRoot),
ResourceParameters: getCgroupConfig(nodeAllocatable),
}
// If Node Allocatable is enforced on a node that has not been drained or is updated on an existing node to a lower value,
// existing memory usage across pods might be higher than the current Node Allocatable Memory Limits.
// Pod Evictions are expected to bring down memory usage to below Node Allocatable limits.
// Until evictions happen, retry cgroup updates.
// Update limits on the non-root cgroup-root to be safe, since the default limits for CPU can be too low.
if cm.cgroupRoot != "/" {
go func() {
for {
err := cm.cgroupManager.Update(cgroupConfig)
if err == nil {
cm.recorder.Event(cm.nodeInfo, v1.EventTypeNormal, events.SuccessfulNodeAllocatableEnforcement, "Updated Node Allocatable limit across pods")
return
}
message := fmt.Sprintf("Failed to update Node Allocatable Limits %q: %v", cm.cgroupRoot, err)
cm.recorder.Event(cm.nodeInfo, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message)
time.Sleep(time.Minute)
}
}()
}
// Now apply kube reserved and system reserved limits if required.
if nc.EnforceNodeAllocatable.Has(SystemReservedEnforcementKey) {
glog.V(2).Infof("Enforcing System reserved on cgroup %q with limits: %+v", nc.SystemReservedCgroupName, nc.SystemReserved)
if err := enforceExistingCgroup(cm.cgroupManager, nc.SystemReservedCgroupName, nc.SystemReserved); err != nil {
message := fmt.Sprintf("Failed to enforce System Reserved Cgroup Limits on %q: %v", nc.SystemReservedCgroupName, err)
cm.recorder.Event(cm.nodeInfo, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message)
return fmt.Errorf("%s", message)
}
cm.recorder.Eventf(cm.nodeInfo, v1.EventTypeNormal, events.SuccessfulNodeAllocatableEnforcement, "Updated limits on system reserved cgroup %v", nc.SystemReservedCgroupName)
}
if nc.EnforceNodeAllocatable.Has(KubeReservedEnforcementKey) {
glog.V(2).Infof("Enforcing kube reserved on cgroup %q with limits: %+v", nc.KubeReservedCgroupName, nc.KubeReserved)
if err := enforceExistingCgroup(cm.cgroupManager, nc.KubeReservedCgroupName, nc.KubeReserved); err != nil {
message := fmt.Sprintf("Failed to enforce Kube Reserved Cgroup Limits on %q: %v", nc.KubeReservedCgroupName, err)
cm.recorder.Event(cm.nodeInfo, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message)
return fmt.Errorf("%s", message)
}
cm.recorder.Eventf(cm.nodeInfo, v1.EventTypeNormal, events.SuccessfulNodeAllocatableEnforcement, "Updated limits on kube reserved cgroup %v", nc.KubeReservedCgroupName)
}
return nil
}
// enforceExistingCgroup updates the limits `rl` on the existing cgroup `cName` using the `cgroupManager` interface.
func enforceExistingCgroup(cgroupManager CgroupManager, cName string, rl v1.ResourceList) error {
cgroupConfig := &CgroupConfig{
Name: CgroupName(cName),
ResourceParameters: getCgroupConfig(rl),
}
glog.V(4).Infof("Enforcing limits on cgroup %q with %d cpu shares and %d bytes of memory", cName, cgroupConfig.ResourceParameters.CpuShares, cgroupConfig.ResourceParameters.Memory)
if !cgroupManager.Exists(cgroupConfig.Name) {
return fmt.Errorf("%q cgroup does not exist", cgroupConfig.Name)
}
if err := cgroupManager.Update(cgroupConfig); err != nil {
return err
}
return nil
}
// Returns a ResourceConfig object that can be used to create or update cgroups via the CgroupManager interface.
func getCgroupConfig(rl v1.ResourceList) *ResourceConfig {
// TODO(vishh): Set CPU Quota if necessary.
if rl == nil {
return nil
}
var rc ResourceConfig
if q, exists := rl[v1.ResourceMemory]; exists {
// Memory is defined in bytes.
val := q.Value()
rc.Memory = &val
}
if q, exists := rl[v1.ResourceCPU]; exists {
// CPU is defined in milli-cores.
val := MilliCPUToShares(q.MilliValue())
rc.CpuShares = &val
}
return &rc
}
// getNodeAllocatableAbsolute returns the absolute value of Node Allocatable, which is primarily useful for enforcement.
// Note that not all resources that are available on the node are included in the returned list of resources.
// Returns a ResourceList.
func (cm *containerManagerImpl) getNodeAllocatableAbsolute() v1.ResourceList {
result := make(v1.ResourceList)
for k, v := range cm.capacity {
value := *(v.Copy())
if cm.NodeConfig.SystemReserved != nil {
value.Sub(cm.NodeConfig.SystemReserved[k])
}
if cm.NodeConfig.KubeReserved != nil {
value.Sub(cm.NodeConfig.KubeReserved[k])
}
if value.Sign() < 0 {
// Negative Allocatable resources don't make sense.
value.Set(0)
}
result[k] = value
}
return result
}
// GetNodeAllocatableReservation returns the amount of compute resources that have to be reserved from scheduling on this node.
func (cm *containerManagerImpl) GetNodeAllocatableReservation() v1.ResourceList {
evictionReservation := hardEvictionReservation(cm.HardEvictionThresholds, cm.capacity)
result := make(v1.ResourceList)
for k := range cm.capacity {
value := resource.NewQuantity(0, resource.DecimalSI)
if cm.NodeConfig.SystemReserved != nil {
value.Add(cm.NodeConfig.SystemReserved[k])
}
if cm.NodeConfig.KubeReserved != nil {
value.Add(cm.NodeConfig.KubeReserved[k])
}
if evictionReservation != nil {
value.Add(evictionReservation[k])
}
if !value.IsZero() {
result[k] = *value
}
}
return result
}
// hardEvictionReservation returns a ResourceList that includes reservation of resources based on hard eviction thresholds.
func hardEvictionReservation(thresholds []evictionapi.Threshold, capacity v1.ResourceList) v1.ResourceList {
if len(thresholds) == 0 {
return nil
}
ret := v1.ResourceList{}
for _, threshold := range thresholds {
if threshold.Operator != evictionapi.OpLessThan {
continue
}
switch threshold.Signal {
case evictionapi.SignalMemoryAvailable:
memoryCapacity := capacity[v1.ResourceMemory]
value := evictionapi.GetThresholdQuantity(threshold.Value, &memoryCapacity)
ret[v1.ResourceMemory] = *value
}
}
return ret
}
// validateNodeAllocatable ensures that the user-specified Node Allocatable Configuration doesn't reserve more than the node capacity.
// Returns an error if the configuration is invalid, nil otherwise.
func (cm *containerManagerImpl) validateNodeAllocatable() error {
na := cm.getNodeAllocatableAbsolute()
zeroValue := resource.MustParse("0")
var errors []string
for key, val := range na {
if val.Cmp(zeroValue) <= 0 {
errors = append(errors, fmt.Sprintf("Resource %q has an allocatable of %v", key, val))
}
}
if len(errors) > 0 {
return fmt.Errorf("Invalid Node Allocatable configuration. %s", strings.Join(errors, " "))
}
return nil
}
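
To make the two notions in this file concrete, here is a minimal sketch of the arithmetic behind `getNodeAllocatableAbsolute` (the limit enforced on the `kubepods` cgroup) versus `GetNodeAllocatableReservation` (the amount withheld from scheduling), using the same sample values as the unit tests that follow. Only plain apimachinery `resource.Quantity` operations are used here:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Sample node with 10Gi of memory, kube-reserved=100Mi, system-reserved=50Mi
	// and a hard eviction threshold of memory.available<100Mi.
	capacity := resource.MustParse("10Gi")
	kubeReserved := resource.MustParse("100Mi")
	systemReserved := resource.MustParse("50Mi")
	hardEviction := resource.MustParse("100Mi")

	// Enforced on the kubepods cgroup: capacity - kube-reserved - system-reserved.
	// Hard eviction thresholds are deliberately not subtracted here.
	enforced := capacity.Copy()
	enforced.Sub(kubeReserved)
	enforced.Sub(systemReserved)
	fmt.Println(enforced) // 10090Mi

	// Withheld from scheduling: kube-reserved + system-reserved + hard eviction threshold.
	reservation := resource.NewQuantity(0, resource.BinarySI)
	reservation.Add(kubeReserved)
	reservation.Add(systemReserved)
	reservation.Add(hardEviction)
	fmt.Println(reservation) // 250Mi
}
```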

View File

@@ -0,0 +1,305 @@
// +build linux
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cm
import (
"testing"
"github.com/stretchr/testify/assert"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/v1"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)
func TestNodeAllocatableReservationForScheduling(t *testing.T) {
memoryEvictionThreshold := resource.MustParse("100Mi")
testCases := []struct {
kubeReserved v1.ResourceList
systemReserved v1.ResourceList
expected v1.ResourceList
capacity v1.ResourceList
hardThreshold evictionapi.ThresholdValue
}{
{
kubeReserved: getResourceList("100m", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("150m", "150Mi"),
},
{
kubeReserved: getResourceList("100m", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
hardThreshold: evictionapi.ThresholdValue{
Quantity: &memoryEvictionThreshold,
},
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("150m", "250Mi"),
},
{
kubeReserved: getResourceList("100m", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
capacity: getResourceList("10", "10Gi"),
hardThreshold: evictionapi.ThresholdValue{
Percentage: 0.05,
},
expected: getResourceList("150m", "694157320"),
},
{
kubeReserved: v1.ResourceList{},
systemReserved: v1.ResourceList{},
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("", ""),
},
{
kubeReserved: getResourceList("", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("50m", "150Mi"),
},
{
kubeReserved: getResourceList("50m", "100Mi"),
systemReserved: getResourceList("", "50Mi"),
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("50m", "150Mi"),
},
{
kubeReserved: getResourceList("", "100Mi"),
systemReserved: getResourceList("", "50Mi"),
capacity: getResourceList("10", ""),
expected: getResourceList("", "150Mi"),
},
}
for idx, tc := range testCases {
nc := NodeConfig{
NodeAllocatableConfig: NodeAllocatableConfig{
KubeReserved: tc.kubeReserved,
SystemReserved: tc.systemReserved,
HardEvictionThresholds: []evictionapi.Threshold{
{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: tc.hardThreshold,
},
},
},
}
cm := &containerManagerImpl{
NodeConfig: nc,
capacity: tc.capacity,
}
for k, v := range cm.GetNodeAllocatableReservation() {
expected, exists := tc.expected[k]
assert.True(t, exists, "test case %d expected resource %q", idx+1, k)
assert.Equal(t, expected.MilliValue(), v.MilliValue(), "test case %d failed for resource %q", idx+1, k)
}
}
}
func TestNodeAllocatableWithNilHardThreshold(t *testing.T) {
nc := NodeConfig{
NodeAllocatableConfig: NodeAllocatableConfig{
KubeReserved: getResourceList("100m", "100Mi"),
SystemReserved: getResourceList("50m", "50Mi"),
},
}
cm := &containerManagerImpl{
NodeConfig: nc,
capacity: getResourceList("10", "10Gi"),
}
expected := getResourceList("150m", "150Mi")
for k, v := range cm.GetNodeAllocatableReservation() {
expected, exists := expected[k]
assert.True(t, exists)
assert.Equal(t, expected.MilliValue(), v.MilliValue(), "failed for resource %q", k)
}
}
func TestNodeAllocatableForEnforcement(t *testing.T) {
memoryEvictionThreshold := resource.MustParse("100Mi")
testCases := []struct {
kubeReserved v1.ResourceList
systemReserved v1.ResourceList
capacity v1.ResourceList
expected v1.ResourceList
hardThreshold evictionapi.ThresholdValue
}{
{
kubeReserved: getResourceList("100m", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("9850m", "10090Mi"),
},
{
kubeReserved: getResourceList("100m", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
hardThreshold: evictionapi.ThresholdValue{
Quantity: &memoryEvictionThreshold,
},
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("9850m", "10090Mi"),
},
{
kubeReserved: getResourceList("100m", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
hardThreshold: evictionapi.ThresholdValue{
Percentage: 0.05,
},
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("9850m", "10090Mi"),
},
{
kubeReserved: v1.ResourceList{},
systemReserved: v1.ResourceList{},
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("10", "10Gi"),
},
{
kubeReserved: getResourceList("", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("9950m", "10090Mi"),
},
{
kubeReserved: getResourceList("50m", "100Mi"),
systemReserved: getResourceList("", "50Mi"),
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("9950m", "10090Mi"),
},
{
kubeReserved: getResourceList("", "100Mi"),
systemReserved: getResourceList("", "50Mi"),
capacity: getResourceList("10", ""),
expected: getResourceList("10", ""),
},
}
for idx, tc := range testCases {
nc := NodeConfig{
NodeAllocatableConfig: NodeAllocatableConfig{
KubeReserved: tc.kubeReserved,
SystemReserved: tc.systemReserved,
HardEvictionThresholds: []evictionapi.Threshold{
{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: tc.hardThreshold,
},
},
},
}
cm := &containerManagerImpl{
NodeConfig: nc,
capacity: tc.capacity,
}
for k, v := range cm.getNodeAllocatableAbsolute() {
expected, exists := tc.expected[k]
assert.True(t, exists)
assert.Equal(t, expected.MilliValue(), v.MilliValue(), "test case %d failed for resource %q", idx+1, k)
}
}
}
func TestNodeAllocatableInputValidation(t *testing.T) {
memoryEvictionThreshold := resource.MustParse("100Mi")
highMemoryEvictionThreshold := resource.MustParse("2Gi")
testCases := []struct {
kubeReserved v1.ResourceList
systemReserved v1.ResourceList
capacity v1.ResourceList
hardThreshold evictionapi.ThresholdValue
invalidConfiguration bool
}{
{
kubeReserved: getResourceList("100m", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
capacity: getResourceList("10", "10Gi"),
},
{
kubeReserved: getResourceList("100m", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
hardThreshold: evictionapi.ThresholdValue{
Quantity: &memoryEvictionThreshold,
},
capacity: getResourceList("10", "10Gi"),
},
{
kubeReserved: getResourceList("100m", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
hardThreshold: evictionapi.ThresholdValue{
Percentage: 0.05,
},
capacity: getResourceList("10", "10Gi"),
},
{
kubeReserved: v1.ResourceList{},
systemReserved: v1.ResourceList{},
capacity: getResourceList("10", "10Gi"),
},
{
kubeReserved: getResourceList("", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
capacity: getResourceList("10", "10Gi"),
},
{
kubeReserved: getResourceList("50m", "100Mi"),
systemReserved: getResourceList("", "50Mi"),
capacity: getResourceList("10", "10Gi"),
},
{
kubeReserved: getResourceList("", "100Mi"),
systemReserved: getResourceList("", "50Mi"),
capacity: getResourceList("10", ""),
},
{
kubeReserved: getResourceList("5", "10Gi"),
systemReserved: getResourceList("5", "10Gi"),
hardThreshold: evictionapi.ThresholdValue{
Quantity: &highMemoryEvictionThreshold,
},
capacity: getResourceList("10", "11Gi"),
invalidConfiguration: true,
},
}
for _, tc := range testCases {
nc := NodeConfig{
NodeAllocatableConfig: NodeAllocatableConfig{
KubeReserved: tc.kubeReserved,
SystemReserved: tc.systemReserved,
HardEvictionThresholds: []evictionapi.Threshold{
{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: tc.hardThreshold,
},
},
},
}
cm := &containerManagerImpl{
NodeConfig: nc,
capacity: tc.capacity,
}
err := cm.validateNodeAllocatable()
if err != nil && !tc.invalidConfiguration {
t.Errorf("expected a valid node allocatable configuration, but got: %v", err)
}
if err == nil && tc.invalidConfiguration {
t.Errorf("expected an invalid node allocatable configuration, but validation succeeded")
}
}
}

View File

@@ -200,24 +200,31 @@ func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupN
return nil, fmt.Errorf("failed to read the cgroup directory %v : %v", qc, err)
}
for i := range dirInfo {
// note: we do a contains check because on systemd, the literal cgroupfs name will prefix the qos as well.
if dirInfo[i].IsDir() && strings.Contains(dirInfo[i].Name(), podCgroupNamePrefix) {
// we need to convert the name to an internal identifier
internalName := m.cgroupManager.CgroupName(dirInfo[i].Name())
// we then split the name on the pod prefix to determine the uid
parts := strings.Split(string(internalName), podCgroupNamePrefix)
// the uid is missing, so we log the unexpected cgroup not of form pod<uid>
if len(parts) != 2 {
location := path.Join(qc, dirInfo[i].Name())
glog.Errorf("pod cgroup manager ignoring unexpected cgroup %v because it is not a pod", location)
continue
}
podUID := parts[1]
// because the literal cgroupfs name could encode the qos tier (on systemd), we avoid double encoding
// by just rebuilding the fully qualified CgroupName according to our internal convention.
cgroupName := CgroupName(path.Join(qosContainerName, podCgroupNamePrefix+podUID))
foundPods[types.UID(podUID)] = cgroupName
// it's not a directory, so continue on...
if !dirInfo[i].IsDir() {
continue
}
// convert the concrete cgroupfs name back to an internal identifier
// this is needed to handle path conversion for systemd environments.
// we pass the fully qualified path so decoding can work as expected
// since systemd encodes the path in each segment.
cgroupfsPath := path.Join(qcConversion, dirInfo[i].Name())
internalPath := m.cgroupManager.CgroupName(cgroupfsPath)
// we only care about the base segment of the converted path since that
// is what we are currently reading to know whether it is a pod or not.
basePath := path.Base(string(internalPath))
if !strings.Contains(basePath, podCgroupNamePrefix) {
continue
}
// we then split the name on the pod prefix to determine the uid
parts := strings.Split(basePath, podCgroupNamePrefix)
// the uid is missing, so we log the unexpected cgroup not of form pod<uid>
if len(parts) != 2 {
glog.Errorf("pod cgroup manager ignoring unexpected cgroup %v because it is not a pod", cgroupfsPath)
continue
}
podUID := parts[1]
foundPods[types.UID(podUID)] = internalPath
}
}
}
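
For readers unfamiliar with the naming convention being parsed here, the sketch below shows the UID extraction in isolation. It assumes the pod cgroup prefix is the literal `pod` and uses a made-up UID; the cgroupfs-to-internal-name conversion done by the real `CgroupManager` is elided:

```go
package main

import (
	"fmt"
	"path"
	"strings"
)

// Assumed value of podCgroupNamePrefix, for illustration only.
const podCgroupNamePrefix = "pod"

func main() {
	// Base segment of an already converted (internal) pod cgroup path.
	internalPath := "/kubepods/burstable/podc5a9b3c1-4d2e-11e7-b12a-42010a800002"
	basePath := path.Base(internalPath)
	if !strings.Contains(basePath, podCgroupNamePrefix) {
		fmt.Println("not a pod cgroup:", basePath)
		return
	}
	// Splitting on the prefix yields ["", "<uid>"] for well-formed names.
	parts := strings.Split(basePath, podCgroupNamePrefix)
	if len(parts) != 2 {
		fmt.Println("unexpected cgroup name:", basePath)
		return
	}
	fmt.Println("pod UID:", parts[1])
}
```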

View File

@@ -41,28 +41,30 @@ const (
BackOffPullImage = "BackOff"
// kubelet event reason list
NodeReady = "NodeReady"
NodeNotReady = "NodeNotReady"
NodeSchedulable = "NodeSchedulable"
NodeNotSchedulable = "NodeNotSchedulable"
StartingKubelet = "Starting"
KubeletSetupFailed = "KubeletSetupFailed"
FailedDetachVolume = "FailedDetachVolume"
FailedMountVolume = "FailedMount"
FailedUnMountVolume = "FailedUnMount"
SuccessfulDetachVolume = "SuccessfulDetachVolume"
SuccessfulMountVolume = "SuccessfulMountVolume"
SuccessfulUnMountVolume = "SuccessfulUnMountVolume"
HostPortConflict = "HostPortConflict"
NodeSelectorMismatching = "NodeSelectorMismatching"
InsufficientFreeCPU = "InsufficientFreeCPU"
InsufficientFreeMemory = "InsufficientFreeMemory"
OutOfDisk = "OutOfDisk"
HostNetworkNotSupported = "HostNetworkNotSupported"
UndefinedShaper = "NilShaper"
NodeRebooted = "Rebooted"
ContainerGCFailed = "ContainerGCFailed"
ImageGCFailed = "ImageGCFailed"
NodeReady = "NodeReady"
NodeNotReady = "NodeNotReady"
NodeSchedulable = "NodeSchedulable"
NodeNotSchedulable = "NodeNotSchedulable"
StartingKubelet = "Starting"
KubeletSetupFailed = "KubeletSetupFailed"
FailedDetachVolume = "FailedDetachVolume"
FailedMountVolume = "FailedMount"
FailedUnMountVolume = "FailedUnMount"
SuccessfulDetachVolume = "SuccessfulDetachVolume"
SuccessfulMountVolume = "SuccessfulMountVolume"
SuccessfulUnMountVolume = "SuccessfulUnMountVolume"
HostPortConflict = "HostPortConflict"
NodeSelectorMismatching = "NodeSelectorMismatching"
InsufficientFreeCPU = "InsufficientFreeCPU"
InsufficientFreeMemory = "InsufficientFreeMemory"
OutOfDisk = "OutOfDisk"
HostNetworkNotSupported = "HostNetworkNotSupported"
UndefinedShaper = "NilShaper"
NodeRebooted = "Rebooted"
ContainerGCFailed = "ContainerGCFailed"
ImageGCFailed = "ImageGCFailed"
FailedNodeAllocatableEnforcement = "FailedNodeAllocatableEnforcement"
SuccessfulNodeAllocatableEnforcement = "NodeAllocatableEnforced"
// Image manager event reason list
InvalidDiskCapacity = "InvalidDiskCapacity"

View File

@@ -33,6 +33,7 @@ go_test(
"//pkg/api:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/kubelet/api/v1alpha1/stats:go_default_library",
"//pkg/kubelet/eviction/api:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/quota:go_default_library",
@@ -62,6 +63,7 @@ go_library(
"//pkg/features:go_default_library",
"//pkg/kubelet/api/v1alpha1/stats:go_default_library",
"//pkg/kubelet/cm:go_default_library",
"//pkg/kubelet/eviction/api:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/pod:go_default_library",
"//pkg/kubelet/qos:go_default_library",
@@ -90,6 +92,9 @@ filegroup(
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
srcs = [
":package-srcs",
"//pkg/kubelet/eviction/api:all-srcs",
],
tags = ["automanaged"],
)

View File

@@ -0,0 +1,28 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["types.go"],
tags = ["automanaged"],
deps = ["//vendor:k8s.io/apimachinery/pkg/api/resource"],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

View File

@@ -0,0 +1,79 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package api
import (
"time"
"k8s.io/apimachinery/pkg/api/resource"
)
// Signal defines a signal that can trigger eviction of pods on a node.
type Signal string
const (
// SignalMemoryAvailable is memory available (i.e. capacity - workingSet), in bytes.
SignalMemoryAvailable Signal = "memory.available"
// SignalNodeFsAvailable is amount of storage available on filesystem that kubelet uses for volumes, daemon logs, etc.
SignalNodeFsAvailable Signal = "nodefs.available"
// SignalNodeFsInodesFree is amount of inodes available on filesystem that kubelet uses for volumes, daemon logs, etc.
SignalNodeFsInodesFree Signal = "nodefs.inodesFree"
// SignalImageFsAvailable is amount of storage available on filesystem that container runtime uses for storing images and container writable layers.
SignalImageFsAvailable Signal = "imagefs.available"
// SignalImageFsInodesFree is amount of inodes available on filesystem that container runtime uses for storing images and container writeable layers.
SignalImageFsInodesFree Signal = "imagefs.inodesFree"
)
// ThresholdOperator is the operator used to express a Threshold.
type ThresholdOperator string
const (
// OpLessThan is the operator that expresses a less than operator.
OpLessThan ThresholdOperator = "LessThan"
)
// ThresholdValue is a value holder that abstracts literal versus percentage based quantity
type ThresholdValue struct {
// The following fields are exclusive. Only the topmost non-zero field is used.
// Quantity is a quantity associated with the signal that is evaluated against the specified operator.
Quantity *resource.Quantity
// Percentage represents the usage percentage over the total resource that is evaluated against the specified operator.
Percentage float32
}
// Threshold defines a metric for when eviction should occur.
type Threshold struct {
// Signal defines the entity that was measured.
Signal Signal
// Operator represents a relationship of a signal to a value.
Operator ThresholdOperator
// Value is the threshold the resource is evaluated against.
Value ThresholdValue
// GracePeriod represents the amount of time that a threshold must be met before eviction is triggered.
GracePeriod time.Duration
// MinReclaim represents the minimum amount of resource to reclaim if the threshold is met.
MinReclaim *ThresholdValue
}
// GetThresholdQuantity returns the expected quantity value for a thresholdValue
func GetThresholdQuantity(value ThresholdValue, capacity *resource.Quantity) *resource.Quantity {
if value.Quantity != nil {
return value.Quantity.Copy()
}
return resource.NewQuantity(int64(float64(capacity.Value())*float64(value.Percentage)), resource.BinarySI)
}
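
A quick worked example of `GetThresholdQuantity` covering both flavors of `ThresholdValue`; the capacity and threshold values are illustrative only:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)

func main() {
	capacity := resource.MustParse("10Gi")

	// A literal quantity is returned as-is (a copy of it, to be precise).
	q := resource.MustParse("100Mi")
	fmt.Println(evictionapi.GetThresholdQuantity(evictionapi.ThresholdValue{Quantity: &q}, &capacity))

	// A percentage is applied to the capacity: roughly 1Gi here, expressed in bytes.
	fmt.Println(evictionapi.GetThresholdQuantity(evictionapi.ThresholdValue{Percentage: 0.1}, &capacity))
}
```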

View File

@@ -33,6 +33,7 @@ import (
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/cm"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
"k8s.io/kubernetes/pkg/kubelet/qos"
@@ -66,7 +67,7 @@ type managerImpl struct {
// records when a threshold was first observed
thresholdsFirstObservedAt thresholdsObservedAt
// records the set of thresholds that have been met (including graceperiod) but not yet resolved
thresholdsMet []Threshold
thresholdsMet []evictionapi.Threshold
// resourceToRankFunc maps a resource to ranking function for that resource.
resourceToRankFunc map[v1.ResourceName]rankFunc
// resourceToNodeReclaimFuncs maps a resource to an ordered list of functions that know how to reclaim that resource.
@@ -152,12 +153,12 @@ func (m *managerImpl) IsUnderDiskPressure() bool {
return hasNodeCondition(m.nodeConditions, v1.NodeDiskPressure)
}
func startMemoryThresholdNotifier(thresholds []Threshold, observations signalObservations, hard bool, handler thresholdNotifierHandlerFunc) error {
func startMemoryThresholdNotifier(thresholds []evictionapi.Threshold, observations signalObservations, hard bool, handler thresholdNotifierHandlerFunc) error {
for _, threshold := range thresholds {
if threshold.Signal != SignalMemoryAvailable || hard != isHardEvictionThreshold(threshold) {
if threshold.Signal != evictionapi.SignalMemoryAvailable || hard != isHardEvictionThreshold(threshold) {
continue
}
observed, found := observations[SignalMemoryAvailable]
observed, found := observations[evictionapi.SignalMemoryAvailable]
if !found {
continue
}
@@ -171,7 +172,7 @@ func startMemoryThresholdNotifier(thresholds []Threshold, observations signalObs
return fmt.Errorf("memory cgroup mount point not found")
}
attribute := "memory.usage_in_bytes"
quantity := getThresholdQuantity(threshold.Value, observed.capacity)
quantity := evictionapi.GetThresholdQuantity(threshold.Value, observed.capacity)
usageThreshold := resource.NewQuantity(observed.capacity.Value(), resource.DecimalSI)
usageThreshold.Sub(*quantity)
description := fmt.Sprintf("<%s available", formatThresholdValue(threshold.Value))
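
The translation above from an "available" threshold to a `memory.usage_in_bytes` threshold is simple arithmetic; a minimal sketch with illustrative numbers:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Observed memory capacity of the node (available + working set).
	capacity := resource.MustParse("10Gi")
	// Hard eviction threshold: memory.available<1Gi.
	threshold := resource.MustParse("1Gi")

	// The cgroup notifier is armed on memory.usage_in_bytes, so the
	// "available" threshold is turned into a usage threshold.
	usageThreshold := capacity.Copy()
	usageThreshold.Sub(threshold)
	fmt.Println(usageThreshold) // 9Gi
}
```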

View File

@@ -29,6 +29,7 @@ import (
kubeapi "k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/v1"
statsapi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
)
@@ -180,18 +181,18 @@ func TestMemoryPressure(t *testing.T) {
config := Config{
MaxPodGracePeriodSeconds: 5,
PressureTransitionPeriod: time.Minute * 5,
Thresholds: []Threshold{
Thresholds: []evictionapi.Threshold{
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
GracePeriod: time.Minute * 2,
@@ -397,18 +398,18 @@ func TestDiskPressureNodeFs(t *testing.T) {
config := Config{
MaxPodGracePeriodSeconds: 5,
PressureTransitionPeriod: time.Minute * 5,
Thresholds: []Threshold{
Thresholds: []evictionapi.Threshold{
{
Signal: SignalNodeFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
{
Signal: SignalNodeFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
GracePeriod: time.Minute * 2,
@@ -594,14 +595,14 @@ func TestMinReclaim(t *testing.T) {
config := Config{
MaxPodGracePeriodSeconds: 5,
PressureTransitionPeriod: time.Minute * 5,
Thresholds: []Threshold{
Thresholds: []evictionapi.Threshold{
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("500Mi"),
},
},
@@ -733,14 +734,14 @@ func TestNodeReclaimFuncs(t *testing.T) {
config := Config{
MaxPodGracePeriodSeconds: 5,
PressureTransitionPeriod: time.Minute * 5,
Thresholds: []Threshold{
Thresholds: []evictionapi.Threshold{
{
Signal: SignalNodeFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("500Mi"),
},
},
@@ -925,18 +926,18 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
config := Config{
MaxPodGracePeriodSeconds: 5,
PressureTransitionPeriod: time.Minute * 5,
Thresholds: []Threshold{
Thresholds: []evictionapi.Threshold{
{
Signal: SignalNodeFsInodesFree,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsInodesFree,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Mi"),
},
},
{
Signal: SignalNodeFsInodesFree,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsInodesFree,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Mi"),
},
GracePeriod: time.Minute * 2,
@@ -1127,18 +1128,18 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
config := Config{
MaxPodGracePeriodSeconds: 5,
PressureTransitionPeriod: time.Minute * 5,
Thresholds: []Threshold{
Thresholds: []evictionapi.Threshold{
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
GracePeriod: time.Minute * 2,

View File

@@ -29,6 +29,7 @@ import (
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/v1"
statsapi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/kubelet/qos"
"k8s.io/kubernetes/pkg/kubelet/server/stats"
"k8s.io/kubernetes/pkg/quota/evaluator/core"
@@ -56,44 +57,44 @@ const (
var (
// signalToNodeCondition maps a signal to the node condition to report if threshold is met.
signalToNodeCondition map[Signal]v1.NodeConditionType
signalToNodeCondition map[evictionapi.Signal]v1.NodeConditionType
// signalToResource maps a Signal to its associated Resource.
signalToResource map[Signal]v1.ResourceName
signalToResource map[evictionapi.Signal]v1.ResourceName
// resourceToSignal maps a Resource to its associated Signal
resourceToSignal map[v1.ResourceName]Signal
resourceToSignal map[v1.ResourceName]evictionapi.Signal
)
func init() {
// map eviction signals to node conditions
signalToNodeCondition = map[Signal]v1.NodeConditionType{}
signalToNodeCondition[SignalMemoryAvailable] = v1.NodeMemoryPressure
signalToNodeCondition[SignalImageFsAvailable] = v1.NodeDiskPressure
signalToNodeCondition[SignalNodeFsAvailable] = v1.NodeDiskPressure
signalToNodeCondition[SignalImageFsInodesFree] = v1.NodeDiskPressure
signalToNodeCondition[SignalNodeFsInodesFree] = v1.NodeDiskPressure
signalToNodeCondition = map[evictionapi.Signal]v1.NodeConditionType{}
signalToNodeCondition[evictionapi.SignalMemoryAvailable] = v1.NodeMemoryPressure
signalToNodeCondition[evictionapi.SignalImageFsAvailable] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalNodeFsAvailable] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalImageFsInodesFree] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalNodeFsInodesFree] = v1.NodeDiskPressure
// map signals to resources (and vice-versa)
signalToResource = map[Signal]v1.ResourceName{}
signalToResource[SignalMemoryAvailable] = v1.ResourceMemory
signalToResource[SignalImageFsAvailable] = resourceImageFs
signalToResource[SignalImageFsInodesFree] = resourceImageFsInodes
signalToResource[SignalNodeFsAvailable] = resourceNodeFs
signalToResource[SignalNodeFsInodesFree] = resourceNodeFsInodes
resourceToSignal = map[v1.ResourceName]Signal{}
signalToResource = map[evictionapi.Signal]v1.ResourceName{}
signalToResource[evictionapi.SignalMemoryAvailable] = v1.ResourceMemory
signalToResource[evictionapi.SignalImageFsAvailable] = resourceImageFs
signalToResource[evictionapi.SignalImageFsInodesFree] = resourceImageFsInodes
signalToResource[evictionapi.SignalNodeFsAvailable] = resourceNodeFs
signalToResource[evictionapi.SignalNodeFsInodesFree] = resourceNodeFsInodes
resourceToSignal = map[v1.ResourceName]evictionapi.Signal{}
for key, value := range signalToResource {
resourceToSignal[value] = key
}
}
// validSignal returns true if the signal is supported.
func validSignal(signal Signal) bool {
func validSignal(signal evictionapi.Signal) bool {
_, found := signalToResource[signal]
return found
}
// ParseThresholdConfig parses the flags for thresholds.
func ParseThresholdConfig(evictionHard, evictionSoft, evictionSoftGracePeriod, evictionMinimumReclaim string) ([]Threshold, error) {
results := []Threshold{}
func ParseThresholdConfig(evictionHard, evictionSoft, evictionSoftGracePeriod, evictionMinimumReclaim string) ([]evictionapi.Threshold, error) {
results := []evictionapi.Threshold{}
hardThresholds, err := parseThresholdStatements(evictionHard)
if err != nil {
@@ -134,11 +135,11 @@ func ParseThresholdConfig(evictionHard, evictionSoft, evictionSoftGracePeriod, e
}
// parseThresholdStatements parses the input statements into a list of Threshold objects.
func parseThresholdStatements(expr string) ([]Threshold, error) {
func parseThresholdStatements(expr string) ([]evictionapi.Threshold, error) {
if len(expr) == 0 {
return nil, nil
}
results := []Threshold{}
results := []evictionapi.Threshold{}
statements := strings.Split(expr, ",")
signalsFound := sets.NewString()
for _, statement := range statements {
@@ -156,12 +157,12 @@ func parseThresholdStatements(expr string) ([]Threshold, error) {
}
// parseThresholdStatement parses a threshold statement.
func parseThresholdStatement(statement string) (Threshold, error) {
tokens2Operator := map[string]ThresholdOperator{
"<": OpLessThan,
func parseThresholdStatement(statement string) (evictionapi.Threshold, error) {
tokens2Operator := map[string]evictionapi.ThresholdOperator{
"<": evictionapi.OpLessThan,
}
var (
operator ThresholdOperator
operator evictionapi.ThresholdOperator
parts []string
)
for token := range tokens2Operator {
@@ -173,41 +174,41 @@ func parseThresholdStatement(statement string) (Threshold, error) {
}
}
if len(operator) == 0 || len(parts) != 2 {
return Threshold{}, fmt.Errorf("invalid eviction threshold syntax %v, expected <signal><operator><value>", statement)
return evictionapi.Threshold{}, fmt.Errorf("invalid eviction threshold syntax %v, expected <signal><operator><value>", statement)
}
signal := Signal(parts[0])
signal := evictionapi.Signal(parts[0])
if !validSignal(signal) {
return Threshold{}, fmt.Errorf(unsupportedEvictionSignal, signal)
return evictionapi.Threshold{}, fmt.Errorf(unsupportedEvictionSignal, signal)
}
quantityValue := parts[1]
if strings.HasSuffix(quantityValue, "%") {
percentage, err := parsePercentage(quantityValue)
if err != nil {
return Threshold{}, err
return evictionapi.Threshold{}, err
}
if percentage <= 0 {
return Threshold{}, fmt.Errorf("eviction percentage threshold %v must be positive: %s", signal, quantityValue)
return evictionapi.Threshold{}, fmt.Errorf("eviction percentage threshold %v must be positive: %s", signal, quantityValue)
}
return Threshold{
return evictionapi.Threshold{
Signal: signal,
Operator: operator,
Value: ThresholdValue{
Value: evictionapi.ThresholdValue{
Percentage: percentage,
},
}, nil
}
quantity, err := resource.ParseQuantity(quantityValue)
if err != nil {
return Threshold{}, err
return evictionapi.Threshold{}, err
}
if quantity.Sign() < 0 || quantity.IsZero() {
return Threshold{}, fmt.Errorf("eviction threshold %v must be positive: %s", signal, &quantity)
return evictionapi.Threshold{}, fmt.Errorf("eviction threshold %v must be positive: %s", signal, &quantity)
}
return Threshold{
return evictionapi.Threshold{
Signal: signal,
Operator: operator,
Value: ThresholdValue{
Value: evictionapi.ThresholdValue{
Quantity: &quantity,
},
}, nil
@@ -223,18 +224,18 @@ func parsePercentage(input string) (float32, error) {
}
// parseGracePeriods parses the grace period statements
func parseGracePeriods(expr string) (map[Signal]time.Duration, error) {
func parseGracePeriods(expr string) (map[evictionapi.Signal]time.Duration, error) {
if len(expr) == 0 {
return nil, nil
}
results := map[Signal]time.Duration{}
results := map[evictionapi.Signal]time.Duration{}
statements := strings.Split(expr, ",")
for _, statement := range statements {
parts := strings.Split(statement, "=")
if len(parts) != 2 {
return nil, fmt.Errorf("invalid eviction grace period syntax %v, expected <signal>=<duration>", statement)
}
signal := Signal(parts[0])
signal := evictionapi.Signal(parts[0])
if !validSignal(signal) {
return nil, fmt.Errorf(unsupportedEvictionSignal, signal)
}
@@ -257,18 +258,18 @@ func parseGracePeriods(expr string) (map[Signal]time.Duration, error) {
}
// parseMinimumReclaims parses the minimum reclaim statements
func parseMinimumReclaims(expr string) (map[Signal]ThresholdValue, error) {
func parseMinimumReclaims(expr string) (map[evictionapi.Signal]evictionapi.ThresholdValue, error) {
if len(expr) == 0 {
return nil, nil
}
results := map[Signal]ThresholdValue{}
results := map[evictionapi.Signal]evictionapi.ThresholdValue{}
statements := strings.Split(expr, ",")
for _, statement := range statements {
parts := strings.Split(statement, "=")
if len(parts) != 2 {
return nil, fmt.Errorf("invalid eviction minimum reclaim syntax: %v, expected <signal>=<value>", statement)
}
signal := Signal(parts[0])
signal := evictionapi.Signal(parts[0])
if !validSignal(signal) {
return nil, fmt.Errorf(unsupportedEvictionSignal, signal)
}
@@ -286,7 +287,7 @@ func parseMinimumReclaims(expr string) (map[Signal]ThresholdValue, error) {
if _, found := results[signal]; found {
return nil, fmt.Errorf("duplicate eviction minimum reclaim specified for %v", signal)
}
results[signal] = ThresholdValue{
results[signal] = evictionapi.ThresholdValue{
Percentage: percentage,
}
continue
@@ -302,7 +303,7 @@ func parseMinimumReclaims(expr string) (map[Signal]ThresholdValue, error) {
if err != nil {
return nil, err
}
results[signal] = ThresholdValue{
results[signal] = evictionapi.ThresholdValue{
Quantity: &quantity,
}
}
@@ -402,12 +403,12 @@ func podMemoryUsage(podStats statsapi.PodStats) (v1.ResourceList, error) {
}
// formatThreshold formats a threshold for logging.
func formatThreshold(threshold Threshold) string {
return fmt.Sprintf("threshold(signal=%v, operator=%v, value=%v, gracePeriod=%v)", threshold.Signal, formatThresholdValue(threshold.Value), threshold.Operator, threshold.GracePeriod)
func formatThreshold(threshold evictionapi.Threshold) string {
return fmt.Sprintf("threshold(signal=%v, operator=%v, value=%v, gracePeriod=%v)", threshold.Signal, threshold.Operator, evictionapi.ThresholdValue(threshold.Value), threshold.GracePeriod)
}
// formatThresholdValue formats a thresholdValue for logging.
func formatThresholdValue(value ThresholdValue) string {
// formatThresholdValue formats a thresholdValue for logging.
func formatThresholdValue(value evictionapi.ThresholdValue) string {
if value.Quantity != nil {
return value.Quantity.String()
}
@@ -622,7 +623,7 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider) (signalObserv
result := signalObservations{}
if memory := summary.Node.Memory; memory != nil && memory.AvailableBytes != nil && memory.WorkingSetBytes != nil {
result[SignalMemoryAvailable] = signalObservation{
result[evictionapi.SignalMemoryAvailable] = signalObservation{
available: resource.NewQuantity(int64(*memory.AvailableBytes), resource.BinarySI),
capacity: resource.NewQuantity(int64(*memory.AvailableBytes+*memory.WorkingSetBytes), resource.BinarySI),
time: memory.Time,
@@ -630,14 +631,14 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider) (signalObserv
}
if nodeFs := summary.Node.Fs; nodeFs != nil {
if nodeFs.AvailableBytes != nil && nodeFs.CapacityBytes != nil {
result[SignalNodeFsAvailable] = signalObservation{
result[evictionapi.SignalNodeFsAvailable] = signalObservation{
available: resource.NewQuantity(int64(*nodeFs.AvailableBytes), resource.BinarySI),
capacity: resource.NewQuantity(int64(*nodeFs.CapacityBytes), resource.BinarySI),
// TODO: add timestamp to stat (see memory stat)
}
}
if nodeFs.InodesFree != nil && nodeFs.Inodes != nil {
result[SignalNodeFsInodesFree] = signalObservation{
result[evictionapi.SignalNodeFsInodesFree] = signalObservation{
available: resource.NewQuantity(int64(*nodeFs.InodesFree), resource.BinarySI),
capacity: resource.NewQuantity(int64(*nodeFs.Inodes), resource.BinarySI),
// TODO: add timestamp to stat (see memory stat)
@@ -647,13 +648,13 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider) (signalObserv
if summary.Node.Runtime != nil {
if imageFs := summary.Node.Runtime.ImageFs; imageFs != nil {
if imageFs.AvailableBytes != nil && imageFs.CapacityBytes != nil {
result[SignalImageFsAvailable] = signalObservation{
result[evictionapi.SignalImageFsAvailable] = signalObservation{
available: resource.NewQuantity(int64(*imageFs.AvailableBytes), resource.BinarySI),
capacity: resource.NewQuantity(int64(*imageFs.CapacityBytes), resource.BinarySI),
// TODO: add timestamp to stat (see memory stat)
}
if imageFs.InodesFree != nil && imageFs.Inodes != nil {
result[SignalImageFsInodesFree] = signalObservation{
result[evictionapi.SignalImageFsInodesFree] = signalObservation{
available: resource.NewQuantity(int64(*imageFs.InodesFree), resource.BinarySI),
capacity: resource.NewQuantity(int64(*imageFs.Inodes), resource.BinarySI),
// TODO: add timestamp to stat (see memory stat)
@@ -666,8 +667,8 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider) (signalObserv
}
// thresholdsMet returns the set of thresholds that were met independent of grace period
func thresholdsMet(thresholds []Threshold, observations signalObservations, enforceMinReclaim bool) []Threshold {
results := []Threshold{}
func thresholdsMet(thresholds []evictionapi.Threshold, observations signalObservations, enforceMinReclaim bool) []evictionapi.Threshold {
results := []evictionapi.Threshold{}
for i := range thresholds {
threshold := thresholds[i]
observed, found := observations[threshold.Signal]
@@ -677,14 +678,14 @@ func thresholdsMet(thresholds []Threshold, observations signalObservations, enfo
}
// determine if we have met the specified threshold
thresholdMet := false
quantity := getThresholdQuantity(threshold.Value, observed.capacity)
quantity := evictionapi.GetThresholdQuantity(threshold.Value, observed.capacity)
// if enforceMinReclaim is specified, we compare relative to value - minreclaim
if enforceMinReclaim && threshold.MinReclaim != nil {
quantity.Add(*getThresholdQuantity(*threshold.MinReclaim, observed.capacity))
quantity.Add(*evictionapi.GetThresholdQuantity(*threshold.MinReclaim, observed.capacity))
}
thresholdResult := quantity.Cmp(*observed.available)
switch threshold.Operator {
case OpLessThan:
case evictionapi.OpLessThan:
thresholdMet = thresholdResult > 0
}
if thresholdMet {
@@ -704,12 +705,12 @@ func debugLogObservations(logPrefix string, observations signalObservations) {
}
}
func debugLogThresholdsWithObservation(logPrefix string, thresholds []Threshold, observations signalObservations) {
func debugLogThresholdsWithObservation(logPrefix string, thresholds []evictionapi.Threshold, observations signalObservations) {
for i := range thresholds {
threshold := thresholds[i]
observed, found := observations[threshold.Signal]
if found {
quantity := getThresholdQuantity(threshold.Value, observed.capacity)
quantity := evictionapi.GetThresholdQuantity(threshold.Value, observed.capacity)
glog.V(3).Infof("eviction manager: %v: threshold [signal=%v, quantity=%v] observed %v", logPrefix, threshold.Signal, quantity, observed.available)
} else {
glog.V(3).Infof("eviction manager: %v: threshold [signal=%v] had no observation", logPrefix, threshold.Signal)
@@ -717,8 +718,8 @@ func debugLogThresholdsWithObservation(logPrefix string, thresholds []Threshold,
}
}
func thresholdsUpdatedStats(thresholds []Threshold, observations, lastObservations signalObservations) []Threshold {
results := []Threshold{}
func thresholdsUpdatedStats(thresholds []evictionapi.Threshold, observations, lastObservations signalObservations) []evictionapi.Threshold {
results := []evictionapi.Threshold{}
for i := range thresholds {
threshold := thresholds[i]
observed, found := observations[threshold.Signal]
@@ -734,16 +735,8 @@ func thresholdsUpdatedStats(thresholds []Threshold, observations, lastObservatio
return results
}
// getThresholdQuantity returns the expected quantity value for a thresholdValue
func getThresholdQuantity(value ThresholdValue, capacity *resource.Quantity) *resource.Quantity {
if value.Quantity != nil {
return value.Quantity.Copy()
}
return resource.NewQuantity(int64(float64(capacity.Value())*float64(value.Percentage)), resource.BinarySI)
}
// thresholdsFirstObservedAt merges the input set of thresholds with the previous observation to determine when active set of thresholds were initially met.
func thresholdsFirstObservedAt(thresholds []Threshold, lastObservedAt thresholdsObservedAt, now time.Time) thresholdsObservedAt {
func thresholdsFirstObservedAt(thresholds []evictionapi.Threshold, lastObservedAt thresholdsObservedAt, now time.Time) thresholdsObservedAt {
results := thresholdsObservedAt{}
for i := range thresholds {
observedAt, found := lastObservedAt[thresholds[i]]
@@ -756,8 +749,8 @@ func thresholdsFirstObservedAt(thresholds []Threshold, lastObservedAt thresholds
}
// thresholdsMetGracePeriod returns the set of thresholds that have satisfied associated grace period
func thresholdsMetGracePeriod(observedAt thresholdsObservedAt, now time.Time) []Threshold {
results := []Threshold{}
func thresholdsMetGracePeriod(observedAt thresholdsObservedAt, now time.Time) []evictionapi.Threshold {
results := []evictionapi.Threshold{}
for threshold, at := range observedAt {
duration := now.Sub(at)
if duration < threshold.GracePeriod {
@@ -770,7 +763,7 @@ func thresholdsMetGracePeriod(observedAt thresholdsObservedAt, now time.Time) []
}
// nodeConditions returns the set of node conditions associated with a threshold
func nodeConditions(thresholds []Threshold) []v1.NodeConditionType {
func nodeConditions(thresholds []evictionapi.Threshold) []v1.NodeConditionType {
results := []v1.NodeConditionType{}
for _, threshold := range thresholds {
if nodeCondition, found := signalToNodeCondition[threshold.Signal]; found {
@@ -832,7 +825,7 @@ func hasNodeCondition(inputs []v1.NodeConditionType, item v1.NodeConditionType)
}
// mergeThresholds will merge both threshold lists eliminating duplicates.
func mergeThresholds(inputsA []Threshold, inputsB []Threshold) []Threshold {
func mergeThresholds(inputsA []evictionapi.Threshold, inputsB []evictionapi.Threshold) []evictionapi.Threshold {
results := inputsA
for _, threshold := range inputsB {
if !hasThreshold(results, threshold) {
@@ -843,7 +836,7 @@ func mergeThresholds(inputsA []Threshold, inputsB []Threshold) []Threshold {
}
// hasThreshold returns true if the threshold is in the input list
func hasThreshold(inputs []Threshold, item Threshold) bool {
func hasThreshold(inputs []evictionapi.Threshold, item evictionapi.Threshold) bool {
for _, input := range inputs {
if input.GracePeriod == item.GracePeriod && input.Operator == item.Operator && input.Signal == item.Signal && compareThresholdValue(input.Value, item.Value) {
return true
@@ -853,7 +846,7 @@ func hasThreshold(inputs []Threshold, item Threshold) bool {
}
// compareThresholdValue returns true if the two thresholdValue objects are logically the same
func compareThresholdValue(a ThresholdValue, b ThresholdValue) bool {
func compareThresholdValue(a evictionapi.ThresholdValue, b evictionapi.ThresholdValue) bool {
if a.Quantity != nil {
if b.Quantity == nil {
return false
@@ -867,7 +860,7 @@ func compareThresholdValue(a ThresholdValue, b ThresholdValue) bool {
}
// getStarvedResources returns the set of resources that are starved based on thresholds met.
func getStarvedResources(thresholds []Threshold) []v1.ResourceName {
func getStarvedResources(thresholds []evictionapi.Threshold) []v1.ResourceName {
results := []v1.ResourceName{}
for _, threshold := range thresholds {
if starvedResource, found := signalToResource[threshold.Signal]; found {
@@ -878,7 +871,7 @@ func getStarvedResources(thresholds []Threshold) []v1.ResourceName {
}
// isSoftEvictionThresholds returns true if the thresholds met for the starved resource are only soft thresholds
func isSoftEvictionThresholds(thresholds []Threshold, starvedResource v1.ResourceName) bool {
func isSoftEvictionThresholds(thresholds []evictionapi.Threshold, starvedResource v1.ResourceName) bool {
for _, threshold := range thresholds {
if resourceToCheck := signalToResource[threshold.Signal]; resourceToCheck != starvedResource {
continue
@@ -891,7 +884,7 @@ func isSoftEvictionThresholds(thresholds []Threshold, starvedResource v1.Resourc
}
// isHardEvictionThreshold returns true if the given threshold is a hard eviction threshold, i.e. it has no grace period
func isHardEvictionThreshold(threshold Threshold) bool {
func isHardEvictionThreshold(threshold evictionapi.Threshold) bool {
return threshold.GracePeriod == time.Duration(0)
}

View File

@@ -28,6 +28,7 @@ import (
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/v1"
statsapi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/quota"
)
@@ -44,7 +45,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod string
evictionMinReclaim string
expectErr bool
expectThresholds []Threshold
expectThresholds []evictionapi.Threshold
}{
"no values": {
evictionHard: "",
@@ -52,7 +53,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "",
expectErr: false,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"all flag values": {
evictionHard: "memory.available<150Mi",
@@ -60,25 +61,25 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "memory.available=30s",
evictionMinReclaim: "memory.available=0",
expectErr: false,
expectThresholds: []Threshold{
expectThresholds: []evictionapi.Threshold{
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("150Mi"),
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("0"),
},
},
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("300Mi"),
},
GracePeriod: gracePeriod,
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("0"),
},
},
@@ -90,25 +91,25 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "memory.available=30s",
evictionMinReclaim: "memory.available=5%",
expectErr: false,
expectThresholds: []Threshold{
expectThresholds: []evictionapi.Threshold{
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Percentage: 0.1,
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Percentage: 0.05,
},
},
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Percentage: 0.3,
},
GracePeriod: gracePeriod,
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Percentage: 0.05,
},
},
@@ -120,46 +121,46 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "imagefs.available=30s,nodefs.available=30s",
evictionMinReclaim: "imagefs.available=2Gi,nodefs.available=1Gi",
expectErr: false,
expectThresholds: []Threshold{
expectThresholds: []evictionapi.Threshold{
{
Signal: SignalImageFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalImageFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("150Mi"),
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
},
{
Signal: SignalNodeFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("100Mi"),
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
{
Signal: SignalImageFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalImageFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("300Mi"),
},
GracePeriod: gracePeriod,
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
},
{
Signal: SignalNodeFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("200Mi"),
},
GracePeriod: gracePeriod,
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
@@ -171,46 +172,46 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "imagefs.available=30s,nodefs.available=30s",
evictionMinReclaim: "imagefs.available=10%,nodefs.available=5%",
expectErr: false,
expectThresholds: []Threshold{
expectThresholds: []evictionapi.Threshold{
{
Signal: SignalImageFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalImageFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Percentage: 0.15,
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Percentage: 0.1,
},
},
{
Signal: SignalNodeFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Percentage: 0.105,
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Percentage: 0.05,
},
},
{
Signal: SignalImageFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalImageFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Percentage: 0.3,
},
GracePeriod: gracePeriod,
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Percentage: 0.1,
},
},
{
Signal: SignalNodeFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Percentage: 0.205,
},
GracePeriod: gracePeriod,
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Percentage: 0.05,
},
},
@@ -222,46 +223,46 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "imagefs.inodesFree=30s,nodefs.inodesFree=30s",
evictionMinReclaim: "imagefs.inodesFree=2Gi,nodefs.inodesFree=1Gi",
expectErr: false,
expectThresholds: []Threshold{
expectThresholds: []evictionapi.Threshold{
{
Signal: SignalImageFsInodesFree,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalImageFsInodesFree,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("150Mi"),
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
},
{
Signal: SignalNodeFsInodesFree,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsInodesFree,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("100Mi"),
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
{
Signal: SignalImageFsInodesFree,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalImageFsInodesFree,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("300Mi"),
},
GracePeriod: gracePeriod,
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
},
{
Signal: SignalNodeFsInodesFree,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsInodesFree,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("200Mi"),
},
GracePeriod: gracePeriod,
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
@@ -273,7 +274,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"hard-signal-negative": {
evictionHard: "memory.available<-150Mi",
@@ -281,7 +282,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"hard-signal-negative-percentage": {
evictionHard: "memory.available<-15%",
@@ -289,7 +290,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"soft-signal-negative": {
evictionHard: "",
@@ -297,7 +298,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"duplicate-signal": {
evictionHard: "memory.available<150Mi,memory.available<100Mi",
@@ -305,7 +306,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"valid-and-invalid-signal": {
evictionHard: "memory.available<150Mi,invalid.foo<150Mi",
@@ -313,7 +314,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"soft-no-grace-period": {
evictionHard: "",
@@ -321,7 +322,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"soft-neg-grace-period": {
evictionHard: "",
@@ -329,7 +330,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "memory.available=-30s",
evictionMinReclaim: "",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"neg-reclaim": {
evictionHard: "",
@@ -337,7 +338,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "memory.available=-300Mi",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"duplicate-reclaim": {
evictionHard: "",
@@ -345,7 +346,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "memory.available=-300Mi,memory.available=-100Mi",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
}
for testName, testCase := range testCases {
@@ -359,7 +360,7 @@ func TestParseThresholdConfig(t *testing.T) {
}
}
func thresholdsEqual(expected []Threshold, actual []Threshold) bool {
func thresholdsEqual(expected []evictionapi.Threshold, actual []evictionapi.Threshold) bool {
if len(expected) != len(actual) {
return false
}
@@ -388,7 +389,7 @@ func thresholdsEqual(expected []Threshold, actual []Threshold) bool {
return true
}
func thresholdEqual(a Threshold, b Threshold) bool {
func thresholdEqual(a evictionapi.Threshold, b evictionapi.Threshold) bool {
return a.GracePeriod == b.GracePeriod &&
a.Operator == b.Operator &&
a.Signal == b.Signal &&
@@ -746,7 +747,7 @@ func TestMakeSignalObservations(t *testing.T) {
if err != nil {
t.Errorf("Unexpected err: %v", err)
}
memQuantity, found := actualObservations[SignalMemoryAvailable]
memQuantity, found := actualObservations[evictionapi.SignalMemoryAvailable]
if !found {
t.Errorf("Expected available memory observation: %v", err)
}
@@ -756,7 +757,7 @@ func TestMakeSignalObservations(t *testing.T) {
if expectedBytes := int64(nodeWorkingSetBytes + nodeAvailableBytes); memQuantity.capacity.Value() != expectedBytes {
t.Errorf("Expected %v, actual: %v", expectedBytes, memQuantity.capacity.Value())
}
nodeFsQuantity, found := actualObservations[SignalNodeFsAvailable]
nodeFsQuantity, found := actualObservations[evictionapi.SignalNodeFsAvailable]
if !found {
t.Errorf("Expected available nodefs observation: %v", err)
}
@@ -766,7 +767,7 @@ func TestMakeSignalObservations(t *testing.T) {
if expectedBytes := int64(nodeFsCapacityBytes); nodeFsQuantity.capacity.Value() != expectedBytes {
t.Errorf("Expected %v, actual: %v", expectedBytes, nodeFsQuantity.capacity.Value())
}
nodeFsInodesQuantity, found := actualObservations[SignalNodeFsInodesFree]
nodeFsInodesQuantity, found := actualObservations[evictionapi.SignalNodeFsInodesFree]
if !found {
t.Errorf("Expected inodes free nodefs observation: %v", err)
}
@@ -776,7 +777,7 @@ func TestMakeSignalObservations(t *testing.T) {
if expected := int64(nodeFsInodes); nodeFsInodesQuantity.capacity.Value() != expected {
t.Errorf("Expected %v, actual: %v", expected, nodeFsInodesQuantity.capacity.Value())
}
imageFsQuantity, found := actualObservations[SignalImageFsAvailable]
imageFsQuantity, found := actualObservations[evictionapi.SignalImageFsAvailable]
if !found {
t.Errorf("Expected available imagefs observation: %v", err)
}
@@ -786,7 +787,7 @@ func TestMakeSignalObservations(t *testing.T) {
if expectedBytes := int64(imageFsCapacityBytes); imageFsQuantity.capacity.Value() != expectedBytes {
t.Errorf("Expected %v, actual: %v", expectedBytes, imageFsQuantity.capacity.Value())
}
imageFsInodesQuantity, found := actualObservations[SignalImageFsInodesFree]
imageFsInodesQuantity, found := actualObservations[evictionapi.SignalImageFsInodesFree]
if !found {
t.Errorf("Expected inodes free imagefs observation: %v", err)
}
@@ -811,67 +812,67 @@ func TestMakeSignalObservations(t *testing.T) {
}
func TestThresholdsMet(t *testing.T) {
hardThreshold := Threshold{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
hardThreshold := evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("500Mi"),
},
}
testCases := map[string]struct {
enforceMinReclaim bool
thresholds []Threshold
thresholds []evictionapi.Threshold
observations signalObservations
result []Threshold
result []evictionapi.Threshold
}{
"empty": {
enforceMinReclaim: false,
thresholds: []Threshold{},
thresholds: []evictionapi.Threshold{},
observations: signalObservations{},
result: []Threshold{},
result: []evictionapi.Threshold{},
},
"threshold-met-memory": {
enforceMinReclaim: false,
thresholds: []Threshold{hardThreshold},
thresholds: []evictionapi.Threshold{hardThreshold},
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("500Mi"),
},
},
result: []Threshold{hardThreshold},
result: []evictionapi.Threshold{hardThreshold},
},
"threshold-not-met": {
enforceMinReclaim: false,
thresholds: []Threshold{hardThreshold},
thresholds: []evictionapi.Threshold{hardThreshold},
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("2Gi"),
},
},
result: []Threshold{},
result: []evictionapi.Threshold{},
},
"threshold-met-with-min-reclaim": {
enforceMinReclaim: true,
thresholds: []Threshold{hardThreshold},
thresholds: []evictionapi.Threshold{hardThreshold},
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("1.05Gi"),
},
},
result: []Threshold{hardThreshold},
result: []evictionapi.Threshold{hardThreshold},
},
"threshold-not-met-with-min-reclaim": {
enforceMinReclaim: true,
thresholds: []Threshold{hardThreshold},
thresholds: []evictionapi.Threshold{hardThreshold},
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("2Gi"),
},
},
result: []Threshold{},
result: []evictionapi.Threshold{},
},
}
for testName, testCase := range testCases {
@@ -883,8 +884,8 @@ func TestThresholdsMet(t *testing.T) {
}
func TestThresholdsUpdatedStats(t *testing.T) {
updatedThreshold := Threshold{
Signal: SignalMemoryAvailable,
updatedThreshold := evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
}
locationUTC, err := time.LoadLocation("UTC")
if err != nil {
@@ -892,76 +893,76 @@ func TestThresholdsUpdatedStats(t *testing.T) {
return
}
testCases := map[string]struct {
thresholds []Threshold
thresholds []evictionapi.Threshold
observations signalObservations
last signalObservations
result []Threshold
result []evictionapi.Threshold
}{
"empty": {
thresholds: []Threshold{},
thresholds: []evictionapi.Threshold{},
observations: signalObservations{},
last: signalObservations{},
result: []Threshold{},
result: []evictionapi.Threshold{},
},
"no-time": {
thresholds: []Threshold{updatedThreshold},
thresholds: []evictionapi.Threshold{updatedThreshold},
observations: signalObservations{
SignalMemoryAvailable: signalObservation{},
evictionapi.SignalMemoryAvailable: signalObservation{},
},
last: signalObservations{},
result: []Threshold{updatedThreshold},
result: []evictionapi.Threshold{updatedThreshold},
},
"no-last-observation": {
thresholds: []Threshold{updatedThreshold},
thresholds: []evictionapi.Threshold{updatedThreshold},
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
time: metav1.Date(2016, 1, 1, 0, 0, 0, 0, locationUTC),
},
},
last: signalObservations{},
result: []Threshold{updatedThreshold},
result: []evictionapi.Threshold{updatedThreshold},
},
"time-machine": {
thresholds: []Threshold{updatedThreshold},
thresholds: []evictionapi.Threshold{updatedThreshold},
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
time: metav1.Date(2016, 1, 1, 0, 0, 0, 0, locationUTC),
},
},
last: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
time: metav1.Date(2016, 1, 1, 0, 1, 0, 0, locationUTC),
},
},
result: []Threshold{},
result: []evictionapi.Threshold{},
},
"same-observation": {
thresholds: []Threshold{updatedThreshold},
thresholds: []evictionapi.Threshold{updatedThreshold},
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
time: metav1.Date(2016, 1, 1, 0, 0, 0, 0, locationUTC),
},
},
last: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
time: metav1.Date(2016, 1, 1, 0, 0, 0, 0, locationUTC),
},
},
result: []Threshold{},
result: []evictionapi.Threshold{},
},
"new-observation": {
thresholds: []Threshold{updatedThreshold},
thresholds: []evictionapi.Threshold{updatedThreshold},
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
time: metav1.Date(2016, 1, 1, 0, 1, 0, 0, locationUTC),
},
},
last: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
time: metav1.Date(2016, 1, 1, 0, 0, 0, 0, locationUTC),
},
},
result: []Threshold{updatedThreshold},
result: []evictionapi.Threshold{updatedThreshold},
},
}
for testName, testCase := range testCases {
@@ -973,21 +974,21 @@ func TestThresholdsUpdatedStats(t *testing.T) {
}
func TestPercentageThresholdsMet(t *testing.T) {
specificThresholds := []Threshold{
specificThresholds := []evictionapi.Threshold{
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Percentage: 0.2,
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Percentage: 0.05,
},
},
{
Signal: SignalNodeFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Percentage: 0.3,
},
},
@@ -995,19 +996,19 @@ func TestPercentageThresholdsMet(t *testing.T) {
testCases := map[string]struct {
enforceMinRelaim bool
thresholds []Threshold
thresholds []evictionapi.Threshold
observations signalObservations
result []Threshold
result []evictionapi.Threshold
}{
"BothMet": {
enforceMinRelaim: false,
thresholds: specificThresholds,
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("100Mi"),
capacity: quantityMustParse("1000Mi"),
},
SignalNodeFsAvailable: signalObservation{
evictionapi.SignalNodeFsAvailable: signalObservation{
available: quantityMustParse("100Gi"),
capacity: quantityMustParse("1000Gi"),
},
@@ -1018,68 +1019,68 @@ func TestPercentageThresholdsMet(t *testing.T) {
enforceMinRelaim: false,
thresholds: specificThresholds,
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("300Mi"),
capacity: quantityMustParse("1000Mi"),
},
SignalNodeFsAvailable: signalObservation{
evictionapi.SignalNodeFsAvailable: signalObservation{
available: quantityMustParse("400Gi"),
capacity: quantityMustParse("1000Gi"),
},
},
result: []Threshold{},
result: []evictionapi.Threshold{},
},
"DiskMet": {
enforceMinRelaim: false,
thresholds: specificThresholds,
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("300Mi"),
capacity: quantityMustParse("1000Mi"),
},
SignalNodeFsAvailable: signalObservation{
evictionapi.SignalNodeFsAvailable: signalObservation{
available: quantityMustParse("100Gi"),
capacity: quantityMustParse("1000Gi"),
},
},
result: []Threshold{specificThresholds[1]},
result: []evictionapi.Threshold{specificThresholds[1]},
},
"MemoryMet": {
enforceMinRelaim: false,
thresholds: specificThresholds,
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("100Mi"),
capacity: quantityMustParse("1000Mi"),
},
SignalNodeFsAvailable: signalObservation{
evictionapi.SignalNodeFsAvailable: signalObservation{
available: quantityMustParse("400Gi"),
capacity: quantityMustParse("1000Gi"),
},
},
result: []Threshold{specificThresholds[0]},
result: []evictionapi.Threshold{specificThresholds[0]},
},
"MemoryMetWithMinReclaim": {
enforceMinRelaim: true,
thresholds: specificThresholds,
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("225Mi"),
capacity: quantityMustParse("1000Mi"),
},
},
result: []Threshold{specificThresholds[0]},
result: []evictionapi.Threshold{specificThresholds[0]},
},
"MemoryNotMetWithMinReclaim": {
enforceMinRelaim: true,
thresholds: specificThresholds,
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("300Mi"),
capacity: quantityMustParse("1000Mi"),
},
},
result: []Threshold{},
result: []evictionapi.Threshold{},
},
}
for testName, testCase := range testCases {
@@ -1091,29 +1092,29 @@ func TestPercentageThresholdsMet(t *testing.T) {
}
func TestThresholdsFirstObservedAt(t *testing.T) {
hardThreshold := Threshold{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
hardThreshold := evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
}
now := metav1.Now()
oldTime := metav1.NewTime(now.Time.Add(-1 * time.Minute))
testCases := map[string]struct {
thresholds []Threshold
thresholds []evictionapi.Threshold
lastObservedAt thresholdsObservedAt
now time.Time
result thresholdsObservedAt
}{
"empty": {
thresholds: []Threshold{},
thresholds: []evictionapi.Threshold{},
lastObservedAt: thresholdsObservedAt{},
now: now.Time,
result: thresholdsObservedAt{},
},
"no-previous-observation": {
thresholds: []Threshold{hardThreshold},
thresholds: []evictionapi.Threshold{hardThreshold},
lastObservedAt: thresholdsObservedAt{},
now: now.Time,
result: thresholdsObservedAt{
@@ -1121,7 +1122,7 @@ func TestThresholdsFirstObservedAt(t *testing.T) {
},
},
"previous-observation": {
thresholds: []Threshold{hardThreshold},
thresholds: []evictionapi.Threshold{hardThreshold},
lastObservedAt: thresholdsObservedAt{
hardThreshold: oldTime.Time,
},
@@ -1141,17 +1142,17 @@ func TestThresholdsFirstObservedAt(t *testing.T) {
func TestThresholdsMetGracePeriod(t *testing.T) {
now := metav1.Now()
hardThreshold := Threshold{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
hardThreshold := evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
}
softThreshold := Threshold{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
softThreshold := evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
GracePeriod: 1 * time.Minute,
@@ -1160,33 +1161,33 @@ func TestThresholdsMetGracePeriod(t *testing.T) {
testCases := map[string]struct {
observedAt thresholdsObservedAt
now time.Time
result []Threshold
result []evictionapi.Threshold
}{
"empty": {
observedAt: thresholdsObservedAt{},
now: now.Time,
result: []Threshold{},
result: []evictionapi.Threshold{},
},
"hard-threshold-met": {
observedAt: thresholdsObservedAt{
hardThreshold: now.Time,
},
now: now.Time,
result: []Threshold{hardThreshold},
result: []evictionapi.Threshold{hardThreshold},
},
"soft-threshold-not-met": {
observedAt: thresholdsObservedAt{
softThreshold: now.Time,
},
now: now.Time,
result: []Threshold{},
result: []evictionapi.Threshold{},
},
"soft-threshold-met": {
observedAt: thresholdsObservedAt{
softThreshold: oldTime.Time,
},
now: now.Time,
result: []Threshold{softThreshold},
result: []evictionapi.Threshold{softThreshold},
},
}
for testName, testCase := range testCases {
@@ -1199,16 +1200,16 @@ func TestThresholdsMetGracePeriod(t *testing.T) {
func TestNodeConditions(t *testing.T) {
testCases := map[string]struct {
inputs []Threshold
inputs []evictionapi.Threshold
result []v1.NodeConditionType
}{
"empty-list": {
inputs: []Threshold{},
inputs: []evictionapi.Threshold{},
result: []v1.NodeConditionType{},
},
"memory.available": {
inputs: []Threshold{
{Signal: SignalMemoryAvailable},
inputs: []evictionapi.Threshold{
{Signal: evictionapi.SignalMemoryAvailable},
},
result: []v1.NodeConditionType{v1.NodeMemoryPressure},
},
@@ -1327,24 +1328,24 @@ func TestHasNodeConditions(t *testing.T) {
func TestGetStarvedResources(t *testing.T) {
testCases := map[string]struct {
inputs []Threshold
inputs []evictionapi.Threshold
result []v1.ResourceName
}{
"memory.available": {
inputs: []Threshold{
{Signal: SignalMemoryAvailable},
inputs: []evictionapi.Threshold{
{Signal: evictionapi.SignalMemoryAvailable},
},
result: []v1.ResourceName{v1.ResourceMemory},
},
"imagefs.available": {
inputs: []Threshold{
{Signal: SignalImageFsAvailable},
inputs: []evictionapi.Threshold{
{Signal: evictionapi.SignalImageFsAvailable},
},
result: []v1.ResourceName{resourceImageFs},
},
"nodefs.available": {
inputs: []Threshold{
{Signal: SignalNodeFsAvailable},
inputs: []evictionapi.Threshold{
{Signal: evictionapi.SignalNodeFsAvailable},
},
result: []v1.ResourceName{resourceNodeFs},
},
@@ -1397,50 +1398,50 @@ func testParsePercentage(t *testing.T) {
func testCompareThresholdValue(t *testing.T) {
testCases := []struct {
a, b ThresholdValue
a, b evictionapi.ThresholdValue
equal bool
}{
{
a: ThresholdValue{
a: evictionapi.ThresholdValue{
Quantity: resource.NewQuantity(123, resource.BinarySI),
},
b: ThresholdValue{
b: evictionapi.ThresholdValue{
Quantity: resource.NewQuantity(123, resource.BinarySI),
},
equal: true,
},
{
a: ThresholdValue{
a: evictionapi.ThresholdValue{
Quantity: resource.NewQuantity(123, resource.BinarySI),
},
b: ThresholdValue{
b: evictionapi.ThresholdValue{
Quantity: resource.NewQuantity(456, resource.BinarySI),
},
equal: false,
},
{
a: ThresholdValue{
a: evictionapi.ThresholdValue{
Quantity: resource.NewQuantity(123, resource.BinarySI),
},
b: ThresholdValue{
b: evictionapi.ThresholdValue{
Percentage: 0.1,
},
equal: false,
},
{
a: ThresholdValue{
a: evictionapi.ThresholdValue{
Percentage: 0.1,
},
b: ThresholdValue{
b: evictionapi.ThresholdValue{
Percentage: 0.1,
},
equal: true,
},
{
a: ThresholdValue{
a: evictionapi.ThresholdValue{
Percentage: 0.2,
},
b: ThresholdValue{
b: evictionapi.ThresholdValue{
Percentage: 0.1,
},
equal: false,
@@ -1601,7 +1602,7 @@ func (s1 nodeConditionList) Equal(s2 nodeConditionList) bool {
}
// thresholdList is a simple alias to support equality checking independent of order
type thresholdList []Threshold
type thresholdList []evictionapi.Threshold
// Equal adds the ability to check equality between two lists of node conditions.
func (s1 thresholdList) Equal(s2 thresholdList) bool {

View File

@@ -23,22 +23,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/api/v1"
statsapi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
)
// Signal defines a signal that can trigger eviction of pods on a node.
type Signal string
const (
// SignalMemoryAvailable is memory available (i.e. capacity - workingSet), in bytes.
SignalMemoryAvailable Signal = "memory.available"
// SignalNodeFsAvailable is amount of storage available on filesystem that kubelet uses for volumes, daemon logs, etc.
SignalNodeFsAvailable Signal = "nodefs.available"
// SignalNodeFsInodesFree is amount of inodes available on filesystem that kubelet uses for volumes, daemon logs, etc.
SignalNodeFsInodesFree Signal = "nodefs.inodesFree"
// SignalImageFsAvailable is amount of storage available on filesystem that container runtime uses for storing images and container writable layers.
SignalImageFsAvailable Signal = "imagefs.available"
// SignalImageFsInodesFree is amount of inodes available on filesystem that container runtime uses for storing images and container writeable layers.
SignalImageFsInodesFree Signal = "imagefs.inodesFree"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)
// fsStatsType defines the types of filesystem stats to collect.
@@ -53,14 +38,6 @@ const (
fsStatsRoot fsStatsType = "root"
)
// ThresholdOperator is the operator used to express a Threshold.
type ThresholdOperator string
const (
// OpLessThan is the operator that expresses a less than operator.
OpLessThan ThresholdOperator = "LessThan"
)
// Config holds information about how eviction is configured.
type Config struct {
// PressureTransitionPeriod is duration the kubelet has to wait before transititioning out of a pressure condition.
@@ -68,35 +45,11 @@ type Config struct {
// Maximum allowed grace period (in seconds) to use when terminating pods in response to a soft eviction threshold being met.
MaxPodGracePeriodSeconds int64
// Thresholds define the set of conditions monitored to trigger eviction.
Thresholds []Threshold
Thresholds []evictionapi.Threshold
// KernelMemcgNotification if true will integrate with the kernel memcg notification to determine if memory thresholds are crossed.
KernelMemcgNotification bool
}
// ThresholdValue is a value holder that abstracts literal versus percentage based quantity
type ThresholdValue struct {
// The following fields are exclusive. Only the topmost non-zero field is used.
// Quantity is a quantity associated with the signal that is evaluated against the specified operator.
Quantity *resource.Quantity
// Percentage represents the usage percentage over the total resource that is evaluated against the specified operator.
Percentage float32
}
// Threshold defines a metric for when eviction should occur.
type Threshold struct {
// Signal defines the entity that was measured.
Signal Signal
// Operator represents a relationship of a signal to a value.
Operator ThresholdOperator
// Value is the threshold the resource is evaluated against.
Value ThresholdValue
// GracePeriod represents the amount of time that a threshold must be met before eviction is triggered.
GracePeriod time.Duration
// MinReclaim represents the minimum amount of resource to reclaim if the threshold is met.
MinReclaim *ThresholdValue
}
// Manager evaluates when an eviction threshold for node stability has been met on the node.
type Manager interface {
// Start starts the control loop to monitor eviction thresholds at specified interval.
@@ -150,10 +103,10 @@ type signalObservation struct {
}
// signalObservations maps a signal to an observed quantity
type signalObservations map[Signal]signalObservation
type signalObservations map[evictionapi.Signal]signalObservation
// thresholdsObservedAt maps a threshold to a time that it was observed
type thresholdsObservedAt map[Threshold]time.Time
type thresholdsObservedAt map[evictionapi.Threshold]time.Time
// nodeConditionsObservedAt maps a node condition to a time that it was observed
type nodeConditionsObservedAt map[v1.NodeConditionType]time.Time

View File

@@ -34,7 +34,6 @@ import (
clientgoclientset "k8s.io/client-go/kubernetes"
cadvisorapi "github.com/google/cadvisor/info/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
@@ -359,11 +358,6 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
KernelMemcgNotification: kubeCfg.ExperimentalKernelMemcgNotification,
}
reservation, err := ParseReservation(kubeCfg.KubeReserved, kubeCfg.SystemReserved)
if err != nil {
return nil, err
}
var dockerExecHandler dockertools.ExecHandler
switch kubeCfg.DockerExecHandlerName {
case "native":
@@ -465,7 +459,6 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
nodeIP: net.ParseIP(kubeCfg.NodeIP),
clock: clock.RealClock{},
outOfDiskTransitionFrequency: kubeCfg.OutOfDiskTransitionFrequency.Duration,
reservation: *reservation,
enableCustomMetrics: kubeCfg.EnableCustomMetrics,
babysitDaemons: kubeCfg.BabysitDaemons,
enableControllerAttachDetach: kubeCfg.EnableControllerAttachDetach,
@@ -1034,10 +1027,6 @@ type Kubelet struct {
// getting rescheduled onto the node.
outOfDiskTransitionFrequency time.Duration
// reservation specifies resources which are reserved for non-pod usage, including kubernetes and
// non-kubernetes system processes.
reservation kubetypes.Reservation
// support gathering custom metrics.
enableCustomMetrics bool
@@ -2119,47 +2108,6 @@ func isSyncPodWorthy(event *pleg.PodLifecycleEvent) bool {
return event.Type != pleg.ContainerRemoved
}
// parseResourceList parses the given configuration map into an API
// ResourceList or returns an error.
func parseResourceList(m componentconfig.ConfigurationMap) (v1.ResourceList, error) {
rl := make(v1.ResourceList)
for k, v := range m {
switch v1.ResourceName(k) {
// Only CPU and memory resources are supported.
case v1.ResourceCPU, v1.ResourceMemory:
q, err := resource.ParseQuantity(v)
if err != nil {
return nil, err
}
if q.Sign() == -1 {
return nil, fmt.Errorf("resource quantity for %q cannot be negative: %v", k, v)
}
rl[v1.ResourceName(k)] = q
default:
return nil, fmt.Errorf("cannot reserve %q resource", k)
}
}
return rl, nil
}
// ParseReservation parses the given kubelet- and system- reservations
// configuration maps into an internal Reservation instance or returns an
// error.
func ParseReservation(kubeReserved, systemReserved componentconfig.ConfigurationMap) (*kubetypes.Reservation, error) {
reservation := new(kubetypes.Reservation)
if rl, err := parseResourceList(kubeReserved); err != nil {
return nil, err
} else {
reservation.Kubernetes = rl
}
if rl, err := parseResourceList(systemReserved); err != nil {
return nil, err
} else {
reservation.System = rl
}
return reservation, nil
}
// Gets the streaming server configuration to use with in-process CRI shims.
func getStreamingConfig(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *KubeletDeps) *streaming.Config {
config := &streaming.Config{

View File

@@ -522,18 +522,14 @@ func (kl *Kubelet) setNodeStatusMachineInfo(node *v1.Node) {
}
// Set Allocatable.
node.Status.Allocatable = make(v1.ResourceList)
if node.Status.Allocatable == nil {
node.Status.Allocatable = make(v1.ResourceList)
}
allocatableReservation := kl.containerManager.GetNodeAllocatableReservation()
for k, v := range node.Status.Capacity {
value := *(v.Copy())
if kl.reservation.System != nil {
value.Sub(kl.reservation.System[k])
}
if kl.reservation.Kubernetes != nil {
value.Sub(kl.reservation.Kubernetes[k])
}
if value.Sign() < 0 {
// Negative Allocatable resources don't make sense.
value.Set(0)
if res, exists := allocatableReservation[k]; exists {
value.Sub(res)
}
node.Status.Allocatable[k] = value
}

View File

@@ -41,6 +41,7 @@ import (
core "k8s.io/client-go/testing"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/client/clientset_generated/clientset/fake"
"k8s.io/kubernetes/pkg/kubelet/cm"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/util/sliceutils"
"k8s.io/kubernetes/pkg/version"
@@ -109,6 +110,15 @@ func applyNodeStatusPatch(originalNode *v1.Node, patch []byte) (*v1.Node, error)
return updatedNode, nil
}
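// localCM wraps a stub ContainerManager so tests can control the node allocatable reservation it reports.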
type localCM struct {
cm.ContainerManager
allocatable v1.ResourceList
}
func (lcm *localCM) GetNodeAllocatableReservation() v1.ResourceList {
return lcm.allocatable
}
func TestUpdateNewNodeStatus(t *testing.T) {
// generate one more than maxImagesInNodeStatus in inputImageList
inputImageList, expectedImageList := generateTestingImageList(maxImagesInNodeStatus + 1)
@@ -116,6 +126,13 @@ func TestUpdateNewNodeStatus(t *testing.T) {
t, inputImageList, false /* controllerAttachDetachEnabled */)
defer testKubelet.Cleanup()
kubelet := testKubelet.kubelet
kubelet.containerManager = &localCM{
ContainerManager: cm.NewStubContainerManager(),
allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(200, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(100E6, resource.BinarySI),
},
}
kubeClient := testKubelet.fakeKubeClient
existingNode := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname}}
kubeClient.ReactionChain = fake.NewSimpleClientset(&v1.NodeList{Items: []v1.Node{existingNode}}).ReactionChain
@@ -332,6 +349,14 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
defer testKubelet.Cleanup()
kubelet := testKubelet.kubelet
kubelet.containerManager = &localCM{
ContainerManager: cm.NewStubContainerManager(),
allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(200, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(100E6, resource.BinarySI),
},
}
kubeClient := testKubelet.fakeKubeClient
existingNode := v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname},
@@ -377,9 +402,10 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
v1.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2800, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(19900E6, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(2800, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(19900E6, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(0, resource.DecimalSI),
},
},
}
@@ -687,6 +713,14 @@ func TestUpdateNodeStatusWithRuntimeStateError(t *testing.T) {
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
defer testKubelet.Cleanup()
kubelet := testKubelet.kubelet
kubelet.containerManager = &localCM{
ContainerManager: cm.NewStubContainerManager(),
allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(200, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(100E6, resource.BinarySI),
},
}
clock := testKubelet.fakeClock
kubeClient := testKubelet.fakeKubeClient
existingNode := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname}}

View File

@@ -1533,7 +1533,7 @@ func (kl *Kubelet) cleanupOrphanedPodCgroups(
// If volumes have not been unmounted/detached, do not delete the cgroup in case so the charge does not go to the parent.
if podVolumesExist := kl.podVolumesExist(uid); podVolumesExist {
glog.V(3).Infof("Orphaned pod %q found, but volumes are not cleaned up, Skipping cgroups deletion: %v", uid)
glog.V(3).Infof("Orphaned pod %q found, but volumes are not cleaned up, Skipping cgroups deletion.", uid)
continue
}
glog.V(3).Infof("Orphaned pod %q found, removing pod cgroups", uid)

View File

@@ -19,6 +19,8 @@ package kubelet
import (
"fmt"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/fieldpath"
@@ -41,7 +43,7 @@ func (kl *Kubelet) defaultPodLimitsForDownwardApi(pod *v1.Pod, container *v1.Con
return nil, nil, fmt.Errorf("failed to find node object, expected a node")
}
allocatable := node.Status.Allocatable
glog.Errorf("allocatable: %v", allocatable)
podCopy, err := api.Scheme.Copy(pod)
if err != nil {
return nil, nil, fmt.Errorf("failed to perform a deep copy of pod object: %v", err)

View File

@@ -25,8 +25,8 @@ import (
cadvisorapiv2 "github.com/google/cadvisor/info/v2"
apiequality "k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/api/v1"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)
func TestPodResourceLimitsDefaulting(t *testing.T) {
@@ -41,18 +41,21 @@ func TestPodResourceLimitsDefaulting(t *testing.T) {
}, nil)
tk.fakeCadvisor.On("ImagesFsInfo").Return(cadvisorapiv2.FsInfo{}, nil)
tk.fakeCadvisor.On("RootFsInfo").Return(cadvisorapiv2.FsInfo{}, nil)
tk.kubelet.reservation = kubetypes.Reservation{
Kubernetes: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("3"),
v1.ResourceMemory: resource.MustParse("4Gi"),
},
System: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1"),
v1.ResourceMemory: resource.MustParse("2Gi"),
tk.kubelet.nodeInfo = &testNodeInfo{
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: string(tk.kubelet.nodeName),
},
Status: v1.NodeStatus{
Allocatable: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("6"),
v1.ResourceMemory: resource.MustParse("4Gi"),
},
},
},
},
}
cases := []struct {
pod *v1.Pod
expected *v1.Pod

View File

@@ -222,12 +222,6 @@ func newTestKubeletWithImageList(
kubelet.backOff.Clock = fakeClock
kubelet.podKillingCh = make(chan *kubecontainer.PodPair, 20)
kubelet.resyncInterval = 10 * time.Second
kubelet.reservation = kubetypes.Reservation{
Kubernetes: v1.ResourceList{
v1.ResourceCPU: resource.MustParse(testReservationCPU),
v1.ResourceMemory: resource.MustParse(testReservationMemory),
},
}
kubelet.workQueue = queue.NewBasicWorkQueue(fakeClock)
// Relist period does not affect the tests.
kubelet.pleg = pleg.NewGenericPLEG(fakeRuntime, 100, time.Hour, nil, clock.RealClock{})

View File

@@ -210,7 +210,7 @@ func TestIgnoreDeleteNotFound(t *testing.T) {
t.Fatalf("expect the DeletionGracePeriodSeconds to be set")
}
if *deletedPod.DeletionGracePeriodSeconds != 0 {
t.Errorf("expect the DeletionGracePeriodSeconds to be 0, got %d", *deletedPod.DeletionTimestamp)
t.Errorf("expect the DeletionGracePeriodSeconds to be 0, got %v", *deletedPod.DeletionTimestamp)
}
}

View File

@@ -50,7 +50,6 @@ go_test(
name = "go_default_test",
srcs = [
"apparmor_test.go",
"cgroup_manager_test.go",
"container_manager_test.go",
"critical_pod_test.go",
"density_test.go",
@@ -65,6 +64,8 @@ go_test(
"log_path_test.go",
"memory_eviction_test.go",
"mirror_pod_test.go",
"node_container_manager_test.go",
"pods_container_manager_test.go",
"resource_usage_test.go",
"restart_test.go",
"runtime_conformance_test.go",
@@ -117,6 +118,7 @@ go_test(
"//vendor:k8s.io/apimachinery/pkg/util/intstr",
"//vendor:k8s.io/apimachinery/pkg/util/uuid",
"//vendor:k8s.io/apimachinery/pkg/watch",
"//vendor:k8s.io/client-go/pkg/api",
"//vendor:k8s.io/client-go/tools/cache",
],
)

View File

@@ -70,9 +70,8 @@ func validateOOMScoreAdjSettingIsInRange(pid int, expectedMinOOMScoreAdj, expect
return nil
}
var _ = framework.KubeDescribe("Kubelet Container Manager [Serial]", func() {
var _ = framework.KubeDescribe("Container Manager Misc [Serial]", func() {
f := framework.NewDefaultFramework("kubelet-container-manager")
Describe("Validate OOM score adjustments", func() {
Context("once the node is setup", func() {
It("docker daemon's oom-score-adj should be -999", func() {

View File

@@ -0,0 +1,247 @@
// +build linux
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2e_node
import (
"fmt"
"io/ioutil"
"path"
"path/filepath"
"strconv"
"strings"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/pkg/api"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/apis/componentconfig"
"k8s.io/kubernetes/pkg/kubelet/cm"
"k8s.io/kubernetes/test/e2e/framework"
. "github.com/onsi/ginkgo"
)
func setDesiredConfiguration(initialConfig *componentconfig.KubeletConfiguration) {
initialConfig.EnforceNodeAllocatable = []string{"pods", "kube-reserved", "system-reserved"}
initialConfig.SystemReserved = componentconfig.ConfigurationMap{
"cpu": "100m",
"memory": "100Mi",
}
initialConfig.KubeReserved = componentconfig.ConfigurationMap{
"cpu": "100m",
"memory": "100Mi",
}
initialConfig.EvictionHard = "memory.available<100Mi"
// Necessary for allocatable cgroup creation.
initialConfig.CgroupsPerQOS = true
initialConfig.KubeReservedCgroup = kubeReservedCgroup
initialConfig.SystemReservedCgroup = systemReservedCgroup
}
var _ = framework.KubeDescribe("Node Container Manager [Serial]", func() {
f := framework.NewDefaultFramework("node-container-manager")
Describe("Validate Node Allocatable", func() {
It("set's up the node and runs the test", func() {
framework.ExpectNoError(runTest(f))
})
})
})
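// expectFileValToEqual reads an integer from the given cgroup file and verifies it is within delta of the expected value.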
func expectFileValToEqual(filePath string, expectedValue, delta int64) error {
out, err := ioutil.ReadFile(filePath)
if err != nil {
return fmt.Errorf("failed to read file %q", filePath)
}
actual, err := strconv.ParseInt(strings.TrimSpace(string(out)), 10, 64)
if err != nil {
return fmt.Errorf("failed to parse output %v", err)
}
// Ensure that values are within a delta range to work around rounding errors.
if (actual < (expectedValue - delta)) || (actual > (expectedValue + delta)) {
return fmt.Errorf("Expected value at %q to be between %d and %d. Got %d", filePath, (expectedValue - delta), (expectedValue + delta), actual)
}
return nil
}
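// getAllocatableLimits subtracts the given cpu and memory reservations from node capacity to compute the expected allocatable limits.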
func getAllocatableLimits(cpu, memory string, capacity v1.ResourceList) (*resource.Quantity, *resource.Quantity) {
var allocatableCPU, allocatableMemory *resource.Quantity
// Total cpu reservation is 200m.
for k, v := range capacity {
if k == v1.ResourceCPU {
allocatableCPU = v.Copy()
allocatableCPU.Sub(resource.MustParse(cpu))
}
if k == v1.ResourceMemory {
allocatableMemory = v.Copy()
allocatableMemory.Sub(resource.MustParse(memory))
}
}
return allocatableCPU, allocatableMemory
}
const (
kubeReservedCgroup = "/kube_reserved"
systemReservedCgroup = "/system_reserved"
)
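// createIfNotExists creates the cgroup described by cgroupConfig only if it does not already exist.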
func createIfNotExists(cm cm.CgroupManager, cgroupConfig *cm.CgroupConfig) error {
if !cm.Exists(cgroupConfig.Name) {
if err := cm.Create(cgroupConfig); err != nil {
return err
}
}
return nil
}
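// createTemporaryCgroupsForReservation creates the kube reserved and system reserved cgroups used by this test if they do not already exist.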
func createTemporaryCgroupsForReservation(cgroupManager cm.CgroupManager) error {
// Create kube reserved cgroup
cgroupConfig := &cm.CgroupConfig{
Name: cm.CgroupName(kubeReservedCgroup),
}
if err := createIfNotExists(cgroupManager, cgroupConfig); err != nil {
return err
}
// Create system reserved cgroup
cgroupConfig.Name = cm.CgroupName(systemReservedCgroup)
return createIfNotExists(cgroupManager, cgroupConfig)
}
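// destroyTemporaryCgroupsForReservation removes the kube reserved and system reserved cgroups created for this test.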
func destroyTemporaryCgroupsForReservation(cgroupManager cm.CgroupManager) error {
// Destroy kube reserved cgroup
cgroupConfig := &cm.CgroupConfig{
Name: cm.CgroupName(kubeReservedCgroup),
}
if err := cgroupManager.Destroy(cgroupConfig); err != nil {
return err
}
cgroupConfig.Name = cm.CgroupName(systemReservedCgroup)
return cgroupManager.Destroy(cgroupConfig)
}
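// runTest reconfigures the kubelet to enforce node allocatable on the pods, kube-reserved and system-reserved cgroups, then verifies the resulting cgroup limits and the allocatable values reported to the scheduler.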
func runTest(f *framework.Framework) error {
var oldCfg *componentconfig.KubeletConfiguration
subsystems, err := cm.GetCgroupSubsystems()
if err != nil {
return err
}
// Get current kubelet configuration
oldCfg, err = getCurrentKubeletConfig()
if err != nil {
return err
}
// Create a cgroup manager object for manipulating cgroups.
cgroupManager := cm.NewCgroupManager(subsystems, oldCfg.CgroupDriver)
defer destroyTemporaryCgroupsForReservation(cgroupManager)
defer func() {
if oldCfg != nil {
framework.ExpectNoError(setKubeletConfiguration(f, oldCfg))
}
}()
if err := createTemporaryCgroupsForReservation(cgroupManager); err != nil {
return err
}
clone, err := api.Scheme.DeepCopy(oldCfg)
if err != nil {
return err
}
newCfg := clone.(*componentconfig.KubeletConfiguration)
// Change existing kubelet configuration
setDesiredConfiguration(newCfg)
// Set the new kubelet configuration.
err = setKubeletConfiguration(f, newCfg)
if err != nil {
return err
}
// Set new config and current config.
currentConfig := newCfg
expectedNAPodCgroup := path.Join(currentConfig.CgroupRoot, "kubepods")
if !cgroupManager.Exists(cm.CgroupName(expectedNAPodCgroup)) {
return fmt.Errorf("Expected Node Allocatable Cgroup Does not exist")
}
// TODO: Update cgroupManager to expose a Status interface to get current Cgroup Settings.
nodeList, err := f.ClientSet.Core().Nodes().List(metav1.ListOptions{})
if err != nil {
return err
}
if len(nodeList.Items) != 1 {
return fmt.Errorf("Unexpected number of node objects for node e2e. Expects only one node: %+v", nodeList)
}
node := nodeList.Items[0]
capacity := node.Status.Capacity
allocatableCPU, allocatableMemory := getAllocatableLimits("200m", "200Mi", capacity)
// Total Memory reservation is 200Mi excluding eviction thresholds.
// Expect CPU shares on node allocatable cgroup to equal allocatable.
if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], "kubepods", "cpu.shares"), cm.MilliCPUToShares(allocatableCPU.MilliValue()), 10); err != nil {
return err
}
// Expect Memory limit on node allocatable cgroup to equal allocatable.
if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], "kubepods", "memory.limit_in_bytes"), allocatableMemory.Value(), 0); err != nil {
return err
}
// Check that Allocatable reported to the scheduler takes eviction thresholds into account.
schedulerAllocatable := node.Status.Allocatable
// Memory allocatable should take into account eviction thresholds.
allocatableCPU, allocatableMemory = getAllocatableLimits("200m", "300Mi", capacity)
// Expect allocatable to include all resources in capacity.
if len(schedulerAllocatable) != len(capacity) {
return fmt.Errorf("Expected all resources in capacity to be found in allocatable")
}
// CPU based evictions are not supported.
if allocatableCPU.Cmp(schedulerAllocatable["cpu"]) != 0 {
return fmt.Errorf("Unexpected cpu allocatable value exposed by the node. Expected: %v, got: %v, capacity: %v", allocatableCPU, schedulerAllocatable["cpu"], capacity["cpu"])
}
if allocatableMemory.Cmp(schedulerAllocatable["memory"]) != 0 {
return fmt.Errorf("Unexpected cpu allocatable value exposed by the node. Expected: %v, got: %v, capacity: %v", allocatableCPU, schedulerAllocatable["cpu"], capacity["memory"])
}
if !cgroupManager.Exists(cm.CgroupName(kubeReservedCgroup)) {
return fmt.Errorf("Expected kube reserved cgroup Does not exist")
}
// Expect CPU shares on kube reserved cgroup to equal its reservation, which is `100m`.
kubeReservedCPU := resource.MustParse(currentConfig.KubeReserved["cpu"])
if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], kubeReservedCgroup, "cpu.shares"), cm.MilliCPUToShares(kubeReservedCPU.MilliValue()), 10); err != nil {
return err
}
// Expect memory limit on kube reserved cgroup to equal configured value `100Mi`.
kubeReservedMemory := resource.MustParse(currentConfig.KubeReserved["memory"])
if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], kubeReservedCgroup, "memory.limit_in_bytes"), kubeReservedMemory.Value(), 0); err != nil {
return err
}
if !cgroupManager.Exists(cm.CgroupName(systemReservedCgroup)) {
return fmt.Errorf("Expected system reserved cgroup Does not exist")
}
// Expect CPU shares on system reserved cgroup to equal its reservation, which is `100m`.
systemReservedCPU := resource.MustParse(currentConfig.SystemReserved["cpu"])
if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], systemReservedCgroup, "cpu.shares"), cm.MilliCPUToShares(systemReservedCPU.MilliValue()), 10); err != nil {
return err
}
// Expect memory limit on system reserved cgroup to equal configured value `100Mi`.
systemReservedMemory := resource.MustParse(currentConfig.SystemReserved["memory"])
if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], systemReservedCgroup, "memory.limit_in_bytes"), systemReservedMemory.Value(), 0); err != nil {
return err
}
return nil
}

View File

@@ -17,6 +17,8 @@ limitations under the License.
package e2e_node
import (
"path"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/uuid"
@@ -24,6 +26,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cm"
"k8s.io/kubernetes/test/e2e/framework"
"github.com/golang/glog"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
@@ -49,18 +52,23 @@ func getResourceRequirements(requests, limits v1.ResourceList) v1.ResourceRequir
return res
}
// Kubelet internal cgroup name for node allocatable cgroup.
const defaultNodeAllocatableCgroup = "kubepods"
// makePodToVerifyCgroups returns a pod that verifies the existence of the specified cgroups.
func makePodToVerifyCgroups(cgroupNames []cm.CgroupName) *v1.Pod {
// convert the names to their literal cgroupfs forms...
cgroupFsNames := []string{}
for _, cgroupName := range cgroupNames {
// Add top level cgroup used to enforce node allocatable.
cgroupName = cm.CgroupName(path.Join(defaultNodeAllocatableCgroup, string(cgroupName)))
if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" {
cgroupFsNames = append(cgroupFsNames, cm.ConvertCgroupNameToSystemd(cgroupName, true))
} else {
cgroupFsNames = append(cgroupFsNames, string(cgroupName))
}
}
glog.Infof("expecting %v cgroups to be found", cgroupFsNames)
// build the pod command to either verify cgroups exist
command := ""
for _, cgroupFsName := range cgroupFsNames {

View File

@@ -95,6 +95,7 @@ func tempSetEvictionHard(f *framework.Framework, evictionHard string) {
// Must be called within a Context. Allows the function to modify the KubeletConfiguration during the BeforeEach of the context.
// The change is reverted in the AfterEach of the context.
// Returns true on success.
func tempSetCurrentKubeletConfig(f *framework.Framework, updateFunction func(initialConfig *componentconfig.KubeletConfiguration)) {
var oldCfg *componentconfig.KubeletConfiguration
BeforeEach(func() {
@@ -292,3 +293,9 @@ func logNodeEvents(f *framework.Framework) {
err := framework.ListNamespaceEvents(f.ClientSet, "")
framework.ExpectNoError(err)
}
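// getLocalNode returns the single node registered for a node e2e run.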
func getLocalNode(f *framework.Framework) *v1.Node {
nodeList := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
Expect(len(nodeList.Items)).To(Equal(1), "Unexpected number of node objects for node e2e. Expects only one node.")
return &nodeList.Items[0]
}