From 8a8cd06ea418a4ed51a6b67acc4d7dbf36e2d867 Mon Sep 17 00:00:00 2001
From: Wojciech Tyczynski
Date: Wed, 3 Aug 2016 11:44:10 +0200
Subject: [PATCH 1/2] Configurable cache sizes of cachers

---
 cmd/kube-apiserver/app/server.go            |  2 +
 .../cmd/federation-apiserver/app/server.go  |  1 +
 hack/verify-flags/exceptions.txt            |  2 +-
 hack/verify-flags/known-flags.txt           |  1 +
 .../options/server_run_options.go           |  4 ++
 pkg/registry/cachesize/cachesize.go         | 66 +++++++++++--------
 pkg/registry/configmap/etcd/etcd.go         |  9 ++-
 pkg/registry/podsecuritypolicy/etcd/etcd.go |  3 +-
 pkg/registry/storageclass/etcd/etcd.go      |  3 +-
 9 files changed, 58 insertions(+), 33 deletions(-)

diff --git a/cmd/kube-apiserver/app/server.go b/cmd/kube-apiserver/app/server.go
index d82dde1b850..2d7dabe034e 100644
--- a/cmd/kube-apiserver/app/server.go
+++ b/cmd/kube-apiserver/app/server.go
@@ -282,6 +282,8 @@ func Run(s *options.APIServer) error {
 	}
 
 	if s.EnableWatchCache {
+		glog.V(2).Infof("Initializing cache sizes based on %dMB limit", s.TargetRAMMB)
+		cachesize.InitializeWatchCacheSizes(s.TargetRAMMB)
 		cachesize.SetWatchCacheSizes(s.WatchCacheSizes)
 	}

diff --git a/federation/cmd/federation-apiserver/app/server.go b/federation/cmd/federation-apiserver/app/server.go
index 4384749e599..cc6b80feb9c 100644
--- a/federation/cmd/federation-apiserver/app/server.go
+++ b/federation/cmd/federation-apiserver/app/server.go
@@ -142,6 +142,7 @@ func Run(s *genericoptions.ServerRunOptions) error {
 
 	// TODO: Move this to generic api server (Need to move the command line flag).
 	if s.EnableWatchCache {
+		cachesize.InitializeWatchCacheSizes(s.TargetRAMMB)
 		cachesize.SetWatchCacheSizes(s.WatchCacheSizes)
 	}

diff --git a/hack/verify-flags/exceptions.txt b/hack/verify-flags/exceptions.txt
index 2c4dada6a48..1fa88af25e1 100644
--- a/hack/verify-flags/exceptions.txt
+++ b/hack/verify-flags/exceptions.txt
@@ -39,7 +39,7 @@ cluster/rackspace/util.sh:   local node_ip=$(nova show --minimal ${NODE_NAMES[$
 cluster/saltbase/salt/cluster-autoscaler/cluster-autoscaler.manifest:{% set params = pillar['autoscaler_mig_config'] + " " + cloud_config -%}
 cluster/saltbase/salt/kube-admission-controls/init.sls:{% if 'LimitRanger' in pillar.get('admission_control', '') %}
 cluster/saltbase/salt/kube-apiserver/kube-apiserver.manifest:{% set enable_garbage_collector = pillar['enable_garbage_collector'] -%}
-cluster/saltbase/salt/kube-apiserver/kube-apiserver.manifest:{% set params = address + " " + etcd_servers + " " + etcd_servers_overrides + " " + cloud_provider + " " + cloud_config + " " + runtime_config + " " + admission_control + " " + service_cluster_ip_range + " " + client_ca_file + basic_auth_file + " " + min_request_timeout + " " + enable_garbage_collector -%}
+cluster/saltbase/salt/kube-apiserver/kube-apiserver.manifest:{% set params = address + " " + etcd_servers + " " + etcd_servers_overrides + " " + cloud_provider + " " + cloud_config + " " + runtime_config + " " + admission_control + " " + target_ram_mb + " " + service_cluster_ip_range + " " + client_ca_file + basic_auth_file + " " + min_request_timeout + " " + enable_garbage_collector -%}
 cluster/saltbase/salt/kube-controller-manager/kube-controller-manager.manifest:{% set enable_garbage_collector = pillar['enable_garbage_collector'] -%}
 cluster/saltbase/salt/kube-controller-manager/kube-controller-manager.manifest:{% set params = "--master=127.0.0.1:8080" + " " + cluster_name + " " + cluster_cidr + " " + allocate_node_cidrs + " " + service_cluster_ip_range + " " + terminated_pod_gc + " " + enable_garbage_collector + " " + cloud_provider + " " + cloud_config + " " + service_account_key + " " + log_level + " " + root_ca_file -%}
 cluster/saltbase/salt/kube-proxy/kube-proxy.manifest:    {% set api_servers_with_port = api_servers + ":6443" -%}
diff --git a/hack/verify-flags/known-flags.txt b/hack/verify-flags/known-flags.txt
index c0797ce7e48..6e26d2a16b8 100644
--- a/hack/verify-flags/known-flags.txt
+++ b/hack/verify-flags/known-flags.txt
@@ -471,6 +471,7 @@ system-container
 system-pods-startup-timeout
 system-reserved
 target-port
+target-ram-mb
 tcp-services
 terminated-pod-gc-threshold
 test-timeout
diff --git a/pkg/genericapiserver/options/server_run_options.go b/pkg/genericapiserver/options/server_run_options.go
index c03b7b98b18..3f22fbe1133 100644
--- a/pkg/genericapiserver/options/server_run_options.go
+++ b/pkg/genericapiserver/options/server_run_options.go
@@ -98,6 +98,7 @@ type ServerRunOptions struct {
 	// these; you can change this if you want to change the defaults (e.g.,
 	// for testing). This is not actually exposed as a flag.
 	DefaultStorageVersions string
+	TargetRAMMB            int
 	TLSCertFile            string
 	TLSPrivateKeyFile      string
 	TokenAuthFile          string
@@ -304,6 +305,9 @@ func (s *ServerRunOptions) AddFlags(fs *pflag.FlagSet) {
 		"Per-resource etcd servers overrides, comma separated. The individual override "+
 		"format: group/resource#servers, where servers are http://ip:port, semicolon separated.")
 
+	fs.IntVar(&s.TargetRAMMB, "target-ram-mb", s.TargetRAMMB,
+		"Memory limit for apiserver in MB (used to configure sizes of caches, etc.)")
+
 	fs.StringVar(&s.ExternalHost, "external-hostname", s.ExternalHost,
 		"The hostname to use when generating externalized URLs for this master (e.g. Swagger API Docs).")

diff --git a/pkg/registry/cachesize/cachesize.go b/pkg/registry/cachesize/cachesize.go
index f3d05196ab7..2ca9de87022 100644
--- a/pkg/registry/cachesize/cachesize.go
+++ b/pkg/registry/cachesize/cachesize.go
@@ -31,6 +31,7 @@ const (
 	CertificateSigningRequests Resource = "certificatesigningrequests"
 	ClusterRoles               Resource = "clusterroles"
 	ClusterRoleBindings        Resource = "clusterrolebindings"
+	ConfigMaps                 Resource = "configmaps"
 	Controllers                Resource = "controllers"
 	Daemonsets                 Resource = "daemonsets"
 	Deployments                Resource = "deployments"
@@ -47,6 +48,7 @@ const (
 	PersistentVolumes      Resource = "persistentvolumes"
 	PersistentVolumeClaims Resource = "persistentvolumeclaims"
 	Pods                   Resource = "pods"
+	PodSecurityPolicies    Resource = "podsecuritypolicies"
 	PodTemplates           Resource = "podtemplates"
 	Replicasets            Resource = "replicasets"
 	ResourceQuotas         Resource = "resourcequotas"
@@ -56,40 +58,36 @@ const (
 	Secrets         Resource = "secrets"
 	ServiceAccounts Resource = "serviceaccounts"
 	Services        Resource = "services"
+	StorageClasses  Resource = "storageclasses"
+
+	// Default value of watch cache size for a resource if not specified.
+	defaultWatchCacheSize = 100
 )
 
+// TODO: This shouldn't be a global variable.
 var watchCacheSizes map[Resource]int
 
 func init() {
 	watchCacheSizes = make(map[Resource]int)
-	watchCacheSizes[CertificateSigningRequests] = 1000
-	watchCacheSizes[ClusterRoles] = 100
-	watchCacheSizes[ClusterRoleBindings] = 100
-	watchCacheSizes[Controllers] = 100
-	watchCacheSizes[Daemonsets] = 100
-	watchCacheSizes[Deployments] = 100
-	watchCacheSizes[Endpoints] = 1000
-	watchCacheSizes[HorizontalPodAutoscalers] = 100
-	watchCacheSizes[Ingress] = 100
-	watchCacheSizes[PetSet] = 100
-	watchCacheSizes[PodDisruptionBudget] = 100
-	watchCacheSizes[Jobs] = 100
-	watchCacheSizes[LimitRanges] = 100
-	watchCacheSizes[Namespaces] = 100
-	watchCacheSizes[NetworkPolicys] = 100
-	watchCacheSizes[Nodes] = 1000
-	watchCacheSizes[PersistentVolumes] = 100
-	watchCacheSizes[PersistentVolumeClaims] = 100
-	watchCacheSizes[Pods] = 1000
-	watchCacheSizes[PodTemplates] = 100
-	watchCacheSizes[Replicasets] = 100
-	watchCacheSizes[ResourceQuotas] = 100
-	watchCacheSizes[ScheduledJobs] = 100
-	watchCacheSizes[Roles] = 100
-	watchCacheSizes[RoleBindings] = 100
-	watchCacheSizes[Secrets] = 100
-	watchCacheSizes[ServiceAccounts] = 100
-	watchCacheSizes[Services] = 100
+}
+
+func InitializeWatchCacheSizes(expectedRAMCapacityMB int) {
+	// This heuristic tries to infer the maximum number of nodes in the
+	// cluster from the memory capacity, and sets cache sizes based on
+	// that value.
+	// From our documentation, we officially recommend 120GB machines
+	// for 2000 nodes, and we scale from that point. Thus we assume
+	// ~60MB of capacity per node.
+	// TODO: Revisit this heuristic.
+	clusterSize := expectedRAMCapacityMB / 60
+
+	// We should specify the cache size for a given resource only if it
+	// is supposed to have a non-default value.
+	//
+	// TODO: Figure out which resources should have non-default values.
+	watchCacheSizes[Endpoints] = maxInt(10*clusterSize, 1000)
+	watchCacheSizes[Nodes] = maxInt(3*clusterSize, 1000)
+	watchCacheSizes[Pods] = maxInt(10*clusterSize, 1000)
 }
 
 func SetWatchCacheSizes(cacheSizes []string) {
@@ -111,5 +109,15 @@ func SetWatchCacheSizes(cacheSizes []string) {
 }
 
 func GetWatchCacheSizeByResource(resource Resource) int {
-	return watchCacheSizes[resource]
+	if value, found := watchCacheSizes[resource]; found {
+		return value
+	}
+	return defaultWatchCacheSize
+}
+
+func maxInt(a, b int) int {
+	if a > b {
+		return a
+	}
+	return b
 }
diff --git a/pkg/registry/configmap/etcd/etcd.go b/pkg/registry/configmap/etcd/etcd.go
index 66fd775d208..baedd414d7f 100644
--- a/pkg/registry/configmap/etcd/etcd.go
+++ b/pkg/registry/configmap/etcd/etcd.go
@@ -18,6 +18,7 @@ package etcd
 
 import (
 	"k8s.io/kubernetes/pkg/api"
+	"k8s.io/kubernetes/pkg/registry/cachesize"
 	"k8s.io/kubernetes/pkg/registry/configmap"
 	"k8s.io/kubernetes/pkg/registry/generic"
 	"k8s.io/kubernetes/pkg/registry/generic/registry"
@@ -36,7 +37,13 @@ func NewREST(opts generic.RESTOptions) *REST {
 	newListFunc := func() runtime.Object { return &api.ConfigMapList{} }
 
 	storageInterface := opts.Decorator(
-		opts.Storage, 100, &api.ConfigMap{}, prefix, configmap.Strategy, newListFunc, storage.NoTriggerPublisher)
+		opts.Storage,
+		cachesize.GetWatchCacheSizeByResource(cachesize.ConfigMaps),
+		&api.ConfigMap{},
+		prefix,
+		configmap.Strategy,
+		newListFunc,
+		storage.NoTriggerPublisher)
 
 	store := &registry.Store{
 		NewFunc: func() runtime.Object {
diff --git a/pkg/registry/podsecuritypolicy/etcd/etcd.go b/pkg/registry/podsecuritypolicy/etcd/etcd.go
index 33ed738cec7..15ec74192ca 100644
--- a/pkg/registry/podsecuritypolicy/etcd/etcd.go
+++ b/pkg/registry/podsecuritypolicy/etcd/etcd.go
@@ -19,6 +19,7 @@ package etcd
 import (
 	"k8s.io/kubernetes/pkg/api"
 	"k8s.io/kubernetes/pkg/apis/extensions"
+	"k8s.io/kubernetes/pkg/registry/cachesize"
 	"k8s.io/kubernetes/pkg/registry/generic"
 	"k8s.io/kubernetes/pkg/registry/generic/registry"
 	"k8s.io/kubernetes/pkg/registry/podsecuritypolicy"
@@ -38,7 +39,7 @@ func NewREST(opts generic.RESTOptions) *REST {
 	newListFunc := func() runtime.Object { return &extensions.PodSecurityPolicyList{} }
 	storageInterface := opts.Decorator(
 		opts.Storage,
-		100,
+		cachesize.GetWatchCacheSizeByResource(cachesize.PodSecurityPolicies),
 		&extensions.PodSecurityPolicy{},
 		prefix,
 		podsecuritypolicy.Strategy,
diff --git a/pkg/registry/storageclass/etcd/etcd.go b/pkg/registry/storageclass/etcd/etcd.go
index 998254c16f5..ae119cedad5 100644
--- a/pkg/registry/storageclass/etcd/etcd.go
+++ b/pkg/registry/storageclass/etcd/etcd.go
@@ -19,6 +19,7 @@ package etcd
 import (
 	"k8s.io/kubernetes/pkg/api"
 	"k8s.io/kubernetes/pkg/apis/extensions"
+	"k8s.io/kubernetes/pkg/registry/cachesize"
 	"k8s.io/kubernetes/pkg/registry/generic"
 	"k8s.io/kubernetes/pkg/registry/generic/registry"
 	"k8s.io/kubernetes/pkg/registry/storageclass"
@@ -37,7 +38,7 @@ func NewREST(opts generic.RESTOptions) *REST {
 	newListFunc := func() runtime.Object { return &extensions.StorageClassList{} }
 	storageInterface := opts.Decorator(
 		opts.Storage,
-		100,
+		cachesize.GetWatchCacheSizeByResource(cachesize.StorageClasses),
 		&extensions.StorageClass{},
 		prefix,
 		storageclass.Strategy,

From 38c28177d6c1659b45004bdc7631fc74513f5120 Mon Sep 17 00:00:00 2001
From: Wojciech Tyczynski
Date: Wed, 3 Aug 2016 12:30:21 +0200
Subject: [PATCH 2/2] Propagate cluster size through salt

---
 cluster/gce/gci/configure-helper.sh             |  6 ++++++
 cluster/gce/trusty/configure-helper.sh          |  6 ++++++
 .../salt/kube-apiserver/kube-apiserver.manifest | 15 ++++++++++++---
 3 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/cluster/gce/gci/configure-helper.sh b/cluster/gce/gci/configure-helper.sh
index cf4c1707525..2652eaf132a 100644
--- a/cluster/gce/gci/configure-helper.sh
+++ b/cluster/gce/gci/configure-helper.sh
@@ -635,6 +635,12 @@ function start-kube-apiserver {
   if [[ -n "${ENABLE_GARBAGE_COLLECTOR:-}" ]]; then
     params+=" --enable-garbage-collector=${ENABLE_GARBAGE_COLLECTOR}"
   fi
+  if [[ -n "${NUM_NODES:-}" ]]; then
+    # Set the amount of memory available for the apiserver based on the number of nodes.
+    # TODO: Once we start setting proper requests and limits for the apiserver,
+    # we should reuse the same logic here instead of the current heuristic.
+    params+=" --target-ram-mb=$((${NUM_NODES} * 60))"
+  fi
   if [[ -n "${SERVICE_CLUSTER_IP_RANGE:-}" ]]; then
     params+=" --service-cluster-ip-range=${SERVICE_CLUSTER_IP_RANGE}"
   fi
diff --git a/cluster/gce/trusty/configure-helper.sh b/cluster/gce/trusty/configure-helper.sh
index 90e952f4296..67cf49a8b6a 100644
--- a/cluster/gce/trusty/configure-helper.sh
+++ b/cluster/gce/trusty/configure-helper.sh
@@ -519,6 +519,12 @@ start_kube_apiserver() {
   params="${params} --authorization-policy-file=/etc/srv/kubernetes/abac-authz-policy.jsonl"
   params="${params} --etcd-servers-overrides=/events#http://127.0.0.1:4002"
 
+  if [ -n "${NUM_NODES:-}" ]; then
+    # Set the amount of memory available for the apiserver based on the number of nodes.
+    # TODO: Once we start setting proper requests and limits for the apiserver,
+    # we should reuse the same logic here instead of the current heuristic.
+    params="${params} --target-ram-mb=$((${NUM_NODES} * 60))"
+  fi
   if [ -n "${SERVICE_CLUSTER_IP_RANGE:-}" ]; then
     params="${params} --service-cluster-ip-range=${SERVICE_CLUSTER_IP_RANGE}"
   fi
diff --git a/cluster/saltbase/salt/kube-apiserver/kube-apiserver.manifest b/cluster/saltbase/salt/kube-apiserver/kube-apiserver.manifest
index a818f77ea16..a9ded4a88a7 100644
--- a/cluster/saltbase/salt/kube-apiserver/kube-apiserver.manifest
+++ b/cluster/saltbase/salt/kube-apiserver/kube-apiserver.manifest
@@ -57,6 +57,15 @@
 {% set etcd_servers = "--etcd-servers=http://127.0.0.1:4001" -%}
 {% set etcd_servers_overrides = "--etcd-servers-overrides=/events#http://127.0.0.1:4002" -%}
 
+{% set target_ram_mb = "" -%}
+{% if pillar['num_nodes'] is defined -%}
+  # Set the amount of memory available for the apiserver based on the number of nodes.
+  # TODO: Once we start setting proper requests and limits for the apiserver,
+  # we should reuse the same logic here instead of the current heuristic.
+  {% set tmp_ram_mb = pillar['num_nodes']|int * 60 %}
+  {% set target_ram_mb = "--target-ram-mb=" + tmp_ram_mb|string -%}
+{% endif -%}
+
 {% set service_cluster_ip_range = "" -%}
 {% if pillar['service_cluster_ip_range'] is defined -%}
   {% set service_cluster_ip_range = "--service-cluster-ip-range=" + pillar['service_cluster_ip_range'] -%}
@@ -124,15 +133,15 @@
 
 {% set enable_garbage_collector = "" -%}
 {% if pillar['enable_garbage_collector'] is defined -%}
-{% set enable_garbage_collector = "--enable-garbage-collector=" + pillar['enable_garbage_collector'] -%}
+  {% set enable_garbage_collector = "--enable-garbage-collector=" + pillar['enable_garbage_collector'] -%}
 {% endif -%}
 
-{% set params = address + " " + etcd_servers + " " + etcd_servers_overrides + " " + cloud_provider + " " + cloud_config + " " + runtime_config + " " + admission_control + " " + service_cluster_ip_range + " " + client_ca_file + basic_auth_file + " " + min_request_timeout + " " + enable_garbage_collector -%}
+{% set params = address + " " + etcd_servers + " " + etcd_servers_overrides + " " + cloud_provider + " " + cloud_config + " " + runtime_config + " " + admission_control + " " + target_ram_mb + " " + service_cluster_ip_range + " " + client_ca_file + basic_auth_file + " " + min_request_timeout + " " + enable_garbage_collector -%}
 {% set params = params + " " + cert_file + " " + key_file + " --secure-port=" + secure_port + token_auth_file + " " + bind_address + " " + log_level + " " + advertise_address + " " + proxy_ssh_options + authz_mode + abac_policy_file + webhook_authentication_config + webhook_authorization_config -%}
 
 # test_args has to be kept at the end, so they'll overwrite any prior configuration
 {% if pillar['apiserver_test_args'] is defined -%}
-{% set params = params + " " + pillar['apiserver_test_args'] -%}
+  {% set params = params + " " + pillar['apiserver_test_args'] -%}
 {% endif -%}
 {
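
Reviewer note: the two commits implement one pipeline. The GCE/salt scripts pass --target-ram-mb=$((NUM_NODES * 60)), and InitializeWatchCacheSizes divides that budget by the same ~60MB-per-node assumption to recover the cluster size, then scales the watch caches for the high-churn resources. Below is a minimal standalone sketch of that heuristic, not part of the patch: the watchCacheSizesFor helper, the string keys, and main are illustrative only (the real code keys the map by the Resource type in pkg/registry/cachesize).

```go
package main

import "fmt"

// maxInt mirrors the helper added to pkg/registry/cachesize in this patch.
func maxInt(a, b int) int {
	if a > b {
		return a
	}
	return b
}

// watchCacheSizesFor reproduces the heuristic from InitializeWatchCacheSizes:
// recover the cluster size from the apiserver RAM budget (~60MB per node) and
// scale the caches of the high-churn resources from it, flooring each at 1000.
// Resources without an explicit entry fall back to defaultWatchCacheSize (100)
// via GetWatchCacheSizeByResource.
func watchCacheSizesFor(targetRAMMB int) map[string]int {
	clusterSize := targetRAMMB / 60
	return map[string]int{
		"endpoints": maxInt(10*clusterSize, 1000),
		"nodes":     maxInt(3*clusterSize, 1000),
		"pods":      maxInt(10*clusterSize, 1000),
	}
}

func main() {
	// A 2000-node cluster: the scripts pass --target-ram-mb=$((2000 * 60)).
	for resource, size := range watchCacheSizesFor(2000 * 60) {
		fmt.Printf("%-10s %d\n", resource, size)
	}
	// Prints (in some map order): endpoints 20000, nodes 6000, pods 20000.
}
```

Note that the maxInt floor keeps small clusters at the previous hard-coded sizes: Endpoints, Nodes, and Pods stay at their old 1000-entry defaults until clusterSize exceeds 100 nodes (for the 10x resources) or 334 nodes (for Nodes), so the change only grows caches on large clusters.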