From a52dcd0d9f3111489d9b19deebe32074186efef6 Mon Sep 17 00:00:00 2001
From: Eric Lin <exlin@google.com>
Date: Tue, 14 Feb 2023 09:50:15 +0000
Subject: [PATCH] Add DelayCacheUntilActive option to start informers after
 leader election

If this option is set and leader election is enabled, the scheduler does
not start its informers until it becomes active (i.e. is elected leader).
This avoids the memory overhead of filling informer caches while waiting.

Signed-off-by: Eric Lin <exlin@google.com>
---
 .../app/options/options_test.go               | 27 ++++++++-----
 cmd/kube-scheduler/app/server.go              | 32 ++++++++++------
 pkg/generated/openapi/zz_generated.openapi.go |  7 ++++
 .../apis/config/scheme/scheme_test.go         | 11 ++++--
 pkg/scheduler/apis/config/types.go            |  6 +++
 pkg/scheduler/apis/config/v1/defaults_test.go | 38 +++++++++++++++++++
 .../apis/config/v1/zz_generated.conversion.go |  2 +
 .../config/v1beta3/zz_generated.conversion.go |  1 +
 .../k8s.io/kube-scheduler/config/v1/types.go  |  6 +++
 9 files changed, 107 insertions(+), 23 deletions(-)

diff --git a/cmd/kube-scheduler/app/options/options_test.go b/cmd/kube-scheduler/app/options/options_test.go
index c8b93ed98d8..43f0415c645 100644
--- a/cmd/kube-scheduler/app/options/options_test.go
+++ b/cmd/kube-scheduler/app/options/options_test.go
@@ -398,7 +398,8 @@ profiles:
 				TypeMeta: metav1.TypeMeta{
 					APIVersion: v1.SchemeGroupVersion.String(),
 				},
-				Parallelism: 16,
+				Parallelism:           16,
+				DelayCacheUntilActive: false,
 				DebuggingConfiguration: componentbaseconfig.DebuggingConfiguration{
 					EnableProfiling:           true,
 					EnableContentionProfiling: true,
@@ -469,7 +470,8 @@ profiles:
 				TypeMeta: metav1.TypeMeta{
 					APIVersion: v1beta3.SchemeGroupVersion.String(),
 				},
-				Parallelism: 16,
+				Parallelism:           16,
+				DelayCacheUntilActive: false,
 				DebuggingConfiguration: componentbaseconfig.DebuggingConfiguration{
 					EnableProfiling:           true,
 					EnableContentionProfiling: true,
@@ -571,7 +573,8 @@ profiles:
 				TypeMeta: metav1.TypeMeta{
 					APIVersion: v1.SchemeGroupVersion.String(),
 				},
-				Parallelism: 16,
+				Parallelism:           16,
+				DelayCacheUntilActive: false,
 				DebuggingConfiguration: componentbaseconfig.DebuggingConfiguration{
 					EnableProfiling:           true,
 					EnableContentionProfiling: true,
@@ -641,7 +644,8 @@ profiles:
 				TypeMeta: metav1.TypeMeta{
 					APIVersion: v1.SchemeGroupVersion.String(),
 				},
-				Parallelism: 16,
+				Parallelism:           16,
+				DelayCacheUntilActive: false,
 				DebuggingConfiguration: componentbaseconfig.DebuggingConfiguration{
 					EnableProfiling:           true,
 					EnableContentionProfiling: true,
@@ -685,7 +689,8 @@ profiles:
 				TypeMeta: metav1.TypeMeta{
 					APIVersion: v1.SchemeGroupVersion.String(),
 				},
-				Parallelism: 16,
+				Parallelism:           16,
+				DelayCacheUntilActive: false,
 				DebuggingConfiguration: componentbaseconfig.DebuggingConfiguration{
 					EnableProfiling:           true,
 					EnableContentionProfiling: true,
@@ -804,7 +809,8 @@ profiles:
 				TypeMeta: metav1.TypeMeta{
 					APIVersion: v1beta3.SchemeGroupVersion.String(),
 				},
-				Parallelism: 16,
+				Parallelism:           16,
+				DelayCacheUntilActive: false,
 				DebuggingConfiguration: componentbaseconfig.DebuggingConfiguration{
 					EnableProfiling:           true,
 					EnableContentionProfiling: true,
@@ -926,7 +932,8 @@ profiles:
 				TypeMeta: metav1.TypeMeta{
 					APIVersion: v1.SchemeGroupVersion.String(),
 				},
-				Parallelism: 16,
+				Parallelism:           16,
+				DelayCacheUntilActive: false,
 				DebuggingConfiguration: componentbaseconfig.DebuggingConfiguration{
 					EnableProfiling:           true,
 					EnableContentionProfiling: true,
@@ -1040,7 +1047,8 @@ profiles:
 				TypeMeta: metav1.TypeMeta{
 					APIVersion: v1beta3.SchemeGroupVersion.String(),
 				},
-				Parallelism: 16,
+				Parallelism:           16,
+				DelayCacheUntilActive: false,
 				DebuggingConfiguration: componentbaseconfig.DebuggingConfiguration{
 					EnableProfiling:           true,
 					EnableContentionProfiling: true,
@@ -1179,7 +1187,8 @@ profiles:
 				TypeMeta: metav1.TypeMeta{
 					APIVersion: v1.SchemeGroupVersion.String(),
 				},
-				Parallelism: 16,
+				Parallelism:           16,
+				DelayCacheUntilActive: false,
 				DebuggingConfiguration: componentbaseconfig.DebuggingConfiguration{
 					EnableProfiling:           true,
 					EnableContentionProfiling: true,
diff --git a/cmd/kube-scheduler/app/server.go b/cmd/kube-scheduler/app/server.go
index 07de3418ece..9991611e372 100644
--- a/cmd/kube-scheduler/app/server.go
+++ b/cmd/kube-scheduler/app/server.go
@@ -194,18 +194,23 @@ func Run(ctx context.Context, cc *schedulerserverconfig.CompletedConfig, sched *
 		}
 	}
 
-	// Start all informers.
-	cc.InformerFactory.Start(ctx.Done())
-	// DynInformerFactory can be nil in tests.
-	if cc.DynInformerFactory != nil {
-		cc.DynInformerFactory.Start(ctx.Done())
-	}
+	startInformersAndWaitForSync := func(ctx context.Context) {
+		// Start all informers.
+		cc.InformerFactory.Start(ctx.Done())
+		// DynInformerFactory can be nil in tests.
+		if cc.DynInformerFactory != nil {
+			cc.DynInformerFactory.Start(ctx.Done())
+		}
 
-	// Wait for all caches to sync before scheduling.
-	cc.InformerFactory.WaitForCacheSync(ctx.Done())
-	// DynInformerFactory can be nil in tests.
-	if cc.DynInformerFactory != nil {
-		cc.DynInformerFactory.WaitForCacheSync(ctx.Done())
+		// Wait for all caches to sync before scheduling.
+		cc.InformerFactory.WaitForCacheSync(ctx.Done())
+		// DynInformerFactory can be nil in tests.
+		if cc.DynInformerFactory != nil {
+			cc.DynInformerFactory.WaitForCacheSync(ctx.Done())
+		}
+	}
+	if !cc.ComponentConfig.DelayCacheUntilActive || cc.LeaderElection == nil {
+		startInformersAndWaitForSync(ctx)
 	}
 
 	// If leader election is enabled, runCommand via LeaderElector until done and exit.
@@ -213,6 +218,11 @@ func Run(ctx context.Context, cc *schedulerserverconfig.CompletedConfig, sched *
 		cc.LeaderElection.Callbacks = leaderelection.LeaderCallbacks{
 			OnStartedLeading: func(ctx context.Context) {
 				close(waitingForLeader)
+				if cc.ComponentConfig.DelayCacheUntilActive {
+					logger.Info("Starting informers and waiting for sync...")
+					startInformersAndWaitForSync(ctx)
+					logger.Info("Sync completed")
+				}
 				sched.Run(ctx)
 			},
 			OnStoppedLeading: func() {
diff --git a/pkg/generated/openapi/zz_generated.openapi.go b/pkg/generated/openapi/zz_generated.openapi.go
index 7c1073246a3..b7dad245930 100644
--- a/pkg/generated/openapi/zz_generated.openapi.go
+++ b/pkg/generated/openapi/zz_generated.openapi.go
@@ -54521,6 +54521,13 @@ func schema_k8sio_kube_scheduler_config_v1_KubeSchedulerConfiguration(ref common
 							},
 						},
 					},
+					"delayCacheUntilActive": {
+						SchemaProps: spec.SchemaProps{
+							Description: "DelayCacheUntilActive specifies when to start caching. If this is true and leader election is enabled, the scheduler will wait to fill informer caches until it is the leader. Doing so will have slower failover with the benefit of lower memory overhead while waiting to become leader. Defaults to false.",
+							Type:        []string{"boolean"},
+							Format:      "",
+						},
+					},
 				},
 				Required: []string{"leaderElection", "clientConnection"},
 			},
diff --git a/pkg/scheduler/apis/config/scheme/scheme_test.go b/pkg/scheduler/apis/config/scheme/scheme_test.go
index 0d10a19558c..d559de416d2 100644
--- a/pkg/scheduler/apis/config/scheme/scheme_test.go
+++ b/pkg/scheduler/apis/config/scheme/scheme_test.go
@@ -999,7 +999,8 @@ profiles:
 			name:    "v1beta3 in-tree and out-of-tree plugins from internal",
 			version: v1beta3.SchemeGroupVersion,
 			obj: &config.KubeSchedulerConfiguration{
-				Parallelism: 8,
+				Parallelism:           8,
+				DelayCacheUntilActive: true,
 				Profiles: []config.KubeSchedulerProfile{
 					{
 						PluginConfig: []config.PluginConfig{
@@ -1219,7 +1220,8 @@ profiles:
 			name:    "v1 in-tree and out-of-tree plugins from internal",
 			version: v1.SchemeGroupVersion,
 			obj: &config.KubeSchedulerConfiguration{
-				Parallelism: 8,
+				Parallelism:           8,
+				DelayCacheUntilActive: true,
 				Profiles: []config.KubeSchedulerProfile{
 					{
 						PluginConfig: []config.PluginConfig{
@@ -1265,6 +1267,7 @@ clientConnection:
   contentType: ""
   kubeconfig: ""
   qps: 0
+delayCacheUntilActive: true
 enableContentionProfiling: false
 enableProfiling: false
 kind: KubeSchedulerConfiguration
@@ -1315,7 +1318,8 @@ profiles:
 			name:    "v1 ignorePreferredTermsOfExistingPods is enabled",
 			version: v1.SchemeGroupVersion,
 			obj: &config.KubeSchedulerConfiguration{
-				Parallelism: 8,
+				Parallelism:           8,
+				DelayCacheUntilActive: true,
 				Profiles: []config.KubeSchedulerProfile{
 					{
 						PluginConfig: []config.PluginConfig{
@@ -1337,6 +1341,7 @@ clientConnection:
   contentType: ""
   kubeconfig: ""
   qps: 0
+delayCacheUntilActive: true
 enableContentionProfiling: false
 enableProfiling: false
 kind: KubeSchedulerConfiguration
diff --git a/pkg/scheduler/apis/config/types.go b/pkg/scheduler/apis/config/types.go
index 7157616635d..a87a8d1d6b4 100644
--- a/pkg/scheduler/apis/config/types.go
+++ b/pkg/scheduler/apis/config/types.go
@@ -96,6 +96,12 @@ type KubeSchedulerConfiguration struct {
 	// Extenders are the list of scheduler extenders, each holding the values of how to communicate
 	// with the extender. These extenders are shared by all scheduler profiles.
 	Extenders []Extender
+
+	// DelayCacheUntilActive specifies when to start caching. If this is true and leader election is enabled,
+	// the scheduler will wait to fill informer caches until it is the leader. Doing so will have slower
+	// failover with the benefit of lower memory overhead while waiting to become leader.
+	// Defaults to false.
+	DelayCacheUntilActive bool
 }
 
 // KubeSchedulerProfile is a scheduling profile.
diff --git a/pkg/scheduler/apis/config/v1/defaults_test.go b/pkg/scheduler/apis/config/v1/defaults_test.go
index 38ec7ef8f74..d2861e8f640 100644
--- a/pkg/scheduler/apis/config/v1/defaults_test.go
+++ b/pkg/scheduler/apis/config/v1/defaults_test.go
@@ -439,6 +439,44 @@ func TestSchedulerDefaults(t *testing.T) {
 				},
 			},
 		},
+		{
+			name: "set non default delayCacheUntilActive",
+			config: &configv1.KubeSchedulerConfiguration{
+				DelayCacheUntilActive: true,
+			},
+			expected: &configv1.KubeSchedulerConfiguration{
+				Parallelism:           pointer.Int32(16),
+				DelayCacheUntilActive: true,
+				DebuggingConfiguration: componentbaseconfig.DebuggingConfiguration{
+					EnableProfiling:           &enable,
+					EnableContentionProfiling: &enable,
+				},
+				LeaderElection: componentbaseconfig.LeaderElectionConfiguration{
+					LeaderElect:       pointer.Bool(true),
+					LeaseDuration:     metav1.Duration{Duration: 15 * time.Second},
+					RenewDeadline:     metav1.Duration{Duration: 10 * time.Second},
+					RetryPeriod:       metav1.Duration{Duration: 2 * time.Second},
+					ResourceLock:      "leases",
+					ResourceNamespace: "kube-system",
+					ResourceName:      "kube-scheduler",
+				},
+				ClientConnection: componentbaseconfig.ClientConnectionConfiguration{
+					QPS:         50,
+					Burst:       100,
+					ContentType: "application/vnd.kubernetes.protobuf",
+				},
+				PercentageOfNodesToScore: pointer.Int32(config.DefaultPercentageOfNodesToScore),
+				PodInitialBackoffSeconds: pointer.Int64(1),
+				PodMaxBackoffSeconds:     pointer.Int64(10),
+				Profiles: []configv1.KubeSchedulerProfile{
+					{
+						Plugins:       getDefaultPlugins(),
+						PluginConfig:  pluginConfigs,
+						SchedulerName: pointer.String("default-scheduler"),
+					},
+				},
+			},
+		},
 		{
 			name: "set non default global percentageOfNodesToScore",
 			config: &configv1.KubeSchedulerConfiguration{
diff --git a/pkg/scheduler/apis/config/v1/zz_generated.conversion.go b/pkg/scheduler/apis/config/v1/zz_generated.conversion.go
index dd8d9b23109..ca9be957b42 100644
--- a/pkg/scheduler/apis/config/v1/zz_generated.conversion.go
+++ b/pkg/scheduler/apis/config/v1/zz_generated.conversion.go
@@ -429,6 +429,7 @@ func autoConvert_v1_KubeSchedulerConfiguration_To_config_KubeSchedulerConfigurat
 		out.Profiles = nil
 	}
 	out.Extenders = *(*[]config.Extender)(unsafe.Pointer(&in.Extenders))
+	out.DelayCacheUntilActive = in.DelayCacheUntilActive
 	return nil
 }
 
@@ -466,6 +467,7 @@ func autoConvert_config_KubeSchedulerConfiguration_To_v1_KubeSchedulerConfigurat
 		out.Profiles = nil
 	}
 	out.Extenders = *(*[]v1.Extender)(unsafe.Pointer(&in.Extenders))
+	out.DelayCacheUntilActive = in.DelayCacheUntilActive
 	return nil
 }
 
diff --git a/pkg/scheduler/apis/config/v1beta3/zz_generated.conversion.go b/pkg/scheduler/apis/config/v1beta3/zz_generated.conversion.go
index 841c537c3f7..a12860fe77b 100644
--- a/pkg/scheduler/apis/config/v1beta3/zz_generated.conversion.go
+++ b/pkg/scheduler/apis/config/v1beta3/zz_generated.conversion.go
@@ -466,6 +466,7 @@ func autoConvert_config_KubeSchedulerConfiguration_To_v1beta3_KubeSchedulerConfi
 		out.Profiles = nil
 	}
 	out.Extenders = *(*[]v1beta3.Extender)(unsafe.Pointer(&in.Extenders))
+	// WARNING: in.DelayCacheUntilActive requires manual conversion: does not exist in peer-type
 	return nil
 }
 
diff --git a/staging/src/k8s.io/kube-scheduler/config/v1/types.go b/staging/src/k8s.io/kube-scheduler/config/v1/types.go
index 703516fb78c..2ee4e4cddd8 100644
--- a/staging/src/k8s.io/kube-scheduler/config/v1/types.go
+++ b/staging/src/k8s.io/kube-scheduler/config/v1/types.go
@@ -89,6 +89,12 @@ type KubeSchedulerConfiguration struct {
 	// with the extender. These extenders are shared by all scheduler profiles.
 	// +listType=set
 	Extenders []Extender `json:"extenders,omitempty"`
+
+	// DelayCacheUntilActive specifies when to start caching. If this is true and leader election is enabled,
+	// the scheduler will wait to fill informer caches until it is the leader. Doing so will have slower
+	// failover with the benefit of lower memory overhead while waiting to become leader.
+	// Defaults to false.
+	DelayCacheUntilActive bool `json:"delayCacheUntilActive,omitempty"`
 }
 
 // DecodeNestedObjects decodes plugin args for known types.