diff --git a/pkg/apis/componentconfig/types.go b/pkg/apis/componentconfig/types.go
index 78abfe24611..6e935c1fd9c 100644
--- a/pkg/apis/componentconfig/types.go
+++ b/pkg/apis/componentconfig/types.go
@@ -599,6 +599,16 @@ type KubeSchedulerConfiguration struct {
 	LockObjectNamespace string
 	// LockObjectName defines the lock object name
 	LockObjectName string
+	// PolicyConfigMapName is the name of the ConfigMap object that specifies
+	// the scheduler's policy config. If UseLegacyPolicyConfig is true, scheduler
+	// uses PolicyConfigFile. If UseLegacyPolicyConfig is false and
+	// PolicyConfigMapName is not empty, the ConfigMap object with this name must
+	// exist in the default system namespace ("kube-system") before scheduler
+	// initialization.
+	PolicyConfigMapName string
+	// UseLegacyPolicyConfig tells the scheduler to ignore Policy ConfigMap and
+	// to use PolicyConfigFile if available.
+	UseLegacyPolicyConfig bool
 }
 
 // LeaderElectionConfiguration defines the configuration of leader election
diff --git a/pkg/apis/componentconfig/v1alpha1/types.go b/pkg/apis/componentconfig/v1alpha1/types.go
index 7ef07c29fb8..82a5939da3f 100644
--- a/pkg/apis/componentconfig/v1alpha1/types.go
+++ b/pkg/apis/componentconfig/v1alpha1/types.go
@@ -134,6 +134,16 @@ type KubeSchedulerConfiguration struct {
 	LockObjectNamespace string `json:"lockObjectNamespace"`
 	// LockObjectName defines the lock object name
 	LockObjectName string `json:"lockObjectName"`
+	// PolicyConfigMapName is the name of the ConfigMap object that specifies
+	// the scheduler's policy config. If UseLegacyPolicyConfig is true, scheduler
+	// uses PolicyConfigFile. If UseLegacyPolicyConfig is false and
+	// PolicyConfigMapName is not empty, the ConfigMap object with this name must
+	// exist in the default system namespace ("kube-system") before scheduler
+	// initialization.
+	PolicyConfigMapName string `json:"policyConfigMapName"`
+	// UseLegacyPolicyConfig tells the scheduler to ignore Policy ConfigMap and
+	// to use PolicyConfigFile if available.
+	UseLegacyPolicyConfig bool `json:"useLegacyPolicyConfig"`
 }
 
 // HairpinMode denotes how the kubelet should configure networking to handle
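Taken together, the two fields define a three-way precedence among the policy ConfigMap, the policy file, and the algorithm provider. A minimal standalone sketch of that rule, assuming nothing beyond the field comments above (the function and its string results are hypothetical stand-ins, not code from this patch):

```go
package main

import "fmt"

// pickPolicySource mirrors the documented precedence: the legacy flag forces
// the file path; otherwise a non-empty ConfigMap name wins; with neither set,
// the scheduler falls back to its algorithm provider.
func pickPolicySource(useLegacyPolicyConfig bool, policyConfigMapName, policyConfigFile string) string {
	if !useLegacyPolicyConfig && policyConfigMapName != "" {
		return "policy ConfigMap: " + policyConfigMapName
	}
	if policyConfigFile != "" {
		return "policy file: " + policyConfigFile
	}
	return "algorithm provider"
}

func main() {
	fmt.Println(pickPolicySource(false, "scheduler-policy", "/etc/sched/policy.json")) // ConfigMap wins
	fmt.Println(pickPolicySource(true, "scheduler-policy", "/etc/sched/policy.json"))  // legacy flag forces the file
	fmt.Println(pickPolicySource(false, "", ""))                                       // provider fallback
}
```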
diff --git a/pkg/generated/openapi/zz_generated.openapi.go b/pkg/generated/openapi/zz_generated.openapi.go
index 866525ffa71..2d2e88e5520 100644
--- a/pkg/generated/openapi/zz_generated.openapi.go
+++ b/pkg/generated/openapi/zz_generated.openapi.go
@@ -12913,6 +12913,20 @@ func GetOpenAPIDefinitions(ref openapi.ReferenceCallback) map[string]openapi.Ope
 							Format: "",
 						},
 					},
+					"policyConfigMapName": {
+						SchemaProps: spec.SchemaProps{
+							Description: "PolicyConfigMapName is the name of the ConfigMap object that specifies the scheduler's policy config. If UseLegacyPolicyConfig is true, scheduler uses PolicyConfigFile. If UseLegacyPolicyConfig is false and PolicyConfigMapName is not empty, the ConfigMap object with this name must exist in the default system namespace (\"kube-system\") before scheduler initialization.",
+							Type:        []string{"string"},
+							Format:      "",
+						},
+					},
+					"useLegacyPolicyConfig": {
+						SchemaProps: spec.SchemaProps{
+							Description: "UseLegacyPolicyConfig tells the scheduler to ignore Policy ConfigMap and to use PolicyConfigFile if available.",
+							Type:        []string{"boolean"},
+							Format:      "",
+						},
+					},
 				},
 				Required: []string{"port", "address", "algorithmProvider", "policyConfigFile", "enableProfiling", "enableContentionProfiling", "contentType", "kubeAPIQPS", "kubeAPIBurst", "schedulerName", "hardPodAffinitySymmetricWeight", "failureDomains", "leaderElection", "lockObjectNamespace", "lockObjectName"},
 			},
diff --git a/plugin/cmd/kube-scheduler/app/configurator.go b/plugin/cmd/kube-scheduler/app/configurator.go
index d2900f93bc1..6512928a539 100644
--- a/plugin/cmd/kube-scheduler/app/configurator.go
+++ b/plugin/cmd/kube-scheduler/app/configurator.go
@@ -26,6 +26,7 @@ import (
 	extensionsinformers "k8s.io/kubernetes/pkg/client/informers/informers_generated/externalversions/extensions/v1beta1"
 	"k8s.io/kubernetes/plugin/cmd/kube-scheduler/app/options"
 
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
 
@@ -71,8 +72,8 @@ func createClient(s *options.SchedulerServer) (*clientset.Clientset, error) {
 	return cli, nil
 }
 
-// createScheduler encapsulates the entire creation of a runnable scheduler.
-func createScheduler(
+// CreateScheduler encapsulates the entire creation of a runnable scheduler.
+func CreateScheduler(
 	s *options.SchedulerServer,
 	kubecli *clientset.Clientset,
 	nodeInformer coreinformers.NodeInformer,
@@ -101,38 +102,88 @@ func createScheduler(
 	configurator = &schedulerConfigurator{
 		configurator,
 		s.PolicyConfigFile,
-		s.AlgorithmProvider}
+		s.AlgorithmProvider,
+		s.PolicyConfigMapName,
+		s.UseLegacyPolicyConfig,
+	}
 
 	return scheduler.NewFromConfigurator(configurator, func(cfg *scheduler.Config) {
 		cfg.Recorder = recorder
 	})
 }
 
-// schedulerConfigurator is an interface wrapper that provides default Configuration creation based on user
-// provided config file.
+// schedulerConfigurator is an interface wrapper that provides a way to create
+// a scheduler from a user provided config file or ConfigMap object.
 type schedulerConfigurator struct {
 	scheduler.Configurator
-	policyFile        string
-	algorithmProvider string
+	policyFile            string
+	algorithmProvider     string
+	policyConfigMap       string
+	useLegacyPolicyConfig bool
 }
 
-// Create implements the interface for the Configurator, hence it is exported even through the struct is not.
+// getSchedulerPolicyConfig finds and decodes the scheduler's policy config. If no
+// such policy is found, it returns nil, nil.
+func (sc schedulerConfigurator) getSchedulerPolicyConfig() (*schedulerapi.Policy, error) {
+	var configData []byte
+	var policyConfigMapFound bool
+	var policy schedulerapi.Policy
+
+	// If not in legacy mode, try to find the policy ConfigMap.
+	if !sc.useLegacyPolicyConfig && len(sc.policyConfigMap) != 0 {
+		policyConfigMap, err := sc.GetClient().CoreV1().ConfigMaps(metav1.NamespaceSystem).Get(sc.policyConfigMap, metav1.GetOptions{})
+		if err != nil {
+			return nil, fmt.Errorf("Error getting scheduler policy ConfigMap: %v.", err)
+		}
+		if policyConfigMap != nil {
+			// We expect the first element in the Data member of the ConfigMap to
+			// contain the policy config.
+			if len(policyConfigMap.Data) != 1 {
+				return nil, fmt.Errorf("ConfigMap %v has %v entries in its 'Data'. It must have only one.", sc.policyConfigMap, len(policyConfigMap.Data))
+			}
+			policyConfigMapFound = true
+			// This loop should iterate only once, as we have already checked the length of Data.
+			for _, val := range policyConfigMap.Data {
+				glog.V(5).Infof("Scheduler policy ConfigMap: %v", val)
+				configData = []byte(val)
+			}
+		}
+	}
+
+	// If we are in legacy mode or the ConfigMap name is empty, try to use the
+	// policy config file.
+	if !policyConfigMapFound {
+		if _, err := os.Stat(sc.policyFile); err != nil {
+			// No config file is found.
+			return nil, nil
+		}
+		var err error
+		configData, err = ioutil.ReadFile(sc.policyFile)
+		if err != nil {
+			return nil, fmt.Errorf("unable to read policy config: %v", err)
+		}
+	}
+
+	if err := runtime.DecodeInto(latestschedulerapi.Codec, configData, &policy); err != nil {
+		return nil, fmt.Errorf("invalid configuration: %v", err)
+	}
+	return &policy, nil
+}
+
+// Create implements the interface for the Configurator, hence it is exported
+// even though the struct is not.
 func (sc schedulerConfigurator) Create() (*scheduler.Config, error) {
-	if _, err := os.Stat(sc.policyFile); err != nil {
+	policy, err := sc.getSchedulerPolicyConfig()
+	if err != nil {
+		return nil, err
+	}
+	// If no policy is found, create the scheduler from the algorithm provider.
+	if policy == nil {
 		if sc.Configurator != nil {
 			return sc.Configurator.CreateFromProvider(sc.algorithmProvider)
 		}
 		return nil, fmt.Errorf("Configurator was nil")
 	}
-	// policy file is valid, try to create a configuration from it.
-	var policy schedulerapi.Policy
-	configData, err := ioutil.ReadFile(sc.policyFile)
-	if err != nil {
-		return nil, fmt.Errorf("unable to read policy config: %v", err)
-	}
-	if err := runtime.DecodeInto(latestschedulerapi.Codec, configData, &policy); err != nil {
-		return nil, fmt.Errorf("invalid configuration: %v", err)
-	}
-	return sc.CreateFromConfig(policy)
+	return sc.CreateFromConfig(*policy)
 }
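The `len(policyConfigMap.Data) != 1` check above means the ConfigMap's single key name is irrelevant; only its value is read as the policy payload. A self-contained sketch of that convention, with a hypothetical `soleValue` helper and no Kubernetes dependencies:

```go
package main

import "fmt"

// soleValue returns the value of the only entry in data, mirroring the
// "exactly one key" rule the configurator enforces; the key name is ignored.
func soleValue(data map[string]string) (string, error) {
	if len(data) != 1 {
		return "", fmt.Errorf("expected exactly one entry in Data, got %d", len(data))
	}
	for _, v := range data { // iterates once by construction
		return v, nil
	}
	return "", nil // unreachable
}

func main() {
	policy, err := soleValue(map[string]string{
		"scheduler-policy-config.json": `{"kind":"Policy","apiVersion":"v1"}`,
	})
	fmt.Println(policy, err)

	_, err = soleValue(map[string]string{"a": "1", "b": "2"})
	fmt.Println(err) // expected exactly one entry in Data, got 2
}
```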
diff --git a/plugin/cmd/kube-scheduler/app/options/options.go b/plugin/cmd/kube-scheduler/app/options/options.go
index eb13671f49c..f2b9a635689 100644
--- a/plugin/cmd/kube-scheduler/app/options/options.go
+++ b/plugin/cmd/kube-scheduler/app/options/options.go
@@ -63,7 +63,9 @@ func (s *SchedulerServer) AddFlags(fs *pflag.FlagSet) {
 	fs.Int32Var(&s.Port, "port", s.Port, "The port that the scheduler's http service runs on")
 	fs.StringVar(&s.Address, "address", s.Address, "The IP address to serve on (set to 0.0.0.0 for all interfaces)")
 	fs.StringVar(&s.AlgorithmProvider, "algorithm-provider", s.AlgorithmProvider, "The scheduling algorithm provider to use, one of: "+factory.ListAlgorithmProviders())
-	fs.StringVar(&s.PolicyConfigFile, "policy-config-file", s.PolicyConfigFile, "File with scheduler policy configuration")
+	fs.StringVar(&s.PolicyConfigFile, "policy-config-file", s.PolicyConfigFile, "File with scheduler policy configuration. This file is used if policy ConfigMap is not provided or scheduler is using legacy policy config.")
+	fs.StringVar(&s.PolicyConfigMapName, "policy-configmap", s.PolicyConfigMapName, "Name of the ConfigMap object that contains scheduler's policy configuration. It must exist in the system namespace before scheduler initialization if scheduler is not using legacy policy config.")
+	fs.BoolVar(&s.UseLegacyPolicyConfig, "use-legacy-policy-config", false, "When set to true, scheduler will ignore the policy ConfigMap and use the policy config file")
 	fs.BoolVar(&s.EnableProfiling, "profiling", true, "Enable profiling via web interface host:port/debug/pprof/")
 	fs.BoolVar(&s.EnableContentionProfiling, "contention-profiling", false, "Enable lock contention profiling, if profiling is enabled")
 	fs.StringVar(&s.Master, "master", s.Master, "The address of the Kubernetes API server (overrides any value in kubeconfig)")
@@ -80,6 +82,5 @@ func (s *SchedulerServer) AddFlags(fs *pflag.FlagSet) {
 	fs.StringVar(&s.FailureDomains, "failure-domains", api.DefaultFailureDomains, "Indicate the \"all topologies\" set for an empty topologyKey when it's used for PreferredDuringScheduling pod anti-affinity.")
 	fs.MarkDeprecated("failure-domains", "Doesn't have any effect. Will be removed in future version.")
 	leaderelection.BindFlags(&s.LeaderElection, fs)
-
 	utilfeature.DefaultFeatureGate.AddFlag(fs)
 }
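For reference, the new flags bind through spf13/pflag exactly like the other scheduler options. A runnable sketch using a stand-in struct instead of the real `SchedulerServer` (only the three policy-related fields are modeled):

```go
package main

import (
	"fmt"

	"github.com/spf13/pflag"
)

// schedulerOpts is a hypothetical stand-in for the real SchedulerServer.
type schedulerOpts struct {
	PolicyConfigFile      string
	PolicyConfigMapName   string
	UseLegacyPolicyConfig bool
}

func main() {
	o := schedulerOpts{}
	fs := pflag.NewFlagSet("kube-scheduler", pflag.ContinueOnError)
	fs.StringVar(&o.PolicyConfigFile, "policy-config-file", o.PolicyConfigFile, "File with scheduler policy configuration.")
	fs.StringVar(&o.PolicyConfigMapName, "policy-configmap", o.PolicyConfigMapName, "Name of the scheduler policy ConfigMap.")
	fs.BoolVar(&o.UseLegacyPolicyConfig, "use-legacy-policy-config", false, "Ignore the policy ConfigMap and use the policy config file.")

	// Simulate a command line that selects the ConfigMap path.
	if err := fs.Parse([]string{"--policy-configmap=scheduler-policy"}); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", o)
}
```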
diff --git a/plugin/cmd/kube-scheduler/app/server.go b/plugin/cmd/kube-scheduler/app/server.go
index fd70e7cf22e..c164a6283d5 100644
--- a/plugin/cmd/kube-scheduler/app/server.go
+++ b/plugin/cmd/kube-scheduler/app/server.go
@@ -73,7 +73,7 @@ func Run(s *options.SchedulerServer) error {
 
 	informerFactory := informers.NewSharedInformerFactory(kubecli, 0)
 
-	sched, err := createScheduler(
+	sched, err := CreateScheduler(
 		s,
 		kubecli,
 		informerFactory.Core().V1().Nodes(),
diff --git a/test/integration/scheduler/scheduler_test.go b/test/integration/scheduler/scheduler_test.go
index 7a98b3cd181..a8ec0a738bc 100644
--- a/test/integration/scheduler/scheduler_test.go
+++ b/test/integration/scheduler/scheduler_test.go
@@ -39,6 +39,8 @@ import (
 	"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
 	informers "k8s.io/kubernetes/pkg/client/informers/informers_generated/externalversions"
 	corelisters "k8s.io/kubernetes/pkg/client/listers/core/v1"
+	"k8s.io/kubernetes/plugin/cmd/kube-scheduler/app"
+	"k8s.io/kubernetes/plugin/cmd/kube-scheduler/app/options"
 	"k8s.io/kubernetes/plugin/pkg/scheduler"
 	_ "k8s.io/kubernetes/plugin/pkg/scheduler/algorithmprovider"
 	"k8s.io/kubernetes/plugin/pkg/scheduler/factory"
@@ -53,6 +55,162 @@ type nodeStateManager struct {
 	makeUnSchedulable nodeMutationFunc
 }
 
+// TestSchedulerCreationFromConfigMap verifies that the scheduler can be created
+// from configurations provided by a ConfigMap object.
+func TestSchedulerCreationFromConfigMap(t *testing.T) {
+	_, s := framework.RunAMaster(nil)
+	defer s.Close()
+
+	ns := framework.CreateTestingNamespace("configmap", s, t)
+	defer framework.DeleteTestingNamespace(ns, s, t)
+
+	clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: s.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &api.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
+	informerFactory := informers.NewSharedInformerFactory(clientSet, 0)
+
+	defer clientSet.Core().Nodes().DeleteCollection(nil, metav1.ListOptions{})
+
+	// Add a ConfigMap object.
+	configPolicyName := "scheduler-custom-policy-config"
+	policyConfigMap := v1.ConfigMap{
+		ObjectMeta: metav1.ObjectMeta{Namespace: metav1.NamespaceSystem, Name: configPolicyName},
+		Data: map[string]string{
+			"scheduler-policy-config.json": `{
+			"kind" : "Policy",
+			"apiVersion" : "v1",
+			"predicates" : [
+				{"name" : "PodFitsHostPorts"},
+				{"name" : "PodFitsResources"},
+				{"name" : "NoDiskConflict"},
+				{"name" : "NoVolumeZoneConflict"},
+				{"name" : "MatchNodeSelector"},
+				{"name" : "HostName"}
+			],
+			"priorities" : [
+				{"name" : "LeastRequestedPriority", "weight" : 1},
+				{"name" : "BalancedResourceAllocation", "weight" : 1},
+				{"name" : "ServiceSpreadingPriority", "weight" : 1},
+				{"name" : "EqualPriority", "weight" : 1}
+			]
+			}`,
+		},
+	}
+	policyConfigMap.APIVersion = api.Registry.GroupOrDie(v1.GroupName).GroupVersion.String()
+	clientSet.Core().ConfigMaps(metav1.NamespaceSystem).Create(&policyConfigMap)
+
+	eventBroadcaster := record.NewBroadcaster()
+	eventBroadcaster.StartRecordingToSink(&clientv1core.EventSinkImpl{Interface: clientv1core.New(clientSet.Core().RESTClient()).Events("")})
+	ss := options.NewSchedulerServer()
+	ss.HardPodAffinitySymmetricWeight = v1.DefaultHardPodAffinitySymmetricWeight
+	ss.PolicyConfigMapName = configPolicyName
+	sched, err := app.CreateScheduler(ss, clientSet,
+		informerFactory.Core().V1().Nodes(),
+		informerFactory.Core().V1().PersistentVolumes(),
+		informerFactory.Core().V1().PersistentVolumeClaims(),
+		informerFactory.Core().V1().ReplicationControllers(),
+		informerFactory.Extensions().V1beta1().ReplicaSets(),
+		informerFactory.Apps().V1beta1().StatefulSets(),
+		informerFactory.Core().V1().Services(),
+		eventBroadcaster.NewRecorder(api.Scheme, clientv1.EventSource{Component: v1.DefaultSchedulerName}),
+	)
+	if err != nil {
+		t.Fatalf("Error creating scheduler: %v", err)
+	}
+
+	stop := make(chan struct{})
+	informerFactory.Start(stop)
+
+	sched.Run()
+	defer close(stop)
+
+	DoTestUnschedulableNodes(t, clientSet, ns, informerFactory.Core().V1().Nodes().Lister())
+}
+
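The payload embedded above follows the scheduler's Policy schema. A standalone sketch that decodes the same shape with simplified stand-in types (the real types live in `plugin/pkg/scheduler/api`; only the fields the test payload uses are modeled):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// predicateSpec, prioritySpec, and policySpec are hypothetical stand-ins for
// the real scheduler API types.
type predicateSpec struct {
	Name string `json:"name"`
}

type prioritySpec struct {
	Name   string `json:"name"`
	Weight int    `json:"weight"`
}

type policySpec struct {
	Kind       string          `json:"kind"`
	APIVersion string          `json:"apiVersion"`
	Predicates []predicateSpec `json:"predicates"`
	Priorities []prioritySpec  `json:"priorities"`
}

func main() {
	raw := `{
		"kind": "Policy",
		"apiVersion": "v1",
		"predicates": [{"name": "PodFitsResources"}, {"name": "HostName"}],
		"priorities": [{"name": "LeastRequestedPriority", "weight": 1}]
	}`
	var p policySpec
	if err := json.Unmarshal([]byte(raw), &p); err != nil {
		panic(err)
	}
	fmt.Printf("decoded %d predicates and %d priorities\n", len(p.Predicates), len(p.Priorities))
}
```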
+// TestSchedulerCreationFromNonExistentConfigMap ensures that creation of the
+// scheduler from a non-existent ConfigMap fails.
+func TestSchedulerCreationFromNonExistentConfigMap(t *testing.T) {
+	_, s := framework.RunAMaster(nil)
+	defer s.Close()
+
+	ns := framework.CreateTestingNamespace("configmap", s, t)
+	defer framework.DeleteTestingNamespace(ns, s, t)
+
+	clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: s.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &api.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
+	defer clientSet.Core().Nodes().DeleteCollection(nil, metav1.ListOptions{})
+
+	informerFactory := informers.NewSharedInformerFactory(clientSet, 0)
+
+	eventBroadcaster := record.NewBroadcaster()
+	eventBroadcaster.StartRecordingToSink(&clientv1core.EventSinkImpl{Interface: clientv1core.New(clientSet.Core().RESTClient()).Events("")})
+
+	ss := options.NewSchedulerServer()
+	ss.PolicyConfigMapName = "non-existent-config"
+
+	_, err := app.CreateScheduler(ss, clientSet,
+		informerFactory.Core().V1().Nodes(),
+		informerFactory.Core().V1().PersistentVolumes(),
+		informerFactory.Core().V1().PersistentVolumeClaims(),
+		informerFactory.Core().V1().ReplicationControllers(),
+		informerFactory.Extensions().V1beta1().ReplicaSets(),
+		informerFactory.Apps().V1beta1().StatefulSets(),
+		informerFactory.Core().V1().Services(),
+		eventBroadcaster.NewRecorder(api.Scheme, clientv1.EventSource{Component: v1.DefaultSchedulerName}),
+	)
+	if err == nil {
+		t.Fatalf("Creation of the scheduler didn't fail even though the policy ConfigMap didn't exist.")
+	}
+
+	stop := make(chan struct{})
+	informerFactory.Start(stop)
+
+	defer close(stop)
+}
+
+// TestSchedulerCreationInLegacyMode ensures that creation of the scheduler
+// works fine when legacy mode is enabled.
+func TestSchedulerCreationInLegacyMode(t *testing.T) {
+	_, s := framework.RunAMaster(nil)
+	defer s.Close()
+
+	ns := framework.CreateTestingNamespace("configmap", s, t)
+	defer framework.DeleteTestingNamespace(ns, s, t)
+
+	clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: s.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &api.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
+	defer clientSet.Core().Nodes().DeleteCollection(nil, metav1.ListOptions{})
+
+	informerFactory := informers.NewSharedInformerFactory(clientSet, 0)
+
+	eventBroadcaster := record.NewBroadcaster()
+	eventBroadcaster.StartRecordingToSink(&clientv1core.EventSinkImpl{Interface: clientv1core.New(clientSet.Core().RESTClient()).Events("")})
+
+	ss := options.NewSchedulerServer()
+	ss.HardPodAffinitySymmetricWeight = v1.DefaultHardPodAffinitySymmetricWeight
+	ss.PolicyConfigMapName = "non-existent-configmap"
+	ss.UseLegacyPolicyConfig = true
+
+	sched, err := app.CreateScheduler(ss, clientSet,
+		informerFactory.Core().V1().Nodes(),
+		informerFactory.Core().V1().PersistentVolumes(),
+		informerFactory.Core().V1().PersistentVolumeClaims(),
+		informerFactory.Core().V1().ReplicationControllers(),
+		informerFactory.Extensions().V1beta1().ReplicaSets(),
+		informerFactory.Apps().V1beta1().StatefulSets(),
+		informerFactory.Core().V1().Services(),
+		eventBroadcaster.NewRecorder(api.Scheme, clientv1.EventSource{Component: v1.DefaultSchedulerName}),
+	)
+	if err != nil {
+		t.Fatalf("Creation of scheduler in legacy mode failed: %v", err)
+	}
+
+	stop := make(chan struct{})
+	informerFactory.Start(stop)
+
+	sched.Run()
+	defer close(stop)
+}
+
 func TestUnschedulableNodes(t *testing.T) {
 	_, s := framework.RunAMaster(nil)
 	defer s.Close()
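Operators must create the policy ConfigMap in `kube-system` before starting the scheduler, as these tests assume. A hedged sketch of doing that with client-go; note it uses present-day client-go import paths and the context-taking `Create` signature, which differ from the vendored client this patch targets:

```go
package main

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	// Build a client from a kubeconfig; the path here is a placeholder.
	cfg, err := clientcmd.BuildConfigFromFlags("", "/path/to/kubeconfig")
	if err != nil {
		panic(err)
	}
	cs, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		panic(err)
	}

	// One Data entry only: the configurator rejects ConfigMaps with more.
	cm := &v1.ConfigMap{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "scheduler-custom-policy-config",
			Namespace: metav1.NamespaceSystem,
		},
		Data: map[string]string{
			"scheduler-policy-config.json": `{"kind":"Policy","apiVersion":"v1"}`,
		},
	}
	if _, err := cs.CoreV1().ConfigMaps(metav1.NamespaceSystem).Create(context.TODO(), cm, metav1.CreateOptions{}); err != nil {
		panic(err)
	}
	fmt.Println("policy ConfigMap created in kube-system")
}
```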