diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index 374677b5937..c03cf47e39b 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -108,6 +108,7 @@ type KubeletServer struct { ContainerRuntime string DockerDaemonContainer string ConfigureCBR0 bool + MaxPods int // Flags intended for testing @@ -226,6 +227,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) { fs.StringVar(&s.ContainerRuntime, "container_runtime", s.ContainerRuntime, "The container runtime to use. Possible values: 'docker', 'rkt'. Default: 'docker'.") fs.StringVar(&s.DockerDaemonContainer, "docker-daemon-container", s.DockerDaemonContainer, "Optional resource-only container in which to place the Docker Daemon. Empty for no container (Default: /docker-daemon).") fs.BoolVar(&s.ConfigureCBR0, "configure-cbr0", s.ConfigureCBR0, "If true, kubelet will configure cbr0 based on Node.Spec.PodCIDR.") + fs.IntVar(&s.MaxPods, "max_pods", 100, "Number of Pods that can run on this Kubelet.") // Flags intended for testing, not recommended used in production environments. fs.BoolVar(&s.ReallyCrashForTesting, "really-crash-for-testing", s.ReallyCrashForTesting, "If true, when panics occur crash. 
Intended for testing.") @@ -342,6 +344,7 @@ func (s *KubeletServer) Run(_ []string) error { Mounter: mounter, DockerDaemonContainer: s.DockerDaemonContainer, ConfigureCBR0: s.ConfigureCBR0, + MaxPods: s.MaxPods, } RunKubelet(&kcfg, nil) @@ -500,6 +503,7 @@ func SimpleKubelet(client *client.Client, ContainerRuntime: "docker", Mounter: mount.New(), DockerDaemonContainer: "/docker-daemon", + MaxPods: 32, } return &kcfg } @@ -633,6 +637,7 @@ type KubeletConfig struct { Mounter mount.Interface DockerDaemonContainer string ConfigureCBR0 bool + MaxPods int } func createAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.PodConfig, err error) { @@ -683,7 +688,8 @@ func createAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.Pod kc.ContainerRuntime, kc.Mounter, kc.DockerDaemonContainer, - kc.ConfigureCBR0) + kc.ConfigureCBR0, + kc.MaxPods) if err != nil { return nil, nil, err diff --git a/pkg/api/resource_helpers.go b/pkg/api/resource_helpers.go index faf58c73f69..ac0a870d6d9 100644 --- a/pkg/api/resource_helpers.go +++ b/pkg/api/resource_helpers.go @@ -41,6 +41,13 @@ func (self *ResourceList) Memory() *resource.Quantity { return &resource.Quantity{} } +func (self *ResourceList) MaxPods() *resource.Quantity { + if val, ok := (*self)[ResourceMaxPods]; ok { + return &val + } + return &resource.Quantity{} +} + func GetContainerStatus(statuses []ContainerStatus, name string) (ContainerStatus, bool) { for i := range statuses { if statuses[i].Name == name { diff --git a/pkg/api/types.go b/pkg/api/types.go index 19d11062cf5..50328bd30a9 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -1238,6 +1238,8 @@ const ( ResourceMemory ResourceName = "memory" // Volume size, in bytes (e,g. 5Gi = 5GiB = 5 * 1024 * 1024 * 1024) ResourceStorage ResourceName = "storage" + // Number of Pods that may be running on this Node. + ResourceMaxPods ResourceName = "maxpods" ) // ResourceList is a set of (resource name, quantity) pairs. 
diff --git a/pkg/cloudprovider/nodecontroller/nodecontroller.go b/pkg/cloudprovider/nodecontroller/nodecontroller.go index 005db108450..235b36c5a18 100644 --- a/pkg/cloudprovider/nodecontroller/nodecontroller.go +++ b/pkg/cloudprovider/nodecontroller/nodecontroller.go @@ -27,6 +27,7 @@ import ( "github.com/GoogleCloudPlatform/kubernetes/pkg/api" apierrors "github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors" + "github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource" "github.com/GoogleCloudPlatform/kubernetes/pkg/client" "github.com/GoogleCloudPlatform/kubernetes/pkg/client/record" "github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider" @@ -670,6 +671,9 @@ func (nc *NodeController) getCloudNodesWithSpec() (*api.NodeList, error) { } if resources != nil { node.Status.Capacity = resources.Capacity + if node.Status.Capacity != nil { + node.Status.Capacity[api.ResourceMaxPods] = *resource.NewQuantity(0, resource.DecimalSI) + } } instanceID, err := instances.ExternalID(node.Name) if err != nil { diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 60014070826..205a4de3e93 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -31,6 +31,7 @@ import ( "time" "github.com/GoogleCloudPlatform/kubernetes/pkg/api" + "github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource" "github.com/GoogleCloudPlatform/kubernetes/pkg/api/validation" "github.com/GoogleCloudPlatform/kubernetes/pkg/client" "github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache" @@ -57,6 +58,7 @@ import ( "github.com/GoogleCloudPlatform/kubernetes/pkg/watch" "github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm/predicates" "github.com/golang/glog" + cadvisorApi "github.com/google/cadvisor/info/v1" ) @@ -139,7 +141,8 @@ func NewMainKubelet( containerRuntime string, mounter mount.Interface, dockerDaemonContainer string, - configureCBR0 bool) (*Kubelet, error) { + configureCBR0 bool, + maxPods int) (*Kubelet, error) { if rootDirectory == "" { 
return nil, fmt.Errorf("invalid root directory %q", rootDirectory) } @@ -246,6 +249,7 @@ func NewMainKubelet( cgroupRoot: cgroupRoot, mounter: mounter, configureCBR0: configureCBR0, + maxPods: maxPods, } if plug, err := network.InitNetworkPlugin(networkPlugins, networkPluginName, &networkHost{klet}); err != nil { @@ -462,6 +466,9 @@ type Kubelet struct { // Whether or not kubelet should take responsibility for keeping cbr0 in // the correct state. configureCBR0 bool + + // Number of Pods which can be run by this Kubelet + maxPods int } // getRootDir returns the full path to the directory under which kubelet can @@ -1673,6 +1680,8 @@ func (kl *Kubelet) tryUpdateNodeStatus() error { node.Status.NodeInfo.MachineID = info.MachineID node.Status.NodeInfo.SystemUUID = info.SystemUUID node.Status.Capacity = CapacityFromMachineInfo(info) + node.Status.Capacity[api.ResourceMaxPods] = *resource.NewQuantity( + int64(kl.maxPods), resource.DecimalSI) if node.Status.NodeInfo.BootID != "" && node.Status.NodeInfo.BootID != info.BootID { // TODO: This requires a transaction, either both node status is updated diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go index 102a8664cc1..edaa8092d98 100644 --- a/pkg/kubelet/kubelet_test.go +++ b/pkg/kubelet/kubelet_test.go @@ -3310,8 +3310,9 @@ func TestUpdateNewNodeStatus(t *testing.T) { KubeProxyVersion: version.Get().String(), }, Capacity: api.ResourceList{ - api.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI), - api.ResourceMemory: *resource.NewQuantity(1024, resource.BinarySI), + api.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI), + api.ResourceMemory: *resource.NewQuantity(1024, resource.BinarySI), + api.ResourceMaxPods: *resource.NewQuantity(0, resource.DecimalSI), }, }, } @@ -3359,8 +3360,9 @@ func TestUpdateExistingNodeStatus(t *testing.T) { }, }, Capacity: api.ResourceList{ - api.ResourceCPU: *resource.NewMilliQuantity(3000, resource.DecimalSI), - api.ResourceMemory: 
*resource.NewQuantity(2048, resource.BinarySI), + api.ResourceCPU: *resource.NewMilliQuantity(3000, resource.DecimalSI), + api.ResourceMemory: *resource.NewQuantity(2048, resource.BinarySI), + api.ResourceMaxPods: *resource.NewQuantity(0, resource.DecimalSI), }, }, }, @@ -3404,8 +3406,9 @@ func TestUpdateExistingNodeStatus(t *testing.T) { KubeProxyVersion: version.Get().String(), }, Capacity: api.ResourceList{ - api.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI), - api.ResourceMemory: *resource.NewQuantity(1024, resource.BinarySI), + api.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI), + api.ResourceMemory: *resource.NewQuantity(1024, resource.BinarySI), + api.ResourceMaxPods: *resource.NewQuantity(0, resource.DecimalSI), }, }, } @@ -3488,8 +3491,9 @@ func TestUpdateNodeStatusWithoutContainerRuntime(t *testing.T) { KubeProxyVersion: version.Get().String(), }, Capacity: api.ResourceList{ - api.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI), - api.ResourceMemory: *resource.NewQuantity(1024, resource.BinarySI), + api.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI), + api.ResourceMemory: *resource.NewQuantity(1024, resource.BinarySI), + api.ResourceMaxPods: *resource.NewQuantity(0, resource.DecimalSI), }, }, } diff --git a/plugin/pkg/scheduler/algorithm/predicates/predicates.go b/plugin/pkg/scheduler/algorithm/predicates/predicates.go index 7eb3252985d..e41152d18b5 100644 --- a/plugin/pkg/scheduler/algorithm/predicates/predicates.go +++ b/plugin/pkg/scheduler/algorithm/predicates/predicates.go @@ -137,19 +137,18 @@ func CheckPodsExceedingCapacity(pods []*api.Pod, capacity api.ResourceList) (fit // PodFitsResources calculates fit based on requested, rather than used resources func (r *ResourceFit) PodFitsResources(pod *api.Pod, existingPods []*api.Pod, node string) (bool, error) { podRequest := getResourceRequest(pod) - if podRequest.milliCPU == 0 && podRequest.memory == 0 { - // no resources 
requested always fits. - return true, nil - } info, err := r.info.GetNodeInfo(node) if err != nil { return false, err } + if podRequest.milliCPU == 0 && podRequest.memory == 0 { + return int64(len(existingPods)) < info.Status.Capacity.MaxPods().Value(), nil + } - pods := []*api.Pod{} - copy(pods, existingPods) - pods = append(existingPods, pod) + pods := make([]*api.Pod, len(existingPods), len(existingPods)+1) + copy(pods, existingPods) + pods = append(pods, pod) _, exceeding := CheckPodsExceedingCapacity(pods, info.Status.Capacity) - if len(exceeding) > 0 { + if len(exceeding) > 0 || int64(len(pods)) > info.Status.Capacity.MaxPods().Value() { return false, nil } return true, nil } diff --git a/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go b/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go index 129c236027b..49b45e13ad8 100644 --- a/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go +++ b/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go @@ -44,11 +44,12 @@ func (nodes FakeNodeListInfo) GetNodeInfo(nodeName string) (*api.Node, error) { return nil, fmt.Errorf("Unable to find node: %s", nodeName) } -func makeResources(milliCPU int64, memory int64) api.NodeResources { +func makeResources(milliCPU int64, memory int64, maxPods int64) api.NodeResources { return api.NodeResources{ Capacity: api.ResourceList{ - "cpu": *resource.NewMilliQuantity(milliCPU, resource.DecimalSI), - "memory": *resource.NewQuantity(memory, resource.BinarySI), + "cpu": *resource.NewMilliQuantity(milliCPU, resource.DecimalSI), + "memory": *resource.NewQuantity(memory, resource.BinarySI), + "maxpods": *resource.NewQuantity(maxPods, resource.DecimalSI), }, } } @@ -73,7 +74,8 @@ func newResourcePod(usage ...resourceRequest) *api.Pod { } func TestPodFitsResources(t *testing.T) { - tests := []struct { + + enoughPodsTests := []struct { pod *api.Pod existingPods []*api.Pod fits bool test string }{ @@ -120,8 +122,53 @@ func TestPodFitsResources(t *testing.T) { test: "equal edge case", }, } - for _, test := range tests { - node := api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 
20).Capacity}} + + for _, test := range enoughPodsTests { + node := api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 32).Capacity}} + + fit := ResourceFit{FakeNodeInfo(node)} + fits, err := fit.PodFitsResources(test.pod, test.existingPods, "machine") + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if fits != test.fits { + t.Errorf("%s: expected: %v got %v", test.test, test.fits, fits) + } + } + + notEnoughPodsTests := []struct { + pod *api.Pod + existingPods []*api.Pod + fits bool + test string + }{ + { + pod: &api.Pod{}, + existingPods: []*api.Pod{ + newResourcePod(resourceRequest{milliCPU: 10, memory: 20}), + }, + fits: false, + test: "even without specified resources predicate fails when there's no available pod slots", + }, + { + pod: newResourcePod(resourceRequest{milliCPU: 1, memory: 1}), + existingPods: []*api.Pod{ + newResourcePod(resourceRequest{milliCPU: 5, memory: 5}), + }, + fits: false, + test: "even if both resources fit predicate fails when there's no available pod slots", + }, + { + pod: newResourcePod(resourceRequest{milliCPU: 5, memory: 1}), + existingPods: []*api.Pod{ + newResourcePod(resourceRequest{milliCPU: 5, memory: 19}), + }, + fits: false, + test: "even for equal edge case predicate fails when there's no available pod slots", + }, + } + for _, test := range notEnoughPodsTests { + node := api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 1).Capacity}} fit := ResourceFit{FakeNodeInfo(node)} fits, err := fit.PodFitsResources(test.pod, test.existingPods, "machine") diff --git a/test/integration/scheduler_test.go b/test/integration/scheduler_test.go index be8681ce4d1..7a1a49fb003 100644 --- a/test/integration/scheduler_test.go +++ b/test/integration/scheduler_test.go @@ -29,6 +29,7 @@ import ( "github.com/GoogleCloudPlatform/kubernetes/pkg/api" "github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors" + "github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource" 
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/testapi" "github.com/GoogleCloudPlatform/kubernetes/pkg/apiserver" "github.com/GoogleCloudPlatform/kubernetes/pkg/client" @@ -144,6 +145,9 @@ func DoTestUnschedulableNodes(t *testing.T, restClient *client.Client, nodeStore ObjectMeta: api.ObjectMeta{Name: "node-scheduling-test-node"}, Spec: api.NodeSpec{Unschedulable: false}, Status: api.NodeStatus{ + Capacity: api.ResourceList{ + "maxpods": *resource.NewQuantity(32, resource.DecimalSI), + }, Conditions: []api.NodeCondition{goodCondition}, }, } @@ -194,6 +198,9 @@ func DoTestUnschedulableNodes(t *testing.T, restClient *client.Client, nodeStore { makeUnSchedulable: func(t *testing.T, n *api.Node, s cache.Store, c *client.Client) { n.Status = api.NodeStatus{ + Capacity: api.ResourceList{ + "maxpods": *resource.NewQuantity(32, resource.DecimalSI), + }, Conditions: []api.NodeCondition{badCondition}, } if _, err = c.Nodes().UpdateStatus(n); err != nil { @@ -208,6 +215,9 @@ func DoTestUnschedulableNodes(t *testing.T, restClient *client.Client, nodeStore }, makeSchedulable: func(t *testing.T, n *api.Node, s cache.Store, c *client.Client) { n.Status = api.NodeStatus{ + Capacity: api.ResourceList{ + "maxpods": *resource.NewQuantity(32, resource.DecimalSI), + }, Conditions: []api.NodeCondition{goodCondition}, } if _, err = c.Nodes().UpdateStatus(n); err != nil {