diff --git a/hack/test-go.sh b/hack/test-go.sh
index 2406878eb02..0d0148ba30c 100755
--- a/hack/test-go.sh
+++ b/hack/test-go.sh
@@ -39,6 +39,7 @@ kube::test::find_dirs() {
-o -path './test/e2e/*' \
-o -path './test/e2e_node/*' \
-o -path './test/integration/*' \
+ -o -path './test/component/scheduler/perf/*' \
\) -prune \
\) -name '*_test.go' -print0 | xargs -0n1 dirname | sed 's|^\./||' | sort -u
)
diff --git a/test/component/scheduler/perf/README.md b/test/component/scheduler/perf/README.md
new file mode 100644
index 00000000000..edbcc13f61d
--- /dev/null
+++ b/test/component/scheduler/perf/README.md
@@ -0,0 +1,75 @@
+**PLEASE NOTE: This document applies to the HEAD of the source tree.**
+
+If you are using a released version of Kubernetes, you should
+refer to the docs that go with that version.
+
+The latest release of this document can be found
+[here](http://releases.k8s.io/release-1.1/docs/proposals/choosing-scheduler.md).
+
+Documentation for other releases can be found at
+[releases.k8s.io](http://releases.k8s.io).
+
+---
+
+Scheduler Performance Test
+======
+
+Motivation
+------
+We already have a performance testing system -- Kubemark. However, Kubemark requires setting up and bootstrapping a whole cluster, which takes a lot of time.
+
+We want a standard way to reproduce scheduling latency metrics results and to benchmark the scheduler as simply and quickly as possible. We have the following goals:
+
+- Save time on testing
+  - The test and benchmark can be run on a single box.
+    We only set up the components necessary for scheduling, without booting up a whole cluster.
+- Profile runtime metrics to find bottlenecks
+  - Write scheduler integration tests that focus on performance measurement.
+    Take advantage of the Go profiling tools to collect fine-grained metrics,
+    such as CPU, memory, and block profiles.
+- Reproduce test results easily
+  - We want a well-known place to run performance-related tests for the scheduler.
+    Developers should be able to run one script to collect all the information they need.
+
+Currently the test suite has the following:
+
+- density test (implemented as a regular Go test)
+  - schedules 30k pods on 1000 (fake) nodes and 3k pods on 100 (fake) nodes
+  - prints the scheduling rate every second
+  - shows how the rate changes as the number of scheduled pods grows
+- benchmark
+  - uses `go test -bench` and reports nanoseconds/op
+  - schedules b.N pods when the cluster already has N nodes and P scheduled pods. Since one round takes a relatively long time, b.N is kept small: 10 - 100. A sketch of adding a new benchmark case follows this list.
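+
+Each benchmark case is a thin wrapper around a shared helper, so adding a new
+node/pod combination is a one-liner. For example, a hypothetical 500-node,
+500-pod case (not part of the current suite) would look like:
+
+```
+// BenchmarkScheduling500Nodes500Pods benchmarks the scheduling rate
+// when the cluster has 500 nodes and 500 scheduled pods.
+// Hypothetical example; pick the numbers that match your scenario.
+func BenchmarkScheduling500Nodes500Pods(b *testing.B) {
+	benchmarkScheduling(500, 500, b)
+}
+```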
+
+
+How To Run
+------
+```
+cd kubernetes/test/component/scheduler/perf
+./test-performance.sh
+```
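+
+`test-performance.sh` compiles the test binary, first runs only the
+benchmarks (`-test.bench=. -test.run=xxxx`, writing a CPU profile to
+`prof.out`), and then runs the density tests (`-test.run=. -test.timeout=60m`).
+You can invoke the compiled binary with just one of those flag sets to run
+either half on its own, and inspect the CPU profile afterwards with
+`go tool pprof <test-binary> prof.out`.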
+
diff --git a/test/component/scheduler/perf/scheduler_bench_test.go b/test/component/scheduler/perf/scheduler_bench_test.go
new file mode 100644
index 00000000000..202cbca58e1
--- /dev/null
+++ b/test/component/scheduler/perf/scheduler_bench_test.go
@@ -0,0 +1,79 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package benchmark
+
+import (
+ "testing"
+ "time"
+)
+
+// BenchmarkScheduling100Nodes0Pods benchmarks the scheduling rate
+// when the cluster has 100 nodes and 0 scheduled pods
+func BenchmarkScheduling100Nodes0Pods(b *testing.B) {
+ benchmarkScheduling(100, 0, b)
+}
+
+// BenchmarkScheduling100Nodes1000Pods benchmarks the scheduling rate
+// when the cluster has 100 nodes and 1000 scheduled pods
+func BenchmarkScheduling100Nodes1000Pods(b *testing.B) {
+ benchmarkScheduling(100, 1000, b)
+}
+
+// BenchmarkScheduling1000Nodes0Pods benchmarks the scheduling rate
+// when the cluster has 1000 nodes and 0 scheduled pods
+func BenchmarkScheduling1000Nodes0Pods(b *testing.B) {
+ benchmarkScheduling(1000, 0, b)
+}
+
+// BenchmarkScheduling1000Nodes1000Pods benchmarks the scheduling rate
+// when the cluster has 1000 nodes and 1000 scheduled pods
+func BenchmarkScheduling1000Nodes1000Pods(b *testing.B) {
+ benchmarkScheduling(1000, 1000, b)
+}
+
+// benchmarkScheduling benchmarks the scheduling rate with a specific number of nodes
+// and a specific number of pods already scheduled. Since one operation takes a
+// relatively long time, b.N should be small: 10 - 100.
+func benchmarkScheduling(numNodes, numScheduledPods int, b *testing.B) {
+ schedulerConfigFactory, finalFunc := mustSetupScheduler()
+ defer finalFunc()
+ c := schedulerConfigFactory.Client
+
+ makeNodes(c, numNodes)
+ makePods(c, numScheduledPods)
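+	// Wait until the initial numScheduledPods pods are scheduled before the
+	// timer is reset, so that setup cost is excluded from the measurement.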
+ for {
+ scheduled := schedulerConfigFactory.ScheduledPodLister.Store.List()
+ if len(scheduled) >= numScheduledPods {
+ break
+ }
+ time.Sleep(1 * time.Second)
+ }
+ // start benchmark
+ b.ResetTimer()
+ makePods(c, b.N)
+ for {
+		// This can potentially affect the performance of the scheduler, since List() is done under a mutex.
+ // TODO: Setup watch on apiserver and wait until all pods scheduled.
+ scheduled := schedulerConfigFactory.ScheduledPodLister.Store.List()
+ if len(scheduled) >= numScheduledPods+b.N {
+ break
+ }
+		// Note: the sleep might introduce a slight deviation in the accuracy of the
+		// benchmark results. Since the total time measured is relatively large, it
+		// should not be a concern.
+ time.Sleep(100 * time.Millisecond)
+ }
+}
diff --git a/test/component/scheduler/perf/scheduler_test.go b/test/component/scheduler/perf/scheduler_test.go
new file mode 100644
index 00000000000..5c54851a64c
--- /dev/null
+++ b/test/component/scheduler/perf/scheduler_test.go
@@ -0,0 +1,61 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package benchmark
+
+import (
+ "fmt"
+ "testing"
+ "time"
+)
+
+// TestSchedule100Node3KPods schedules 3k pods on 100 nodes.
+func TestSchedule100Node3KPods(t *testing.T) {
+ schedulePods(100, 3000)
+}
+
+// TestSchedule1000Node30KPods schedules 30k pods on 1000 nodes.
+func TestSchedule1000Node30KPods(t *testing.T) {
+ schedulePods(1000, 30000)
+}
+
+// schedulePods schedules a specific number of pods on a specific number of nodes.
+// It is used to learn the scheduling throughput on clusters of various sizes, and
+// how the throughput changes as more and more pods are scheduled.
+// It does not return until all pods are scheduled.
+func schedulePods(numNodes, numPods int) {
+ schedulerConfigFactory, destroyFunc := mustSetupScheduler()
+ defer destroyFunc()
+ c := schedulerConfigFactory.Client
+
+ makeNodes(c, numNodes)
+ makePods(c, numPods)
+
+ prev := 0
+ start := time.Now()
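+	// Every second, print the number of pods scheduled during that second
+	// (the rate) alongside the running total.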
+ for {
+		// This can potentially affect the performance of the scheduler, since List() is done under a mutex.
+		// Listing 10000 pods is an expensive operation, so running it frequently may impact the scheduler.
+ // TODO: Setup watch on apiserver and wait until all pods scheduled.
+ scheduled := schedulerConfigFactory.ScheduledPodLister.Store.List()
+ fmt.Printf("%ds\trate: %d\ttotal: %d\n", time.Since(start)/time.Second, len(scheduled)-prev, len(scheduled))
+ if len(scheduled) >= numPods {
+ return
+ }
+ prev = len(scheduled)
+ time.Sleep(1 * time.Second)
+ }
+}
diff --git a/test/component/scheduler/perf/test-performance.sh b/test/component/scheduler/perf/test-performance.sh
new file mode 100755
index 00000000000..50257ecb12b
--- /dev/null
+++ b/test/component/scheduler/perf/test-performance.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+
+# Copyright 2015 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+pushd "../../../.."
+source "./hack/lib/util.sh"
+source "./hack/lib/logging.sh"
+source "./hack/lib/etcd.sh"
+popd
+
+cleanup() {
+ kube::etcd::cleanup
+ kube::log::status "performance test cleanup complete"
+}
+
+trap cleanup EXIT
+
+kube::etcd::start
+kube::log::status "performance test start"
+
+# TODO: set log-dir and prof output dir.
+DIR_BASENAME=$(basename "$(pwd)")
+go test -c -o "${DIR_BASENAME}.test"
+# We use the benchmark suite for profiling, because it only runs a few pods
+# and therefore theoretically has less variance.
+"./${DIR_BASENAME}.test" -test.bench=. -test.run=xxxx -test.cpuprofile=prof.out -logtostderr=false
+kube::log::status "benchmark tests finished"
+# Run the density tests. They might take a long time.
+"./${DIR_BASENAME}.test" -test.run=. -test.timeout=60m
+kube::log::status "density tests finished"
diff --git a/test/component/scheduler/perf/util.go b/test/component/scheduler/perf/util.go
new file mode 100644
index 00000000000..9a262740a8d
--- /dev/null
+++ b/test/component/scheduler/perf/util.go
@@ -0,0 +1,158 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package benchmark
+
+import (
+ "net/http"
+ "net/http/httptest"
+
+ "github.com/golang/glog"
+ "k8s.io/kubernetes/pkg/api"
+ "k8s.io/kubernetes/pkg/api/resource"
+ "k8s.io/kubernetes/pkg/api/testapi"
+ "k8s.io/kubernetes/pkg/client/record"
+ client "k8s.io/kubernetes/pkg/client/unversioned"
+ "k8s.io/kubernetes/pkg/master"
+ "k8s.io/kubernetes/plugin/pkg/scheduler"
+ _ "k8s.io/kubernetes/plugin/pkg/scheduler/algorithmprovider"
+ "k8s.io/kubernetes/plugin/pkg/scheduler/factory"
+ "k8s.io/kubernetes/test/integration/framework"
+)
+
+// mustSetupScheduler starts the following components:
+// - k8s api server (a.k.a. master)
+// - scheduler
+// It returns the scheduler config factory and a destroyFunc that should be
+// used to clean up the created resources when finished.
+// Notes on rate limiters:
+// - The BindPodsRateLimiter is nil, meaning no rate limits.
+// - The client rate limit is set to 5000 QPS, with a burst of 5000.
+func mustSetupScheduler() (schedulerConfigFactory *factory.ConfigFactory, destroyFunc func()) {
+ framework.DeleteAllEtcdKeys()
+
+ var m *master.Master
+ masterConfig := framework.NewIntegrationTestMasterConfig()
+ m = master.New(masterConfig)
+ s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
+ m.Handler.ServeHTTP(w, req)
+ }))
+
+ c := client.NewOrDie(&client.Config{
+ Host: s.URL,
+ GroupVersion: testapi.Default.GroupVersion(),
+ QPS: 5000.0,
+ Burst: 5000,
+ })
+
+ schedulerConfigFactory = factory.NewConfigFactory(c, nil)
+ schedulerConfig, err := schedulerConfigFactory.Create()
+ if err != nil {
+		panic("Couldn't create scheduler config: " + err.Error())
+ }
+ eventBroadcaster := record.NewBroadcaster()
+ schedulerConfig.Recorder = eventBroadcaster.NewRecorder(api.EventSource{Component: "scheduler"})
+ eventBroadcaster.StartRecordingToSink(c.Events(""))
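+	// Run starts the scheduling loop in the background and returns immediately.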
+ scheduler.New(schedulerConfig).Run()
+
+ destroyFunc = func() {
+ glog.Infof("destroying")
+ close(schedulerConfig.StopEverything)
+ s.Close()
+ glog.Infof("destroyed")
+ }
+ return
+}
+
+func makeNodes(c client.Interface, nodeCount int) {
+ glog.Infof("making %d nodes", nodeCount)
+ baseNode := &api.Node{
+ ObjectMeta: api.ObjectMeta{
+ GenerateName: "scheduler-test-node-",
+ },
+ Spec: api.NodeSpec{
+ ExternalID: "foobar",
+ },
+ Status: api.NodeStatus{
+ Capacity: api.ResourceList{
+ api.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
+ api.ResourceCPU: resource.MustParse("4"),
+ api.ResourceMemory: resource.MustParse("32Gi"),
+ },
+ Phase: api.NodeRunning,
+ Conditions: []api.NodeCondition{
+ {Type: api.NodeReady, Status: api.ConditionTrue},
+ },
+ },
+ }
+ for i := 0; i < nodeCount; i++ {
+ if _, err := c.Nodes().Create(baseNode); err != nil {
+ panic("error creating node: " + err.Error())
+ }
+ }
+}
+
+// makePods creates the specified number of pods and lets the scheduler place them.
+// It returns without waiting for the worker goroutines to finish; callers should poll
+// the scheduler's ScheduledPodLister until the expected number of pods is scheduled.
+// Currently every pod goes through the full scheduling path, so setting up a large
+// number of pods is very slow.
+// TODO: Setup pods evenly on all nodes and quickly/non-linearly.
+func makePods(c client.Interface, podCount int) {
+ glog.Infof("making %d pods", podCount)
+ basePod := &api.Pod{
+ ObjectMeta: api.ObjectMeta{
+ GenerateName: "scheduler-test-pod-",
+ },
+ Spec: api.PodSpec{
+ Containers: []api.Container{{
+ Name: "pause",
+ Image: "gcr.io/google_containers/pause:1.0",
+ Resources: api.ResourceRequirements{
+ Limits: api.ResourceList{
+ api.ResourceCPU: resource.MustParse("100m"),
+ api.ResourceMemory: resource.MustParse("500Mi"),
+ },
+ Requests: api.ResourceList{
+ api.ResourceCPU: resource.MustParse("100m"),
+ api.ResourceMemory: resource.MustParse("500Mi"),
+ },
+ },
+ }},
+ },
+ }
+ threads := 30
+ remaining := make(chan int, 1000)
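+	// A single producer feeds pod indices into the channel; a pool of worker
+	// goroutines drains it, creating one pod per index.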
+ go func() {
+ for i := 0; i < podCount; i++ {
+ remaining <- i
+ }
+ close(remaining)
+ }()
+ for i := 0; i < threads; i++ {
+ go func() {
+ for {
+ _, ok := <-remaining
+ if !ok {
+ return
+ }
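+			// Retry the create until it succeeds; the apiserver may return
+			// transient errors under this load.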
+ for {
+ _, err := c.Pods("default").Create(basePod)
+ if err == nil {
+ break
+ }
+ }
+ }
+ }()
+ }
+}
diff --git a/test/integration/scheduler_test.go b/test/integration/scheduler_test.go
index bdbf8a3a009..d7c7b60530f 100644
--- a/test/integration/scheduler_test.go
+++ b/test/integration/scheduler_test.go
@@ -24,7 +24,6 @@ import (
"fmt"
"net/http"
"net/http/httptest"
- "sync"
"testing"
"time"
@@ -274,133 +273,3 @@ func DoTestUnschedulableNodes(t *testing.T, restClient *client.Client, nodeStore
}
}
}
-
-func BenchmarkScheduling(b *testing.B) {
- framework.DeleteAllEtcdKeys()
-
- var m *master.Master
- s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
- m.Handler.ServeHTTP(w, req)
- }))
- defer s.Close()
-
- masterConfig := framework.NewIntegrationTestMasterConfig()
- m = master.New(masterConfig)
-
- c := client.NewOrDie(&client.Config{
- Host: s.URL,
- GroupVersion: testapi.Default.GroupVersion(),
- QPS: 5000.0,
- Burst: 5000,
- })
-
- schedulerConfigFactory := factory.NewConfigFactory(c, nil)
- schedulerConfig, err := schedulerConfigFactory.Create()
- if err != nil {
- b.Fatalf("Couldn't create scheduler config: %v", err)
- }
- eventBroadcaster := record.NewBroadcaster()
- schedulerConfig.Recorder = eventBroadcaster.NewRecorder(api.EventSource{Component: "scheduler"})
- eventBroadcaster.StartRecordingToSink(c.Events(""))
- scheduler.New(schedulerConfig).Run()
-
- defer close(schedulerConfig.StopEverything)
-
- makeNNodes(c, 1000)
- N := b.N
- b.ResetTimer()
- makeNPods(c, N)
- for {
- objs := schedulerConfigFactory.ScheduledPodLister.Store.List()
- if len(objs) >= N {
- fmt.Printf("%v pods scheduled.\n", len(objs))
- /* // To prove that this actually works:
- for _, o := range objs {
- fmt.Printf("%s\n", o.(*api.Pod).Spec.NodeName)
- }
- */
- break
- }
- time.Sleep(time.Millisecond)
- }
- b.StopTimer()
-}
-
-func makeNNodes(c client.Interface, N int) {
- baseNode := &api.Node{
- ObjectMeta: api.ObjectMeta{
- GenerateName: "scheduler-test-node-",
- },
- Spec: api.NodeSpec{
- ExternalID: "foobar",
- },
- Status: api.NodeStatus{
- Capacity: api.ResourceList{
- api.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
- api.ResourceCPU: resource.MustParse("4"),
- api.ResourceMemory: resource.MustParse("32Gi"),
- },
- Phase: api.NodeRunning,
- Conditions: []api.NodeCondition{
- {Type: api.NodeReady, Status: api.ConditionTrue},
- },
- },
- }
- for i := 0; i < N; i++ {
- if _, err := c.Nodes().Create(baseNode); err != nil {
- panic("error creating node: " + err.Error())
- }
- }
-}
-
-func makeNPods(c client.Interface, N int) {
- basePod := &api.Pod{
- ObjectMeta: api.ObjectMeta{
- GenerateName: "scheduler-test-pod-",
- },
- Spec: api.PodSpec{
- Containers: []api.Container{{
- Name: "pause",
- Image: "gcr.io/google_containers/pause:1.0",
- Resources: api.ResourceRequirements{
- Limits: api.ResourceList{
- api.ResourceCPU: resource.MustParse("100m"),
- api.ResourceMemory: resource.MustParse("500Mi"),
- },
- Requests: api.ResourceList{
- api.ResourceCPU: resource.MustParse("100m"),
- api.ResourceMemory: resource.MustParse("500Mi"),
- },
- },
- }},
- },
- }
- wg := sync.WaitGroup{}
- threads := 30
- wg.Add(threads)
- remaining := make(chan int, N)
- go func() {
- for i := 0; i < N; i++ {
- remaining <- i
- }
- close(remaining)
- }()
- for i := 0; i < threads; i++ {
- go func() {
- defer wg.Done()
- for {
- _, ok := <-remaining
- if !ok {
- return
- }
- for {
- _, err := c.Pods("default").Create(basePod)
- if err == nil {
- break
- }
- }
- }
- }()
- }
- wg.Wait()
-}