diff --git a/hack/test-go.sh b/hack/test-go.sh index 2406878eb02..0d0148ba30c 100755 --- a/hack/test-go.sh +++ b/hack/test-go.sh @@ -39,6 +39,7 @@ kube::test::find_dirs() { -o -path './test/e2e/*' \ -o -path './test/e2e_node/*' \ -o -path './test/integration/*' \ + -o -path './test/component/scheduler/perf/*' \ \) -prune \ \) -name '*_test.go' -print0 | xargs -0n1 dirname | sed 's|^\./||' | sort -u ) diff --git a/test/component/scheduler/perf/README.md b/test/component/scheduler/perf/README.md new file mode 100644 index 00000000000..edbcc13f61d --- /dev/null +++ b/test/component/scheduler/perf/README.md @@ -0,0 +1,75 @@ + + + + +WARNING +WARNING +WARNING +WARNING +WARNING + +

PLEASE NOTE: This document applies to the HEAD of the source tree

+ +If you are using a released version of Kubernetes, you should +refer to the docs that go with that version. + + +The latest release of this document can be found +[here](http://releases.k8s.io/release-1.1/docs/proposals/choosing-scheduler.md). + +Documentation for other releases can be found at +[releases.k8s.io](http://releases.k8s.io). + +-- + + + + + +Scheduler Performance Test +====== + +Motivation +------ +We already have a performance testing system -- Kubemark. However, Kubemark requires setting up and bootstrapping a whole cluster, which takes a lot of time. + +We want to have a standard way to reproduce scheduling latency metrics and benchmark the scheduler as simply and quickly as possible. We have the following goals: + +- Save time on testing + - The test and benchmark can be run on a single box. + We only set up the components necessary for scheduling, without booting up a cluster. +- Profile runtime metrics to find bottlenecks + - Write scheduler integration tests that focus on performance measurement. + Take advantage of Go profiling tools and collect fine-grained metrics, + like cpu-profiling, memory-profiling and block-profiling. +- Reproduce test results easily + - We want to have a known place for the scheduler's performance-related tests. + Developers should just run one script to collect all the information they need. + +Currently the test suite has the following: + +- density test (by adding a new Go test) + - schedule 30k pods on 1000 (fake) nodes and 3k pods on 100 (fake) nodes + - print out the scheduling rate every second + - lets you see how the rate changes with the number of scheduled pods +- benchmark + - make use of `go test -bench` and report nanosecond/op. + - schedule b.N pods when the cluster has N nodes and P scheduled pods. Since it takes a relatively long time to finish one round, b.N is small: 10 - 100.
+ + +How To Run +------ +``` +cd kubernetes/test/component/scheduler/perf +./test-performance.sh +``` + + + +[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/test/component/scheduler/perf/README.md?pixel)]() + diff --git a/test/component/scheduler/perf/scheduler_bench_test.go b/test/component/scheduler/perf/scheduler_bench_test.go new file mode 100644 index 00000000000..202cbca58e1 --- /dev/null +++ b/test/component/scheduler/perf/scheduler_bench_test.go @@ -0,0 +1,79 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package benchmark
+
+import (
+	"testing"
+	"time"
+)
+
+// BenchmarkScheduling100Nodes0Pods benchmarks the scheduling rate
+// when the cluster has 100 nodes and 0 scheduled pods
+func BenchmarkScheduling100Nodes0Pods(b *testing.B) {
+	benchmarkScheduling(100, 0, b)
+}
+
+// BenchmarkScheduling100Nodes1000Pods benchmarks the scheduling rate
+// when the cluster has 100 nodes and 1000 scheduled pods
+func BenchmarkScheduling100Nodes1000Pods(b *testing.B) {
+	benchmarkScheduling(100, 1000, b)
+}
+
+// BenchmarkScheduling1000Nodes0Pods benchmarks the scheduling rate
+// when the cluster has 1000 nodes and 0 scheduled pods
+func BenchmarkScheduling1000Nodes0Pods(b *testing.B) {
+	benchmarkScheduling(1000, 0, b)
+}
+
+// BenchmarkScheduling1000Nodes1000Pods benchmarks the scheduling rate
+// when the cluster has 1000 nodes and 1000 scheduled pods
+func BenchmarkScheduling1000Nodes1000Pods(b *testing.B) {
+	benchmarkScheduling(1000, 1000, b)
+}
+
+// benchmarkScheduling measures the time the scheduler needs to place b.N new pods
+// on numNodes nodes that already hold numScheduledPods scheduled pods. Since an
+// operation takes relatively long time, b.N should be small: 10 - 100.
+func benchmarkScheduling(numNodes, numScheduledPods int, b *testing.B) {
+	schedulerConfigFactory, finalFunc := mustSetupScheduler()
+	defer finalFunc()
+	c := schedulerConfigFactory.Client
+
+	makeNodes(c, numNodes)
+	makePods(c, numScheduledPods)
+	// Wait until the pre-existing pods are scheduled so that they are not measured.
+	for len(schedulerConfigFactory.ScheduledPodLister.Store.List()) < numScheduledPods {
+		time.Sleep(1 * time.Second)
+	}
+
+	// start benchmark
+	b.ResetTimer()
+	makePods(c, b.N)
+	for {
+		// This can potentially affect performance of scheduler, since List() is done under mutex.
+		// TODO: Setup watch on apiserver and wait until all pods scheduled.
+		scheduled := schedulerConfigFactory.ScheduledPodLister.Store.List()
+		if len(scheduled) >= numScheduledPods+b.N {
+			break
+		}
+		// Note: This might introduce slight deviation in accuracy of benchmark results.
+		// Since the total amount of time is relatively large, it might not be a concern.
+		time.Sleep(100 * time.Millisecond)
+	}
+	// Stop the timer before the deferred finalFunc runs so teardown is not measured.
+	b.StopTimer()
+}
diff --git a/test/component/scheduler/perf/scheduler_test.go b/test/component/scheduler/perf/scheduler_test.go
new file mode 100644
index 00000000000..5c54851a64c
--- /dev/null
+++ b/test/component/scheduler/perf/scheduler_test.go
@@ -0,0 +1,61 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package benchmark
+
+import (
+	"fmt"
+	"testing"
+	"time"
+)
+
+// TestSchedule100Node3KPods schedules 3k pods on 100 nodes.
+func TestSchedule100Node3KPods(t *testing.T) {
+	schedulePods(100, 3000)
+}
+
+// TestSchedule1000Node30KPods schedules 30k pods on 1000 nodes.
+func TestSchedule1000Node30KPods(t *testing.T) {
+	schedulePods(1000, 30000)
+}
+
+// schedulePods schedules specific number of pods on specific number of nodes.
+// This is used to learn the scheduling throughput on various
+// sizes of cluster and changes as more and more pods are scheduled.
+// It won't stop until all pods are scheduled.
+func schedulePods(numNodes, numPods int) {
+	configFactory, teardown := mustSetupScheduler()
+	defer teardown()
+	apiClient := configFactory.Client
+	makeNodes(apiClient, numNodes)
+	makePods(apiClient, numPods)
+
+	began := time.Now()
+	lastTotal := 0
+	for {
+		// Polling is not free: List() is done under mutex, and listing a large number of
+		// pods is expensive, so doing it every second may itself slow the scheduler down.
+		// TODO: Setup watch on apiserver and wait until all pods scheduled.
+		total := len(configFactory.ScheduledPodLister.Store.List())
+		elapsed := time.Since(began) / time.Second
+		fmt.Printf("%ds\trate: %d\ttotal: %d\n", elapsed, total-lastTotal, total)
+		if total >= numPods {
+			return
+		}
+		lastTotal = total
+		time.Sleep(1 * time.Second)
+	}
+}
diff --git a/test/component/scheduler/perf/test-performance.sh b/test/component/scheduler/perf/test-performance.sh
new file mode 100755
index 00000000000..50257ecb12b
--- /dev/null
+++ b/test/component/scheduler/perf/test-performance.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+
+# Copyright 2014 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+pushd "../../../.."
+source "./hack/lib/util.sh"
+source "./hack/lib/logging.sh"
+source "./hack/lib/etcd.sh"
+popd
+
+cleanup() {
+  kube::etcd::cleanup
+  kube::log::status "performance test cleanup complete"
+}
+
+trap cleanup EXIT
+
+kube::etcd::start
+kube::log::status "performance test start"
+
+# TODO: set log-dir and prof output dir.
+DIR_BASENAME=$(basename "$(pwd)") # quoted so a working directory containing spaces is not word-split
+go test -c -o "${DIR_BASENAME}.test"
+# Profile with the benchmark suite: it only schedules a few pods, so in theory it has less
+# variance. -test.run=xxxx is meant to match no test name, leaving only the benchmarks to run.
+"./${DIR_BASENAME}.test" -test.bench=. -test.run=xxxx -test.cpuprofile=prof.out -logtostderr=false
+kube::log::status "benchmark tests finished"
+# Running density tests. It might take a long time.
+"./${DIR_BASENAME}.test" -test.run=. -test.timeout=60m
+kube::log::status "density tests finished"
diff --git a/test/component/scheduler/perf/util.go b/test/component/scheduler/perf/util.go
new file mode 100644
index 00000000000..9a262740a8d
--- /dev/null
+++ b/test/component/scheduler/perf/util.go
@@ -0,0 +1,158 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package benchmark
+
+import (
+	"net/http"
+	"net/http/httptest"
+
+	"github.com/golang/glog"
+	"k8s.io/kubernetes/pkg/api"
+	"k8s.io/kubernetes/pkg/api/resource"
+	"k8s.io/kubernetes/pkg/api/testapi"
+	"k8s.io/kubernetes/pkg/client/record"
+	client "k8s.io/kubernetes/pkg/client/unversioned"
+	"k8s.io/kubernetes/pkg/master"
+	"k8s.io/kubernetes/plugin/pkg/scheduler"
+	_ "k8s.io/kubernetes/plugin/pkg/scheduler/algorithmprovider"
+	"k8s.io/kubernetes/plugin/pkg/scheduler/factory"
+	"k8s.io/kubernetes/test/integration/framework"
+)
+
+// mustSetupScheduler starts the following components:
+// - k8s api server (a.k.a. master)
+// - scheduler
+// It returns the scheduler config factory and a destroyFunc that must be called to
+// release those resources when done. It panics if the scheduler config cannot be created.
+// Notes on rate limiter:
+// - The BindPodsRateLimiter is nil, meaning no rate limits.
+// - client rate limit is set to 5000.
+func mustSetupScheduler() (schedulerConfigFactory *factory.ConfigFactory, destroyFunc func()) {
+	framework.DeleteAllEtcdKeys()
+
+	m := master.New(framework.NewIntegrationTestMasterConfig())
+	s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
+		m.Handler.ServeHTTP(w, req)
+	}))
+
+	c := client.NewOrDie(&client.Config{
+		Host:         s.URL,
+		GroupVersion: testapi.Default.GroupVersion(),
+		QPS:          5000.0,
+		Burst:        5000,
+	})
+
+	schedulerConfigFactory = factory.NewConfigFactory(c, nil)
+	schedulerConfig, err := schedulerConfigFactory.Create()
+	if err != nil {
+		panic("Couldn't create scheduler config: " + err.Error())
+	}
+	eventBroadcaster := record.NewBroadcaster()
+	schedulerConfig.Recorder = eventBroadcaster.NewRecorder(api.EventSource{Component: "scheduler"})
+	eventBroadcaster.StartRecordingToSink(c.Events(""))
+	scheduler.New(schedulerConfig).Run()
+
+	destroyFunc = func() {
+		glog.Infof("destroying")
+		close(schedulerConfig.StopEverything)
+		s.Close()
+		glog.Infof("destroyed")
+	}
+	return
+}
+
+// makeNodes creates nodeCount identical fake nodes and panics if any create call fails.
+func makeNodes(c client.Interface, nodeCount int) {
+	glog.Infof("making %d nodes", nodeCount)
+	baseNode := &api.Node{
+		ObjectMeta: api.ObjectMeta{
+			GenerateName: "scheduler-test-node-",
+		},
+		Spec: api.NodeSpec{
+			ExternalID: "foobar",
+		},
+		Status: api.NodeStatus{
+			Capacity: api.ResourceList{
+				api.ResourcePods:   *resource.NewQuantity(32, resource.DecimalSI),
+				api.ResourceCPU:    resource.MustParse("4"),
+				api.ResourceMemory: resource.MustParse("32Gi"),
+			},
+			Phase: api.NodeRunning,
+			Conditions: []api.NodeCondition{
+				{Type: api.NodeReady, Status: api.ConditionTrue},
+			},
+		},
+	}
+	for i := 0; i < nodeCount; i++ {
+		if _, err := c.Nodes().Create(baseNode); err != nil {
+			panic("error creating node: " + err.Error())
+		}
+	}
+}
+
+// makePods creates podCount pods using 30 concurrent goroutines. It returns without
+// waiting for the creates to finish; a failed create is retried until it succeeds.
+// Currently it goes through scheduling path and it's very slow to setup large number of pods.
+// TODO: Setup pods evenly on all nodes and quickly/non-linearly.
+func makePods(c client.Interface, podCount int) {
+	glog.Infof("making %d pods", podCount)
+	basePod := &api.Pod{
+		ObjectMeta: api.ObjectMeta{
+			GenerateName: "scheduler-test-pod-",
+		},
+		Spec: api.PodSpec{
+			Containers: []api.Container{{
+				Name:  "pause",
+				Image: "gcr.io/google_containers/pause:1.0",
+				Resources: api.ResourceRequirements{
+					Limits: api.ResourceList{
+						api.ResourceCPU:    resource.MustParse("100m"),
+						api.ResourceMemory: resource.MustParse("500Mi"),
+					},
+					Requests: api.ResourceList{
+						api.ResourceCPU:    resource.MustParse("100m"),
+						api.ResourceMemory: resource.MustParse("500Mi"),
+					},
+				},
+			}},
+		},
+	}
+	threads := 30
+	remaining := make(chan int, 1000)
+	go func() {
+		for i := 0; i < podCount; i++ {
+			remaining <- i
+		}
+		close(remaining)
+	}()
+	for i := 0; i < threads; i++ {
+		go func() {
+			for {
+				_, ok := <-remaining
+				if !ok {
+					return
+				}
+				for {
+					_, err := c.Pods("default").Create(basePod)
+					if err == nil {
+						break
+					}
+				}
+			}
+		}()
+	}
+}
diff --git a/test/integration/scheduler_test.go
b/test/integration/scheduler_test.go index bdbf8a3a009..d7c7b60530f 100644 --- a/test/integration/scheduler_test.go +++ b/test/integration/scheduler_test.go @@ -24,7 +24,6 @@ import ( "fmt" "net/http" "net/http/httptest" - "sync" "testing" "time" @@ -274,133 +273,3 @@ func DoTestUnschedulableNodes(t *testing.T, restClient *client.Client, nodeStore } } } - -func BenchmarkScheduling(b *testing.B) { - framework.DeleteAllEtcdKeys() - - var m *master.Master - s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { - m.Handler.ServeHTTP(w, req) - })) - defer s.Close() - - masterConfig := framework.NewIntegrationTestMasterConfig() - m = master.New(masterConfig) - - c := client.NewOrDie(&client.Config{ - Host: s.URL, - GroupVersion: testapi.Default.GroupVersion(), - QPS: 5000.0, - Burst: 5000, - }) - - schedulerConfigFactory := factory.NewConfigFactory(c, nil) - schedulerConfig, err := schedulerConfigFactory.Create() - if err != nil { - b.Fatalf("Couldn't create scheduler config: %v", err) - } - eventBroadcaster := record.NewBroadcaster() - schedulerConfig.Recorder = eventBroadcaster.NewRecorder(api.EventSource{Component: "scheduler"}) - eventBroadcaster.StartRecordingToSink(c.Events("")) - scheduler.New(schedulerConfig).Run() - - defer close(schedulerConfig.StopEverything) - - makeNNodes(c, 1000) - N := b.N - b.ResetTimer() - makeNPods(c, N) - for { - objs := schedulerConfigFactory.ScheduledPodLister.Store.List() - if len(objs) >= N { - fmt.Printf("%v pods scheduled.\n", len(objs)) - /* // To prove that this actually works: - for _, o := range objs { - fmt.Printf("%s\n", o.(*api.Pod).Spec.NodeName) - } - */ - break - } - time.Sleep(time.Millisecond) - } - b.StopTimer() -} - -func makeNNodes(c client.Interface, N int) { - baseNode := &api.Node{ - ObjectMeta: api.ObjectMeta{ - GenerateName: "scheduler-test-node-", - }, - Spec: api.NodeSpec{ - ExternalID: "foobar", - }, - Status: api.NodeStatus{ - Capacity: api.ResourceList{ - 
api.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI), - api.ResourceCPU: resource.MustParse("4"), - api.ResourceMemory: resource.MustParse("32Gi"), - }, - Phase: api.NodeRunning, - Conditions: []api.NodeCondition{ - {Type: api.NodeReady, Status: api.ConditionTrue}, - }, - }, - } - for i := 0; i < N; i++ { - if _, err := c.Nodes().Create(baseNode); err != nil { - panic("error creating node: " + err.Error()) - } - } -} - -func makeNPods(c client.Interface, N int) { - basePod := &api.Pod{ - ObjectMeta: api.ObjectMeta{ - GenerateName: "scheduler-test-pod-", - }, - Spec: api.PodSpec{ - Containers: []api.Container{{ - Name: "pause", - Image: "gcr.io/google_containers/pause:1.0", - Resources: api.ResourceRequirements{ - Limits: api.ResourceList{ - api.ResourceCPU: resource.MustParse("100m"), - api.ResourceMemory: resource.MustParse("500Mi"), - }, - Requests: api.ResourceList{ - api.ResourceCPU: resource.MustParse("100m"), - api.ResourceMemory: resource.MustParse("500Mi"), - }, - }, - }}, - }, - } - wg := sync.WaitGroup{} - threads := 30 - wg.Add(threads) - remaining := make(chan int, N) - go func() { - for i := 0; i < N; i++ { - remaining <- i - } - close(remaining) - }() - for i := 0; i < threads; i++ { - go func() { - defer wg.Done() - for { - _, ok := <-remaining - if !ok { - return - } - for { - _, err := c.Pods("default").Create(basePod) - if err == nil { - break - } - } - } - }() - } - wg.Wait() -}