From fda0d07eb66951ae76aafd8c415fd29b612e67d8 Mon Sep 17 00:00:00 2001 From: Yongkun Anfernee Gui Date: Thu, 8 Mar 2018 16:32:53 -0800 Subject: [PATCH] Scheduler cache comparer A debug tool that collects resources from api server and compares it with the scheduler cache. It currently only compares the node list, but it should be easy to extend. The compare is triggered by signal USER2, by doing kill -12 ${SCHED_PID} The compare result goes to scheduler log. Towards #60860 --- pkg/scheduler/factory/BUILD | 39 +++++++- pkg/scheduler/factory/cache_comparer.go | 99 ++++++++++++++++++++ pkg/scheduler/factory/cache_comparer_test.go | 79 ++++++++++++++++ pkg/scheduler/factory/factory.go | 23 +++++ pkg/scheduler/factory/signal.go | 25 +++++ pkg/scheduler/factory/signal_windows.go | 23 +++++ pkg/scheduler/testing/fake_cache.go | 1 + 7 files changed, 288 insertions(+), 1 deletion(-) create mode 100644 pkg/scheduler/factory/cache_comparer.go create mode 100644 pkg/scheduler/factory/cache_comparer_test.go create mode 100644 pkg/scheduler/factory/signal.go create mode 100644 pkg/scheduler/factory/signal_windows.go diff --git a/pkg/scheduler/factory/BUILD b/pkg/scheduler/factory/BUILD index a4c87066f6a..05b52544892 100644 --- a/pkg/scheduler/factory/BUILD +++ b/pkg/scheduler/factory/BUILD @@ -9,9 +9,45 @@ load( go_library( name = "go_default_library", srcs = [ + "cache_comparer.go", "factory.go", "plugins.go", - ], + ] + select({ + "@io_bazel_rules_go//go/platform:android": [ + "signal.go", + ], + "@io_bazel_rules_go//go/platform:darwin": [ + "signal.go", + ], + "@io_bazel_rules_go//go/platform:dragonfly": [ + "signal.go", + ], + "@io_bazel_rules_go//go/platform:freebsd": [ + "signal.go", + ], + "@io_bazel_rules_go//go/platform:linux": [ + "signal.go", + ], + "@io_bazel_rules_go//go/platform:nacl": [ + "signal.go", + ], + "@io_bazel_rules_go//go/platform:netbsd": [ + "signal.go", + ], + "@io_bazel_rules_go//go/platform:openbsd": [ + "signal.go", + ], + "@io_bazel_rules_go//go/platform:plan9": [ + "signal.go", + ], + "@io_bazel_rules_go//go/platform:solaris": [ + "signal.go", + ], + "@io_bazel_rules_go//go/platform:windows": [ + "signal_windows.go", + ], + "//conditions:default": [], + }), importpath = "k8s.io/kubernetes/pkg/scheduler/factory", deps = [ "//pkg/api/v1/pod:go_default_library", @@ -59,6 +95,7 @@ go_library( go_test( name = "go_default_test", srcs = [ + "cache_comparer_test.go", "factory_test.go", "plugins_test.go", ], diff --git a/pkg/scheduler/factory/cache_comparer.go b/pkg/scheduler/factory/cache_comparer.go new file mode 100644 index 00000000000..3a6227af3cd --- /dev/null +++ b/pkg/scheduler/factory/cache_comparer.go @@ -0,0 +1,99 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package factory + +import ( + "sort" + "strings" + + "github.com/golang/glog" + "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/labels" + corelisters "k8s.io/client-go/listers/core/v1" + "k8s.io/kubernetes/pkg/scheduler/schedulercache" +) + +type cacheComparer struct { + nodeLister corelisters.NodeLister + podLister corelisters.PodLister + cache schedulercache.Cache + + compareStrategy +} + +func (c *cacheComparer) Compare() error { + glog.V(3).Info("cache comparer started") + defer glog.V(3).Info("cache comparer finished") + + nodes, err := c.nodeLister.List(labels.Everything()) + if err != nil { + return err + } + + snapshot := c.cache.Snapshot() + + if missed, redundant := c.CompareNodes(nodes, snapshot.Nodes); len(missed)+len(redundant) != 0 { + glog.Warningf("cache mismatch: missed nodes: %s; redundant nodes: %s", missed, redundant) + } + + return nil +} + +type compareStrategy struct { +} + +func (c compareStrategy) CompareNodes(nodes []*v1.Node, nodeinfos map[string]*schedulercache.NodeInfo) (missed, redundant []string) { + missed, redundant = []string{}, []string{} + + actual := []string{} + for _, node := range nodes { + actual = append(actual, node.Name) + } + + cached := []string{} + for nodeName := range nodeinfos { + cached = append(cached, nodeName) + } + + sort.Strings(actual) + sort.Strings(cached) + + compare := func(i, j int) int { + if i == len(actual) { + return 1 + } else if j == len(cached) { + return -1 + } + return strings.Compare(actual[i], cached[j]) + } + + for i, j := 0, 0; i < len(actual) || j < len(cached); { + switch compare(i, j) { + case 0: + i++ + j++ + case -1: + missed = append(missed, actual[i]) + i++ + case 1: + redundant = append(redundant, cached[j]) + j++ + } + } + + return +} diff --git a/pkg/scheduler/factory/cache_comparer_test.go b/pkg/scheduler/factory/cache_comparer_test.go new file mode 100644 index 00000000000..ae6622f4b35 --- /dev/null +++ b/pkg/scheduler/factory/cache_comparer_test.go @@ -0,0 +1,79 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package factory + +import ( + "reflect" + "testing" + + "k8s.io/api/core/v1" + "k8s.io/kubernetes/pkg/scheduler/schedulercache" +) + +func TestCompareNodes(t *testing.T) { + compare := compareStrategy{} + + tests := []struct { + actual []string + cached []string + missing []string + redundant []string + }{ + { + actual: []string{"foo", "bar"}, + cached: []string{"bar", "foo", "foobar"}, + missing: []string{}, + redundant: []string{"foobar"}, + }, + { + actual: []string{"foo", "bar", "foobar"}, + cached: []string{"bar", "foo"}, + missing: []string{"foobar"}, + redundant: []string{}, + }, + { + actual: []string{"foo", "bar", "foobar"}, + cached: []string{"bar", "foobar", "foo"}, + missing: []string{}, + redundant: []string{}, + }, + } + + for _, test := range tests { + nodes := []*v1.Node{} + for _, nodeName := range test.actual { + node := &v1.Node{} + node.Name = nodeName + nodes = append(nodes, node) + } + + nodeInfo := make(map[string]*schedulercache.NodeInfo) + for _, nodeName := range test.cached { + nodeInfo[nodeName] = &schedulercache.NodeInfo{} + } + + m, r := compare.CompareNodes(nodes, nodeInfo) + + if !reflect.DeepEqual(m, test.missing) { + t.Errorf("missing expected to be %s; got %s", test.missing, m) + } + + if !reflect.DeepEqual(r, test.redundant) { + t.Errorf("redundant expected to be %s; got %s", test.redundant, r) + } + } +} diff --git a/pkg/scheduler/factory/factory.go b/pkg/scheduler/factory/factory.go index 407d5b85c12..9f089004663 100644 --- a/pkg/scheduler/factory/factory.go +++ b/pkg/scheduler/factory/factory.go @@ -20,6 +20,8 @@ package factory import ( "fmt" + "os" + "os/signal" "reflect" "time" @@ -295,6 +297,27 @@ func NewConfigFactory( c.volumeBinder = volumebinder.NewVolumeBinder(client, pvcInformer, pvInformer, nodeInformer, storageClassInformer) } + // Setup cache comparer + comparer := &cacheComparer{ + podLister: podInformer.Lister(), + nodeLister: nodeInformer.Lister(), + cache: c.schedulerCache, + } + + ch := make(chan os.Signal, 1) + signal.Notify(ch, compareSignal) + + go func() { + for { + select { + case <-c.StopEverything: + return + case <-ch: + comparer.Compare() + } + } + }() + return c } diff --git a/pkg/scheduler/factory/signal.go b/pkg/scheduler/factory/signal.go new file mode 100644 index 00000000000..8ec17048ebb --- /dev/null +++ b/pkg/scheduler/factory/signal.go @@ -0,0 +1,25 @@ +// +build !windows + +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package factory + +import "syscall" + +// compareSignal is the signal to trigger cache compare. For non-windows +// environment it's SIGUSR2. +var compareSignal = syscall.SIGUSR2 diff --git a/pkg/scheduler/factory/signal_windows.go b/pkg/scheduler/factory/signal_windows.go new file mode 100644 index 00000000000..9df239874c3 --- /dev/null +++ b/pkg/scheduler/factory/signal_windows.go @@ -0,0 +1,23 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package factory + +import "os" + +// compareSignal is the signal to trigger cache compare. For windows, +// it's SIGINT. +var compareSignal = os.Interrupt diff --git a/pkg/scheduler/testing/fake_cache.go b/pkg/scheduler/testing/fake_cache.go index 6e1143d2e70..e958a14b2f7 100644 --- a/pkg/scheduler/testing/fake_cache.go +++ b/pkg/scheduler/testing/fake_cache.go @@ -101,6 +101,7 @@ func (f *FakeCache) FilteredList(filter schedulercache.PodFilter, selector label return nil, nil } +// Snapshot is a fake method for testing func (f *FakeCache) Snapshot() *schedulercache.Snapshot { return &schedulercache.Snapshot{} }