mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-25 12:43:23 +00:00
Merge pull request #124926 from kerthcet/feat/sharing-waitingPods
enhancement(scheduler): share waitingPods among profiles
This commit is contained in:
commit
027f346f60
@ -364,6 +364,7 @@ func TestPostFilter(t *testing.T) {
|
||||
frameworkruntime.WithExtenders(extenders),
|
||||
frameworkruntime.WithSnapshotSharedLister(internalcache.NewSnapshot(tt.pods, tt.nodes)),
|
||||
frameworkruntime.WithLogger(logger),
|
||||
frameworkruntime.WithWaitingPods(frameworkruntime.NewWaitingPodsMap()),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@ -1702,6 +1703,8 @@ func TestPreempt(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
waitingPods := frameworkruntime.NewWaitingPodsMap()
|
||||
|
||||
cache := internalcache.New(ctx, time.Duration(0))
|
||||
for _, pod := range test.pods {
|
||||
cache.AddPod(logger, pod)
|
||||
@ -1745,6 +1748,7 @@ func TestPreempt(t *testing.T) {
|
||||
frameworkruntime.WithPodNominator(internalqueue.NewPodNominator(informerFactory.Core().V1().Pods().Lister())),
|
||||
frameworkruntime.WithSnapshotSharedLister(internalcache.NewSnapshot(test.pods, nodes)),
|
||||
frameworkruntime.WithInformerFactory(informerFactory),
|
||||
frameworkruntime.WithWaitingPods(waitingPods),
|
||||
frameworkruntime.WithLogger(logger),
|
||||
)
|
||||
if err != nil {
|
||||
|
@ -132,6 +132,7 @@ type frameworkOptions struct {
|
||||
extenders []framework.Extender
|
||||
captureProfile CaptureProfile
|
||||
parallelizer parallelize.Parallelizer
|
||||
waitingPods *waitingPodsMap
|
||||
logger *klog.Logger
|
||||
}
|
||||
|
||||
@ -221,6 +222,13 @@ func WithMetricsRecorder(r *metrics.MetricAsyncRecorder) Option {
|
||||
}
|
||||
}
|
||||
|
||||
// WithWaitingPods sets waitingPods for the scheduling frameworkImpl.
|
||||
func WithWaitingPods(wp *waitingPodsMap) Option {
|
||||
return func(o *frameworkOptions) {
|
||||
o.waitingPods = wp
|
||||
}
|
||||
}
|
||||
|
||||
// WithLogger overrides the default logger from k8s.io/klog.
|
||||
func WithLogger(logger klog.Logger) Option {
|
||||
return func(o *frameworkOptions) {
|
||||
@ -254,7 +262,7 @@ func NewFramework(ctx context.Context, r Registry, profile *config.KubeScheduler
|
||||
registry: r,
|
||||
snapshotSharedLister: options.snapshotSharedLister,
|
||||
scorePluginWeight: make(map[string]int),
|
||||
waitingPods: newWaitingPodsMap(),
|
||||
waitingPods: options.waitingPods,
|
||||
clientSet: options.clientSet,
|
||||
kubeConfig: options.kubeConfig,
|
||||
eventRecorder: options.eventRecorder,
|
||||
|
@ -2808,7 +2808,9 @@ func TestPermitPlugins(t *testing.T) {
|
||||
profile := config.KubeSchedulerProfile{Plugins: configPlugins}
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
f, err := newFrameworkWithQueueSortAndBind(ctx, registry, profile)
|
||||
f, err := newFrameworkWithQueueSortAndBind(ctx, registry, profile,
|
||||
WithWaitingPods(NewWaitingPodsMap()),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("fail to create framework: %s", err)
|
||||
}
|
||||
@ -2990,7 +2992,10 @@ func TestRecordingMetrics(t *testing.T) {
|
||||
SchedulerName: testProfileName,
|
||||
Plugins: plugins,
|
||||
}
|
||||
f, err := newFrameworkWithQueueSortAndBind(ctx, r, profile, withMetricsRecorder(recorder))
|
||||
f, err := newFrameworkWithQueueSortAndBind(ctx, r, profile,
|
||||
withMetricsRecorder(recorder),
|
||||
WithWaitingPods(NewWaitingPodsMap()),
|
||||
)
|
||||
if err != nil {
|
||||
cancel()
|
||||
t.Fatalf("Failed to create framework for testing: %v", err)
|
||||
@ -3160,7 +3165,9 @@ func TestPermitWaitDurationMetric(t *testing.T) {
|
||||
profile := config.KubeSchedulerProfile{Plugins: plugins}
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
f, err := newFrameworkWithQueueSortAndBind(ctx, r, profile)
|
||||
f, err := newFrameworkWithQueueSortAndBind(ctx, r, profile,
|
||||
WithWaitingPods(NewWaitingPodsMap()),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create framework for testing: %v", err)
|
||||
}
|
||||
@ -3216,7 +3223,9 @@ func TestWaitOnPermit(t *testing.T) {
|
||||
profile := config.KubeSchedulerProfile{Plugins: plugins}
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
f, err := newFrameworkWithQueueSortAndBind(ctx, r, profile)
|
||||
f, err := newFrameworkWithQueueSortAndBind(ctx, r, profile,
|
||||
WithWaitingPods(NewWaitingPodsMap()),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create framework for testing: %v", err)
|
||||
}
|
||||
|
@ -32,8 +32,8 @@ type waitingPodsMap struct {
|
||||
mu sync.RWMutex
|
||||
}
|
||||
|
||||
// newWaitingPodsMap returns a new waitingPodsMap.
|
||||
func newWaitingPodsMap() *waitingPodsMap {
|
||||
// NewWaitingPodsMap returns a new waitingPodsMap.
|
||||
func NewWaitingPodsMap() *waitingPodsMap {
|
||||
return &waitingPodsMap{
|
||||
pods: make(map[types.UID]*waitingPod),
|
||||
}
|
||||
|
@ -778,7 +778,9 @@ func TestSchedulerScheduleOne(t *testing.T) {
|
||||
registerPluginFuncs,
|
||||
testSchedulerName,
|
||||
frameworkruntime.WithClientSet(client),
|
||||
frameworkruntime.WithEventRecorder(eventBroadcaster.NewRecorder(scheme.Scheme, testSchedulerName)))
|
||||
frameworkruntime.WithEventRecorder(eventBroadcaster.NewRecorder(scheme.Scheme, testSchedulerName)),
|
||||
frameworkruntime.WithWaitingPods(frameworkruntime.NewWaitingPodsMap()),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -3539,6 +3541,7 @@ func setupTestScheduler(ctx context.Context, t *testing.T, queuedPodStore *clien
|
||||
informerFactory = informers.NewSharedInformerFactory(clientsetfake.NewSimpleClientset(), 0)
|
||||
}
|
||||
schedulingQueue := internalqueue.NewTestQueueWithInformerFactory(ctx, nil, informerFactory)
|
||||
waitingPods := frameworkruntime.NewWaitingPodsMap()
|
||||
|
||||
fwk, _ := tf.NewFramework(
|
||||
ctx,
|
||||
@ -3548,6 +3551,7 @@ func setupTestScheduler(ctx context.Context, t *testing.T, queuedPodStore *clien
|
||||
frameworkruntime.WithEventRecorder(recorder),
|
||||
frameworkruntime.WithInformerFactory(informerFactory),
|
||||
frameworkruntime.WithPodNominator(internalqueue.NewPodNominator(informerFactory.Core().V1().Pods().Lister())),
|
||||
frameworkruntime.WithWaitingPods(waitingPods),
|
||||
)
|
||||
|
||||
errChan := make(chan error, 1)
|
||||
|
@ -292,6 +292,8 @@ func New(ctx context.Context,
|
||||
|
||||
snapshot := internalcache.NewEmptySnapshot()
|
||||
metricsRecorder := metrics.NewMetricsAsyncRecorder(1000, time.Second, stopEverything)
|
||||
// waitingPods holds all the pods that are in the scheduler and waiting in the permit stage
|
||||
waitingPods := frameworkruntime.NewWaitingPodsMap()
|
||||
|
||||
profiles, err := profile.NewMap(ctx, options.profiles, registry, recorderFactory,
|
||||
frameworkruntime.WithComponentConfigVersion(options.componentConfigVersion),
|
||||
@ -303,6 +305,7 @@ func New(ctx context.Context,
|
||||
frameworkruntime.WithParallelism(int(options.parallelism)),
|
||||
frameworkruntime.WithExtenders(extenders),
|
||||
frameworkruntime.WithMetricsRecorder(metricsRecorder),
|
||||
frameworkruntime.WithWaitingPods(waitingPods),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("initializing profiles: %v", err)
|
||||
|
@ -21,13 +21,16 @@ import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
eventsv1 "k8s.io/api/events/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/client-go/informers"
|
||||
"k8s.io/client-go/kubernetes"
|
||||
@ -437,12 +440,14 @@ func initScheduler(ctx context.Context, cache internalcache.Cache, queue interna
|
||||
tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New),
|
||||
}
|
||||
eventBroadcaster := events.NewBroadcaster(&events.EventSinkImpl{Interface: client.EventsV1()})
|
||||
waitingPods := frameworkruntime.NewWaitingPodsMap()
|
||||
fwk, err := tf.NewFramework(ctx,
|
||||
registerPluginFuncs,
|
||||
testSchedulerName,
|
||||
frameworkruntime.WithClientSet(client),
|
||||
frameworkruntime.WithInformerFactory(informerFactory),
|
||||
frameworkruntime.WithEventRecorder(eventBroadcaster.NewRecorder(scheme.Scheme, testSchedulerName)),
|
||||
frameworkruntime.WithWaitingPods(waitingPods),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
@ -591,6 +596,7 @@ const (
|
||||
queueSort = "no-op-queue-sort-plugin"
|
||||
fakeBind = "bind-plugin"
|
||||
emptyEventExtensions = "emptyEventExtensions"
|
||||
fakePermit = "fakePermit"
|
||||
)
|
||||
|
||||
func Test_buildQueueingHintMap(t *testing.T) {
|
||||
@ -905,6 +911,160 @@ func newFramework(ctx context.Context, r frameworkruntime.Registry, profile sche
|
||||
)
|
||||
}
|
||||
|
||||
func TestFrameworkHandler_IterateOverWaitingPods(t *testing.T) {
|
||||
const (
|
||||
testSchedulerProfile1 = "test-scheduler-profile-1"
|
||||
testSchedulerProfile2 = "test-scheduler-profile-2"
|
||||
testSchedulerProfile3 = "test-scheduler-profile-3"
|
||||
)
|
||||
|
||||
nodes := []runtime.Object{
|
||||
st.MakeNode().Name("node1").UID("node1").Obj(),
|
||||
st.MakeNode().Name("node2").UID("node2").Obj(),
|
||||
st.MakeNode().Name("node3").UID("node3").Obj(),
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
profiles []schedulerapi.KubeSchedulerProfile
|
||||
waitSchedulingPods []*v1.Pod
|
||||
expectPodNamesInWaitingPods []string
|
||||
}{
|
||||
{
|
||||
name: "pods with same profile are waiting on permit stage",
|
||||
profiles: []schedulerapi.KubeSchedulerProfile{
|
||||
{
|
||||
SchedulerName: testSchedulerProfile1,
|
||||
Plugins: &schedulerapi.Plugins{
|
||||
QueueSort: schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: "PrioritySort"}}},
|
||||
Permit: schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: fakePermit}}},
|
||||
Bind: schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: "DefaultBinder"}}},
|
||||
},
|
||||
},
|
||||
},
|
||||
waitSchedulingPods: []*v1.Pod{
|
||||
st.MakePod().Name("pod1").UID("pod1").SchedulerName(testSchedulerProfile1).Obj(),
|
||||
st.MakePod().Name("pod2").UID("pod2").SchedulerName(testSchedulerProfile1).Obj(),
|
||||
st.MakePod().Name("pod3").UID("pod3").SchedulerName(testSchedulerProfile1).Obj(),
|
||||
},
|
||||
expectPodNamesInWaitingPods: []string{"pod1", "pod2", "pod3"},
|
||||
},
|
||||
{
|
||||
name: "pods with different profiles are waiting on permit stage",
|
||||
profiles: []schedulerapi.KubeSchedulerProfile{
|
||||
{
|
||||
SchedulerName: testSchedulerProfile1,
|
||||
Plugins: &schedulerapi.Plugins{
|
||||
QueueSort: schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: "PrioritySort"}}},
|
||||
Permit: schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: fakePermit}}},
|
||||
Bind: schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: "DefaultBinder"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
SchedulerName: testSchedulerProfile2,
|
||||
Plugins: &schedulerapi.Plugins{
|
||||
QueueSort: schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: "PrioritySort"}}},
|
||||
Permit: schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: fakePermit}}},
|
||||
Bind: schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: "DefaultBinder"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
SchedulerName: testSchedulerProfile3,
|
||||
Plugins: &schedulerapi.Plugins{
|
||||
QueueSort: schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: "PrioritySort"}}},
|
||||
Permit: schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: fakePermit}}},
|
||||
Bind: schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: "DefaultBinder"}}},
|
||||
},
|
||||
},
|
||||
},
|
||||
waitSchedulingPods: []*v1.Pod{
|
||||
st.MakePod().Name("pod1").UID("pod1").SchedulerName(testSchedulerProfile1).Obj(),
|
||||
st.MakePod().Name("pod2").UID("pod2").SchedulerName(testSchedulerProfile1).Obj(),
|
||||
st.MakePod().Name("pod3").UID("pod3").SchedulerName(testSchedulerProfile2).Obj(),
|
||||
st.MakePod().Name("pod4").UID("pod4").SchedulerName(testSchedulerProfile3).Obj(),
|
||||
},
|
||||
expectPodNamesInWaitingPods: []string{"pod1", "pod2", "pod3", "pod4"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// Set up scheduler for the 3 nodes.
|
||||
objs := append([]runtime.Object{&v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: ""}}}, nodes...)
|
||||
fakeClient := fake.NewSimpleClientset(objs...)
|
||||
informerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
|
||||
eventBroadcaster := events.NewBroadcaster(&events.EventSinkImpl{Interface: fakeClient.EventsV1()})
|
||||
eventRecorder := eventBroadcaster.NewRecorder(scheme.Scheme, fakePermit)
|
||||
|
||||
outOfTreeRegistry := frameworkruntime.Registry{
|
||||
fakePermit: newFakePermitPlugin(eventRecorder),
|
||||
}
|
||||
|
||||
_, ctx := ktesting.NewTestContext(t)
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
scheduler, err := New(
|
||||
ctx,
|
||||
fakeClient,
|
||||
informerFactory,
|
||||
nil,
|
||||
profile.NewRecorderFactory(eventBroadcaster),
|
||||
WithProfiles(tc.profiles...),
|
||||
WithFrameworkOutOfTreeRegistry(outOfTreeRegistry),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create scheduler: %v", err)
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
waitSchedulingPodNumber := len(tc.waitSchedulingPods)
|
||||
wg.Add(waitSchedulingPodNumber)
|
||||
stopFn, err := eventBroadcaster.StartEventWatcher(func(obj runtime.Object) {
|
||||
e, ok := obj.(*eventsv1.Event)
|
||||
if !ok || (e.Reason != podWaitingReason) {
|
||||
return
|
||||
}
|
||||
wg.Done()
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer stopFn()
|
||||
|
||||
// Run scheduler.
|
||||
informerFactory.Start(ctx.Done())
|
||||
informerFactory.WaitForCacheSync(ctx.Done())
|
||||
go scheduler.Run(ctx)
|
||||
|
||||
// Send pods to be scheduled.
|
||||
for _, p := range tc.waitSchedulingPods {
|
||||
_, err = fakeClient.CoreV1().Pods("").Create(ctx, p, metav1.CreateOptions{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Wait all pods in waitSchedulingPods to be scheduled.
|
||||
wg.Wait()
|
||||
|
||||
// Ensure that all waitingPods in scheduler can be obtained from any profiles.
|
||||
for _, fwk := range scheduler.Profiles {
|
||||
actualPodNamesInWaitingPods := sets.NewString()
|
||||
fwk.IterateOverWaitingPods(func(pod framework.WaitingPod) {
|
||||
actualPodNamesInWaitingPods.Insert(pod.GetPod().Name)
|
||||
})
|
||||
// Validate the name of pods in waitingPods matches expectations.
|
||||
if actualPodNamesInWaitingPods.Len() != len(tc.expectPodNamesInWaitingPods) ||
|
||||
!actualPodNamesInWaitingPods.HasAll(tc.expectPodNamesInWaitingPods...) {
|
||||
t.Fatalf("Unexpected waitingPods in scheduler profile %s, expect: %#v, got: %#v", fwk.ProfileName(), tc.expectPodNamesInWaitingPods, actualPodNamesInWaitingPods.List())
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
var _ framework.QueueSortPlugin = &fakeQueueSortPlugin{}
|
||||
|
||||
// fakeQueueSortPlugin is a no-op implementation for QueueSort extension point.
|
||||
@ -1004,3 +1164,36 @@ func (*emptyEventsToRegisterPlugin) Filter(_ context.Context, _ *framework.Cycle
|
||||
}
|
||||
|
||||
func (*emptyEventsToRegisterPlugin) EventsToRegister() []framework.ClusterEventWithHint { return nil }
|
||||
|
||||
// fakePermitPlugin only implements PermitPlugin interface.
|
||||
type fakePermitPlugin struct {
|
||||
eventRecorder events.EventRecorder
|
||||
}
|
||||
|
||||
func newFakePermitPlugin(eventRecorder events.EventRecorder) frameworkruntime.PluginFactory {
|
||||
return func(ctx context.Context, configuration runtime.Object, f framework.Handle) (framework.Plugin, error) {
|
||||
pl := &fakePermitPlugin{
|
||||
eventRecorder: eventRecorder,
|
||||
}
|
||||
return pl, nil
|
||||
}
|
||||
}
|
||||
|
||||
func (f fakePermitPlugin) Name() string {
|
||||
return fakePermit
|
||||
}
|
||||
|
||||
const (
|
||||
podWaitingReason = "podWaiting"
|
||||
)
|
||||
|
||||
func (f fakePermitPlugin) Permit(ctx context.Context, state *framework.CycleState, p *v1.Pod, nodeName string) (*framework.Status, time.Duration) {
|
||||
defer func() {
|
||||
// Send event with podWaiting reason to broadcast this pod is already waiting in the permit stage.
|
||||
f.eventRecorder.Eventf(p, nil, v1.EventTypeWarning, podWaitingReason, "", "")
|
||||
}()
|
||||
|
||||
return framework.NewStatus(framework.Wait), 100 * time.Second
|
||||
}
|
||||
|
||||
var _ framework.PermitPlugin = &fakePermitPlugin{}
|
||||
|
Loading…
Reference in New Issue
Block a user