Use seqeuence number to represent generation of equivalence cache.

- snapshot equivalence cache generation numbers before snapshotting the
scheduler cache
- skip update when generation does not match live generation
- keep the node and increment its generation to invalidate it instead of
deletion
- use predicates order ID as key to improve performance
This commit is contained in:
Yecheng Fu
2018-09-22 12:08:21 +08:00
parent a2cc1b1a20
commit 2f46bc8a18
11 changed files with 262 additions and 149 deletions

View File

@@ -1474,7 +1474,10 @@ func TestCacheInvalidationRace(t *testing.T) {
cacheInvalidated: make(chan struct{}),
}
eCache := equivalence.NewCache()
ps := map[string]algorithm.FitPredicate{"testPredicate": testPredicate}
algorithmpredicates.SetPredicatesOrdering([]string{"testPredicate"})
eCache := equivalence.NewCache(algorithmpredicates.Ordering())
eCache.GetNodeCache(testNode.Name)
// Ensure that equivalence cache invalidation happens after the scheduling cycle starts, but before
// the equivalence cache would be updated.
go func() {
@@ -1490,8 +1493,6 @@ func TestCacheInvalidationRace(t *testing.T) {
}()
// Set up the scheduler.
ps := map[string]algorithm.FitPredicate{"testPredicate": testPredicate}
algorithmpredicates.SetPredicatesOrdering([]string{"testPredicate"})
prioritizers := []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}}
pvcLister := schedulertesting.FakePersistentVolumeClaimLister([]*v1.PersistentVolumeClaim{})
scheduler := NewGenericScheduler(
@@ -1522,3 +1523,84 @@ func TestCacheInvalidationRace(t *testing.T) {
t.Errorf("Predicate should have been called twice. Was called %d times.", callCount)
}
}
// TestCacheInvalidationRace2 tests that cache invalidation is correctly handled
// when an invalidation event happens while a predicate is running.
func TestCacheInvalidationRace2(t *testing.T) {
// Create a predicate that returns false the first time and true on subsequent calls.
var (
podWillFit = false
callCount int
cycleStart = make(chan struct{})
cacheInvalidated = make(chan struct{})
once sync.Once
)
testPredicate := func(pod *v1.Pod,
meta algorithm.PredicateMetadata,
nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
callCount++
once.Do(func() {
cycleStart <- struct{}{}
<-cacheInvalidated
})
if !podWillFit {
podWillFit = true
return false, []algorithm.PredicateFailureReason{algorithmpredicates.ErrFakePredicate}, nil
}
return true, nil, nil
}
// Set up the mock cache.
cache := schedulercache.New(time.Duration(0), wait.NeverStop)
testNode := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}}
cache.AddNode(testNode)
ps := map[string]algorithm.FitPredicate{"testPredicate": testPredicate}
algorithmpredicates.SetPredicatesOrdering([]string{"testPredicate"})
eCache := equivalence.NewCache(algorithmpredicates.Ordering())
eCache.GetNodeCache(testNode.Name)
// Ensure that equivalence cache invalidation happens after the scheduling cycle starts, but before
// the equivalence cache would be updated.
go func() {
<-cycleStart
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "new-pod", UID: "new-pod"},
Spec: v1.PodSpec{NodeName: "machine1"}}
if err := cache.AddPod(pod); err != nil {
t.Errorf("Could not add pod to cache: %v", err)
}
eCache.InvalidateAllPredicatesOnNode("machine1")
cacheInvalidated <- struct{}{}
}()
// Set up the scheduler.
prioritizers := []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}}
pvcLister := schedulertesting.FakePersistentVolumeClaimLister([]*v1.PersistentVolumeClaim{})
scheduler := NewGenericScheduler(
cache,
eCache,
NewSchedulingQueue(),
ps,
algorithm.EmptyPredicateMetadataProducer,
prioritizers,
algorithm.EmptyPriorityMetadataProducer,
nil, nil, pvcLister, true, false,
schedulerapi.DefaultPercentageOfNodesToScore)
// First scheduling attempt should fail.
nodeLister := schedulertesting.FakeNodeLister(makeNodeList([]string{"machine1"}))
pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}}
machine, err := scheduler.Schedule(pod, nodeLister)
if machine != "" || err == nil {
t.Error("First scheduling attempt did not fail")
}
// Second scheduling attempt should succeed because cache was invalidated.
_, err = scheduler.Schedule(pod, nodeLister)
if err != nil {
t.Errorf("Second scheduling attempt failed: %v", err)
}
if callCount != 2 {
t.Errorf("Predicate should have been called twice. Was called %d times.", callCount)
}
}