	Merge pull request #21016 from hongchaodeng/cache
Auto commit by PR queue bot

plugin/pkg/scheduler/schedulercache/cache.go  (new file, 264 lines added)
@@ -0,0 +1,264 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package schedulercache

import (
	"fmt"
	"sync"
	"time"

	"github.com/golang/glog"
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/labels"
	"k8s.io/kubernetes/pkg/util/wait"
)

var (
	cleanAssumedPeriod = 1 * time.Second
)

// New returns a Cache implementation.
// It automatically starts a goroutine that manages expiration of assumed pods.
// "ttl" is how long an assumed pod stays in the cache before it expires.
// "stop" is the channel used to stop the background goroutine.
func New(ttl time.Duration, stop chan struct{}) Cache {
	cache := newSchedulerCache(ttl, cleanAssumedPeriod, stop)
	cache.run()
	return cache
}

type schedulerCache struct {
	stop   chan struct{}
	ttl    time.Duration
	period time.Duration

	// This mutex guards all fields within this cache struct.
	mu sync.Mutex
	// a set of assumed pod keys.
	// The key could further be used to get an entry in podStates.
	assumedPods map[string]bool
	// a map from pod key to podState.
	podStates map[string]*podState
	nodes     map[string]*NodeInfo
}

type podState struct {
	pod *api.Pod
	// Used by assumedPod to determine expiration.
	deadline time.Time
}

func newSchedulerCache(ttl, period time.Duration, stop chan struct{}) *schedulerCache {
	return &schedulerCache{
		ttl:    ttl,
		period: period,
		stop:   stop,

		nodes:       make(map[string]*NodeInfo),
		assumedPods: make(map[string]bool),
		podStates:   make(map[string]*podState),
	}
}

func (cache *schedulerCache) GetNodeNameToInfoMap() (map[string]*NodeInfo, error) {
	nodeNameToInfo := make(map[string]*NodeInfo)
	cache.mu.Lock()
	defer cache.mu.Unlock()
	for name, info := range cache.nodes {
		nodeNameToInfo[name] = info.Clone()
	}
	return nodeNameToInfo, nil
}

func (cache *schedulerCache) List(selector labels.Selector) ([]*api.Pod, error) {
	cache.mu.Lock()
	defer cache.mu.Unlock()
	var pods []*api.Pod
	for _, info := range cache.nodes {
		for _, pod := range info.pods {
			if selector.Matches(labels.Set(pod.Labels)) {
				pods = append(pods, pod)
			}
		}
	}
	return pods, nil
}

func (cache *schedulerCache) AssumePodIfBindSucceed(pod *api.Pod, bind func() bool) error {
	return cache.assumePodIfBindSucceed(pod, bind, time.Now())
}

// assumePodIfBindSucceed exists to make tests deterministic by taking time as an input argument.
func (cache *schedulerCache) assumePodIfBindSucceed(pod *api.Pod, bind func() bool, now time.Time) error {
	cache.mu.Lock()
	defer cache.mu.Unlock()

	if !bind() {
		return nil
	}

	key, err := getPodKey(pod)
	if err != nil {
		return err
	}
	if _, ok := cache.podStates[key]; ok {
		return fmt.Errorf("pod state wasn't initial but get assumed. Pod key: %v", key)
	}

	cache.addPod(pod)
	ps := &podState{
		pod:      pod,
		deadline: now.Add(cache.ttl),
	}
	cache.podStates[key] = ps
	cache.assumedPods[key] = true
	return nil
}

func (cache *schedulerCache) AddPod(pod *api.Pod) error {
	key, err := getPodKey(pod)
	if err != nil {
		return err
	}

	cache.mu.Lock()
	defer cache.mu.Unlock()

	_, ok := cache.podStates[key]
	switch {
	case ok && cache.assumedPods[key]:
		delete(cache.assumedPods, key)
	case !ok:
		// Pod was expired. We should add it back.
		cache.addPod(pod)
	default:
		return fmt.Errorf("pod was already in added state. Pod key: %v", key)
	}
	return nil
}

func (cache *schedulerCache) UpdatePod(oldPod, newPod *api.Pod) error {
	key, err := getPodKey(oldPod)
	if err != nil {
		return err
	}

	cache.mu.Lock()
	defer cache.mu.Unlock()

	_, ok := cache.podStates[key]
	switch {
	// An assumed pod won't have Update/Remove event. It needs to have Add event
	// before Update event, in which case the state would change from Assumed to Added.
	case ok && !cache.assumedPods[key]:
		if err := cache.updatePod(oldPod, newPod); err != nil {
			return err
		}
	default:
		return fmt.Errorf("pod state wasn't added but get updated. Pod key: %v", key)
	}
	return nil
}

func (cache *schedulerCache) updatePod(oldPod, newPod *api.Pod) error {
	if err := cache.deletePod(oldPod); err != nil {
		return err
	}
	cache.addPod(newPod)
	return nil
}

func (cache *schedulerCache) addPod(pod *api.Pod) {
	n, ok := cache.nodes[pod.Spec.NodeName]
	if !ok {
		n = NewNodeInfo()
		cache.nodes[pod.Spec.NodeName] = n
	}
	n.addPod(pod)
}

func (cache *schedulerCache) deletePod(pod *api.Pod) error {
	n := cache.nodes[pod.Spec.NodeName]
	if err := n.removePod(pod); err != nil {
		return err
	}
	if len(n.pods) == 0 {
		delete(cache.nodes, pod.Spec.NodeName)
	}
	return nil
}

func (cache *schedulerCache) RemovePod(pod *api.Pod) error {
	key, err := getPodKey(pod)
	if err != nil {
		return err
	}

	cache.mu.Lock()
	defer cache.mu.Unlock()

	_, ok := cache.podStates[key]
	switch {
	// An assumed pod won't have Delete/Remove event. It needs to have Add event
	// before Remove event, in which case the state would change from Assumed to Added.
	case ok && !cache.assumedPods[key]:
		err := cache.deletePod(pod)
		if err != nil {
			return err
		}
		delete(cache.podStates, key)
	default:
		return fmt.Errorf("pod state wasn't added but get removed. Pod key: %v", key)
	}
	return nil
}

func (cache *schedulerCache) run() {
	go wait.Until(cache.cleanupExpiredAssumedPods, cache.period, cache.stop)
}

func (cache *schedulerCache) cleanupExpiredAssumedPods() {
	cache.cleanupAssumedPods(time.Now())
}

// cleanupAssumedPods exists to make tests deterministic by taking time as an input argument.
func (cache *schedulerCache) cleanupAssumedPods(now time.Time) {
	cache.mu.Lock()
	defer cache.mu.Unlock()

	// The size of assumedPods should be small.
	for key := range cache.assumedPods {
		ps, ok := cache.podStates[key]
		if !ok {
			panic("Key found in assumed set but not in podStates. Potentially a logical error.")
		}
		if now.After(ps.deadline) {
			if err := cache.expirePod(key, ps); err != nil {
				glog.Errorf("expirePod failed for %s: %v", key, err)
			}
		}
	}
}

func (cache *schedulerCache) expirePod(key string, ps *podState) error {
	if err := cache.deletePod(ps.pod); err != nil {
		return err
	}
	delete(cache.assumedPods, key)
	delete(cache.podStates, key)
	return nil
}
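
For context, here is a minimal usage sketch (not part of this commit) of how a scheduler's bind step might drive the cache above: binding and assuming happen inside one AssumePodIfBindSucceed call, and a later informer Add event confirms the assumed pod. The bindOnApiserver helper is a hypothetical placeholder for the real binding call; only New, AssumePodIfBindSucceed, and AddPod come from the file above.

// Illustration only; bindOnApiserver is a made-up stand-in for the apiserver binding call.
package main

import (
	"time"

	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func bindOnApiserver(pod *api.Pod, nodeName string) bool {
	// A real scheduler would create a Binding object here and report success or failure.
	return true
}

func schedulePod(cache schedulercache.Cache, pod *api.Pod, nodeName string) error {
	pod.Spec.NodeName = nodeName
	// Binding and assuming are one atomic operation from the cache's point of view.
	return cache.AssumePodIfBindSucceed(pod, func() bool {
		return bindOnApiserver(pod, nodeName)
	})
}

func main() {
	stop := make(chan struct{})
	defer close(stop)

	// Assumed pods expire after 30s unless an Add event confirms them first.
	cache := schedulercache.New(30*time.Second, stop)

	pod := &api.Pod{ObjectMeta: api.ObjectMeta{Namespace: "default", Name: "p"}}
	if err := schedulePod(cache, pod, "node-1"); err != nil {
		panic(err)
	}
	// Later, the watch/informer delivers the Add event and confirms the assumed pod.
	if err := cache.AddPod(pod); err != nil {
		panic(err)
	}
}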

plugin/pkg/scheduler/schedulercache/cache_test.go  (new file, 482 lines added)
@@ -0,0 +1,482 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package schedulercache

import (
	"fmt"
	"reflect"
	"testing"
	"time"

	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/resource"
	"k8s.io/kubernetes/pkg/labels"
	priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
)

// TestAssumePodScheduled tests that after a pod is assumed, its information is aggregated
// at the node level.
func TestAssumePodScheduled(t *testing.T) {
	nodeName := "node"
	testPods := []*api.Pod{
		makeBasePod(nodeName, "test", "100m", "500", []api.ContainerPort{{HostPort: 80}}),
		makeBasePod(nodeName, "test-1", "100m", "500", []api.ContainerPort{{HostPort: 80}}),
		makeBasePod(nodeName, "test-2", "200m", "1Ki", []api.ContainerPort{{HostPort: 8080}}),
		makeBasePod(nodeName, "test-nonzero", "", "", []api.ContainerPort{{HostPort: 80}}),
	}

	tests := []struct {
		pods []*api.Pod

		wNodeInfo *NodeInfo
	}{{
		pods: []*api.Pod{testPods[0]},
		wNodeInfo: &NodeInfo{
			requestedResource: &Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			nonzeroRequest: &Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			pods: []*api.Pod{testPods[0]},
		},
	}, {
		pods: []*api.Pod{testPods[1], testPods[2]},
		wNodeInfo: &NodeInfo{
			requestedResource: &Resource{
				MilliCPU: 300,
				Memory:   1524,
			},
			nonzeroRequest: &Resource{
				MilliCPU: 300,
				Memory:   1524,
			},
			pods: []*api.Pod{testPods[1], testPods[2]},
		},
	}, { // test non-zero request
		pods: []*api.Pod{testPods[3]},
		wNodeInfo: &NodeInfo{
			requestedResource: &Resource{
				MilliCPU: 0,
				Memory:   0,
			},
			nonzeroRequest: &Resource{
				MilliCPU: priorityutil.DefaultMilliCpuRequest,
				Memory:   priorityutil.DefaultMemoryRequest,
			},
			pods: []*api.Pod{testPods[3]},
		},
	}}

	for i, tt := range tests {
		cache := newSchedulerCache(time.Second, time.Second, nil)
		for _, pod := range tt.pods {
			if err := cache.AssumePodIfBindSucceed(pod, alwaysTrue); err != nil {
				t.Fatalf("AssumePodScheduled failed: %v", err)
			}
		}
		n := cache.nodes[nodeName]
		if !reflect.DeepEqual(n, tt.wNodeInfo) {
			t.Errorf("#%d: node info get=%s, want=%s", i, n, tt.wNodeInfo)
		}
	}
}

type testExpirePodStruct struct {
	pod         *api.Pod
	assumedTime time.Time
}

// TestExpirePod tests that assumed pods will be removed if expired.
// The removal will be reflected in node info.
func TestExpirePod(t *testing.T) {
	nodeName := "node"
	testPods := []*api.Pod{
		makeBasePod(nodeName, "test-1", "100m", "500", []api.ContainerPort{{HostPort: 80}}),
		makeBasePod(nodeName, "test-2", "200m", "1Ki", []api.ContainerPort{{HostPort: 8080}}),
	}
	now := time.Now()
	ttl := 10 * time.Second
	tests := []struct {
		pods        []*testExpirePodStruct
		cleanupTime time.Time

		wNodeInfo *NodeInfo
	}{{ // assumed pod would expire
		pods: []*testExpirePodStruct{
			{pod: testPods[0], assumedTime: now},
		},
		cleanupTime: now.Add(2 * ttl),
		wNodeInfo:   nil,
	}, { // first one would expire, second one would not.
		pods: []*testExpirePodStruct{
			{pod: testPods[0], assumedTime: now},
			{pod: testPods[1], assumedTime: now.Add(3 * ttl / 2)},
		},
		cleanupTime: now.Add(2 * ttl),
		wNodeInfo: &NodeInfo{
			requestedResource: &Resource{
				MilliCPU: 200,
				Memory:   1024,
			},
			nonzeroRequest: &Resource{
				MilliCPU: 200,
				Memory:   1024,
			},
			pods: []*api.Pod{testPods[1]},
		},
	}}

	for i, tt := range tests {
		cache := newSchedulerCache(ttl, time.Second, nil)

		for _, pod := range tt.pods {
			if err := cache.assumePodIfBindSucceed(pod.pod, alwaysTrue, pod.assumedTime); err != nil {
				t.Fatalf("assumePod failed: %v", err)
			}
		}
		// pods that have assumedTime + ttl < cleanupTime will get expired and removed
		cache.cleanupAssumedPods(tt.cleanupTime)
		n := cache.nodes[nodeName]
		if !reflect.DeepEqual(n, tt.wNodeInfo) {
			t.Errorf("#%d: node info get=%s, want=%s", i, n, tt.wNodeInfo)
		}
	}
}

// TestAddPodWillConfirm tests that a pod being Add()ed will be confirmed if assumed.
// The pod info should still exist after manually expiring unconfirmed pods.
func TestAddPodWillConfirm(t *testing.T) {
	nodeName := "node"
	now := time.Now()
	ttl := 10 * time.Second

	testPods := []*api.Pod{
		makeBasePod(nodeName, "test-1", "100m", "500", []api.ContainerPort{{HostPort: 80}}),
		makeBasePod(nodeName, "test-2", "200m", "1Ki", []api.ContainerPort{{HostPort: 8080}}),
	}
	tests := []struct {
		podsToAssume []*api.Pod
		podsToAdd    []*api.Pod

		wNodeInfo *NodeInfo
	}{{ // two pods were assumed at the same time. But only the first one gets an Add() and is confirmed.
		podsToAssume: []*api.Pod{testPods[0], testPods[1]},
		podsToAdd:    []*api.Pod{testPods[0]},
		wNodeInfo: &NodeInfo{
			requestedResource: &Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			nonzeroRequest: &Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			pods: []*api.Pod{testPods[0]},
		},
	}}

	for i, tt := range tests {
		cache := newSchedulerCache(ttl, time.Second, nil)
		for _, podToAssume := range tt.podsToAssume {
			if err := cache.assumePodIfBindSucceed(podToAssume, alwaysTrue, now); err != nil {
				t.Fatalf("assumePod failed: %v", err)
			}
		}
		for _, podToAdd := range tt.podsToAdd {
			if err := cache.AddPod(podToAdd); err != nil {
				t.Fatalf("AddPod failed: %v", err)
			}
		}
		cache.cleanupAssumedPods(now.Add(2 * ttl))
		// check after expiration. confirmed pods shouldn't be expired.
		n := cache.nodes[nodeName]
		if !reflect.DeepEqual(n, tt.wNodeInfo) {
			t.Errorf("#%d: node info get=%s, want=%s", i, n, tt.wNodeInfo)
		}
	}
}

// TestAddPodAfterExpiration tests that a pod being Add()ed will be added back if expired.
func TestAddPodAfterExpiration(t *testing.T) {
	nodeName := "node"
	ttl := 10 * time.Second
	basePod := makeBasePod(nodeName, "test", "100m", "500", []api.ContainerPort{{HostPort: 80}})
	tests := []struct {
		pod *api.Pod

		wNodeInfo *NodeInfo
	}{{
		pod: basePod,
		wNodeInfo: &NodeInfo{
			requestedResource: &Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			nonzeroRequest: &Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			pods: []*api.Pod{basePod},
		},
	}}

	now := time.Now()
	for i, tt := range tests {
		cache := newSchedulerCache(ttl, time.Second, nil)
		if err := cache.assumePodIfBindSucceed(tt.pod, alwaysTrue, now); err != nil {
			t.Fatalf("assumePod failed: %v", err)
		}
		cache.cleanupAssumedPods(now.Add(2 * ttl))
		// It should be expired and removed.
		n := cache.nodes[nodeName]
		if n != nil {
			t.Errorf("#%d: expecting nil node info, but get=%v", i, n)
		}
		if err := cache.AddPod(tt.pod); err != nil {
			t.Fatalf("AddPod failed: %v", err)
		}
		// check after adding back. The pod info should be restored.
		n = cache.nodes[nodeName]
		if !reflect.DeepEqual(n, tt.wNodeInfo) {
			t.Errorf("#%d: node info get=%s, want=%s", i, n, tt.wNodeInfo)
		}
	}
}

// TestUpdatePod tests that a pod will be updated if it was added before.
func TestUpdatePod(t *testing.T) {
	nodeName := "node"
	ttl := 10 * time.Second
	testPods := []*api.Pod{
		makeBasePod(nodeName, "test", "100m", "500", []api.ContainerPort{{HostPort: 80}}),
		makeBasePod(nodeName, "test", "200m", "1Ki", []api.ContainerPort{{HostPort: 8080}}),
	}
	tests := []struct {
		podsToAssume []*api.Pod
		podsToAdd    []*api.Pod
		podsToUpdate []*api.Pod

		wNodeInfo []*NodeInfo
	}{{ // Pod is assumed and added. Then it would be updated twice.
		podsToAssume: []*api.Pod{testPods[0]},
		podsToAdd:    []*api.Pod{testPods[0]},
		podsToUpdate: []*api.Pod{testPods[0], testPods[1], testPods[0]},
		wNodeInfo: []*NodeInfo{{
			requestedResource: &Resource{
				MilliCPU: 200,
				Memory:   1024,
			},
			nonzeroRequest: &Resource{
				MilliCPU: 200,
				Memory:   1024,
			},
			pods: []*api.Pod{testPods[1]},
		}, {
			requestedResource: &Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			nonzeroRequest: &Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			pods: []*api.Pod{testPods[0]},
		}},
	}}

	now := time.Now()
	for _, tt := range tests {
		cache := newSchedulerCache(ttl, time.Second, nil)
		for _, podToAssume := range tt.podsToAssume {
			if err := cache.assumePodIfBindSucceed(podToAssume, alwaysTrue, now); err != nil {
				t.Fatalf("assumePod failed: %v", err)
			}
		}
		for _, podToAdd := range tt.podsToAdd {
			if err := cache.AddPod(podToAdd); err != nil {
				t.Fatalf("AddPod failed: %v", err)
			}
		}

		for i := range tt.podsToUpdate {
			if i == 0 {
				continue
			}
			if err := cache.UpdatePod(tt.podsToUpdate[i-1], tt.podsToUpdate[i]); err != nil {
				t.Fatalf("UpdatePod failed: %v", err)
			}
			// check after each update. The node info should match the expected snapshot.
			n := cache.nodes[nodeName]
			if !reflect.DeepEqual(n, tt.wNodeInfo[i-1]) {
				t.Errorf("#%d: node info get=%s, want=%s", i-1, n, tt.wNodeInfo)
			}
		}
	}
}

// TestRemovePod tests that after an added pod is removed, its information is also subtracted.
func TestRemovePod(t *testing.T) {
	nodeName := "node"
	basePod := makeBasePod(nodeName, "test", "100m", "500", []api.ContainerPort{{HostPort: 80}})
	tests := []struct {
		pod *api.Pod

		wNodeInfo *NodeInfo
	}{{
		pod: basePod,
		wNodeInfo: &NodeInfo{
			requestedResource: &Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			nonzeroRequest: &Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			pods: []*api.Pod{basePod},
		},
	}}

	for i, tt := range tests {
		cache := newSchedulerCache(time.Second, time.Second, nil)
		if err := cache.AssumePodIfBindSucceed(tt.pod, alwaysTrue); err != nil {
			t.Fatalf("assumePod failed: %v", err)
		}
		if err := cache.AddPod(tt.pod); err != nil {
			t.Fatalf("AddPod failed: %v", err)
		}
		n := cache.nodes[nodeName]
		if !reflect.DeepEqual(n, tt.wNodeInfo) {
			t.Errorf("#%d: node info get=%s, want=%s", i, n, tt.wNodeInfo)
		}

		if err := cache.RemovePod(tt.pod); err != nil {
			t.Fatalf("RemovePod failed: %v", err)
		}

		n = cache.nodes[nodeName]
		if n != nil {
			t.Errorf("#%d: expecting pod deleted and nil node info, get=%s", i, n)
		}
	}
}

func BenchmarkGetNodeNameToInfoMap1kNodes30kPods(b *testing.B) {
	cache := setupCacheOf1kNodes30kPods(b)
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		cache.GetNodeNameToInfoMap()
	}
}

func BenchmarkList1kNodes30kPods(b *testing.B) {
	cache := setupCacheOf1kNodes30kPods(b)
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		cache.List(labels.Everything())
	}
}

func BenchmarkExpire100Pods(b *testing.B) {
	benchmarkExpire(b, 100)
}

func BenchmarkExpire1kPods(b *testing.B) {
	benchmarkExpire(b, 1000)
}

func BenchmarkExpire10kPods(b *testing.B) {
	benchmarkExpire(b, 10000)
}

func benchmarkExpire(b *testing.B, podNum int) {
	now := time.Now()
	for n := 0; n < b.N; n++ {
		b.StopTimer()
		cache := setupCacheWithAssumedPods(b, podNum, now)
		b.StartTimer()
		cache.cleanupAssumedPods(now.Add(2 * time.Second))
	}
}

func makeBasePod(nodeName, objName, cpu, mem string, ports []api.ContainerPort) *api.Pod {
	req := api.ResourceList{}
	if cpu != "" {
		req = api.ResourceList{
			api.ResourceCPU:    resource.MustParse(cpu),
			api.ResourceMemory: resource.MustParse(mem),
		}
	}
	return &api.Pod{
		ObjectMeta: api.ObjectMeta{
			Namespace: "node_info_cache_test",
			Name:      objName,
		},
		Spec: api.PodSpec{
			Containers: []api.Container{{
				Resources: api.ResourceRequirements{
					Requests: req,
				},
				Ports: ports,
			}},
			NodeName: nodeName,
		},
	}
}

func setupCacheOf1kNodes30kPods(b *testing.B) Cache {
	cache := newSchedulerCache(time.Second, time.Second, nil)
	for i := 0; i < 1000; i++ {
		nodeName := fmt.Sprintf("node-%d", i)
		for j := 0; j < 30; j++ {
			objName := fmt.Sprintf("%s-pod-%d", nodeName, j)
			pod := makeBasePod(nodeName, objName, "0", "0", nil)

			err := cache.AssumePodIfBindSucceed(pod, alwaysTrue)
			if err != nil {
				b.Fatalf("AssumePodIfBindSucceed failed: %v", err)
			}
			err = cache.AddPod(pod)
			if err != nil {
				b.Fatalf("AddPod failed: %v", err)
			}
		}
	}
	return cache
}

func setupCacheWithAssumedPods(b *testing.B, podNum int, assumedTime time.Time) *schedulerCache {
	cache := newSchedulerCache(time.Second, time.Second, nil)
	for i := 0; i < podNum; i++ {
		nodeName := fmt.Sprintf("node-%d", i/10)
		objName := fmt.Sprintf("%s-pod-%d", nodeName, i%10)
		pod := makeBasePod(nodeName, objName, "0", "0", nil)

		err := cache.assumePodIfBindSucceed(pod, alwaysTrue, assumedTime)
		if err != nil {
			b.Fatalf("assumePodIfBindSucceed failed: %v", err)
		}
	}
	return cache
}

func alwaysTrue() bool {
	return true
}
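
Assuming a standard GOPATH-based checkout of the repository at this revision, the benchmarks above should be runnable from the repo root with the usual Go tooling, for example "go test -run=XXX -bench=. ./plugin/pkg/scheduler/schedulercache/" (the -run=XXX filter matches no tests, so only the benchmarks execute).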

plugin/pkg/scheduler/schedulercache/interface.go  (new file, 81 lines added)
@@ -0,0 +1,81 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package schedulercache

import (
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/labels"
)

// Cache collects pods' information and provides node-level aggregated information.
// It's intended for the generic scheduler to do efficient lookups.
// Cache's operations are pod centric. It incrementally updates itself based on pod events.
// Pod events are sent over the network. We don't have guaranteed delivery of all events:
// We use Reflector to list and watch from remote.
// Reflector might be slow and do a relist, which would lead to missing events.
//
// State Machine of a pod's events in scheduler's cache:
//
//                                                +-------+
//                                                |       |
//                                                |       | Update
//           Assume                Add            +       |
// Initial +--------> Assumed +------------+---> Added <--+
//                       +                 |       +
//                       |                 |       |
//                       |             Add |       | Remove
//                       |                 |       |
//                       |                 +       |
//                       +-------------> Expired   +----> Deleted
//                          Expire
//
// Note that an assumed pod can expire, because if we haven't received an Add event notifying us
// for a while, there might be some problems and we shouldn't keep the pod in the cache anymore.
//
// Note that "Initial", "Expired", and "Deleted" pods do not actually exist in cache.
// Based on existing use cases, we are making the following assumptions:
// - No pod would be assumed twice.
// - If a pod wasn't added, it wouldn't be removed or updated.
// - Both "Expired" and "Deleted" are valid end states. In case of some problems, e.g. network issue,
//   a pod might have changed its state (e.g. added and deleted) without delivering notification to the cache.
type Cache interface {
	// AssumePodIfBindSucceed assumes a pod to be scheduled if binding the pod succeeds.
	// If binding returns true, the pod's information is aggregated into the designated node.
	// Note that both binding and assuming are done as one atomic operation from the cache's view.
	// No other events like Add would happen in between binding and assuming.
	// We pass in the binding function and let the implementation take care of concurrency control details.
	// The implementation also decides the policy for expiring a pod before it is confirmed (receives an Add event).
	// After expiration, its information would be subtracted.
	AssumePodIfBindSucceed(pod *api.Pod, bind func() bool) error

	// AddPod either confirms a pod if it's assumed, or adds it back if it's expired.
	// If added back, the pod's information would be added again.
	AddPod(pod *api.Pod) error

	// UpdatePod removes oldPod's information and adds newPod's information.
	UpdatePod(oldPod, newPod *api.Pod) error

	// RemovePod removes a pod. The pod's information would be subtracted from the assigned node.
	RemovePod(pod *api.Pod) error

	// GetNodeNameToInfoMap returns a map of node names to node info. The node info contains
	// aggregated information of pods scheduled (including assumed to be) on this node.
	GetNodeNameToInfoMap() (map[string]*NodeInfo, error)

	// List lists all cached pods (including assumed ones).
	List(labels.Selector) ([]*api.Pod, error)
}
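
To make the state machine above concrete, here is a short sketch (not part of the commit) that walks the two documented lifecycles through the Cache interface. It is written as if it lived in this package, and the always-true bind callback stands in for a successful apiserver binding.

// Sketch only: the two documented pod lifecycles, driven through Cache.
// "updated" is assumed to be a newer version of the same pod (same namespace/name).
func lifecycleSketch(cache Cache, pod, updated *api.Pod) {
	// Happy path: Initial -> Assumed -> Added -> Added (Update) -> Deleted.
	_ = cache.AssumePodIfBindSucceed(pod, func() bool { return true }) // Assume (bind succeeded)
	_ = cache.AddPod(pod)                                              // Add confirms the assumed pod
	_ = cache.UpdatePod(pod, updated)                                  // Update keeps it in Added
	_ = cache.RemovePod(updated)                                       // Remove -> Deleted

	// Expiry path: Initial -> Assumed -> Expired. This happens implicitly when no
	// Add event arrives within the ttl passed to New; the background goroutine
	// started by run() subtracts the assumed pod's information again.
}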

plugin/pkg/scheduler/schedulercache/node_info.go  (modified)
@@ -19,7 +19,11 @@ package schedulercache
import (
	"fmt"

	"github.com/golang/glog"

	"k8s.io/kubernetes/pkg/api"
	clientcache "k8s.io/kubernetes/pkg/client/cache"
	priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
)

var emptyResource = Resource{}
@@ -31,6 +35,7 @@ type NodeInfo struct {
	// didn't get it as scheduled yet.
	requestedResource *Resource
	pods              []*api.Pod
	nonzeroRequest    *Resource
}

// Resource is a collection of compute resource.
@@ -45,6 +50,7 @@ type Resource struct {
func NewNodeInfo(pods ...*api.Pod) *NodeInfo {
	ni := &NodeInfo{
		requestedResource: &Resource{},
		nonzeroRequest:    &Resource{},
	}
	for _, pod := range pods {
		ni.addPod(pod)
@@ -68,29 +74,86 @@ func (n *NodeInfo) RequestedResource() Resource {
	return *n.requestedResource
}

// NonZeroRequest returns aggregated nonzero resource request of pods on this node.
func (n *NodeInfo) NonZeroRequest() Resource {
	if n == nil {
		return emptyResource
	}
	return *n.nonzeroRequest
}

func (n *NodeInfo) Clone() *NodeInfo {
	pods := append([]*api.Pod(nil), n.pods...)
	clone := &NodeInfo{
		requestedResource: &(*n.requestedResource),
		nonzeroRequest:    &(*n.nonzeroRequest),
		pods:              pods,
	}
	return clone
}

// String returns representation of human readable format of this NodeInfo.
func (n *NodeInfo) String() string {
	podKeys := make([]string, len(n.pods))
	for i, pod := range n.pods {
		podKeys[i] = pod.Name
	}
	return fmt.Sprintf("&NodeInfo{Pods:%v, RequestedResource:%#v}", podKeys, n.requestedResource)
	return fmt.Sprintf("&NodeInfo{Pods:%v, RequestedResource:%#v, NonZeroRequest: %#v}", podKeys, n.requestedResource, n.nonzeroRequest)
}

// addPod adds pod information to this NodeInfo.
func (n *NodeInfo) addPod(pod *api.Pod) {
	cpu, mem := calculateResource(pod)
	cpu, mem, non0_cpu, non0_mem := calculateResource(pod)
	n.requestedResource.MilliCPU += cpu
	n.requestedResource.Memory += mem
	n.nonzeroRequest.MilliCPU += non0_cpu
	n.nonzeroRequest.Memory += non0_mem
	n.pods = append(n.pods, pod)
}

func calculateResource(pod *api.Pod) (int64, int64) {
	var cpu, mem int64
// removePod subtracts pod information from this NodeInfo.
func (n *NodeInfo) removePod(pod *api.Pod) error {
	k1, err := getPodKey(pod)
	if err != nil {
		return err
	}

	cpu, mem, non0_cpu, non0_mem := calculateResource(pod)
	n.requestedResource.MilliCPU -= cpu
	n.requestedResource.Memory -= mem
	n.nonzeroRequest.MilliCPU -= non0_cpu
	n.nonzeroRequest.Memory -= non0_mem

	for i := range n.pods {
		k2, err := getPodKey(n.pods[i])
		if err != nil {
			glog.Errorf("Cannot get pod key, err: %v", err)
			continue
		}
		if k1 == k2 {
			// delete the element
			n.pods[i] = n.pods[len(n.pods)-1]
			n.pods = n.pods[:len(n.pods)-1]
			return nil
		}
	}
	return fmt.Errorf("no corresponding pod in pods")
}

func calculateResource(pod *api.Pod) (cpu int64, mem int64, non0_cpu int64, non0_mem int64) {
	for _, c := range pod.Spec.Containers {
		req := c.Resources.Requests
		cpu += req.Cpu().MilliValue()
		mem += req.Memory().Value()

		non0_cpu_req, non0_mem_req := priorityutil.GetNonzeroRequests(&req)
		non0_cpu += non0_cpu_req
		non0_mem += non0_mem_req
	}
	return cpu, mem
	return
}

// getPodKey returns the string key of a pod.
func getPodKey(pod *api.Pod) (string, error) {
	return clientcache.MetaNamespaceKeyFunc(pod)
}
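
The requestedResource/nonzeroRequest split above is what the test-nonzero case in cache_test.go exercises: a pod that declares no requests contributes nothing to requestedResource, while priorityutil.GetNonzeroRequests substitutes default values into nonzeroRequest. A small sketch of that behavior, assumed to live alongside cache_test.go and reusing its makeBasePod helper:

// Illustration only; the expected values follow the test-nonzero case in cache_test.go.
func nonzeroRequestSketch() {
	pod := makeBasePod("node-1", "no-requests", "", "", nil)
	cpu, mem, non0cpu, non0mem := calculateResource(pod)

	fmt.Println(cpu, mem) // 0 0: the pod declared no requests
	fmt.Println(non0cpu == priorityutil.DefaultMilliCpuRequest,
		non0mem == priorityutil.DefaultMemoryRequest) // true true: defaults are substituted for scoring
}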