Cpu manager reconclie loop can restore state

This commit is contained in:
Szymon Scharmach 2017-10-18 14:43:55 +02:00
parent 02a7c12cbd
commit 552e4d3a9d
3 changed files with 30 additions and 3 deletions

View File

@ -160,8 +160,8 @@ func NewManager(
}
func (m *manager) Start(activePods ActivePodsFunc, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService) {
glog.Infof("[cpumanger] starting with %s policy", m.policy.Name())
glog.Infof("[cpumanger] reconciling every %v", m.reconcilePeriod)
glog.Infof("[cpumanager] starting with %s policy", m.policy.Name())
glog.Infof("[cpumanager] reconciling every %v", m.reconcilePeriod)
m.activePods = activePods
m.podStatusProvider = podStatusProvider
@ -242,6 +242,25 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
continue
}
// Check whether container is present in state, there may be 3 reasons why it's not present:
// - policy does not want to track the container
// - kubelet has just been restarted - and there is no previous state file
// - container has been removed from state by RemoveContainer call (DeletionTimestamp is set)
if _, ok := m.state.GetCPUSet(containerID); !ok {
if status.Phase == v1.PodRunning && pod.DeletionTimestamp == nil {
glog.V(4).Infof("[cpumanager] reconcileState: container is not present in state - trying to add (pod: %s, container: %s, container id: %s)", pod.Name, container.Name, containerID)
err := m.AddContainer(pod, &container, containerID)
if err != nil {
glog.Errorf("[cpumanager] reconcileState: failed to add container (pod: %s, container: %s, container id: %s, error: %v)", pod.Name, container.Name, containerID, err)
failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID})
}
} else {
// if DeletionTimestamp is set, pod has already been removed from state
// skip the pod/container since it's not running and will be deleted soon
continue
}
}
cset := m.state.GetCPUSetOrDefault(containerID)
if cset.IsEmpty() {
// NOTE: This should not happen outside of tests.

View File

@ -25,6 +25,8 @@ import (
type Policy interface {
Name() string
Start(s state.State)
// AddContainer call is idempotent
AddContainer(s state.State, pod *v1.Pod, container *v1.Container, containerID string) error
// RemoveContainer call is idempotent
RemoveContainer(s state.State, containerID string) error
}

View File

@ -156,9 +156,15 @@ func (p *staticPolicy) assignableCPUs(s state.State) cpuset.CPUSet {
}
func (p *staticPolicy) AddContainer(s state.State, pod *v1.Pod, container *v1.Container, containerID string) error {
glog.Infof("[cpumanager] static policy: AddContainer (pod: %s, container: %s, container id: %s)", pod.Name, container.Name, containerID)
if numCPUs := guaranteedCPUs(pod, container); numCPUs != 0 {
glog.Infof("[cpumanager] static policy: AddContainer (pod: %s, container: %s, container id: %s)", pod.Name, container.Name, containerID)
// container belongs in an exclusively allocated pool
if _, ok := s.GetCPUSet(containerID); ok {
glog.Infof("[cpumanager] static policy: container already present in state, skipping (container: %s, container id: %s)", container.Name, containerID)
return nil
}
cpuset, err := p.allocateCPUs(s, numCPUs)
if err != nil {
glog.Errorf("[cpumanager] unable to allocate %d CPUs (container id: %s, error: %v)", numCPUs, containerID, err)