Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-30 06:54:01 +00:00
Merge pull request #54410 from intelsdi-x/cpu-reconcile-state
Automatic merge from submit-queue (batch tested with PRs 54410, 56184, 56199, 56191, 56231). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Cpu manager reconcile loop - restore state

**What this PR does / why we need it**: The CPU manager reconcile loop can add orphaned containers back to `State` by calling `policy.AddContainer()`.

Previous PR: #54409
e2e tests PR: #53378
Blocked by #56191
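In outline, the restore path this PR adds behaves as below (a simplified sketch with an illustrative helper name, not the literal change; the real code is inlined in the `reconcileState` hunk further down):

```go
// restoreIfOrphaned sketches the idea: a running container that is missing
// from State (for example because a kubelet restart lost the state file) is
// re-added through the policy. The helper name and factoring are
// illustrative only; the actual change inlines this in reconcileState.
func (m *manager) restoreIfOrphaned(pod *v1.Pod, container *v1.Container, containerID string) {
	if _, ok := m.state.GetCPUSet(containerID); ok {
		return // already tracked in State, nothing to restore
	}
	// Safe to call repeatedly: policy.AddContainer is idempotent (see the
	// Policy interface change below).
	if err := m.AddContainer(pod, container, containerID); err != nil {
		glog.Errorf("[cpumanager] failed to restore container %s: %v", containerID, err)
	}
}
```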
Commit e9a9da8aa3
@@ -160,8 +160,8 @@ func NewManager(
 }
 
 func (m *manager) Start(activePods ActivePodsFunc, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService) {
-	glog.Infof("[cpumanger] starting with %s policy", m.policy.Name())
-	glog.Infof("[cpumanger] reconciling every %v", m.reconcilePeriod)
+	glog.Infof("[cpumanager] starting with %s policy", m.policy.Name())
+	glog.Infof("[cpumanager] reconciling every %v", m.reconcilePeriod)
 
 	m.activePods = activePods
 	m.podStatusProvider = podStatusProvider
@@ -242,6 +242,25 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []reconciledContainer) {
 				continue
 			}
 
+			// Check whether container is present in state, there may be 3 reasons why it's not present:
+			// - policy does not want to track the container
+			// - kubelet has just been restarted - and there is no previous state file
+			// - container has been removed from state by RemoveContainer call (DeletionTimestamp is set)
+			if _, ok := m.state.GetCPUSet(containerID); !ok {
+				if status.Phase == v1.PodRunning && pod.DeletionTimestamp == nil {
+					glog.V(4).Infof("[cpumanager] reconcileState: container is not present in state - trying to add (pod: %s, container: %s, container id: %s)", pod.Name, container.Name, containerID)
+					err := m.AddContainer(pod, &container, containerID)
+					if err != nil {
+						glog.Errorf("[cpumanager] reconcileState: failed to add container (pod: %s, container: %s, container id: %s, error: %v)", pod.Name, container.Name, containerID, err)
+						failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID})
+					}
+				} else {
+					// if DeletionTimestamp is set, pod has already been removed from state
+					// skip the pod/container since it's not running and will be deleted soon
+					continue
+				}
+			}
+
 			cset := m.state.GetCPUSetOrDefault(containerID)
 			if cset.IsEmpty() {
 				// NOTE: This should not happen outside of tests.
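The `State` lookups above drive the whole loop: `GetCPUSet` reports whether a container is tracked at all, while `GetCPUSetOrDefault` falls back to the shared pool so the loop always has a set to reconcile. A self-contained toy of those semantics (stand-in types, behavior inferred from the calls above; not the real `state` package):

```go
package main

import "fmt"

// cpuSet is a stand-in for cpuset.CPUSet, just for this sketch.
type cpuSet map[int]struct{}

// toyState mimics the lookup semantics the reconcile loop relies on.
type toyState struct {
	assignments map[string]cpuSet // containerID -> exclusively assigned CPUs
	defaultSet  cpuSet            // the shared pool
}

// GetCPUSet reports the exclusive set, with ok == false for untracked
// (e.g. orphaned) containers.
func (s *toyState) GetCPUSet(containerID string) (cpuSet, bool) {
	set, ok := s.assignments[containerID]
	return set, ok
}

// GetCPUSetOrDefault falls back to the shared pool, so callers always get
// a usable set even for containers that are not tracked.
func (s *toyState) GetCPUSetOrDefault(containerID string) cpuSet {
	if set, ok := s.GetCPUSet(containerID); ok {
		return set
	}
	return s.defaultSet
}

func main() {
	s := &toyState{
		assignments: map[string]cpuSet{"c1": {2: {}, 3: {}}},
		defaultSet:  cpuSet{0: {}, 1: {}},
	}
	fmt.Println(s.GetCPUSetOrDefault("c1"))       // exclusive set: CPUs 2,3
	fmt.Println(s.GetCPUSetOrDefault("orphaned")) // shared pool: CPUs 0,1
}
```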
@@ -25,6 +25,8 @@ import (
 type Policy interface {
 	Name() string
 	Start(s state.State)
+	// AddContainer call is idempotent
 	AddContainer(s state.State, pod *v1.Pod, container *v1.Container, containerID string) error
+	// RemoveContainer call is idempotent
 	RemoveContainer(s state.State, containerID string) error
 }
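To make the new contract concrete: a caller may invoke `AddContainer` twice for the same container, and the second call must leave `State` unchanged. A hypothetical helper against the interface above (not code from this PR):

```go
// addTwice exercises the idempotency contract: the second AddContainer for
// an already-tracked container must succeed without re-allocating CPUs.
// Illustrative only; not part of this PR.
func addTwice(p Policy, s state.State, pod *v1.Pod, c *v1.Container, id string) error {
	if err := p.AddContainer(s, pod, c, id); err != nil {
		return err
	}
	return p.AddContainer(s, pod, c, id) // expected to be a no-op
}
```

This is exactly what the reconcile loop needs: it can race with the normal admission path, so re-adding a container that is already in `State` has to be harmless.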
@@ -156,9 +156,15 @@ func (p *staticPolicy) assignableCPUs(s state.State) cpuset.CPUSet {
 }
 
 func (p *staticPolicy) AddContainer(s state.State, pod *v1.Pod, container *v1.Container, containerID string) error {
+	glog.Infof("[cpumanager] static policy: AddContainer (pod: %s, container: %s, container id: %s)", pod.Name, container.Name, containerID)
 	if numCPUs := guaranteedCPUs(pod, container); numCPUs != 0 {
-		glog.Infof("[cpumanager] static policy: AddContainer (pod: %s, container: %s, container id: %s)", pod.Name, container.Name, containerID)
 		// container belongs in an exclusively allocated pool
+
+		if _, ok := s.GetCPUSet(containerID); ok {
+			glog.Infof("[cpumanager] static policy: container already present in state, skipping (container: %s, container id: %s)", container.Name, containerID)
+			return nil
+		}
+
 		cpuset, err := p.allocateCPUs(s, numCPUs)
 		if err != nil {
 			glog.Errorf("[cpumanager] unable to allocate %d CPUs (container id: %s, error: %v)", numCPUs, containerID, err)
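The `guaranteedCPUs` guard at the top of `AddContainer` decides whether a container gets an exclusive pool at all: only whole-CPU requests qualify. A self-contained sketch of that integral-request check (an assumption drawn from the surrounding code; the real helper also requires the pod to be in the Guaranteed QoS class):

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// isIntegralCPURequest sketches the whole-CPU test: a request of "2"
// qualifies for an exclusive cpuset, "1500m" does not. (Illustrative;
// the real guaranteedCPUs also checks the pod's QoS class.)
func isIntegralCPURequest(q resource.Quantity) bool {
	// Value() rounds up to whole CPUs; it equals MilliValue()/1000 only
	// when the request is integral.
	return q.Value()*1000 == q.MilliValue()
}

func main() {
	fmt.Println(isIntegralCPURequest(resource.MustParse("2")))     // true
	fmt.Println(isIntegralCPURequest(resource.MustParse("1500m"))) // false
}
```

Note also the design choice visible in the hunk: when the container is already present in `State`, the policy logs and returns `nil` rather than an error, which is what makes the reconcile-loop restore path safe.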