mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-04 09:49:50 +00:00
Merge pull request #54410 from intelsdi-x/cpu-reconcile-state
Automatic merge from submit-queue (batch tested with PRs 54410, 56184, 56199, 56191, 56231). If you want to cherry-pick this change to another branch, please follow the instructions at https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md (cherry-picks guide). CPU manager reconcile loop - restore state. **What this PR does / why we need it**: The CPU manager reconcile loop can add orphaned containers to `State` by calling `policy.AddContainer()`. Previous PR: #54409. e2e tests PR: #53378. Blocked by #56191.
This commit is contained in:
commit
e9a9da8aa3
@ -160,8 +160,8 @@ func NewManager(
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *manager) Start(activePods ActivePodsFunc, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService) {
|
func (m *manager) Start(activePods ActivePodsFunc, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService) {
|
||||||
glog.Infof("[cpumanger] starting with %s policy", m.policy.Name())
|
glog.Infof("[cpumanager] starting with %s policy", m.policy.Name())
|
||||||
glog.Infof("[cpumanger] reconciling every %v", m.reconcilePeriod)
|
glog.Infof("[cpumanager] reconciling every %v", m.reconcilePeriod)
|
||||||
|
|
||||||
m.activePods = activePods
|
m.activePods = activePods
|
||||||
m.podStatusProvider = podStatusProvider
|
m.podStatusProvider = podStatusProvider
|
||||||
@ -242,6 +242,25 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check whether container is present in state, there may be 3 reasons why it's not present:
|
||||||
|
// - policy does not want to track the container
|
||||||
|
// - kubelet has just been restarted - and there is no previous state file
|
||||||
|
// - container has been removed from state by RemoveContainer call (DeletionTimestamp is set)
|
||||||
|
if _, ok := m.state.GetCPUSet(containerID); !ok {
|
||||||
|
if status.Phase == v1.PodRunning && pod.DeletionTimestamp == nil {
|
||||||
|
glog.V(4).Infof("[cpumanager] reconcileState: container is not present in state - trying to add (pod: %s, container: %s, container id: %s)", pod.Name, container.Name, containerID)
|
||||||
|
err := m.AddContainer(pod, &container, containerID)
|
||||||
|
if err != nil {
|
||||||
|
glog.Errorf("[cpumanager] reconcileState: failed to add container (pod: %s, container: %s, container id: %s, error: %v)", pod.Name, container.Name, containerID, err)
|
||||||
|
failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID})
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// if DeletionTimestamp is set, pod has already been removed from state
|
||||||
|
// skip the pod/container since it's not running and will be deleted soon
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
cset := m.state.GetCPUSetOrDefault(containerID)
|
cset := m.state.GetCPUSetOrDefault(containerID)
|
||||||
if cset.IsEmpty() {
|
if cset.IsEmpty() {
|
||||||
// NOTE: This should not happen outside of tests.
|
// NOTE: This should not happen outside of tests.
|
||||||
|
@ -25,6 +25,8 @@ import (
|
|||||||
type Policy interface {
|
type Policy interface {
|
||||||
Name() string
|
Name() string
|
||||||
Start(s state.State)
|
Start(s state.State)
|
||||||
|
// AddContainer call is idempotent
|
||||||
AddContainer(s state.State, pod *v1.Pod, container *v1.Container, containerID string) error
|
AddContainer(s state.State, pod *v1.Pod, container *v1.Container, containerID string) error
|
||||||
|
// RemoveContainer call is idempotent
|
||||||
RemoveContainer(s state.State, containerID string) error
|
RemoveContainer(s state.State, containerID string) error
|
||||||
}
|
}
|
||||||
|
@ -156,9 +156,15 @@ func (p *staticPolicy) assignableCPUs(s state.State) cpuset.CPUSet {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (p *staticPolicy) AddContainer(s state.State, pod *v1.Pod, container *v1.Container, containerID string) error {
|
func (p *staticPolicy) AddContainer(s state.State, pod *v1.Pod, container *v1.Container, containerID string) error {
|
||||||
glog.Infof("[cpumanager] static policy: AddContainer (pod: %s, container: %s, container id: %s)", pod.Name, container.Name, containerID)
|
|
||||||
if numCPUs := guaranteedCPUs(pod, container); numCPUs != 0 {
|
if numCPUs := guaranteedCPUs(pod, container); numCPUs != 0 {
|
||||||
|
glog.Infof("[cpumanager] static policy: AddContainer (pod: %s, container: %s, container id: %s)", pod.Name, container.Name, containerID)
|
||||||
// container belongs in an exclusively allocated pool
|
// container belongs in an exclusively allocated pool
|
||||||
|
|
||||||
|
if _, ok := s.GetCPUSet(containerID); ok {
|
||||||
|
glog.Infof("[cpumanager] static policy: container already present in state, skipping (container: %s, container id: %s)", container.Name, containerID)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
cpuset, err := p.allocateCPUs(s, numCPUs)
|
cpuset, err := p.allocateCPUs(s, numCPUs)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf("[cpumanager] unable to allocate %d CPUs (container id: %s, error: %v)", numCPUs, containerID, err)
|
glog.Errorf("[cpumanager] unable to allocate %d CPUs (container id: %s, error: %v)", numCPUs, containerID, err)
|
||||||
|
Loading…
Reference in New Issue
Block a user