mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-24 04:06:03 +00:00
Merge pull request #90377 from cbf123/container_cpuset_fixup_2
Fix exclusive CPU allocations being deleted at container restart
This commit is contained in:
commit
7fdc1275d9
@ -212,6 +212,9 @@ func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesRe
|
||||
}
|
||||
|
||||
func (m *manager) Allocate(p *v1.Pod, c *v1.Container) error {
|
||||
// Garbage collect any stranded resources before allocating CPUs.
|
||||
m.removeStaleState()
|
||||
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
|
||||
@ -384,18 +387,14 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
|
||||
}
|
||||
|
||||
if cstatus.State.Terminated != nil {
|
||||
// Since the container is terminated, we know it is safe to
|
||||
// remove it without any reconciliation. Removing the container
|
||||
// will also remove it from the `containerMap` so that this
|
||||
// container will be skipped next time around the loop.
|
||||
// The container is terminated but we can't call m.RemoveContainer()
|
||||
// here because it could remove the allocated cpuset for the container
|
||||
// which may be in the process of being restarted. That would result
|
||||
// in the container losing any exclusively-allocated CPUs that it
|
||||
// was allocated.
|
||||
_, _, err := m.containerMap.GetContainerRef(containerID)
|
||||
if err == nil {
|
||||
klog.Warningf("[cpumanager] reconcileState: skipping container; already terminated (pod: %s, container id: %s)", pod.Name, containerID)
|
||||
err := m.RemoveContainer(containerID)
|
||||
if err != nil {
|
||||
klog.Errorf("[cpumanager] reconcileState: failed to remove container (pod: %s, container id: %s, error: %v)", pod.Name, containerID, err)
|
||||
failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID})
|
||||
}
|
||||
klog.Warningf("[cpumanager] reconcileState: ignoring terminated container (pod: %s, container id: %s)", pod.Name, containerID)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
@ -269,12 +269,14 @@ func TestCPUManagerAdd(t *testing.T) {
|
||||
err: testCase.updateErr,
|
||||
},
|
||||
containerMap: containermap.NewContainerMap(),
|
||||
activePods: func() []*v1.Pod { return nil },
|
||||
podStatusProvider: mockPodStatusProvider{},
|
||||
sourcesReady: &sourcesReadyStub{},
|
||||
}
|
||||
|
||||
pod := makePod("fakePod", "fakeContainer", "2", "2")
|
||||
container := &pod.Spec.Containers[0]
|
||||
mgr.activePods = func() []*v1.Pod { return []*v1.Pod{pod} }
|
||||
|
||||
err := mgr.Allocate(pod, container)
|
||||
if !reflect.DeepEqual(err, testCase.expAllocateErr) {
|
||||
t.Errorf("CPU Manager Allocate() error (%v). expected error: %v but got: %v",
|
||||
@ -487,8 +489,11 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
|
||||
state: state,
|
||||
containerRuntime: mockRuntimeService{},
|
||||
containerMap: containermap.NewContainerMap(),
|
||||
activePods: func() []*v1.Pod { return nil },
|
||||
podStatusProvider: mockPodStatusProvider{},
|
||||
sourcesReady: &sourcesReadyStub{},
|
||||
activePods: func() []*v1.Pod {
|
||||
return []*v1.Pod{testCase.pod}
|
||||
},
|
||||
}
|
||||
|
||||
containers := append(
|
||||
@ -1030,12 +1035,14 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
err: testCase.updateErr,
|
||||
},
|
||||
containerMap: containermap.NewContainerMap(),
|
||||
activePods: func() []*v1.Pod { return nil },
|
||||
podStatusProvider: mockPodStatusProvider{},
|
||||
sourcesReady: &sourcesReadyStub{},
|
||||
}
|
||||
|
||||
pod := makePod("fakePod", "fakeContainer", "2", "2")
|
||||
container := &pod.Spec.Containers[0]
|
||||
mgr.activePods = func() []*v1.Pod { return []*v1.Pod{pod} }
|
||||
|
||||
err := mgr.Allocate(pod, container)
|
||||
if !reflect.DeepEqual(err, testCase.expAllocateErr) {
|
||||
t.Errorf("CPU Manager Allocate() error (%v). expected error: %v but got: %v",
|
||||
|
@ -54,19 +54,10 @@ func (i *internalContainerLifecycleImpl) PreStartContainer(pod *v1.Pod, containe
|
||||
}
|
||||
|
||||
func (i *internalContainerLifecycleImpl) PreStopContainer(containerID string) error {
|
||||
if i.cpuManager != nil {
|
||||
return i.cpuManager.RemoveContainer(containerID)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *internalContainerLifecycleImpl) PostStopContainer(containerID string) error {
|
||||
if i.cpuManager != nil {
|
||||
err := i.cpuManager.RemoveContainer(containerID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.TopologyManager) {
|
||||
err := i.topologyManager.RemoveContainer(containerID)
|
||||
if err != nil {
|
||||
|
Loading…
Reference in New Issue
Block a user