mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-22 19:31:44 +00:00
Merge pull request #52363 from balajismaniam/fix-cpuman-restartpol-never-bug
Automatic merge from submit-queue (batch tested with PRs 52442, 52247, 46542, 52363, 51781) Make CPU manager release CPUs when Pod enters completed phase. **What this PR does / why we need it**: When CPU manager is enabled, this PR releases allocated CPUs when container is not running and is non-restartable. **Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #52351 **Special notes for your reviewer**: This bug is only reproduced for pods with `restartPolicy` = `Never` or `OnFailure`. The following output is from a 4 CPU node. This bug can be reproduced as long >= half the cores are requested. pod1.yaml: ``` apiVersion: v1 kind: Pod metadata: name: test-pod1 spec: containers: - image: ubuntu command: ["/bin/bash"] args: ["-c", "sleep 5"] name: test-container1 resources: requests: cpu: 2 memory: 100Mi limits: cpu: 2 memory: 100Mi restartPolicy: "Never" ``` pod2.yaml: ``` apiVersion: v1 kind: Pod metadata: name: test-pod2 spec: containers: - image: ubuntu command: ["/bin/bash"] args: ["-c", "sleep 5"] name: test-container1 resources: requests: cpu: 2 memory: 100Mi limits: cpu: 2 memory: 100Mi restartPolicy: "Never" ``` Run a local Kubernetes cluster with CPU manager enabled. ```sh KUBELET_FLAGS='--feature-gates=CPUManager=true --cpu-manager-policy=static --cpu-manager-reconcile-period=1s --kube-reserved=cpu=500m' ./hack/local-up-cluster.sh ``` _Before:_ Create `test-pod1` using pod1.yaml. ``` ./cluster/kubectl.sh create -f pod1.yaml ``` Wait for the pod to complete and wait another 90 seconds (give enough time for GC to kick-in). Create `test-pod2` using pod2.yaml. ``` ./cluster/kubectl.sh create -f pod2.yaml ``` Get all pods in the cluster. ``` ./cluster/kubectl.sh get pods -a NAME READY STATUS RESTARTS AGE test-pod1 0/1 Completed 0 1m test-pod2 0/1 not enough cpus available to satisfy request 0 9s ``` _After:_ Create `test-pod1` using pod1.yaml. ``` ./cluster/kubectl.sh create -f pod1.yaml ``` Wait for the pod to complete and wait another 90 seconds (give enough time for GC to kick-in). Create `test-pod2` using pod2.yaml. ``` ./cluster/kubectl.sh create -f pod2.yaml ``` Get all pods in the cluster. ``` ./cluster/kubectl.sh get pods -a NAME READY STATUS RESTARTS AGE test-pod1 0/1 Completed 0 1m test-pod2 0/1 Completed 0 9s ```
This commit is contained in:
commit
ce5c41ab0f
@ -474,6 +474,17 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
|
||||
// check the status of containers.
|
||||
for idx, container := range pod.Spec.Containers {
|
||||
containerStatus := podStatus.FindContainerStatusByName(container.Name)
|
||||
|
||||
// Call internal container post-stop lifecycle hook for any non-running container so that any
|
||||
// allocated cpus are released immediately. If the container is restarted, cpus will be re-allocated
|
||||
// to it.
|
||||
if containerStatus != nil && containerStatus.State != kubecontainer.ContainerStateRunning {
|
||||
if err := m.internalLifecycle.PostStopContainer(containerStatus.ID.ID); err != nil {
|
||||
glog.Errorf("internal container post-stop lifecycle hook failed for container %v in pod %v with error %v",
|
||||
container.Name, pod.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
// If container does not exist, or is not running, check whether we
|
||||
// need to restart it.
|
||||
if containerStatus == nil || containerStatus.State != kubecontainer.ContainerStateRunning {
|
||||
|
Loading…
Reference in New Issue
Block a user