mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-21 10:51:29 +00:00
Merge pull request #67430 from choury/cpumanager
Automatic merge from submit-queue (batch tested with PRs 67430, 67550). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. cpumanager: rollback state if updateContainerCPUSet failed **What this PR does / why we need it**: **Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*: Fixes #63018 If `updateContainerCPUSet` failed, the container will start failed. We should rollback the state to avoid CPU leak. **Special notes for your reviewer**: **Release note**: ```release-note cpumanager: rollback state if updateContainerCPUSet failed ```
This commit is contained in:
commit
c491d48cde
@ -181,12 +181,16 @@ func (m *manager) AddContainer(p *v1.Pod, c *v1.Container, containerID string) e
|
||||
err = m.updateContainerCPUSet(containerID, cpus)
|
||||
if err != nil {
|
||||
glog.Errorf("[cpumanager] AddContainer error: %v", err)
|
||||
return err
|
||||
m.Lock()
|
||||
err := m.policy.RemoveContainer(m.state, containerID)
|
||||
if err != nil {
|
||||
glog.Errorf("[cpumanager] AddContainer rollback state error: %v", err)
|
||||
}
|
||||
m.Unlock()
|
||||
}
|
||||
} else {
|
||||
glog.V(5).Infof("[cpumanager] update container resources is skipped due to cpu set is empty")
|
||||
return err
|
||||
}
|
||||
|
||||
glog.V(5).Infof("[cpumanager] update container resources is skipped due to cpu set is empty")
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -245,6 +249,7 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
|
||||
if err != nil {
|
||||
glog.Errorf("[cpumanager] reconcileState: failed to add container (pod: %s, container: %s, container id: %s, error: %v)", pod.Name, container.Name, containerID, err)
|
||||
failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID})
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
// if DeletionTimestamp is set, pod has already been removed from state
|
||||
|
@ -23,16 +23,18 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
cadvisorapi "github.com/google/cadvisor/info/v1"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
|
||||
cadvisorapi "github.com/google/cadvisor/info/v1"
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
"os"
|
||||
)
|
||||
|
||||
type mockState struct {
|
||||
@ -139,40 +141,56 @@ func makePod(cpuRequest, cpuLimit string) *v1.Pod {
|
||||
}
|
||||
|
||||
func TestCPUManagerAdd(t *testing.T) {
|
||||
testPolicy := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
NumCPUs: 4,
|
||||
NumSockets: 1,
|
||||
NumCores: 4,
|
||||
CPUDetails: map[int]topology.CPUInfo{
|
||||
0: {CoreID: 0, SocketID: 0},
|
||||
1: {CoreID: 1, SocketID: 0},
|
||||
2: {CoreID: 2, SocketID: 0},
|
||||
3: {CoreID: 3, SocketID: 0},
|
||||
},
|
||||
}, 0)
|
||||
testCases := []struct {
|
||||
description string
|
||||
regErr error
|
||||
updateErr error
|
||||
policy Policy
|
||||
expCPUSet cpuset.CPUSet
|
||||
expErr error
|
||||
}{
|
||||
{
|
||||
description: "cpu manager add - no error",
|
||||
regErr: nil,
|
||||
updateErr: nil,
|
||||
policy: testPolicy,
|
||||
expCPUSet: cpuset.NewCPUSet(3, 4),
|
||||
expErr: nil,
|
||||
},
|
||||
{
|
||||
description: "cpu manager add - policy add container error",
|
||||
regErr: fmt.Errorf("fake reg error"),
|
||||
updateErr: nil,
|
||||
expErr: fmt.Errorf("fake reg error"),
|
||||
policy: &mockPolicy{
|
||||
err: fmt.Errorf("fake reg error"),
|
||||
},
|
||||
expCPUSet: cpuset.NewCPUSet(1, 2, 3, 4),
|
||||
expErr: fmt.Errorf("fake reg error"),
|
||||
},
|
||||
{
|
||||
description: "cpu manager add - container update error",
|
||||
regErr: nil,
|
||||
updateErr: fmt.Errorf("fake update error"),
|
||||
expErr: nil,
|
||||
policy: testPolicy,
|
||||
expCPUSet: cpuset.NewCPUSet(1, 2, 3, 4),
|
||||
expErr: fmt.Errorf("fake update error"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
mgr := &manager{
|
||||
policy: &mockPolicy{
|
||||
err: testCase.regErr,
|
||||
},
|
||||
policy: testCase.policy,
|
||||
state: &mockState{
|
||||
assignments: state.ContainerCPUAssignments{},
|
||||
defaultCPUSet: cpuset.NewCPUSet(),
|
||||
defaultCPUSet: cpuset.NewCPUSet(1, 2, 3, 4),
|
||||
},
|
||||
containerRuntime: mockRuntimeService{
|
||||
err: testCase.updateErr,
|
||||
@ -181,13 +199,17 @@ func TestCPUManagerAdd(t *testing.T) {
|
||||
podStatusProvider: mockPodStatusProvider{},
|
||||
}
|
||||
|
||||
pod := makePod("1000", "1000")
|
||||
pod := makePod("2", "2")
|
||||
container := &pod.Spec.Containers[0]
|
||||
err := mgr.AddContainer(pod, container, "fakeID")
|
||||
if !reflect.DeepEqual(err, testCase.expErr) {
|
||||
t.Errorf("CPU Manager AddContainer() error (%v). expected error: %v but got: %v",
|
||||
testCase.description, testCase.expErr, err)
|
||||
}
|
||||
if !testCase.expCPUSet.Equals(mgr.state.GetDefaultCPUSet()) {
|
||||
t.Errorf("CPU Manager AddContainer() error (%v). expected cpuset: %v but got: %v",
|
||||
testCase.description, testCase.expCPUSet, mgr.state.GetDefaultCPUSet())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user