Merge pull request #67430 from choury/cpumanager

Automatic merge from submit-queue (batch tested with PRs 67430, 67550). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

cpumanager: rollback state if updateContainerCPUSet failed

**What this PR does / why we need it**:

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:
Fixes #63018

If `updateContainerCPUSet`  failed, the container will start failed. We should rollback the state to avoid CPU leak.
**Special notes for your reviewer**:

**Release note**:

```release-note
cpumanager: rollback state if updateContainerCPUSet failed
```
This commit is contained in:
Kubernetes Submit Queue 2018-08-21 23:20:58 -07:00 committed by GitHub
commit c491d48cde
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 44 additions and 17 deletions

View File

@ -181,12 +181,16 @@ func (m *manager) AddContainer(p *v1.Pod, c *v1.Container, containerID string) e
err = m.updateContainerCPUSet(containerID, cpus)
if err != nil {
glog.Errorf("[cpumanager] AddContainer error: %v", err)
return err
m.Lock()
err := m.policy.RemoveContainer(m.state, containerID)
if err != nil {
glog.Errorf("[cpumanager] AddContainer rollback state error: %v", err)
}
m.Unlock()
}
} else {
glog.V(5).Infof("[cpumanager] update container resources is skipped due to cpu set is empty")
return err
}
glog.V(5).Infof("[cpumanager] update container resources is skipped due to cpu set is empty")
return nil
}
@ -245,6 +249,7 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
if err != nil {
glog.Errorf("[cpumanager] reconcileState: failed to add container (pod: %s, container: %s, container id: %s, error: %v)", pod.Name, container.Name, containerID, err)
failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID})
continue
}
} else {
// if DeletionTimestamp is set, pod has already been removed from state

View File

@ -23,16 +23,18 @@ import (
"testing"
"time"
cadvisorapi "github.com/google/cadvisor/info/v1"
"io/ioutil"
"os"
cadvisorapi "github.com/google/cadvisor/info/v1"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
"os"
)
type mockState struct {
@ -139,40 +141,56 @@ func makePod(cpuRequest, cpuLimit string) *v1.Pod {
}
func TestCPUManagerAdd(t *testing.T) {
testPolicy := NewStaticPolicy(
&topology.CPUTopology{
NumCPUs: 4,
NumSockets: 1,
NumCores: 4,
CPUDetails: map[int]topology.CPUInfo{
0: {CoreID: 0, SocketID: 0},
1: {CoreID: 1, SocketID: 0},
2: {CoreID: 2, SocketID: 0},
3: {CoreID: 3, SocketID: 0},
},
}, 0)
testCases := []struct {
description string
regErr error
updateErr error
policy Policy
expCPUSet cpuset.CPUSet
expErr error
}{
{
description: "cpu manager add - no error",
regErr: nil,
updateErr: nil,
policy: testPolicy,
expCPUSet: cpuset.NewCPUSet(3, 4),
expErr: nil,
},
{
description: "cpu manager add - policy add container error",
regErr: fmt.Errorf("fake reg error"),
updateErr: nil,
expErr: fmt.Errorf("fake reg error"),
policy: &mockPolicy{
err: fmt.Errorf("fake reg error"),
},
expCPUSet: cpuset.NewCPUSet(1, 2, 3, 4),
expErr: fmt.Errorf("fake reg error"),
},
{
description: "cpu manager add - container update error",
regErr: nil,
updateErr: fmt.Errorf("fake update error"),
expErr: nil,
policy: testPolicy,
expCPUSet: cpuset.NewCPUSet(1, 2, 3, 4),
expErr: fmt.Errorf("fake update error"),
},
}
for _, testCase := range testCases {
mgr := &manager{
policy: &mockPolicy{
err: testCase.regErr,
},
policy: testCase.policy,
state: &mockState{
assignments: state.ContainerCPUAssignments{},
defaultCPUSet: cpuset.NewCPUSet(),
defaultCPUSet: cpuset.NewCPUSet(1, 2, 3, 4),
},
containerRuntime: mockRuntimeService{
err: testCase.updateErr,
@ -181,13 +199,17 @@ func TestCPUManagerAdd(t *testing.T) {
podStatusProvider: mockPodStatusProvider{},
}
pod := makePod("1000", "1000")
pod := makePod("2", "2")
container := &pod.Spec.Containers[0]
err := mgr.AddContainer(pod, container, "fakeID")
if !reflect.DeepEqual(err, testCase.expErr) {
t.Errorf("CPU Manager AddContainer() error (%v). expected error: %v but got: %v",
testCase.description, testCase.expErr, err)
}
if !testCase.expCPUSet.Equals(mgr.state.GetDefaultCPUSet()) {
t.Errorf("CPU Manager AddContainer() error (%v). expected cpuset: %v but got: %v",
testCase.description, testCase.expCPUSet, mgr.state.GetDefaultCPUSet())
}
}
}