Merge pull request #67430 from choury/cpumanager

Automatic merge from submit-queue (batch tested with PRs 67430, 67550). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

cpumanager: rollback state if updateContainerCPUSet failed

**What this PR does / why we need it**:

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:
Fixes #63018

If `updateContainerCPUSet`  failed, the container will start failed. We should rollback the state to avoid CPU leak.
**Special notes for your reviewer**:

**Release note**:

```release-note
cpumanager: rollback state if updateContainerCPUSet failed
```
This commit is contained in:
Kubernetes Submit Queue 2018-08-21 23:20:58 -07:00 committed by GitHub
commit c491d48cde
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 44 additions and 17 deletions

View File

@ -181,12 +181,16 @@ func (m *manager) AddContainer(p *v1.Pod, c *v1.Container, containerID string) e
err = m.updateContainerCPUSet(containerID, cpus) err = m.updateContainerCPUSet(containerID, cpus)
if err != nil { if err != nil {
glog.Errorf("[cpumanager] AddContainer error: %v", err) glog.Errorf("[cpumanager] AddContainer error: %v", err)
m.Lock()
err := m.policy.RemoveContainer(m.state, containerID)
if err != nil {
glog.Errorf("[cpumanager] AddContainer rollback state error: %v", err)
}
m.Unlock()
}
return err return err
} }
} else {
glog.V(5).Infof("[cpumanager] update container resources is skipped due to cpu set is empty") glog.V(5).Infof("[cpumanager] update container resources is skipped due to cpu set is empty")
}
return nil return nil
} }
@ -245,6 +249,7 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
if err != nil { if err != nil {
glog.Errorf("[cpumanager] reconcileState: failed to add container (pod: %s, container: %s, container id: %s, error: %v)", pod.Name, container.Name, containerID, err) glog.Errorf("[cpumanager] reconcileState: failed to add container (pod: %s, container: %s, container id: %s, error: %v)", pod.Name, container.Name, containerID, err)
failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID}) failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID})
continue
} }
} else { } else {
// if DeletionTimestamp is set, pod has already been removed from state // if DeletionTimestamp is set, pod has already been removed from state

View File

@ -23,16 +23,18 @@ import (
"testing" "testing"
"time" "time"
cadvisorapi "github.com/google/cadvisor/info/v1"
"io/ioutil" "io/ioutil"
"os"
cadvisorapi "github.com/google/cadvisor/info/v1"
"k8s.io/api/core/v1" "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset" "k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
"os"
) )
type mockState struct { type mockState struct {
@ -139,40 +141,56 @@ func makePod(cpuRequest, cpuLimit string) *v1.Pod {
} }
func TestCPUManagerAdd(t *testing.T) { func TestCPUManagerAdd(t *testing.T) {
testPolicy := NewStaticPolicy(
&topology.CPUTopology{
NumCPUs: 4,
NumSockets: 1,
NumCores: 4,
CPUDetails: map[int]topology.CPUInfo{
0: {CoreID: 0, SocketID: 0},
1: {CoreID: 1, SocketID: 0},
2: {CoreID: 2, SocketID: 0},
3: {CoreID: 3, SocketID: 0},
},
}, 0)
testCases := []struct { testCases := []struct {
description string description string
regErr error
updateErr error updateErr error
policy Policy
expCPUSet cpuset.CPUSet
expErr error expErr error
}{ }{
{ {
description: "cpu manager add - no error", description: "cpu manager add - no error",
regErr: nil,
updateErr: nil, updateErr: nil,
policy: testPolicy,
expCPUSet: cpuset.NewCPUSet(3, 4),
expErr: nil, expErr: nil,
}, },
{ {
description: "cpu manager add - policy add container error", description: "cpu manager add - policy add container error",
regErr: fmt.Errorf("fake reg error"),
updateErr: nil, updateErr: nil,
policy: &mockPolicy{
err: fmt.Errorf("fake reg error"),
},
expCPUSet: cpuset.NewCPUSet(1, 2, 3, 4),
expErr: fmt.Errorf("fake reg error"), expErr: fmt.Errorf("fake reg error"),
}, },
{ {
description: "cpu manager add - container update error", description: "cpu manager add - container update error",
regErr: nil,
updateErr: fmt.Errorf("fake update error"), updateErr: fmt.Errorf("fake update error"),
expErr: nil, policy: testPolicy,
expCPUSet: cpuset.NewCPUSet(1, 2, 3, 4),
expErr: fmt.Errorf("fake update error"),
}, },
} }
for _, testCase := range testCases { for _, testCase := range testCases {
mgr := &manager{ mgr := &manager{
policy: &mockPolicy{ policy: testCase.policy,
err: testCase.regErr,
},
state: &mockState{ state: &mockState{
assignments: state.ContainerCPUAssignments{}, assignments: state.ContainerCPUAssignments{},
defaultCPUSet: cpuset.NewCPUSet(), defaultCPUSet: cpuset.NewCPUSet(1, 2, 3, 4),
}, },
containerRuntime: mockRuntimeService{ containerRuntime: mockRuntimeService{
err: testCase.updateErr, err: testCase.updateErr,
@ -181,13 +199,17 @@ func TestCPUManagerAdd(t *testing.T) {
podStatusProvider: mockPodStatusProvider{}, podStatusProvider: mockPodStatusProvider{},
} }
pod := makePod("1000", "1000") pod := makePod("2", "2")
container := &pod.Spec.Containers[0] container := &pod.Spec.Containers[0]
err := mgr.AddContainer(pod, container, "fakeID") err := mgr.AddContainer(pod, container, "fakeID")
if !reflect.DeepEqual(err, testCase.expErr) { if !reflect.DeepEqual(err, testCase.expErr) {
t.Errorf("CPU Manager AddContainer() error (%v). expected error: %v but got: %v", t.Errorf("CPU Manager AddContainer() error (%v). expected error: %v but got: %v",
testCase.description, testCase.expErr, err) testCase.description, testCase.expErr, err)
} }
if !testCase.expCPUSet.Equals(mgr.state.GetDefaultCPUSet()) {
t.Errorf("CPU Manager AddContainer() error (%v). expected cpuset: %v but got: %v",
testCase.description, testCase.expCPUSet, mgr.state.GetDefaultCPUSet())
}
} }
} }