From 36b92b9b29b27038163398e0a6507d8a2a4927df Mon Sep 17 00:00:00 2001 From: choury Date: Wed, 15 Aug 2018 18:24:05 +0800 Subject: [PATCH] cpumanager: rollback state if updateContainerCPUSet failed --- pkg/kubelet/cm/cpumanager/cpu_manager.go | 13 +++-- pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 48 ++++++++++++++----- 2 files changed, 44 insertions(+), 17 deletions(-) diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go index 1f3953e41be..0754a4da092 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go @@ -181,12 +181,16 @@ func (m *manager) AddContainer(p *v1.Pod, c *v1.Container, containerID string) e err = m.updateContainerCPUSet(containerID, cpus) if err != nil { glog.Errorf("[cpumanager] AddContainer error: %v", err) - return err + m.Lock() + err := m.policy.RemoveContainer(m.state, containerID) + if err != nil { + glog.Errorf("[cpumanager] AddContainer rollback state error: %v", err) + } + m.Unlock() } - } else { - glog.V(5).Infof("[cpumanager] update container resources is skipped due to cpu set is empty") + return err } - + glog.V(5).Infof("[cpumanager] update container resources is skipped due to cpu set is empty") return nil } @@ -245,6 +249,7 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec if err != nil { glog.Errorf("[cpumanager] reconcileState: failed to add container (pod: %s, container: %s, container id: %s, error: %v)", pod.Name, container.Name, containerID, err) failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID}) + continue } } else { // if DeletionTimestamp is set, pod has already been removed from state diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go index baeb6a50809..ba1ae95e1f6 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go @@ -23,16 +23,18 @@ import ( "testing" "time" - cadvisorapi "github.com/google/cadvisor/info/v1" "io/ioutil" + "os" + + cadvisorapi "github.com/google/cadvisor/info/v1" "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" + "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" "k8s.io/kubernetes/pkg/kubelet/cm/cpuset" - "os" ) type mockState struct { @@ -139,40 +141,56 @@ func makePod(cpuRequest, cpuLimit string) *v1.Pod { } func TestCPUManagerAdd(t *testing.T) { + testPolicy := NewStaticPolicy( + &topology.CPUTopology{ + NumCPUs: 4, + NumSockets: 1, + NumCores: 4, + CPUDetails: map[int]topology.CPUInfo{ + 0: {CoreID: 0, SocketID: 0}, + 1: {CoreID: 1, SocketID: 0}, + 2: {CoreID: 2, SocketID: 0}, + 3: {CoreID: 3, SocketID: 0}, + }, + }, 0) testCases := []struct { description string - regErr error updateErr error + policy Policy + expCPUSet cpuset.CPUSet expErr error }{ { description: "cpu manager add - no error", - regErr: nil, updateErr: nil, + policy: testPolicy, + expCPUSet: cpuset.NewCPUSet(3, 4), expErr: nil, }, { description: "cpu manager add - policy add container error", - regErr: fmt.Errorf("fake reg error"), updateErr: nil, - expErr: fmt.Errorf("fake reg error"), + policy: &mockPolicy{ + err: fmt.Errorf("fake reg error"), + }, + expCPUSet: cpuset.NewCPUSet(1, 2, 3, 4), + expErr: fmt.Errorf("fake reg error"), }, { description: "cpu manager add - container update error", - regErr: nil, updateErr: fmt.Errorf("fake update error"), - expErr: nil, + policy: testPolicy, + expCPUSet: cpuset.NewCPUSet(1, 2, 3, 4), + expErr: fmt.Errorf("fake update error"), }, } for _, testCase := range testCases { mgr := &manager{ - policy: &mockPolicy{ - err: testCase.regErr, - }, + policy: testCase.policy, state: &mockState{ assignments: state.ContainerCPUAssignments{}, - defaultCPUSet: cpuset.NewCPUSet(), + defaultCPUSet: cpuset.NewCPUSet(1, 2, 3, 4), }, containerRuntime: mockRuntimeService{ err: testCase.updateErr, @@ -181,13 +199,17 @@ func TestCPUManagerAdd(t *testing.T) { podStatusProvider: mockPodStatusProvider{}, } - pod := makePod("1000", "1000") + pod := makePod("2", "2") container := &pod.Spec.Containers[0] err := mgr.AddContainer(pod, container, "fakeID") if !reflect.DeepEqual(err, testCase.expErr) { t.Errorf("CPU Manager AddContainer() error (%v). expected error: %v but got: %v", testCase.description, testCase.expErr, err) } + if !testCase.expCPUSet.Equals(mgr.state.GetDefaultCPUSet()) { + t.Errorf("CPU Manager AddContainer() error (%v). expected cpuset: %v but got: %v", + testCase.description, testCase.expCPUSet, mgr.state.GetDefaultCPUSet()) + } } }