Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-24 20:24:09 +00:00)
Merge pull request #67430 from choury/cpumanager

Automatic merge from submit-queue (batch tested with PRs 67430, 67550). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

cpumanager: rollback state if updateContainerCPUSet failed

**What this PR does / why we need it**:
If `updateContainerCPUSet` fails, the container fails to start. We should roll back the CPU manager's state to avoid leaking CPUs.

**Which issue(s) this PR fixes**:
Fixes #63018

**Release note**:
```release-note
cpumanager: rollback state if updateContainerCPUSet failed
```
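The fix is the usual compensate-on-failure idiom: the policy reserves exclusive CPUs in the manager's state first, so any later failure to apply the cpuset to the runtime must undo that reservation, or the CPUs stay committed to a container that never started. A minimal, self-contained sketch of the idiom (hypothetical names, not the kubelet's actual types):

```go
package main

import (
	"errors"
	"fmt"
)

// assignments stands in for the CPU manager's state:
// container ID -> exclusively assigned CPUs.
var assignments = map[string][]int{}

// addContainer mirrors the shape of the fix: mutate state first,
// then apply the cpuset, and compensate if the apply fails.
func addContainer(id string, cpus []int, applyCPUSet func() error) error {
	assignments[id] = cpus // reserve CPUs in state, like policy.AddContainer
	if err := applyCPUSet(); err != nil {
		// Roll back; otherwise the CPUs stay reserved for a
		// container that never started (the leak in #63018).
		delete(assignments, id)
		return err
	}
	return nil
}

func main() {
	err := addContainer("c1", []int{1, 2}, func() error {
		return errors.New("fake update error") // simulated runtime failure
	})
	fmt.Println(err, assignments) // prints: fake update error map[]
}
```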
This commit is contained in: commit c491d48cde
pkg/kubelet/cm/cpumanager/cpu_manager.go

```diff
@@ -181,12 +181,16 @@ func (m *manager) AddContainer(p *v1.Pod, c *v1.Container, containerID string) error {
 		err = m.updateContainerCPUSet(containerID, cpus)
 		if err != nil {
 			glog.Errorf("[cpumanager] AddContainer error: %v", err)
-			return err
-		}
-	} else {
-		glog.V(5).Infof("[cpumanager] update container resources is skipped due to cpu set is empty")
-	}
-
+			m.Lock()
+			err := m.policy.RemoveContainer(m.state, containerID)
+			if err != nil {
+				glog.Errorf("[cpumanager] AddContainer rollback state error: %v", err)
+			}
+			m.Unlock()
+		}
+		return err
+	}
+	glog.V(5).Infof("[cpumanager] update container resources is skipped due to cpu set is empty")
 	return nil
 }
```
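Note the deliberate shadowing in the rollback branch: `err := m.policy.RemoveContainer(m.state, containerID)` declares a new `err` scoped to the failure block, so a failed rollback is only logged, while the `return err` after the block still propagates the original `updateContainerCPUSet` error to the caller.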
```diff
@@ -245,6 +249,7 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []reconciledContainer) {
 			if err != nil {
 				glog.Errorf("[cpumanager] reconcileState: failed to add container (pod: %s, container: %s, container id: %s, error: %v)", pod.Name, container.Name, containerID, err)
 				failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID})
+				continue
 			}
 		} else {
 			// if DeletionTimestamp is set, pod has already been removed from state
```
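The added `continue` makes a failed re-add terminal for that container within the current reconcile pass: it is recorded in `failure` and the rest of the loop body is skipped, so it is not subsequently treated as reconciled.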
pkg/kubelet/cm/cpumanager/cpu_manager_test.go

```diff
@@ -23,16 +23,18 @@ import (
 	"testing"
 	"time"
 
-	cadvisorapi "github.com/google/cadvisor/info/v1"
 	"io/ioutil"
+	"os"
+
+	cadvisorapi "github.com/google/cadvisor/info/v1"
 	"k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
 	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
+	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
 	"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
-	"os"
 )
 
 type mockState struct {
```
```diff
@@ -139,40 +141,56 @@ func makePod(cpuRequest, cpuLimit string) *v1.Pod {
 }
 
 func TestCPUManagerAdd(t *testing.T) {
+	testPolicy := NewStaticPolicy(
+		&topology.CPUTopology{
+			NumCPUs:    4,
+			NumSockets: 1,
+			NumCores:   4,
+			CPUDetails: map[int]topology.CPUInfo{
+				0: {CoreID: 0, SocketID: 0},
+				1: {CoreID: 1, SocketID: 0},
+				2: {CoreID: 2, SocketID: 0},
+				3: {CoreID: 3, SocketID: 0},
+			},
+		}, 0)
 	testCases := []struct {
 		description string
-		regErr      error
 		updateErr   error
+		policy      Policy
+		expCPUSet   cpuset.CPUSet
 		expErr      error
 	}{
 		{
 			description: "cpu manager add - no error",
-			regErr:      nil,
 			updateErr:   nil,
+			policy:      testPolicy,
+			expCPUSet:   cpuset.NewCPUSet(3, 4),
 			expErr:      nil,
 		},
 		{
 			description: "cpu manager add - policy add container error",
-			regErr:      fmt.Errorf("fake reg error"),
 			updateErr:   nil,
+			policy: &mockPolicy{
+				err: fmt.Errorf("fake reg error"),
+			},
+			expCPUSet: cpuset.NewCPUSet(1, 2, 3, 4),
 			expErr:      fmt.Errorf("fake reg error"),
 		},
 		{
 			description: "cpu manager add - container update error",
-			regErr:      nil,
 			updateErr:   fmt.Errorf("fake update error"),
-			expErr:      nil,
+			policy:      testPolicy,
+			expCPUSet:   cpuset.NewCPUSet(1, 2, 3, 4),
+			expErr:      fmt.Errorf("fake update error"),
 		},
 	}
 
 	for _, testCase := range testCases {
 		mgr := &manager{
-			policy: &mockPolicy{
-				err: testCase.regErr,
-			},
+			policy: testCase.policy,
 			state: &mockState{
 				assignments:   state.ContainerCPUAssignments{},
-				defaultCPUSet: cpuset.NewCPUSet(),
+				defaultCPUSet: cpuset.NewCPUSet(1, 2, 3, 4),
 			},
 			containerRuntime: mockRuntimeService{
 				err: testCase.updateErr,
```
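The `regErr` field disappears in favor of an injectable `policy`: the registration-error case keeps its `mockPolicy`, while the success and rollback cases now run against the real static policy, whose state changes are what the new `expCPUSet` field asserts on.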
```diff
@@ -181,13 +199,17 @@ func TestCPUManagerAdd(t *testing.T) {
 			podStatusProvider: mockPodStatusProvider{},
 		}
 
-		pod := makePod("1000", "1000")
+		pod := makePod("2", "2")
 		container := &pod.Spec.Containers[0]
 		err := mgr.AddContainer(pod, container, "fakeID")
 		if !reflect.DeepEqual(err, testCase.expErr) {
 			t.Errorf("CPU Manager AddContainer() error (%v). expected error: %v but got: %v",
 				testCase.description, testCase.expErr, err)
 		}
+		if !testCase.expCPUSet.Equals(mgr.state.GetDefaultCPUSet()) {
+			t.Errorf("CPU Manager AddContainer() error (%v). expected cpuset: %v but got: %v",
+				testCase.description, testCase.expCPUSet, mgr.state.GetDefaultCPUSet())
+		}
 	}
 }
```
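These test changes follow directly from the rollback: with the old fixture the default cpuset was empty, so `AddContainer` skipped `updateContainerCPUSet` entirely, which is why the update-error case used to expect `nil`. Seeding the mock state with a shared pool of `cpuset.NewCPUSet(1, 2, 3, 4)` and requesting two exclusive CPUs via `makePod("2", "2")` forces the update path, and the `expCPUSet` assertion checks that the reserved CPUs land back in the default set when the runtime update fails. On a standard checkout, the test can be run on its own from the repository root with `go test -run TestCPUManagerAdd ./pkg/kubelet/cm/cpumanager/`.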