kubelet: Fix race when KillPod followed by IsPodPendingTermination

Ensures the pod to be pending termination or be killed, after
(*podKillerWithChannel).KillPod has been returned, by limiting
one request per pod in (*podKillerWithChannel).KillPod.
This commit is contained in:
Geonju Kim 2021-02-10 02:19:43 +09:00
parent a1e310b200
commit b451c15bf7
2 changed files with 40 additions and 17 deletions

View File

@ -1143,7 +1143,7 @@ func (kl *Kubelet) HandlePodCleanups() error {
// PodKiller handles requests for killing pods
type PodKiller interface {
// KillPod receives pod speficier representing the pod to kill
KillPod(pair *kubecontainer.PodPair)
KillPod(podPair *kubecontainer.PodPair)
// PerformPodKillingWork performs the actual pod killing work via calling CRI
// It returns after its Close() func is called and all outstanding pod killing requests are served
PerformPodKillingWork()
@ -1221,10 +1221,9 @@ func (pk *podKillerWithChannel) markPodTerminated(uid string) {
delete(pk.podTerminationMap, uid)
}
// checkAndMarkPodPendingTerminationByPod checks to see if the pod is being
// killed and returns true if it is, otherwise the pod is added to the map and
// returns false
func (pk *podKillerWithChannel) checkAndMarkPodPendingTerminationByPod(podPair *kubecontainer.PodPair) bool {
// KillPod sends pod killing request to the killer after marks the pod
// unless the given pod has been marked to be killed
func (pk *podKillerWithChannel) KillPod(podPair *kubecontainer.PodPair) {
pk.podKillingLock.Lock()
defer pk.podKillingLock.Unlock()
var apiPodExists bool
@ -1255,9 +1254,10 @@ func (pk *podKillerWithChannel) checkAndMarkPodPendingTerminationByPod(podPair *
} else {
klog.V(4).Infof("running pod %q is pending termination", podPair.RunningPod.ID)
}
return true
return
}
return false
// Limit to one request per pod
pk.podKillingCh <- podPair
}
// Close closes the channel through which requests are delivered
@ -1265,20 +1265,10 @@ func (pk *podKillerWithChannel) Close() {
close(pk.podKillingCh)
}
// KillPod sends pod killing request to the killer
func (pk *podKillerWithChannel) KillPod(pair *kubecontainer.PodPair) {
pk.podKillingCh <- pair
}
// PerformPodKillingWork launches a goroutine to kill a pod received from the channel if
// another goroutine isn't already in action.
func (pk *podKillerWithChannel) PerformPodKillingWork() {
for podPair := range pk.podKillingCh {
if pk.checkAndMarkPodPendingTerminationByPod(podPair) {
// Pod is already being killed
continue
}
runningPod := podPair.RunningPod
apiPod := podPair.APIPod

View File

@ -500,6 +500,39 @@ func TestSyncPodsDeletesWhenSourcesAreReady(t *testing.T) {
fakeRuntime.AssertKilledPods([]string{"12345678"})
}
func TestKillPodFollwedByIsPodPendingTermination(t *testing.T) {
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
defer testKubelet.Cleanup()
defer testKubelet.kubelet.podKiller.Close()
go testKubelet.kubelet.podKiller.PerformPodKillingWork()
pod := &kubecontainer.Pod{
ID: "12345678",
Name: "foo",
Namespace: "new",
Containers: []*kubecontainer.Container{
{Name: "bar"},
},
}
fakeRuntime := testKubelet.fakeRuntime
fakeContainerManager := testKubelet.fakeContainerManager
fakeContainerManager.PodContainerManager.AddPodFromCgroups(pod) // add pod to mock cgroup
fakeRuntime.PodList = []*containertest.FakePod{
{Pod: pod},
}
kl := testKubelet.kubelet
kl.podKiller.KillPod(&kubecontainer.PodPair{
APIPod: nil,
RunningPod: pod,
})
if !(kl.podKiller.IsPodPendingTerminationByUID(pod.ID) || fakeRuntime.AssertKilledPods([]string{"12345678"}) == nil) {
t.Fatal("Race condition: When KillPod is complete, the pod should be pending termination or be killed")
}
}
type testNodeLister struct {
nodes []*v1.Node
}