register all pending pod deletions and check for kill

do not delete the cgroup from a pod when it is being killed
This commit is contained in:
Ryan Phillips
2021-01-26 16:22:08 -06:00
parent 35bc0fbad5
commit f918e11e3a
6 changed files with 473 additions and 67 deletions

View File

@@ -1033,7 +1033,7 @@ func (kl *Kubelet) removeOrphanedPodStatuses(pods []*v1.Pod, mirrorPods []*v1.Po
func (kl *Kubelet) deleteOrphanedMirrorPods() {
podFullNames := kl.podManager.GetOrphanedMirrorPodNames()
for _, podFullname := range podFullNames {
if !kl.podKiller.IsMirrorPodPendingTerminationByPodName(podFullname) {
if !kl.podKiller.IsPodPendingTerminationByPodName(podFullname) {
_, err := kl.podManager.DeleteMirrorPod(podFullname, nil)
if err != nil {
klog.Errorf("encountered error when deleting mirror pod %q : %v", podFullname, err)
@@ -1143,12 +1143,10 @@ type PodKiller interface {
PerformPodKillingWork()
// After Close() is called, this pod killer wouldn't accept any more pod killing requests
Close()
// IsMirrorPodPendingTerminationByPodName checks whether the mirror pod for the given full pod name is pending termination
IsMirrorPodPendingTerminationByPodName(podFullname string) bool
// IsMirrorPodPendingTerminationByUID checks whether the mirror pod for the given uid is pending termination
IsMirrorPodPendingTerminationByUID(uid types.UID) bool
// MarkMirrorPodPendingTermination marks the mirror pod entering grace period of termination
MarkMirrorPodPendingTermination(pod *v1.Pod)
// IsPodPendingTerminationByPodName checks whether any pod for the given full pod name is pending termination (thread safe)
IsPodPendingTerminationByPodName(podFullname string) bool
// IsPodPendingTerminationByUID checks whether the mirror pod for the given uid is pending termination (thread safe)
IsPodPendingTerminationByUID(uid types.UID) bool
}
// podKillerWithChannel is an implementation of PodKiller which receives pod killing requests via channel
@@ -1156,10 +1154,13 @@ type podKillerWithChannel struct {
// Channel for getting pods to kill.
podKillingCh chan *kubecontainer.PodPair
// lock for synchronization between HandlePodCleanups and pod killer
podKillingLock *sync.Mutex
podKillingLock *sync.RWMutex
// mirrorPodTerminationMap keeps track of the progress of mirror pod termination
// The key is the UID of the pod and the value is the full name of the pod
mirrorPodTerminationMap map[string]string
// podTerminationMap keeps track of the progress of pod termination.
// The key is the UID of the pod and the value is the full name of the pod
podTerminationMap map[string]string
// killPod is the func which invokes runtime to kill the pod
killPod func(pod *v1.Pod, runningPod *kubecontainer.Pod, status *kubecontainer.PodStatus, gracePeriodOverride *int64) error
}
@@ -1168,47 +1169,89 @@ type podKillerWithChannel struct {
func NewPodKiller(kl *Kubelet) PodKiller {
podKiller := &podKillerWithChannel{
podKillingCh: make(chan *kubecontainer.PodPair, podKillingChannelCapacity),
podKillingLock: &sync.Mutex{},
podKillingLock: &sync.RWMutex{},
mirrorPodTerminationMap: make(map[string]string),
podTerminationMap: make(map[string]string),
killPod: kl.killPod,
}
return podKiller
}
// IsMirrorPodPendingTerminationByUID checks whether the pod for the given uid is pending termination
func (pk *podKillerWithChannel) IsMirrorPodPendingTerminationByUID(uid types.UID) bool {
pk.podKillingLock.Lock()
defer pk.podKillingLock.Unlock()
_, ok := pk.mirrorPodTerminationMap[string(uid)]
return ok
// IsPodPendingTerminationByUID checks whether the pod for the given uid is pending termination
func (pk *podKillerWithChannel) IsPodPendingTerminationByUID(uid types.UID) bool {
pk.podKillingLock.RLock()
defer pk.podKillingLock.RUnlock()
if _, ok := pk.podTerminationMap[string(uid)]; ok {
return ok
}
if _, ok := pk.mirrorPodTerminationMap[string(uid)]; ok {
return ok
}
return false
}
// IsMirrorPodPendingTerminationByPodName checks whether the given pod is in grace period of termination
func (pk *podKillerWithChannel) IsMirrorPodPendingTerminationByPodName(podFullname string) bool {
pk.podKillingLock.Lock()
defer pk.podKillingLock.Unlock()
func (pk *podKillerWithChannel) IsPodPendingTerminationByPodName(podFullname string) bool {
pk.podKillingLock.RLock()
defer pk.podKillingLock.RUnlock()
for _, name := range pk.mirrorPodTerminationMap {
if name == podFullname {
return true
}
}
for _, name := range pk.podTerminationMap {
if name == podFullname {
return true
}
}
return false
}
func (pk *podKillerWithChannel) markMirrorPodTerminated(uid string) {
pk.podKillingLock.Lock()
func (pk *podKillerWithChannel) markPodTerminated(uid string) {
klog.V(4).Infof("marking pod termination %q", uid)
pk.podKillingLock.Lock()
defer pk.podKillingLock.Unlock()
delete(pk.mirrorPodTerminationMap, uid)
pk.podKillingLock.Unlock()
delete(pk.podTerminationMap, uid)
}
// MarkMirrorPodPendingTermination marks the pod entering grace period of termination
func (pk *podKillerWithChannel) MarkMirrorPodPendingTermination(pod *v1.Pod) {
fullname := kubecontainer.GetPodFullName(pod)
klog.V(3).Infof("marking pod pending termination %q", string(pod.UID))
// checkAndMarkPodPendingTerminationByPod checks to see if the pod is being
// killed and returns true if it is, otherwise the pod is added to the map and
// returns false
func (pk *podKillerWithChannel) checkAndMarkPodPendingTerminationByPod(podPair *kubecontainer.PodPair) bool {
pk.podKillingLock.Lock()
pk.mirrorPodTerminationMap[string(pod.UID)] = fullname
pk.podKillingLock.Unlock()
defer pk.podKillingLock.Unlock()
var apiPodExists bool
var runningPodExists bool
if podPair.APIPod != nil {
uid := string(podPair.APIPod.UID)
_, apiPodExists = pk.mirrorPodTerminationMap[uid]
if !apiPodExists {
fullname := kubecontainer.GetPodFullName(podPair.APIPod)
klog.V(4).Infof("marking api pod pending termination %q : %q", uid, fullname)
pk.mirrorPodTerminationMap[uid] = fullname
}
}
if podPair.RunningPod != nil {
uid := string(podPair.RunningPod.ID)
_, runningPodExists = pk.podTerminationMap[uid]
if !runningPodExists {
fullname := podPair.RunningPod.Name
klog.V(4).Infof("marking running pod pending termination %q: %q", uid, fullname)
pk.podTerminationMap[uid] = fullname
}
}
if apiPodExists || runningPodExists {
if apiPodExists && runningPodExists {
klog.V(4).Infof("api pod %q and running pod %q is pending termination", podPair.APIPod.UID, podPair.RunningPod.ID)
} else if apiPodExists {
klog.V(4).Infof("api pod %q is pending termination", podPair.APIPod.UID)
} else {
klog.V(4).Infof("running pod %q is pending termination", podPair.RunningPod.ID)
}
return true
}
return false
}
// Close closes the channel through which requests are delivered
@@ -1224,33 +1267,23 @@ func (pk *podKillerWithChannel) KillPod(pair *kubecontainer.PodPair) {
// PerformPodKillingWork launches a goroutine to kill a pod received from the channel if
// another goroutine isn't already in action.
func (pk *podKillerWithChannel) PerformPodKillingWork() {
killing := sets.NewString()
// guard for the killing set
lock := sync.Mutex{}
for podPair := range pk.podKillingCh {
if pk.checkAndMarkPodPendingTerminationByPod(podPair) {
// Pod is already being killed
continue
}
runningPod := podPair.RunningPod
apiPod := podPair.APIPod
lock.Lock()
exists := killing.Has(string(runningPod.ID))
if !exists {
killing.Insert(string(runningPod.ID))
}
lock.Unlock()
if !exists {
go func(apiPod *v1.Pod, runningPod *kubecontainer.Pod) {
klog.V(2).Infof("Killing unwanted pod %q", runningPod.Name)
err := pk.killPod(apiPod, runningPod, nil, nil)
if err != nil {
klog.Errorf("Failed killing the pod %q: %v", runningPod.Name, err)
}
lock.Lock()
killing.Delete(string(runningPod.ID))
lock.Unlock()
pk.markMirrorPodTerminated(string(runningPod.ID))
}(apiPod, runningPod)
}
go func(apiPod *v1.Pod, runningPod *kubecontainer.Pod) {
klog.V(2).Infof("Killing unwanted pod %q", runningPod.Name)
err := pk.killPod(apiPod, runningPod, nil, nil)
if err != nil {
klog.Errorf("Failed killing the pod %q: %v", runningPod.Name, err)
}
pk.markPodTerminated(string(runningPod.ID))
}(apiPod, runningPod)
}
}
@@ -1943,7 +1976,7 @@ func (kl *Kubelet) cleanupOrphanedPodCgroups(pcm cm.PodContainerManager, cgroupP
}
// if the pod is within termination grace period, we shouldn't cleanup the underlying cgroup
if kl.podKiller.IsMirrorPodPendingTerminationByUID(uid) {
if kl.podKiller.IsPodPendingTerminationByUID(uid) {
klog.V(3).Infof("pod %q is pending termination", uid)
continue
}