Deleting gracefully terminating, not-scheduled pre-scheduled pods

Upstream, the kubelet is responsible for all pods that have spec.NodeName
set. In Mesos we have a two-stage scheduling process:

1. Pods with a pre-set spec.NodeName are still scheduled by the scheduler.
2. The kubelet will only see them when a Mesos task was started and the executor
   passes the pod to the kubelet.

With this PR, a pod with spec.NodeName set that is gracefully terminated but
not yet scheduled, e.g.

- because the termination happened just after creation and the scheduler was
  not fast enough
- because the NodeSelector does not match

is deleted by the Mesos scheduler.
This commit is contained in:
Dr. Stefan Schimanski 2015-09-26 13:43:29 +02:00
parent d49ca164ef
commit 67746908e5
2 changed files with 10 additions and 10 deletions

View File

@ -440,11 +440,6 @@ func (q *queuer) Run(done <-chan struct{}) {
if recoverAssignedSlave(pod.Pod) != "" {
log.V(3).Infof("dequeuing assigned pod for scheduling: %v", pod.Pod.Name)
q.dequeue(pod.GetUID())
} else if pod.InGracefulTermination() {
// pods which are pre-scheduled (i.e. NodeName is set) may be gracefully deleted,
// even though they are not running yet.
log.V(3).Infof("dequeuing graceful deleted pre-scheduled pod for scheduling: %v", pod.Pod.Name)
q.dequeue(pod.GetUID())
} else {
// use ReplaceExisting because we are always pushing the latest state
now := time.Now()
@ -744,6 +739,16 @@ func (s *schedulingPlugin) Run(done <-chan struct{}) {
// with the Modeler stuff removed since we don't use it because we have mesos.
func (s *schedulingPlugin) scheduleOne() {
pod := s.config.NextPod()
// pods which are pre-scheduled (i.e. NodeName is set) are deleted by the kubelet
// in upstream. Not so in Mesos because the kubelet hasn't seen that pod yet. Hence,
// the scheduler has to take care of this:
if pod.Spec.NodeName != "" && pod.DeletionTimestamp != nil {
log.V(3).Infof("deleting pre-scheduled, not yet running pod: %s/%s", pod.Namespace, pod.Name)
s.client.Pods(pod.Namespace).Delete(pod.Name, api.NewDeleteOptions(0))
return
}
log.V(3).Infof("Attempting to schedule: %+v", pod)
dest, err := s.config.Algorithm.Schedule(pod, s.config.NodeLister) // call kubeScheduler.Schedule
if err != nil {

View File

@ -78,8 +78,3 @@ func (p *Pod) String() string {
}
return fmt.Sprintf("{pod:%v, deadline:%v, delay:%v}", p.Pod.Name, displayDeadline, p.GetDelay())
}
// InGracefulTermination reports whether the wrapped pod carries a deletion
// timestamp together with a non-nil, strictly positive deletion grace period.
func (p *Pod) InGracefulTermination() bool {
	// Without a deletion timestamp the pod is not being deleted at all.
	if p.Pod.DeletionTimestamp == nil {
		return false
	}
	grace := p.Pod.DeletionGracePeriodSeconds
	return grace != nil && *grace > 0
}