diff --git a/contrib/mesos/pkg/scheduler/scheduler.go b/contrib/mesos/pkg/scheduler/scheduler.go index 1a2bc767f17..3fe9d8e0da6 100644 --- a/contrib/mesos/pkg/scheduler/scheduler.go +++ b/contrib/mesos/pkg/scheduler/scheduler.go @@ -437,11 +437,13 @@ func (k *KubernetesScheduler) reconcileTerminalTask(driver bindings.SchedulerDri if (state == podtask.StateRunning || state == podtask.StatePending) && ((taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER && taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION) || (taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED) || - (taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED)) { + (taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED) || + (taskStatus.GetSource() == mesos.TaskStatus_SOURCE_EXECUTOR && taskStatus.GetMessage() == messages.ContainersDisappeared)) { //-- // pod-task has metadata that refers to: // (1) a task that Mesos no longer knows about, or else // (2) a pod that the Kubelet will never report as "failed" + // (3) a pod that the kubeletExecutor reported as lost (likely due to docker daemon crash/restart) // For now, destroy the pod and hope that there's a replication controller backing it up. // TODO(jdef) for case #2 don't delete the pod, just update it's status to Failed pod := &task.Pod