diff --git a/contrib/mesos/pkg/executor/executor.go b/contrib/mesos/pkg/executor/executor.go index 6020ffc752a..1a219fe9ba4 100644 --- a/contrib/mesos/pkg/executor/executor.go +++ b/contrib/mesos/pkg/executor/executor.go @@ -700,7 +700,7 @@ waitForRunningPod: k.lock.Lock() defer k.lock.Unlock() reportLost: - k.reportLostTask(driver, taskId, messages.LaunchTaskFailed) + k.reportLostTask(driver, taskId, messages.KubeletPodLaunchFailed) } func (k *Executor) __launchTask(driver bindings.ExecutorDriver, taskId, podFullName string, psf podStatusFunc) { diff --git a/contrib/mesos/pkg/executor/messages/messages.go b/contrib/mesos/pkg/executor/messages/messages.go index f16a81eaad5..9f0e20ef7b2 100644 --- a/contrib/mesos/pkg/executor/messages/messages.go +++ b/contrib/mesos/pkg/executor/messages/messages.go @@ -25,6 +25,7 @@ const ( ExecutorUnregistered = "executor-unregistered" ExecutorShutdown = "executor-shutdown" LaunchTaskFailed = "launch-task-failed" + KubeletPodLaunchFailed = "kubelet-pod-launch-failed" TaskKilled = "task-killed" TaskLost = "task-lost" UnmarshalTaskDataFailure = "unmarshal-task-data-failure" diff --git a/contrib/mesos/pkg/scheduler/components/framework/framework.go b/contrib/mesos/pkg/scheduler/components/framework/framework.go index 8106c9b933f..b38a50f8e8b 100644 --- a/contrib/mesos/pkg/scheduler/components/framework/framework.go +++ b/contrib/mesos/pkg/scheduler/components/framework/framework.go @@ -482,13 +482,15 @@ func (k *framework) reconcileTerminalTask(driver bindings.SchedulerDriver, taskS (taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED) || (taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED) || (taskStatus.GetSource() == mesos.TaskStatus_SOURCE_EXECUTOR && taskStatus.GetMessage() == messages.ContainersDisappeared) || + (taskStatus.GetSource() == mesos.TaskStatus_SOURCE_EXECUTOR && taskStatus.GetMessage() == messages.KubeletPodLaunchFailed) || (taskStatus.GetSource() == mesos.TaskStatus_SOURCE_EXECUTOR && taskStatus.GetMessage() == messages.TaskKilled && !task.Has(podtask.Deleted))) { //-- // pod-task has metadata that refers to: // (1) a task that Mesos no longer knows about, or else // (2) a pod that the Kubelet will never report as "failed" // (3) a pod that the kubeletExecutor reported as lost (likely due to docker daemon crash/restart) - // (4) a pod that the kubeletExecutor killed, but the scheduler didn't ask for that (maybe killed by the master) + // (4) a pod that the kubeletExecutor reported as lost because the kubelet didn't manage to launch it (in time) + // (5) a pod that the kubeletExecutor killed, but the scheduler didn't ask for that (maybe killed by the master) // For now, destroy the pod and hope that there's a replication controller backing it up. // TODO(jdef) for case #2 don't delete the pod, just update it's status to Failed pod := &task.Pod