handle task lost status update from executor on docker daemon crash/restart by deleting the pod, so that the RC creates a new one, if the pod is backed by RC

2025-08-25 19:43:22 +00:00 · 2015-10-14 23:14:22 -07:00 · 2015-10-14 23:14:22 -07:00 · 99b03baa9b
commit 99b03baa9b
parent 0e296c3dcd
1 changed files with 3 additions and 1 deletions
--- a/contrib/mesos/pkg/scheduler/scheduler.go
+++ b/contrib/mesos/pkg/scheduler/scheduler.go
@ -437,11 +437,13 @@ func (k *KubernetesScheduler) reconcileTerminalTask(driver bindings.SchedulerDri
 	if (state == podtask.StateRunning || state == podtask.StatePending) &&
 		((taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER && taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION) ||
 			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED) ||
-			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED)) {
+			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED) ||
 			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_EXECUTOR && taskStatus.GetMessage() == messages.ContainersDisappeared)) {
 		//--
 		// pod-task has metadata that refers to:
 		// (1) a task that Mesos no longer knows about, or else
 		// (2) a pod that the Kubelet will never report as "failed"
 		// (3) a pod that the kubeletExecutor reported as lost (likely due to docker daemon crash/restart)
 		// For now, destroy the pod and hope that there's a replication controller backing it up.
 		// TODO(jdef) for case #2 don't delete the pod, just update it's status to Failed
 		pod := &task.Pod