mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-31 07:20:13 +00:00)
Merge pull request #52 from mesosphere/sttts-task-lost-during-kubelet-pod-launch
MESOS: scheduler: handle lost task status updates during kubelet pod launch
parent e28404b23b
commit 271eeb008b
@@ -700,7 +700,7 @@ waitForRunningPod:
 	k.lock.Lock()
 	defer k.lock.Unlock()
 reportLost:
-	k.reportLostTask(driver, taskId, messages.LaunchTaskFailed)
+	k.reportLostTask(driver, taskId, messages.KubeletPodLaunchFailed)
 }

 func (k *Executor) __launchTask(driver bindings.ExecutorDriver, taskId, podFullName string, psf podStatusFunc) {
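For context, reportLostTask itself is not part of this diff. Below is a minimal sketch of what such a helper might look like on top of the mesos-go executor bindings; the function body, import paths, and logging are assumptions made for illustration — only the message constant comes from this change.

package executor // hypothetical package; this sketch is not code from the commit

import (
	"log"

	"github.com/gogo/protobuf/proto"
	bindings "github.com/mesos/mesos-go/executor"
	mesos "github.com/mesos/mesos-go/mesosproto"
)

// reportLostTask (sketch) sends a TASK_LOST status update whose Message field
// carries one of the reason strings from the messages package, e.g.
// "kubelet-pod-launch-failed".
func reportLostTask(driver bindings.ExecutorDriver, taskId, reason string) {
	status := &mesos.TaskStatus{
		TaskId:  &mesos.TaskID{Value: proto.String(taskId)},
		State:   mesos.TaskState_TASK_LOST.Enum(),
		Source:  mesos.TaskStatus_SOURCE_EXECUTOR.Enum(),
		Message: proto.String(reason),
	}
	if _, err := driver.SendStatusUpdate(status); err != nil {
		log.Printf("failed to send TASK_LOST for task %q: %v", taskId, err)
	}
}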
@@ -25,6 +25,7 @@ const (
 	ExecutorUnregistered     = "executor-unregistered"
 	ExecutorShutdown         = "executor-shutdown"
 	LaunchTaskFailed         = "launch-task-failed"
+	KubeletPodLaunchFailed   = "kubelet-pod-launch-failed"
 	TaskKilled               = "task-killed"
 	TaskLost                 = "task-lost"
 	UnmarshalTaskDataFailure = "unmarshal-task-data-failure"
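As the surrounding hunks show, these constants are plain strings that travel with the task status update: the executor passes one to reportLostTask (presumably ending up in the status's Message field), and the scheduler compares it against taskStatus.GetMessage() in reconcileTerminalTask. Adding a new failure mode therefore needs only a new string here plus a matching check on the scheduler side.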
@@ -482,13 +482,15 @@ func (k *framework) reconcileTerminalTask(driver bindings.SchedulerDriver, taskS
 		(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED) ||
 		(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED) ||
 		(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_EXECUTOR && taskStatus.GetMessage() == messages.ContainersDisappeared) ||
+		(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_EXECUTOR && taskStatus.GetMessage() == messages.KubeletPodLaunchFailed) ||
 		(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_EXECUTOR && taskStatus.GetMessage() == messages.TaskKilled && !task.Has(podtask.Deleted))) {
 		//--
 		// pod-task has metadata that refers to:
 		// (1) a task that Mesos no longer knows about, or else
 		// (2) a pod that the Kubelet will never report as "failed"
 		// (3) a pod that the kubeletExecutor reported as lost (likely due to docker daemon crash/restart)
-		// (4) a pod that the kubeletExecutor killed, but the scheduler didn't ask for that (maybe killed by the master)
+		// (4) a pod that the kubeletExecutor reported as lost because the kubelet didn't manage to launch it (in time)
+		// (5) a pod that the kubeletExecutor killed, but the scheduler didn't ask for that (maybe killed by the master)
 		// For now, destroy the pod and hope that there's a replication controller backing it up.
 		// TODO(jdef) for case #2 don't delete the pod, just update it's status to Failed
 		pod := &task.Pod
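To illustrate the newly added branch, here is a hedged example of a status update that would now be reconciled as terminal; the construction is an assumption (task id and field values are made up, and it reuses the mesos/proto imports from the sketch above), not code from this PR.

// Hypothetical status update matching the new condition:
// SOURCE_EXECUTOR plus the "kubelet-pod-launch-failed" message.
status := &mesos.TaskStatus{
	TaskId:  &mesos.TaskID{Value: proto.String("pod.1234")}, // hypothetical task id
	State:   mesos.TaskState_TASK_LOST.Enum(),
	Source:  mesos.TaskStatus_SOURCE_EXECUTOR.Enum(),
	Message: proto.String(messages.KubeletPodLaunchFailed),
}
// GetSource() == SOURCE_EXECUTOR && GetMessage() == messages.KubeletPodLaunchFailed
// now falls into the same path as the other "lost executor" cases: the pod is
// deleted, and a backing replication controller (if any) is expected to recreate it.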