bubble up reason when killing pod

jiangyaoguo 2015-10-10 17:12:47 +08:00
parent d80e0e837c
commit 02f8e4277f
2 changed files with 38 additions and 28 deletions
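Before the diff, a minimal standalone sketch of the pattern this commit introduces (killContainerInPod below and the docker id are illustrative stand-ins, not the real Kubernetes API): every caller of the kill path now supplies a human-readable reason, which is attached to the recorded "Killing" event instead of being dropped.

package main

import (
	"errors"
	"fmt"
)

// killContainerInPod is a simplified stand-in with the same shape as the new
// signature: the trailing string is the human-readable reason that ends up in
// the recorded "Killing" event.
func killContainerInPod(dockerID string, reason string) error {
	fmt.Printf("killing %s (reason: %q)\n", dockerID, reason)
	return nil
}

func main() {
	// KillPod-style caller: a fixed reason.
	_ = killContainerInPod("a1b2c3d4e5f6", "Need to kill pod.")

	// runContainerInPod-style caller: reuse the handler error text as the reason.
	handlerErr := errors.New("failed to call event handler: exit status 1")
	_ = killContainerInPod("a1b2c3d4e5f6", handlerErr.Error())
}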

pkg/kubelet/dockertools/manager.go

@@ -1314,7 +1314,7 @@ func (dm *DockerManager) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) err
return
}
-err := dm.KillContainerInPod(container.ID, containerSpec, pod)
+err := dm.KillContainerInPod(container.ID, containerSpec, pod, "Need to kill pod.")
if err != nil {
glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, runningPod.ID)
errs <- err
@@ -1327,7 +1327,7 @@ func (dm *DockerManager) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) err
glog.Errorf("Failed tearing down the infra container: %v", err)
errs <- err
}
-if err := dm.KillContainerInPod(networkContainer.ID, networkSpec, pod); err != nil {
+if err := dm.KillContainerInPod(networkContainer.ID, networkSpec, pod, "Need to kill pod."); err != nil {
glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, runningPod.ID)
errs <- err
}
@@ -1345,7 +1345,7 @@ func (dm *DockerManager) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) err
// KillContainerInPod kills a container in the pod. It must be passed either a container ID or a container and pod,
// and will attempt to lookup the other information if missing.
-func (dm *DockerManager) KillContainerInPod(containerID kubecontainer.ContainerID, container *api.Container, pod *api.Pod) error {
+func (dm *DockerManager) KillContainerInPod(containerID kubecontainer.ContainerID, container *api.Container, pod *api.Pod, message string) error {
switch {
case containerID.IsEmpty():
// Locate the container.
@@ -1377,12 +1377,12 @@ func (dm *DockerManager) KillContainerInPod(containerID kubecontainer.ContainerI
pod = storedPod
}
}
-return dm.killContainer(containerID, container, pod)
+return dm.killContainer(containerID, container, pod, message)
}
// killContainer accepts a containerID and an optional container or pod containing shutdown policies. Invoke
// KillContainerInPod if information must be retrieved first.
-func (dm *DockerManager) killContainer(containerID kubecontainer.ContainerID, container *api.Container, pod *api.Pod) error {
+func (dm *DockerManager) killContainer(containerID kubecontainer.ContainerID, container *api.Container, pod *api.Pod, reason string) error {
ID := containerID.ID
name := ID
if container != nil {
@@ -1441,8 +1441,11 @@ func (dm *DockerManager) killContainer(containerID kubecontainer.ContainerID, co
if !ok {
glog.Warningf("No ref for pod '%q'", name)
} else {
-// TODO: pass reason down here, and state, or move this call up the stack.
-dm.recorder.Eventf(ref, "Killing", "Killing with docker id %v", util.ShortenString(ID, 12))
+message := fmt.Sprintf("Killing with docker id %v", util.ShortenString(ID, 12))
+if reason != "" {
+message = fmt.Sprint(message, ": ", reason)
+}
+dm.recorder.Event(ref, "Killing", message)
dm.containerRefManager.ClearRef(containerID)
}
return err
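A standalone sketch of the message construction above; shortenID stands in for util.ShortenString and there is no real event recorder here, so this only illustrates how an empty reason degrades to the old message.

package main

import "fmt"

// shortenID stands in for util.ShortenString: truncate the docker ID to at
// most n characters for readability.
func shortenID(id string, n int) string {
	if len(id) > n {
		return id[:n]
	}
	return id
}

// killingEventMessage mirrors the patched killContainer logic: the base text
// always names the docker id, and a non-empty reason is appended after ": ".
func killingEventMessage(id, reason string) string {
	message := fmt.Sprintf("Killing with docker id %v", shortenID(id, 12))
	if reason != "" {
		message = fmt.Sprint(message, ": ", reason)
	}
	return message
}

func main() {
	fmt.Println(killingEventMessage("0123456789abcdef", "Need to kill pod."))
	// Killing with docker id 0123456789ab: Need to kill pod.
	fmt.Println(killingEventMessage("0123456789abcdef", ""))
	// Killing with docker id 0123456789ab
}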
@@ -1525,8 +1528,9 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe
if container.Lifecycle != nil && container.Lifecycle.PostStart != nil {
handlerErr := dm.runner.Run(id, pod, container, container.Lifecycle.PostStart)
if handlerErr != nil {
-dm.KillContainerInPod(id, container, pod)
-return kubecontainer.ContainerID{}, fmt.Errorf("failed to call event handler: %v", handlerErr)
+err := fmt.Errorf("failed to call event handler: %v", handlerErr)
+dm.KillContainerInPod(id, container, pod, err.Error())
+return kubecontainer.ContainerID{}, err
}
}
@@ -1663,18 +1667,17 @@ func (dm *DockerManager) createPodInfraContainer(pod *api.Pod) (kubetypes.Docker
// - startInfraContainer is true if new Infra Containers have to be started and old one (if running) killed.
// Additionally if it is true then containersToKeep have to be empty
// - infraContainerId have to be set if and only if startInfraContainer is false. It stores dockerID of running Infra Container
-// - containersToStart keeps indices of Specs of containers that have to be started.
+// - containersToStart keeps indices of Specs of containers that have to be started and reasons why containers will be started.
// - containersToKeep stores mapping from dockerIDs of running containers to indices of their Specs for containers that
// should be kept running. If startInfraContainer is false then it contains an entry for infraContainerId (mapped to -1).
// It shouldn't be the case where containersToStart is empty and containersToKeep contains only infraContainerId. In such case
// Infra Container should be killed, hence it's removed from this map.
// - all running containers which are NOT contained in containersToKeep should be killed.
-type empty struct{}
type PodContainerChangesSpec struct {
StartInfraContainer bool
InfraChanged bool
InfraContainerId kubetypes.DockerID
-ContainersToStart map[int]empty
+ContainersToStart map[int]string
ContainersToKeep map[kubetypes.DockerID]int
}
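The struct change above replaces a set keyed by container-spec index with a map from index to the reason the container must be started. A minimal sketch of the difference, with placeholder reasons rather than the real computed messages:

package main

import "fmt"

func main() {
	// Before: membership only -- we know which container indices to start.
	toStartOld := map[int]struct{}{0: {}, 2: {}}

	// After: membership plus a reason, so the kill/restart path can say why.
	toStart := map[int]string{
		0: "Container is dead, but RestartPolicy says that we should restart it.",
		2: "container hash changed, it will be killed and re-created.",
	}

	fmt.Println(len(toStartOld), "containers to start (old form)")
	for idx, reason := range toStart {
		fmt.Printf("start index %d: %s\n", idx, reason)
	}
}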
@@ -1688,7 +1691,7 @@ func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, runningPod kub
uid := pod.UID
glog.V(4).Infof("Syncing Pod %+v, podFullName: %q, uid: %q", pod, podFullName, uid)
-containersToStart := make(map[int]empty)
+containersToStart := make(map[int]string)
containersToKeep := make(map[kubetypes.DockerID]int)
var err error
@@ -1724,8 +1727,9 @@ func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, runningPod kub
// If we are here it means that the container is dead and should be restarted, or never existed and should
// be created. We may be inserting this ID again if the container has changed and it has
// RestartPolicy::Always, but it's not a big deal.
glog.V(3).Infof("Container %+v is dead, but RestartPolicy says that we should restart it.", container)
containersToStart[index] = empty{}
message := fmt.Sprintf("Container %+v is dead, but RestartPolicy says that we should restart it.", container)
glog.V(3).Info(message)
containersToStart[index] = message
}
continue
}
@@ -1740,8 +1744,9 @@ func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, runningPod kub
// If RestartPolicy is Always or OnFailure we restart containers that were running before we
// killed them when restarting Infra Container.
if pod.Spec.RestartPolicy != api.RestartPolicyNever {
glog.V(1).Infof("Infra Container is being recreated. %q will be restarted.", container.Name)
containersToStart[index] = empty{}
message := fmt.Sprintf("Infra Container is being recreated. %q will be restarted.", container.Name)
glog.V(1).Info(message)
containersToStart[index] = message
}
continue
}
@@ -1750,8 +1755,9 @@ func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, runningPod kub
// We will look for changes and check healthiness for the container.
containerChanged := hash != 0 && hash != expectedHash
if containerChanged {
glog.Infof("pod %q container %q hash changed (%d vs %d), it will be killed and re-created.", podFullName, container.Name, hash, expectedHash)
containersToStart[index] = empty{}
message := fmt.Sprintf("pod %q container %q hash changed (%d vs %d), it will be killed and re-created.", podFullName, container.Name, hash, expectedHash)
glog.Info(message)
containersToStart[index] = message
continue
}
@@ -1761,8 +1767,9 @@ func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, runningPod kub
continue
}
if pod.Spec.RestartPolicy != api.RestartPolicyNever {
glog.Infof("pod %q container %q is unhealthy, it will be killed and re-created.", podFullName, container.Name)
containersToStart[index] = empty{}
message := fmt.Sprintf("pod %q container %q is unhealthy, it will be killed and re-created.", podFullName, container.Name)
glog.Info(message)
containersToStart[index] = message
}
}
@@ -1840,13 +1847,15 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod
glog.V(3).Infof("Killing unwanted container %+v", container)
// attempt to find the appropriate container policy
var podContainer *api.Container
+var killMessage string
for i, c := range pod.Spec.Containers {
if c.Name == container.Name {
podContainer = &pod.Spec.Containers[i]
+killMessage = containerChanges.ContainersToStart[i]
break
}
}
-if err := dm.KillContainerInPod(container.ID, podContainer, pod); err != nil {
+if err := dm.KillContainerInPod(container.ID, podContainer, pod, killMessage); err != nil {
glog.Errorf("Error killing container: %v", err)
return err
}
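One detail worth calling out in the lookup above: indexing containerChanges.ContainersToStart with an index that is not in the map yields the string zero value, so killMessage stays empty for containers that are killed but not restarted, and killContainer then records the plain "Killing with docker id ..." message. A small sketch of that behavior with an ordinary map:

package main

import "fmt"

func main() {
	containersToStart := map[int]string{
		1: `container "bar" is unhealthy, it will be killed and re-created.`,
	}

	for _, i := range []int{0, 1} {
		killMessage := containersToStart[i] // "" when index i is not scheduled to restart
		if killMessage == "" {
			fmt.Printf("index %d: kill without an extra reason\n", i)
		} else {
			fmt.Printf("index %d: kill, reason: %s\n", i, killMessage)
		}
	}
}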
@@ -1867,11 +1876,12 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod
// Call the networking plugin
err = dm.networkPlugin.SetUpPod(pod.Namespace, pod.Name, podInfraContainerID)
if err != nil {
glog.Errorf("Failed to setup networking for pod %q using network plugins: %v; Skipping pod", podFullName, err)
message := fmt.Sprintf("Failed to setup networking for pod %q using network plugins: %v; Skipping pod", podFullName, err)
glog.Error(message)
// Delete infra container
if delErr := dm.KillContainerInPod(kubecontainer.ContainerID{
ID: string(podInfraContainerID),
Type: "docker"}, nil, pod); delErr != nil {
Type: "docker"}, nil, pod, message); delErr != nil {
glog.Warningf("Clear infra container failed for pod %q: %v", podFullName, delErr)
}
return err

pkg/kubelet/dockertools/manager_test.go (8 changes) Normal file → Executable file

@@ -403,7 +403,7 @@ func TestKillContainerInPod(t *testing.T) {
containerToSpare := &containers[1]
fakeDocker.ContainerList = containers
-if err := manager.KillContainerInPod(kubecontainer.ContainerID{}, &pod.Spec.Containers[0], pod); err != nil {
+if err := manager.KillContainerInPod(kubecontainer.ContainerID{}, &pod.Spec.Containers[0], pod, "test kill container in pod."); err != nil {
t.Errorf("unexpected error: %v", err)
}
// Assert the container has been stopped.
@@ -468,7 +468,7 @@ func TestKillContainerInPodWithPreStop(t *testing.T) {
},
}
-if err := manager.KillContainerInPod(kubecontainer.ContainerID{}, &pod.Spec.Containers[0], pod); err != nil {
+if err := manager.KillContainerInPod(kubecontainer.ContainerID{}, &pod.Spec.Containers[0], pod, "test kill container with preStop."); err != nil {
t.Errorf("unexpected error: %v", err)
}
// Assert the container has been stopped.
@@ -505,7 +505,7 @@ func TestKillContainerInPodWithError(t *testing.T) {
fakeDocker.ContainerList = containers
fakeDocker.Errors["stop"] = fmt.Errorf("sample error")
-if err := manager.KillContainerInPod(kubecontainer.ContainerID{}, &pod.Spec.Containers[0], pod); err == nil {
+if err := manager.KillContainerInPod(kubecontainer.ContainerID{}, &pod.Spec.Containers[0], pod, "test kill container with error."); err == nil {
t.Errorf("expected error, found nil")
}
}
@@ -1565,7 +1565,7 @@ func TestGetRestartCount(t *testing.T) {
t.Fatalf("unexpected error %v", err)
}
containerID := kubecontainer.ParseContainerID(status.ContainerStatuses[0].ContainerID)
-dm.KillContainerInPod(containerID, &pod.Spec.Containers[0], pod)
+dm.KillContainerInPod(containerID, &pod.Spec.Containers[0], pod, "test container restart count.")
}
// Container "bar" starts the first time.
// TODO: container lists are expected to be sorted reversely by time.