From d185bfd56a0a3cb215828d5a4b17d67836a0278d Mon Sep 17 00:00:00 2001
From: Yu-Ju Hong
Date: Fri, 8 May 2015 11:54:44 -0700
Subject: [PATCH] Record failure reason for image pulling

---
 pkg/kubelet/dockertools/manager.go |  55 +++++++-------
 pkg/kubelet/kubelet_test.go        | 111 +++++++++++++++++++++++++----
 2 files changed, 130 insertions(+), 36 deletions(-)

diff --git a/pkg/kubelet/dockertools/manager.go b/pkg/kubelet/dockertools/manager.go
index 147607fc3ef..bac74bff629 100644
--- a/pkg/kubelet/dockertools/manager.go
+++ b/pkg/kubelet/dockertools/manager.go
@@ -435,8 +435,6 @@ func (dm *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
 				containerStatus.LastTerminationState = oldStatus.LastTerminationState
 			}
 			//Check image is ready on the node or not.
-			// TODO: If we integrate DockerPuller into DockerManager, we can
-			// record the pull failure and eliminate the image checking below.
 			image := container.Image
 			// TODO(dchen1107): docker/docker/issues/8365 to figure out if the image exists
 			_, err := dm.client.InspectImage(image)
@@ -475,19 +473,6 @@ func (dm *DockerManager) GetPodInfraContainer(pod kubecontainer.Pod) (kubecontai
 	return kubecontainer.Container{}, fmt.Errorf("unable to find pod infra container for pod %v", pod.ID)
 }
 
-func (dm *DockerManager) runContainerRecordErrorReason(pod *api.Pod, container *api.Container, opts *kubecontainer.RunContainerOptions, ref *api.ObjectReference) (string, error) {
-	dockerID, err := dm.runContainer(pod, container, opts, ref)
-	if err != nil {
-		errString := err.Error()
-		if errString != "" {
-			dm.reasonCache.Add(pod.UID, container.Name, errString)
-		} else {
-			dm.reasonCache.Remove(pod.UID, container.Name)
-		}
-	}
-	return dockerID, err
-}
-
 func (dm *DockerManager) runContainer(pod *api.Pod, container *api.Container, opts *kubecontainer.RunContainerOptions, ref *api.ObjectReference) (string, error) {
 	dockerName := KubeletContainerName{
 		PodFullName: kubecontainer.GetPodFullName(pod),
@@ -1100,7 +1085,7 @@ func (dm *DockerManager) RunContainer(pod *api.Pod, container *api.Container, ne
 		return "", err
 	}
 
-	id, err := dm.runContainerRecordErrorReason(pod, container, opts, ref)
+	id, err := dm.runContainer(pod, container, opts, ref)
 	if err != nil {
 		return "", err
 	}
@@ -1323,10 +1308,25 @@
 	}, nil
 }
 
+// updateReasonCache updates the failure reason based on the latest error.
+func (dm *DockerManager) updateReasonCache(pod *api.Pod, container *api.Container, err error) {
+	if err == nil {
+		return
+	}
+	errString := err.Error()
+	dm.reasonCache.Add(pod.UID, container.Name, errString)
+}
+
+// clearReasonCache removes the entry in the reason cache.
+func (dm *DockerManager) clearReasonCache(pod *api.Pod, container *api.Container) {
+	dm.reasonCache.Remove(pod.UID, container.Name)
+}
+
 // Pull the image for the specified pod and container.
 func (dm *DockerManager) pullImage(pod *api.Pod, container *api.Container) error {
 	spec := kubecontainer.ImageSpec{container.Image}
 	present, err := dm.IsImagePresent(spec)
+
 	if err != nil {
 		ref, err := kubecontainer.GenerateContainerRef(pod, container)
 		if err != nil {
@@ -1337,7 +1337,6 @@ func (dm *DockerManager) pullImage(pod *api.Pod, container *api.Container) error
 		}
 		return fmt.Errorf("failed to inspect image %q: %v", container.Image, err)
 	}
-
 	if !dm.runtimeHooks.ShouldPullImage(pod, container, present) {
 		return nil
 	}
@@ -1399,20 +1398,28 @@
 	}
 
 	// Start everything
-	for container := range containerChanges.ContainersToStart {
-		glog.V(4).Infof("Creating container %+v", pod.Spec.Containers[container])
-		containerSpec := &pod.Spec.Containers[container]
-		if err := dm.pullImage(pod, containerSpec); err != nil {
-			glog.Warningf("Failed to pull image %q from pod %q and container %q: %v", containerSpec.Image, kubecontainer.GetPodFullName(pod), containerSpec.Name, err)
+	for idx := range containerChanges.ContainersToStart {
+		container := &pod.Spec.Containers[idx]
+		glog.V(4).Infof("Creating container %+v", container)
+		err := dm.pullImage(pod, container)
+		dm.updateReasonCache(pod, container, err)
+		if err != nil {
+			glog.Warningf("Failed to pull image %q from pod %q and container %q: %v", container.Image, kubecontainer.GetPodFullName(pod), container.Name, err)
 			continue
 		}
+
 		// TODO(dawnchen): Check RestartPolicy.DelaySeconds before restart a container
 		namespaceMode := fmt.Sprintf("container:%v", podInfraContainerID)
-		_, err := dm.RunContainer(pod, containerSpec, namespaceMode, namespaceMode)
+		_, err = dm.RunContainer(pod, container, namespaceMode, namespaceMode)
+		dm.updateReasonCache(pod, container, err)
 		if err != nil {
 			// TODO(bburns) : Perhaps blacklist a container after N failures?
-			glog.Errorf("Error running pod %q container %q: %v", kubecontainer.GetPodFullName(pod), containerSpec.Name, err)
+			glog.Errorf("Error running pod %q container %q: %v", kubecontainer.GetPodFullName(pod), container.Name, err)
+			continue
 		}
+		// Successfully started the container; clear the entry in the failure
+		// reason cache.
+		dm.clearReasonCache(pod, container)
 	}
 
 	return nil
diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go
index f02cb66ee6c..9af7f5b3564 100644
--- a/pkg/kubelet/kubelet_test.go
+++ b/pkg/kubelet/kubelet_test.go
@@ -4054,38 +4054,125 @@ func TestGetPodStatusWithLastTermination(t *testing.T) {
 	}
 }
 
-// TODO(vmarmol): Move this test away from using RunContainer().
 func TestGetPodCreationFailureReason(t *testing.T) {
 	testKubelet := newTestKubelet(t)
+	testKubelet.fakeCadvisor.On("MachineInfo").Return(&cadvisorApi.MachineInfo{}, nil)
 	kubelet := testKubelet.kubelet
 	fakeDocker := testKubelet.fakeDocker
+	waitGroup := testKubelet.waitGroup
+
+	// Inject the creation failure error to docker.
 	failureReason := "creation failure"
 	fakeDocker.Errors = map[string]error{
 		"create": fmt.Errorf("%s", failureReason),
 	}
-	fakeDocker.ContainerList = []docker.APIContainers{}
+
 	pod := &api.Pod{
 		ObjectMeta: api.ObjectMeta{
 			UID:       "12345678",
-			Name:      "bar",
+			Name:      "foo",
 			Namespace: "new",
 		},
 		Spec: api.PodSpec{
-			Containers: []api.Container{
-				{Name: "foo"},
-			},
+			Containers: []api.Container{{Name: "bar"}},
 		},
 	}
+
+	// Pretend that the pod infra container has already been created, so that
+	// we can run the user containers.
+	fakeDocker.ContainerList = []docker.APIContainers{
+		{
+			Names: []string{"/k8s_POD." + strconv.FormatUint(generatePodInfraContainerHash(pod), 16) + "_foo_new_12345678_0"},
+			ID:    "9876",
+		},
+	}
+	fakeDocker.ContainerMap = map[string]*docker.Container{
+		"9876": {
+			ID:         "9876",
+			HostConfig: &docker.HostConfig{},
+			Config:     &docker.Config{},
+		},
+	}
+
 	pods := []*api.Pod{pod}
 	kubelet.podManager.SetPods(pods)
 	kubelet.volumeManager.SetVolumes(pod.UID, kubecontainer.VolumeMap{})
-	// TODO: Move this test to dockertools so that we don't have to do the hacky
-	// type assertion here.
-	dm := kubelet.containerRuntime.(*dockertools.DockerManager)
-	_, err := dm.RunContainer(pod, &pod.Spec.Containers[0], "", "")
-	if err == nil {
-		t.Errorf("expected error, found nil")
+	waitGroup.Add(1)
+	err := kubelet.SyncPods(pods, emptyPodUIDs, map[string]*api.Pod{}, time.Now())
+	if err != nil {
+		t.Errorf("unexpected error: %v", err)
 	}
+	waitGroup.Wait()
+
+	status, err := kubelet.GetPodStatus(kubecontainer.GetPodFullName(pod))
+	if err != nil {
+		t.Errorf("unexpected error %v", err)
+	}
+	if len(status.ContainerStatuses) < 1 {
+		t.Errorf("expected 1 container status, got %d", len(status.ContainerStatuses))
+	} else {
+		state := status.ContainerStatuses[0].State
+		if state.Waiting == nil {
+			t.Errorf("expected waiting state, got %#v", state)
+		} else if state.Waiting.Reason != failureReason {
+			t.Errorf("expected reason %q, got %q", failureReason, state.Waiting.Reason)
+		}
+	}
+}
+
+func TestGetPodPullImageFailureReason(t *testing.T) {
+	testKubelet := newTestKubelet(t)
+	testKubelet.fakeCadvisor.On("MachineInfo").Return(&cadvisorApi.MachineInfo{}, nil)
+	kubelet := testKubelet.kubelet
+	fakeDocker := testKubelet.fakeDocker
+	waitGroup := testKubelet.waitGroup
+
+	// Initialize the FakeDockerPuller so that it'd try to pull non-existent
+	// images.
+	dm := kubelet.containerRuntime.(*dockertools.DockerManager)
+	puller := dm.Puller.(*dockertools.FakeDockerPuller)
+	puller.HasImages = []string{}
+	// Inject the pull image failure error.
+	failureReason := "pull image failure"
+	puller.ErrorsToInject = []error{fmt.Errorf("%s", failureReason)}
+
+	pod := &api.Pod{
+		ObjectMeta: api.ObjectMeta{
+			UID:       "12345678",
+			Name:      "foo",
+			Namespace: "new",
+		},
+		Spec: api.PodSpec{
+			Containers: []api.Container{{Name: "bar", Image: "realImage", ImagePullPolicy: api.PullAlways}},
+		},
+	}
+
+	// Pretend that the pod infra container has already been created, so that
+	// we can run the user containers.
+	fakeDocker.ContainerList = []docker.APIContainers{
+		{
+			Names: []string{"/k8s_POD." + strconv.FormatUint(generatePodInfraContainerHash(pod), 16) + "_foo_new_12345678_0"},
+			ID:    "9876",
+		},
+	}
+	fakeDocker.ContainerMap = map[string]*docker.Container{
+		"9876": {
+			ID:         "9876",
+			HostConfig: &docker.HostConfig{},
+			Config:     &docker.Config{},
+		},
+	}
+
+	pods := []*api.Pod{pod}
+	kubelet.podManager.SetPods(pods)
+	kubelet.volumeManager.SetVolumes(pod.UID, kubecontainer.VolumeMap{})
+	waitGroup.Add(1)
+	err := kubelet.SyncPods(pods, emptyPodUIDs, map[string]*api.Pod{}, time.Now())
+	if err != nil {
+		t.Errorf("unexpected error: %v", err)
	}
+	waitGroup.Wait()
+
 	status, err := kubelet.GetPodStatus(kubecontainer.GetPodFullName(pod))
 	if err != nil {
 		t.Errorf("unexpected error %v", err)