Merge pull request #7981 from yujuhong/reasons

Record failure reasons for image pulling
Victor Marmol 2015-05-08 18:00:46 -07:00
commit 9ded52ac79
2 changed files with 130 additions and 36 deletions
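
The change drops the ad-hoc runContainerRecordErrorReason wrapper and introduces explicit updateReasonCache/clearReasonCache helpers, so that both image-pull failures and container-start failures in SyncPod are recorded, and the entry is cleared once the container starts successfully. The diff only shows the calls reasonCache.Add(pod.UID, container.Name, errString) and reasonCache.Remove(pod.UID, container.Name); the sketch below is a minimal, self-contained illustration of such a cache, not the PR's actual implementation (the concrete type, key format, and the Get helper are assumptions).

package main

import (
	"fmt"
	"sync"
)

type reasonCache struct {
	lock  sync.Mutex
	cache map[string]string // key: "<podUID>_<containerName>", value: last failure reason
}

func newReasonCache() *reasonCache {
	return &reasonCache{cache: map[string]string{}}
}

func reasonKey(uid, name string) string {
	return fmt.Sprintf("%s_%s", uid, name)
}

// Add records (or overwrites) the most recent failure reason for a container.
func (c *reasonCache) Add(uid, name, reason string) {
	c.lock.Lock()
	defer c.lock.Unlock()
	c.cache[reasonKey(uid, name)] = reason
}

// Remove drops the entry once the container has started successfully.
func (c *reasonCache) Remove(uid, name string) {
	c.lock.Lock()
	defer c.lock.Unlock()
	delete(c.cache, reasonKey(uid, name))
}

// Get returns the recorded reason, if any, so that pod status generation can
// surface it as the container's waiting reason.
func (c *reasonCache) Get(uid, name string) (string, bool) {
	c.lock.Lock()
	defer c.lock.Unlock()
	reason, ok := c.cache[reasonKey(uid, name)]
	return reason, ok
}

func main() {
	c := newReasonCache()
	c.Add("12345678", "bar", "pull image failure") // e.g. after a failed pull
	if reason, ok := c.Get("12345678", "bar"); ok {
		fmt.Println(reason)
	}
	c.Remove("12345678", "bar") // e.g. after the container starts successfully
}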


@@ -435,8 +435,6 @@ func (dm *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
containerStatus.LastTerminationState = oldStatus.LastTerminationState
}
// Check whether the image is present on the node.
// TODO: If we integrate DockerPuller into DockerManager, we can
// record the pull failure and eliminate the image checking below.
image := container.Image
// TODO(dchen1107): docker/docker/issues/8365 to figure out if the image exists
_, err := dm.client.InspectImage(image)
@@ -475,19 +473,6 @@ func (dm *DockerManager) GetPodInfraContainer(pod kubecontainer.Pod) (kubecontai
return kubecontainer.Container{}, fmt.Errorf("unable to find pod infra container for pod %v", pod.ID)
}
func (dm *DockerManager) runContainerRecordErrorReason(pod *api.Pod, container *api.Container, opts *kubecontainer.RunContainerOptions, ref *api.ObjectReference) (string, error) {
dockerID, err := dm.runContainer(pod, container, opts, ref)
if err != nil {
errString := err.Error()
if errString != "" {
dm.reasonCache.Add(pod.UID, container.Name, errString)
} else {
dm.reasonCache.Remove(pod.UID, container.Name)
}
}
return dockerID, err
}
func (dm *DockerManager) runContainer(pod *api.Pod, container *api.Container, opts *kubecontainer.RunContainerOptions, ref *api.ObjectReference) (string, error) {
dockerName := KubeletContainerName{
PodFullName: kubecontainer.GetPodFullName(pod),
@@ -1100,7 +1085,7 @@ func (dm *DockerManager) RunContainer(pod *api.Pod, container *api.Container, ne
return "", err
}
id, err := dm.runContainerRecordErrorReason(pod, container, opts, ref)
id, err := dm.runContainer(pod, container, opts, ref)
if err != nil {
return "", err
}
@@ -1323,10 +1308,25 @@ func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, runningPod kub
}, nil
}
// updateReasonCache updates the failure reason based on the latest error.
func (dm *DockerManager) updateReasonCache(pod *api.Pod, container *api.Container, err error) {
if err == nil {
return
}
errString := err.Error()
dm.reasonCache.Add(pod.UID, container.Name, errString)
}
// clearReasonCache removes the entry in the reason cache.
func (dm *DockerManager) clearReasonCache(pod *api.Pod, container *api.Container) {
dm.reasonCache.Remove(pod.UID, container.Name)
}
// Pull the image for the specified pod and container.
func (dm *DockerManager) pullImage(pod *api.Pod, container *api.Container) error {
spec := kubecontainer.ImageSpec{container.Image}
present, err := dm.IsImagePresent(spec)
if err != nil {
ref, err := kubecontainer.GenerateContainerRef(pod, container)
if err != nil {
@@ -1337,7 +1337,6 @@ func (dm *DockerManager) pullImage(pod *api.Pod, container *api.Container) error
}
return fmt.Errorf("failed to inspect image %q: %v", container.Image, err)
}
if !dm.runtimeHooks.ShouldPullImage(pod, container, present) {
return nil
}
@@ -1399,20 +1398,28 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod
}
// Start everything
for container := range containerChanges.ContainersToStart {
glog.V(4).Infof("Creating container %+v", pod.Spec.Containers[container])
containerSpec := &pod.Spec.Containers[container]
if err := dm.pullImage(pod, containerSpec); err != nil {
glog.Warningf("Failed to pull image %q from pod %q and container %q: %v", containerSpec.Image, kubecontainer.GetPodFullName(pod), containerSpec.Name, err)
for idx := range containerChanges.ContainersToStart {
container := &pod.Spec.Containers[idx]
glog.V(4).Infof("Creating container %+v", container)
err := dm.pullImage(pod, container)
dm.updateReasonCache(pod, container, err)
if err != nil {
glog.Warningf("Failed to pull image %q from pod %q and container %q: %v", container.Image, kubecontainer.GetPodFullName(pod), container.Name, err)
continue
}
// TODO(dawnchen): Check RestartPolicy.DelaySeconds before restart a container
namespaceMode := fmt.Sprintf("container:%v", podInfraContainerID)
_, err := dm.RunContainer(pod, containerSpec, namespaceMode, namespaceMode)
_, err = dm.RunContainer(pod, container, namespaceMode, namespaceMode)
dm.updateReasonCache(pod, container, err)
if err != nil {
// TODO(bburns) : Perhaps blacklist a container after N failures?
glog.Errorf("Error running pod %q container %q: %v", kubecontainer.GetPodFullName(pod), containerSpec.Name, err)
glog.Errorf("Error running pod %q container %q: %v", kubecontainer.GetPodFullName(pod), container.Name, err)
continue
}
// Successfully started the container; clear the entry in the failure
// reason cache.
dm.clearReasonCache(pod, container)
}
return nil
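
The tests below exercise this path end to end: they inject a failing create call or a failing image pull into the fake Docker client/puller, run a sync, and then assert that GetPodStatus reports the injected error string as the container's Waiting reason. The status-side lookup itself is not part of this diff; the following is a self-contained sketch of that consumption, using trimmed stand-in types (ContainerState, ContainerStateWaiting) in place of the real api package structs and a hypothetical waitingStateFor helper.

package main

import "fmt"

// Trimmed stand-ins for the api package's container state types.
type ContainerStateWaiting struct{ Reason string }
type ContainerStateRunning struct{}
type ContainerState struct {
	Waiting *ContainerStateWaiting
	Running *ContainerStateRunning
}

// waitingStateFor is a hypothetical helper: if a failure reason was recorded
// for a container that is not running, report it as the waiting reason.
func waitingStateFor(reasons map[string]string, containerName string, state ContainerState) ContainerState {
	if state.Running != nil {
		return state // the container is up; nothing to report
	}
	if reason, ok := reasons[containerName]; ok {
		state.Waiting = &ContainerStateWaiting{Reason: reason}
	}
	return state
}

func main() {
	recorded := map[string]string{"bar": "pull image failure"}
	state := waitingStateFor(recorded, "bar", ContainerState{})
	fmt.Println(state.Waiting.Reason) // prints the recorded failure reason
}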


@@ -4054,38 +4054,125 @@ func TestGetPodStatusWithLastTermination(t *testing.T) {
}
}
// TODO(vmarmol): Move this test away from using RunContainer().
func TestGetPodCreationFailureReason(t *testing.T) {
testKubelet := newTestKubelet(t)
testKubelet.fakeCadvisor.On("MachineInfo").Return(&cadvisorApi.MachineInfo{}, nil)
kubelet := testKubelet.kubelet
fakeDocker := testKubelet.fakeDocker
waitGroup := testKubelet.waitGroup
// Inject the creation failure error to docker.
failureReason := "creation failure"
fakeDocker.Errors = map[string]error{
"create": fmt.Errorf("%s", failureReason),
}
fakeDocker.ContainerList = []docker.APIContainers{}
pod := &api.Pod{
ObjectMeta: api.ObjectMeta{
UID: "12345678",
Name: "bar",
Name: "foo",
Namespace: "new",
},
Spec: api.PodSpec{
Containers: []api.Container{
{Name: "foo"},
},
Containers: []api.Container{{Name: "bar"}},
},
}
// Pretend that the pod infra container has already been created, so that
// we can run the user containers.
fakeDocker.ContainerList = []docker.APIContainers{
{
Names: []string{"/k8s_POD." + strconv.FormatUint(generatePodInfraContainerHash(pod), 16) + "_foo_new_12345678_0"},
ID: "9876",
},
}
fakeDocker.ContainerMap = map[string]*docker.Container{
"9876": {
ID: "9876",
HostConfig: &docker.HostConfig{},
Config: &docker.Config{},
},
}
pods := []*api.Pod{pod}
kubelet.podManager.SetPods(pods)
kubelet.volumeManager.SetVolumes(pod.UID, kubecontainer.VolumeMap{})
// TODO: Move this test to dockertools so that we don't have to do the hacky
// type assertion here.
dm := kubelet.containerRuntime.(*dockertools.DockerManager)
_, err := dm.RunContainer(pod, &pod.Spec.Containers[0], "", "")
if err == nil {
t.Errorf("expected error, found nil")
waitGroup.Add(1)
err := kubelet.SyncPods(pods, emptyPodUIDs, map[string]*api.Pod{}, time.Now())
if err != nil {
t.Errorf("unexpected error: %v", err)
}
waitGroup.Wait()
status, err := kubelet.GetPodStatus(kubecontainer.GetPodFullName(pod))
if err != nil {
t.Errorf("unexpected error %v", err)
}
if len(status.ContainerStatuses) < 1 {
t.Errorf("expected 1 container status, got %d", len(status.ContainerStatuses))
} else {
state := status.ContainerStatuses[0].State
if state.Waiting == nil {
t.Errorf("expected waiting state, got %#v", state)
} else if state.Waiting.Reason != failureReason {
t.Errorf("expected reason %q, got %q", failureReason, state.Waiting.Reason)
}
}
}
func TestGetPodPullImageFailureReason(t *testing.T) {
testKubelet := newTestKubelet(t)
testKubelet.fakeCadvisor.On("MachineInfo").Return(&cadvisorApi.MachineInfo{}, nil)
kubelet := testKubelet.kubelet
fakeDocker := testKubelet.fakeDocker
waitGroup := testKubelet.waitGroup
// Initialize the FakeDockerPuller so that it'd try to pull non-existent
// images.
dm := kubelet.containerRuntime.(*dockertools.DockerManager)
puller := dm.Puller.(*dockertools.FakeDockerPuller)
puller.HasImages = []string{}
// Inject the pull image failure error.
failureReason := "pull image faiulre"
puller.ErrorsToInject = []error{fmt.Errorf("%s", failureReason)}
pod := &api.Pod{
ObjectMeta: api.ObjectMeta{
UID: "12345678",
Name: "foo",
Namespace: "new",
},
Spec: api.PodSpec{
Containers: []api.Container{{Name: "bar", Image: "realImage", ImagePullPolicy: api.PullAlways}},
},
}
// Pretend that the pod infra container has already been created, so that
// we can run the user containers.
fakeDocker.ContainerList = []docker.APIContainers{
{
Names: []string{"/k8s_POD." + strconv.FormatUint(generatePodInfraContainerHash(pod), 16) + "_foo_new_12345678_0"},
ID: "9876",
},
}
fakeDocker.ContainerMap = map[string]*docker.Container{
"9876": {
ID: "9876",
HostConfig: &docker.HostConfig{},
Config: &docker.Config{},
},
}
pods := []*api.Pod{pod}
kubelet.podManager.SetPods(pods)
kubelet.volumeManager.SetVolumes(pod.UID, kubecontainer.VolumeMap{})
waitGroup.Add(1)
err := kubelet.SyncPods(pods, emptyPodUIDs, map[string]*api.Pod{}, time.Now())
if err != nil {
t.Errorf("unexpected error: %v", err)
}
waitGroup.Wait()
status, err := kubelet.GetPodStatus(kubecontainer.GetPodFullName(pod))
if err != nil {
t.Errorf("unexpected error %v", err)