From 205a8b4574c4d3cf35ac7a430b19a426c835dda3 Mon Sep 17 00:00:00 2001
From: Clayton Coleman
Date: Mon, 28 Mar 2016 23:08:54 -0400
Subject: [PATCH] Add init container loading to the kubelet

---
 pkg/api/resource_helpers.go          |  13 +-
 pkg/kubelet/container/ref.go         |  10 +
 pkg/kubelet/container/sync_result.go |   8 +-
 pkg/kubelet/dockertools/manager.go   | 322 ++++++++++++++++++++----
 pkg/kubelet/kubelet.go               |  96 +++++--
 pkg/kubelet/prober/manager.go        |   9 +
 pkg/kubelet/server/server.go         |   7 +
 pkg/kubelet/status/generate.go       |  55 ++++
 pkg/kubelet/status/manager.go        |  71 +++++-
 pkg/kubelet/util.go                  |   5 +
 test/e2e/framework/util.go           | 109 +++++++-
 test/e2e/pods.go                     | 358 +++++++++++++++++++++++++++
 12 files changed, 975 insertions(+), 88 deletions(-)

diff --git a/pkg/api/resource_helpers.go b/pkg/api/resource_helpers.go
index 65bc1b85f08..af1189b4fb3 100644
--- a/pkg/api/resource_helpers.go
+++ b/pkg/api/resource_helpers.go
@@ -92,6 +92,8 @@ func GetPodReadyCondition(status PodStatus) *PodCondition {
 	return condition
 }
 
+// GetPodCondition extracts the provided condition from the given status and returns that.
+// Returns nil and -1 if the condition is not present, and the index of the located condition otherwise.
 func GetPodCondition(status *PodStatus, conditionType PodConditionType) (int, *PodCondition) {
 	for i, c := range status.Conditions {
 		if c.Type == conditionType {
@@ -131,17 +133,6 @@ func UpdatePodCondition(status *PodStatus, condition *PodCondition) bool {
 	}
 }
 
-// GetPodCondition extracts the provided condition from the given status and returns that.
-// Returns nil if the condition is not present.
-func GetPodCondition(status PodStatus, t PodConditionType) *PodCondition {
-	for i, c := range status.Conditions {
-		if c.Type == t {
-			return &status.Conditions[i]
-		}
-	}
-	return nil
-}
-
 // IsNodeReady returns true if a node is ready; false otherwise.
 func IsNodeReady(node *Node) bool {
 	for _, c := range node.Status.Conditions {
diff --git a/pkg/kubelet/container/ref.go b/pkg/kubelet/container/ref.go
index 55e4d546575..ebfff2ebf70 100644
--- a/pkg/kubelet/container/ref.go
+++ b/pkg/kubelet/container/ref.go
@@ -57,5 +57,15 @@ func fieldPath(pod *api.Pod, container *api.Container) (string, error) {
 			}
 		}
 	}
+	for i := range pod.Spec.InitContainers {
+		here := &pod.Spec.InitContainers[i]
+		if here.Name == container.Name {
+			if here.Name == "" {
+				return fmt.Sprintf("spec.initContainers[%d]", i), nil
+			} else {
+				return fmt.Sprintf("spec.initContainers{%s}", here.Name), nil
+			}
+		}
+	}
 	return "", fmt.Errorf("container %#v not found in pod %#v", container, pod)
 }
diff --git a/pkg/kubelet/container/sync_result.go b/pkg/kubelet/container/sync_result.go
index 1c3aa9eea90..6a196f602b2 100644
--- a/pkg/kubelet/container/sync_result.go
+++ b/pkg/kubelet/container/sync_result.go
@@ -50,9 +50,10 @@ var (
 )
 
 var (
-	ErrRunContainer  = errors.New("RunContainerError")
-	ErrKillContainer = errors.New("KillContainerError")
-	ErrVerifyNonRoot = errors.New("VerifyNonRootError")
+	ErrRunContainer     = errors.New("RunContainerError")
+	ErrKillContainer    = errors.New("KillContainerError")
+	ErrVerifyNonRoot    = errors.New("VerifyNonRootError")
+	ErrRunInitContainer = errors.New("RunInitContainerError")
 )
 
 var (
@@ -69,6 +70,7 @@ const (
 	KillContainer   SyncAction = "KillContainer"
 	SetupNetwork    SyncAction = "SetupNetwork"
 	TeardownNetwork SyncAction = "TeardownNetwork"
+	InitContainer   SyncAction = "InitContainer"
 )
 
 // SyncResult is the result of sync action.
diff --git a/pkg/kubelet/dockertools/manager.go b/pkg/kubelet/dockertools/manager.go index e9409818747..10ac9e281f3 100644 --- a/pkg/kubelet/dockertools/manager.go +++ b/pkg/kubelet/dockertools/manager.go @@ -37,6 +37,7 @@ import ( dockernat "github.com/docker/go-connections/nat" "github.com/golang/glog" cadvisorapi "github.com/google/cadvisor/info/v1" + "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/api/unversioned" "k8s.io/kubernetes/pkg/client/record" @@ -57,6 +58,7 @@ import ( "k8s.io/kubernetes/pkg/util/oom" "k8s.io/kubernetes/pkg/util/procfs" utilruntime "k8s.io/kubernetes/pkg/util/runtime" + "k8s.io/kubernetes/pkg/util/sets" utilstrings "k8s.io/kubernetes/pkg/util/strings" ) @@ -876,6 +878,9 @@ func (dm *DockerManager) podInfraContainerChanged(pod *api.Pod, podInfraContaine } else if dm.networkPlugin.Name() != "cni" && dm.networkPlugin.Name() != "kubenet" { // Docker only exports ports from the pod infra container. Let's // collect all of the relevant ports and export them. + for _, container := range pod.Spec.InitContainers { + ports = append(ports, container.Ports...) + } for _, container := range pod.Spec.Containers { ports = append(ports, container.Ports...) } @@ -1179,6 +1184,14 @@ func (dm *DockerManager) killPodWithSyncResult(pod *api.Pod, runningPod kubecont break } } + if containerSpec == nil { + for i, c := range pod.Spec.InitContainers { + if c.Name == container.Name { + containerSpec = &pod.Spec.InitContainers[i] + break + } + } + } } // TODO: Handle this without signaling the pod infra container to @@ -1369,6 +1382,14 @@ func containerAndPodFromLabels(inspect *dockertypes.ContainerJSON) (pod *api.Pod break } } + if container == nil { + for ix := range pod.Spec.InitContainers { + if pod.Spec.InitContainers[ix].Name == name { + container = &pod.Spec.InitContainers[ix] + break + } + } + } if container == nil { err = fmt.Errorf("unable to find container %s in pod %v", name, pod) } @@ -1425,6 +1446,7 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe if err != nil { glog.Errorf("Can't make a ref to pod %v, container %v: '%v'", pod.Name, container.Name, err) } + glog.Infof("Generating ref for container %s: %#v", container.Name, ref) opts, err := dm.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP) if err != nil { @@ -1603,6 +1625,9 @@ func (dm *DockerManager) createPodInfraContainer(pod *api.Pod) (kubecontainer.Do } else { // Docker only exports ports from the pod infra container. Let's // collect all of the relevant ports and export them. + for _, container := range pod.Spec.InitContainers { + ports = append(ports, container.Ports...) + } for _, container := range pod.Spec.Containers { ports = append(ports, container.Ports...) } @@ -1640,13 +1665,16 @@ func (dm *DockerManager) createPodInfraContainer(pod *api.Pod) (kubecontainer.Do // should be kept running. If startInfraContainer is false then it contains an entry for infraContainerId (mapped to -1). // It shouldn't be the case where containersToStart is empty and containersToKeep contains only infraContainerId. In such case // Infra Container should be killed, hence it's removed from this map. -// - all running containers which are NOT contained in containersToKeep should be killed. +// - all init containers are stored in initContainersToKeep +// - all running containers which are NOT contained in containersToKeep and initContainersToKeep should be killed. 
type podContainerChangesSpec struct { - StartInfraContainer bool - InfraChanged bool - InfraContainerId kubecontainer.DockerID - ContainersToStart map[int]string - ContainersToKeep map[kubecontainer.DockerID]int + StartInfraContainer bool + InfraChanged bool + InfraContainerId kubecontainer.DockerID + InitFailed bool + InitContainersToKeep map[kubecontainer.DockerID]int + ContainersToStart map[int]string + ContainersToKeep map[kubecontainer.DockerID]int } func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, podStatus *kubecontainer.PodStatus) (podContainerChangesSpec, error) { @@ -1683,6 +1711,35 @@ func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, podStatus *kub containersToKeep[podInfraContainerID] = -1 } + // check the status of the init containers + initFailed := false + initContainersToKeep := make(map[kubecontainer.DockerID]int) + // always reset the init containers if the pod is reset + if !createPodInfraContainer { + // keep all successfully completed containers up to and including the first failing container + Containers: + for i, container := range pod.Spec.InitContainers { + containerStatus := podStatus.FindContainerStatusByName(container.Name) + if containerStatus == nil { + continue + } + switch { + case containerStatus == nil: + continue + case containerStatus.State == kubecontainer.ContainerStateRunning: + initContainersToKeep[kubecontainer.DockerID(containerStatus.ID.ID)] = i + case containerStatus.State == kubecontainer.ContainerStateExited: + initContainersToKeep[kubecontainer.DockerID(containerStatus.ID.ID)] = i + // TODO: should we abstract the "did the init container fail" check? + if containerStatus.ExitCode != 0 { + initFailed = true + break Containers + } + } + } + } + + // check the status of the containers for index, container := range pod.Spec.Containers { expectedHash := kubecontainer.HashContainer(&container) @@ -1716,6 +1773,19 @@ func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, podStatus *kub continue } + if initFailed { + // initialization failed and Container exists + // If we have an initialization failure everything will be killed anyway + // If RestartPolicy is Always or OnFailure we restart containers that were running before we + // killed them when re-running initialization + if pod.Spec.RestartPolicy != api.RestartPolicyNever { + message := fmt.Sprintf("Failed to initialize pod. %q will be restarted.", container.Name) + glog.V(1).Info(message) + containersToStart[index] = message + } + continue + } + // At this point, the container is running and pod infra container is good. // We will look for changes and check healthiness for the container. containerChanged := hash != 0 && hash != expectedHash @@ -1743,17 +1813,21 @@ func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, podStatus *kub // (In fact, when createPodInfraContainer is false, containersToKeep will not be touched). // - createPodInfraContainer is false and containersToKeep contains at least ID of Infra Container - // If Infra container is the last running one, we don't want to keep it. + // If Infra container is the last running one, we don't want to keep it, and we don't want to + // keep any init containers. 
if !createPodInfraContainer && len(containersToStart) == 0 && len(containersToKeep) == 1 { containersToKeep = make(map[kubecontainer.DockerID]int) + initContainersToKeep = make(map[kubecontainer.DockerID]int) } return podContainerChangesSpec{ - StartInfraContainer: createPodInfraContainer, - InfraChanged: changed, - InfraContainerId: podInfraContainerID, - ContainersToStart: containersToStart, - ContainersToKeep: containersToKeep, + StartInfraContainer: createPodInfraContainer, + InfraChanged: changed, + InfraContainerId: podInfraContainerID, + InitFailed: initFailed, + InitContainersToKeep: initContainersToKeep, + ContainersToStart: containersToStart, + ContainersToKeep: containersToKeep, }, nil } @@ -1797,7 +1871,8 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, _ api.PodStatus, podStatus *kubec runningContainerStatues := podStatus.GetRunningContainerStatuses() for _, containerStatus := range runningContainerStatues { _, keep := containerChanges.ContainersToKeep[kubecontainer.DockerID(containerStatus.ID.ID)] - if !keep { + _, keepInit := containerChanges.InitContainersToKeep[kubecontainer.DockerID(containerStatus.ID.ID)] + if !keep && !keepInit { glog.V(3).Infof("Killing unwanted container %q(id=%q) for pod %q", containerStatus.Name, containerStatus.ID, format.Pod(pod)) // attempt to find the appropriate container policy var podContainer *api.Container @@ -1820,6 +1895,9 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, _ api.PodStatus, podStatus *kubec } } + // Keep terminated init containers fairly aggressively controlled + dm.pruneInitContainersBeforeStart(pod, podStatus, containerChanges.InitContainersToKeep) + // We pass the value of the podIP down to runContainerInPod, which in turn // passes it to various other functions, in order to facilitate // functionality that requires this value (hosts file and downward API) @@ -1889,14 +1967,78 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, _ api.PodStatus, podStatus *kubec } } - // Start everything + next, status, done := findActiveInitContainer(pod, podStatus) + if status != nil { + if status.ExitCode != 0 { + // container initialization has failed, flag the pod as failed + initContainerResult := kubecontainer.NewSyncResult(kubecontainer.InitContainer, status.Name) + initContainerResult.Fail(kubecontainer.ErrRunInitContainer, fmt.Sprintf("init container %q exited with %d", status.Name, status.ExitCode)) + result.AddSyncResult(initContainerResult) + if pod.Spec.RestartPolicy == api.RestartPolicyNever { + utilruntime.HandleError(fmt.Errorf("error running pod %q init container %q, restart=Never: %+v", format.Pod(pod), status.Name, status)) + return + } + utilruntime.HandleError(fmt.Errorf("Error running pod %q init container %q, restarting: %+v", format.Pod(pod), status.Name, status)) + } + } + + // Note: when configuring the pod's containers anything that can be configured by pointing + // to the namespace of the infra container should use namespaceMode. This includes things like the net namespace + // and IPC namespace. PID mode cannot point to another container right now. + // See createPodInfraContainer for infra container setup. 
+ namespaceMode := fmt.Sprintf("container:%v", podInfraContainerID) + pidMode := getPidMode(pod) + + if next != nil { + if len(containerChanges.ContainersToStart) == 0 { + glog.V(4).Infof("No containers to start, stopping at init container %+v in pod %v", next.Name, format.Pod(pod)) + return + } + + // If we need to start the next container, do so now then exit + container := next + startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name) + result.AddSyncResult(startContainerResult) + + // containerChanges.StartInfraContainer causes the containers to be restarted for config reasons + if !containerChanges.StartInfraContainer { + isInBackOff, err, msg := dm.doBackOff(pod, container, podStatus, backOff) + if isInBackOff { + startContainerResult.Fail(err, msg) + glog.V(4).Infof("Backing Off restarting init container %+v in pod %v", container, format.Pod(pod)) + return + } + } + + glog.V(4).Infof("Creating init container %+v in pod %v", container, format.Pod(pod)) + if err, msg := dm.tryContainerStart(container, pod, podStatus, pullSecrets, namespaceMode, pidMode, podIP); err != nil { + startContainerResult.Fail(err, msg) + utilruntime.HandleError(fmt.Errorf("container start failed: %v: %s", err, msg)) + return + } + + // Successfully started the container; clear the entry in the failure + glog.V(4).Infof("Completed init container %q for pod %q", container.Name, format.Pod(pod)) + return + } + if !done { + // init container still running + glog.V(4).Infof("An init container is still running in pod %v", format.Pod(pod)) + return + } + if containerChanges.InitFailed { + // init container still running + glog.V(4).Infof("Not all init containers have succeeded for pod %v", format.Pod(pod)) + return + } + + // Start regular containers for idx := range containerChanges.ContainersToStart { container := &pod.Spec.Containers[idx] startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name) result.AddSyncResult(startContainerResult) // containerChanges.StartInfraContainer causes the containers to be restarted for config reasons - // ignore backoff if !containerChanges.StartInfraContainer { isInBackOff, err, msg := dm.doBackOff(pod, container, podStatus, backOff) if isInBackOff { @@ -1905,46 +2047,131 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, _ api.PodStatus, podStatus *kubec continue } } + glog.V(4).Infof("Creating container %+v in pod %v", container, format.Pod(pod)) - err, msg := dm.imagePuller.PullImage(pod, container, pullSecrets) - if err != nil { + if err, msg := dm.tryContainerStart(container, pod, podStatus, pullSecrets, namespaceMode, pidMode, podIP); err != nil { startContainerResult.Fail(err, msg) + utilruntime.HandleError(fmt.Errorf("container start failed: %v: %s", err, msg)) continue } - - if container.SecurityContext != nil && container.SecurityContext.RunAsNonRoot != nil && *container.SecurityContext.RunAsNonRoot { - err := dm.verifyNonRoot(container) - if err != nil { - startContainerResult.Fail(kubecontainer.ErrVerifyNonRoot, err.Error()) - glog.Errorf("Error running pod %q container %q: %v", format.Pod(pod), container.Name, err) - continue - } - } - // For a new container, the RestartCount should be 0 - restartCount := 0 - containerStatus := podStatus.FindContainerStatusByName(container.Name) - if containerStatus != nil { - restartCount = containerStatus.RestartCount + 1 - } - - // TODO(dawnchen): Check RestartPolicy.DelaySeconds before restart a container - // Note: when configuring the pod's 
containers anything that can be configured by pointing - // to the namespace of the infra container should use namespaceMode. This includes things like the net namespace - // and IPC namespace. PID mode cannot point to another container right now. - // See createPodInfraContainer for infra container setup. - namespaceMode := fmt.Sprintf("container:%v", podInfraContainerID) - _, err = dm.runContainerInPod(pod, container, namespaceMode, namespaceMode, getPidMode(pod), podIP, restartCount) - if err != nil { - startContainerResult.Fail(kubecontainer.ErrRunContainer, err.Error()) - // TODO(bburns) : Perhaps blacklist a container after N failures? - glog.Errorf("Error running pod %q container %q: %v", format.Pod(pod), container.Name, err) - continue - } - // Successfully started the container; clear the entry in the failure } return } +// tryContainerStart attempts to pull and start the container, returning an error and a reason string if the start +// was not successful. +func (dm *DockerManager) tryContainerStart(container *api.Container, pod *api.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []api.Secret, namespaceMode, pidMode, podIP string) (err error, reason string) { + err, msg := dm.imagePuller.PullImage(pod, container, pullSecrets) + if err != nil { + return err, msg + } + + if container.SecurityContext != nil && container.SecurityContext.RunAsNonRoot != nil && *container.SecurityContext.RunAsNonRoot { + err := dm.verifyNonRoot(container) + if err != nil { + return kubecontainer.ErrVerifyNonRoot, err.Error() + } + } + + // For a new container, the RestartCount should be 0 + restartCount := 0 + containerStatus := podStatus.FindContainerStatusByName(container.Name) + if containerStatus != nil { + restartCount = containerStatus.RestartCount + 1 + } + + // TODO(dawnchen): Check RestartPolicy.DelaySeconds before restart a container + _, err = dm.runContainerInPod(pod, container, namespaceMode, namespaceMode, pidMode, podIP, restartCount) + if err != nil { + // TODO(bburns) : Perhaps blacklist a container after N failures? + return kubecontainer.ErrRunContainer, err.Error() + } + return nil, "" +} + +// pruneInitContainers ensures that before we begin creating init containers, we have reduced the number +// of outstanding init containers still present. This reduces load on the container garbage collector +// by only preserving the most recent terminated init container. +func (dm *DockerManager) pruneInitContainersBeforeStart(pod *api.Pod, podStatus *kubecontainer.PodStatus, initContainersToKeep map[kubecontainer.DockerID]int) { + // only the last execution of an init container should be preserved, and only preserve it if it is in the + // list of init containers to keep. 
+ initContainerNames := sets.NewString() + for _, container := range pod.Spec.InitContainers { + initContainerNames.Insert(container.Name) + } + for name := range initContainerNames { + count := 0 + for _, status := range podStatus.ContainerStatuses { + if !initContainerNames.Has(status.Name) || status.State != kubecontainer.ContainerStateExited { + continue + } + count++ + // keep the first init container we see + if count == 1 { + continue + } + // if there is a reason to preserve the older container, do so + if _, ok := initContainersToKeep[kubecontainer.DockerID(status.ID.ID)]; ok { + continue + } + + // prune all other init containers that match this container name + // TODO: we may not need aggressive pruning + glog.V(4).Infof("Removing init container %q instance %q %d", status.Name, status.ID.ID, count) + if err := dm.client.RemoveContainer(status.ID.ID, dockertypes.ContainerRemoveOptions{RemoveVolumes: true}); err != nil { + if _, ok := err.(containerNotFoundError); ok { + count-- + continue + } + utilruntime.HandleError(fmt.Errorf("failed to remove pod init container %q: %v; Skipping pod %q", name, err, format.Pod(pod))) + // TODO: report serious errors + continue + } + + // remove any references to this container + if _, ok := dm.containerRefManager.GetRef(status.ID); ok { + dm.containerRefManager.ClearRef(status.ID) + } else { + glog.Warningf("No ref for pod '%q'", pod.Name) + } + } + } +} + +// findActiveInitContainer returns the status of the last failed container, the next init container to +// start, or done if there are no further init containers. Status is only returned if an init container +// failed, in which case next will point to the current container. +func findActiveInitContainer(pod *api.Pod, podStatus *kubecontainer.PodStatus) (next *api.Container, status *kubecontainer.ContainerStatus, done bool) { + if len(pod.Spec.InitContainers) == 0 { + return nil, nil, true + } + + for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- { + container := &pod.Spec.InitContainers[i] + status := podStatus.FindContainerStatusByName(container.Name) + switch { + case status == nil: + continue + case status.State == kubecontainer.ContainerStateRunning: + return nil, nil, false + case status.State == kubecontainer.ContainerStateExited: + switch { + // the container has failed, we'll have to retry + case status.ExitCode != 0: + return &pod.Spec.InitContainers[i], status, false + // all init containers successful + case i == (len(pod.Spec.InitContainers) - 1): + return nil, nil, true + // all containers up to i successful, go to i+1 + default: + return &pod.Spec.InitContainers[i+1], nil, false + } + } + } + + return &pod.Spec.InitContainers[0], nil, false +} + // verifyNonRoot returns an error if the container or image will run as the root user. 
func (dm *DockerManager) verifyNonRoot(container *api.Container) error { if securitycontext.HasRunAsUser(container) { @@ -2018,6 +2245,7 @@ func (dm *DockerManager) doBackOff(pod *api.Pod, container *api.Container, podSt } } if cStatus != nil { + glog.Infof("checking backoff for container %q in pod %q", container.Name, pod.Name) ts := cStatus.FinishedAt // found a container that requires backoff dockerName := KubeletContainerName{ diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 82e49538cb6..2163a3189d7 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -1761,6 +1761,7 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error { !firstSeenTime.IsZero() { metrics.PodStartLatency.Observe(metrics.SinceInMicroseconds(firstSeenTime)) } + // Update status in the status manager kl.statusManager.SetPodStatus(pod, apiPodStatus) @@ -2347,6 +2348,10 @@ func hasHostPortConflicts(pods []*api.Pod) bool { glog.Errorf("Pod %q: HostPort is already allocated, ignoring: %v", format.Pod(pod), errs) return true } + if errs := validation.AccumulateUniqueHostPorts(pod.Spec.InitContainers, &ports, field.NewPath("spec", "initContainers")); len(errs) > 0 { + glog.Errorf("Pod %q: HostPort is already allocated, ignoring: %v", format.Pod(pod), errs) + return true + } } return false } @@ -3310,12 +3315,46 @@ func (kl *Kubelet) tryUpdateNodeStatus() error { // This func is exported to simplify integration with 3rd party kubelet // integrations like kubernetes-mesos. func GetPhase(spec *api.PodSpec, info []api.ContainerStatus) api.PodPhase { + initialized := 0 + pendingInitialization := 0 + failedInitialization := 0 + for _, container := range spec.InitContainers { + containerStatus, ok := api.GetContainerStatus(info, container.Name) + if !ok { + pendingInitialization++ + continue + } + + switch { + case containerStatus.State.Running != nil: + pendingInitialization++ + case containerStatus.State.Terminated != nil: + if containerStatus.State.Terminated.ExitCode == 0 { + initialized++ + } else { + failedInitialization++ + } + case containerStatus.State.Waiting != nil: + if containerStatus.LastTerminationState.Terminated != nil { + if containerStatus.LastTerminationState.Terminated.ExitCode == 0 { + initialized++ + } else { + failedInitialization++ + } + } else { + pendingInitialization++ + } + default: + pendingInitialization++ + } + } + + unknown := 0 running := 0 waiting := 0 stopped := 0 failed := 0 succeeded := 0 - unknown := 0 for _, container := range spec.Containers { containerStatus, ok := api.GetContainerStatus(info, container.Name) if !ok { @@ -3344,7 +3383,13 @@ func GetPhase(spec *api.PodSpec, info []api.ContainerStatus) api.PodPhase { } } + if failedInitialization > 0 && spec.RestartPolicy == api.RestartPolicyNever { + return api.PodFailed + } + switch { + case pendingInitialization > 0: + fallthrough case waiting > 0: glog.V(5).Infof("pod waiting > 0, pending") // One or more containers has not been started @@ -3409,8 +3454,10 @@ func (kl *Kubelet) generateAPIPodStatus(pod *api.Pod, podStatus *kubecontainer.P // Assume info is ready to process spec := &pod.Spec - s.Phase = GetPhase(spec, s.ContainerStatuses) + allStatus := append(append([]api.ContainerStatus{}, s.ContainerStatuses...), s.InitContainerStatuses...) 
+ s.Phase = GetPhase(spec, allStatus) kl.probeManager.UpdatePodStatus(pod.UID, s) + s.Conditions = append(s.Conditions, status.GeneratePodInitializedCondition(spec, s.InitContainerStatuses, s.Phase)) s.Conditions = append(s.Conditions, status.GeneratePodReadyCondition(spec, s.ContainerStatuses, s.Phase)) // s (the PodStatus we are creating) will not have a PodScheduled condition yet, because converStatusToAPIStatus() // does not create one. If the existing PodStatus has a PodScheduled condition, then copy it into s and make sure @@ -3443,9 +3490,27 @@ func (kl *Kubelet) generateAPIPodStatus(pod *api.Pod, podStatus *kubecontainer.P // alter the kubelet state at all. func (kl *Kubelet) convertStatusToAPIStatus(pod *api.Pod, podStatus *kubecontainer.PodStatus) *api.PodStatus { var apiPodStatus api.PodStatus - uid := pod.UID apiPodStatus.PodIP = podStatus.IP + apiPodStatus.ContainerStatuses = kl.convertToAPIContainerStatuses( + pod, podStatus, + pod.Status.ContainerStatuses, + pod.Spec.Containers, + len(pod.Spec.InitContainers) > 0, + false, + ) + apiPodStatus.InitContainerStatuses = kl.convertToAPIContainerStatuses( + pod, podStatus, + pod.Status.InitContainerStatuses, + pod.Spec.InitContainers, + len(pod.Spec.InitContainers) > 0, + true, + ) + + return &apiPodStatus +} + +func (kl *Kubelet) convertToAPIContainerStatuses(pod *api.Pod, podStatus *kubecontainer.PodStatus, previousStatus []api.ContainerStatus, containers []api.Container, hasInitContainers, isInitContainer bool) []api.ContainerStatus { convertContainerStatus := func(cs *kubecontainer.ContainerStatus) *api.ContainerStatus { cid := cs.ID.String() status := &api.ContainerStatus{ @@ -3474,15 +3539,19 @@ func (kl *Kubelet) convertStatusToAPIStatus(pod *api.Pod, podStatus *kubecontain } // Fetch old containers statuses from old pod status. - oldStatuses := make(map[string]api.ContainerStatus, len(pod.Spec.Containers)) - for _, status := range pod.Status.ContainerStatuses { + oldStatuses := make(map[string]api.ContainerStatus, len(containers)) + for _, status := range previousStatus { oldStatuses[status.Name] = status } // Set all container statuses to default waiting state - statuses := make(map[string]*api.ContainerStatus, len(pod.Spec.Containers)) + statuses := make(map[string]*api.ContainerStatus, len(containers)) defaultWaitingState := api.ContainerState{Waiting: &api.ContainerStateWaiting{Reason: "ContainerCreating"}} - for _, container := range pod.Spec.Containers { + if hasInitContainers { + defaultWaitingState = api.ContainerState{Waiting: &api.ContainerStateWaiting{Reason: "PodInitializing"}} + } + + for _, container := range containers { status := &api.ContainerStatus{ Name: container.Name, Image: container.Image, @@ -3498,7 +3567,6 @@ func (kl *Kubelet) convertStatusToAPIStatus(pod *api.Pod, podStatus *kubecontain // Make the latest container status comes first. sort.Sort(sort.Reverse(kubecontainer.SortContainerStatusesByCreationTime(podStatus.ContainerStatuses))) - // Set container statuses according to the statuses seen in pod status containerSeen := map[string]int{} for _, cStatus := range podStatus.ContainerStatuses { @@ -3520,13 +3588,13 @@ func (kl *Kubelet) convertStatusToAPIStatus(pod *api.Pod, podStatus *kubecontain } // Handle the containers failed to be started, which should be in Waiting state. - for _, container := range pod.Spec.Containers { + for _, container := range containers { // If a container should be restarted in next syncpod, it is *Waiting*. 
if !kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) { continue } status := statuses[container.Name] - reason, message, ok := kl.reasonCache.Get(uid, container.Name) + reason, message, ok := kl.reasonCache.Get(pod.UID, container.Name) if !ok { // In fact, we could also apply Waiting state here, but it is less informative, // and the container will be restarted soon, so we prefer the original state here. @@ -3548,15 +3616,15 @@ func (kl *Kubelet) convertStatusToAPIStatus(pod *api.Pod, podStatus *kubecontain statuses[container.Name] = status } - apiPodStatus.ContainerStatuses = make([]api.ContainerStatus, 0) + var containerStatuses []api.ContainerStatus for _, status := range statuses { - apiPodStatus.ContainerStatuses = append(apiPodStatus.ContainerStatuses, *status) + containerStatuses = append(containerStatuses, *status) } // Sort the container statuses since clients of this interface expect the list // of containers in a pod has a deterministic order. - sort.Sort(kubetypes.SortedContainerStatuses(apiPodStatus.ContainerStatuses)) - return &apiPodStatus + sort.Sort(kubetypes.SortedContainerStatuses(containerStatuses)) + return containerStatuses } // Returns logs of current machine. diff --git a/pkg/kubelet/prober/manager.go b/pkg/kubelet/prober/manager.go index 9e46f0be3f7..01218a75df1 100644 --- a/pkg/kubelet/prober/manager.go +++ b/pkg/kubelet/prober/manager.go @@ -207,6 +207,15 @@ func (m *manager) UpdatePodStatus(podUID types.UID, podStatus *api.PodStatus) { } podStatus.ContainerStatuses[i].Ready = ready } + // init containers are ready if they have exited with success or if a readiness probe has + // succeeded. + for i, c := range podStatus.InitContainerStatuses { + var ready bool + if c.State.Terminated != nil && c.State.Terminated.ExitCode == 0 { + ready = true + } + podStatus.InitContainerStatuses[i].Ready = ready + } } func (m *manager) getWorker(podUID types.UID, containerName string, probeType probeType) (*worker, bool) { diff --git a/pkg/kubelet/server/server.go b/pkg/kubelet/server/server.go index 163ecf57b64..e8b06be494f 100644 --- a/pkg/kubelet/server/server.go +++ b/pkg/kubelet/server/server.go @@ -465,6 +465,13 @@ func (s *Server) getContainerLogs(request *restful.Request, response *restful.Re containerExists = true } } + if !containerExists { + for _, container := range pod.Spec.InitContainers { + if container.Name == containerName { + containerExists = true + } + } + } if !containerExists { response.WriteError(http.StatusNotFound, fmt.Errorf("container %q not found in pod %q\n", containerName, podID)) return diff --git a/pkg/kubelet/status/generate.go b/pkg/kubelet/status/generate.go index 05d845470b4..cc000929a07 100644 --- a/pkg/kubelet/status/generate.go +++ b/pkg/kubelet/status/generate.go @@ -77,3 +77,58 @@ func GeneratePodReadyCondition(spec *api.PodSpec, containerStatuses []api.Contai Status: api.ConditionTrue, } } + +// GeneratePodInitializedCondition returns initialized condition if all init containers in a pod are ready, else it +// returns an uninitialized condition. +func GeneratePodInitializedCondition(spec *api.PodSpec, containerStatuses []api.ContainerStatus, podPhase api.PodPhase) api.PodCondition { + // Find if all containers are ready or not. 
+ if containerStatuses == nil && len(spec.InitContainers) > 0 { + return api.PodCondition{ + Type: api.PodInitialized, + Status: api.ConditionFalse, + Reason: "UnknownContainerStatuses", + } + } + unknownContainers := []string{} + unreadyContainers := []string{} + for _, container := range spec.InitContainers { + if containerStatus, ok := api.GetContainerStatus(containerStatuses, container.Name); ok { + if !containerStatus.Ready { + unreadyContainers = append(unreadyContainers, container.Name) + } + } else { + unknownContainers = append(unknownContainers, container.Name) + } + } + + // If all init containers are known and succeeded, just return PodCompleted. + if podPhase == api.PodSucceeded && len(unknownContainers) == 0 { + return api.PodCondition{ + Type: api.PodInitialized, + Status: api.ConditionTrue, + Reason: "PodCompleted", + } + } + + unreadyMessages := []string{} + if len(unknownContainers) > 0 { + unreadyMessages = append(unreadyMessages, fmt.Sprintf("containers with unknown status: %s", unknownContainers)) + } + if len(unreadyContainers) > 0 { + unreadyMessages = append(unreadyMessages, fmt.Sprintf("containers with incomplete status: %s", unreadyContainers)) + } + unreadyMessage := strings.Join(unreadyMessages, ", ") + if unreadyMessage != "" { + return api.PodCondition{ + Type: api.PodInitialized, + Status: api.ConditionFalse, + Reason: "ContainersNotInitialized", + Message: unreadyMessage, + } + } + + return api.PodCondition{ + Type: api.PodInitialized, + Status: api.ConditionTrue, + } +} diff --git a/pkg/kubelet/status/manager.go b/pkg/kubelet/status/manager.go index 815cc78091e..c9b5b1e4e2c 100644 --- a/pkg/kubelet/status/manager.go +++ b/pkg/kubelet/status/manager.go @@ -172,20 +172,14 @@ func (m *manager) SetContainerReadiness(podUID types.UID, containerID kubecontai } // Find the container to update. - containerIndex := -1 - for i, c := range oldStatus.status.ContainerStatuses { - if c.ContainerID == containerID.String() { - containerIndex = i - break - } - } - if containerIndex == -1 { + containerStatus, _, ok := findContainerStatus(&oldStatus.status, containerID.String()) + if !ok { glog.Warningf("Container readiness changed for unknown container: %q - %q", format.Pod(pod), containerID.String()) return } - if oldStatus.status.ContainerStatuses[containerIndex].Ready == ready { + if containerStatus.Ready == ready { glog.V(4).Infof("Container readiness unchanged (%v): %q - %q", ready, format.Pod(pod), containerID.String()) return @@ -196,7 +190,8 @@ func (m *manager) SetContainerReadiness(podUID types.UID, containerID kubecontai if err != nil { return } - status.ContainerStatuses[containerIndex].Ready = ready + containerStatus, _, _ = findContainerStatus(&status, containerID.String()) + containerStatus.Ready = ready // Update pod condition. readyConditionIndex := -1 @@ -217,6 +212,31 @@ func (m *manager) SetContainerReadiness(podUID types.UID, containerID kubecontai m.updateStatusInternal(pod, status, false) } +func findContainerStatus(status *api.PodStatus, containerID string) (containerStatus *api.ContainerStatus, init bool, ok bool) { + // Find the container to update. 
+ containerIndex := -1 + for i, c := range status.ContainerStatuses { + if c.ContainerID == containerID { + containerIndex = i + break + } + } + if containerIndex != -1 { + return &status.ContainerStatuses[containerIndex], false, true + } + + for i, c := range status.InitContainerStatuses { + if c.ContainerID == containerID { + containerIndex = i + break + } + } + if containerIndex != -1 { + return &status.InitContainerStatuses[containerIndex], true, true + } + return nil, false, false +} + func (m *manager) TerminatePod(pod *api.Pod) { m.podStatusesLock.Lock() defer m.podStatusesLock.Unlock() @@ -233,6 +253,11 @@ func (m *manager) TerminatePod(pod *api.Pod) { Terminated: &api.ContainerStateTerminated{}, } } + for i := range status.InitContainerStatuses { + status.InitContainerStatuses[i].State = api.ContainerState{ + Terminated: &api.ContainerStateTerminated{}, + } + } m.updateStatusInternal(pod, pod.Status, true) } @@ -251,16 +276,27 @@ func (m *manager) updateStatusInternal(pod *api.Pod, status api.PodStatus, force } // Set ReadyCondition.LastTransitionTime. - if readyCondition := api.GetPodReadyCondition(status); readyCondition != nil { + if _, readyCondition := api.GetPodCondition(&status, api.PodReady); readyCondition != nil { // Need to set LastTransitionTime. lastTransitionTime := unversioned.Now() - oldReadyCondition := api.GetPodReadyCondition(oldStatus) + _, oldReadyCondition := api.GetPodCondition(&oldStatus, api.PodReady) if oldReadyCondition != nil && readyCondition.Status == oldReadyCondition.Status { lastTransitionTime = oldReadyCondition.LastTransitionTime } readyCondition.LastTransitionTime = lastTransitionTime } + // Set InitializedCondition.LastTransitionTime. + if _, initCondition := api.GetPodCondition(&status, api.PodInitialized); initCondition != nil { + // Need to set LastTransitionTime. + lastTransitionTime := unversioned.Now() + _, oldInitCondition := api.GetPodCondition(&oldStatus, api.PodInitialized) + if oldInitCondition != nil && initCondition.Status == oldInitCondition.Status { + lastTransitionTime = oldInitCondition.LastTransitionTime + } + initCondition.LastTransitionTime = lastTransitionTime + } + // ensure that the start time does not change across updates. 
 	if oldStatus.StartTime != nil && !oldStatus.StartTime.IsZero() {
 		status.StartTime = oldStatus.StartTime
@@ -490,6 +526,8 @@ func normalizeStatus(status *api.PodStatus) *api.PodStatus {
 		normalizeTimeStamp(&condition.LastProbeTime)
 		normalizeTimeStamp(&condition.LastTransitionTime)
 	}
+
+	// update container statuses
 	for i := range status.ContainerStatuses {
 		cstatus := &status.ContainerStatuses[i]
 		normalizeContainerState(&cstatus.State)
@@ -497,6 +535,15 @@ func normalizeStatus(status *api.PodStatus) *api.PodStatus {
 	}
 	// Sort the container statuses, so that the order won't affect the result of comparison
 	sort.Sort(kubetypes.SortedContainerStatuses(status.ContainerStatuses))
+
+	// update init container statuses
+	for i := range status.InitContainerStatuses {
+		cstatus := &status.InitContainerStatuses[i]
+		normalizeContainerState(&cstatus.State)
+		normalizeContainerState(&cstatus.LastTerminationState)
+	}
+	// Sort the container statuses, so that the order won't affect the result of comparison
+	sort.Sort(kubetypes.SortedContainerStatuses(status.InitContainerStatuses))
 	return status
 }
 
diff --git a/pkg/kubelet/util.go b/pkg/kubelet/util.go
index a06a57ce566..ae2d94bfa12 100644
--- a/pkg/kubelet/util.go
+++ b/pkg/kubelet/util.go
@@ -63,6 +63,11 @@ func canRunPod(pod *api.Pod) error {
 				return fmt.Errorf("pod with UID %q specified privileged container, but is disallowed", pod.UID)
 			}
 		}
+		for _, container := range pod.Spec.InitContainers {
+			if securitycontext.HasPrivilegedRequest(&container) {
+				return fmt.Errorf("pod with UID %q specified privileged container, but is disallowed", pod.UID)
+			}
+		}
 	}
 	return nil
 }
diff --git a/test/e2e/framework/util.go b/test/e2e/framework/util.go
index 8a1b6348514..e137dbd0457 100644
--- a/test/e2e/framework/util.go
+++ b/test/e2e/framework/util.go
@@ -879,6 +879,109 @@ func deleteNS(c *client.Client, namespace string, timeout time.Duration) error {
 	return nil
 }
 
+func ContainerInitInvariant(older, newer runtime.Object) error {
+	oldPod := older.(*api.Pod)
+	newPod := newer.(*api.Pod)
+	if len(oldPod.Spec.InitContainers) == 0 {
+		return nil
+	}
+	if len(oldPod.Spec.InitContainers) != len(newPod.Spec.InitContainers) {
+		return fmt.Errorf("init container list changed")
+	}
+	if oldPod.UID != newPod.UID {
+		return fmt.Errorf("two different pods exist in the condition: %s vs %s", oldPod.UID, newPod.UID)
+	}
+	if err := initContainersInvariants(oldPod); err != nil {
+		return err
+	}
+	if err := initContainersInvariants(newPod); err != nil {
+		return err
+	}
+	oldInit, _, _ := podInitialized(oldPod)
+	newInit, _, _ := podInitialized(newPod)
+	if oldInit && !newInit {
+		// TODO: we may in the future enable resetting PodInitialized = false if the kubelet needs to restart it
+		// from scratch
+		return fmt.Errorf("pod cannot be initialized and then regress to not being initialized")
+	}
+	return nil
+}
+
+func podInitialized(pod *api.Pod) (ok bool, failed bool, err error) {
+	allInit := true
+	initFailed := false
+	for _, s := range pod.Status.InitContainerStatuses {
+		switch {
+		case initFailed && s.State.Waiting == nil:
+			return allInit, initFailed, fmt.Errorf("container %s is after a failed container but isn't waiting", s.Name)
+		case !allInit && s.State.Waiting == nil:
+			return allInit, initFailed, fmt.Errorf("container %s is after an initializing container but isn't waiting", s.Name)
+		case s.State.Terminated == nil:
+			allInit = false
+		case s.State.Terminated.ExitCode != 0:
+			allInit = false
+			initFailed = true
+		case !s.Ready:
+			return allInit, initFailed, fmt.Errorf("container %s initialized but isn't marked as ready", s.Name)
+		}
+	}
+	return allInit, initFailed, nil
+}
+
+func initContainersInvariants(pod *api.Pod) error {
+	allInit, initFailed, err := podInitialized(pod)
+	if err != nil {
+		return err
+	}
+	if !allInit || initFailed {
+		for _, s := range pod.Status.ContainerStatuses {
+			if s.State.Waiting == nil || s.RestartCount != 0 {
+				return fmt.Errorf("container %s is not waiting but initialization not complete", s.Name)
+			}
+			if s.State.Waiting.Reason != "PodInitializing" {
+				return fmt.Errorf("container %s should have reason PodInitializing: %s", s.Name, s.State.Waiting.Reason)
+			}
+		}
+	}
+	_, c := api.GetPodCondition(&pod.Status, api.PodInitialized)
+	if c == nil {
+		return fmt.Errorf("pod does not have initialized condition")
+	}
+	if c.LastTransitionTime.IsZero() {
+		return fmt.Errorf("PodInitialized condition should always have a transition time")
+	}
+	switch {
+	case c.Status == api.ConditionUnknown:
+		return fmt.Errorf("PodInitialized condition should never be Unknown")
+	case c.Status == api.ConditionTrue && (initFailed || !allInit):
+		return fmt.Errorf("PodInitialized condition was True but not all containers initialized")
+	case c.Status == api.ConditionFalse && (!initFailed && allInit):
+		return fmt.Errorf("PodInitialized condition was False but all containers initialized")
+	}
+	return nil
+}
+
+type InvariantFunc func(older, newer runtime.Object) error
+
+func CheckInvariants(events []watch.Event, fns ...InvariantFunc) error {
+	errs := sets.NewString()
+	for i := range events {
+		j := i + 1
+		if j >= len(events) {
+			continue
+		}
+		for _, fn := range fns {
+			if err := fn(events[i].Object, events[j].Object); err != nil {
+				errs.Insert(err.Error())
+			}
+		}
+	}
+	if errs.Len() > 0 {
+		return fmt.Errorf("invariants violated:\n* %s", strings.Join(errs.List(), "\n* "))
+	}
+	return nil
+}
+
 // Waits default amount of time (PodStartTimeout) for the specified pod to become running.
 // Returns an error if timeout occurs first, or pod goes in to failed state.
func WaitForPodRunningInNamespace(c *client.Client, podName string, namespace string) error { @@ -2218,7 +2321,11 @@ func DumpNodeDebugInfo(c *client.Client, nodeNames []string) { continue } for _, p := range podList.Items { - Logf("%v started at %v (%d container statuses recorded)", p.Name, p.Status.StartTime, len(p.Status.ContainerStatuses)) + Logf("%v started at %v (%d+%d container statuses recorded)", p.Name, p.Status.StartTime, len(p.Status.InitContainerStatuses), len(p.Status.ContainerStatuses)) + for _, c := range p.Status.InitContainerStatuses { + Logf("\tInit container %v ready: %v, restart count %v", + c.Name, c.Ready, c.RestartCount) + } for _, c := range p.Status.ContainerStatuses { Logf("\tContainer %v ready: %v, restart count %v", c.Name, c.Ready, c.RestartCount) diff --git a/test/e2e/pods.go b/test/e2e/pods.go index 47f5868b80a..da3d0821e46 100644 --- a/test/e2e/pods.go +++ b/test/e2e/pods.go @@ -659,6 +659,364 @@ var _ = framework.KubeDescribe("Pods", func() { }) }) + It("should invoke init containers on a RestartNever pod", func() { + podClient := f.Client.Pods(f.Namespace.Name) + + By("creating the pod") + name := "pod-init-" + string(util.NewUUID()) + value := strconv.Itoa(time.Now().Nanosecond()) + pod := &api.Pod{ + ObjectMeta: api.ObjectMeta{ + Name: name, + Labels: map[string]string{ + "name": "foo", + "time": value, + }, + }, + Spec: api.PodSpec{ + RestartPolicy: api.RestartPolicyNever, + InitContainers: []api.Container{ + { + Name: "init1", + Image: "gcr.io/google_containers/busybox:1.24", + Command: []string{"/bin/true"}, + }, + { + Name: "init2", + Image: "gcr.io/google_containers/busybox:1.24", + Command: []string{"/bin/true"}, + }, + }, + Containers: []api.Container{ + { + Name: "run1", + Image: "gcr.io/google_containers/busybox:1.24", + Command: []string{"/bin/true"}, + }, + }, + }, + } + defer podClient.Delete(pod.Name, nil) + startedPod, err := podClient.Create(pod) + if err != nil { + framework.Failf("Error creating a pod: %v", err) + } + w, err := podClient.Watch(api.SingleObject(startedPod.ObjectMeta)) + if err != nil { + framework.Failf("Error watching a pod: %v", err) + } + wr := watch.NewRecorder(w) + event, err := watch.Until(framework.PodStartTimeout, wr, client.PodCompleted) + Expect(err).To(BeNil()) + framework.CheckInvariants(wr.Events(), framework.ContainerInitInvariant) + endPod := event.Object.(*api.Pod) + + Expect(endPod.Status.Phase).To(Equal(api.PodSucceeded)) + _, init := api.GetPodCondition(&endPod.Status, api.PodInitialized) + Expect(init).NotTo(BeNil()) + Expect(init.Status).To(Equal(api.ConditionTrue)) + + Expect(len(endPod.Status.InitContainerStatuses)).To(Equal(2)) + for _, status := range endPod.Status.InitContainerStatuses { + Expect(status.Ready).To(BeTrue()) + Expect(status.State.Terminated).NotTo(BeNil()) + Expect(status.State.Terminated.ExitCode).To(BeZero()) + } + }) + + It("should invoke init containers on a RestartAlways pod", func() { + podClient := f.Client.Pods(f.Namespace.Name) + + By("creating the pod") + name := "pod-init-" + string(util.NewUUID()) + value := strconv.Itoa(time.Now().Nanosecond()) + pod := &api.Pod{ + ObjectMeta: api.ObjectMeta{ + Name: name, + Labels: map[string]string{ + "name": "foo", + "time": value, + }, + }, + Spec: api.PodSpec{ + InitContainers: []api.Container{ + { + Name: "init1", + Image: "gcr.io/google_containers/busybox:1.24", + Command: []string{"/bin/true"}, + }, + { + Name: "init2", + Image: "gcr.io/google_containers/busybox:1.24", + Command: []string{"/bin/true"}, + }, + }, + Containers: 
[]api.Container{ + { + Name: "run1", + Image: "gcr.io/google_containers/pause:2.0", + Resources: api.ResourceRequirements{ + Limits: api.ResourceList{ + api.ResourceCPU: *resource.NewMilliQuantity(100, resource.DecimalSI), + api.ResourceMemory: *resource.NewQuantity(10*1024*1024, resource.DecimalSI), + }, + }, + }, + }, + }, + } + defer podClient.Delete(pod.Name, nil) + startedPod, err := podClient.Create(pod) + if err != nil { + framework.Failf("Error creating a pod: %v", err) + } + w, err := podClient.Watch(api.SingleObject(startedPod.ObjectMeta)) + if err != nil { + framework.Failf("Error watching a pod: %v", err) + } + wr := watch.NewRecorder(w) + event, err := watch.Until(framework.PodStartTimeout, wr, client.PodRunning) + Expect(err).To(BeNil()) + framework.CheckInvariants(wr.Events(), framework.ContainerInitInvariant) + endPod := event.Object.(*api.Pod) + + Expect(endPod.Status.Phase).To(Equal(api.PodRunning)) + _, init := api.GetPodCondition(&endPod.Status, api.PodInitialized) + Expect(init).NotTo(BeNil()) + Expect(init.Status).To(Equal(api.ConditionTrue)) + + Expect(len(endPod.Status.InitContainerStatuses)).To(Equal(2)) + for _, status := range endPod.Status.InitContainerStatuses { + Expect(status.Ready).To(BeTrue()) + Expect(status.State.Terminated).NotTo(BeNil()) + Expect(status.State.Terminated.ExitCode).To(BeZero()) + } + }) + + It("should not start app containers if init containers fail on a RestartAlways pod", func() { + podClient := f.Client.Pods(f.Namespace.Name) + + By("creating the pod") + name := "pod-init-" + string(util.NewUUID()) + value := strconv.Itoa(time.Now().Nanosecond()) + pod := &api.Pod{ + ObjectMeta: api.ObjectMeta{ + Name: name, + Labels: map[string]string{ + "name": "foo", + "time": value, + }, + }, + Spec: api.PodSpec{ + InitContainers: []api.Container{ + { + Name: "init1", + Image: "gcr.io/google_containers/busybox:1.24", + Command: []string{"/bin/false"}, + }, + { + Name: "init2", + Image: "gcr.io/google_containers/busybox:1.24", + Command: []string{"/bin/true"}, + }, + }, + Containers: []api.Container{ + { + Name: "run1", + Image: "gcr.io/google_containers/pause:2.0", + Resources: api.ResourceRequirements{ + Limits: api.ResourceList{ + api.ResourceCPU: *resource.NewMilliQuantity(100, resource.DecimalSI), + api.ResourceMemory: *resource.NewQuantity(10*1024*1024, resource.DecimalSI), + }, + }, + }, + }, + }, + } + defer podClient.Delete(pod.Name, nil) + startedPod, err := podClient.Create(pod) + if err != nil { + framework.Failf("Error creating a pod: %v", err) + } + w, err := podClient.Watch(api.SingleObject(startedPod.ObjectMeta)) + if err != nil { + framework.Failf("Error watching a pod: %v", err) + } + + wr := watch.NewRecorder(w) + event, err := watch.Until( + framework.PodStartTimeout, wr, + // check for the first container to fail at least once + func(evt watch.Event) (bool, error) { + switch t := evt.Object.(type) { + case *api.Pod: + for _, status := range t.Status.ContainerStatuses { + if status.State.Waiting == nil { + return false, fmt.Errorf("container %q should not be out of waiting: %#v", status.Name, status) + } + if status.State.Waiting.Reason != "PodInitializing" { + return false, fmt.Errorf("container %q should have reason PodInitializing: %#v", status.Name, status) + } + } + if len(t.Status.InitContainerStatuses) != 2 { + return false, nil + } + status := t.Status.InitContainerStatuses[1] + if status.State.Waiting == nil { + return false, fmt.Errorf("second init container should not be out of waiting: %#v", status) + } + if 
status.State.Waiting.Reason != "PodInitializing" { + return false, fmt.Errorf("second init container should have reason PodInitializing: %#v", status) + } + status = t.Status.InitContainerStatuses[0] + if status.State.Terminated != nil && status.State.Terminated.ExitCode == 0 { + return false, fmt.Errorf("first init container should have exitCode != 0: %#v", status) + } + // continue until we see an attempt to restart the pod + return status.LastTerminationState.Terminated != nil, nil + default: + return false, fmt.Errorf("unexpected object: %#v", t) + } + }, + // verify we get two restarts + func(evt watch.Event) (bool, error) { + switch t := evt.Object.(type) { + case *api.Pod: + status := t.Status.InitContainerStatuses[0] + if status.RestartCount < 3 { + return false, nil + } + framework.Logf("init container has failed twice: %#v", t) + // TODO: more conditions + return true, nil + default: + return false, fmt.Errorf("unexpected object: %#v", t) + } + }, + ) + Expect(err).To(BeNil()) + framework.CheckInvariants(wr.Events(), framework.ContainerInitInvariant) + endPod := event.Object.(*api.Pod) + + Expect(endPod.Status.Phase).To(Equal(api.PodPending)) + _, init := api.GetPodCondition(&endPod.Status, api.PodInitialized) + Expect(init).NotTo(BeNil()) + Expect(init.Status).To(Equal(api.ConditionFalse)) + Expect(init.Reason).To(Equal("ContainersNotInitialized")) + Expect(init.Message).To(Equal("containers with incomplete status: [init1 init2]")) + Expect(len(endPod.Status.InitContainerStatuses)).To(Equal(2)) + }) + + It("should not start app containers and fail the pod if init containers fail on a RestartNever pod", func() { + podClient := f.Client.Pods(f.Namespace.Name) + + By("creating the pod") + name := "pod-init-" + string(util.NewUUID()) + value := strconv.Itoa(time.Now().Nanosecond()) + pod := &api.Pod{ + ObjectMeta: api.ObjectMeta{ + Name: name, + Labels: map[string]string{ + "name": "foo", + "time": value, + }, + }, + Spec: api.PodSpec{ + RestartPolicy: api.RestartPolicyNever, + InitContainers: []api.Container{ + { + Name: "init1", + Image: "gcr.io/google_containers/busybox:1.24", + Command: []string{"/bin/true"}, + }, + { + Name: "init2", + Image: "gcr.io/google_containers/busybox:1.24", + Command: []string{"/bin/false"}, + }, + }, + Containers: []api.Container{ + { + Name: "run1", + Image: "gcr.io/google_containers/busybox:1.24", + Command: []string{"/bin/true"}, + Resources: api.ResourceRequirements{ + Limits: api.ResourceList{ + api.ResourceCPU: *resource.NewMilliQuantity(100, resource.DecimalSI), + api.ResourceMemory: *resource.NewQuantity(10*1024*1024, resource.DecimalSI), + }, + }, + }, + }, + }, + } + defer podClient.Delete(pod.Name, nil) + startedPod, err := podClient.Create(pod) + if err != nil { + framework.Failf("Error creating a pod: %v", err) + } + w, err := podClient.Watch(api.SingleObject(startedPod.ObjectMeta)) + if err != nil { + framework.Failf("Error watching a pod: %v", err) + } + + wr := watch.NewRecorder(w) + event, err := watch.Until( + framework.PodStartTimeout, wr, + // check for the second container to fail at least once + func(evt watch.Event) (bool, error) { + switch t := evt.Object.(type) { + case *api.Pod: + for _, status := range t.Status.ContainerStatuses { + if status.State.Waiting == nil { + return false, fmt.Errorf("container %q should not be out of waiting: %#v", status.Name, status) + } + if status.State.Waiting.Reason != "PodInitializing" { + return false, fmt.Errorf("container %q should have reason PodInitializing: %#v", status.Name, status) + 
} + } + if len(t.Status.InitContainerStatuses) != 2 { + return false, nil + } + status := t.Status.InitContainerStatuses[0] + if status.State.Terminated == nil { + if status.State.Waiting != nil && status.State.Waiting.Reason != "PodInitializing" { + return false, fmt.Errorf("second init container should have reason PodInitializing: %#v", status) + } + return false, nil + } + if status.State.Terminated != nil && status.State.Terminated.ExitCode != 0 { + return false, fmt.Errorf("first init container should have exitCode != 0: %#v", status) + } + status = t.Status.InitContainerStatuses[1] + if status.State.Terminated == nil { + return false, nil + } + if status.State.Terminated.ExitCode == 0 { + return false, fmt.Errorf("second init container should have failed: %#v", status) + } + return true, nil + default: + return false, fmt.Errorf("unexpected object: %#v", t) + } + }, + client.PodCompleted, + ) + Expect(err).To(BeNil()) + framework.CheckInvariants(wr.Events(), framework.ContainerInitInvariant) + endPod := event.Object.(*api.Pod) + + Expect(endPod.Status.Phase).To(Equal(api.PodFailed)) + _, init := api.GetPodCondition(&endPod.Status, api.PodInitialized) + Expect(init).NotTo(BeNil()) + Expect(init.Status).To(Equal(api.ConditionFalse)) + Expect(init.Reason).To(Equal("ContainersNotInitialized")) + Expect(init.Message).To(Equal("containers with incomplete status: [init2]")) + Expect(len(endPod.Status.InitContainerStatuses)).To(Equal(2)) + Expect(endPod.Status.ContainerStatuses[0].State.Waiting).ToNot(BeNil()) + }) + It("should be restarted with a docker exec \"cat /tmp/health\" liveness probe [Conformance]", func() { runLivenessTest(f.Client, f.Namespace.Name, &api.Pod{ ObjectMeta: api.ObjectMeta{