Add message to reason cache in kubelet

commit 0d1ca62530 (parent e8f77b3673)
Author: jiangyaoguo
Date:   2015-09-16 22:07:25 +08:00

2 changed files with 32 additions and 26 deletions
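Before this change the kubelet's reason cache stored only the raw error string for each container, so ContainerStateWaiting.Reason carried unstructured error text and no separate message. This commit renames stringCache to reasonInfoCache and widens its value to a reason/message pair: call sites now record a brief, stable reason ("PullImageError", "VerifyNonRootError", "RunContainerError", or "CrashLoopBackOff") together with the full error text, and GetPodStatus surfaces both through ContainerStateWaiting.Reason and Message.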

diff --git a/pkg/kubelet/dockertools/manager.go b/pkg/kubelet/dockertools/manager.go

@@ -99,7 +99,7 @@ type DockerManager struct {
 	// 2. We use an LRU cache to avoid extra garbage collection work. This
 	//    means that some entries may be recycled before a pod has been
 	//    deleted.
-	reasonCache stringCache
+	reasonCache reasonInfoCache
 	// TODO(yifan): Record the pull failure so we can eliminate the image checking
 	//    in GetPodStatus()?
 	// Lower level docker image puller.
@@ -187,7 +187,7 @@ func NewDockerManager(
 		}
 	}
-	reasonCache := stringCache{cache: lru.New(maxReasonCacheEntries)}
+	reasonCache := reasonInfoCache{cache: lru.New(maxReasonCacheEntries)}
 	dm := &DockerManager{
 		client: client,
@@ -217,35 +217,39 @@
 }
 
 // A cache which stores strings keyed by <pod_UID>_<container_name>.
-type stringCache struct {
+type reasonInfoCache struct {
 	lock  sync.RWMutex
 	cache *lru.Cache
 }
+type reasonInfo struct {
+	reason  string
+	message string
+}
 
-func (sc *stringCache) composeKey(uid types.UID, name string) string {
+func (sc *reasonInfoCache) composeKey(uid types.UID, name string) string {
 	return fmt.Sprintf("%s_%s", uid, name)
 }
 
-func (sc *stringCache) Add(uid types.UID, name string, value string) {
+func (sc *reasonInfoCache) Add(uid types.UID, name string, reason, message string) {
 	sc.lock.Lock()
 	defer sc.lock.Unlock()
-	sc.cache.Add(sc.composeKey(uid, name), value)
+	sc.cache.Add(sc.composeKey(uid, name), reasonInfo{reason, message})
 }
 
-func (sc *stringCache) Remove(uid types.UID, name string) {
+func (sc *reasonInfoCache) Remove(uid types.UID, name string) {
 	sc.lock.Lock()
 	defer sc.lock.Unlock()
 	sc.cache.Remove(sc.composeKey(uid, name))
 }
 
-func (sc *stringCache) Get(uid types.UID, name string) (string, bool) {
+func (sc *reasonInfoCache) Get(uid types.UID, name string) (reasonInfo, bool) {
 	sc.lock.RLock()
 	defer sc.lock.RUnlock()
 	value, ok := sc.cache.Get(sc.composeKey(uid, name))
 	if ok {
-		return value.(string), ok
+		return value.(reasonInfo), ok
 	} else {
-		return "", ok
+		return reasonInfo{"", ""}, ok
 	}
 }
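Outside the diff context, the pattern is easier to see as a self-contained program. The sketch below reimplements the same idea under the same names using github.com/golang/groupcache/lru (the LRU package the kubelet vendors); the main function, the string-typed uid parameter, and the maxReasonCacheEntries value are illustrative, not part of this commit. Note that lru.Cache is not goroutine-safe on its own, which is why every method takes the lock.

// reason_cache_sketch.go - standalone sketch of the reasonInfoCache pattern.
package main

import (
	"fmt"
	"sync"

	"github.com/golang/groupcache/lru"
)

const maxReasonCacheEntries = 200 // illustrative capacity

// reasonInfo pairs a brief, stable reason with the full error text.
type reasonInfo struct {
	reason  string // e.g. "PullImageError"
	message string // e.g. the full error from the failed pull
}

// reasonInfoCache guards a non-goroutine-safe lru.Cache with an RWMutex.
type reasonInfoCache struct {
	lock  sync.RWMutex
	cache *lru.Cache
}

// composeKey mirrors the kubelet's <pod_UID>_<container_name> key scheme.
func (c *reasonInfoCache) composeKey(uid, name string) string {
	return fmt.Sprintf("%s_%s", uid, name)
}

func (c *reasonInfoCache) Add(uid, name, reason, message string) {
	c.lock.Lock()
	defer c.lock.Unlock()
	c.cache.Add(c.composeKey(uid, name), reasonInfo{reason, message})
}

func (c *reasonInfoCache) Get(uid, name string) (reasonInfo, bool) {
	c.lock.RLock()
	defer c.lock.RUnlock()
	if v, ok := c.cache.Get(c.composeKey(uid, name)); ok {
		return v.(reasonInfo), true
	}
	return reasonInfo{}, false
}

func main() {
	c := reasonInfoCache{cache: lru.New(maxReasonCacheEntries)}
	c.Add("pod-uid-1", "app", "PullImageError", `image "busybox:9.9" not found`)
	if info, ok := c.Get("pod-uid-1", "app"); ok {
		fmt.Printf("reason=%q message=%q\n", info.reason, info.message)
	}
}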
@@ -485,8 +489,8 @@ func (dm *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
 	// Handle the containers for which we cannot find any associated active or dead docker containers or are in restart backoff
 	for _, container := range manifest.Containers {
 		if containerStatus, found := statuses[container.Name]; found {
-			reason, ok := dm.reasonCache.Get(uid, container.Name)
-			if ok && reason == kubecontainer.ErrCrashLoopBackOff.Error() {
+			reasonInfo, ok := dm.reasonCache.Get(uid, container.Name)
+			if ok && reasonInfo.reason == kubecontainer.ErrCrashLoopBackOff.Error() {
 				// We need to increment the restart count if we are going to
 				// move the current state to last terminated state.
 				if containerStatus.State.Terminated != nil {
@@ -496,7 +500,8 @@ func (dm *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
 				}
 			}
 			containerStatus.LastTerminationState = containerStatus.State
-			containerStatus.State.Waiting = &api.ContainerStateWaiting{Reason: reason}
+			containerStatus.State.Waiting = &api.ContainerStateWaiting{Reason: reasonInfo.reason,
+				Message: reasonInfo.message}
 			containerStatus.State.Running = nil
 		}
 		continue
@@ -532,8 +537,9 @@ func (dm *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
 	for containerName, status := range statuses {
 		if status.State.Waiting != nil {
 			// For containers in the waiting state, fill in a specific reason if it is recorded.
-			if reason, ok := dm.reasonCache.Get(uid, containerName); ok {
-				status.State.Waiting.Reason = reason
+			if reasonInfo, ok := dm.reasonCache.Get(uid, containerName); ok {
+				status.State.Waiting.Reason = reasonInfo.reason
+				status.State.Waiting.Message = reasonInfo.message
 			}
 		}
 		podStatus.ContainerStatuses = append(podStatus.ContainerStatuses, *status)
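One small readability cost in both GetPodStatus hunks: the local variable reasonInfo shadows the type of the same name, so the type is no longer directly nameable inside those blocks. A less shadow-prone spelling would be the following (a style suggestion, not what the commit does):

	// Alternative that avoids shadowing the reasonInfo type.
	if info, ok := dm.reasonCache.Get(uid, containerName); ok {
		status.State.Waiting.Reason = info.reason
		status.State.Waiting.Message = info.message
	}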
@@ -1681,12 +1687,12 @@ func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, runningPod kub
 }
 
 // updateReasonCache updates the failure reason based on the latest error.
-func (dm *DockerManager) updateReasonCache(pod *api.Pod, container *api.Container, err error) {
-	if err == nil {
+func (dm *DockerManager) updateReasonCache(pod *api.Pod, container *api.Container, briefError string, err error) {
+	if briefError == "" || err == nil {
 		return
 	}
 	errString := err.Error()
-	dm.reasonCache.Add(pod.UID, container.Name, errString)
+	dm.reasonCache.Add(pod.UID, container.Name, briefError, errString)
 }
 
 // clearReasonCache removes the entry in the reason cache.
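The convention at each call site, then, is a short CamelCase identifier plus the raw error; because updateReasonCache returns early when err is nil, callers can invoke it unconditionally and only failures are recorded. A hedged sketch of the pattern, where pullImage stands in for any fallible step:

	// Hypothetical call site following this commit's convention: a stable
	// brief reason plus the full error. On success (err == nil) the call
	// is a no-op.
	err := pullImage(pod, container) // stand-in for a fallible step
	dm.updateReasonCache(pod, container, "PullImageError", err)
	if err != nil {
		glog.Warningf("image pull failed: %v", err)
		continue // skip this container, as SyncPod does
	}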
@@ -1780,7 +1786,7 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod
 		}
 		glog.V(4).Infof("Creating container %+v in pod %v", container, podFullName)
 		err := dm.imagePuller.PullImage(pod, container, pullSecrets)
-		dm.updateReasonCache(pod, container, err)
+		dm.updateReasonCache(pod, container, "PullImageError", err)
 		if err != nil {
 			glog.Warningf("Failed to pull image %q from pod %q and container %q: %v", container.Image, kubecontainer.GetPodFullName(pod), container.Name, err)
 			continue
@@ -1788,7 +1794,7 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod
 		if container.SecurityContext != nil && container.SecurityContext.RunAsNonRoot {
 			err := dm.verifyNonRoot(container)
-			dm.updateReasonCache(pod, container, err)
+			dm.updateReasonCache(pod, container, "VerifyNonRootError", err)
 			if err != nil {
 				glog.Errorf("Error running pod %q container %q: %v", kubecontainer.GetPodFullName(pod), container.Name, err)
 				continue
@@ -1798,7 +1804,7 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod
 		// TODO(dawnchen): Check RestartPolicy.DelaySeconds before restart a container
 		namespaceMode := fmt.Sprintf("container:%v", podInfraContainerID)
 		_, err = dm.runContainerInPod(pod, container, namespaceMode, namespaceMode, getPidMode(pod))
-		dm.updateReasonCache(pod, container, err)
+		dm.updateReasonCache(pod, container, "RunContainerError", err)
 		if err != nil {
 			// TODO(bburns) : Perhaps blacklist a container after N failures?
 			glog.Errorf("Error running pod %q container %q: %v", kubecontainer.GetPodFullName(pod), container.Name, err)
@@ -1901,8 +1907,9 @@ func (dm *DockerManager) doBackOff(pod *api.Pod, container *api.Container, podSt
 		if ref, err := kubecontainer.GenerateContainerRef(pod, container); err == nil {
 			dm.recorder.Eventf(ref, "Backoff", "Back-off restarting failed docker container")
 		}
-		dm.updateReasonCache(pod, container, kubecontainer.ErrCrashLoopBackOff)
-		glog.Infof("Back-off %s restarting failed container=%s pod=%s", backOff.Get(stableName), container.Name, kubecontainer.GetPodFullName(pod))
+		err := fmt.Errorf("Back-off %s restarting failed container=%s pod=%s", backOff.Get(stableName), container.Name, kubecontainer.GetPodFullName(pod))
+		dm.updateReasonCache(pod, container, kubecontainer.ErrCrashLoopBackOff.Error(), err)
+		glog.Infof("%s", err.Error())
 		return true
 	}
 	backOff.Next(stableName, ts.Time)
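In the back-off path the brief reason is the kubecontainer.ErrCrashLoopBackOff sentinel rendered as a string, and the message is the formatted back-off line that previously reached only the log; wrapping it in an error lets updateReasonCache accept it unchanged. Roughly, a status consumer would now see both Waiting fields populated (the values below are invented examples, not captured output):

	// Illustrative ContainerStateWaiting for a crash-looping container
	// after this change (example values only):
	waiting := api.ContainerStateWaiting{
		Reason:  "CrashLoopBackOff", // kubecontainer.ErrCrashLoopBackOff.Error()
		Message: "Back-off 20s restarting failed container=app pod=mypod_default",
	}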

diff --git a/pkg/kubelet/dockertools/manager_test.go b/pkg/kubelet/dockertools/manager_test.go

@@ -889,7 +889,6 @@ func TestSyncPodCreateNetAndContainer(t *testing.T) {
 		// Create container.
 		"create", "start", "inspect_container",
 	})
-
 	fakeDocker.Lock()
 	found := false
@@ -1728,7 +1727,7 @@ func TestGetPodCreationFailureReason(t *testing.T) {
 	dm, fakeDocker := newTestDockerManager()
 	// Inject the creation failure error to docker.
-	failureReason := "creation failure"
+	failureReason := "RunContainerError"
 	fakeDocker.Errors = map[string]error{
 		"create": fmt.Errorf("%s", failureReason),
 	}
@@ -1786,7 +1785,7 @@ func TestGetPodPullImageFailureReason(t *testing.T) {
 	puller := dm.dockerPuller.(*FakeDockerPuller)
 	puller.HasImages = []string{}
 	// Inject the pull image failure error.
-	failureReason := "pull image faiulre"
+	failureReason := "PullImageError"
 	puller.ErrorsToInject = []error{fmt.Errorf("%s", failureReason)}
 	pod := &api.Pod{
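The updated tests assert on the brief reasons rather than raw error strings. A round-trip test for the cache type itself might look like the following hypothetical sketch (an in-package test, not part of this commit's test file):

	// Hypothetical test exercising the new cache type directly.
	func TestReasonInfoCacheRoundTrip(t *testing.T) {
		cache := reasonInfoCache{cache: lru.New(maxReasonCacheEntries)}
		cache.Add("uid", "app", "PullImageError", "image not found")
		info, ok := cache.Get("uid", "app")
		if !ok || info.reason != "PullImageError" || info.message != "image not found" {
			t.Errorf("unexpected cache entry: %+v (ok=%v)", info, ok)
		}
	}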