Add message to reason cache in kubelet
This commit widens the kubelet Docker manager's reason cache from a single string per container to a (reason, message) pair: a brief, stable reason such as "PullImageError" or "CrashLoopBackOff", plus the detailed error text. Both fields are then propagated into the container's waiting state in pod status. The hunks below cover the DockerManager implementation followed by its tests.
@@ -99,7 +99,7 @@ type DockerManager struct {
 	// 2. We use an LRU cache to avoid extra garbage collection work. This
 	// means that some entries may be recycled before a pod has been
 	// deleted.
-	reasonCache stringCache
+	reasonCache reasonInfoCache
 	// TODO(yifan): Record the pull failure so we can eliminate the image checking
 	// in GetPodStatus()?
 	// Lower level docker image puller.

@@ -187,7 +187,7 @@ func NewDockerManager(
 		}
 	}
 
-	reasonCache := stringCache{cache: lru.New(maxReasonCacheEntries)}
+	reasonCache := reasonInfoCache{cache: lru.New(maxReasonCacheEntries)}
 
 	dm := &DockerManager{
 		client: client,

@@ -217,35 +217,39 @@ func NewDockerManager(
 }
 
 // A cache which stores strings keyed by <pod_UID>_<container_name>.
-type stringCache struct {
+type reasonInfoCache struct {
 	lock  sync.RWMutex
 	cache *lru.Cache
 }
+type reasonInfo struct {
+	reason  string
+	message string
+}
 
-func (sc *stringCache) composeKey(uid types.UID, name string) string {
+func (sc *reasonInfoCache) composeKey(uid types.UID, name string) string {
 	return fmt.Sprintf("%s_%s", uid, name)
 }
 
-func (sc *stringCache) Add(uid types.UID, name string, value string) {
+func (sc *reasonInfoCache) Add(uid types.UID, name string, reason, message string) {
 	sc.lock.Lock()
 	defer sc.lock.Unlock()
-	sc.cache.Add(sc.composeKey(uid, name), value)
+	sc.cache.Add(sc.composeKey(uid, name), reasonInfo{reason, message})
 }
 
-func (sc *stringCache) Remove(uid types.UID, name string) {
+func (sc *reasonInfoCache) Remove(uid types.UID, name string) {
 	sc.lock.Lock()
 	defer sc.lock.Unlock()
 	sc.cache.Remove(sc.composeKey(uid, name))
 }
 
-func (sc *stringCache) Get(uid types.UID, name string) (string, bool) {
+func (sc *reasonInfoCache) Get(uid types.UID, name string) (reasonInfo, bool) {
 	sc.lock.RLock()
 	defer sc.lock.RUnlock()
 	value, ok := sc.cache.Get(sc.composeKey(uid, name))
 	if ok {
-		return value.(string), ok
+		return value.(reasonInfo), ok
 	} else {
-		return "", ok
+		return reasonInfo{"", ""}, ok
 	}
 }
 
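For context, the new type is a small concurrency-safe LRU map from <pod_UID>_<container_name> to a (reason, message) pair. Below is a minimal standalone sketch of the same pattern (not part of the commit, purely illustrative); it assumes the github.com/golang/groupcache/lru package that kubelet vendors, with plain strings standing in for types.UID and 200 standing in for maxReasonCacheEntries:

package main

import (
	"fmt"
	"sync"

	"github.com/golang/groupcache/lru"
)

// reasonInfo pairs a brief, stable reason with a detailed human-readable message.
type reasonInfo struct {
	reason  string
	message string
}

// reasonInfoCache mirrors the kubelet pattern above: a mutex guards a
// non-thread-safe LRU cache, and keys combine pod UID and container name.
type reasonInfoCache struct {
	lock  sync.Mutex
	cache *lru.Cache
}

func newReasonInfoCache(maxEntries int) *reasonInfoCache {
	return &reasonInfoCache{cache: lru.New(maxEntries)}
}

func (c *reasonInfoCache) composeKey(uid, name string) string {
	return fmt.Sprintf("%s_%s", uid, name)
}

func (c *reasonInfoCache) Add(uid, name, reason, message string) {
	c.lock.Lock()
	defer c.lock.Unlock()
	c.cache.Add(c.composeKey(uid, name), reasonInfo{reason, message})
}

func (c *reasonInfoCache) Get(uid, name string) (reasonInfo, bool) {
	// lru.Cache.Get updates recency order, so even reads take the full lock
	// here; the kubelet code uses an RWMutex read lock instead.
	c.lock.Lock()
	defer c.lock.Unlock()
	if v, ok := c.cache.Get(c.composeKey(uid, name)); ok {
		return v.(reasonInfo), true
	}
	return reasonInfo{}, false
}

func main() {
	c := newReasonInfoCache(200)
	c.Add("pod-uid-1", "nginx", "PullImageError", "image pull failed: not found")
	if info, ok := c.Get("pod-uid-1", "nginx"); ok {
		fmt.Printf("reason=%q message=%q\n", info.reason, info.message)
	}
}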
@@ -485,8 +489,8 @@ func (dm *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
 	// Handle the containers for which we cannot find any associated active or dead docker containers or are in restart backoff
 	for _, container := range manifest.Containers {
 		if containerStatus, found := statuses[container.Name]; found {
-			reason, ok := dm.reasonCache.Get(uid, container.Name)
-			if ok && reason == kubecontainer.ErrCrashLoopBackOff.Error() {
+			reasonInfo, ok := dm.reasonCache.Get(uid, container.Name)
+			if ok && reasonInfo.reason == kubecontainer.ErrCrashLoopBackOff.Error() {
 				// We need to increment the restart count if we are going to
 				// move the current state to last terminated state.
 				if containerStatus.State.Terminated != nil {

@@ -496,7 +500,8 @@ func (dm *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
 				}
 			}
 			containerStatus.LastTerminationState = containerStatus.State
-			containerStatus.State.Waiting = &api.ContainerStateWaiting{Reason: reason}
+			containerStatus.State.Waiting = &api.ContainerStateWaiting{Reason: reasonInfo.reason,
+				Message: reasonInfo.message}
 			containerStatus.State.Running = nil
 		}
 		continue

@@ -532,8 +537,9 @@ func (dm *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
 	for containerName, status := range statuses {
 		if status.State.Waiting != nil {
 			// For containers in the waiting state, fill in a specific reason if it is recorded.
-			if reason, ok := dm.reasonCache.Get(uid, containerName); ok {
-				status.State.Waiting.Reason = reason
+			if reasonInfo, ok := dm.reasonCache.Get(uid, containerName); ok {
+				status.State.Waiting.Reason = reasonInfo.reason
+				status.State.Waiting.Message = reasonInfo.message
 			}
 		}
 		podStatus.ContainerStatuses = append(podStatus.ContainerStatuses, *status)

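The reason the cache now carries two fields is visible in these GetPodStatus hunks: the brief reason becomes ContainerStateWaiting.Reason, the short cause surfaced in pod status, and the detailed text becomes ContainerStateWaiting.Message. A hedged illustration of that mapping, extending the sketch above with a simplified stand-in for the API type:

// ContainerStateWaiting is a simplified stand-in for the api package's type.
type ContainerStateWaiting struct {
	Reason  string // brief, stable cause, e.g. "PullImageError"
	Message string // full human-readable error text
}

// waitingState converts a cached entry into a waiting state, mirroring what
// the GetPodStatus hunks above do.
func waitingState(info reasonInfo) *ContainerStateWaiting {
	return &ContainerStateWaiting{Reason: info.reason, Message: info.message}
}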
@@ -1681,12 +1687,12 @@ func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, runningPod kub
 }
 
 // updateReasonCache updates the failure reason based on the latest error.
-func (dm *DockerManager) updateReasonCache(pod *api.Pod, container *api.Container, err error) {
-	if err == nil {
+func (dm *DockerManager) updateReasonCache(pod *api.Pod, container *api.Container, briefError string, err error) {
+	if briefError == "" || err == nil {
 		return
 	}
 	errString := err.Error()
-	dm.reasonCache.Add(pod.UID, container.Name, errString)
+	dm.reasonCache.Add(pod.UID, container.Name, briefError, errString)
 }
 
 // clearReasonCache removes the entry in the reason cache.

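The widened signature pushes a convention onto every caller: pass a stable CamelCase identifier as the brief reason ("PullImageError", "VerifyNonRootError", "RunContainerError") plus the raw error. A sketch of that call-site pattern, extending the cache sketch above (recordFailure is an illustrative name, not the kubelet's):

// recordFailure mirrors updateReasonCache: skip recording unless there is
// both a brief reason and an actual error, otherwise cache the pair.
func (c *reasonInfoCache) recordFailure(uid, name, briefReason string, err error) {
	if briefReason == "" || err == nil {
		return
	}
	c.Add(uid, name, briefReason, err.Error())
}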
@@ -1780,7 +1786,7 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod
 		}
 		glog.V(4).Infof("Creating container %+v in pod %v", container, podFullName)
 		err := dm.imagePuller.PullImage(pod, container, pullSecrets)
-		dm.updateReasonCache(pod, container, err)
+		dm.updateReasonCache(pod, container, "PullImageError", err)
 		if err != nil {
 			glog.Warningf("Failed to pull image %q from pod %q and container %q: %v", container.Image, kubecontainer.GetPodFullName(pod), container.Name, err)
 			continue

@@ -1788,7 +1794,7 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod
 
 		if container.SecurityContext != nil && container.SecurityContext.RunAsNonRoot {
 			err := dm.verifyNonRoot(container)
-			dm.updateReasonCache(pod, container, err)
+			dm.updateReasonCache(pod, container, "VerifyNonRootError", err)
 			if err != nil {
 				glog.Errorf("Error running pod %q container %q: %v", kubecontainer.GetPodFullName(pod), container.Name, err)
 				continue

@@ -1798,7 +1804,7 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod
 		// TODO(dawnchen): Check RestartPolicy.DelaySeconds before restart a container
 		namespaceMode := fmt.Sprintf("container:%v", podInfraContainerID)
 		_, err = dm.runContainerInPod(pod, container, namespaceMode, namespaceMode, getPidMode(pod))
-		dm.updateReasonCache(pod, container, err)
+		dm.updateReasonCache(pod, container, "RunContainerError", err)
 		if err != nil {
 			// TODO(bburns) : Perhaps blacklist a container after N failures?
 			glog.Errorf("Error running pod %q container %q: %v", kubecontainer.GetPodFullName(pod), container.Name, err)

@@ -1901,8 +1907,9 @@ func (dm *DockerManager) doBackOff(pod *api.Pod, container *api.Container, podSt
 		if ref, err := kubecontainer.GenerateContainerRef(pod, container); err == nil {
 			dm.recorder.Eventf(ref, "Backoff", "Back-off restarting failed docker container")
 		}
-		dm.updateReasonCache(pod, container, kubecontainer.ErrCrashLoopBackOff)
-		glog.Infof("Back-off %s restarting failed container=%s pod=%s", backOff.Get(stableName), container.Name, kubecontainer.GetPodFullName(pod))
+		err := fmt.Errorf("Back-off %s restarting failed container=%s pod=%s", backOff.Get(stableName), container.Name, kubecontainer.GetPodFullName(pod))
+		dm.updateReasonCache(pod, container, kubecontainer.ErrCrashLoopBackOff.Error(), err)
+		glog.Infof("%s", err.Error())
 		return true
 	}
 	backOff.Next(stableName, ts.Time)

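The back-off path follows the same recipe with one twist: a single synthesized error supplies both the cached message and the log line, keeping the reported status and the kubelet log consistent. A sketch extending the helpers above (ErrCrashLoopBackOff here stands in for the kubecontainer sentinel, and the extra imports errors, log, and time are assumed):

var ErrCrashLoopBackOff = errors.New("CrashLoopBackOff")

// recordBackOff builds one error value and reuses it for both the cached
// message and the log output, as the hunk above does.
func recordBackOff(c *reasonInfoCache, uid, name, podFullName string, delay time.Duration) {
	err := fmt.Errorf("Back-off %s restarting failed container=%s pod=%s", delay, name, podFullName)
	c.recordFailure(uid, name, ErrCrashLoopBackOff.Error(), err)
	log.Printf("%s", err.Error())
}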
@@ -889,7 +889,6 @@ func TestSyncPodCreateNetAndContainer(t *testing.T) {
 		// Create container.
 		"create", "start", "inspect_container",
 	})
 
 	fakeDocker.Lock()
-
 	found := false

@@ -1728,7 +1727,7 @@ func TestGetPodCreationFailureReason(t *testing.T) {
 	dm, fakeDocker := newTestDockerManager()
 
 	// Inject the creation failure error to docker.
-	failureReason := "creation failure"
+	failureReason := "RunContainerError"
 	fakeDocker.Errors = map[string]error{
 		"create": fmt.Errorf("%s", failureReason),
 	}

@@ -1786,7 +1785,7 @@ func TestGetPodPullImageFailureReason(t *testing.T) {
 	puller := dm.dockerPuller.(*FakeDockerPuller)
 	puller.HasImages = []string{}
 	// Inject the pull image failure error.
-	failureReason := "pull image faiulre"
+	failureReason := "PullImageError"
 	puller.ErrorsToInject = []error{fmt.Errorf("%s", failureReason)}
 
 	pod := &api.Pod{