Merge pull request #89160 from tedyu/symlink-first-seen

Remove potentially unhealthy symlink only for dead containers
This commit is contained in:
Kubernetes Prow Robot 2020-04-28 09:58:07 -07:00 committed by GitHub
commit f7907083c2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 47 additions and 1 deletions

View File

@ -356,9 +356,35 @@ func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
logSymlinks, _ := osInterface.Glob(filepath.Join(legacyContainerLogsDir, fmt.Sprintf("*.%s", legacyLogSuffix)))
for _, logSymlink := range logSymlinks {
if _, err := osInterface.Stat(logSymlink); os.IsNotExist(err) {
if containerID, err := getContainerIDFromLegacyLogSymlink(logSymlink); err == nil {
status, err := cgc.manager.runtimeService.ContainerStatus(containerID)
if err != nil {
// TODO: we should handle container not found (i.e. container was deleted) case differently
// once https://github.com/kubernetes/kubernetes/issues/63336 is resolved
klog.Infof("Error getting ContainerStatus for containerID %q: %v", containerID, err)
} else if status.State != runtimeapi.ContainerState_CONTAINER_EXITED {
// Here is how container log rotation works (see containerLogManager#rotateLatestLog):
//
// 1. rename current log to rotated log file whose filename contains current timestamp (fmt.Sprintf("%s.%s", log, timestamp))
// 2. reopen the container log
// 3. if #2 fails, rename rotated log file back to container log
//
// There is a small but nondeterministic amount of time during which the log file doesn't exist (between steps #1 and #2, or between #1 and #3).
// Hence the symlink may be deemed unhealthy during that period.
// See https://github.com/kubernetes/kubernetes/issues/52172
//
// We only remove unhealthy symlinks for dead containers.
klog.V(5).Infof("Container %q is still running, not removing symlink %q.", containerID, logSymlink)
continue
}
} else {
klog.V(4).Infof("unable to obtain container Id: %v", err)
}
err := osInterface.Remove(logSymlink)
if err != nil {
klog.Errorf("Failed to remove container log dead symlink %q: %v", logSymlink, err)
} else {
klog.V(4).Infof("removed symlink %s", logSymlink)
}
}
}

View File

@ -19,6 +19,7 @@ package kuberuntime
import (
"fmt"
"path"
"strings"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
@ -44,6 +45,25 @@ func legacyLogSymlink(containerID string, containerName, podName, podNamespace s
containerName, containerID)
}
// getContainerIDFromLegacyLogSymlink extracts the container ID from a legacy
// container log symlink path.
//
// The ID is taken to be the text between the last "-" in logSymlink and the
// trailing ".<legacyLogSuffix>". An error is returned if logSymlink contains
// no "-" separator, does not end with ".<legacyLogSuffix>", or yields an ID
// shorter than 6 characters.
func getContainerIDFromLegacyLogSymlink(logSymlink string) (string, error) {
	// strings.Split never returns an empty slice for a non-empty separator, so
	// a length check on its result cannot detect a missing "-". Look for the
	// last separator explicitly instead.
	sepIdx := strings.LastIndex(logSymlink, "-")
	if sepIdx < 0 {
		return "", fmt.Errorf("unable to find separator in %q", logSymlink)
	}
	containerIDWithSuffix := logSymlink[sepIdx+1:]
	suffix := fmt.Sprintf(".%s", legacyLogSuffix)
	if !strings.HasSuffix(containerIDWithSuffix, suffix) {
		return "", fmt.Errorf("%q doesn't end with %q", logSymlink, suffix)
	}
	containerIDWithoutSuffix := strings.TrimSuffix(containerIDWithSuffix, suffix)
	// container can be retrieved with container Id as short as 6 characters
	if len(containerIDWithoutSuffix) < 6 {
		return "", fmt.Errorf("container Id %q is too short", containerIDWithoutSuffix)
	}
	return containerIDWithoutSuffix, nil
}
func logSymlink(containerLogsDir, podFullName, containerName, containerID string) string {
suffix := fmt.Sprintf(".%s", legacyLogSuffix)
logPath := fmt.Sprintf("%s_%s-%s", podFullName, containerName, containerID)

View File

@ -473,7 +473,7 @@ func (r *RemoteRuntimeService) ContainerStats(containerID string) (*runtimeapi.C
})
if err != nil {
if r.logReduction.ShouldMessageBePrinted(err.Error(), containerID) {
klog.Errorf("ContainerStatus %q from runtime service failed: %v", containerID, err)
klog.Errorf("ContainerStats %q from runtime service failed: %v", containerID, err)
}
return nil, err
}