mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-22 11:21:47 +00:00
Merge pull request #89160 from tedyu/symlink-first-seen
Remove potentially unhealthy symlink only for dead containers
This commit is contained in:
commit
f7907083c2
@ -356,9 +356,35 @@ func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
|
||||
logSymlinks, _ := osInterface.Glob(filepath.Join(legacyContainerLogsDir, fmt.Sprintf("*.%s", legacyLogSuffix)))
|
||||
for _, logSymlink := range logSymlinks {
|
||||
if _, err := osInterface.Stat(logSymlink); os.IsNotExist(err) {
|
||||
if containerID, err := getContainerIDFromLegacyLogSymlink(logSymlink); err == nil {
|
||||
status, err := cgc.manager.runtimeService.ContainerStatus(containerID)
|
||||
if err != nil {
|
||||
// TODO: we should handle container not found (i.e. container was deleted) case differently
|
||||
// once https://github.com/kubernetes/kubernetes/issues/63336 is resolved
|
||||
klog.Infof("Error getting ContainerStatus for containerID %q: %v", containerID, err)
|
||||
} else if status.State != runtimeapi.ContainerState_CONTAINER_EXITED {
|
||||
// Here is how container log rotation works (see containerLogManager#rotateLatestLog):
|
||||
//
|
||||
// 1. rename current log to rotated log file whose filename contains current timestamp (fmt.Sprintf("%s.%s", log, timestamp))
|
||||
// 2. reopen the container log
|
||||
// 3. if #2 fails, rename rotated log file back to container log
|
||||
//
|
||||
// There is small but indeterministic amount of time during which log file doesn't exist (between steps #1 and #2, between #1 and #3).
|
||||
// Hence the symlink may be deemed unhealthy during that period.
|
||||
// See https://github.com/kubernetes/kubernetes/issues/52172
|
||||
//
|
||||
// We only remove unhealthy symlink for dead containers
|
||||
klog.V(5).Infof("Container %q is still running, not removing symlink %q.", containerID, logSymlink)
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
klog.V(4).Infof("unable to obtain container Id: %v", err)
|
||||
}
|
||||
err := osInterface.Remove(logSymlink)
|
||||
if err != nil {
|
||||
klog.Errorf("Failed to remove container log dead symlink %q: %v", logSymlink, err)
|
||||
} else {
|
||||
klog.V(4).Infof("removed symlink %s", logSymlink)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -19,6 +19,7 @@ package kuberuntime
|
||||
import (
|
||||
"fmt"
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
)
|
||||
@ -44,6 +45,25 @@ func legacyLogSymlink(containerID string, containerName, podName, podNamespace s
|
||||
containerName, containerID)
|
||||
}
|
||||
|
||||
// getContainerIDFromLegacyLogSymlink returns error if container Id cannot be parsed
|
||||
func getContainerIDFromLegacyLogSymlink(logSymlink string) (string, error) {
|
||||
parts := strings.Split(logSymlink, "-")
|
||||
if len(parts) == 0 {
|
||||
return "", fmt.Errorf("unable to find separator in %q", logSymlink)
|
||||
}
|
||||
containerIDWithSuffix := parts[len(parts)-1]
|
||||
suffix := fmt.Sprintf(".%s", legacyLogSuffix)
|
||||
if !strings.HasSuffix(containerIDWithSuffix, suffix) {
|
||||
return "", fmt.Errorf("%q doesn't end with %q", logSymlink, suffix)
|
||||
}
|
||||
containerIDWithoutSuffix := strings.TrimSuffix(containerIDWithSuffix, suffix)
|
||||
// container can be retrieved with container Id as short as 6 characters
|
||||
if len(containerIDWithoutSuffix) < 6 {
|
||||
return "", fmt.Errorf("container Id %q is too short", containerIDWithoutSuffix)
|
||||
}
|
||||
return containerIDWithoutSuffix, nil
|
||||
}
|
||||
|
||||
func logSymlink(containerLogsDir, podFullName, containerName, containerID string) string {
|
||||
suffix := fmt.Sprintf(".%s", legacyLogSuffix)
|
||||
logPath := fmt.Sprintf("%s_%s-%s", podFullName, containerName, containerID)
|
||||
|
@ -473,7 +473,7 @@ func (r *RemoteRuntimeService) ContainerStats(containerID string) (*runtimeapi.C
|
||||
})
|
||||
if err != nil {
|
||||
if r.logReduction.ShouldMessageBePrinted(err.Error(), containerID) {
|
||||
klog.Errorf("ContainerStatus %q from runtime service failed: %v", containerID, err)
|
||||
klog.Errorf("ContainerStats %q from runtime service failed: %v", containerID, err)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user