mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-24 04:06:03 +00:00
Merge pull request #89160 from tedyu/symlink-first-seen
Remove potentially unhealthy symlink only for dead containers
This commit is contained in:
commit
f7907083c2
@ -356,9 +356,35 @@ func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
|
|||||||
logSymlinks, _ := osInterface.Glob(filepath.Join(legacyContainerLogsDir, fmt.Sprintf("*.%s", legacyLogSuffix)))
|
logSymlinks, _ := osInterface.Glob(filepath.Join(legacyContainerLogsDir, fmt.Sprintf("*.%s", legacyLogSuffix)))
|
||||||
for _, logSymlink := range logSymlinks {
|
for _, logSymlink := range logSymlinks {
|
||||||
if _, err := osInterface.Stat(logSymlink); os.IsNotExist(err) {
|
if _, err := osInterface.Stat(logSymlink); os.IsNotExist(err) {
|
||||||
|
if containerID, err := getContainerIDFromLegacyLogSymlink(logSymlink); err == nil {
|
||||||
|
status, err := cgc.manager.runtimeService.ContainerStatus(containerID)
|
||||||
|
if err != nil {
|
||||||
|
// TODO: we should handle container not found (i.e. container was deleted) case differently
|
||||||
|
// once https://github.com/kubernetes/kubernetes/issues/63336 is resolved
|
||||||
|
klog.Infof("Error getting ContainerStatus for containerID %q: %v", containerID, err)
|
||||||
|
} else if status.State != runtimeapi.ContainerState_CONTAINER_EXITED {
|
||||||
|
// Here is how container log rotation works (see containerLogManager#rotateLatestLog):
|
||||||
|
//
|
||||||
|
// 1. rename current log to rotated log file whose filename contains current timestamp (fmt.Sprintf("%s.%s", log, timestamp))
|
||||||
|
// 2. reopen the container log
|
||||||
|
// 3. if #2 fails, rename rotated log file back to container log
|
||||||
|
//
|
||||||
|
// There is a small but non-deterministic window of time during which the log file doesn't exist (between steps #1 and #2, or between #1 and #3).
|
||||||
|
// Hence the symlink may be deemed unhealthy during that period.
|
||||||
|
// See https://github.com/kubernetes/kubernetes/issues/52172
|
||||||
|
//
|
||||||
|
// We only remove unhealthy symlinks for dead containers.
|
||||||
|
klog.V(5).Infof("Container %q is still running, not removing symlink %q.", containerID, logSymlink)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
klog.V(4).Infof("unable to obtain container Id: %v", err)
|
||||||
|
}
|
||||||
err := osInterface.Remove(logSymlink)
|
err := osInterface.Remove(logSymlink)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Errorf("Failed to remove container log dead symlink %q: %v", logSymlink, err)
|
klog.Errorf("Failed to remove container log dead symlink %q: %v", logSymlink, err)
|
||||||
|
} else {
|
||||||
|
klog.V(4).Infof("removed symlink %s", logSymlink)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -19,6 +19,7 @@ package kuberuntime
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"path"
|
"path"
|
||||||
|
"strings"
|
||||||
|
|
||||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||||
)
|
)
|
||||||
@ -44,6 +45,25 @@ func legacyLogSymlink(containerID string, containerName, podName, podNamespace s
|
|||||||
containerName, containerID)
|
containerName, containerID)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getContainerIDFromLegacyLogSymlink returns error if container Id cannot be parsed
|
||||||
|
func getContainerIDFromLegacyLogSymlink(logSymlink string) (string, error) {
|
||||||
|
parts := strings.Split(logSymlink, "-")
|
||||||
|
if len(parts) == 0 {
|
||||||
|
return "", fmt.Errorf("unable to find separator in %q", logSymlink)
|
||||||
|
}
|
||||||
|
containerIDWithSuffix := parts[len(parts)-1]
|
||||||
|
suffix := fmt.Sprintf(".%s", legacyLogSuffix)
|
||||||
|
if !strings.HasSuffix(containerIDWithSuffix, suffix) {
|
||||||
|
return "", fmt.Errorf("%q doesn't end with %q", logSymlink, suffix)
|
||||||
|
}
|
||||||
|
containerIDWithoutSuffix := strings.TrimSuffix(containerIDWithSuffix, suffix)
|
||||||
|
// container can be retrieved with container Id as short as 6 characters
|
||||||
|
if len(containerIDWithoutSuffix) < 6 {
|
||||||
|
return "", fmt.Errorf("container Id %q is too short", containerIDWithoutSuffix)
|
||||||
|
}
|
||||||
|
return containerIDWithoutSuffix, nil
|
||||||
|
}
|
||||||
|
|
||||||
func logSymlink(containerLogsDir, podFullName, containerName, containerID string) string {
|
func logSymlink(containerLogsDir, podFullName, containerName, containerID string) string {
|
||||||
suffix := fmt.Sprintf(".%s", legacyLogSuffix)
|
suffix := fmt.Sprintf(".%s", legacyLogSuffix)
|
||||||
logPath := fmt.Sprintf("%s_%s-%s", podFullName, containerName, containerID)
|
logPath := fmt.Sprintf("%s_%s-%s", podFullName, containerName, containerID)
|
||||||
|
@ -473,7 +473,7 @@ func (r *RemoteRuntimeService) ContainerStats(containerID string) (*runtimeapi.C
|
|||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if r.logReduction.ShouldMessageBePrinted(err.Error(), containerID) {
|
if r.logReduction.ShouldMessageBePrinted(err.Error(), containerID) {
|
||||||
klog.Errorf("ContainerStatus %q from runtime service failed: %v", containerID, err)
|
klog.Errorf("ContainerStats %q from runtime service failed: %v", containerID, err)
|
||||||
}
|
}
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user