mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-28 05:57:25 +00:00
Merge pull request #99748 from rphillips/fixes/check_log_path_for_restart_count
kubelet: fix log files being overwritten on container state loss
This commit is contained in:
commit
a238eb2fe8
@ -21,12 +21,15 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"math/rand"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
goruntime "runtime"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@ -127,6 +130,40 @@ func (s *startSpec) getTargetID(podStatus *kubecontainer.PodStatus) (*kubecontai
|
||||
return &targetStatus.ID, nil
|
||||
}
|
||||
|
||||
func calcRestartCountByLogDir(path string) (int, error) {
|
||||
// if the path doesn't exist then it's not an error
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
return 0, nil
|
||||
}
|
||||
restartCount := int(0)
|
||||
files, err := ioutil.ReadDir(path)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if len(files) == 0 {
|
||||
return 0, err
|
||||
}
|
||||
restartCountLogFileRegex := regexp.MustCompile(`(\d+).log(\..*)?`)
|
||||
for _, file := range files {
|
||||
if file.IsDir() {
|
||||
continue
|
||||
}
|
||||
matches := restartCountLogFileRegex.FindStringSubmatch(file.Name())
|
||||
if len(matches) == 0 {
|
||||
continue
|
||||
}
|
||||
count, err := strconv.Atoi(matches[1])
|
||||
if err != nil {
|
||||
return restartCount, err
|
||||
}
|
||||
count++
|
||||
if count > restartCount {
|
||||
restartCount = count
|
||||
}
|
||||
}
|
||||
return restartCount, nil
|
||||
}
|
||||
|
||||
// startContainer starts a container and returns a message indicates why it is failed on error.
|
||||
// It starts the container through the following steps:
|
||||
// * pull the image
|
||||
@ -150,6 +187,22 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
|
||||
containerStatus := podStatus.FindContainerStatusByName(container.Name)
|
||||
if containerStatus != nil {
|
||||
restartCount = containerStatus.RestartCount + 1
|
||||
} else {
|
||||
// The container runtime keeps state on container statuses and
|
||||
// what the container restart count is. When nodes are rebooted
|
||||
// some container runtimes clear their state which causes the
|
||||
// restartCount to be reset to 0. This causes the logfile to
|
||||
// start at 0.log, which either overwrites or appends to the
|
||||
// already existing log.
|
||||
//
|
||||
// We are checking to see if the log directory exists, and find
|
||||
// the latest restartCount by checking the log name -
|
||||
// {restartCount}.log - and adding 1 to it.
|
||||
logDir := BuildContainerLogsDirectory(pod.Namespace, pod.Name, pod.UID, container.Name)
|
||||
restartCount, err = calcRestartCountByLogDir(logDir)
|
||||
if err != nil {
|
||||
klog.InfoS("Log directory exists but could not calculate restartCount", "logDir", logDir, "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
target, err := spec.getTargetID(podStatus)
|
||||
|
@ -18,6 +18,8 @@ package kuberuntime
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
@ -422,3 +424,45 @@ func TestStartSpec(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRestartCountByLogDir(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
filenames []string
|
||||
restartCount int
|
||||
}{
|
||||
{
|
||||
filenames: []string{"0.log.rotated-log"},
|
||||
restartCount: 1,
|
||||
},
|
||||
{
|
||||
filenames: []string{"0.log"},
|
||||
restartCount: 1,
|
||||
},
|
||||
{
|
||||
filenames: []string{"0.log", "1.log", "2.log"},
|
||||
restartCount: 3,
|
||||
},
|
||||
{
|
||||
filenames: []string{"0.log.rotated", "1.log", "2.log"},
|
||||
restartCount: 3,
|
||||
},
|
||||
{
|
||||
filenames: []string{"5.log.rotated", "6.log.rotated"},
|
||||
restartCount: 7,
|
||||
},
|
||||
{
|
||||
filenames: []string{"5.log.rotated", "6.log", "7.log"},
|
||||
restartCount: 8,
|
||||
},
|
||||
} {
|
||||
tempDirPath, err := ioutil.TempDir("", "test-restart-count-")
|
||||
assert.NoError(t, err, "create tempdir error")
|
||||
defer os.RemoveAll(tempDirPath)
|
||||
for _, filename := range tc.filenames {
|
||||
err = ioutil.WriteFile(filepath.Join(tempDirPath, filename), []byte("a log line"), 0600)
|
||||
assert.NoError(t, err, "could not write log file")
|
||||
}
|
||||
count, _ := calcRestartCountByLogDir(tempDirPath)
|
||||
assert.Equal(t, count, tc.restartCount, "count %v should equal restartCount %v", count, tc.restartCount)
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user