mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-28 22:17:14 +00:00
Merge pull request #99748 from rphillips/fixes/check_log_path_for_restart_count
kubelet: fix log files being overwritten on container state loss
This commit is contained in:
commit
a238eb2fe8
@ -21,12 +21,15 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"io/ioutil"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
goruntime "runtime"
|
goruntime "runtime"
|
||||||
"sort"
|
"sort"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@ -127,6 +130,40 @@ func (s *startSpec) getTargetID(podStatus *kubecontainer.PodStatus) (*kubecontai
|
|||||||
return &targetStatus.ID, nil
|
return &targetStatus.ID, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// restartCountLogFileRegex matches container log file names of the form
// "{restartCount}.log", optionally followed by a dot-separated suffix (for
// example "3.log.rotated"). The pattern is anchored at both ends and the dot is
// escaped so that unrelated files which merely contain a "<digits>?log"
// fragment are not mistaken for container logs.
var restartCountLogFileRegex = regexp.MustCompile(`^(\d+)\.log(\..*)?$`)

// calcRestartCountByLogDir inspects the container log directory at path and
// returns the restart count the next container instance should use: one more
// than the highest "{restartCount}.log" file found there.
//
// It returns 0 and a nil error when the directory does not exist or contains
// no matching log files. Any other failure to read the directory is returned
// as an error together with a count of 0. Sub-directories and files whose
// numeric prefix cannot be parsed as an int are ignored.
func calcRestartCountByLogDir(path string) (int, error) {
	// Read the directory directly instead of stat-ing it first: this avoids a
	// check-then-use race and lets us tell "missing" apart from real errors.
	files, err := ioutil.ReadDir(path)
	if err != nil {
		// A missing log directory simply means there is no history to recover.
		if os.IsNotExist(err) {
			return 0, nil
		}
		return 0, err
	}

	restartCount := 0
	for _, file := range files {
		if file.IsDir() {
			continue
		}
		matches := restartCountLogFileRegex.FindStringSubmatch(file.Name())
		if len(matches) == 0 {
			continue
		}
		count, convErr := strconv.Atoi(matches[1])
		if convErr != nil {
			// The digits overflow an int, so this is not a log file a real
			// restart count could have produced. Skip it rather than
			// discarding the counts recovered from the other files.
			continue
		}
		// The next restart must use a number one past the newest existing log.
		if next := count + 1; next > restartCount {
			restartCount = next
		}
	}
	return restartCount, nil
}
|
||||||
|
|
||||||
// startContainer starts a container and returns a message indicates why it is failed on error.
|
// startContainer starts a container and returns a message indicates why it is failed on error.
|
||||||
// It starts the container through the following steps:
|
// It starts the container through the following steps:
|
||||||
// * pull the image
|
// * pull the image
|
||||||
@ -150,6 +187,22 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
|
|||||||
containerStatus := podStatus.FindContainerStatusByName(container.Name)
|
containerStatus := podStatus.FindContainerStatusByName(container.Name)
|
||||||
if containerStatus != nil {
|
if containerStatus != nil {
|
||||||
restartCount = containerStatus.RestartCount + 1
|
restartCount = containerStatus.RestartCount + 1
|
||||||
|
} else {
|
||||||
|
// The container runtime keeps state on container statuses and
|
||||||
|
// what the container restart count is. When nodes are rebooted
|
||||||
|
// some container runtimes clear their state which causes the
|
||||||
|
// restartCount to be reset to 0. This causes the logfile to
|
||||||
|
// start at 0.log, which either overwrites or appends to the
|
||||||
|
// already existing log.
|
||||||
|
//
|
||||||
|
// We are checking to see if the log directory exists, and find
|
||||||
|
// the latest restartCount by checking the log name -
|
||||||
|
// {restartCount}.log - and adding 1 to it.
|
||||||
|
logDir := BuildContainerLogsDirectory(pod.Namespace, pod.Name, pod.UID, container.Name)
|
||||||
|
restartCount, err = calcRestartCountByLogDir(logDir)
|
||||||
|
if err != nil {
|
||||||
|
klog.InfoS("Log directory exists but could not calculate restartCount", "logDir", logDir, "err", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
target, err := spec.getTargetID(podStatus)
|
target, err := spec.getTargetID(podStatus)
|
||||||
|
@ -18,6 +18,8 @@ package kuberuntime
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
@ -422,3 +424,45 @@ func TestStartSpec(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRestartCountByLogDir(t *testing.T) {
|
||||||
|
for _, tc := range []struct {
|
||||||
|
filenames []string
|
||||||
|
restartCount int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
filenames: []string{"0.log.rotated-log"},
|
||||||
|
restartCount: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filenames: []string{"0.log"},
|
||||||
|
restartCount: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filenames: []string{"0.log", "1.log", "2.log"},
|
||||||
|
restartCount: 3,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filenames: []string{"0.log.rotated", "1.log", "2.log"},
|
||||||
|
restartCount: 3,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filenames: []string{"5.log.rotated", "6.log.rotated"},
|
||||||
|
restartCount: 7,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filenames: []string{"5.log.rotated", "6.log", "7.log"},
|
||||||
|
restartCount: 8,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
tempDirPath, err := ioutil.TempDir("", "test-restart-count-")
|
||||||
|
assert.NoError(t, err, "create tempdir error")
|
||||||
|
defer os.RemoveAll(tempDirPath)
|
||||||
|
for _, filename := range tc.filenames {
|
||||||
|
err = ioutil.WriteFile(filepath.Join(tempDirPath, filename), []byte("a log line"), 0600)
|
||||||
|
assert.NoError(t, err, "could not write log file")
|
||||||
|
}
|
||||||
|
count, _ := calcRestartCountByLogDir(tempDirPath)
|
||||||
|
assert.Equal(t, count, tc.restartCount, "count %v should equal restartCount %v", count, tc.restartCount)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user