mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-11 21:12:07 +00:00
CPU Manager panics on state initialization error.
- Update unit tests accordingly. - Minor related cleanup in state_file.go
This commit is contained in:
parent
1489d19443
commit
4f185e6b7f
@ -51,9 +51,10 @@ func NewFileState(filePath string, policyName string) State {
|
|||||||
|
|
||||||
if err := stateFile.tryRestoreState(); err != nil {
|
if err := stateFile.tryRestoreState(); err != nil {
|
||||||
// could not restore state, init new state file
|
// could not restore state, init new state file
|
||||||
glog.Infof("[cpumanager] state file: initializing empty state file - reason: \"%s\"", err)
|
msg := fmt.Sprintf("[cpumanager] state file: unable to restore state from disk (%s)\n", err.Error()) +
|
||||||
stateFile.cache.ClearState()
|
"Panicking because we cannot guarantee sane CPU affinity for existing containers.\n" +
|
||||||
stateFile.storeState()
|
fmt.Sprintf("Please drain this node and delete the CPU manager state file \"%s\" before restarting Kubelet.", stateFile.stateFilePath)
|
||||||
|
panic(msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
return stateFile
|
return stateFile
|
||||||
@ -73,45 +74,51 @@ func (sf *stateFile) tryRestoreState() error {
|
|||||||
|
|
||||||
var content []byte
|
var content []byte
|
||||||
|
|
||||||
if content, err = ioutil.ReadFile(sf.stateFilePath); os.IsNotExist(err) {
|
content, err = ioutil.ReadFile(sf.stateFilePath)
|
||||||
// Create file
|
|
||||||
if _, err = os.Create(sf.stateFilePath); err != nil {
|
|
||||||
glog.Errorf("[cpumanager] state file: unable to create state file \"%s\":%s", sf.stateFilePath, err.Error())
|
|
||||||
panic("[cpumanager] state file not created")
|
|
||||||
}
|
|
||||||
glog.Infof("[cpumanager] state file: created empty state file \"%s\"", sf.stateFilePath)
|
|
||||||
} else {
|
|
||||||
// File exists - try to read
|
|
||||||
var readState stateFileData
|
|
||||||
|
|
||||||
if err = json.Unmarshal(content, &readState); err != nil {
|
// If the state file does not exist or has zero length, write a new file.
|
||||||
glog.Warningf("[cpumanager] state file: could not unmarshal, corrupted state file - \"%s\"", sf.stateFilePath)
|
if os.IsNotExist(err) || len(content) == 0 {
|
||||||
return err
|
sf.storeState()
|
||||||
}
|
glog.Infof("[cpumanager] state file: created new state file \"%s\"", sf.stateFilePath)
|
||||||
|
return nil
|
||||||
if sf.policyName != readState.PolicyName {
|
|
||||||
return fmt.Errorf("policy configured \"%s\" != policy from state file \"%s\"", sf.policyName, readState.PolicyName)
|
|
||||||
}
|
|
||||||
|
|
||||||
if tmpDefaultCPUSet, err = cpuset.Parse(readState.DefaultCPUSet); err != nil {
|
|
||||||
glog.Warningf("[cpumanager] state file: could not parse state file - [defaultCpuSet:\"%s\"]", readState.DefaultCPUSet)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
for containerID, cpuString := range readState.Entries {
|
|
||||||
if tmpContainerCPUSet, err = cpuset.Parse(cpuString); err != nil {
|
|
||||||
glog.Warningf("[cpumanager] state file: could not parse state file - container id: %s, cpuset: \"%s\"", containerID, cpuString)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
tmpAssignments[containerID] = tmpContainerCPUSet
|
|
||||||
}
|
|
||||||
|
|
||||||
sf.cache.SetDefaultCPUSet(tmpDefaultCPUSet)
|
|
||||||
sf.cache.SetCPUAssignments(tmpAssignments)
|
|
||||||
|
|
||||||
glog.V(2).Infof("[cpumanager] state file: restored state from state file \"%s\"", sf.stateFilePath)
|
|
||||||
glog.V(2).Infof("[cpumanager] state file: defaultCPUSet: %s", tmpDefaultCPUSet.String())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fail on any other file read error.
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// File exists; try to read it.
|
||||||
|
var readState stateFileData
|
||||||
|
|
||||||
|
if err = json.Unmarshal(content, &readState); err != nil {
|
||||||
|
glog.Errorf("[cpumanager] state file: could not unmarshal, corrupted state file - \"%s\"", sf.stateFilePath)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if sf.policyName != readState.PolicyName {
|
||||||
|
return fmt.Errorf("policy configured \"%s\" != policy from state file \"%s\"", sf.policyName, readState.PolicyName)
|
||||||
|
}
|
||||||
|
|
||||||
|
if tmpDefaultCPUSet, err = cpuset.Parse(readState.DefaultCPUSet); err != nil {
|
||||||
|
glog.Errorf("[cpumanager] state file: could not parse state file - [defaultCpuSet:\"%s\"]", readState.DefaultCPUSet)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for containerID, cpuString := range readState.Entries {
|
||||||
|
if tmpContainerCPUSet, err = cpuset.Parse(cpuString); err != nil {
|
||||||
|
glog.Errorf("[cpumanager] state file: could not parse state file - container id: %s, cpuset: \"%s\"", containerID, cpuString)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
tmpAssignments[containerID] = tmpContainerCPUSet
|
||||||
|
}
|
||||||
|
|
||||||
|
sf.cache.SetDefaultCPUSet(tmpDefaultCPUSet)
|
||||||
|
sf.cache.SetCPUAssignments(tmpAssignments)
|
||||||
|
|
||||||
|
glog.V(2).Infof("[cpumanager] state file: restored state from state file \"%s\"", sf.stateFilePath)
|
||||||
|
glog.V(2).Infof("[cpumanager] state file: defaultCPUSet: %s", tmpDefaultCPUSet.String())
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -77,33 +77,31 @@ func TestFileStateTryRestore(t *testing.T) {
|
|||||||
stateFileContent string
|
stateFileContent string
|
||||||
policyName string
|
policyName string
|
||||||
expErr string
|
expErr string
|
||||||
|
expPanic bool
|
||||||
expectedState *stateMemory
|
expectedState *stateMemory
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
"Invalid JSON - empty file",
|
"Invalid JSON - one byte file",
|
||||||
"\n",
|
"\n",
|
||||||
"none",
|
"none",
|
||||||
"state file: could not unmarshal, corrupted state file",
|
"[cpumanager] state file: unable to restore state from disk (unexpected end of JSON input)",
|
||||||
&stateMemory{
|
true,
|
||||||
assignments: ContainerCPUAssignments{},
|
&stateMemory{},
|
||||||
defaultCPUSet: cpuset.NewCPUSet(),
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"Invalid JSON - invalid content",
|
"Invalid JSON - invalid content",
|
||||||
"{",
|
"{",
|
||||||
"none",
|
"none",
|
||||||
"state file: could not unmarshal, corrupted state file",
|
"[cpumanager] state file: unable to restore state from disk (unexpected end of JSON input)",
|
||||||
&stateMemory{
|
true,
|
||||||
assignments: ContainerCPUAssignments{},
|
&stateMemory{},
|
||||||
defaultCPUSet: cpuset.NewCPUSet(),
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"Try restore defaultCPUSet only",
|
"Try restore defaultCPUSet only",
|
||||||
`{"policyName": "none", "defaultCpuSet": "4-6"}`,
|
`{"policyName": "none", "defaultCpuSet": "4-6"}`,
|
||||||
"none",
|
"none",
|
||||||
"",
|
"",
|
||||||
|
false,
|
||||||
&stateMemory{
|
&stateMemory{
|
||||||
assignments: ContainerCPUAssignments{},
|
assignments: ContainerCPUAssignments{},
|
||||||
defaultCPUSet: cpuset.NewCPUSet(4, 5, 6),
|
defaultCPUSet: cpuset.NewCPUSet(4, 5, 6),
|
||||||
@ -113,11 +111,9 @@ func TestFileStateTryRestore(t *testing.T) {
|
|||||||
"Try restore defaultCPUSet only - invalid name",
|
"Try restore defaultCPUSet only - invalid name",
|
||||||
`{"policyName": "none", "defaultCpuSet" "4-6"}`,
|
`{"policyName": "none", "defaultCpuSet" "4-6"}`,
|
||||||
"none",
|
"none",
|
||||||
"",
|
`[cpumanager] state file: unable to restore state from disk (invalid character '"' after object key)`,
|
||||||
&stateMemory{
|
true,
|
||||||
assignments: ContainerCPUAssignments{},
|
&stateMemory{},
|
||||||
defaultCPUSet: cpuset.NewCPUSet(),
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"Try restore assignments only",
|
"Try restore assignments only",
|
||||||
@ -130,6 +126,7 @@ func TestFileStateTryRestore(t *testing.T) {
|
|||||||
}`,
|
}`,
|
||||||
"none",
|
"none",
|
||||||
"",
|
"",
|
||||||
|
false,
|
||||||
&stateMemory{
|
&stateMemory{
|
||||||
assignments: ContainerCPUAssignments{
|
assignments: ContainerCPUAssignments{
|
||||||
"container1": cpuset.NewCPUSet(4, 5, 6),
|
"container1": cpuset.NewCPUSet(4, 5, 6),
|
||||||
@ -146,21 +143,17 @@ func TestFileStateTryRestore(t *testing.T) {
|
|||||||
"entries": {}
|
"entries": {}
|
||||||
}`,
|
}`,
|
||||||
"B",
|
"B",
|
||||||
"policy configured \"B\" != policy from state file \"A\"",
|
`[cpumanager] state file: unable to restore state from disk (policy configured "B" != policy from state file "A")`,
|
||||||
&stateMemory{
|
true,
|
||||||
assignments: ContainerCPUAssignments{},
|
&stateMemory{},
|
||||||
defaultCPUSet: cpuset.NewCPUSet(),
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"Try restore invalid assignments",
|
"Try restore invalid assignments",
|
||||||
`{"entries": }`,
|
`{"entries": }`,
|
||||||
"none",
|
"none",
|
||||||
"state file: could not unmarshal, corrupted state file",
|
"[cpumanager] state file: unable to restore state from disk (invalid character '}' looking for beginning of value)",
|
||||||
&stateMemory{
|
true,
|
||||||
assignments: ContainerCPUAssignments{},
|
&stateMemory{},
|
||||||
defaultCPUSet: cpuset.NewCPUSet(),
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"Try restore valid file",
|
"Try restore valid file",
|
||||||
@ -174,6 +167,7 @@ func TestFileStateTryRestore(t *testing.T) {
|
|||||||
}`,
|
}`,
|
||||||
"none",
|
"none",
|
||||||
"",
|
"",
|
||||||
|
false,
|
||||||
&stateMemory{
|
&stateMemory{
|
||||||
assignments: ContainerCPUAssignments{
|
assignments: ContainerCPUAssignments{
|
||||||
"container1": cpuset.NewCPUSet(4, 5, 6),
|
"container1": cpuset.NewCPUSet(4, 5, 6),
|
||||||
@ -189,11 +183,9 @@ func TestFileStateTryRestore(t *testing.T) {
|
|||||||
"defaultCpuSet": "2-sd"
|
"defaultCpuSet": "2-sd"
|
||||||
}`,
|
}`,
|
||||||
"none",
|
"none",
|
||||||
"state file: could not parse state file",
|
`[cpumanager] state file: unable to restore state from disk (strconv.Atoi: parsing "sd": invalid syntax)`,
|
||||||
&stateMemory{
|
true,
|
||||||
assignments: ContainerCPUAssignments{},
|
&stateMemory{},
|
||||||
defaultCPUSet: cpuset.NewCPUSet(),
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"Try restore un-parsable assignments",
|
"Try restore un-parsable assignments",
|
||||||
@ -206,17 +198,16 @@ func TestFileStateTryRestore(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}`,
|
}`,
|
||||||
"none",
|
"none",
|
||||||
"state file: could not parse state file",
|
`[cpumanager] state file: unable to restore state from disk (strconv.Atoi: parsing "p": invalid syntax)`,
|
||||||
&stateMemory{
|
true,
|
||||||
assignments: ContainerCPUAssignments{},
|
&stateMemory{},
|
||||||
defaultCPUSet: cpuset.NewCPUSet(),
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"TryRestoreState creates empty state file",
|
"tryRestoreState creates empty state file",
|
||||||
"",
|
"",
|
||||||
"none",
|
"none",
|
||||||
"",
|
"",
|
||||||
|
false,
|
||||||
&stateMemory{
|
&stateMemory{
|
||||||
assignments: ContainerCPUAssignments{},
|
assignments: ContainerCPUAssignments{},
|
||||||
defaultCPUSet: cpuset.NewCPUSet(),
|
defaultCPUSet: cpuset.NewCPUSet(),
|
||||||
@ -226,11 +217,23 @@ func TestFileStateTryRestore(t *testing.T) {
|
|||||||
|
|
||||||
for idx, tc := range testCases {
|
for idx, tc := range testCases {
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
|
defer func() {
|
||||||
|
if tc.expPanic {
|
||||||
|
r := recover()
|
||||||
|
panicMsg := r.(string)
|
||||||
|
if !strings.HasPrefix(panicMsg, tc.expErr) {
|
||||||
|
t.Fatalf(`expected panic "%s" but got "%s"`, tc.expErr, panicMsg)
|
||||||
|
} else {
|
||||||
|
t.Logf(`got expected panic "%s"`, panicMsg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
sfilePath, err := ioutil.TempFile("/tmp", fmt.Sprintf("cpumanager_state_file_test_%d", idx))
|
sfilePath, err := ioutil.TempFile("/tmp", fmt.Sprintf("cpumanager_state_file_test_%d", idx))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("cannot create temporary file: %q", err.Error())
|
t.Errorf("cannot create temporary file: %q", err.Error())
|
||||||
}
|
}
|
||||||
// Don't create state file, let TryRestoreState figure out that is should create
|
// Don't create state file, let tryRestoreState figure out that is should create
|
||||||
if tc.stateFileContent != "" {
|
if tc.stateFileContent != "" {
|
||||||
writeToStateFile(sfilePath.Name(), tc.stateFileContent)
|
writeToStateFile(sfilePath.Name(), tc.stateFileContent)
|
||||||
}
|
}
|
||||||
@ -245,11 +248,11 @@ func TestFileStateTryRestore(t *testing.T) {
|
|||||||
if tc.expErr != "" {
|
if tc.expErr != "" {
|
||||||
if logData.String() != "" {
|
if logData.String() != "" {
|
||||||
if !strings.Contains(logData.String(), tc.expErr) {
|
if !strings.Contains(logData.String(), tc.expErr) {
|
||||||
t.Errorf("TryRestoreState() error = %v, wantErr %v", logData.String(), tc.expErr)
|
t.Errorf("tryRestoreState() error = %v, wantErr %v", logData.String(), tc.expErr)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
t.Errorf("TryRestoreState() error = nil, wantErr %v", tc.expErr)
|
t.Errorf("tryRestoreState() error = nil, wantErr %v", tc.expErr)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -268,7 +271,7 @@ func TestFileStateTryRestorePanic(t *testing.T) {
|
|||||||
}{
|
}{
|
||||||
"Panic creating file",
|
"Panic creating file",
|
||||||
true,
|
true,
|
||||||
"[cpumanager] state file not created",
|
"[cpumanager] state file not written",
|
||||||
}
|
}
|
||||||
|
|
||||||
t.Run(testCase.description, func(t *testing.T) {
|
t.Run(testCase.description, func(t *testing.T) {
|
||||||
@ -277,10 +280,10 @@ func TestFileStateTryRestorePanic(t *testing.T) {
|
|||||||
if err := recover(); err != nil {
|
if err := recover(); err != nil {
|
||||||
if testCase.wantPanic {
|
if testCase.wantPanic {
|
||||||
if testCase.panicMessage == err {
|
if testCase.panicMessage == err {
|
||||||
t.Logf("TryRestoreState() got expected panic = %v", err)
|
t.Logf("tryRestoreState() got expected panic = %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
t.Errorf("TryRestoreState() unexpected panic = %v, wantErr %v", err, testCase.panicMessage)
|
t.Errorf("tryRestoreState() unexpected panic = %v, wantErr %v", err, testCase.panicMessage)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
Loading…
Reference in New Issue
Block a user