Lock checksum calculation for v1 CPUManager state to pre 1.18 logic

The updated CPUManager from PR #84462 implements logic to migrate the
CPUManager checkpoint file from an old format to a new one. To do so, it
defines the following types:

```
type CPUManagerCheckpoint = CPUManagerCheckpointV2
type CPUManagerCheckpointV1 struct {  ...  }
type CPUManagerCheckpointV2 struct {  ...  }
```

This replaces the old definition of just:

```
type CPUManagerCheckpoint struct {  ...  }
```

Code was put in place to ensure proper migration from checkpoints in V1
format to checkpoints in V2 format. However (and this is a big however),
all of the unit tests were performed on V1 checkpoints that were
generated using the type name `CPUManagerCheckpointV1` and not the
original type name of `CPUManagerCheckpoint`. As such, the checksum in
the checkpoint file uses the `CPUManagerCheckpointV1` type to calculate
its checksum and not the original type name of `CPUManagerCheckpoint`.

This causes problems in the real world since all pre-1.18 checkpoint
files will have been generated with the original type name of
`CPUManagerCheckpoint`. When verifying the checksum of the checkpoint
file across an upgrade to 1.18, the checksum is calculated assuming
a type name of `CPUManagerCheckpointV1` (which is incorrect) and the
file is seen to be corrupt.

This patch ensures that all V1 checksums are verified against a type
name of `CPUManagerCheckpoint` instead of ``CPUManagerCheckpointV1`.
It also locks the algorithm used to calculate the checksum in place,
since it wil never change in the future (for pre-1.18 checkpoint
files at least).
This commit is contained in:
Kevin Klues 2019-12-28 13:44:24 +01:00
parent d605766c89
commit 5faf8f4c52
3 changed files with 39 additions and 3 deletions

View File

@ -17,6 +17,7 @@ go_library(
"//pkg/kubelet/checkpointmanager/errors:go_default_library",
"//pkg/kubelet/cm/cpumanager/containermap:go_default_library",
"//pkg/kubelet/cm/cpuset:go_default_library",
"//vendor/github.com/davecgh/go-spew/spew:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
],
)

View File

@ -18,9 +18,14 @@ package state
import (
"encoding/json"
"hash/fnv"
"strings"
"github.com/davecgh/go-spew/spew"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
)
// CPUManagerCheckpoint struct is used to store cpu/pod assignments in a checkpoint
@ -95,11 +100,27 @@ func (cp *CPUManagerCheckpointV1) VerifyChecksum() error {
// accept empty checksum for compatibility with old file backend
return nil
}
printer := spew.ConfigState{
Indent: " ",
SortKeys: true,
DisableMethods: true,
SpewKeys: true,
}
ck := cp.Checksum
cp.Checksum = 0
err := ck.Verify(cp)
object := printer.Sprintf("%#v", cp)
object = strings.Replace(object, "CPUManagerCheckpointV1", "CPUManagerCheckpoint", 1)
cp.Checksum = ck
return err
hash := fnv.New32a()
printer.Fprintf(hash, "%v", object)
if cp.Checksum != checksum.Checksum(hash.Sum32()) {
return errors.ErrCorruptCheckpoint
}
return nil
}
// VerifyChecksum verifies that current checksum of checkpoint is valid in v2 format

View File

@ -154,6 +154,20 @@ func TestCheckpointStateRestore(t *testing.T) {
`could not parse cpuset "asd" for container "container2" in pod "pod": strconv.Atoi: parsing "asd": invalid syntax`,
&stateMemory{},
},
{
"Restore checkpoint from checkpoint with v1 checksum",
`{
"policyName": "none",
"defaultCPUSet": "1-3",
"checksum": 1694838852
}`,
"none",
containermap.ContainerMap{},
"",
&stateMemory{
defaultCPUSet: cpuset.NewCPUSet(1, 2, 3),
},
},
{
"Restore checkpoint with migration",
`{
@ -163,7 +177,7 @@ func TestCheckpointStateRestore(t *testing.T) {
"containerID1": "4-6",
"containerID2": "1-3"
},
"checksum": 2832947348
"checksum": 3680390589
}`,
"none",
func() containermap.ContainerMap {