mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-07 11:13:48 +00:00
Lock checksum calculation for v1 CPUManager state to pre 1.18 logic
The updated CPUManager from PR #84462 implements logic to migrate the CPUManager checkpoint file from an old format to a new one. To do so, it defines the following types: ``` type CPUManagerCheckpoint = CPUManagerCheckpointV2 type CPUManagerCheckpointV1 struct { ... } type CPUManagerCheckpointV2 struct { ... } ``` This replaces the old definition of just: ``` type CPUManagerCheckpoint struct { ... } ``` Code was put in place to ensure proper migration from checkpoints in V1 format to checkpoints in V2 format. However (and this is a big however), all of the unit tests were performed on V1 checkpoints that were generated using the type name `CPUManagerCheckpointV1` and not the original type name of `CPUManagerCheckpoint`. As such, the checksum in the checkpoint file uses the `CPUManagerCheckpointV1` type to calculate its checksum and not the original type name of `CPUManagerCheckpoint`. This causes problems in the real world since all pre-1.18 checkpoint files will have been generated with the original type name of `CPUManagerCheckpoint`. When verifying the checksum of the checkpoint file across an upgrade to 1.18, the checksum is calculated assuming a type name of `CPUManagerCheckpointV1` (which is incorrect) and the file is seen to be corrupt. This patch ensures that all V1 checksums are verified against a type name of `CPUManagerCheckpoint` instead of ``CPUManagerCheckpointV1`. It also locks the algorithm used to calculate the checksum in place, since it wil never change in the future (for pre-1.18 checkpoint files at least).
This commit is contained in:
parent
d605766c89
commit
5faf8f4c52
@ -17,6 +17,7 @@ go_library(
|
||||
"//pkg/kubelet/checkpointmanager/errors:go_default_library",
|
||||
"//pkg/kubelet/cm/cpumanager/containermap:go_default_library",
|
||||
"//pkg/kubelet/cm/cpuset:go_default_library",
|
||||
"//vendor/github.com/davecgh/go-spew/spew:go_default_library",
|
||||
"//vendor/k8s.io/klog:go_default_library",
|
||||
],
|
||||
)
|
||||
|
@ -18,9 +18,14 @@ package state
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"hash/fnv"
|
||||
"strings"
|
||||
|
||||
"github.com/davecgh/go-spew/spew"
|
||||
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
|
||||
)
|
||||
|
||||
// CPUManagerCheckpoint struct is used to store cpu/pod assignments in a checkpoint
|
||||
@ -95,11 +100,27 @@ func (cp *CPUManagerCheckpointV1) VerifyChecksum() error {
|
||||
// accept empty checksum for compatibility with old file backend
|
||||
return nil
|
||||
}
|
||||
|
||||
printer := spew.ConfigState{
|
||||
Indent: " ",
|
||||
SortKeys: true,
|
||||
DisableMethods: true,
|
||||
SpewKeys: true,
|
||||
}
|
||||
|
||||
ck := cp.Checksum
|
||||
cp.Checksum = 0
|
||||
err := ck.Verify(cp)
|
||||
object := printer.Sprintf("%#v", cp)
|
||||
object = strings.Replace(object, "CPUManagerCheckpointV1", "CPUManagerCheckpoint", 1)
|
||||
cp.Checksum = ck
|
||||
return err
|
||||
|
||||
hash := fnv.New32a()
|
||||
printer.Fprintf(hash, "%v", object)
|
||||
if cp.Checksum != checksum.Checksum(hash.Sum32()) {
|
||||
return errors.ErrCorruptCheckpoint
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// VerifyChecksum verifies that current checksum of checkpoint is valid in v2 format
|
||||
|
@ -154,6 +154,20 @@ func TestCheckpointStateRestore(t *testing.T) {
|
||||
`could not parse cpuset "asd" for container "container2" in pod "pod": strconv.Atoi: parsing "asd": invalid syntax`,
|
||||
&stateMemory{},
|
||||
},
|
||||
{
|
||||
"Restore checkpoint from checkpoint with v1 checksum",
|
||||
`{
|
||||
"policyName": "none",
|
||||
"defaultCPUSet": "1-3",
|
||||
"checksum": 1694838852
|
||||
}`,
|
||||
"none",
|
||||
containermap.ContainerMap{},
|
||||
"",
|
||||
&stateMemory{
|
||||
defaultCPUSet: cpuset.NewCPUSet(1, 2, 3),
|
||||
},
|
||||
},
|
||||
{
|
||||
"Restore checkpoint with migration",
|
||||
`{
|
||||
@ -163,7 +177,7 @@ func TestCheckpointStateRestore(t *testing.T) {
|
||||
"containerID1": "4-6",
|
||||
"containerID2": "1-3"
|
||||
},
|
||||
"checksum": 2832947348
|
||||
"checksum": 3680390589
|
||||
}`,
|
||||
"none",
|
||||
func() containermap.ContainerMap {
|
||||
|
Loading…
Reference in New Issue
Block a user