mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-08 03:33:56 +00:00
Lock checksum calculation for v1 CPUManager state to pre 1.18 logic
The updated CPUManager from PR #84462 implements logic to migrate the CPUManager checkpoint file from an old format to a new one. To do so, it defines the following types: ``` type CPUManagerCheckpoint = CPUManagerCheckpointV2 type CPUManagerCheckpointV1 struct { ... } type CPUManagerCheckpointV2 struct { ... } ``` This replaces the old definition of just: ``` type CPUManagerCheckpoint struct { ... } ``` Code was put in place to ensure proper migration from checkpoints in V1 format to checkpoints in V2 format. However (and this is a big however), all of the unit tests were performed on V1 checkpoints that were generated using the type name `CPUManagerCheckpointV1` and not the original type name of `CPUManagerCheckpoint`. As such, the checksum in the checkpoint file uses the `CPUManagerCheckpointV1` type to calculate its checksum and not the original type name of `CPUManagerCheckpoint`. This causes problems in the real world since all pre-1.18 checkpoint files will have been generated with the original type name of `CPUManagerCheckpoint`. When verifying the checksum of the checkpoint file across an upgrade to 1.18, the checksum is calculated assuming a type name of `CPUManagerCheckpointV1` (which is incorrect) and the file is seen to be corrupt. This patch ensures that all V1 checksums are verified against a type name of `CPUManagerCheckpoint` instead of ``CPUManagerCheckpointV1`. It also locks the algorithm used to calculate the checksum in place, since it wil never change in the future (for pre-1.18 checkpoint files at least).
This commit is contained in:
parent
d605766c89
commit
5faf8f4c52
@ -17,6 +17,7 @@ go_library(
|
|||||||
"//pkg/kubelet/checkpointmanager/errors:go_default_library",
|
"//pkg/kubelet/checkpointmanager/errors:go_default_library",
|
||||||
"//pkg/kubelet/cm/cpumanager/containermap:go_default_library",
|
"//pkg/kubelet/cm/cpumanager/containermap:go_default_library",
|
||||||
"//pkg/kubelet/cm/cpuset:go_default_library",
|
"//pkg/kubelet/cm/cpuset:go_default_library",
|
||||||
|
"//vendor/github.com/davecgh/go-spew/spew:go_default_library",
|
||||||
"//vendor/k8s.io/klog:go_default_library",
|
"//vendor/k8s.io/klog:go_default_library",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
@ -18,9 +18,14 @@ package state
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"hash/fnv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/davecgh/go-spew/spew"
|
||||||
|
|
||||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
|
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
|
||||||
)
|
)
|
||||||
|
|
||||||
// CPUManagerCheckpoint struct is used to store cpu/pod assignments in a checkpoint
|
// CPUManagerCheckpoint struct is used to store cpu/pod assignments in a checkpoint
|
||||||
@ -95,11 +100,27 @@ func (cp *CPUManagerCheckpointV1) VerifyChecksum() error {
|
|||||||
// accept empty checksum for compatibility with old file backend
|
// accept empty checksum for compatibility with old file backend
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
printer := spew.ConfigState{
|
||||||
|
Indent: " ",
|
||||||
|
SortKeys: true,
|
||||||
|
DisableMethods: true,
|
||||||
|
SpewKeys: true,
|
||||||
|
}
|
||||||
|
|
||||||
ck := cp.Checksum
|
ck := cp.Checksum
|
||||||
cp.Checksum = 0
|
cp.Checksum = 0
|
||||||
err := ck.Verify(cp)
|
object := printer.Sprintf("%#v", cp)
|
||||||
|
object = strings.Replace(object, "CPUManagerCheckpointV1", "CPUManagerCheckpoint", 1)
|
||||||
cp.Checksum = ck
|
cp.Checksum = ck
|
||||||
return err
|
|
||||||
|
hash := fnv.New32a()
|
||||||
|
printer.Fprintf(hash, "%v", object)
|
||||||
|
if cp.Checksum != checksum.Checksum(hash.Sum32()) {
|
||||||
|
return errors.ErrCorruptCheckpoint
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// VerifyChecksum verifies that current checksum of checkpoint is valid in v2 format
|
// VerifyChecksum verifies that current checksum of checkpoint is valid in v2 format
|
||||||
|
@ -154,6 +154,20 @@ func TestCheckpointStateRestore(t *testing.T) {
|
|||||||
`could not parse cpuset "asd" for container "container2" in pod "pod": strconv.Atoi: parsing "asd": invalid syntax`,
|
`could not parse cpuset "asd" for container "container2" in pod "pod": strconv.Atoi: parsing "asd": invalid syntax`,
|
||||||
&stateMemory{},
|
&stateMemory{},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"Restore checkpoint from checkpoint with v1 checksum",
|
||||||
|
`{
|
||||||
|
"policyName": "none",
|
||||||
|
"defaultCPUSet": "1-3",
|
||||||
|
"checksum": 1694838852
|
||||||
|
}`,
|
||||||
|
"none",
|
||||||
|
containermap.ContainerMap{},
|
||||||
|
"",
|
||||||
|
&stateMemory{
|
||||||
|
defaultCPUSet: cpuset.NewCPUSet(1, 2, 3),
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"Restore checkpoint with migration",
|
"Restore checkpoint with migration",
|
||||||
`{
|
`{
|
||||||
@ -163,7 +177,7 @@ func TestCheckpointStateRestore(t *testing.T) {
|
|||||||
"containerID1": "4-6",
|
"containerID1": "4-6",
|
||||||
"containerID2": "1-3"
|
"containerID2": "1-3"
|
||||||
},
|
},
|
||||||
"checksum": 2832947348
|
"checksum": 3680390589
|
||||||
}`,
|
}`,
|
||||||
"none",
|
"none",
|
||||||
func() containermap.ContainerMap {
|
func() containermap.ContainerMap {
|
||||||
|
Loading…
Reference in New Issue
Block a user