From 5faf8f4c528b977d294e09cd589fecda47b8631b Mon Sep 17 00:00:00 2001 From: Kevin Klues Date: Sat, 28 Dec 2019 13:44:24 +0100 Subject: [PATCH] Lock checksum calculation for v1 CPUManager state to pre 1.18 logic The updated CPUManager from PR #84462 implements logic to migrate the CPUManager checkpoint file from an old format to a new one. To do so, it defines the following types: ``` type CPUManagerCheckpoint = CPUManagerCheckpointV2 type CPUManagerCheckpointV1 struct { ... } type CPUManagerCheckpointV2 struct { ... } ``` This replaces the old definition of just: ``` type CPUManagerCheckpoint struct { ... } ``` Code was put in place to ensure proper migration from checkpoints in V1 format to checkpoints in V2 format. However (and this is a big however), all of the unit tests were performed on V1 checkpoints that were generated using the type name `CPUManagerCheckpointV1` and not the original type name of `CPUManagerCheckpoint`. As such, the checksum in the checkpoint file uses the `CPUManagerCheckpointV1` type to calculate its checksum and not the original type name of `CPUManagerCheckpoint`. This causes problems in the real world since all pre-1.18 checkpoint files will have been generated with the original type name of `CPUManagerCheckpoint`. When verifying the checksum of the checkpoint file across an upgrade to 1.18, the checksum is calculated assuming a type name of `CPUManagerCheckpointV1` (which is incorrect) and the file is seen to be corrupt. This patch ensures that all V1 checksums are verified against a type name of `CPUManagerCheckpoint` instead of ``CPUManagerCheckpointV1`. It also locks the algorithm used to calculate the checksum in place, since it wil never change in the future (for pre-1.18 checkpoint files at least). --- pkg/kubelet/cm/cpumanager/state/BUILD | 1 + pkg/kubelet/cm/cpumanager/state/checkpoint.go | 25 +++++++++++++++++-- .../cpumanager/state/state_checkpoint_test.go | 16 +++++++++++- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/pkg/kubelet/cm/cpumanager/state/BUILD b/pkg/kubelet/cm/cpumanager/state/BUILD index 37850de90c3..438737c9d18 100644 --- a/pkg/kubelet/cm/cpumanager/state/BUILD +++ b/pkg/kubelet/cm/cpumanager/state/BUILD @@ -17,6 +17,7 @@ go_library( "//pkg/kubelet/checkpointmanager/errors:go_default_library", "//pkg/kubelet/cm/cpumanager/containermap:go_default_library", "//pkg/kubelet/cm/cpuset:go_default_library", + "//vendor/github.com/davecgh/go-spew/spew:go_default_library", "//vendor/k8s.io/klog:go_default_library", ], ) diff --git a/pkg/kubelet/cm/cpumanager/state/checkpoint.go b/pkg/kubelet/cm/cpumanager/state/checkpoint.go index 3683e66aedc..f817de24a7e 100644 --- a/pkg/kubelet/cm/cpumanager/state/checkpoint.go +++ b/pkg/kubelet/cm/cpumanager/state/checkpoint.go @@ -18,9 +18,14 @@ package state import ( "encoding/json" + "hash/fnv" + "strings" + + "github.com/davecgh/go-spew/spew" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum" + "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors" ) // CPUManagerCheckpoint struct is used to store cpu/pod assignments in a checkpoint @@ -95,11 +100,27 @@ func (cp *CPUManagerCheckpointV1) VerifyChecksum() error { // accept empty checksum for compatibility with old file backend return nil } + + printer := spew.ConfigState{ + Indent: " ", + SortKeys: true, + DisableMethods: true, + SpewKeys: true, + } + ck := cp.Checksum cp.Checksum = 0 - err := ck.Verify(cp) + object := printer.Sprintf("%#v", cp) + object = strings.Replace(object, "CPUManagerCheckpointV1", "CPUManagerCheckpoint", 1) cp.Checksum = ck - return err + + hash := fnv.New32a() + printer.Fprintf(hash, "%v", object) + if cp.Checksum != checksum.Checksum(hash.Sum32()) { + return errors.ErrCorruptCheckpoint + } + + return nil } // VerifyChecksum verifies that current checksum of checkpoint is valid in v2 format diff --git a/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go b/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go index 34d40c43174..e81eaddd75a 100644 --- a/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go +++ b/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go @@ -154,6 +154,20 @@ func TestCheckpointStateRestore(t *testing.T) { `could not parse cpuset "asd" for container "container2" in pod "pod": strconv.Atoi: parsing "asd": invalid syntax`, &stateMemory{}, }, + { + "Restore checkpoint from checkpoint with v1 checksum", + `{ + "policyName": "none", + "defaultCPUSet": "1-3", + "checksum": 1694838852 + }`, + "none", + containermap.ContainerMap{}, + "", + &stateMemory{ + defaultCPUSet: cpuset.NewCPUSet(1, 2, 3), + }, + }, { "Restore checkpoint with migration", `{ @@ -163,7 +177,7 @@ func TestCheckpointStateRestore(t *testing.T) { "containerID1": "4-6", "containerID2": "1-3" }, - "checksum": 2832947348 + "checksum": 3680390589 }`, "none", func() containermap.ContainerMap {