mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-10 12:32:03 +00:00
kubelet: improve checkpoint errors
Recording the expected and actual checksum in the error makes it possible to provide that information, for example in a failed test like the ones for DRA. Otherwise developers have to manually step through the test with a debugger to figure out what the new checksum is.
This commit is contained in:
parent
157f4b94d8
commit
b9d00841a6
@ -28,8 +28,9 @@ type Checksum uint64
|
|||||||
|
|
||||||
// Verify verifies that passed checksum is same as calculated checksum
|
// Verify verifies that passed checksum is same as calculated checksum
|
||||||
func (cs Checksum) Verify(data interface{}) error {
|
func (cs Checksum) Verify(data interface{}) error {
|
||||||
if cs != New(data) {
|
actualCS := New(data)
|
||||||
return errors.ErrCorruptCheckpoint
|
if cs != actualCS {
|
||||||
|
return &errors.CorruptCheckpointError{ActualCS: uint64(actualCS), ExpectedCS: uint64(cs)}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -18,8 +18,28 @@ package errors
|
|||||||
|
|
||||||
import "fmt"
|
import "fmt"
|
||||||
|
|
||||||
// ErrCorruptCheckpoint error is reported when checksum does not match
|
// ErrCorruptCheckpoint error is reported when checksum does not match.
|
||||||
var ErrCorruptCheckpoint = fmt.Errorf("checkpoint is corrupted")
|
// Check for it with:
|
||||||
|
//
|
||||||
|
// var csErr *CorruptCheckpointError
|
||||||
|
// if errors.As(err, &csErr) { ... }
|
||||||
|
// if errors.Is(err, CorruptCheckpointError{}) { ... }
|
||||||
|
type CorruptCheckpointError struct {
|
||||||
|
ActualCS, ExpectedCS uint64
|
||||||
|
}
|
||||||
|
|
||||||
|
func (err CorruptCheckpointError) Error() string {
|
||||||
|
return "checkpoint is corrupted"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (err CorruptCheckpointError) Is(target error) bool {
|
||||||
|
switch target.(type) {
|
||||||
|
case *CorruptCheckpointError, CorruptCheckpointError:
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ErrCheckpointNotFound is reported when checkpoint is not found for a given key
|
// ErrCheckpointNotFound is reported when checkpoint is not found for a given key
|
||||||
var ErrCheckpointNotFound = fmt.Errorf("checkpoint is not found")
|
var ErrCheckpointNotFound = fmt.Errorf("checkpoint is not found")
|
||||||
|
@ -110,8 +110,12 @@ func (cp *CPUManagerCheckpointV1) VerifyChecksum() error {
|
|||||||
|
|
||||||
hash := fnv.New32a()
|
hash := fnv.New32a()
|
||||||
fmt.Fprintf(hash, "%v", object)
|
fmt.Fprintf(hash, "%v", object)
|
||||||
if cp.Checksum != checksum.Checksum(hash.Sum32()) {
|
actualCS := checksum.Checksum(hash.Sum32())
|
||||||
return errors.ErrCorruptCheckpoint
|
if cp.Checksum != actualCS {
|
||||||
|
return &errors.CorruptCheckpointError{
|
||||||
|
ActualCS: uint64(actualCS),
|
||||||
|
ExpectedCS: uint64(cp.Checksum),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
@ -18,6 +18,7 @@ package state
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"hash/fnv"
|
"hash/fnv"
|
||||||
"strings"
|
"strings"
|
||||||
@ -25,7 +26,7 @@ import (
|
|||||||
"k8s.io/apimachinery/pkg/util/dump"
|
"k8s.io/apimachinery/pkg/util/dump"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
|
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
|
cmerrors "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
|
||||||
)
|
)
|
||||||
|
|
||||||
var _ checkpointmanager.Checkpoint = &DRAManagerCheckpoint{}
|
var _ checkpointmanager.Checkpoint = &DRAManagerCheckpoint{}
|
||||||
@ -79,7 +80,7 @@ func (dc *DRAManagerCheckpoint) VerifyChecksum() error {
|
|||||||
ck := dc.Checksum
|
ck := dc.Checksum
|
||||||
dc.Checksum = 0
|
dc.Checksum = 0
|
||||||
err := ck.Verify(dc)
|
err := ck.Verify(dc)
|
||||||
if err == errors.ErrCorruptCheckpoint {
|
if errors.Is(err, cmerrors.CorruptCheckpointError{}) {
|
||||||
// Verify with old structs without ResourceHandles field
|
// Verify with old structs without ResourceHandles field
|
||||||
// TODO: remove in Beta
|
// TODO: remove in Beta
|
||||||
err = verifyChecksumWithoutResourceHandles(dc, ck)
|
err = verifyChecksumWithoutResourceHandles(dc, ck)
|
||||||
@ -115,8 +116,12 @@ func verifyChecksumWithoutResourceHandles(dc *DRAManagerCheckpoint, checkSum che
|
|||||||
object = strings.Replace(object, "ClaimInfoStateListWithoutResourceHandles", "ClaimInfoStateList", 1)
|
object = strings.Replace(object, "ClaimInfoStateListWithoutResourceHandles", "ClaimInfoStateList", 1)
|
||||||
hash := fnv.New32a()
|
hash := fnv.New32a()
|
||||||
fmt.Fprintf(hash, "%v", object)
|
fmt.Fprintf(hash, "%v", object)
|
||||||
if checkSum != checksum.Checksum(hash.Sum32()) {
|
actualCS := checksum.Checksum(hash.Sum32())
|
||||||
return errors.ErrCorruptCheckpoint
|
if checkSum != actualCS {
|
||||||
|
return &cmerrors.CorruptCheckpointError{
|
||||||
|
ActualCS: uint64(actualCS),
|
||||||
|
ExpectedCS: uint64(checkSum),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -126,7 +126,7 @@ func (sc *stateCheckpoint) GetOrCreate() (ClaimInfoStateList, error) {
|
|||||||
return ClaimInfoStateList{}, nil
|
return ClaimInfoStateList{}, nil
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to get checkpoint %v: %v", sc.checkpointName, err)
|
return nil, fmt.Errorf("failed to get checkpoint %v: %w", sc.checkpointName, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return checkpoint.Entries, nil
|
return checkpoint.Entries, nil
|
||||||
|
@ -17,16 +17,19 @@ limitations under the License.
|
|||||||
package state
|
package state
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
|
resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
|
||||||
"k8s.io/apimachinery/pkg/util/sets"
|
"k8s.io/apimachinery/pkg/util/sets"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||||
|
cmerrors "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
|
||||||
testutil "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state/testing"
|
testutil "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state/testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -198,7 +201,7 @@ func TestCheckpointGetOrCreate(t *testing.T) {
|
|||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
assert.Contains(t, err.Error(), tc.expectedError)
|
assert.Contains(t, err.Error(), tc.expectedError)
|
||||||
} else {
|
} else {
|
||||||
assert.NoError(t, err, "unexpected error while creating checkpointState")
|
requireNoCheckpointError(t, err)
|
||||||
// compare state after restoration with the one expected
|
// compare state after restoration with the one expected
|
||||||
assertStateEqual(t, state, tc.expectedState)
|
assertStateEqual(t, state, tc.expectedState)
|
||||||
}
|
}
|
||||||
@ -270,7 +273,7 @@ func TestOldCheckpointRestore(t *testing.T) {
|
|||||||
|
|
||||||
checkpoint := NewDRAManagerCheckpoint()
|
checkpoint := NewDRAManagerCheckpoint()
|
||||||
err = cpm.GetCheckpoint(testingCheckpoint, checkpoint)
|
err = cpm.GetCheckpoint(testingCheckpoint, checkpoint)
|
||||||
assert.NoError(t, err, "could not restore checkpoint")
|
requireNoCheckpointError(t, err)
|
||||||
|
|
||||||
checkpointData, err := checkpoint.MarshalCheckpoint()
|
checkpointData, err := checkpoint.MarshalCheckpoint()
|
||||||
assert.NoError(t, err, "could not Marshal Checkpoint")
|
assert.NoError(t, err, "could not Marshal Checkpoint")
|
||||||
@ -278,3 +281,13 @@ func TestOldCheckpointRestore(t *testing.T) {
|
|||||||
expectedData := `{"version":"v1","entries":[{"DriverName":"test-driver.cdi.k8s.io","ClassName":"class-name","ClaimUID":"067798be-454e-4be4-9047-1aa06aea63f7","ClaimName":"example","Namespace":"default","PodUIDs":{"139cdb46-f989-4f17-9561-ca10cfb509a6":{}},"ResourceHandles":null,"CDIDevices":{"test-driver.cdi.k8s.io":["example.com/example=cdi-example"]}}],"checksum":453625682}`
|
expectedData := `{"version":"v1","entries":[{"DriverName":"test-driver.cdi.k8s.io","ClassName":"class-name","ClaimUID":"067798be-454e-4be4-9047-1aa06aea63f7","ClaimName":"example","Namespace":"default","PodUIDs":{"139cdb46-f989-4f17-9561-ca10cfb509a6":{}},"ResourceHandles":null,"CDIDevices":{"test-driver.cdi.k8s.io":["example.com/example=cdi-example"]}}],"checksum":453625682}`
|
||||||
assert.Equal(t, expectedData, string(checkpointData), "expected ClaimInfoState does not equal to restored one")
|
assert.Equal(t, expectedData, string(checkpointData), "expected ClaimInfoState does not equal to restored one")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func requireNoCheckpointError(t *testing.T, err error) {
|
||||||
|
t.Helper()
|
||||||
|
var cksumErr *cmerrors.CorruptCheckpointError
|
||||||
|
if errors.As(err, &cksumErr) {
|
||||||
|
t.Fatalf("unexpected corrupt checkpoint, expected checksum %d, got %d", cksumErr.ExpectedCS, cksumErr.ActualCS)
|
||||||
|
} else {
|
||||||
|
require.NoError(t, err, "could not restore checkpoint")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user