mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-07 03:03:59 +00:00
kubelet: improve checkpoint errors
Recording the expected and actual checksum in the error makes it possible to provide that information, for example in a failed test like the ones for DRA. Otherwise developers have to manually step through the test with a debugger to figure out what the new checksum is.
This commit is contained in:
parent
157f4b94d8
commit
b9d00841a6
@ -28,8 +28,9 @@ type Checksum uint64
|
||||
|
||||
// Verify verifies that passed checksum is same as calculated checksum
|
||||
func (cs Checksum) Verify(data interface{}) error {
|
||||
if cs != New(data) {
|
||||
return errors.ErrCorruptCheckpoint
|
||||
actualCS := New(data)
|
||||
if cs != actualCS {
|
||||
return &errors.CorruptCheckpointError{ActualCS: uint64(actualCS), ExpectedCS: uint64(cs)}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -18,8 +18,28 @@ package errors
|
||||
|
||||
import "fmt"
|
||||
|
||||
// ErrCorruptCheckpoint error is reported when checksum does not match
|
||||
var ErrCorruptCheckpoint = fmt.Errorf("checkpoint is corrupted")
|
||||
// ErrCorruptCheckpoint error is reported when checksum does not match.
|
||||
// Check for it with:
|
||||
//
|
||||
// var csErr *CorruptCheckpointError
|
||||
// if errors.As(err, &csErr) { ... }
|
||||
// if errors.Is(err, CorruptCheckpointError{}) { ... }
|
||||
type CorruptCheckpointError struct {
|
||||
ActualCS, ExpectedCS uint64
|
||||
}
|
||||
|
||||
func (err CorruptCheckpointError) Error() string {
|
||||
return "checkpoint is corrupted"
|
||||
}
|
||||
|
||||
func (err CorruptCheckpointError) Is(target error) bool {
|
||||
switch target.(type) {
|
||||
case *CorruptCheckpointError, CorruptCheckpointError:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// ErrCheckpointNotFound is reported when checkpoint is not found for a given key
|
||||
var ErrCheckpointNotFound = fmt.Errorf("checkpoint is not found")
|
||||
|
@ -110,8 +110,12 @@ func (cp *CPUManagerCheckpointV1) VerifyChecksum() error {
|
||||
|
||||
hash := fnv.New32a()
|
||||
fmt.Fprintf(hash, "%v", object)
|
||||
if cp.Checksum != checksum.Checksum(hash.Sum32()) {
|
||||
return errors.ErrCorruptCheckpoint
|
||||
actualCS := checksum.Checksum(hash.Sum32())
|
||||
if cp.Checksum != actualCS {
|
||||
return &errors.CorruptCheckpointError{
|
||||
ActualCS: uint64(actualCS),
|
||||
ExpectedCS: uint64(cp.Checksum),
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
|
@ -18,6 +18,7 @@ package state
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"strings"
|
||||
@ -25,7 +26,7 @@ import (
|
||||
"k8s.io/apimachinery/pkg/util/dump"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
|
||||
cmerrors "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
|
||||
)
|
||||
|
||||
var _ checkpointmanager.Checkpoint = &DRAManagerCheckpoint{}
|
||||
@ -79,7 +80,7 @@ func (dc *DRAManagerCheckpoint) VerifyChecksum() error {
|
||||
ck := dc.Checksum
|
||||
dc.Checksum = 0
|
||||
err := ck.Verify(dc)
|
||||
if err == errors.ErrCorruptCheckpoint {
|
||||
if errors.Is(err, cmerrors.CorruptCheckpointError{}) {
|
||||
// Verify with old structs without ResourceHandles field
|
||||
// TODO: remove in Beta
|
||||
err = verifyChecksumWithoutResourceHandles(dc, ck)
|
||||
@ -115,8 +116,12 @@ func verifyChecksumWithoutResourceHandles(dc *DRAManagerCheckpoint, checkSum che
|
||||
object = strings.Replace(object, "ClaimInfoStateListWithoutResourceHandles", "ClaimInfoStateList", 1)
|
||||
hash := fnv.New32a()
|
||||
fmt.Fprintf(hash, "%v", object)
|
||||
if checkSum != checksum.Checksum(hash.Sum32()) {
|
||||
return errors.ErrCorruptCheckpoint
|
||||
actualCS := checksum.Checksum(hash.Sum32())
|
||||
if checkSum != actualCS {
|
||||
return &cmerrors.CorruptCheckpointError{
|
||||
ActualCS: uint64(actualCS),
|
||||
ExpectedCS: uint64(checkSum),
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -126,7 +126,7 @@ func (sc *stateCheckpoint) GetOrCreate() (ClaimInfoStateList, error) {
|
||||
return ClaimInfoStateList{}, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get checkpoint %v: %v", sc.checkpointName, err)
|
||||
return nil, fmt.Errorf("failed to get checkpoint %v: %w", sc.checkpointName, err)
|
||||
}
|
||||
|
||||
return checkpoint.Entries, nil
|
||||
|
@ -17,16 +17,19 @@ limitations under the License.
|
||||
package state
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
cmerrors "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
|
||||
testutil "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state/testing"
|
||||
)
|
||||
|
||||
@ -198,7 +201,7 @@ func TestCheckpointGetOrCreate(t *testing.T) {
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), tc.expectedError)
|
||||
} else {
|
||||
assert.NoError(t, err, "unexpected error while creating checkpointState")
|
||||
requireNoCheckpointError(t, err)
|
||||
// compare state after restoration with the one expected
|
||||
assertStateEqual(t, state, tc.expectedState)
|
||||
}
|
||||
@ -270,7 +273,7 @@ func TestOldCheckpointRestore(t *testing.T) {
|
||||
|
||||
checkpoint := NewDRAManagerCheckpoint()
|
||||
err = cpm.GetCheckpoint(testingCheckpoint, checkpoint)
|
||||
assert.NoError(t, err, "could not restore checkpoint")
|
||||
requireNoCheckpointError(t, err)
|
||||
|
||||
checkpointData, err := checkpoint.MarshalCheckpoint()
|
||||
assert.NoError(t, err, "could not Marshal Checkpoint")
|
||||
@ -278,3 +281,13 @@ func TestOldCheckpointRestore(t *testing.T) {
|
||||
expectedData := `{"version":"v1","entries":[{"DriverName":"test-driver.cdi.k8s.io","ClassName":"class-name","ClaimUID":"067798be-454e-4be4-9047-1aa06aea63f7","ClaimName":"example","Namespace":"default","PodUIDs":{"139cdb46-f989-4f17-9561-ca10cfb509a6":{}},"ResourceHandles":null,"CDIDevices":{"test-driver.cdi.k8s.io":["example.com/example=cdi-example"]}}],"checksum":453625682}`
|
||||
assert.Equal(t, expectedData, string(checkpointData), "expected ClaimInfoState does not equal to restored one")
|
||||
}
|
||||
|
||||
func requireNoCheckpointError(t *testing.T, err error) {
|
||||
t.Helper()
|
||||
var cksumErr *cmerrors.CorruptCheckpointError
|
||||
if errors.As(err, &cksumErr) {
|
||||
t.Fatalf("unexpected corrupt checkpoint, expected checksum %d, got %d", cksumErr.ExpectedCS, cksumErr.ActualCS)
|
||||
} else {
|
||||
require.NoError(t, err, "could not restore checkpoint")
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user