mirror of
				https://github.com/k3s-io/kubernetes.git
				synced 2025-11-04 07:49:35 +00:00 
			
		
		
		
	The checkpointing mechanism will repopulate the DRA Manager in-memory cache on kubelet restart. This ensures that the information needed by the PodResources API remains available across a kubelet restart. The ClaimInfoState struct represents the state of the DRA Manager in-memory cache in the checkpoint. It is embedded in ClaimInfo, which also includes the annotation field. The in-memory cache is kept separate from the cache state in the checkpoint so that the checkpoint is not tied to the in-memory cache struct, which may change in the future. In ClaimInfoState we save only the minimal fields required to restore the in-memory cache. Signed-off-by: Moshe Levi <moshele@nvidia.com>
		
			
				
	
	
		
			116 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			116 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
/*
 | 
						|
Copyright 2023 The Kubernetes Authors.
 | 
						|
 | 
						|
Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
you may not use this file except in compliance with the License.
 | 
						|
You may obtain a copy of the License at
 | 
						|
 | 
						|
    http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
 | 
						|
Unless required by applicable law or agreed to in writing, software
 | 
						|
distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
See the License for the specific language governing permissions and
 | 
						|
limitations under the License.
 | 
						|
*/
 | 
						|
 | 
						|
package state
 | 
						|
 | 
						|
import (
 | 
						|
	"fmt"
 | 
						|
	"sync"
 | 
						|
 | 
						|
	"k8s.io/apimachinery/pkg/types"
 | 
						|
	"k8s.io/apimachinery/pkg/util/sets"
 | 
						|
	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
 | 
						|
	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
 | 
						|
)
 | 
						|
 | 
						|
// Compile-time assertion that *stateCheckpoint implements CheckpointState.
var _ CheckpointState = (*stateCheckpoint)(nil)
 | 
						|
 | 
						|
// CheckpointState interface provides to get and store state
 | 
						|
type CheckpointState interface {
 | 
						|
	GetOrCreate() (ClaimInfoStateList, error)
 | 
						|
	Store(ClaimInfoStateList) error
 | 
						|
}
 | 
						|
 | 
						|
// ClaimInfoState is used to store claim info state in a checkpoint
 | 
						|
type ClaimInfoState struct {
 | 
						|
	// Name of the DRA driver
 | 
						|
	DriverName string
 | 
						|
 | 
						|
	// ClaimUID is an UID of the resource claim
 | 
						|
	ClaimUID types.UID
 | 
						|
 | 
						|
	// ClaimName is a name of the resource claim
 | 
						|
	ClaimName string
 | 
						|
 | 
						|
	// Namespace is a claim namespace
 | 
						|
	Namespace string
 | 
						|
 | 
						|
	// PodUIDs is a set of pod UIDs that reference a resource
 | 
						|
	PodUIDs sets.Set[string]
 | 
						|
 | 
						|
	// CdiDevices is a list of CDI devices returned by the
 | 
						|
	// GRPC API call NodePrepareResource
 | 
						|
	CdiDevices []string
 | 
						|
}
 | 
						|
 | 
						|
type stateCheckpoint struct {
 | 
						|
	sync.RWMutex
 | 
						|
	checkpointManager checkpointmanager.CheckpointManager
 | 
						|
	checkpointName    string
 | 
						|
}
 | 
						|
 | 
						|
// NewCheckpointState creates new State for keeping track of claim info  with checkpoint backend
 | 
						|
func NewCheckpointState(stateDir, checkpointName string) (*stateCheckpoint, error) {
 | 
						|
	checkpointManager, err := checkpointmanager.NewCheckpointManager(stateDir)
 | 
						|
	if err != nil {
 | 
						|
		return nil, fmt.Errorf("failed to initialize checkpoint manager: %v", err)
 | 
						|
	}
 | 
						|
	stateCheckpoint := &stateCheckpoint{
 | 
						|
		checkpointManager: checkpointManager,
 | 
						|
		checkpointName:    checkpointName,
 | 
						|
	}
 | 
						|
 | 
						|
	return stateCheckpoint, nil
 | 
						|
}
 | 
						|
 | 
						|
// get state from a checkpoint and creates it if it doesn't exist
 | 
						|
func (sc *stateCheckpoint) GetOrCreate() (ClaimInfoStateList, error) {
 | 
						|
	sc.Lock()
 | 
						|
	defer sc.Unlock()
 | 
						|
 | 
						|
	checkpoint := NewDRAManagerCheckpoint()
 | 
						|
	err := sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpoint)
 | 
						|
	if err == errors.ErrCheckpointNotFound {
 | 
						|
		sc.store(ClaimInfoStateList{})
 | 
						|
		return ClaimInfoStateList{}, nil
 | 
						|
	}
 | 
						|
	if err != nil {
 | 
						|
		return nil, fmt.Errorf("failed to get checkpoint %v: %v", sc.checkpointName, err)
 | 
						|
	}
 | 
						|
 | 
						|
	return checkpoint.Entries, nil
 | 
						|
}
 | 
						|
 | 
						|
// saves state to a checkpoint
 | 
						|
func (sc *stateCheckpoint) Store(claimInfoStateList ClaimInfoStateList) error {
 | 
						|
	sc.Lock()
 | 
						|
	defer sc.Unlock()
 | 
						|
 | 
						|
	return sc.store(claimInfoStateList)
 | 
						|
}
 | 
						|
 | 
						|
// saves state to a checkpoint, caller is responsible for locking
 | 
						|
func (sc *stateCheckpoint) store(claimInfoStateList ClaimInfoStateList) error {
 | 
						|
	checkpoint := NewDRAManagerCheckpoint()
 | 
						|
	checkpoint.Entries = claimInfoStateList
 | 
						|
 | 
						|
	err := sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint)
 | 
						|
	if err != nil {
 | 
						|
		return fmt.Errorf("could not save checkpoint %s: %v", sc.checkpointName, err)
 | 
						|
	}
 | 
						|
	return nil
 | 
						|
}
 |