mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 11:50:44 +00:00
Merge pull request #56040 from vikaschoudhary16/ckmngr
Automatic merge from submit-queue (batch tested with PRs 56040, 62627). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Node-level Checkpointing manager: Migrate dockershim and device plugin manager checkpointing **What this PR does / why we need it**: This PR abstracts checkpoint manager at kubelet level. Currently, `dockershim`, `deviceplugin` have their own native checkpointing primitives. And most recently `cpumanager` also added package native checkpointing primitives. This adds to the redundancy at implementation level. Also degrades code readability and consistency. To help this: 1. Checkpointing interface is being abstracted at kubelet level as `checkpointmanager` package. 2. `dockershim` and `deviceplugin` packages are modified to use `checkpointmanager` instead native checkpointing. **Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*: Fixes # ```release-note None ``` cc @jeremyeder @vishh @derekwaynecarr @sjenning @yujuhong @dchen1107 @RenaudWasTaken @ConnorDoyle @RenaudWasTaken @jiayingz @mindprince @timstclair /sig node
This commit is contained in:
commit
83ee626561
@ -163,8 +163,12 @@ pkg/kubelet/apis/kubeletconfig
|
||||
pkg/kubelet/apis/kubeletconfig/v1beta1
|
||||
pkg/kubelet/cadvisor
|
||||
pkg/kubelet/cadvisor/testing
|
||||
pkg/kubelet/checkpoint
|
||||
pkg/kubelet/checkpointmanager/checksum
|
||||
pkg/kubelet/checkpointmanager/testing/example_checkpoint_formats/v1
|
||||
pkg/kubelet/client
|
||||
pkg/kubelet/cm
|
||||
pkg/kubelet/cm/devicemanager/checkpoint
|
||||
pkg/kubelet/cm/util
|
||||
pkg/kubelet/config
|
||||
pkg/kubelet/configmap
|
||||
@ -181,7 +185,6 @@ pkg/kubelet/dockershim/network/hostport
|
||||
pkg/kubelet/dockershim/network/hostport/testing
|
||||
pkg/kubelet/dockershim/network/kubenet
|
||||
pkg/kubelet/dockershim/network/testing
|
||||
pkg/kubelet/dockershim/testing
|
||||
pkg/kubelet/events
|
||||
pkg/kubelet/images
|
||||
pkg/kubelet/kuberuntime
|
||||
|
@ -46,6 +46,7 @@ go_library(
|
||||
"//pkg/kubelet/apis/kubeletconfig:go_default_library",
|
||||
"//pkg/kubelet/cadvisor:go_default_library",
|
||||
"//pkg/kubelet/certificate:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager:go_default_library",
|
||||
"//pkg/kubelet/cm:go_default_library",
|
||||
"//pkg/kubelet/config:go_default_library",
|
||||
"//pkg/kubelet/configmap:go_default_library",
|
||||
@ -246,6 +247,7 @@ filegroup(
|
||||
"//pkg/kubelet/cadvisor:all-srcs",
|
||||
"//pkg/kubelet/certificate:all-srcs",
|
||||
"//pkg/kubelet/checkpoint:all-srcs",
|
||||
"//pkg/kubelet/checkpointmanager:all-srcs",
|
||||
"//pkg/kubelet/client:all-srcs",
|
||||
"//pkg/kubelet/cm:all-srcs",
|
||||
"//pkg/kubelet/config:all-srcs",
|
||||
|
@ -7,9 +7,8 @@ go_library(
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//pkg/apis/core:go_default_library",
|
||||
"//pkg/volume/util:go_default_library",
|
||||
"//vendor/github.com/dchest/safefile:go_default_library",
|
||||
"//vendor/github.com/ghodss/yaml:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager/checksum:go_default_library",
|
||||
"//vendor/github.com/golang/glog:go_default_library",
|
||||
"//vendor/k8s.io/api/core/v1:go_default_library",
|
||||
],
|
||||
@ -21,6 +20,7 @@ go_test(
|
||||
embed = [":go_default_library"],
|
||||
deps = [
|
||||
"//pkg/apis/core:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager:go_default_library",
|
||||
"//vendor/k8s.io/api/core/v1:go_default_library",
|
||||
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
|
||||
],
|
||||
|
@ -17,20 +17,15 @@ limitations under the License.
|
||||
package checkpoint
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/dchest/safefile"
|
||||
"github.com/ghodss/yaml"
|
||||
"github.com/golang/glog"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/kubernetes/pkg/apis/core"
|
||||
"k8s.io/kubernetes/pkg/volume/util"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -39,54 +34,44 @@ const (
|
||||
podPrefix = "Pod"
|
||||
)
|
||||
|
||||
// Manager is the interface used to manage checkpoints
|
||||
// which involves writing resources to disk to recover
|
||||
// during restart or failure scenarios.
|
||||
// https://github.com/kubernetes/community/pull/1241/files
|
||||
type Manager interface {
|
||||
// LoadPods will load checkpointed Pods from disk
|
||||
LoadPods() ([]*v1.Pod, error)
|
||||
|
||||
// WritePod will serialize a Pod to disk
|
||||
WritePod(pod *v1.Pod) error
|
||||
|
||||
// Deletes the checkpoint of the given pod from disk
|
||||
DeletePod(pod *v1.Pod) error
|
||||
type PodCheckpoint interface {
|
||||
checkpointmanager.Checkpoint
|
||||
GetPod() *v1.Pod
|
||||
}
|
||||
|
||||
var instance Manager
|
||||
var mutex = &sync.Mutex{}
|
||||
|
||||
// fileCheckPointManager - is a checkpointer that writes contents to disk
|
||||
// The type information of the resource objects are encoded in the name
|
||||
type fileCheckPointManager struct {
|
||||
path string
|
||||
// Data to be stored as checkpoint
|
||||
type Data struct {
|
||||
Pod *v1.Pod
|
||||
Checksum checksum.Checksum
|
||||
}
|
||||
|
||||
// NewCheckpointManager will create a Manager that points to the following path
|
||||
func NewCheckpointManager(path string) Manager {
|
||||
// NOTE: This is a precaution; current implementation should not run
|
||||
// multiple checkpoint managers.
|
||||
mutex.Lock()
|
||||
defer mutex.Unlock()
|
||||
instance = &fileCheckPointManager{path: path}
|
||||
return instance
|
||||
// NewPodCheckpoint returns new pod checkpoint
|
||||
func NewPodCheckpoint(pod *v1.Pod) PodCheckpoint {
|
||||
return &Data{Pod: pod}
|
||||
}
|
||||
|
||||
// GetInstance will return the current Manager, there should be only one.
|
||||
func GetInstance() Manager {
|
||||
mutex.Lock()
|
||||
defer mutex.Unlock()
|
||||
return instance
|
||||
// MarshalCheckpoint returns marshalled data
|
||||
func (cp *Data) MarshalCheckpoint() ([]byte, error) {
|
||||
cp.Checksum = checksum.New(*cp.Pod)
|
||||
return json.Marshal(*cp)
|
||||
}
|
||||
|
||||
// loadPod will load Pod Checkpoint yaml file.
|
||||
func (fcp *fileCheckPointManager) loadPod(file string) (*v1.Pod, error) {
|
||||
return util.LoadPodFromFile(file)
|
||||
// UnmarshalCheckpoint returns unmarshalled data
|
||||
func (cp *Data) UnmarshalCheckpoint(blob []byte) error {
|
||||
return json.Unmarshal(blob, cp)
|
||||
}
|
||||
|
||||
// VerifyChecksum verifies that passed checksum is same as calculated checksum
|
||||
func (cp *Data) VerifyChecksum() error {
|
||||
return cp.Checksum.Verify(*cp.Pod)
|
||||
}
|
||||
|
||||
func (cp *Data) GetPod() *v1.Pod {
|
||||
return cp.Pod
|
||||
}
|
||||
|
||||
// checkAnnotations will validate the checkpoint annotations exist on the Pod
|
||||
func (fcp *fileCheckPointManager) checkAnnotations(pod *v1.Pod) bool {
|
||||
func checkAnnotations(pod *v1.Pod) bool {
|
||||
if podAnnotations := pod.GetAnnotations(); podAnnotations != nil {
|
||||
if podAnnotations[core.BootstrapCheckpointAnnotationKey] == "true" {
|
||||
return true
|
||||
@ -95,57 +80,49 @@ func (fcp *fileCheckPointManager) checkAnnotations(pod *v1.Pod) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// getPodPath returns the full qualified path for the pod checkpoint
|
||||
func (fcp *fileCheckPointManager) getPodPath(pod *v1.Pod) string {
|
||||
return fmt.Sprintf("%v/Pod%v%v.yaml", fcp.path, delimiter, pod.GetUID())
|
||||
//getPodKey returns the full qualified path for the pod checkpoint
|
||||
func getPodKey(pod *v1.Pod) string {
|
||||
return fmt.Sprintf("Pod%v%v.yaml", delimiter, pod.GetUID())
|
||||
}
|
||||
|
||||
// LoadPods Loads All Checkpoints from disk
|
||||
func (fcp *fileCheckPointManager) LoadPods() ([]*v1.Pod, error) {
|
||||
checkpoints := make([]*v1.Pod, 0)
|
||||
files, err := ioutil.ReadDir(fcp.path)
|
||||
func LoadPods(cpm checkpointmanager.CheckpointManager) ([]*v1.Pod, error) {
|
||||
pods := make([]*v1.Pod, 0)
|
||||
|
||||
var err error
|
||||
checkpointKeys := []string{}
|
||||
checkpointKeys, err = cpm.ListCheckpoints()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
glog.Errorf("Failed to list checkpoints: %v", err)
|
||||
}
|
||||
for _, f := range files {
|
||||
// get just the filename
|
||||
_, fname := filepath.Split(f.Name())
|
||||
// Get just the Resource from "Resource_Name"
|
||||
fnfields := strings.Split(fname, delimiter)
|
||||
switch fnfields[0] {
|
||||
case podPrefix:
|
||||
pod, err := fcp.loadPod(fmt.Sprintf("%s/%s", fcp.path, f.Name()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
checkpoints = append(checkpoints, pod)
|
||||
default:
|
||||
glog.Warningf("Unsupported checkpoint file detected %v", f)
|
||||
|
||||
for _, key := range checkpointKeys {
|
||||
checkpoint := NewPodCheckpoint(nil)
|
||||
err := cpm.GetCheckpoint(key, checkpoint)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to retrieve checkpoint for pod %q: %v", key, err)
|
||||
continue
|
||||
}
|
||||
pods = append(pods, checkpoint.GetPod())
|
||||
}
|
||||
return checkpoints, nil
|
||||
return pods, nil
|
||||
}
|
||||
|
||||
// Writes a checkpoint to a file on disk if annotation is present
|
||||
func (fcp *fileCheckPointManager) WritePod(pod *v1.Pod) error {
|
||||
// WritePod a checkpoint to a file on disk if annotation is present
|
||||
func WritePod(cpm checkpointmanager.CheckpointManager, pod *v1.Pod) error {
|
||||
var err error
|
||||
if fcp.checkAnnotations(pod) {
|
||||
if blob, err := yaml.Marshal(pod); err == nil {
|
||||
err = safefile.WriteFile(fcp.getPodPath(pod), blob, 0644)
|
||||
}
|
||||
if checkAnnotations(pod) {
|
||||
data := NewPodCheckpoint(pod)
|
||||
err = cpm.CreateCheckpoint(getPodKey(pod), data)
|
||||
} else {
|
||||
// This is to handle an edge where a pod update could remove
|
||||
// an annotation and the checkpoint should then be removed.
|
||||
err = fcp.DeletePod(pod)
|
||||
err = cpm.RemoveCheckpoint(getPodKey(pod))
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Deletes a checkpoint from disk if present
|
||||
func (fcp *fileCheckPointManager) DeletePod(pod *v1.Pod) error {
|
||||
podPath := fcp.getPodPath(pod)
|
||||
if err := os.Remove(podPath); !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
// DeletePod deletes a checkpoint from disk if present
|
||||
func DeletePod(cpm checkpointmanager.CheckpointManager, pod *v1.Pod) error {
|
||||
return cpm.RemoveCheckpoint(getPodKey(pod))
|
||||
}
|
||||
|
@ -25,6 +25,7 @@ import (
|
||||
"k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/kubernetes/pkg/apis/core"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
)
|
||||
|
||||
// TestWriteLoadDeletePods validates all combinations of write, load, and delete
|
||||
@ -70,15 +71,18 @@ func TestWriteLoadDeletePods(t *testing.T) {
|
||||
}
|
||||
defer os.RemoveAll(dir)
|
||||
|
||||
cp := NewCheckpointManager(dir)
|
||||
cpm, err := checkpointmanager.NewCheckpointManager(dir)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to initialize checkpoint manager error=%v", err)
|
||||
}
|
||||
for _, p := range testPods {
|
||||
// Write pods should always pass unless there is an fs error
|
||||
if err := cp.WritePod(p.pod); err != nil {
|
||||
if err := WritePod(cpm, p.pod); err != nil {
|
||||
t.Errorf("Failed to Write Pod: %v", err)
|
||||
}
|
||||
}
|
||||
// verify the correct written files are loaded from disk
|
||||
pods, err := cp.LoadPods()
|
||||
pods, err := LoadPods(cpm)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to Load Pods: %v", err)
|
||||
}
|
||||
@ -104,7 +108,7 @@ func TestWriteLoadDeletePods(t *testing.T) {
|
||||
} else if lpod != nil {
|
||||
t.Errorf("Got unexpected result for %v, should not have been loaded", pname)
|
||||
}
|
||||
err = cp.DeletePod(p.pod)
|
||||
err = DeletePod(cpm, p.pod)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to delete pod %v", pname)
|
||||
}
|
||||
|
48
pkg/kubelet/checkpointmanager/BUILD
Normal file
48
pkg/kubelet/checkpointmanager/BUILD
Normal file
@ -0,0 +1,48 @@
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
load(
|
||||
"@io_bazel_rules_go//go:def.bzl",
|
||||
"go_library",
|
||||
"go_test",
|
||||
)
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = ["checkpoint_manager.go"],
|
||||
importpath = "k8s.io/kubernetes/pkg/kubelet/checkpointmanager",
|
||||
deps = [
|
||||
"//pkg/kubelet/checkpointmanager/errors:go_default_library",
|
||||
"//pkg/kubelet/util/store:go_default_library",
|
||||
"//pkg/util/filesystem:go_default_library",
|
||||
],
|
||||
)
|
||||
|
||||
go_test(
|
||||
name = "go_default_test",
|
||||
srcs = ["checkpoint_manager_test.go"],
|
||||
embed = [":go_default_library"],
|
||||
deps = [
|
||||
"//pkg/kubelet/checkpointmanager/checksum:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager/testing:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager/testing/example_checkpoint_formats/v1:go_default_library",
|
||||
"//vendor/github.com/stretchr/testify/assert:go_default_library",
|
||||
],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [
|
||||
":package-srcs",
|
||||
"//pkg/kubelet/checkpointmanager/checksum:all-srcs",
|
||||
"//pkg/kubelet/checkpointmanager/errors:all-srcs",
|
||||
"//pkg/kubelet/checkpointmanager/testing:all-srcs",
|
||||
],
|
||||
tags = ["automanaged"],
|
||||
)
|
25
pkg/kubelet/checkpointmanager/README.md
Normal file
25
pkg/kubelet/checkpointmanager/README.md
Normal file
@ -0,0 +1,25 @@
|
||||
## DISCLAIMER
|
||||
- Sig-Node community has reached a general consensus, as a best practice, to
|
||||
avoid introducing any new checkpointing support. We reached this understanding
|
||||
after struggling with some hard-to-debug issues in the production environments
|
||||
caused by the checkpointing.
|
||||
- Any changes to the checkpointed data structure would be considered incompatible and a component should add its own handling if it needs to ensure backward compatibility of reading old-format checkpoint files.
|
||||
|
||||
## Introduction
|
||||
This folder contains a framework & primitives, Checkpointing Manager, which is
|
||||
used by several other Kubelet submodules, `dockershim`, `devicemanager`, `pods`
|
||||
and `cpumanager`, to implement checkpointing at each submodule level. As already
|
||||
explained in above `Disclaimer` section, think twice before introducing any further
|
||||
checkpointing in Kubelet. If still checkpointing is required, then this folder
|
||||
provides the common APIs and the framework for implementing checkpointing.
|
||||
Using same APIs across all the submodules will help maintaining consistency at
|
||||
Kubelet level.
|
||||
|
||||
Below is the history of checkpointing support in Kubelet.
|
||||
|
||||
| Package | First checkpointing support merged on | PR link |
|
||||
| ------- | --------------------------------------| ------- |
|
||||
|kubelet/dockershim | Feb 3, 2017 | [[CRI] Implement Dockershim Checkpoint](https://github.com/kubernetes/kubernetes/pull/39903)
|
||||
|devicemanager| Sep 6, 2017 | [Deviceplugin checkpoint](https://github.com/kubernetes/kubernetes/pull/51744)
|
||||
| kubelet/pod | Nov 22, 2017 | [Initial basic bootstrap-checkpoint support](https://github.com/kubernetes/kubernetes/pull/50984)
|
||||
|cpumanager| Oct 27, 2017 |[Add file backed state to cpu manager ](https://github.com/kubernetes/kubernetes/pull/54408)
|
110
pkg/kubelet/checkpointmanager/checkpoint_manager.go
Normal file
110
pkg/kubelet/checkpointmanager/checkpoint_manager.go
Normal file
@ -0,0 +1,110 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package checkpointmanager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
|
||||
utilstore "k8s.io/kubernetes/pkg/kubelet/util/store"
|
||||
utilfs "k8s.io/kubernetes/pkg/util/filesystem"
|
||||
)
|
||||
|
||||
// Checkpoint provides the process checkpoint data
|
||||
type Checkpoint interface {
|
||||
MarshalCheckpoint() ([]byte, error)
|
||||
UnmarshalCheckpoint(blob []byte) error
|
||||
VerifyChecksum() error
|
||||
}
|
||||
|
||||
// CheckpointManager provides the interface to manage checkpoint
|
||||
type CheckpointManager interface {
|
||||
// CreateCheckpoint persists checkpoint in CheckpointStore. checkpointKey is the key for utilstore to locate checkpoint.
|
||||
// For file backed utilstore, checkpointKey is the file name to write the checkpoint data.
|
||||
CreateCheckpoint(checkpointKey string, checkpoint Checkpoint) error
|
||||
// GetCheckpoint retrieves checkpoint from CheckpointStore.
|
||||
GetCheckpoint(checkpointKey string, checkpoint Checkpoint) error
|
||||
// WARNING: RemoveCheckpoint will not return error if checkpoint does not exist.
|
||||
RemoveCheckpoint(checkpointKey string) error
|
||||
// ListCheckpoint returns the list of existing checkpoints.
|
||||
ListCheckpoints() ([]string, error)
|
||||
}
|
||||
|
||||
// impl is an implementation of CheckpointManager. It persists checkpoint in CheckpointStore
|
||||
type impl struct {
|
||||
path string
|
||||
store utilstore.Store
|
||||
mutex sync.Mutex
|
||||
}
|
||||
|
||||
// NewCheckpointManager returns a new instance of a checkpoint manager
|
||||
func NewCheckpointManager(checkpointDir string) (CheckpointManager, error) {
|
||||
fstore, err := utilstore.NewFileStore(checkpointDir, utilfs.DefaultFs{})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &impl{path: checkpointDir, store: fstore}, nil
|
||||
}
|
||||
|
||||
// CreateCheckpoint persists checkpoint in CheckpointStore.
|
||||
func (manager *impl) CreateCheckpoint(checkpointKey string, checkpoint Checkpoint) error {
|
||||
manager.mutex.Lock()
|
||||
defer manager.mutex.Unlock()
|
||||
blob, err := checkpoint.MarshalCheckpoint()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return manager.store.Write(checkpointKey, blob)
|
||||
}
|
||||
|
||||
// GetCheckpoint retrieves checkpoint from CheckpointStore.
|
||||
func (manager *impl) GetCheckpoint(checkpointKey string, checkpoint Checkpoint) error {
|
||||
manager.mutex.Lock()
|
||||
defer manager.mutex.Unlock()
|
||||
blob, err := manager.store.Read(checkpointKey)
|
||||
if err != nil {
|
||||
if err == utilstore.ErrKeyNotFound {
|
||||
return errors.ErrCheckpointNotFound
|
||||
}
|
||||
return err
|
||||
}
|
||||
err = checkpoint.UnmarshalCheckpoint(blob)
|
||||
if err == nil {
|
||||
err = checkpoint.VerifyChecksum()
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// RemoveCheckpoint will not return error if checkpoint does not exist.
|
||||
func (manager *impl) RemoveCheckpoint(checkpointKey string) error {
|
||||
manager.mutex.Lock()
|
||||
defer manager.mutex.Unlock()
|
||||
return manager.store.Delete(checkpointKey)
|
||||
}
|
||||
|
||||
// ListCheckpoints returns the list of existing checkpoints.
|
||||
func (manager *impl) ListCheckpoints() ([]string, error) {
|
||||
manager.mutex.Lock()
|
||||
defer manager.mutex.Unlock()
|
||||
keys, err := manager.store.List()
|
||||
if err != nil {
|
||||
return []string{}, fmt.Errorf("failed to list checkpoint store: %v", err)
|
||||
}
|
||||
return keys, nil
|
||||
}
|
245
pkg/kubelet/checkpointmanager/checkpoint_manager_test.go
Normal file
245
pkg/kubelet/checkpointmanager/checkpoint_manager_test.go
Normal file
@ -0,0 +1,245 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package checkpointmanager
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
|
||||
utilstore "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/testing"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/testing/example_checkpoint_formats/v1"
|
||||
)
|
||||
|
||||
var testStore *utilstore.MemStore
|
||||
|
||||
type FakeCheckpoint interface {
|
||||
Checkpoint
|
||||
GetData() ([]*PortMapping, bool)
|
||||
}
|
||||
|
||||
// Data contains all types of data that can be stored in the checkpoint.
|
||||
type Data struct {
|
||||
PortMappings []*PortMapping `json:"port_mappings,omitempty"`
|
||||
HostNetwork bool `json:"host_network,omitempty"`
|
||||
}
|
||||
|
||||
type CheckpointDataV2 struct {
|
||||
PortMappings []*PortMapping `json:"port_mappings,omitempty"`
|
||||
HostNetwork bool `json:"host_network,omitempty"`
|
||||
V2Field string `json:"v2field"`
|
||||
}
|
||||
|
||||
type protocol string
|
||||
|
||||
// portMapping is the port mapping configurations of a sandbox.
|
||||
type PortMapping struct {
|
||||
// protocol of the port mapping.
|
||||
Protocol *protocol
|
||||
// Port number within the container.
|
||||
ContainerPort *int32
|
||||
// Port number on the host.
|
||||
HostPort *int32
|
||||
}
|
||||
|
||||
// CheckpointData is a sample example structure to be used in test cases for checkpointing
|
||||
type CheckpointData struct {
|
||||
Version string
|
||||
Name string
|
||||
Data *Data
|
||||
Checksum checksum.Checksum
|
||||
}
|
||||
|
||||
func newFakeCheckpointV1(name string, portMappings []*PortMapping, hostNetwork bool) FakeCheckpoint {
|
||||
return &CheckpointData{
|
||||
Version: "v1",
|
||||
Name: name,
|
||||
Data: &Data{
|
||||
PortMappings: portMappings,
|
||||
HostNetwork: hostNetwork,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (cp *CheckpointData) MarshalCheckpoint() ([]byte, error) {
|
||||
cp.Checksum = checksum.New(*cp.Data)
|
||||
return json.Marshal(*cp)
|
||||
}
|
||||
|
||||
func (cp *CheckpointData) UnmarshalCheckpoint(blob []byte) error {
|
||||
return json.Unmarshal(blob, cp)
|
||||
}
|
||||
|
||||
func (cp *CheckpointData) VerifyChecksum() error {
|
||||
return cp.Checksum.Verify(*cp.Data)
|
||||
}
|
||||
|
||||
func (cp *CheckpointData) GetData() ([]*PortMapping, bool) {
|
||||
return cp.Data.PortMappings, cp.Data.HostNetwork
|
||||
}
|
||||
|
||||
type checkpointDataV2 struct {
|
||||
Version string
|
||||
Name string
|
||||
Data *CheckpointDataV2
|
||||
Checksum checksum.Checksum
|
||||
}
|
||||
|
||||
func newFakeCheckpointV2(name string, portMappings []*PortMapping, hostNetwork bool) FakeCheckpoint {
|
||||
return &checkpointDataV2{
|
||||
Version: "v2",
|
||||
Name: name,
|
||||
Data: &CheckpointDataV2{
|
||||
PortMappings: portMappings,
|
||||
HostNetwork: hostNetwork,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func newFakeCheckpointRemoteV1(name string, portMappings []*v1.PortMapping, hostNetwork bool) Checkpoint {
|
||||
return &v1.CheckpointData{
|
||||
Version: "v1",
|
||||
Name: name,
|
||||
Data: &v1.Data{
|
||||
PortMappings: portMappings,
|
||||
HostNetwork: hostNetwork,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (cp *checkpointDataV2) MarshalCheckpoint() ([]byte, error) {
|
||||
cp.Checksum = checksum.New(*cp.Data)
|
||||
return json.Marshal(*cp)
|
||||
}
|
||||
|
||||
func (cp *checkpointDataV2) UnmarshalCheckpoint(blob []byte) error {
|
||||
return json.Unmarshal(blob, cp)
|
||||
}
|
||||
|
||||
func (cp *checkpointDataV2) VerifyChecksum() error {
|
||||
return cp.Checksum.Verify(*cp.Data)
|
||||
}
|
||||
|
||||
func (cp *checkpointDataV2) GetData() ([]*PortMapping, bool) {
|
||||
return cp.Data.PortMappings, cp.Data.HostNetwork
|
||||
}
|
||||
|
||||
func newTestCheckpointManager() CheckpointManager {
|
||||
return &impl{store: testStore}
|
||||
}
|
||||
|
||||
func TestCheckpointManager(t *testing.T) {
|
||||
var err error
|
||||
testStore = utilstore.NewMemStore()
|
||||
manager := newTestCheckpointManager()
|
||||
port80 := int32(80)
|
||||
port443 := int32(443)
|
||||
proto := protocol("tcp")
|
||||
|
||||
portMappings := []*PortMapping{
|
||||
{
|
||||
&proto,
|
||||
&port80,
|
||||
&port80,
|
||||
},
|
||||
{
|
||||
&proto,
|
||||
&port443,
|
||||
&port443,
|
||||
},
|
||||
}
|
||||
checkpoint1 := newFakeCheckpointV1("check1", portMappings, true)
|
||||
|
||||
checkpoints := []struct {
|
||||
checkpointKey string
|
||||
checkpoint FakeCheckpoint
|
||||
expectHostNetwork bool
|
||||
}{
|
||||
{
|
||||
"key1",
|
||||
checkpoint1,
|
||||
true,
|
||||
},
|
||||
{
|
||||
"key2",
|
||||
newFakeCheckpointV1("check2", nil, false),
|
||||
false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range checkpoints {
|
||||
// Test CreateCheckpoints
|
||||
err = manager.CreateCheckpoint(tc.checkpointKey, tc.checkpoint)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Test GetCheckpoints
|
||||
checkpointOut := newFakeCheckpointV1("", nil, false)
|
||||
err := manager.GetCheckpoint(tc.checkpointKey, checkpointOut)
|
||||
assert.NoError(t, err)
|
||||
actualPortMappings, actualHostNetwork := checkpointOut.GetData()
|
||||
expPortMappings, expHostNetwork := tc.checkpoint.GetData()
|
||||
assert.Equal(t, actualPortMappings, expPortMappings)
|
||||
assert.Equal(t, actualHostNetwork, expHostNetwork)
|
||||
}
|
||||
// Test it fails if tried to read V1 structure into V2, a different structure from the structure which is checkpointed
|
||||
checkpointV2 := newFakeCheckpointV2("", nil, false)
|
||||
err = manager.GetCheckpoint("key1", checkpointV2)
|
||||
assert.EqualError(t, err, "checkpoint is corrupted")
|
||||
|
||||
// Test it fails if tried to read V1 structure into the same structure but defined in another package
|
||||
checkpointRemoteV1 := newFakeCheckpointRemoteV1("", nil, false)
|
||||
err = manager.GetCheckpoint("key1", checkpointRemoteV1)
|
||||
assert.EqualError(t, err, "checkpoint is corrupted")
|
||||
|
||||
// Test it works if tried to read V1 structure using into a new V1 structure
|
||||
checkpointV1 := newFakeCheckpointV1("", nil, false)
|
||||
err = manager.GetCheckpoint("key1", checkpointV1)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Test corrupt checksum case
|
||||
checkpointOut := newFakeCheckpointV1("", nil, false)
|
||||
blob, err := checkpointOut.MarshalCheckpoint()
|
||||
assert.NoError(t, err)
|
||||
testStore.Write("key1", blob)
|
||||
err = manager.GetCheckpoint("key1", checkpoint1)
|
||||
assert.EqualError(t, err, "checkpoint is corrupted")
|
||||
|
||||
// Test ListCheckpoints
|
||||
keys, err := manager.ListCheckpoints()
|
||||
assert.NoError(t, err)
|
||||
sort.Strings(keys)
|
||||
assert.Equal(t, keys, []string{"key1", "key2"})
|
||||
|
||||
// Test RemoveCheckpoints
|
||||
err = manager.RemoveCheckpoint("key1")
|
||||
assert.NoError(t, err)
|
||||
// Test Remove Nonexisted Checkpoints
|
||||
err = manager.RemoveCheckpoint("key1")
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Test ListCheckpoints
|
||||
keys, err = manager.ListCheckpoints()
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, keys, []string{"key2"})
|
||||
|
||||
// Test Get NonExisted Checkpoint
|
||||
checkpointNE := newFakeCheckpointV1("NE", nil, false)
|
||||
err = manager.GetCheckpoint("key1", checkpointNE)
|
||||
assert.Error(t, err)
|
||||
}
|
26
pkg/kubelet/checkpointmanager/checksum/BUILD
Normal file
26
pkg/kubelet/checkpointmanager/checksum/BUILD
Normal file
@ -0,0 +1,26 @@
|
||||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = ["checksum.go"],
|
||||
importpath = "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//pkg/kubelet/checkpointmanager/errors:go_default_library",
|
||||
"//pkg/util/hash:go_default_library",
|
||||
],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [":package-srcs"],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
46
pkg/kubelet/checkpointmanager/checksum/checksum.go
Normal file
46
pkg/kubelet/checkpointmanager/checksum/checksum.go
Normal file
@ -0,0 +1,46 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package checksum
|
||||
|
||||
import (
|
||||
"hash/fnv"
|
||||
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
|
||||
hashutil "k8s.io/kubernetes/pkg/util/hash"
|
||||
)
|
||||
|
||||
// Data to be stored as checkpoint
|
||||
type Checksum uint64
|
||||
|
||||
// VerifyChecksum verifies that passed checksum is same as calculated checksum
|
||||
func (cs Checksum) Verify(data interface{}) error {
|
||||
if cs != New(data) {
|
||||
return errors.ErrCorruptCheckpoint
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func New(data interface{}) Checksum {
|
||||
return Checksum(getChecksum(data))
|
||||
}
|
||||
|
||||
// Get returns calculated checksum of checkpoint data
|
||||
func getChecksum(data interface{}) uint64 {
|
||||
hash := fnv.New32a()
|
||||
hashutil.DeepHashObject(hash, data)
|
||||
return uint64(hash.Sum32())
|
||||
}
|
@ -7,8 +7,8 @@ load(
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = ["util.go"],
|
||||
importpath = "k8s.io/kubernetes/pkg/kubelet/dockershim/testing",
|
||||
srcs = ["errors.go"],
|
||||
importpath = "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors",
|
||||
)
|
||||
|
||||
filegroup(
|
25
pkg/kubelet/checkpointmanager/errors/errors.go
Normal file
25
pkg/kubelet/checkpointmanager/errors/errors.go
Normal file
@ -0,0 +1,25 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package errors
|
||||
|
||||
import "fmt"
|
||||
|
||||
// Sentinel errors reported by checkpoint storage operations.
var (
	// ErrCorruptCheckpoint is reported when the stored checksum does not
	// match the checkpointed data.
	ErrCorruptCheckpoint = fmt.Errorf("checkpoint is corrupted")

	// ErrCheckpointNotFound is reported when no checkpoint exists for a
	// given key.
	ErrCheckpointNotFound = fmt.Errorf("checkpoint is not found")
)
|
28
pkg/kubelet/checkpointmanager/testing/BUILD
Normal file
28
pkg/kubelet/checkpointmanager/testing/BUILD
Normal file
@ -0,0 +1,28 @@
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
load(
|
||||
"@io_bazel_rules_go//go:def.bzl",
|
||||
"go_library",
|
||||
)
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = ["util.go"],
|
||||
importpath = "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/testing",
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [
|
||||
":package-srcs",
|
||||
"//pkg/kubelet/checkpointmanager/testing/example_checkpoint_formats/v1:all-srcs",
|
||||
],
|
||||
tags = ["automanaged"],
|
||||
)
|
@ -0,0 +1,23 @@
|
||||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = ["types.go"],
|
||||
importpath = "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/testing/example_checkpoint_formats/v1",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = ["//pkg/kubelet/checkpointmanager/checksum:go_default_library"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [":package-srcs"],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
@ -0,0 +1,62 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
|
||||
)
|
||||
|
||||
// protocol identifies the network protocol of a port mapping.
type protocol string

// PortMapping is the port mapping configurations of a sandbox.
type PortMapping struct {
	// Protocol of the port mapping.
	Protocol *protocol
	// Port number within the container.
	ContainerPort *int32
	// Port number on the host.
	HostPort *int32
}
|
||||
|
||||
// Data contains all types of data that can be stored in the checkpoint.
type Data struct {
	PortMappings []*PortMapping `json:"port_mappings,omitempty"`
	HostNetwork  bool           `json:"host_network,omitempty"`
}
|
||||
|
||||
// CheckpointData is a sample structure to be used in test cases for
// checkpointing.
type CheckpointData struct {
	Version string
	Name    string
	Data    *Data
	// Checksum guards Data against corruption; it is refreshed by
	// MarshalCheckpoint and checked by VerifyChecksum.
	Checksum checksum.Checksum
}
|
||||
|
||||
// MarshalCheckpoint recomputes the checksum of cp.Data and returns the JSON
// encoding of the whole checkpoint.
func (cp *CheckpointData) MarshalCheckpoint() ([]byte, error) {
	cp.Checksum = checksum.New(*cp.Data)
	return json.Marshal(*cp)
}
|
||||
|
||||
// UnmarshalCheckpoint decodes the JSON blob into cp. It does not validate
// the checksum; use VerifyChecksum for that.
func (cp *CheckpointData) UnmarshalCheckpoint(blob []byte) error {
	return json.Unmarshal(blob, cp)
}
|
||||
|
||||
// VerifyChecksum verifies that the stored checksum matches the checksum
// calculated from cp.Data.
func (cp *CheckpointData) VerifyChecksum() error {
	return cp.Checksum.Verify(*cp.Data)
}
|
@ -27,10 +27,12 @@ type MemStore struct {
|
||||
sync.Mutex
|
||||
}
|
||||
|
||||
// NewMemStore returns an instance of MemStore
|
||||
func NewMemStore() *MemStore {
|
||||
return &MemStore{mem: make(map[string][]byte)}
|
||||
}
|
||||
|
||||
// Write writes the data to the store
|
||||
func (mstore *MemStore) Write(key string, data []byte) error {
|
||||
mstore.Lock()
|
||||
defer mstore.Unlock()
|
||||
@ -38,6 +40,7 @@ func (mstore *MemStore) Write(key string, data []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Read returns data read from store
|
||||
func (mstore *MemStore) Read(key string) ([]byte, error) {
|
||||
mstore.Lock()
|
||||
defer mstore.Unlock()
|
||||
@ -48,6 +51,7 @@ func (mstore *MemStore) Read(key string) ([]byte, error) {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// Delete deletes data from the store
|
||||
func (mstore *MemStore) Delete(key string) error {
|
||||
mstore.Lock()
|
||||
defer mstore.Unlock()
|
||||
@ -55,6 +59,7 @@ func (mstore *MemStore) Delete(key string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// List returns all the keys from the store
|
||||
func (mstore *MemStore) List() ([]string, error) {
|
||||
mstore.Lock()
|
||||
defer mstore.Unlock()
|
@ -15,13 +15,14 @@ go_library(
|
||||
deps = [
|
||||
"//pkg/apis/core/v1/helper:go_default_library",
|
||||
"//pkg/kubelet/apis/deviceplugin/v1beta1:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager/errors:go_default_library",
|
||||
"//pkg/kubelet/cm/devicemanager/checkpoint:go_default_library",
|
||||
"//pkg/kubelet/config:go_default_library",
|
||||
"//pkg/kubelet/container:go_default_library",
|
||||
"//pkg/kubelet/lifecycle:go_default_library",
|
||||
"//pkg/kubelet/metrics:go_default_library",
|
||||
"//pkg/kubelet/util/store:go_default_library",
|
||||
"//pkg/scheduler/schedulercache:go_default_library",
|
||||
"//pkg/util/filesystem:go_default_library",
|
||||
"//vendor/github.com/golang/glog:go_default_library",
|
||||
"//vendor/google.golang.org/grpc:go_default_library",
|
||||
"//vendor/k8s.io/api/core/v1:go_default_library",
|
||||
@ -39,10 +40,9 @@ go_test(
|
||||
embed = [":go_default_library"],
|
||||
deps = [
|
||||
"//pkg/kubelet/apis/deviceplugin/v1beta1:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager:go_default_library",
|
||||
"//pkg/kubelet/lifecycle:go_default_library",
|
||||
"//pkg/kubelet/util/store:go_default_library",
|
||||
"//pkg/scheduler/schedulercache:go_default_library",
|
||||
"//pkg/util/filesystem:go_default_library",
|
||||
"//vendor/github.com/stretchr/testify/assert:go_default_library",
|
||||
"//vendor/github.com/stretchr/testify/require:go_default_library",
|
||||
"//vendor/k8s.io/api/core/v1:go_default_library",
|
||||
@ -62,7 +62,10 @@ filegroup(
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [":package-srcs"],
|
||||
srcs = [
|
||||
":package-srcs",
|
||||
"//pkg/kubelet/cm/devicemanager/checkpoint:all-srcs",
|
||||
],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
26
pkg/kubelet/cm/devicemanager/checkpoint/BUILD
Normal file
26
pkg/kubelet/cm/devicemanager/checkpoint/BUILD
Normal file
@ -0,0 +1,26 @@
|
||||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = ["checkpoint.go"],
|
||||
importpath = "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//pkg/kubelet/checkpointmanager:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager/checksum:go_default_library",
|
||||
],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [":package-srcs"],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
81
pkg/kubelet/cm/devicemanager/checkpoint/checkpoint.go
Normal file
81
pkg/kubelet/cm/devicemanager/checkpoint/checkpoint.go
Normal file
@ -0,0 +1,81 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package checkpoint
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
|
||||
)
|
||||
|
||||
// DeviceManagerCheckpoint is the checkpoint interface of the device manager,
// extending the generic checkpoint with access to its payload.
type DeviceManagerCheckpoint interface {
	checkpointmanager.Checkpoint
	// GetData returns the checkpointed pod device entries and the map of
	// registered device IDs keyed by resource name.
	GetData() ([]PodDevicesEntry, map[string][]string)
}
|
||||
|
||||
// PodDevicesEntry records one <pod, container, resource> device allocation.
type PodDevicesEntry struct {
	PodUID        string
	ContainerName string
	ResourceName  string
	// DeviceIDs are the devices allocated to this entry.
	DeviceIDs []string
	// AllocResp is the marshalled allocation response for this entry.
	AllocResp []byte
}
|
||||
|
||||
// checkpointData struct is used to store pod to device allocation information
// and registered device information in a checkpoint file.
// TODO: add version control when we need to change checkpoint format.
type checkpointData struct {
	PodDeviceEntries  []PodDevicesEntry
	RegisteredDevices map[string][]string
}
|
||||
|
||||
// Data holds the checkpoint payload together with its checksum.
type Data struct {
	Data checkpointData
	// Checksum is refreshed by MarshalCheckpoint and checked by
	// VerifyChecksum.
	Checksum checksum.Checksum
}
|
||||
|
||||
// New returns a DeviceManagerCheckpoint populated with the given pod device
// entries and registered devices.
func New(devEntries []PodDevicesEntry,
	devices map[string][]string) DeviceManagerCheckpoint {
	return &Data{
		Data: checkpointData{
			PodDeviceEntries:  devEntries,
			RegisteredDevices: devices,
		},
	}
}
|
||||
|
||||
// MarshalCheckpoint recomputes the checksum of the checkpoint data and
// returns the JSON encoding of the whole checkpoint.
func (cp *Data) MarshalCheckpoint() ([]byte, error) {
	cp.Checksum = checksum.New(cp.Data)
	return json.Marshal(*cp)
}
|
||||
|
||||
// UnmarshalCheckpoint decodes the JSON blob into cp. It does not validate
// the checksum; use VerifyChecksum for that.
func (cp *Data) UnmarshalCheckpoint(blob []byte) error {
	return json.Unmarshal(blob, cp)
}
|
||||
|
||||
// VerifyChecksum verifies that the stored checksum matches the checksum
// calculated from cp.Data.
func (cp *Data) VerifyChecksum() error {
	return cp.Checksum.Verify(cp.Data)
}
|
||||
|
||||
// GetData returns the checkpointed pod device entries and the registered
// devices map.
func (cp *Data) GetData() ([]PodDevicesEntry, map[string][]string) {
	return cp.Data.PodDeviceEntries, cp.Data.RegisteredDevices
}
|
@ -18,7 +18,6 @@ package devicemanager
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
@ -34,12 +33,13 @@ import (
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
|
||||
pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
||||
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||
utilstore "k8s.io/kubernetes/pkg/kubelet/util/store"
|
||||
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
|
||||
utilfs "k8s.io/kubernetes/pkg/util/filesystem"
|
||||
)
|
||||
|
||||
// ActivePodsFunc is a function that returns a list of pods to reconcile.
|
||||
@ -83,9 +83,9 @@ type ManagerImpl struct {
|
||||
allocatedDevices map[string]sets.String
|
||||
|
||||
// podDevices contains pod to allocated device mapping.
|
||||
podDevices podDevices
|
||||
store utilstore.Store
|
||||
pluginOpts map[string]*pluginapi.DevicePluginOptions
|
||||
podDevices podDevices
|
||||
pluginOpts map[string]*pluginapi.DevicePluginOptions
|
||||
checkpointManager checkpointmanager.CheckpointManager
|
||||
}
|
||||
|
||||
type sourcesReadyStub struct{}
|
||||
@ -122,11 +122,11 @@ func newManagerImpl(socketPath string) (*ManagerImpl, error) {
|
||||
// Before that, initializes them to perform no-op operations.
|
||||
manager.activePods = func() []*v1.Pod { return []*v1.Pod{} }
|
||||
manager.sourcesReady = &sourcesReadyStub{}
|
||||
var err error
|
||||
manager.store, err = utilstore.NewFileStore(dir, utilfs.DefaultFs{})
|
||||
checkpointManager, err := checkpointmanager.NewCheckpointManager(dir)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to initialize device plugin checkpointing store: %+v", err)
|
||||
return nil, fmt.Errorf("failed to initialize checkpoint manager: %+v", err)
|
||||
}
|
||||
manager.checkpointManager = checkpointManager
|
||||
|
||||
return manager, nil
|
||||
}
|
||||
@ -454,33 +454,19 @@ func (m *ManagerImpl) GetCapacity() (v1.ResourceList, v1.ResourceList, []string)
|
||||
return capacity, allocatable, deletedResources.UnsortedList()
|
||||
}
|
||||
|
||||
// checkpointData struct is used to store pod to device allocation information
|
||||
// and registered device information in a checkpoint file.
|
||||
// TODO: add version control when we need to change checkpoint format.
|
||||
type checkpointData struct {
|
||||
PodDeviceEntries []podDevicesCheckpointEntry
|
||||
RegisteredDevices map[string][]string
|
||||
}
|
||||
|
||||
// Checkpoints device to container allocation information to disk.
|
||||
func (m *ManagerImpl) writeCheckpoint() error {
|
||||
m.mutex.Lock()
|
||||
data := checkpointData{
|
||||
PodDeviceEntries: m.podDevices.toCheckpointData(),
|
||||
RegisteredDevices: make(map[string][]string),
|
||||
}
|
||||
registeredDevs := make(map[string][]string)
|
||||
for resource, devices := range m.healthyDevices {
|
||||
data.RegisteredDevices[resource] = devices.UnsortedList()
|
||||
registeredDevs[resource] = devices.UnsortedList()
|
||||
}
|
||||
data := checkpoint.New(m.podDevices.toCheckpointData(),
|
||||
registeredDevs)
|
||||
m.mutex.Unlock()
|
||||
|
||||
dataJSON, err := json.Marshal(data)
|
||||
err := m.checkpointManager.CreateCheckpoint(kubeletDeviceManagerCheckpoint, data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = m.store.Write(kubeletDeviceManagerCheckpoint, dataJSON)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write deviceplugin checkpoint file %q: %v", kubeletDeviceManagerCheckpoint, err)
|
||||
return fmt.Errorf("failed to write checkpoint file %q: %v", kubeletDeviceManagerCheckpoint, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -488,24 +474,23 @@ func (m *ManagerImpl) writeCheckpoint() error {
|
||||
// Reads device to container allocation information from disk, and populates
|
||||
// m.allocatedDevices accordingly.
|
||||
func (m *ManagerImpl) readCheckpoint() error {
|
||||
content, err := m.store.Read(kubeletDeviceManagerCheckpoint)
|
||||
registeredDevs := make(map[string][]string)
|
||||
devEntries := make([]checkpoint.PodDevicesEntry, 0)
|
||||
cp := checkpoint.New(devEntries, registeredDevs)
|
||||
err := m.checkpointManager.GetCheckpoint(kubeletDeviceManagerCheckpoint, cp)
|
||||
if err != nil {
|
||||
if err == utilstore.ErrKeyNotFound {
|
||||
if err == errors.ErrCheckpointNotFound {
|
||||
glog.Warningf("Failed to retrieve checkpoint for %q: %v", kubeletDeviceManagerCheckpoint, err)
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("failed to read checkpoint file %q: %v", kubeletDeviceManagerCheckpoint, err)
|
||||
return err
|
||||
}
|
||||
glog.V(4).Infof("Read checkpoint file %s\n", kubeletDeviceManagerCheckpoint)
|
||||
var data checkpointData
|
||||
if err := json.Unmarshal(content, &data); err != nil {
|
||||
return fmt.Errorf("failed to unmarshal deviceplugin checkpoint data: %v", err)
|
||||
}
|
||||
|
||||
m.mutex.Lock()
|
||||
defer m.mutex.Unlock()
|
||||
m.podDevices.fromCheckpointData(data.PodDeviceEntries)
|
||||
podDevices, registeredDevs := cp.GetData()
|
||||
m.podDevices.fromCheckpointData(podDevices)
|
||||
m.allocatedDevices = m.podDevices.devices()
|
||||
for resource := range data.RegisteredDevices {
|
||||
for resource := range registeredDevs {
|
||||
// During start up, creates empty healthyDevices list so that the resource capacity
|
||||
// will stay zero till the corresponding device plugin re-registers.
|
||||
m.healthyDevices[resource] = sets.NewString()
|
||||
|
@ -17,7 +17,6 @@ limitations under the License.
|
||||
package devicemanager
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
@ -34,10 +33,9 @@ import (
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/apimachinery/pkg/util/uuid"
|
||||
pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
||||
utilstore "k8s.io/kubernetes/pkg/kubelet/util/store"
|
||||
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
|
||||
utilfs "k8s.io/kubernetes/pkg/util/filesystem"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -347,20 +345,19 @@ func constructAllocResp(devices, mounts, envs map[string]string) *pluginapi.Cont
|
||||
func TestCheckpoint(t *testing.T) {
|
||||
resourceName1 := "domain1.com/resource1"
|
||||
resourceName2 := "domain2.com/resource2"
|
||||
|
||||
as := assert.New(t)
|
||||
tmpDir, err := ioutil.TempDir("", "checkpoint")
|
||||
as.Nil(err)
|
||||
defer os.RemoveAll(tmpDir)
|
||||
ckm, err := checkpointmanager.NewCheckpointManager(tmpDir)
|
||||
as.Nil(err)
|
||||
testManager := &ManagerImpl{
|
||||
socketdir: tmpDir,
|
||||
endpoints: make(map[string]endpoint),
|
||||
healthyDevices: make(map[string]sets.String),
|
||||
unhealthyDevices: make(map[string]sets.String),
|
||||
allocatedDevices: make(map[string]sets.String),
|
||||
podDevices: make(podDevices),
|
||||
endpoints: make(map[string]endpoint),
|
||||
healthyDevices: make(map[string]sets.String),
|
||||
unhealthyDevices: make(map[string]sets.String),
|
||||
allocatedDevices: make(map[string]sets.String),
|
||||
podDevices: make(podDevices),
|
||||
checkpointManager: ckm,
|
||||
}
|
||||
testManager.store, _ = utilstore.NewFileStore("/tmp/", utilfs.DefaultFs{})
|
||||
|
||||
testManager.podDevices.insert("pod1", "con1", resourceName1,
|
||||
constructDevices([]string{"dev1", "dev2"}),
|
||||
@ -479,21 +476,25 @@ func makePod(limits v1.ResourceList) *v1.Pod {
|
||||
}
|
||||
}
|
||||
|
||||
func getTestManager(tmpDir string, activePods ActivePodsFunc, testRes []TestResource, opts map[string]*pluginapi.DevicePluginOptions) *ManagerImpl {
|
||||
func getTestManager(tmpDir string, activePods ActivePodsFunc, testRes []TestResource, opts map[string]*pluginapi.DevicePluginOptions) (*ManagerImpl, error) {
|
||||
monitorCallback := func(resourceName string, added, updated, deleted []pluginapi.Device) {}
|
||||
testManager := &ManagerImpl{
|
||||
socketdir: tmpDir,
|
||||
callback: monitorCallback,
|
||||
healthyDevices: make(map[string]sets.String),
|
||||
unhealthyDevices: make(map[string]sets.String),
|
||||
allocatedDevices: make(map[string]sets.String),
|
||||
endpoints: make(map[string]endpoint),
|
||||
pluginOpts: opts,
|
||||
podDevices: make(podDevices),
|
||||
activePods: activePods,
|
||||
sourcesReady: &sourcesReadyStub{},
|
||||
ckm, err := checkpointmanager.NewCheckpointManager(tmpDir)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
testManager := &ManagerImpl{
|
||||
socketdir: tmpDir,
|
||||
callback: monitorCallback,
|
||||
healthyDevices: make(map[string]sets.String),
|
||||
unhealthyDevices: make(map[string]sets.String),
|
||||
allocatedDevices: make(map[string]sets.String),
|
||||
endpoints: make(map[string]endpoint),
|
||||
pluginOpts: opts,
|
||||
podDevices: make(podDevices),
|
||||
activePods: activePods,
|
||||
sourcesReady: &sourcesReadyStub{},
|
||||
checkpointManager: ckm,
|
||||
}
|
||||
testManager.store, _ = utilstore.NewFileStore("/tmp/", utilfs.DefaultFs{})
|
||||
for _, res := range testRes {
|
||||
testManager.healthyDevices[res.resourceName] = sets.NewString()
|
||||
for _, dev := range res.devs {
|
||||
@ -525,7 +526,7 @@ func getTestManager(tmpDir string, activePods ActivePodsFunc, testRes []TestReso
|
||||
}
|
||||
}
|
||||
}
|
||||
return testManager
|
||||
return testManager, nil
|
||||
}
|
||||
|
||||
func getTestNodeInfo(allocatable v1.ResourceList) *schedulercache.NodeInfo {
|
||||
@ -546,7 +547,6 @@ type TestResource struct {
|
||||
}
|
||||
|
||||
func TestPodContainerDeviceAllocation(t *testing.T) {
|
||||
flag.Set("alsologtostderr", fmt.Sprintf("%t", true))
|
||||
res1 := TestResource{
|
||||
resourceName: "domain1.com/resource1",
|
||||
resourceQuantity: *resource.NewQuantity(int64(2), resource.DecimalSI),
|
||||
@ -569,7 +569,8 @@ func TestPodContainerDeviceAllocation(t *testing.T) {
|
||||
defer os.RemoveAll(tmpDir)
|
||||
nodeInfo := getTestNodeInfo(v1.ResourceList{})
|
||||
pluginOpts := make(map[string]*pluginapi.DevicePluginOptions)
|
||||
testManager := getTestManager(tmpDir, podsStub.getActivePods, testResources, pluginOpts)
|
||||
testManager, err := getTestManager(tmpDir, podsStub.getActivePods, testResources, pluginOpts)
|
||||
as.Nil(err)
|
||||
|
||||
testPods := []*v1.Pod{
|
||||
makePod(v1.ResourceList{
|
||||
@ -664,7 +665,8 @@ func TestInitContainerDeviceAllocation(t *testing.T) {
|
||||
as.Nil(err)
|
||||
defer os.RemoveAll(tmpDir)
|
||||
pluginOpts := make(map[string]*pluginapi.DevicePluginOptions)
|
||||
testManager := getTestManager(tmpDir, podsStub.getActivePods, testResources, pluginOpts)
|
||||
testManager, err := getTestManager(tmpDir, podsStub.getActivePods, testResources, pluginOpts)
|
||||
as.Nil(err)
|
||||
|
||||
podWithPluginResourcesInInitContainers := &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
@ -742,14 +744,18 @@ func TestSanitizeNodeAllocatable(t *testing.T) {
|
||||
|
||||
as := assert.New(t)
|
||||
monitorCallback := func(resourceName string, added, updated, deleted []pluginapi.Device) {}
|
||||
tmpDir, err := ioutil.TempDir("", "checkpoint")
|
||||
as.Nil(err)
|
||||
|
||||
ckm, err := checkpointmanager.NewCheckpointManager(tmpDir)
|
||||
as.Nil(err)
|
||||
testManager := &ManagerImpl{
|
||||
callback: monitorCallback,
|
||||
healthyDevices: make(map[string]sets.String),
|
||||
allocatedDevices: make(map[string]sets.String),
|
||||
podDevices: make(podDevices),
|
||||
callback: monitorCallback,
|
||||
allocatedDevices: make(map[string]sets.String),
|
||||
healthyDevices: make(map[string]sets.String),
|
||||
podDevices: make(podDevices),
|
||||
checkpointManager: ckm,
|
||||
}
|
||||
testManager.store, _ = utilstore.NewFileStore("/tmp/", utilfs.DefaultFs{})
|
||||
// require one of resource1 and one of resource2
|
||||
testManager.allocatedDevices[resourceName1] = sets.NewString()
|
||||
testManager.allocatedDevices[resourceName1].Insert(devID1)
|
||||
@ -796,7 +802,8 @@ func TestDevicePreStartContainer(t *testing.T) {
|
||||
pluginOpts := make(map[string]*pluginapi.DevicePluginOptions)
|
||||
pluginOpts[res1.resourceName] = &pluginapi.DevicePluginOptions{PreStartRequired: true}
|
||||
|
||||
testManager := getTestManager(tmpDir, podsStub.getActivePods, []TestResource{res1}, pluginOpts)
|
||||
testManager, err := getTestManager(tmpDir, podsStub.getActivePods, []TestResource{res1}, pluginOpts)
|
||||
as.Nil(err)
|
||||
|
||||
ch := make(chan []string, 1)
|
||||
testManager.endpoints[res1.resourceName] = &MockEndpoint{
|
||||
|
@ -21,6 +21,7 @@ import (
|
||||
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
)
|
||||
|
||||
@ -126,18 +127,9 @@ func (pdev podDevices) devices() map[string]sets.String {
|
||||
return ret
|
||||
}
|
||||
|
||||
// podDevicesCheckpointEntry is used to record <pod, container> to device allocation information.
|
||||
type podDevicesCheckpointEntry struct {
|
||||
PodUID string
|
||||
ContainerName string
|
||||
ResourceName string
|
||||
DeviceIDs []string
|
||||
AllocResp []byte
|
||||
}
|
||||
|
||||
// Turns podDevices to checkpointData.
|
||||
func (pdev podDevices) toCheckpointData() []podDevicesCheckpointEntry {
|
||||
var data []podDevicesCheckpointEntry
|
||||
func (pdev podDevices) toCheckpointData() []checkpoint.PodDevicesEntry {
|
||||
var data []checkpoint.PodDevicesEntry
|
||||
for podUID, containerDevices := range pdev {
|
||||
for conName, resources := range containerDevices {
|
||||
for resource, devices := range resources {
|
||||
@ -152,7 +144,12 @@ func (pdev podDevices) toCheckpointData() []podDevicesCheckpointEntry {
|
||||
glog.Errorf("Can't marshal allocResp for %v %v %v: %v", podUID, conName, resource, err)
|
||||
continue
|
||||
}
|
||||
data = append(data, podDevicesCheckpointEntry{podUID, conName, resource, devIds, allocResp})
|
||||
data = append(data, checkpoint.PodDevicesEntry{
|
||||
PodUID: podUID,
|
||||
ContainerName: conName,
|
||||
ResourceName: resource,
|
||||
DeviceIDs: devIds,
|
||||
AllocResp: allocResp})
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -160,7 +157,7 @@ func (pdev podDevices) toCheckpointData() []podDevicesCheckpointEntry {
|
||||
}
|
||||
|
||||
// Populates podDevices from the passed in checkpointData.
|
||||
func (pdev podDevices) fromCheckpointData(data []podDevicesCheckpointEntry) {
|
||||
func (pdev podDevices) fromCheckpointData(data []checkpoint.PodDevicesEntry) {
|
||||
for _, entry := range data {
|
||||
glog.V(2).Infof("Get checkpoint entry: %v %v %v %v %v\n",
|
||||
entry.PodUID, entry.ContainerName, entry.ResourceName, entry.DeviceIDs, entry.AllocResp)
|
||||
|
@ -58,6 +58,7 @@ go_library(
|
||||
"//pkg/apis/core/v1:go_default_library",
|
||||
"//pkg/apis/core/validation:go_default_library",
|
||||
"//pkg/kubelet/checkpoint:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager:go_default_library",
|
||||
"//pkg/kubelet/container:go_default_library",
|
||||
"//pkg/kubelet/events:go_default_library",
|
||||
"//pkg/kubelet/types:go_default_library",
|
||||
|
@ -27,6 +27,7 @@ import (
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/client-go/tools/record"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpoint"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/events"
|
||||
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
|
||||
@ -64,7 +65,7 @@ type PodConfig struct {
|
||||
// contains the list of all configured sources
|
||||
sourcesLock sync.Mutex
|
||||
sources sets.String
|
||||
checkpointManager checkpoint.Manager
|
||||
checkpointManager checkpointmanager.CheckpointManager
|
||||
}
|
||||
|
||||
// NewPodConfig creates an object that can merge many configuration sources into a stream
|
||||
@ -114,10 +115,12 @@ func (c *PodConfig) Sync() {
|
||||
func (c *PodConfig) Restore(path string, updates chan<- interface{}) error {
|
||||
var err error
|
||||
if c.checkpointManager == nil {
|
||||
c.checkpointManager = checkpoint.NewCheckpointManager(path)
|
||||
pods, err := c.checkpointManager.LoadPods()
|
||||
if err == nil {
|
||||
updates <- kubetypes.PodUpdate{Pods: pods, Op: kubetypes.RESTORE, Source: kubetypes.ApiserverSource}
|
||||
c.checkpointManager, err = checkpointmanager.NewCheckpointManager(path)
|
||||
if err != nil {
|
||||
pods, err := checkpoint.LoadPods(c.checkpointManager)
|
||||
if err == nil {
|
||||
updates <- kubetypes.PodUpdate{Pods: pods, Op: kubetypes.RESTORE, Source: kubetypes.ApiserverSource}
|
||||
}
|
||||
}
|
||||
}
|
||||
return err
|
||||
|
@ -82,6 +82,8 @@ go_library(
|
||||
"//pkg/credentialprovider:go_default_library",
|
||||
"//pkg/kubelet/apis/cri/runtime/v1alpha2:go_default_library",
|
||||
"//pkg/kubelet/apis/kubeletconfig:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager/checksum:go_default_library",
|
||||
"//pkg/kubelet/cm:go_default_library",
|
||||
"//pkg/kubelet/container:go_default_library",
|
||||
"//pkg/kubelet/dockershim/cm:go_default_library",
|
||||
@ -100,8 +102,6 @@ go_library(
|
||||
"//pkg/kubelet/util/ioutils:go_default_library",
|
||||
"//pkg/kubelet/util/store:go_default_library",
|
||||
"//pkg/security/apparmor:go_default_library",
|
||||
"//pkg/util/filesystem:go_default_library",
|
||||
"//pkg/util/hash:go_default_library",
|
||||
"//pkg/util/parsers:go_default_library",
|
||||
"//vendor/github.com/armon/circbuf:go_default_library",
|
||||
"//vendor/github.com/blang/semver:go_default_library",
|
||||
@ -149,12 +149,12 @@ go_test(
|
||||
embed = [":go_default_library"],
|
||||
deps = [
|
||||
"//pkg/kubelet/apis/cri/runtime/v1alpha2:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager:go_default_library",
|
||||
"//pkg/kubelet/container:go_default_library",
|
||||
"//pkg/kubelet/container/testing:go_default_library",
|
||||
"//pkg/kubelet/dockershim/libdocker:go_default_library",
|
||||
"//pkg/kubelet/dockershim/network:go_default_library",
|
||||
"//pkg/kubelet/dockershim/network/testing:go_default_library",
|
||||
"//pkg/kubelet/dockershim/testing:go_default_library",
|
||||
"//pkg/kubelet/types:go_default_library",
|
||||
"//pkg/kubelet/util/cache:go_default_library",
|
||||
"//pkg/security/apparmor:go_default_library",
|
||||
@ -186,7 +186,6 @@ filegroup(
|
||||
"//pkg/kubelet/dockershim/metrics:all-srcs",
|
||||
"//pkg/kubelet/dockershim/network:all-srcs",
|
||||
"//pkg/kubelet/dockershim/remote:all-srcs",
|
||||
"//pkg/kubelet/dockershim/testing:all-srcs",
|
||||
],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
|
@ -164,13 +164,14 @@ func containerToRuntimeAPISandbox(c *dockertypes.Container) (*runtimeapi.PodSand
|
||||
}, nil
|
||||
}
|
||||
|
||||
func checkpointToRuntimeAPISandbox(id string, checkpoint *PodSandboxCheckpoint) *runtimeapi.PodSandbox {
|
||||
func checkpointToRuntimeAPISandbox(id string, checkpoint DockershimCheckpoint) *runtimeapi.PodSandbox {
|
||||
state := runtimeapi.PodSandboxState_SANDBOX_NOTREADY
|
||||
_, name, namespace, _, _ := checkpoint.GetData()
|
||||
return &runtimeapi.PodSandbox{
|
||||
Id: id,
|
||||
Metadata: &runtimeapi.PodSandboxMetadata{
|
||||
Name: checkpoint.Name,
|
||||
Namespace: checkpoint.Namespace,
|
||||
Name: name,
|
||||
Namespace: namespace,
|
||||
},
|
||||
State: state,
|
||||
}
|
||||
|
@ -18,14 +18,9 @@ package dockershim
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/golang/glog"
|
||||
utilstore "k8s.io/kubernetes/pkg/kubelet/util/store"
|
||||
utilfs "k8s.io/kubernetes/pkg/util/filesystem"
|
||||
hashutil "k8s.io/kubernetes/pkg/util/hash"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -36,6 +31,11 @@ const (
|
||||
schemaVersion = "v1"
|
||||
)
|
||||
|
||||
type DockershimCheckpoint interface {
|
||||
checkpointmanager.Checkpoint
|
||||
GetData() (string, string, string, []*PortMapping, bool)
|
||||
}
|
||||
|
||||
type Protocol string
|
||||
|
||||
// PortMapping is the port mapping configurations of a sandbox.
|
||||
@ -65,89 +65,31 @@ type PodSandboxCheckpoint struct {
|
||||
// Data to checkpoint for pod sandbox.
|
||||
Data *CheckpointData `json:"data,omitempty"`
|
||||
// Checksum is calculated with fnv hash of the checkpoint object with checksum field set to be zero
|
||||
CheckSum uint64 `json:"checksum"`
|
||||
Checksum checksum.Checksum `json:"checksum"`
|
||||
}
|
||||
|
||||
// CheckpointHandler provides the interface to manage PodSandbox checkpoint
|
||||
type CheckpointHandler interface {
|
||||
// CreateCheckpoint persists sandbox checkpoint in CheckpointStore.
|
||||
CreateCheckpoint(podSandboxID string, checkpoint *PodSandboxCheckpoint) error
|
||||
// GetCheckpoint retrieves sandbox checkpoint from CheckpointStore.
|
||||
GetCheckpoint(podSandboxID string) (*PodSandboxCheckpoint, error)
|
||||
// RemoveCheckpoint removes sandbox checkpoint form CheckpointStore.
|
||||
// WARNING: RemoveCheckpoint will not return error if checkpoint does not exist.
|
||||
RemoveCheckpoint(podSandboxID string) error
|
||||
// ListCheckpoint returns the list of existing checkpoints.
|
||||
ListCheckpoints() ([]string, error)
|
||||
}
|
||||
|
||||
// PersistentCheckpointHandler is an implementation of CheckpointHandler. It persists checkpoint in CheckpointStore
|
||||
type PersistentCheckpointHandler struct {
|
||||
store utilstore.Store
|
||||
}
|
||||
|
||||
func NewPersistentCheckpointHandler(dockershimRootDir string) (CheckpointHandler, error) {
|
||||
fstore, err := utilstore.NewFileStore(filepath.Join(dockershimRootDir, sandboxCheckpointDir), utilfs.DefaultFs{})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &PersistentCheckpointHandler{store: fstore}, nil
|
||||
}
|
||||
|
||||
func (handler *PersistentCheckpointHandler) CreateCheckpoint(podSandboxID string, checkpoint *PodSandboxCheckpoint) error {
|
||||
checkpoint.CheckSum = calculateChecksum(*checkpoint)
|
||||
blob, err := json.Marshal(checkpoint)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return handler.store.Write(podSandboxID, blob)
|
||||
}
|
||||
|
||||
func (handler *PersistentCheckpointHandler) GetCheckpoint(podSandboxID string) (*PodSandboxCheckpoint, error) {
|
||||
blob, err := handler.store.Read(podSandboxID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var checkpoint PodSandboxCheckpoint
|
||||
//TODO: unmarhsal into a struct with just Version, check version, unmarshal into versioned type.
|
||||
err = json.Unmarshal(blob, &checkpoint)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to unmarshal checkpoint %q, removing checkpoint. Checkpoint content: %q. ErrMsg: %v", podSandboxID, string(blob), err)
|
||||
handler.RemoveCheckpoint(podSandboxID)
|
||||
return nil, fmt.Errorf("failed to unmarshal checkpoint")
|
||||
}
|
||||
if checkpoint.CheckSum != calculateChecksum(checkpoint) {
|
||||
glog.Errorf("Checksum of checkpoint %q is not valid, removing checkpoint", podSandboxID)
|
||||
handler.RemoveCheckpoint(podSandboxID)
|
||||
return nil, fmt.Errorf("checkpoint is corrupted")
|
||||
}
|
||||
return &checkpoint, nil
|
||||
}
|
||||
|
||||
func (handler *PersistentCheckpointHandler) RemoveCheckpoint(podSandboxID string) error {
|
||||
return handler.store.Delete(podSandboxID)
|
||||
}
|
||||
|
||||
func (handler *PersistentCheckpointHandler) ListCheckpoints() ([]string, error) {
|
||||
keys, err := handler.store.List()
|
||||
if err != nil {
|
||||
return []string{}, fmt.Errorf("failed to list checkpoint store: %v", err)
|
||||
}
|
||||
return keys, nil
|
||||
}
|
||||
|
||||
func NewPodSandboxCheckpoint(namespace, name string) *PodSandboxCheckpoint {
|
||||
func NewPodSandboxCheckpoint(namespace, name string, data *CheckpointData) DockershimCheckpoint {
|
||||
return &PodSandboxCheckpoint{
|
||||
Version: schemaVersion,
|
||||
Namespace: namespace,
|
||||
Name: name,
|
||||
Data: &CheckpointData{},
|
||||
Data: data,
|
||||
}
|
||||
}
|
||||
|
||||
func calculateChecksum(checkpoint PodSandboxCheckpoint) uint64 {
|
||||
checkpoint.CheckSum = 0
|
||||
hash := fnv.New32a()
|
||||
hashutil.DeepHashObject(hash, checkpoint)
|
||||
return uint64(hash.Sum32())
|
||||
func (cp *PodSandboxCheckpoint) MarshalCheckpoint() ([]byte, error) {
|
||||
cp.Checksum = checksum.New(*cp.Data)
|
||||
return json.Marshal(*cp)
|
||||
}
|
||||
|
||||
func (cp *PodSandboxCheckpoint) UnmarshalCheckpoint(blob []byte) error {
|
||||
return json.Unmarshal(blob, cp)
|
||||
}
|
||||
|
||||
func (cp *PodSandboxCheckpoint) VerifyChecksum() error {
|
||||
return cp.Checksum.Verify(*cp.Data)
|
||||
}
|
||||
|
||||
func (cp *PodSandboxCheckpoint) GetData() (string, string, string, []*PortMapping, bool) {
|
||||
return cp.Version, cp.Name, cp.Namespace, cp.Data.PortMappings, cp.Data.HostNetwork
|
||||
}
|
||||
|
@ -17,86 +17,17 @@ limitations under the License.
|
||||
package dockershim
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
utilstore "k8s.io/kubernetes/pkg/kubelet/dockershim/testing"
|
||||
)
|
||||
|
||||
func NewTestPersistentCheckpointHandler() CheckpointHandler {
|
||||
return &PersistentCheckpointHandler{store: utilstore.NewMemStore()}
|
||||
}
|
||||
|
||||
func TestPersistentCheckpointHandler(t *testing.T) {
|
||||
var err error
|
||||
handler := NewTestPersistentCheckpointHandler()
|
||||
port80 := int32(80)
|
||||
port443 := int32(443)
|
||||
proto := protocolTCP
|
||||
|
||||
checkpoint1 := NewPodSandboxCheckpoint("ns1", "sandbox1")
|
||||
checkpoint1.Data.PortMappings = []*PortMapping{
|
||||
{
|
||||
&proto,
|
||||
&port80,
|
||||
&port80,
|
||||
},
|
||||
{
|
||||
&proto,
|
||||
&port443,
|
||||
&port443,
|
||||
},
|
||||
}
|
||||
checkpoint1.Data.HostNetwork = true
|
||||
|
||||
checkpoints := []struct {
|
||||
podSandboxID string
|
||||
checkpoint *PodSandboxCheckpoint
|
||||
expectHostNetwork bool
|
||||
}{
|
||||
{
|
||||
"id1",
|
||||
checkpoint1,
|
||||
true,
|
||||
},
|
||||
{
|
||||
"id2",
|
||||
NewPodSandboxCheckpoint("ns2", "sandbox2"),
|
||||
false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range checkpoints {
|
||||
// Test CreateCheckpoints
|
||||
err = handler.CreateCheckpoint(tc.podSandboxID, tc.checkpoint)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Test GetCheckpoints
|
||||
checkpoint, err := handler.GetCheckpoint(tc.podSandboxID)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, *checkpoint, *tc.checkpoint)
|
||||
assert.Equal(t, checkpoint.Data.HostNetwork, tc.expectHostNetwork)
|
||||
}
|
||||
// Test ListCheckpoints
|
||||
keys, err := handler.ListCheckpoints()
|
||||
assert.NoError(t, err)
|
||||
sort.Strings(keys)
|
||||
assert.Equal(t, keys, []string{"id1", "id2"})
|
||||
|
||||
// Test RemoveCheckpoints
|
||||
err = handler.RemoveCheckpoint("id1")
|
||||
assert.NoError(t, err)
|
||||
// Test Remove Nonexisted Checkpoints
|
||||
err = handler.RemoveCheckpoint("id1")
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Test ListCheckpoints
|
||||
keys, err = handler.ListCheckpoints()
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, keys, []string{"id2"})
|
||||
|
||||
// Test Get NonExisted Checkpoint
|
||||
_, err = handler.GetCheckpoint("id1")
|
||||
assert.Error(t, err)
|
||||
func TestPodSandboxCheckpoint(t *testing.T) {
|
||||
data := &CheckpointData{HostNetwork: true}
|
||||
checkpoint := NewPodSandboxCheckpoint("ns1", "sandbox1", data)
|
||||
version, name, namespace, _, hostNetwork := checkpoint.GetData()
|
||||
assert.Equal(t, schemaVersion, version)
|
||||
assert.Equal(t, "ns1", namespace)
|
||||
assert.Equal(t, "sandbox1", name)
|
||||
assert.Equal(t, true, hostNetwork)
|
||||
}
|
||||
|
@ -30,6 +30,7 @@ import (
|
||||
|
||||
utilerrors "k8s.io/apimachinery/pkg/util/errors"
|
||||
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker"
|
||||
"k8s.io/kubernetes/pkg/kubelet/qos"
|
||||
@ -118,7 +119,7 @@ func (ds *dockerService) RunPodSandbox(ctx context.Context, r *runtimeapi.RunPod
|
||||
}(&err)
|
||||
|
||||
// Step 3: Create Sandbox Checkpoint.
|
||||
if err = ds.checkpointHandler.CreateCheckpoint(createResp.ID, constructPodSandboxCheckpoint(config)); err != nil {
|
||||
if err = ds.checkpointManager.CreateCheckpoint(createResp.ID, constructPodSandboxCheckpoint(config)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -189,7 +190,8 @@ func (ds *dockerService) StopPodSandbox(ctx context.Context, r *runtimeapi.StopP
|
||||
name = metadata.Name
|
||||
hostNetwork = (networkNamespaceMode(inspectResult) == runtimeapi.NamespaceMode_NODE)
|
||||
} else {
|
||||
checkpoint, checkpointErr := ds.checkpointHandler.GetCheckpoint(podSandboxID)
|
||||
checkpoint := NewPodSandboxCheckpoint("", "", &CheckpointData{})
|
||||
checkpointErr := ds.checkpointManager.GetCheckpoint(podSandboxID, checkpoint)
|
||||
|
||||
// Proceed if both sandbox container and checkpoint could not be found. This means that following
|
||||
// actions will only have sandbox ID and not have pod namespace and name information.
|
||||
@ -204,9 +206,7 @@ func (ds *dockerService) StopPodSandbox(ctx context.Context, r *runtimeapi.StopP
|
||||
fmt.Errorf("failed to get sandbox status: %v", statusErr)})
|
||||
}
|
||||
} else {
|
||||
namespace = checkpoint.Namespace
|
||||
name = checkpoint.Name
|
||||
hostNetwork = checkpoint.Data != nil && checkpoint.Data.HostNetwork
|
||||
_, name, namespace, _, hostNetwork = checkpoint.GetData()
|
||||
}
|
||||
}
|
||||
|
||||
@ -237,7 +237,7 @@ func (ds *dockerService) StopPodSandbox(ctx context.Context, r *runtimeapi.StopP
|
||||
errList = append(errList, err)
|
||||
} else {
|
||||
// remove the checkpoint for any sandbox that is not found in the runtime
|
||||
ds.checkpointHandler.RemoveCheckpoint(podSandboxID)
|
||||
ds.checkpointManager.RemoveCheckpoint(podSandboxID)
|
||||
}
|
||||
}
|
||||
|
||||
@ -284,7 +284,7 @@ func (ds *dockerService) RemovePodSandbox(ctx context.Context, r *runtimeapi.Rem
|
||||
}
|
||||
|
||||
// Remove the checkpoint of the sandbox.
|
||||
if err := ds.checkpointHandler.RemoveCheckpoint(podSandboxID); err != nil {
|
||||
if err := ds.checkpointManager.RemoveCheckpoint(podSandboxID); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if len(errs) == 0 {
|
||||
@ -465,7 +465,7 @@ func (ds *dockerService) ListPodSandbox(_ context.Context, r *runtimeapi.ListPod
|
||||
var err error
|
||||
checkpoints := []string{}
|
||||
if filter == nil {
|
||||
checkpoints, err = ds.checkpointHandler.ListCheckpoints()
|
||||
checkpoints, err = ds.checkpointManager.ListCheckpoints()
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to list checkpoints: %v", err)
|
||||
}
|
||||
@ -501,7 +501,8 @@ func (ds *dockerService) ListPodSandbox(_ context.Context, r *runtimeapi.ListPod
|
||||
if _, ok := sandboxIDs[id]; ok {
|
||||
continue
|
||||
}
|
||||
checkpoint, err := ds.checkpointHandler.GetCheckpoint(id)
|
||||
checkpoint := NewPodSandboxCheckpoint("", "", &CheckpointData{})
|
||||
err := ds.checkpointManager.GetCheckpoint(id, checkpoint)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to retrieve checkpoint for sandbox %q: %v", id, err)
|
||||
continue
|
||||
@ -624,20 +625,20 @@ func ipcNamespaceMode(container *dockertypes.ContainerJSON) runtimeapi.Namespace
|
||||
return runtimeapi.NamespaceMode_POD
|
||||
}
|
||||
|
||||
func constructPodSandboxCheckpoint(config *runtimeapi.PodSandboxConfig) *PodSandboxCheckpoint {
|
||||
checkpoint := NewPodSandboxCheckpoint(config.Metadata.Namespace, config.Metadata.Name)
|
||||
func constructPodSandboxCheckpoint(config *runtimeapi.PodSandboxConfig) checkpointmanager.Checkpoint {
|
||||
data := CheckpointData{}
|
||||
for _, pm := range config.GetPortMappings() {
|
||||
proto := toCheckpointProtocol(pm.Protocol)
|
||||
checkpoint.Data.PortMappings = append(checkpoint.Data.PortMappings, &PortMapping{
|
||||
data.PortMappings = append(data.PortMappings, &PortMapping{
|
||||
HostPort: &pm.HostPort,
|
||||
ContainerPort: &pm.ContainerPort,
|
||||
Protocol: &proto,
|
||||
})
|
||||
}
|
||||
if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtimeapi.NamespaceMode_NODE {
|
||||
checkpoint.Data.HostNetwork = true
|
||||
data.HostNetwork = true
|
||||
}
|
||||
return checkpoint
|
||||
return NewPodSandboxCheckpoint(config.Metadata.Namespace, config.Metadata.Name, &data)
|
||||
}
|
||||
|
||||
func toCheckpointProtocol(protocol runtimeapi.Protocol) Protocol {
|
||||
|
@ -20,6 +20,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@ -30,6 +31,7 @@ import (
|
||||
"k8s.io/api/core/v1"
|
||||
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
|
||||
"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
kubecm "k8s.io/kubernetes/pkg/kubelet/cm"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/cm"
|
||||
@ -191,7 +193,8 @@ func NewDockerService(config *ClientConfig, podSandboxImage string, streamingCon
|
||||
client := NewDockerClientFromConfig(config)
|
||||
|
||||
c := libdocker.NewInstrumentedInterface(client)
|
||||
checkpointHandler, err := NewPersistentCheckpointHandler(dockershimRootDir)
|
||||
|
||||
checkpointManager, err := checkpointmanager.NewCheckpointManager(filepath.Join(dockershimRootDir, sandboxCheckpointDir))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -205,7 +208,7 @@ func NewDockerService(config *ClientConfig, podSandboxImage string, streamingCon
|
||||
execHandler: &NativeExecHandler{},
|
||||
},
|
||||
containerManager: cm.NewContainerManager(cgroupsName, client),
|
||||
checkpointHandler: checkpointHandler,
|
||||
checkpointManager: checkpointManager,
|
||||
disableSharedPID: disableSharedPID,
|
||||
networkReady: make(map[string]bool),
|
||||
}
|
||||
@ -293,7 +296,7 @@ type dockerService struct {
|
||||
containerManager cm.ContainerManager
|
||||
// cgroup driver used by Docker runtime.
|
||||
cgroupDriver string
|
||||
checkpointHandler CheckpointHandler
|
||||
checkpointManager checkpointmanager.CheckpointManager
|
||||
// caches the version of the runtime.
|
||||
// To be compatible with multiple docker versions, we need to perform
|
||||
// version checking for some operations. Use this cache to avoid querying
|
||||
@ -365,7 +368,8 @@ func (ds *dockerService) GetNetNS(podSandboxID string) (string, error) {
|
||||
// GetPodPortMappings returns the port mappings of the given podSandbox ID.
|
||||
func (ds *dockerService) GetPodPortMappings(podSandboxID string) ([]*hostport.PortMapping, error) {
|
||||
// TODO: get portmappings from docker labels for backward compatibility
|
||||
checkpoint, err := ds.checkpointHandler.GetCheckpoint(podSandboxID)
|
||||
checkpoint := NewPodSandboxCheckpoint("", "", &CheckpointData{})
|
||||
err := ds.checkpointManager.GetCheckpoint(podSandboxID, checkpoint)
|
||||
// Return empty portMappings if checkpoint is not found
|
||||
if err != nil {
|
||||
if err == utilstore.ErrKeyNotFound {
|
||||
@ -373,9 +377,9 @@ func (ds *dockerService) GetPodPortMappings(podSandboxID string) ([]*hostport.Po
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
portMappings := make([]*hostport.PortMapping, 0, len(checkpoint.Data.PortMappings))
|
||||
for _, pm := range checkpoint.Data.PortMappings {
|
||||
_, _, _, checkpointedPortMappings, _ := checkpoint.GetData()
|
||||
portMappings := make([]*hostport.PortMapping, 0, len(checkpointedPortMappings))
|
||||
for _, pm := range checkpointedPortMappings {
|
||||
proto := toAPIProtocol(*pm.Protocol)
|
||||
portMappings = append(portMappings, &hostport.PortMapping{
|
||||
HostPort: *pm.HostPort,
|
||||
|
@ -30,6 +30,7 @@ import (
|
||||
|
||||
"k8s.io/apimachinery/pkg/util/clock"
|
||||
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker"
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/network"
|
||||
@ -43,15 +44,50 @@ func newTestNetworkPlugin(t *testing.T) *nettest.MockNetworkPlugin {
|
||||
return nettest.NewMockNetworkPlugin(ctrl)
|
||||
}
|
||||
|
||||
type mockCheckpointManager struct {
|
||||
checkpoint map[string]*PodSandboxCheckpoint
|
||||
}
|
||||
|
||||
func (ckm *mockCheckpointManager) CreateCheckpoint(checkpointKey string, checkpoint checkpointmanager.Checkpoint) error {
|
||||
ckm.checkpoint[checkpointKey] = checkpoint.(*PodSandboxCheckpoint)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ckm *mockCheckpointManager) GetCheckpoint(checkpointKey string, checkpoint checkpointmanager.Checkpoint) error {
|
||||
*(checkpoint.(*PodSandboxCheckpoint)) = *(ckm.checkpoint[checkpointKey])
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ckm *mockCheckpointManager) RemoveCheckpoint(checkpointKey string) error {
|
||||
_, ok := ckm.checkpoint[checkpointKey]
|
||||
if ok {
|
||||
delete(ckm.checkpoint, "moo")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ckm *mockCheckpointManager) ListCheckpoints() ([]string, error) {
|
||||
var keys []string
|
||||
for key := range ckm.checkpoint {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
return keys, nil
|
||||
}
|
||||
|
||||
func newMockCheckpointManager() checkpointmanager.CheckpointManager {
|
||||
return &mockCheckpointManager{checkpoint: make(map[string]*PodSandboxCheckpoint)}
|
||||
}
|
||||
|
||||
func newTestDockerService() (*dockerService, *libdocker.FakeDockerClient, *clock.FakeClock) {
|
||||
fakeClock := clock.NewFakeClock(time.Time{})
|
||||
c := libdocker.NewFakeDockerClient().WithClock(fakeClock).WithVersion("1.11.2", "1.23").WithRandSource(rand.NewSource(0))
|
||||
pm := network.NewPluginManager(&network.NoopNetworkPlugin{})
|
||||
ckm := newMockCheckpointManager()
|
||||
return &dockerService{
|
||||
client: c,
|
||||
os: &containertest.FakeOS{},
|
||||
network: pm,
|
||||
checkpointHandler: NewTestPersistentCheckpointHandler(),
|
||||
checkpointManager: ckm,
|
||||
networkReady: make(map[string]bool),
|
||||
}, c, fakeClock
|
||||
}
|
||||
|
@ -61,6 +61,7 @@ import (
|
||||
kubeletconfiginternal "k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
|
||||
kubeletcertificate "k8s.io/kubernetes/pkg/kubelet/certificate"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm"
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
"k8s.io/kubernetes/pkg/kubelet/configmap"
|
||||
@ -553,8 +554,15 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
|
||||
klet.livenessManager = proberesults.NewManager()
|
||||
|
||||
klet.podCache = kubecontainer.NewCache()
|
||||
var checkpointManager checkpointmanager.CheckpointManager
|
||||
if bootstrapCheckpointPath != "" {
|
||||
checkpointManager, err = checkpointmanager.NewCheckpointManager(bootstrapCheckpointPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to initialize checkpoint manager: %+v", err)
|
||||
}
|
||||
}
|
||||
// podManager is also responsible for keeping secretManager and configMapManager contents up-to-date.
|
||||
klet.podManager = kubepod.NewBasicPodManager(kubepod.NewBasicMirrorClient(klet.kubeClient), secretManager, configMapManager)
|
||||
klet.podManager = kubepod.NewBasicPodManager(kubepod.NewBasicMirrorClient(klet.kubeClient), secretManager, configMapManager, checkpointManager)
|
||||
|
||||
if remoteRuntimeEndpoint != "" {
|
||||
// remoteImageEndpoint is same as remoteRuntimeEndpoint if not explicitly specified
|
||||
|
@ -215,7 +215,7 @@ func newTestKubeletWithImageList(
|
||||
kubelet.secretManager = secretManager
|
||||
configMapManager := configmap.NewSimpleConfigMapManager(kubelet.kubeClient)
|
||||
kubelet.configMapManager = configMapManager
|
||||
kubelet.podManager = kubepod.NewBasicPodManager(fakeMirrorClient, kubelet.secretManager, kubelet.configMapManager)
|
||||
kubelet.podManager = kubepod.NewBasicPodManager(fakeMirrorClient, kubelet.secretManager, kubelet.configMapManager, podtest.NewMockCheckpointManager())
|
||||
kubelet.statusManager = status.NewManager(fakeKubeClient, kubelet.podManager, &statustest.FakePodDeletionSafetyProvider{})
|
||||
|
||||
kubelet.containerRuntime = fakeRuntime
|
||||
|
@ -52,7 +52,7 @@ func TestGetVolumeExec(t *testing.T) {
|
||||
fakeSecretManager := secret.NewFakeManager()
|
||||
fakeConfigMapManager := configmap.NewFakeManager()
|
||||
podManager := kubepod.NewBasicPodManager(
|
||||
podtest.NewFakeMirrorClient(), fakeSecretManager, fakeConfigMapManager)
|
||||
podtest.NewFakeMirrorClient(), fakeSecretManager, fakeConfigMapManager, podtest.NewMockCheckpointManager())
|
||||
podManager.SetPods(pods)
|
||||
|
||||
// Prepare fake /var/lib/kubelet
|
||||
|
@ -15,6 +15,7 @@ go_library(
|
||||
importpath = "k8s.io/kubernetes/pkg/kubelet/pod",
|
||||
deps = [
|
||||
"//pkg/kubelet/checkpoint:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager:go_default_library",
|
||||
"//pkg/kubelet/configmap:go_default_library",
|
||||
"//pkg/kubelet/container:go_default_library",
|
||||
"//pkg/kubelet/secret:go_default_library",
|
||||
|
@ -24,6 +24,7 @@ import (
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpoint"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/configmap"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/secret"
|
||||
@ -121,18 +122,18 @@ type basicManager struct {
|
||||
// basicManager is keeping secretManager and configMapManager up-to-date.
|
||||
secretManager secret.Manager
|
||||
configMapManager configmap.Manager
|
||||
checkpointManager checkpoint.Manager
|
||||
checkpointManager checkpointmanager.CheckpointManager
|
||||
|
||||
// A mirror pod client to create/delete mirror pods.
|
||||
MirrorClient
|
||||
}
|
||||
|
||||
// NewBasicPodManager returns a functional Manager.
|
||||
func NewBasicPodManager(client MirrorClient, secretManager secret.Manager, configMapManager configmap.Manager) Manager {
|
||||
func NewBasicPodManager(client MirrorClient, secretManager secret.Manager, configMapManager configmap.Manager, cpm checkpointmanager.CheckpointManager) Manager {
|
||||
pm := &basicManager{}
|
||||
pm.secretManager = secretManager
|
||||
pm.configMapManager = configMapManager
|
||||
pm.checkpointManager = checkpoint.GetInstance()
|
||||
pm.checkpointManager = cpm
|
||||
pm.MirrorClient = client
|
||||
pm.SetPods(nil)
|
||||
return pm
|
||||
@ -161,7 +162,7 @@ func (pm *basicManager) UpdatePod(pod *v1.Pod) {
|
||||
defer pm.lock.Unlock()
|
||||
pm.updatePodsInternal(pod)
|
||||
if pm.checkpointManager != nil {
|
||||
if err := pm.checkpointManager.WritePod(pod); err != nil {
|
||||
if err := checkpoint.WritePod(pm.checkpointManager, pod); err != nil {
|
||||
glog.Errorf("Error writing checkpoint for pod: %v", pod.GetName())
|
||||
}
|
||||
}
|
||||
@ -224,7 +225,7 @@ func (pm *basicManager) DeletePod(pod *v1.Pod) {
|
||||
delete(pm.podByFullName, podFullName)
|
||||
}
|
||||
if pm.checkpointManager != nil {
|
||||
if err := pm.checkpointManager.DeletePod(pod); err != nil {
|
||||
if err := checkpoint.DeletePod(pm.checkpointManager, pod); err != nil {
|
||||
glog.Errorf("Error deleting checkpoint for pod: %v", pod.GetName())
|
||||
}
|
||||
}
|
||||
|
@ -34,7 +34,7 @@ func newTestManager() (*basicManager, *podtest.FakeMirrorClient) {
|
||||
fakeMirrorClient := podtest.NewFakeMirrorClient()
|
||||
secretManager := secret.NewFakeManager()
|
||||
configMapManager := configmap.NewFakeManager()
|
||||
manager := NewBasicPodManager(fakeMirrorClient, secretManager, configMapManager).(*basicManager)
|
||||
manager := NewBasicPodManager(fakeMirrorClient, secretManager, configMapManager, podtest.NewMockCheckpointManager()).(*basicManager)
|
||||
return manager, fakeMirrorClient
|
||||
}
|
||||
|
||||
|
@ -13,6 +13,8 @@ go_library(
|
||||
],
|
||||
importpath = "k8s.io/kubernetes/pkg/kubelet/pod/testing",
|
||||
deps = [
|
||||
"//pkg/kubelet/checkpoint:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager:go_default_library",
|
||||
"//pkg/kubelet/container:go_default_library",
|
||||
"//pkg/kubelet/types:go_default_library",
|
||||
"//vendor/github.com/stretchr/testify/mock:go_default_library",
|
||||
|
@ -21,6 +21,8 @@ import (
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
cp "k8s.io/kubernetes/pkg/kubelet/checkpoint"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
)
|
||||
|
||||
@ -81,3 +83,37 @@ func (fmc *FakeMirrorClient) GetCounts(podFullName string) (int, int) {
|
||||
defer fmc.mirrorPodLock.RUnlock()
|
||||
return fmc.createCounts[podFullName], fmc.deleteCounts[podFullName]
|
||||
}
|
||||
|
||||
type MockCheckpointManager struct {
|
||||
checkpoint map[string]*cp.Data
|
||||
}
|
||||
|
||||
func (ckm *MockCheckpointManager) CreateCheckpoint(checkpointKey string, checkpoint checkpointmanager.Checkpoint) error {
|
||||
ckm.checkpoint[checkpointKey] = (checkpoint.(*cp.Data))
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ckm *MockCheckpointManager) GetCheckpoint(checkpointKey string, checkpoint checkpointmanager.Checkpoint) error {
|
||||
*(checkpoint.(*cp.Data)) = *(ckm.checkpoint[checkpointKey])
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ckm *MockCheckpointManager) RemoveCheckpoint(checkpointKey string) error {
|
||||
_, ok := ckm.checkpoint[checkpointKey]
|
||||
if ok {
|
||||
delete(ckm.checkpoint, "moo")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ckm *MockCheckpointManager) ListCheckpoints() ([]string, error) {
|
||||
var keys []string
|
||||
for key := range ckm.checkpoint {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
return keys, nil
|
||||
}
|
||||
|
||||
func NewMockCheckpointManager() checkpointmanager.CheckpointManager {
|
||||
return &MockCheckpointManager{checkpoint: make(map[string]*cp.Data)}
|
||||
}
|
||||
|
@ -99,7 +99,7 @@ func setTestProbe(pod *v1.Pod, probeType probeType, probeSpec v1.Probe) {
|
||||
func newTestManager() *manager {
|
||||
refManager := kubecontainer.NewRefManager()
|
||||
refManager.SetRef(testContainerID, &v1.ObjectReference{}) // Suppress prober warnings.
|
||||
podManager := kubepod.NewBasicPodManager(nil, nil, nil)
|
||||
podManager := kubepod.NewBasicPodManager(nil, nil, nil, nil)
|
||||
// Add test pod to pod manager, so that status manager can get the pod from pod manager if needed.
|
||||
podManager.AddPod(getTestPod())
|
||||
m := NewManager(
|
||||
|
@ -118,7 +118,7 @@ func TestDoProbe(t *testing.T) {
|
||||
}
|
||||
|
||||
// Clean up.
|
||||
m.statusManager = status.NewManager(&fake.Clientset{}, kubepod.NewBasicPodManager(nil, nil, nil), &statustest.FakePodDeletionSafetyProvider{})
|
||||
m.statusManager = status.NewManager(&fake.Clientset{}, kubepod.NewBasicPodManager(nil, nil, nil, nil), &statustest.FakePodDeletionSafetyProvider{})
|
||||
resultsManager(m, probeType).Remove(testContainerID)
|
||||
}
|
||||
}
|
||||
|
@ -63,7 +63,7 @@ func TestRunOnce(t *testing.T) {
|
||||
fakeSecretManager := secret.NewFakeManager()
|
||||
fakeConfigMapManager := configmap.NewFakeManager()
|
||||
podManager := kubepod.NewBasicPodManager(
|
||||
podtest.NewFakeMirrorClient(), fakeSecretManager, fakeConfigMapManager)
|
||||
podtest.NewFakeMirrorClient(), fakeSecretManager, fakeConfigMapManager, podtest.NewMockCheckpointManager())
|
||||
fakeRuntime := &containertest.FakeRuntime{}
|
||||
basePath, err := utiltesting.MkTmpdir("kubelet")
|
||||
if err != nil {
|
||||
|
@ -75,7 +75,7 @@ func (m *manager) testSyncBatch() {
|
||||
}
|
||||
|
||||
func newTestManager(kubeClient clientset.Interface) *manager {
|
||||
podManager := kubepod.NewBasicPodManager(podtest.NewFakeMirrorClient(), kubesecret.NewFakeManager(), kubeconfigmap.NewFakeManager())
|
||||
podManager := kubepod.NewBasicPodManager(podtest.NewFakeMirrorClient(), kubesecret.NewFakeManager(), kubeconfigmap.NewFakeManager(), podtest.NewMockCheckpointManager())
|
||||
podManager.AddPod(getTestPod())
|
||||
return NewManager(kubeClient, podManager, &statustest.FakePodDeletionSafetyProvider{}).(*manager)
|
||||
}
|
||||
|
@ -523,7 +523,7 @@ func createDswpWithVolume(t *testing.T, pv *v1.PersistentVolume, pvc *v1.Persist
|
||||
fakeSecretManager := secret.NewFakeManager()
|
||||
fakeConfigMapManager := configmap.NewFakeManager()
|
||||
fakePodManager := kubepod.NewBasicPodManager(
|
||||
podtest.NewFakeMirrorClient(), fakeSecretManager, fakeConfigMapManager)
|
||||
podtest.NewFakeMirrorClient(), fakeSecretManager, fakeConfigMapManager, podtest.NewMockCheckpointManager())
|
||||
|
||||
fakesDSW := cache.NewDesiredStateOfWorld(fakeVolumePluginMgr)
|
||||
fakeASW := cache.NewActualStateOfWorld("fake", fakeVolumePluginMgr)
|
||||
|
@ -55,7 +55,8 @@ func TestGetMountedVolumesForPodAndGetVolumesInUse(t *testing.T) {
|
||||
t.Fatalf("can't make a temp dir: %v", err)
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
podManager := kubepod.NewBasicPodManager(podtest.NewFakeMirrorClient(), secret.NewFakeManager(), configmap.NewFakeManager())
|
||||
cpm := podtest.NewMockCheckpointManager()
|
||||
podManager := kubepod.NewBasicPodManager(podtest.NewFakeMirrorClient(), secret.NewFakeManager(), configmap.NewFakeManager(), cpm)
|
||||
|
||||
node, pod, pv, claim := createObjects()
|
||||
kubeClient := fake.NewSimpleClientset(node, pod, pv, claim)
|
||||
@ -97,7 +98,8 @@ func TestInitialPendingVolumesForPodAndGetVolumesInUse(t *testing.T) {
|
||||
t.Fatalf("can't make a temp dir: %v", err)
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
podManager := kubepod.NewBasicPodManager(podtest.NewFakeMirrorClient(), secret.NewFakeManager(), configmap.NewFakeManager())
|
||||
cpm := podtest.NewMockCheckpointManager()
|
||||
podManager := kubepod.NewBasicPodManager(podtest.NewFakeMirrorClient(), secret.NewFakeManager(), configmap.NewFakeManager(), cpm)
|
||||
|
||||
node, pod, pv, claim := createObjects()
|
||||
claim.Status = v1.PersistentVolumeClaimStatus{
|
||||
@ -135,7 +137,8 @@ func TestGetExtraSupplementalGroupsForPod(t *testing.T) {
|
||||
t.Fatalf("can't make a temp dir: %v", err)
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
podManager := kubepod.NewBasicPodManager(podtest.NewFakeMirrorClient(), secret.NewFakeManager(), configmap.NewFakeManager())
|
||||
cpm := podtest.NewMockCheckpointManager()
|
||||
podManager := kubepod.NewBasicPodManager(podtest.NewFakeMirrorClient(), secret.NewFakeManager(), configmap.NewFakeManager(), cpm)
|
||||
|
||||
node, pod, _, claim := createObjects()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user