mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 03:41:45 +00:00
Handle Non-graceful Node Shutdown (#108486)
Signed-off-by: Ashutosh Kumar <sonasingh46@gmail.com> Co-authored-by: Ashutosh Kumar <sonasingh46@gmail.com> Co-authored-by: xing-yang <xingyang105@gmail.com>
This commit is contained in:
parent
78889cd1bb
commit
c00975370a
@ -29,14 +29,17 @@ import (
|
|||||||
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
||||||
"k8s.io/apimachinery/pkg/util/sets"
|
"k8s.io/apimachinery/pkg/util/sets"
|
||||||
"k8s.io/apimachinery/pkg/util/wait"
|
"k8s.io/apimachinery/pkg/util/wait"
|
||||||
|
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||||
coreinformers "k8s.io/client-go/informers/core/v1"
|
coreinformers "k8s.io/client-go/informers/core/v1"
|
||||||
clientset "k8s.io/client-go/kubernetes"
|
clientset "k8s.io/client-go/kubernetes"
|
||||||
corelisters "k8s.io/client-go/listers/core/v1"
|
corelisters "k8s.io/client-go/listers/core/v1"
|
||||||
"k8s.io/client-go/tools/cache"
|
"k8s.io/client-go/tools/cache"
|
||||||
"k8s.io/client-go/util/workqueue"
|
"k8s.io/client-go/util/workqueue"
|
||||||
"k8s.io/component-base/metrics/prometheus/ratelimiter"
|
"k8s.io/component-base/metrics/prometheus/ratelimiter"
|
||||||
|
|
||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
|
"k8s.io/kubernetes/pkg/features"
|
||||||
|
nodeutil "k8s.io/kubernetes/pkg/util/node"
|
||||||
|
"k8s.io/kubernetes/pkg/util/taints"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -113,6 +116,9 @@ func (gcc *PodGCController) gc(ctx context.Context) {
|
|||||||
if gcc.terminatedPodThreshold > 0 {
|
if gcc.terminatedPodThreshold > 0 {
|
||||||
gcc.gcTerminated(pods)
|
gcc.gcTerminated(pods)
|
||||||
}
|
}
|
||||||
|
if utilfeature.DefaultFeatureGate.Enabled(features.NodeOutOfServiceVolumeDetach) {
|
||||||
|
gcc.gcTerminating(pods)
|
||||||
|
}
|
||||||
gcc.gcOrphaned(ctx, pods, nodes)
|
gcc.gcOrphaned(ctx, pods, nodes)
|
||||||
gcc.gcUnscheduledTerminating(pods)
|
gcc.gcUnscheduledTerminating(pods)
|
||||||
}
|
}
|
||||||
@ -124,6 +130,53 @@ func isPodTerminated(pod *v1.Pod) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isPodTerminating returns true if the pod is terminating.
|
||||||
|
func isPodTerminating(pod *v1.Pod) bool {
|
||||||
|
return pod.ObjectMeta.DeletionTimestamp != nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (gcc *PodGCController) gcTerminating(pods []*v1.Pod) {
|
||||||
|
klog.V(4).Info("GC'ing terminating pods that are on out-of-service nodes")
|
||||||
|
terminatingPods := []*v1.Pod{}
|
||||||
|
for _, pod := range pods {
|
||||||
|
if isPodTerminating(pod) {
|
||||||
|
node, err := gcc.nodeLister.Get(string(pod.Spec.NodeName))
|
||||||
|
if err != nil {
|
||||||
|
klog.Errorf("failed to get node %s : %s", string(pod.Spec.NodeName), err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Add this pod to terminatingPods list only if the following conditions are met:
|
||||||
|
// 1. Node is not ready.
|
||||||
|
// 2. Node has `node.kubernetes.io/out-of-service` taint.
|
||||||
|
if !nodeutil.IsNodeReady(node) && taints.TaintKeyExists(node.Spec.Taints, v1.TaintNodeOutOfService) {
|
||||||
|
klog.V(4).Infof("garbage collecting pod %s that is terminating. Phase [%v]", pod.Name, pod.Status.Phase)
|
||||||
|
terminatingPods = append(terminatingPods, pod)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
deleteCount := len(terminatingPods)
|
||||||
|
if deleteCount == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
klog.V(4).Infof("Garbage collecting %v pods that are terminating on node tainted with node.kubernetes.io/out-of-service", deleteCount)
|
||||||
|
// sort only when necessary
|
||||||
|
sort.Sort(byCreationTimestamp(terminatingPods))
|
||||||
|
var wait sync.WaitGroup
|
||||||
|
for i := 0; i < deleteCount; i++ {
|
||||||
|
wait.Add(1)
|
||||||
|
go func(namespace string, name string) {
|
||||||
|
defer wait.Done()
|
||||||
|
if err := gcc.deletePod(namespace, name); err != nil {
|
||||||
|
// ignore not founds
|
||||||
|
utilruntime.HandleError(err)
|
||||||
|
}
|
||||||
|
}(terminatingPods[i].Namespace, terminatingPods[i].Name)
|
||||||
|
}
|
||||||
|
wait.Wait()
|
||||||
|
}
|
||||||
|
|
||||||
func (gcc *PodGCController) gcTerminated(pods []*v1.Pod) {
|
func (gcc *PodGCController) gcTerminated(pods []*v1.Pod) {
|
||||||
terminatedPods := []*v1.Pod{}
|
terminatedPods := []*v1.Pod{}
|
||||||
for _, pod := range pods {
|
for _, pod := range pods {
|
||||||
|
@ -27,13 +27,16 @@ import (
|
|||||||
"k8s.io/apimachinery/pkg/labels"
|
"k8s.io/apimachinery/pkg/labels"
|
||||||
"k8s.io/apimachinery/pkg/util/sets"
|
"k8s.io/apimachinery/pkg/util/sets"
|
||||||
"k8s.io/apimachinery/pkg/util/wait"
|
"k8s.io/apimachinery/pkg/util/wait"
|
||||||
|
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||||
"k8s.io/client-go/informers"
|
"k8s.io/client-go/informers"
|
||||||
coreinformers "k8s.io/client-go/informers/core/v1"
|
coreinformers "k8s.io/client-go/informers/core/v1"
|
||||||
clientset "k8s.io/client-go/kubernetes"
|
clientset "k8s.io/client-go/kubernetes"
|
||||||
"k8s.io/client-go/kubernetes/fake"
|
"k8s.io/client-go/kubernetes/fake"
|
||||||
"k8s.io/client-go/util/workqueue"
|
"k8s.io/client-go/util/workqueue"
|
||||||
|
featuregatetesting "k8s.io/component-base/featuregate/testing"
|
||||||
"k8s.io/kubernetes/pkg/controller"
|
"k8s.io/kubernetes/pkg/controller"
|
||||||
"k8s.io/kubernetes/pkg/controller/testutil"
|
"k8s.io/kubernetes/pkg/controller/testutil"
|
||||||
|
"k8s.io/kubernetes/pkg/features"
|
||||||
testingclock "k8s.io/utils/clock/testing"
|
testingclock "k8s.io/utils/clock/testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -448,3 +451,154 @@ func TestGCUnscheduledTerminating(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGCTerminating(t *testing.T) {
|
||||||
|
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeOutOfServiceVolumeDetach, true)()
|
||||||
|
type node struct {
|
||||||
|
name string
|
||||||
|
readyCondition v1.ConditionStatus
|
||||||
|
taints []v1.Taint
|
||||||
|
}
|
||||||
|
|
||||||
|
type nameToPodConfig struct {
|
||||||
|
name string
|
||||||
|
phase v1.PodPhase
|
||||||
|
deletionTimeStamp *metav1.Time
|
||||||
|
nodeName string
|
||||||
|
}
|
||||||
|
|
||||||
|
testCases := []struct {
|
||||||
|
name string
|
||||||
|
pods []nameToPodConfig
|
||||||
|
nodes []node
|
||||||
|
deletedPodNames sets.String
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "pods have deletion timestamp set and the corresponding nodes are not ready",
|
||||||
|
nodes: []node{
|
||||||
|
{name: "worker-0", readyCondition: v1.ConditionFalse},
|
||||||
|
{name: "worker-1", readyCondition: v1.ConditionFalse},
|
||||||
|
},
|
||||||
|
pods: []nameToPodConfig{
|
||||||
|
{name: "a", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-0"},
|
||||||
|
{name: "b", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-1"},
|
||||||
|
},
|
||||||
|
deletedPodNames: sets.NewString(),
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
name: "some pods have deletion timestamp and/or phase set and some of the corresponding nodes have an" +
|
||||||
|
"outofservice taint that are not ready",
|
||||||
|
nodes: []node{
|
||||||
|
// terminated pods on this node should be force deleted
|
||||||
|
{name: "worker-0", readyCondition: v1.ConditionFalse, taints: []v1.Taint{{Key: v1.TaintNodeOutOfService,
|
||||||
|
Effect: v1.TaintEffectNoExecute}}},
|
||||||
|
// terminated pods on this node should not be force deleted
|
||||||
|
{name: "worker-1", readyCondition: v1.ConditionFalse},
|
||||||
|
// terminated pods on this node should not be force deleted
|
||||||
|
{name: "worker-2", readyCondition: v1.ConditionTrue},
|
||||||
|
// terminated pods on this node should be force deleted
|
||||||
|
{name: "worker-3", readyCondition: v1.ConditionFalse, taints: []v1.Taint{{Key: v1.TaintNodeOutOfService,
|
||||||
|
Effect: v1.TaintEffectNoSchedule}}},
|
||||||
|
// terminated pods on this node should be force deleted
|
||||||
|
{name: "worker-4", readyCondition: v1.ConditionFalse, taints: []v1.Taint{{Key: v1.TaintNodeOutOfService,
|
||||||
|
Effect: v1.TaintEffectPreferNoSchedule}}},
|
||||||
|
// terminated pods on this node should be force deleted
|
||||||
|
{name: "worker-5", readyCondition: v1.ConditionFalse, taints: []v1.Taint{{Key: v1.TaintNodeOutOfService,
|
||||||
|
Value: "any-value", Effect: v1.TaintEffectNoExecute}}},
|
||||||
|
},
|
||||||
|
pods: []nameToPodConfig{
|
||||||
|
// pods a1, b1, c1, d1 and e1 are on node worker-0
|
||||||
|
{name: "a1", nodeName: "worker-0"},
|
||||||
|
{name: "b1", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-0"},
|
||||||
|
{name: "c1", phase: v1.PodPending, nodeName: "worker-0"},
|
||||||
|
{name: "d1", phase: v1.PodRunning, nodeName: "worker-0"},
|
||||||
|
{name: "e1", phase: v1.PodUnknown, nodeName: "worker-0"},
|
||||||
|
|
||||||
|
// pods a2, b2, c2, d2 and e2 are on node worker-1
|
||||||
|
{name: "a2", nodeName: "worker-1"},
|
||||||
|
{name: "b2", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-1"},
|
||||||
|
{name: "c2", phase: v1.PodPending, nodeName: "worker-1"},
|
||||||
|
{name: "d2", phase: v1.PodRunning, nodeName: "worker-1"},
|
||||||
|
{name: "e2", phase: v1.PodUnknown, nodeName: "worker-1"},
|
||||||
|
|
||||||
|
// pods a3, b3, c3, d3 and e3 are on node worker-2
|
||||||
|
{name: "a3", nodeName: "worker-2"},
|
||||||
|
{name: "b3", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-2"},
|
||||||
|
{name: "c3", phase: v1.PodPending, nodeName: "worker-2"},
|
||||||
|
{name: "d3", phase: v1.PodRunning, nodeName: "worker-2"},
|
||||||
|
{name: "e3", phase: v1.PodUnknown, nodeName: "worker-2"},
|
||||||
|
|
||||||
|
// pods a4, b4, c4, d4 and e4 are on node worker-3
|
||||||
|
{name: "a4", nodeName: "worker-3"},
|
||||||
|
{name: "b4", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-3"},
|
||||||
|
{name: "c4", phase: v1.PodPending, nodeName: "worker-3"},
|
||||||
|
{name: "d4", phase: v1.PodRunning, nodeName: "worker-3"},
|
||||||
|
{name: "e4", phase: v1.PodUnknown, nodeName: "worker-3"},
|
||||||
|
|
||||||
|
// pods a5, b5, c5, d5 and e5 are on node worker-4
|
||||||
|
{name: "a5", nodeName: "worker-3"},
|
||||||
|
{name: "b5", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-4"},
|
||||||
|
{name: "c5", phase: v1.PodPending, nodeName: "worker-4"},
|
||||||
|
{name: "d5", phase: v1.PodRunning, nodeName: "worker-4"},
|
||||||
|
{name: "e5", phase: v1.PodUnknown, nodeName: "worker-4"},
|
||||||
|
|
||||||
|
// pods a6, b6, c6, d6 and e6 are on node worker-5
|
||||||
|
{name: "a6", nodeName: "worker-5"},
|
||||||
|
{name: "b6", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-5"},
|
||||||
|
{name: "c6", phase: v1.PodPending, nodeName: "worker-5"},
|
||||||
|
{name: "d6", phase: v1.PodRunning, nodeName: "worker-5"},
|
||||||
|
{name: "e6", phase: v1.PodUnknown, nodeName: "worker-5"},
|
||||||
|
},
|
||||||
|
deletedPodNames: sets.NewString("b1", "b4", "b5", "b6"),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for i, test := range testCases {
|
||||||
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
client := fake.NewSimpleClientset(&v1.NodeList{Items: []v1.Node{*testutil.NewNode("node-a")}})
|
||||||
|
gcc, podInformer, nodeInformer := NewFromClient(client, -1)
|
||||||
|
deletedPodNames := make([]string, 0)
|
||||||
|
var lock sync.Mutex
|
||||||
|
gcc.deletePod = func(_, name string) error {
|
||||||
|
lock.Lock()
|
||||||
|
defer lock.Unlock()
|
||||||
|
deletedPodNames = append(deletedPodNames, name)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
creationTime := time.Unix(0, 0)
|
||||||
|
for _, node := range test.nodes {
|
||||||
|
creationTime = creationTime.Add(2 * time.Hour)
|
||||||
|
nodeInformer.Informer().GetStore().Add(&v1.Node{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{Name: node.name, CreationTimestamp: metav1.Time{Time: creationTime}},
|
||||||
|
Spec: v1.NodeSpec{
|
||||||
|
Taints: node.taints,
|
||||||
|
},
|
||||||
|
Status: v1.NodeStatus{
|
||||||
|
Conditions: []v1.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: v1.NodeReady,
|
||||||
|
Status: node.readyCondition,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, pod := range test.pods {
|
||||||
|
creationTime = creationTime.Add(1 * time.Hour)
|
||||||
|
podInformer.Informer().GetStore().Add(&v1.Pod{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{Name: pod.name, CreationTimestamp: metav1.Time{Time: creationTime},
|
||||||
|
DeletionTimestamp: pod.deletionTimeStamp},
|
||||||
|
Status: v1.PodStatus{Phase: pod.phase},
|
||||||
|
Spec: v1.PodSpec{NodeName: pod.nodeName},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
gcc.gc(context.TODO())
|
||||||
|
if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
|
||||||
|
t.Errorf("[%v]pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v",
|
||||||
|
i, test.deletedPodNames.List(), deletedPodNames)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -180,6 +180,7 @@ func NewAttachDetachController(
|
|||||||
adc.actualStateOfWorld,
|
adc.actualStateOfWorld,
|
||||||
adc.attacherDetacher,
|
adc.attacherDetacher,
|
||||||
adc.nodeStatusUpdater,
|
adc.nodeStatusUpdater,
|
||||||
|
adc.nodeLister,
|
||||||
recorder)
|
recorder)
|
||||||
|
|
||||||
csiTranslator := csitrans.New()
|
csiTranslator := csitrans.New()
|
||||||
|
@ -27,13 +27,17 @@ import (
|
|||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
"k8s.io/apimachinery/pkg/types"
|
"k8s.io/apimachinery/pkg/types"
|
||||||
"k8s.io/apimachinery/pkg/util/wait"
|
"k8s.io/apimachinery/pkg/util/wait"
|
||||||
|
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||||
|
corelisters "k8s.io/client-go/listers/core/v1"
|
||||||
"k8s.io/client-go/tools/record"
|
"k8s.io/client-go/tools/record"
|
||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/cache"
|
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/cache"
|
||||||
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/metrics"
|
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/metrics"
|
||||||
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/statusupdater"
|
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/statusupdater"
|
||||||
|
"k8s.io/kubernetes/pkg/features"
|
||||||
kevents "k8s.io/kubernetes/pkg/kubelet/events"
|
kevents "k8s.io/kubernetes/pkg/kubelet/events"
|
||||||
"k8s.io/kubernetes/pkg/util/goroutinemap/exponentialbackoff"
|
"k8s.io/kubernetes/pkg/util/goroutinemap/exponentialbackoff"
|
||||||
|
"k8s.io/kubernetes/pkg/util/taints"
|
||||||
"k8s.io/kubernetes/pkg/volume/util"
|
"k8s.io/kubernetes/pkg/volume/util"
|
||||||
"k8s.io/kubernetes/pkg/volume/util/operationexecutor"
|
"k8s.io/kubernetes/pkg/volume/util/operationexecutor"
|
||||||
)
|
)
|
||||||
@ -69,6 +73,7 @@ func NewReconciler(
|
|||||||
actualStateOfWorld cache.ActualStateOfWorld,
|
actualStateOfWorld cache.ActualStateOfWorld,
|
||||||
attacherDetacher operationexecutor.OperationExecutor,
|
attacherDetacher operationexecutor.OperationExecutor,
|
||||||
nodeStatusUpdater statusupdater.NodeStatusUpdater,
|
nodeStatusUpdater statusupdater.NodeStatusUpdater,
|
||||||
|
nodeLister corelisters.NodeLister,
|
||||||
recorder record.EventRecorder) Reconciler {
|
recorder record.EventRecorder) Reconciler {
|
||||||
return &reconciler{
|
return &reconciler{
|
||||||
loopPeriod: loopPeriod,
|
loopPeriod: loopPeriod,
|
||||||
@ -79,6 +84,7 @@ func NewReconciler(
|
|||||||
actualStateOfWorld: actualStateOfWorld,
|
actualStateOfWorld: actualStateOfWorld,
|
||||||
attacherDetacher: attacherDetacher,
|
attacherDetacher: attacherDetacher,
|
||||||
nodeStatusUpdater: nodeStatusUpdater,
|
nodeStatusUpdater: nodeStatusUpdater,
|
||||||
|
nodeLister: nodeLister,
|
||||||
timeOfLastSync: time.Now(),
|
timeOfLastSync: time.Now(),
|
||||||
recorder: recorder,
|
recorder: recorder,
|
||||||
}
|
}
|
||||||
@ -92,6 +98,7 @@ type reconciler struct {
|
|||||||
actualStateOfWorld cache.ActualStateOfWorld
|
actualStateOfWorld cache.ActualStateOfWorld
|
||||||
attacherDetacher operationexecutor.OperationExecutor
|
attacherDetacher operationexecutor.OperationExecutor
|
||||||
nodeStatusUpdater statusupdater.NodeStatusUpdater
|
nodeStatusUpdater statusupdater.NodeStatusUpdater
|
||||||
|
nodeLister corelisters.NodeLister
|
||||||
timeOfLastSync time.Time
|
timeOfLastSync time.Time
|
||||||
disableReconciliationSync bool
|
disableReconciliationSync bool
|
||||||
recorder record.EventRecorder
|
recorder record.EventRecorder
|
||||||
@ -134,6 +141,19 @@ func (rc *reconciler) syncStates() {
|
|||||||
rc.attacherDetacher.VerifyVolumesAreAttached(volumesPerNode, rc.actualStateOfWorld)
|
rc.attacherDetacher.VerifyVolumesAreAttached(volumesPerNode, rc.actualStateOfWorld)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// hasOutOfServiceTaint returns true if the node has out-of-service taint present
|
||||||
|
// and `NodeOutOfServiceVolumeDetach` feature gate is enabled.
|
||||||
|
func (rc *reconciler) hasOutOfServiceTaint(nodeName types.NodeName) (bool, error) {
|
||||||
|
if utilfeature.DefaultFeatureGate.Enabled(features.NodeOutOfServiceVolumeDetach) {
|
||||||
|
node, err := rc.nodeLister.Get(string(nodeName))
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
return taints.TaintKeyExists(node.Spec.Taints, v1.TaintNodeOutOfService), nil
|
||||||
|
}
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (rc *reconciler) reconcile() {
|
func (rc *reconciler) reconcile() {
|
||||||
// Detaches are triggered before attaches so that volumes referenced by
|
// Detaches are triggered before attaches so that volumes referenced by
|
||||||
// pods that are rescheduled to a different node are detached first.
|
// pods that are rescheduled to a different node are detached first.
|
||||||
@ -183,8 +203,15 @@ func (rc *reconciler) reconcile() {
|
|||||||
}
|
}
|
||||||
// Check whether timeout has reached the maximum waiting time
|
// Check whether timeout has reached the maximum waiting time
|
||||||
timeout := elapsedTime > rc.maxWaitForUnmountDuration
|
timeout := elapsedTime > rc.maxWaitForUnmountDuration
|
||||||
|
|
||||||
|
hasOutOfServiceTaint, err := rc.hasOutOfServiceTaint(attachedVolume.NodeName)
|
||||||
|
if err != nil {
|
||||||
|
klog.Errorf("failed to get taint specs for node %s: %s", attachedVolume.NodeName, err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
// Check whether volume is still mounted. Skip detach if it is still mounted unless timeout
|
// Check whether volume is still mounted. Skip detach if it is still mounted unless timeout
|
||||||
if attachedVolume.MountedByNode && !timeout {
|
// or the node has `node.kubernetes.io/out-of-service` taint.
|
||||||
|
if attachedVolume.MountedByNode && !timeout && !hasOutOfServiceTaint {
|
||||||
klog.V(5).InfoS("Cannot detach volume because it is still mounted", "volume", attachedVolume)
|
klog.V(5).InfoS("Cannot detach volume because it is still mounted", "volume", attachedVolume)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@ -211,8 +238,12 @@ func (rc *reconciler) reconcile() {
|
|||||||
|
|
||||||
// Trigger detach volume which requires verifying safe to detach step
|
// Trigger detach volume which requires verifying safe to detach step
|
||||||
// If timeout is true, skip verifySafeToDetach check
|
// If timeout is true, skip verifySafeToDetach check
|
||||||
|
// If the node has node.kubernetes.io/out-of-service taint with NoExecute effect, skip verifySafeToDetach check
|
||||||
klog.V(5).InfoS("Starting attacherDetacher.DetachVolume", "volume", attachedVolume)
|
klog.V(5).InfoS("Starting attacherDetacher.DetachVolume", "volume", attachedVolume)
|
||||||
verifySafeToDetach := !timeout
|
if hasOutOfServiceTaint {
|
||||||
|
klog.V(4).Infof("node %q has out-of-service taint", attachedVolume.NodeName)
|
||||||
|
}
|
||||||
|
verifySafeToDetach := !(timeout || hasOutOfServiceTaint)
|
||||||
err = rc.attacherDetacher.DetachVolume(attachedVolume.AttachedVolume, verifySafeToDetach, rc.actualStateOfWorld)
|
err = rc.attacherDetacher.DetachVolume(attachedVolume.AttachedVolume, verifySafeToDetach, rc.actualStateOfWorld)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
if !timeout {
|
if !timeout {
|
||||||
|
@ -21,14 +21,18 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
k8stypes "k8s.io/apimachinery/pkg/types"
|
k8stypes "k8s.io/apimachinery/pkg/types"
|
||||||
"k8s.io/apimachinery/pkg/util/wait"
|
"k8s.io/apimachinery/pkg/util/wait"
|
||||||
|
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||||
"k8s.io/client-go/informers"
|
"k8s.io/client-go/informers"
|
||||||
"k8s.io/client-go/tools/record"
|
"k8s.io/client-go/tools/record"
|
||||||
|
featuregatetesting "k8s.io/component-base/featuregate/testing"
|
||||||
"k8s.io/kubernetes/pkg/controller"
|
"k8s.io/kubernetes/pkg/controller"
|
||||||
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/cache"
|
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/cache"
|
||||||
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/statusupdater"
|
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/statusupdater"
|
||||||
controllervolumetesting "k8s.io/kubernetes/pkg/controller/volume/attachdetach/testing"
|
controllervolumetesting "k8s.io/kubernetes/pkg/controller/volume/attachdetach/testing"
|
||||||
|
"k8s.io/kubernetes/pkg/features"
|
||||||
volumetesting "k8s.io/kubernetes/pkg/volume/testing"
|
volumetesting "k8s.io/kubernetes/pkg/volume/testing"
|
||||||
"k8s.io/kubernetes/pkg/volume/util/operationexecutor"
|
"k8s.io/kubernetes/pkg/volume/util/operationexecutor"
|
||||||
"k8s.io/kubernetes/pkg/volume/util/types"
|
"k8s.io/kubernetes/pkg/volume/util/types"
|
||||||
@ -36,9 +40,10 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
reconcilerLoopPeriod time.Duration = 10 * time.Millisecond
|
reconcilerLoopPeriod time.Duration = 10 * time.Millisecond
|
||||||
syncLoopPeriod time.Duration = 100 * time.Minute
|
syncLoopPeriod time.Duration = 100 * time.Minute
|
||||||
maxWaitForUnmountDuration time.Duration = 50 * time.Millisecond
|
maxWaitForUnmountDuration time.Duration = 50 * time.Millisecond
|
||||||
|
maxLongWaitForUnmountDuration time.Duration = 4200 * time.Second
|
||||||
)
|
)
|
||||||
|
|
||||||
// Calls Run()
|
// Calls Run()
|
||||||
@ -48,6 +53,7 @@ func Test_Run_Positive_DoNothing(t *testing.T) {
|
|||||||
volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
|
volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
|
||||||
dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
|
dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
|
||||||
asw := cache.NewActualStateOfWorld(volumePluginMgr)
|
asw := cache.NewActualStateOfWorld(volumePluginMgr)
|
||||||
|
|
||||||
fakeKubeClient := controllervolumetesting.CreateTestClient()
|
fakeKubeClient := controllervolumetesting.CreateTestClient()
|
||||||
fakeRecorder := &record.FakeRecorder{}
|
fakeRecorder := &record.FakeRecorder{}
|
||||||
fakeHandler := volumetesting.NewBlockVolumePathHandler()
|
fakeHandler := volumetesting.NewBlockVolumePathHandler()
|
||||||
@ -59,8 +65,9 @@ func Test_Run_Positive_DoNothing(t *testing.T) {
|
|||||||
informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
|
informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
|
||||||
nsu := statusupdater.NewNodeStatusUpdater(
|
nsu := statusupdater.NewNodeStatusUpdater(
|
||||||
fakeKubeClient, informerFactory.Core().V1().Nodes().Lister(), asw)
|
fakeKubeClient, informerFactory.Core().V1().Nodes().Lister(), asw)
|
||||||
|
nodeLister := informerFactory.Core().V1().Nodes().Lister()
|
||||||
reconciler := NewReconciler(
|
reconciler := NewReconciler(
|
||||||
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, fakeRecorder)
|
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
|
||||||
|
|
||||||
// Act
|
// Act
|
||||||
ch := make(chan struct{})
|
ch := make(chan struct{})
|
||||||
@ -91,9 +98,11 @@ func Test_Run_Positive_OneDesiredVolumeAttach(t *testing.T) {
|
|||||||
volumePluginMgr,
|
volumePluginMgr,
|
||||||
fakeRecorder,
|
fakeRecorder,
|
||||||
fakeHandler))
|
fakeHandler))
|
||||||
|
informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
|
||||||
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
||||||
|
nodeLister := informerFactory.Core().V1().Nodes().Lister()
|
||||||
reconciler := NewReconciler(
|
reconciler := NewReconciler(
|
||||||
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, fakeRecorder)
|
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
|
||||||
podName := "pod-uid"
|
podName := "pod-uid"
|
||||||
volumeName := v1.UniqueVolumeName("volume-name")
|
volumeName := v1.UniqueVolumeName("volume-name")
|
||||||
volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
|
volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
|
||||||
@ -142,9 +151,11 @@ func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithUnmountedVolume(t *te
|
|||||||
volumePluginMgr,
|
volumePluginMgr,
|
||||||
fakeRecorder,
|
fakeRecorder,
|
||||||
fakeHandler))
|
fakeHandler))
|
||||||
|
informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
|
||||||
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
||||||
|
nodeLister := informerFactory.Core().V1().Nodes().Lister()
|
||||||
reconciler := NewReconciler(
|
reconciler := NewReconciler(
|
||||||
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, fakeRecorder)
|
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
|
||||||
podName := "pod-uid"
|
podName := "pod-uid"
|
||||||
volumeName := v1.UniqueVolumeName("volume-name")
|
volumeName := v1.UniqueVolumeName("volume-name")
|
||||||
volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
|
volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
|
||||||
@ -214,9 +225,11 @@ func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithMountedVolume(t *test
|
|||||||
volumePluginMgr,
|
volumePluginMgr,
|
||||||
fakeRecorder,
|
fakeRecorder,
|
||||||
fakeHandler))
|
fakeHandler))
|
||||||
|
informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
|
||||||
|
nodeLister := informerFactory.Core().V1().Nodes().Lister()
|
||||||
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
||||||
reconciler := NewReconciler(
|
reconciler := NewReconciler(
|
||||||
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, fakeRecorder)
|
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
|
||||||
podName := "pod-uid"
|
podName := "pod-uid"
|
||||||
volumeName := v1.UniqueVolumeName("volume-name")
|
volumeName := v1.UniqueVolumeName("volume-name")
|
||||||
volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
|
volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
|
||||||
@ -286,9 +299,11 @@ func Test_Run_Negative_OneDesiredVolumeAttachThenDetachWithUnmountedVolumeUpdate
|
|||||||
volumePluginMgr,
|
volumePluginMgr,
|
||||||
fakeRecorder,
|
fakeRecorder,
|
||||||
fakeHandler))
|
fakeHandler))
|
||||||
|
informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
|
||||||
|
nodeLister := informerFactory.Core().V1().Nodes().Lister()
|
||||||
nsu := statusupdater.NewFakeNodeStatusUpdater(true /* returnError */)
|
nsu := statusupdater.NewFakeNodeStatusUpdater(true /* returnError */)
|
||||||
reconciler := NewReconciler(
|
reconciler := NewReconciler(
|
||||||
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, fakeRecorder)
|
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
|
||||||
podName := "pod-uid"
|
podName := "pod-uid"
|
||||||
volumeName := v1.UniqueVolumeName("volume-name")
|
volumeName := v1.UniqueVolumeName("volume-name")
|
||||||
volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
|
volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
|
||||||
@ -362,8 +377,10 @@ func Test_Run_OneVolumeAttachAndDetachMultipleNodesWithReadWriteMany(t *testing.
|
|||||||
fakeRecorder,
|
fakeRecorder,
|
||||||
fakeHandler))
|
fakeHandler))
|
||||||
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
||||||
|
informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
|
||||||
|
nodeLister := informerFactory.Core().V1().Nodes().Lister()
|
||||||
reconciler := NewReconciler(
|
reconciler := NewReconciler(
|
||||||
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, fakeRecorder)
|
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
|
||||||
podName1 := "pod-uid1"
|
podName1 := "pod-uid1"
|
||||||
podName2 := "pod-uid2"
|
podName2 := "pod-uid2"
|
||||||
volumeName := v1.UniqueVolumeName("volume-name")
|
volumeName := v1.UniqueVolumeName("volume-name")
|
||||||
@ -453,9 +470,11 @@ func Test_Run_OneVolumeAttachAndDetachMultipleNodesWithReadWriteOnce(t *testing.
|
|||||||
volumePluginMgr,
|
volumePluginMgr,
|
||||||
fakeRecorder,
|
fakeRecorder,
|
||||||
fakeHandler))
|
fakeHandler))
|
||||||
|
informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
|
||||||
|
nodeLister := informerFactory.Core().V1().Nodes().Lister()
|
||||||
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
||||||
reconciler := NewReconciler(
|
reconciler := NewReconciler(
|
||||||
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, fakeRecorder)
|
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
|
||||||
podName1 := "pod-uid1"
|
podName1 := "pod-uid1"
|
||||||
podName2 := "pod-uid2"
|
podName2 := "pod-uid2"
|
||||||
volumeName := v1.UniqueVolumeName("volume-name")
|
volumeName := v1.UniqueVolumeName("volume-name")
|
||||||
@ -543,9 +562,11 @@ func Test_Run_OneVolumeAttachAndDetachUncertainNodesWithReadWriteOnce(t *testing
|
|||||||
volumePluginMgr,
|
volumePluginMgr,
|
||||||
fakeRecorder,
|
fakeRecorder,
|
||||||
fakeHandler))
|
fakeHandler))
|
||||||
|
informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
|
||||||
|
nodeLister := informerFactory.Core().V1().Nodes().Lister()
|
||||||
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
||||||
reconciler := NewReconciler(
|
reconciler := NewReconciler(
|
||||||
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, fakeRecorder)
|
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
|
||||||
podName1 := "pod-uid1"
|
podName1 := "pod-uid1"
|
||||||
podName2 := "pod-uid2"
|
podName2 := "pod-uid2"
|
||||||
volumeName := v1.UniqueVolumeName("volume-name")
|
volumeName := v1.UniqueVolumeName("volume-name")
|
||||||
@ -604,9 +625,11 @@ func Test_Run_OneVolumeDetachFailNodeWithReadWriteOnce(t *testing.T) {
|
|||||||
volumePluginMgr,
|
volumePluginMgr,
|
||||||
fakeRecorder,
|
fakeRecorder,
|
||||||
fakeHandler))
|
fakeHandler))
|
||||||
|
informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
|
||||||
|
nodeLister := informerFactory.Core().V1().Nodes().Lister()
|
||||||
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
||||||
reconciler := NewReconciler(
|
reconciler := NewReconciler(
|
||||||
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, fakeRecorder)
|
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
|
||||||
podName1 := "pod-uid1"
|
podName1 := "pod-uid1"
|
||||||
podName2 := "pod-uid2"
|
podName2 := "pod-uid2"
|
||||||
podName3 := "pod-uid3"
|
podName3 := "pod-uid3"
|
||||||
@ -705,9 +728,11 @@ func Test_Run_OneVolumeAttachAndDetachTimeoutNodesWithReadWriteOnce(t *testing.T
|
|||||||
volumePluginMgr,
|
volumePluginMgr,
|
||||||
fakeRecorder,
|
fakeRecorder,
|
||||||
fakeHandler))
|
fakeHandler))
|
||||||
|
informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
|
||||||
|
nodeLister := informerFactory.Core().V1().Nodes().Lister()
|
||||||
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
||||||
reconciler := NewReconciler(
|
reconciler := NewReconciler(
|
||||||
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, fakeRecorder)
|
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
|
||||||
podName1 := "pod-uid1"
|
podName1 := "pod-uid1"
|
||||||
podName2 := "pod-uid2"
|
podName2 := "pod-uid2"
|
||||||
volumeName := v1.UniqueVolumeName("volume-name")
|
volumeName := v1.UniqueVolumeName("volume-name")
|
||||||
@ -752,6 +777,165 @@ func Test_Run_OneVolumeAttachAndDetachTimeoutNodesWithReadWriteOnce(t *testing.T
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Populates desiredStateOfWorld cache with one node/volume/pod tuple.
|
||||||
|
// The node has node.kubernetes.io/out-of-service taint present.
|
||||||
|
//
|
||||||
|
// The maxWaitForUnmountDuration is longer (in this case it is 4200 * time.Second so that detach does not happen
|
||||||
|
// immediately due to timeout.
|
||||||
|
//
|
||||||
|
// Calls Run()
|
||||||
|
// Verifies there is one attach call and no detach calls.
|
||||||
|
// Deletes the pod from desiredStateOfWorld cache without first marking the node/volume as unmounted.
|
||||||
|
// Verifies there is one detach call and no (new) attach calls.
|
||||||
|
func Test_Run_OneVolumeDetachOnOutOfServiceTaintedNode(t *testing.T) {
|
||||||
|
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeOutOfServiceVolumeDetach, true)()
|
||||||
|
// Arrange
|
||||||
|
volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
|
||||||
|
dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
|
||||||
|
asw := cache.NewActualStateOfWorld(volumePluginMgr)
|
||||||
|
fakeKubeClient := controllervolumetesting.CreateTestClient()
|
||||||
|
fakeRecorder := &record.FakeRecorder{}
|
||||||
|
fakeHandler := volumetesting.NewBlockVolumePathHandler()
|
||||||
|
ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
|
||||||
|
fakeKubeClient,
|
||||||
|
volumePluginMgr,
|
||||||
|
fakeRecorder,
|
||||||
|
fakeHandler))
|
||||||
|
informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
|
||||||
|
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
||||||
|
nodeLister := informerFactory.Core().V1().Nodes().Lister()
|
||||||
|
reconciler := NewReconciler(
|
||||||
|
reconcilerLoopPeriod, maxLongWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad,
|
||||||
|
nsu, nodeLister, fakeRecorder)
|
||||||
|
podName1 := "pod-uid1"
|
||||||
|
volumeName1 := v1.UniqueVolumeName("volume-name1")
|
||||||
|
volumeSpec1 := controllervolumetesting.GetTestVolumeSpec(string(volumeName1), volumeName1)
|
||||||
|
nodeName1 := k8stypes.NodeName("worker-0")
|
||||||
|
node1 := &v1.Node{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{Name: string(nodeName1)},
|
||||||
|
Spec: v1.NodeSpec{
|
||||||
|
Taints: []v1.Taint{{Key: v1.TaintNodeOutOfService, Effect: v1.TaintEffectNoExecute}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
informerFactory.Core().V1().Nodes().Informer().GetStore().Add(node1)
|
||||||
|
dsw.AddNode(nodeName1, false /*keepTerminatedPodVolumes*/)
|
||||||
|
volumeExists := dsw.VolumeExists(volumeName1, nodeName1)
|
||||||
|
if volumeExists {
|
||||||
|
t.Fatalf(
|
||||||
|
"Volume %q/node %q should not exist, but it does.",
|
||||||
|
volumeName1,
|
||||||
|
nodeName1)
|
||||||
|
}
|
||||||
|
|
||||||
|
generatedVolumeName, podErr := dsw.AddPod(types.UniquePodName(podName1), controllervolumetesting.NewPod(podName1,
|
||||||
|
podName1), volumeSpec1, nodeName1)
|
||||||
|
if podErr != nil {
|
||||||
|
t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podErr)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Act
|
||||||
|
ch := make(chan struct{})
|
||||||
|
go reconciler.Run(ch)
|
||||||
|
defer close(ch)
|
||||||
|
|
||||||
|
// Assert
|
||||||
|
waitForNewAttacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
|
||||||
|
verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
|
||||||
|
waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
|
||||||
|
verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
|
||||||
|
waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)
|
||||||
|
|
||||||
|
// Delete the pod and the volume will be detached only after the maxLongWaitForUnmountDuration expires as volume is
|
||||||
|
//not unmounted. Here maxLongWaitForUnmountDuration is used to mimic that node is out of service.
|
||||||
|
// But in this case the node has the node.kubernetes.io/out-of-service taint and hence it will not wait for
|
||||||
|
// maxLongWaitForUnmountDuration and will progress to detach immediately.
|
||||||
|
dsw.DeletePod(types.UniquePodName(podName1), generatedVolumeName, nodeName1)
|
||||||
|
// Assert -- Detach will be triggered if node has out of service taint
|
||||||
|
waitForNewDetacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
|
||||||
|
verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
|
||||||
|
waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
|
||||||
|
verifyNewDetacherCallCount(t, false /* expectZeroNewDetacherCallCount */, fakePlugin)
|
||||||
|
waitForDetachCallCount(t, 1 /* expectedDetachCallCount */, fakePlugin)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Populates desiredStateOfWorld cache with one node/volume/pod tuple.
|
||||||
|
// The node does not have the node.kubernetes.io/out-of-service taint present.
|
||||||
|
//
|
||||||
|
// The maxWaitForUnmountDuration is longer (in this case it is 4200 * time.Second so that detach does not happen
|
||||||
|
// immediately due to timeout.
|
||||||
|
//
|
||||||
|
// Calls Run()
|
||||||
|
// Verifies there is one attach call and no detach calls.
|
||||||
|
// Deletes the pod from desiredStateOfWorld cache without first marking the node/volume as unmounted.
|
||||||
|
// Verifies there is no detach call and no (new) attach calls.
|
||||||
|
func Test_Run_OneVolumeDetachOnNoOutOfServiceTaintedNode(t *testing.T) {
|
||||||
|
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeOutOfServiceVolumeDetach, true)()
|
||||||
|
// Arrange
|
||||||
|
volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
|
||||||
|
dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
|
||||||
|
asw := cache.NewActualStateOfWorld(volumePluginMgr)
|
||||||
|
fakeKubeClient := controllervolumetesting.CreateTestClient()
|
||||||
|
fakeRecorder := &record.FakeRecorder{}
|
||||||
|
fakeHandler := volumetesting.NewBlockVolumePathHandler()
|
||||||
|
ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
|
||||||
|
fakeKubeClient,
|
||||||
|
volumePluginMgr,
|
||||||
|
fakeRecorder,
|
||||||
|
fakeHandler))
|
||||||
|
informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
|
||||||
|
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
||||||
|
nodeLister := informerFactory.Core().V1().Nodes().Lister()
|
||||||
|
reconciler := NewReconciler(
|
||||||
|
reconcilerLoopPeriod, maxLongWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad,
|
||||||
|
nsu, nodeLister, fakeRecorder)
|
||||||
|
podName1 := "pod-uid1"
|
||||||
|
volumeName1 := v1.UniqueVolumeName("volume-name1")
|
||||||
|
volumeSpec1 := controllervolumetesting.GetTestVolumeSpec(string(volumeName1), volumeName1)
|
||||||
|
nodeName1 := k8stypes.NodeName("worker-0")
|
||||||
|
node1 := &v1.Node{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{Name: string(nodeName1)},
|
||||||
|
}
|
||||||
|
informerFactory.Core().V1().Nodes().Informer().GetStore().Add(node1)
|
||||||
|
dsw.AddNode(nodeName1, false /*keepTerminatedPodVolumes*/)
|
||||||
|
volumeExists := dsw.VolumeExists(volumeName1, nodeName1)
|
||||||
|
if volumeExists {
|
||||||
|
t.Fatalf(
|
||||||
|
"Volume %q/node %q should not exist, but it does.",
|
||||||
|
volumeName1,
|
||||||
|
nodeName1)
|
||||||
|
}
|
||||||
|
|
||||||
|
generatedVolumeName, podErr := dsw.AddPod(types.UniquePodName(podName1), controllervolumetesting.NewPod(podName1,
|
||||||
|
podName1), volumeSpec1, nodeName1)
|
||||||
|
if podErr != nil {
|
||||||
|
t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podErr)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Act
|
||||||
|
ch := make(chan struct{})
|
||||||
|
go reconciler.Run(ch)
|
||||||
|
defer close(ch)
|
||||||
|
|
||||||
|
// Assert
|
||||||
|
waitForNewAttacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
|
||||||
|
verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
|
||||||
|
waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
|
||||||
|
verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
|
||||||
|
waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)
|
||||||
|
|
||||||
|
// Delete the pod and the volume will be detached only after the maxLongWaitForUnmountDuration expires as volume is
|
||||||
|
// not unmounted. Here maxLongWaitForUnmountDuration is used to mimic that node is out of service.
|
||||||
|
// But in this case the node does not have the node.kubernetes.io/out-of-service taint and hence it will wait for
|
||||||
|
// maxLongWaitForUnmountDuration and will not be detached immediately.
|
||||||
|
dsw.DeletePod(types.UniquePodName(podName1), generatedVolumeName, nodeName1)
|
||||||
|
// Assert -- Detach will be triggered only after maxLongWaitForUnmountDuration expires
|
||||||
|
waitForNewDetacherCallCount(t, 0 /* expectedCallCount */, fakePlugin)
|
||||||
|
verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
|
||||||
|
waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
|
||||||
|
verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
|
||||||
|
waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)
|
||||||
|
}
|
||||||
|
|
||||||
func Test_ReportMultiAttachError(t *testing.T) {
|
func Test_ReportMultiAttachError(t *testing.T) {
|
||||||
type nodeWithPods struct {
|
type nodeWithPods struct {
|
||||||
name k8stypes.NodeName
|
name k8stypes.NodeName
|
||||||
@ -810,9 +994,11 @@ func Test_ReportMultiAttachError(t *testing.T) {
|
|||||||
volumePluginMgr,
|
volumePluginMgr,
|
||||||
fakeRecorder,
|
fakeRecorder,
|
||||||
fakeHandler))
|
fakeHandler))
|
||||||
|
informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
|
||||||
|
nodeLister := informerFactory.Core().V1().Nodes().Lister()
|
||||||
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
|
||||||
rc := NewReconciler(
|
rc := NewReconciler(
|
||||||
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, fakeRecorder)
|
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
|
||||||
|
|
||||||
nodes := []k8stypes.NodeName{}
|
nodes := []k8stypes.NodeName{}
|
||||||
for _, n := range test.nodes {
|
for _, n := range test.nodes {
|
||||||
|
@ -844,12 +844,20 @@ const (
|
|||||||
//
|
//
|
||||||
// Enable MinDomains in Pod Topology Spread.
|
// Enable MinDomains in Pod Topology Spread.
|
||||||
MinDomainsInPodTopologySpread featuregate.Feature = "MinDomainsInPodTopologySpread"
|
MinDomainsInPodTopologySpread featuregate.Feature = "MinDomainsInPodTopologySpread"
|
||||||
|
|
||||||
// owner: @aojea
|
// owner: @aojea
|
||||||
// kep: http://kep.k8s.io/3070
|
// kep: http://kep.k8s.io/3070
|
||||||
// alpha: v1.24
|
// alpha: v1.24
|
||||||
//
|
//
|
||||||
// Subdivide the ClusterIP range for dynamic and static IP allocation.
|
// Subdivide the ClusterIP range for dynamic and static IP allocation.
|
||||||
ServiceIPStaticSubrange featuregate.Feature = "ServiceIPStaticSubrange"
|
ServiceIPStaticSubrange featuregate.Feature = "ServiceIPStaticSubrange"
|
||||||
|
|
||||||
|
// owner: @xing-yang @sonasingh46
|
||||||
|
// kep: http://kep.k8s.io/2268
|
||||||
|
// alpha: v1.24
|
||||||
|
//
|
||||||
|
// Allow pods to failover to a different node in case of non graceful node shutdown
|
||||||
|
NodeOutOfServiceVolumeDetach featuregate.Feature = "NodeOutOfServiceVolumeDetach"
|
||||||
)
|
)
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
@ -973,7 +981,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
|
|||||||
LegacyServiceAccountTokenNoAutoGeneration: {Default: true, PreRelease: featuregate.Beta},
|
LegacyServiceAccountTokenNoAutoGeneration: {Default: true, PreRelease: featuregate.Beta},
|
||||||
MinDomainsInPodTopologySpread: {Default: false, PreRelease: featuregate.Alpha},
|
MinDomainsInPodTopologySpread: {Default: false, PreRelease: featuregate.Alpha},
|
||||||
ServiceIPStaticSubrange: {Default: false, PreRelease: featuregate.Alpha},
|
ServiceIPStaticSubrange: {Default: false, PreRelease: featuregate.Alpha},
|
||||||
|
NodeOutOfServiceVolumeDetach: {Default: false, PreRelease: featuregate.Alpha},
|
||||||
// inherited features from generic apiserver, relisted here to get a conflict if it is changed
|
// inherited features from generic apiserver, relisted here to get a conflict if it is changed
|
||||||
// unintentionally on either side:
|
// unintentionally on either side:
|
||||||
genericfeatures.AdvancedAuditing: {Default: true, PreRelease: featuregate.GA},
|
genericfeatures.AdvancedAuditing: {Default: true, PreRelease: featuregate.GA},
|
||||||
|
@ -164,3 +164,13 @@ func GetNodeIP(client clientset.Interface, name string) net.IP {
|
|||||||
}
|
}
|
||||||
return nodeIP
|
return nodeIP
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsNodeReady returns true if a node is ready; false otherwise.
|
||||||
|
func IsNodeReady(node *v1.Node) bool {
|
||||||
|
for _, c := range node.Status.Conditions {
|
||||||
|
if c.Type == v1.NodeReady {
|
||||||
|
return c.Status == v1.ConditionTrue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
@ -275,6 +275,16 @@ func TaintExists(taints []v1.Taint, taintToFind *v1.Taint) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TaintKeyExists checks if the given taint key exists in list of taints. Returns true if exists false otherwise.
|
||||||
|
func TaintKeyExists(taints []v1.Taint, taintKeyToMatch string) bool {
|
||||||
|
for _, taint := range taints {
|
||||||
|
if taint.Key == taintKeyToMatch {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
func TaintSetDiff(t1, t2 []v1.Taint) (taintsToAdd []*v1.Taint, taintsToRemove []*v1.Taint) {
|
func TaintSetDiff(t1, t2 []v1.Taint) (taintsToAdd []*v1.Taint, taintsToRemove []*v1.Taint) {
|
||||||
for _, taint := range t1 {
|
for _, taint := range t1 {
|
||||||
if !TaintExists(t2, &taint) {
|
if !TaintExists(t2, &taint) {
|
||||||
|
@ -45,4 +45,8 @@ const (
|
|||||||
// TaintNodePIDPressure will be added when node has pid pressure
|
// TaintNodePIDPressure will be added when node has pid pressure
|
||||||
// and removed when node has enough pid.
|
// and removed when node has enough pid.
|
||||||
TaintNodePIDPressure = "node.kubernetes.io/pid-pressure"
|
TaintNodePIDPressure = "node.kubernetes.io/pid-pressure"
|
||||||
|
|
||||||
|
// TaintNodeOutOfService can be added when node is out of service in case of
|
||||||
|
// a non-graceful shutdown
|
||||||
|
TaintNodeOutOfService = "node.kubernetes.io/out-of-service"
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user