mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 19:56:01 +00:00
Simplify the integration test for node lifecycle manager
This commit is contained in:
parent
127f33f63d
commit
ca90621b2a
@ -23,7 +23,6 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
|
||||||
"k8s.io/apimachinery/pkg/api/resource"
|
"k8s.io/apimachinery/pkg/api/resource"
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
"k8s.io/apimachinery/pkg/util/wait"
|
"k8s.io/apimachinery/pkg/util/wait"
|
||||||
@ -44,11 +43,6 @@ import (
|
|||||||
imageutils "k8s.io/kubernetes/test/utils/image"
|
imageutils "k8s.io/kubernetes/test/utils/image"
|
||||||
)
|
)
|
||||||
|
|
||||||
// poll is how often to poll pods, nodes and claims.
|
|
||||||
const poll = 2 * time.Second
|
|
||||||
|
|
||||||
type podCondition func(pod *v1.Pod) (bool, error)
|
|
||||||
|
|
||||||
// TestEvictionForNoExecuteTaintAddedByUser tests taint-based eviction for a node tainted NoExecute
|
// TestEvictionForNoExecuteTaintAddedByUser tests taint-based eviction for a node tainted NoExecute
|
||||||
func TestEvictionForNoExecuteTaintAddedByUser(t *testing.T) {
|
func TestEvictionForNoExecuteTaintAddedByUser(t *testing.T) {
|
||||||
tests := map[string]struct {
|
tests := map[string]struct {
|
||||||
@ -187,9 +181,9 @@ func TestEvictionForNoExecuteTaintAddedByUser(t *testing.T) {
|
|||||||
func TestTaintBasedEvictions(t *testing.T) {
|
func TestTaintBasedEvictions(t *testing.T) {
|
||||||
// we need at least 2 nodes to prevent lifecycle manager from entering "fully-disrupted" mode
|
// we need at least 2 nodes to prevent lifecycle manager from entering "fully-disrupted" mode
|
||||||
nodeCount := 3
|
nodeCount := 3
|
||||||
|
nodeIndex := 1 // the exact node doesn't matter, pick one
|
||||||
zero := int64(0)
|
zero := int64(0)
|
||||||
gracePeriod := int64(1)
|
gracePeriod := int64(1)
|
||||||
heartbeatInternal := time.Second * 2
|
|
||||||
testPod := &v1.Pod{
|
testPod := &v1.Pod{
|
||||||
ObjectMeta: metav1.ObjectMeta{Name: "testpod1", DeletionGracePeriodSeconds: &zero},
|
ObjectMeta: metav1.ObjectMeta{Name: "testpod1", DeletionGracePeriodSeconds: &zero},
|
||||||
Spec: v1.PodSpec{
|
Spec: v1.PodSpec{
|
||||||
@ -206,19 +200,20 @@ func TestTaintBasedEvictions(t *testing.T) {
|
|||||||
TerminationGracePeriodSeconds: &gracePeriod,
|
TerminationGracePeriodSeconds: &gracePeriod,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
tolerationSeconds := []int64{200, 300, 0}
|
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
nodeTaints []v1.Taint
|
nodeTaints []v1.Taint
|
||||||
nodeConditions []v1.NodeCondition
|
nodeConditions []v1.NodeCondition
|
||||||
pod *v1.Pod
|
pod *v1.Pod
|
||||||
|
tolerationSeconds int64
|
||||||
expectedWaitForPodCondition string
|
expectedWaitForPodCondition string
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
name: "Taint based evictions for NodeNotReady and 200 tolerationseconds",
|
name: "Taint based evictions for NodeNotReady and 200 tolerationseconds",
|
||||||
nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}},
|
nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}},
|
||||||
nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
|
nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
|
||||||
pod: testPod,
|
pod: testPod.DeepCopy(),
|
||||||
|
tolerationSeconds: 200,
|
||||||
expectedWaitForPodCondition: "updated with tolerationSeconds of 200",
|
expectedWaitForPodCondition: "updated with tolerationSeconds of 200",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -233,13 +228,15 @@ func TestTaintBasedEvictions(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
tolerationSeconds: 300,
|
||||||
expectedWaitForPodCondition: "updated with tolerationSeconds=300",
|
expectedWaitForPodCondition: "updated with tolerationSeconds=300",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "Taint based evictions for NodeNotReady and 0 tolerationseconds",
|
name: "Taint based evictions for NodeNotReady and 0 tolerationseconds",
|
||||||
nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}},
|
nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}},
|
||||||
nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
|
nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
|
||||||
pod: testPod,
|
pod: testPod.DeepCopy(),
|
||||||
|
tolerationSeconds: 0,
|
||||||
expectedWaitForPodCondition: "terminating",
|
expectedWaitForPodCondition: "terminating",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -255,7 +252,7 @@ func TestTaintBasedEvictions(t *testing.T) {
|
|||||||
podTolerations,
|
podTolerations,
|
||||||
defaulttolerationseconds.NewDefaultTolerationSeconds(),
|
defaulttolerationseconds.NewDefaultTolerationSeconds(),
|
||||||
)
|
)
|
||||||
for i, test := range tests {
|
for _, test := range tests {
|
||||||
t.Run(test.name, func(t *testing.T) {
|
t.Run(test.name, func(t *testing.T) {
|
||||||
testCtx := testutils.InitTestAPIServer(t, "taint-based-evictions", admission)
|
testCtx := testutils.InitTestAPIServer(t, "taint-based-evictions", admission)
|
||||||
|
|
||||||
@ -267,7 +264,6 @@ func TestTaintBasedEvictions(t *testing.T) {
|
|||||||
podTolerations.SetExternalKubeClientSet(externalClientset)
|
podTolerations.SetExternalKubeClientSet(externalClientset)
|
||||||
podTolerations.SetExternalKubeInformerFactory(externalInformers)
|
podTolerations.SetExternalKubeInformerFactory(externalInformers)
|
||||||
|
|
||||||
testCtx = testutils.InitTestScheduler(t, testCtx)
|
|
||||||
defer testutils.CleanupTest(t, testCtx)
|
defer testutils.CleanupTest(t, testCtx)
|
||||||
cs := testCtx.ClientSet
|
cs := testCtx.ClientSet
|
||||||
|
|
||||||
@ -279,7 +275,7 @@ func TestTaintBasedEvictions(t *testing.T) {
|
|||||||
externalInformers.Core().V1().Nodes(),
|
externalInformers.Core().V1().Nodes(),
|
||||||
externalInformers.Apps().V1().DaemonSets(),
|
externalInformers.Apps().V1().DaemonSets(),
|
||||||
cs,
|
cs,
|
||||||
5*time.Second, // Node monitor grace period
|
1*time.Second, // Node monitor grace period
|
||||||
time.Minute, // Node startup grace period
|
time.Minute, // Node startup grace period
|
||||||
time.Millisecond, // Node monitor period
|
time.Millisecond, // Node monitor period
|
||||||
time.Second, // Pod eviction timeout
|
time.Second, // Pod eviction timeout
|
||||||
@ -290,18 +286,15 @@ func TestTaintBasedEvictions(t *testing.T) {
|
|||||||
true, // Run taint manager
|
true, // Run taint manager
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("Failed to create node controller: %v", err)
|
t.Fatalf("Failed to create node controller: %v", err)
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Waiting for all controllers to sync
|
// Waiting for all controllers to sync
|
||||||
externalInformers.Start(testCtx.Ctx.Done())
|
externalInformers.Start(testCtx.Ctx.Done())
|
||||||
externalInformers.WaitForCacheSync(testCtx.Ctx.Done())
|
externalInformers.WaitForCacheSync(testCtx.Ctx.Done())
|
||||||
testutils.SyncInformerFactory(testCtx)
|
|
||||||
|
|
||||||
// Run all controllers
|
// Run the controller
|
||||||
go nc.Run(testCtx.Ctx)
|
go nc.Run(testCtx.Ctx)
|
||||||
go testCtx.Scheduler.Run(testCtx.Ctx)
|
|
||||||
|
|
||||||
nodeRes := v1.ResourceList{
|
nodeRes := v1.ResourceList{
|
||||||
v1.ResourceCPU: resource.MustParse("4000m"),
|
v1.ResourceCPU: resource.MustParse("4000m"),
|
||||||
@ -311,120 +304,68 @@ func TestTaintBasedEvictions(t *testing.T) {
|
|||||||
|
|
||||||
var nodes []*v1.Node
|
var nodes []*v1.Node
|
||||||
for i := 0; i < nodeCount; i++ {
|
for i := 0; i < nodeCount; i++ {
|
||||||
nodes = append(nodes, &v1.Node{
|
node := &v1.Node{
|
||||||
ObjectMeta: metav1.ObjectMeta{
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
Name: fmt.Sprintf("node-%d", i),
|
Name: fmt.Sprintf("node-%d", i),
|
||||||
Labels: map[string]string{v1.LabelTopologyRegion: "region1", v1.LabelTopologyZone: "zone1"},
|
Labels: map[string]string{
|
||||||
|
v1.LabelTopologyRegion: "region1",
|
||||||
|
v1.LabelTopologyZone: "zone1",
|
||||||
|
"node.kubernetes.io/exclude-disruption": "true",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
Spec: v1.NodeSpec{},
|
Spec: v1.NodeSpec{},
|
||||||
Status: v1.NodeStatus{
|
Status: v1.NodeStatus{
|
||||||
Capacity: nodeRes,
|
Capacity: nodeRes,
|
||||||
Allocatable: nodeRes,
|
Allocatable: nodeRes,
|
||||||
Conditions: []v1.NodeCondition{
|
|
||||||
{
|
|
||||||
Type: v1.NodeReady,
|
|
||||||
Status: v1.ConditionTrue,
|
|
||||||
LastHeartbeatTime: metav1.Now(),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
})
|
}
|
||||||
if _, err := cs.CoreV1().Nodes().Create(context.TODO(), nodes[i], metav1.CreateOptions{}); err != nil {
|
if i == nodeIndex {
|
||||||
t.Errorf("Failed to create node, err: %v", err)
|
node.Status.Conditions = append(node.Status.Conditions, test.nodeConditions...)
|
||||||
|
} else {
|
||||||
|
node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{
|
||||||
|
Type: v1.NodeReady,
|
||||||
|
Status: v1.ConditionTrue,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
nodes = append(nodes, node)
|
||||||
|
if _, err := cs.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{}); err != nil {
|
||||||
|
t.Fatalf("Failed to create node: %q, err: %v", klog.KObj(node), err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
neededNode := nodes[1]
|
|
||||||
if test.pod != nil {
|
if test.pod != nil {
|
||||||
test.pod.Name = fmt.Sprintf("testpod-%d", i)
|
test.pod.Spec.NodeName = nodes[nodeIndex].Name
|
||||||
|
test.pod.Name = "testpod"
|
||||||
if len(test.pod.Spec.Tolerations) > 0 {
|
if len(test.pod.Spec.Tolerations) > 0 {
|
||||||
test.pod.Spec.Tolerations[0].TolerationSeconds = &tolerationSeconds[i]
|
test.pod.Spec.Tolerations[0].TolerationSeconds = &test.tolerationSeconds
|
||||||
}
|
}
|
||||||
|
|
||||||
test.pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Create(context.TODO(), test.pod, metav1.CreateOptions{})
|
test.pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Create(context.TODO(), test.pod, metav1.CreateOptions{})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Test Failed: error: %v, while creating pod", err)
|
t.Fatalf("Test Failed: error: %q, while creating pod %q", err, klog.KObj(test.pod))
|
||||||
}
|
|
||||||
|
|
||||||
if err := testutils.WaitForPodToSchedule(cs, test.pod); err != nil {
|
|
||||||
t.Errorf("Failed to schedule pod %s/%s on the node, err: %v",
|
|
||||||
test.pod.Namespace, test.pod.Name, err)
|
|
||||||
}
|
|
||||||
test.pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Get(context.TODO(), test.pod.Name, metav1.GetOptions{})
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("Test Failed: error: %v, while creating pod", err)
|
|
||||||
}
|
|
||||||
neededNode, err = cs.CoreV1().Nodes().Get(context.TODO(), test.pod.Spec.NodeName, metav1.GetOptions{})
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("Error while getting node associated with pod %v with err %v", test.pod.Name, err)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Regularly send heartbeat event to APIServer so that the cluster doesn't enter fullyDisruption mode.
|
if err := testutils.WaitForNodeTaints(cs, nodes[nodeIndex], test.nodeTaints); err != nil {
|
||||||
// TODO(Huang-Wei): use "NodeDisruptionExclusion" feature to simply the below logic when it's beta.
|
t.Errorf("Failed to taint node %q, err: %v", klog.KObj(nodes[nodeIndex]), err)
|
||||||
for i := 0; i < nodeCount; i++ {
|
|
||||||
var conditions []v1.NodeCondition
|
|
||||||
// If current node is not <neededNode>
|
|
||||||
if neededNode.Name != nodes[i].Name {
|
|
||||||
conditions = []v1.NodeCondition{
|
|
||||||
{
|
|
||||||
Type: v1.NodeReady,
|
|
||||||
Status: v1.ConditionTrue,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
c, err := testutils.NodeReadyStatus(test.nodeConditions)
|
|
||||||
if err != nil {
|
|
||||||
t.Error(err)
|
|
||||||
}
|
|
||||||
// Need to distinguish NodeReady/False and NodeReady/Unknown.
|
|
||||||
// If we try to update the node with condition NotReady/False, i.e. expect a NotReady:NoExecute taint
|
|
||||||
// we need to keep sending the update event to keep it alive, rather than just sending once.
|
|
||||||
if c == v1.ConditionFalse {
|
|
||||||
conditions = test.nodeConditions
|
|
||||||
} else if c == v1.ConditionUnknown {
|
|
||||||
// If it's expected to update the node with condition NotReady/Unknown,
|
|
||||||
// i.e. expect a Unreachable:NoExecute taint,
|
|
||||||
// we need to only send the update event once to simulate the network unreachable scenario.
|
|
||||||
nodeCopy := testutils.NodeCopyWithConditions(nodes[i], test.nodeConditions)
|
|
||||||
if err := testutils.UpdateNodeStatus(cs, nodeCopy); err != nil && !apierrors.IsNotFound(err) {
|
|
||||||
t.Errorf("Cannot update node: %v", err)
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Keeping sending NodeReady/True or NodeReady/False events.
|
|
||||||
go func(i int) {
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-testCtx.Ctx.Done():
|
|
||||||
return
|
|
||||||
case <-time.Tick(heartbeatInternal):
|
|
||||||
nodeCopy := testutils.NodeCopyWithConditions(nodes[i], conditions)
|
|
||||||
if err := testutils.UpdateNodeStatus(cs, nodeCopy); err != nil && !apierrors.IsNotFound(err) {
|
|
||||||
t.Errorf("Cannot update node: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}(i)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := testutils.WaitForNodeTaints(cs, neededNode, test.nodeTaints); err != nil {
|
|
||||||
t.Errorf("Failed to taint node in test %d <%s>, err: %v", i, neededNode.Name, err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if test.pod != nil {
|
if test.pod != nil {
|
||||||
err = waitForPodCondition(cs, testCtx.NS.Name, test.pod.Name, test.expectedWaitForPodCondition, time.Second*15, func(pod *v1.Pod) (bool, error) {
|
err = wait.PollImmediate(time.Second, time.Second*15, func() (bool, error) {
|
||||||
|
pod, err := cs.CoreV1().Pods(test.pod.Namespace).Get(context.TODO(), test.pod.Name, metav1.GetOptions{})
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
// as node is unreachable, pod0 is expected to be in Terminating status
|
// as node is unreachable, pod0 is expected to be in Terminating status
|
||||||
// rather than getting deleted
|
// rather than getting deleted
|
||||||
if tolerationSeconds[i] == 0 {
|
if test.tolerationSeconds == 0 {
|
||||||
return pod.DeletionTimestamp != nil, nil
|
return pod.DeletionTimestamp != nil, nil
|
||||||
}
|
}
|
||||||
if seconds, err := testutils.GetTolerationSeconds(pod.Spec.Tolerations); err == nil {
|
if seconds, err := testutils.GetTolerationSeconds(pod.Spec.Tolerations); err == nil {
|
||||||
return seconds == tolerationSeconds[i], nil
|
return seconds == test.tolerationSeconds, nil
|
||||||
}
|
}
|
||||||
return false, nil
|
return false, nil
|
||||||
}, t)
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
pod, _ := cs.CoreV1().Pods(testCtx.NS.Name).Get(context.TODO(), test.pod.Name, metav1.GetOptions{})
|
pod, _ := cs.CoreV1().Pods(testCtx.NS.Name).Get(context.TODO(), test.pod.Name, metav1.GetOptions{})
|
||||||
t.Fatalf("Error: %v, Expected test pod to be %s but it's %v", err, test.expectedWaitForPodCondition, pod)
|
t.Fatalf("Error: %v, Expected test pod to be %s but it's %v", err, test.expectedWaitForPodCondition, pod)
|
||||||
@ -432,33 +373,6 @@ func TestTaintBasedEvictions(t *testing.T) {
|
|||||||
testutils.CleanupPods(cs, t, []*v1.Pod{test.pod})
|
testutils.CleanupPods(cs, t, []*v1.Pod{test.pod})
|
||||||
}
|
}
|
||||||
testutils.CleanupNodes(cs, t)
|
testutils.CleanupNodes(cs, t)
|
||||||
testutils.WaitForSchedulerCacheCleanup(testCtx.Scheduler, t)
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// waitForPodCondition waits a pods to be matched to the given condition.
|
|
||||||
func waitForPodCondition(c clientset.Interface, ns, podName, desc string, timeout time.Duration, condition podCondition, t *testing.T) error {
|
|
||||||
t.Logf("Waiting up to %v for pod %q in namespace %q to be %q", timeout, podName, ns, desc)
|
|
||||||
for start := time.Now(); time.Since(start) < timeout; time.Sleep(poll) {
|
|
||||||
pod, err := c.CoreV1().Pods(ns).Get(context.TODO(), podName, metav1.GetOptions{})
|
|
||||||
if err != nil {
|
|
||||||
if apierrors.IsNotFound(err) {
|
|
||||||
t.Logf("Pod %q in namespace %q not found. Error: %v", podName, ns, err)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
t.Logf("Get pod %q in namespace %q failed, ignoring for %v. Error: %v", podName, ns, poll, err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// log now so that current pod info is reported before calling `condition()`
|
|
||||||
t.Logf("Pod %q: Phase=%q, Reason=%q, readiness=%t. Elapsed: %v",
|
|
||||||
podName, pod.Status.Phase, pod.Status.Reason, podutil.IsPodReady(pod), time.Since(start))
|
|
||||||
if done, err := condition(pod); done {
|
|
||||||
if err == nil {
|
|
||||||
t.Logf("Pod %q satisfied condition %q", podName, desc)
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return fmt.Errorf("gave up after waiting %v for pod %q to be %q", timeout, podName, desc)
|
|
||||||
}
|
|
||||||
|
Loading…
Reference in New Issue
Block a user