diff --git a/test/e2e/framework/nodes_util.go b/test/e2e/framework/nodes_util.go index f3231e7ed2c..35a02d2e3d0 100644 --- a/test/e2e/framework/nodes_util.go +++ b/test/e2e/framework/nodes_util.go @@ -16,21 +16,6 @@ limitations under the License. package framework -import ( - "sync" - "time" - - "github.com/onsi/ginkgo/v2" - - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/util/wait" - clientset "k8s.io/client-go/kubernetes" - - // TODO: Remove the following imports (ref: https://github.com/kubernetes/kubernetes/issues/81245) - e2enode "k8s.io/kubernetes/test/e2e/framework/node" - e2essh "k8s.io/kubernetes/test/e2e/framework/ssh" -) - // AppendContainerCommandGroupIfNeeded returns container command group parameter if necessary. func AppendContainerCommandGroupIfNeeded(args []string) []string { if TestContext.CloudConfig.Region != "" { @@ -39,65 +24,3 @@ func AppendContainerCommandGroupIfNeeded(args []string) []string { } return args } - -// NodeKiller is a utility to simulate node failures. -type NodeKiller struct { - config NodeKillerConfig - client clientset.Interface - provider string -} - -// NewNodeKiller creates new NodeKiller. -func NewNodeKiller(config NodeKillerConfig, client clientset.Interface, provider string) *NodeKiller { - config.NodeKillerStopCh = make(chan struct{}) - return &NodeKiller{config, client, provider} -} - -// Run starts NodeKiller until stopCh is closed. -func (k *NodeKiller) Run(stopCh <-chan struct{}) { - // wait.JitterUntil starts work immediately, so wait first. - time.Sleep(wait.Jitter(k.config.Interval, k.config.JitterFactor)) - wait.JitterUntil(func() { - nodes := k.pickNodes() - k.kill(nodes) - }, k.config.Interval, k.config.JitterFactor, true, stopCh) -} - -func (k *NodeKiller) pickNodes() []v1.Node { - nodes, err := e2enode.GetReadySchedulableNodes(k.client) - ExpectNoError(err) - numNodes := int(k.config.FailureRatio * float64(len(nodes.Items))) - - nodes, err = e2enode.GetBoundedReadySchedulableNodes(k.client, numNodes) - ExpectNoError(err) - return nodes.Items -} - -func (k *NodeKiller) kill(nodes []v1.Node) { - wg := sync.WaitGroup{} - wg.Add(len(nodes)) - for _, node := range nodes { - node := node - go func() { - defer ginkgo.GinkgoRecover() - defer wg.Done() - - Logf("Stopping docker and kubelet on %q to simulate failure", node.Name) - err := e2essh.IssueSSHCommand("sudo systemctl stop docker kubelet", k.provider, &node) - if err != nil { - Logf("ERROR while stopping node %q: %v", node.Name, err) - return - } - - time.Sleep(k.config.SimulatedDowntime) - - Logf("Rebooting %q to repair the node", node.Name) - err = e2essh.IssueSSHCommand("sudo reboot", k.provider, &node) - if err != nil { - Logf("ERROR while rebooting node %q: %v", node.Name, err) - return - } - }() - } - wg.Wait() -} diff --git a/test/e2e/framework/test_context.go b/test/e2e/framework/test_context.go index 97413dd01b8..afb7654ec2b 100644 --- a/test/e2e/framework/test_context.go +++ b/test/e2e/framework/test_context.go @@ -197,6 +197,9 @@ type TestContextType struct { // NodeKillerConfig describes configuration of NodeKiller -- a utility to // simulate node failures. +// +// TODO: move this and the corresponding command line flags into +// test/e2e/framework/node. type NodeKillerConfig struct { // Enabled determines whether NodeKill should do anything at all. // All other options below are ignored if Enabled = false. diff --git a/test/e2e/framework/todo/node/node_killer.go b/test/e2e/framework/todo/node/node_killer.go new file mode 100644 index 00000000000..9e51b89e755 --- /dev/null +++ b/test/e2e/framework/todo/node/node_killer.go @@ -0,0 +1,94 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package node + +import ( + "sync" + "time" + + "github.com/onsi/ginkgo/v2" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/wait" + clientset "k8s.io/client-go/kubernetes" + + "k8s.io/kubernetes/test/e2e/framework" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" + e2essh "k8s.io/kubernetes/test/e2e/framework/ssh" +) + +// NodeKiller is a utility to simulate node failures. +type NodeKiller struct { + config framework.NodeKillerConfig + client clientset.Interface + provider string +} + +// NewNodeKiller creates new NodeKiller. +func NewNodeKiller(config framework.NodeKillerConfig, client clientset.Interface, provider string) *NodeKiller { + config.NodeKillerStopCh = make(chan struct{}) + return &NodeKiller{config, client, provider} +} + +// Run starts NodeKiller until stopCh is closed. +func (k *NodeKiller) Run(stopCh <-chan struct{}) { + // wait.JitterUntil starts work immediately, so wait first. + time.Sleep(wait.Jitter(k.config.Interval, k.config.JitterFactor)) + wait.JitterUntil(func() { + nodes := k.pickNodes() + k.kill(nodes) + }, k.config.Interval, k.config.JitterFactor, true, stopCh) +} + +func (k *NodeKiller) pickNodes() []v1.Node { + nodes, err := e2enode.GetReadySchedulableNodes(k.client) + framework.ExpectNoError(err) + numNodes := int(k.config.FailureRatio * float64(len(nodes.Items))) + + nodes, err = e2enode.GetBoundedReadySchedulableNodes(k.client, numNodes) + framework.ExpectNoError(err) + return nodes.Items +} + +func (k *NodeKiller) kill(nodes []v1.Node) { + wg := sync.WaitGroup{} + wg.Add(len(nodes)) + for _, node := range nodes { + node := node + go func() { + defer ginkgo.GinkgoRecover() + defer wg.Done() + + framework.Logf("Stopping docker and kubelet on %q to simulate failure", node.Name) + err := e2essh.IssueSSHCommand("sudo systemctl stop docker kubelet", k.provider, &node) + if err != nil { + framework.Logf("ERROR while stopping node %q: %v", node.Name, err) + return + } + + time.Sleep(k.config.SimulatedDowntime) + + framework.Logf("Rebooting %q to repair the node", node.Name) + err = e2essh.IssueSSHCommand("sudo reboot", k.provider, &node) + if err != nil { + framework.Logf("ERROR while rebooting node %q: %v", node.Name, err) + return + } + }() + } + wg.Wait() +}