e2e framework: move NodeKiller into framework/node

This removes some more dependencies from the framework.

Removing the configuration needs to be handled as part of refactoring the
TestContext.
This commit is contained in:
Patrick Ohly 2022-08-25 18:51:30 +02:00
parent 70d0824f01
commit 8af3258526
3 changed files with 97 additions and 77 deletions

View File

@ -16,21 +16,6 @@ limitations under the License.
package framework package framework
import (
"sync"
"time"
"github.com/onsi/ginkgo/v2"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
// TODO: Remove the following imports (ref: https://github.com/kubernetes/kubernetes/issues/81245)
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
e2essh "k8s.io/kubernetes/test/e2e/framework/ssh"
)
// AppendContainerCommandGroupIfNeeded returns container command group parameter if necessary. // AppendContainerCommandGroupIfNeeded returns container command group parameter if necessary.
func AppendContainerCommandGroupIfNeeded(args []string) []string { func AppendContainerCommandGroupIfNeeded(args []string) []string {
if TestContext.CloudConfig.Region != "" { if TestContext.CloudConfig.Region != "" {
@ -39,65 +24,3 @@ func AppendContainerCommandGroupIfNeeded(args []string) []string {
} }
return args return args
} }
// NodeKiller is a utility to simulate node failures.
type NodeKiller struct {
config NodeKillerConfig
client clientset.Interface
provider string
}
// NewNodeKiller creates new NodeKiller.
func NewNodeKiller(config NodeKillerConfig, client clientset.Interface, provider string) *NodeKiller {
config.NodeKillerStopCh = make(chan struct{})
return &NodeKiller{config, client, provider}
}
// Run starts NodeKiller until stopCh is closed.
func (k *NodeKiller) Run(stopCh <-chan struct{}) {
// wait.JitterUntil starts work immediately, so wait first.
time.Sleep(wait.Jitter(k.config.Interval, k.config.JitterFactor))
wait.JitterUntil(func() {
nodes := k.pickNodes()
k.kill(nodes)
}, k.config.Interval, k.config.JitterFactor, true, stopCh)
}
func (k *NodeKiller) pickNodes() []v1.Node {
nodes, err := e2enode.GetReadySchedulableNodes(k.client)
ExpectNoError(err)
numNodes := int(k.config.FailureRatio * float64(len(nodes.Items)))
nodes, err = e2enode.GetBoundedReadySchedulableNodes(k.client, numNodes)
ExpectNoError(err)
return nodes.Items
}
func (k *NodeKiller) kill(nodes []v1.Node) {
wg := sync.WaitGroup{}
wg.Add(len(nodes))
for _, node := range nodes {
node := node
go func() {
defer ginkgo.GinkgoRecover()
defer wg.Done()
Logf("Stopping docker and kubelet on %q to simulate failure", node.Name)
err := e2essh.IssueSSHCommand("sudo systemctl stop docker kubelet", k.provider, &node)
if err != nil {
Logf("ERROR while stopping node %q: %v", node.Name, err)
return
}
time.Sleep(k.config.SimulatedDowntime)
Logf("Rebooting %q to repair the node", node.Name)
err = e2essh.IssueSSHCommand("sudo reboot", k.provider, &node)
if err != nil {
Logf("ERROR while rebooting node %q: %v", node.Name, err)
return
}
}()
}
wg.Wait()
}

View File

@ -197,6 +197,9 @@ type TestContextType struct {
// NodeKillerConfig describes configuration of NodeKiller -- a utility to // NodeKillerConfig describes configuration of NodeKiller -- a utility to
// simulate node failures. // simulate node failures.
//
// TODO: move this and the corresponding command line flags into
// test/e2e/framework/node.
type NodeKillerConfig struct { type NodeKillerConfig struct {
// Enabled determines whether NodeKill should do anything at all. // Enabled determines whether NodeKill should do anything at all.
// All other options below are ignored if Enabled = false. // All other options below are ignored if Enabled = false.

View File

@ -0,0 +1,94 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package node
import (
"sync"
"time"
"github.com/onsi/ginkgo/v2"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/framework"
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
e2essh "k8s.io/kubernetes/test/e2e/framework/ssh"
)
// NodeKiller is a utility to simulate node failures.
type NodeKiller struct {
config framework.NodeKillerConfig
client clientset.Interface
provider string
}
// NewNodeKiller creates new NodeKiller.
func NewNodeKiller(config framework.NodeKillerConfig, client clientset.Interface, provider string) *NodeKiller {
config.NodeKillerStopCh = make(chan struct{})
return &NodeKiller{config, client, provider}
}
// Run starts NodeKiller until stopCh is closed.
func (k *NodeKiller) Run(stopCh <-chan struct{}) {
// wait.JitterUntil starts work immediately, so wait first.
time.Sleep(wait.Jitter(k.config.Interval, k.config.JitterFactor))
wait.JitterUntil(func() {
nodes := k.pickNodes()
k.kill(nodes)
}, k.config.Interval, k.config.JitterFactor, true, stopCh)
}
func (k *NodeKiller) pickNodes() []v1.Node {
nodes, err := e2enode.GetReadySchedulableNodes(k.client)
framework.ExpectNoError(err)
numNodes := int(k.config.FailureRatio * float64(len(nodes.Items)))
nodes, err = e2enode.GetBoundedReadySchedulableNodes(k.client, numNodes)
framework.ExpectNoError(err)
return nodes.Items
}
func (k *NodeKiller) kill(nodes []v1.Node) {
wg := sync.WaitGroup{}
wg.Add(len(nodes))
for _, node := range nodes {
node := node
go func() {
defer ginkgo.GinkgoRecover()
defer wg.Done()
framework.Logf("Stopping docker and kubelet on %q to simulate failure", node.Name)
err := e2essh.IssueSSHCommand("sudo systemctl stop docker kubelet", k.provider, &node)
if err != nil {
framework.Logf("ERROR while stopping node %q: %v", node.Name, err)
return
}
time.Sleep(k.config.SimulatedDowntime)
framework.Logf("Rebooting %q to repair the node", node.Name)
err = e2essh.IssueSSHCommand("sudo reboot", k.provider, &node)
if err != nil {
framework.Logf("ERROR while rebooting node %q: %v", node.Name, err)
return
}
}()
}
wg.Wait()
}