Merge pull request #29863 from ronnielai/system-test1 (commit ea00445069)

Automatic merge from submit-queue

Added a node e2e test for pod evictions due to disk pressure. #29800
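For context, a hard eviction threshold pairs an eviction signal with a quantity, such as the nodefs.available<15Gi value the new test's comment suggests for triggering it manually. The standalone sketch below is not part of this commit and is not the kubelet's actual parser; it only illustrates how such a threshold string decomposes.

	// eviction_threshold_sketch.go: illustrative only, not kubelet code.
	package main

	import (
		"fmt"
		"strings"
	)

	func main() {
		evictionHard := "nodefs.available<15Gi" // value suggested in the new test's comment
		for _, threshold := range strings.Split(evictionHard, ",") {
			// Each entry has the form <signal><"<"><quantity>, e.g. nodefs.available<15Gi.
			parts := strings.SplitN(threshold, "<", 2)
			if len(parts) != 2 {
				fmt.Println("malformed threshold:", threshold)
				continue
			}
			fmt.Printf("signal=%q quantity=%q\n", parts[0], parts[1])
		}
	}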
@@ -71,6 +71,8 @@ type TestContextType struct {
 	NodeName string
 	// Whether to enable the QoS Cgroup Hierarchy or not
 	CgroupsPerQOS bool
+	// The hard eviction thresholds
+	EvictionHard string
 }

 type CloudConfig struct {
@@ -150,4 +152,5 @@ func RegisterClusterFlags() {
 func RegisterNodeFlags() {
 	flag.StringVar(&TestContext.NodeName, "node-name", "", "Name of the node to run tests on (node e2e suite only).")
 	flag.BoolVar(&TestContext.CgroupsPerQOS, "cgroups-per-qos", false, "Enable creation of QoS cgroup hierarchy, if true top level QoS and pod cgroups are created.")
+	flag.StringVar(&TestContext.EvictionHard, "eviction-hard", "", "The hard eviction thresholds. If set, pods get evicted when the specified resources drop below the thresholds.")
 }
test/e2e_node/disk_eviction_test.go (new file, 190 lines)
@@ -0,0 +1,190 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e_node

import (
	"fmt"
	"os/exec"
	"strings"
	"time"

	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/kubelet/dockertools"
	"k8s.io/kubernetes/pkg/util/uuid"
	"k8s.io/kubernetes/test/e2e/framework"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
	client "k8s.io/kubernetes/pkg/client/unversioned"
)

const (
	// podCheckInterval is the interval between pod status checks.
	podCheckInterval = time.Second * 2

	// dummyFile is the name prefix of the files written to fill the disk.
	dummyFile = "dummy."
)

// TODO: Leverage dynamic kubelet settings, once implemented, to modify only the kubelet eviction option in this test.
// To manually trigger the test on a node with disk space just over 15Gi:
// make test-e2e-node FOCUS="hard eviction test" TEST_ARGS="--eviction-hard=nodefs.available<15Gi"
var _ = framework.KubeDescribe("Kubelet Eviction Manager [FLAKY] [Serial] [Disruptive]", func() {
	f := framework.NewDefaultFramework("kubelet-eviction-manager")
	var podClient *framework.PodClient
	var c *client.Client
	var n *api.Node

	BeforeEach(func() {
		podClient = f.PodClient()
		c = f.Client
		nodeList := framework.GetReadySchedulableNodesOrDie(c)
		n = &nodeList.Items[0]
	})

	Describe("hard eviction test", func() {
		Context("pod using the most disk space gets evicted when the node disk usage is above the eviction hard threshold", func() {
			var busyPodName, idlePodName string
			var containersToCleanUp map[string]bool

			AfterEach(func() {
				podClient.Delete(busyPodName, &api.DeleteOptions{})
				podClient.Delete(idlePodName, &api.DeleteOptions{})
				for container := range containersToCleanUp {
					// TODO: make this cleanup container-runtime agnostic.
					cmd := exec.Command("docker", "rm", "-f", strings.Trim(container, dockertools.DockerPrefix))
					cmd.Run()
				}
			})

			BeforeEach(func() {
				if !evictionOptionIsSet() {
					return
				}

				busyPodName = "to-evict" + string(uuid.NewUUID())
				idlePodName = "idle" + string(uuid.NewUUID())
				containersToCleanUp = make(map[string]bool)
				podClient.Create(&api.Pod{
					ObjectMeta: api.ObjectMeta{
						Name: idlePodName,
					},
					Spec: api.PodSpec{
						RestartPolicy: api.RestartPolicyNever,
						Containers: []api.Container{
							{
								Image: ImageRegistry[pauseImage],
								Name:  idlePodName,
							},
						},
					},
				})
				podClient.Create(&api.Pod{
					ObjectMeta: api.ObjectMeta{
						Name: busyPodName,
					},
					Spec: api.PodSpec{
						RestartPolicy: api.RestartPolicyNever,
						Containers: []api.Container{
							{
								Image: ImageRegistry[busyBoxImage],
								Name:  busyPodName,
								// Fill the disk by writing 40MB files every 3 seconds.
								Command: []string{"sh", "-c",
									fmt.Sprintf("for NUM in `seq 1 1 1000`; do dd if=/dev/urandom of=%s.$NUM bs=4000000 count=10; sleep 3; done",
										dummyFile)},
							},
						},
					},
				})
			})

			It("should evict the pod using the most disk space", func() {
				if !evictionOptionIsSet() {
					framework.Logf("test skipped because eviction option is not set")
					return
				}

				evictionOccurred := false
				Eventually(func() error {
					if !evictionOccurred {
						podData, err := podClient.Get(busyPodName)
						if err != nil {
							return err
						}
						recordContainerId(containersToCleanUp, podData.Status.ContainerStatuses)

						err = verifyPodEviction(podData)
						if err != nil {
							return err
						}
						if !nodeHasDiskPressure(f.Client) {
							return fmt.Errorf("expected disk pressure condition is not set")
						}

						podData, err = podClient.Get(idlePodName)
						if err != nil {
							return err
						}
						recordContainerId(containersToCleanUp, podData.Status.ContainerStatuses)

						if podData.Status.Phase != api.PodRunning {
							return fmt.Errorf("expected phase to be running. got %+v", podData.Status.Phase)
						}

						evictionOccurred = true
					}

					// After the busy pod has been evicted, the node's disk pressure should eventually be relieved.
					if nodeHasDiskPressure(f.Client) {
						return fmt.Errorf("expected disk pressure condition relief has not happened")
					}
					return nil
				}, time.Minute*5, podCheckInterval).Should(BeNil())
			})
		})
	})
})

func verifyPodEviction(podData *api.Pod) error {
	if podData.Status.Phase != api.PodFailed {
		return fmt.Errorf("expected phase to be failed. got %+v", podData.Status.Phase)
	}
	if podData.Status.Reason != "Evicted" {
		return fmt.Errorf("expected failed reason to be evicted. got %+v", podData.Status.Reason)
	}
	return nil
}

func nodeHasDiskPressure(c *client.Client) bool {
	nodeList := framework.GetReadySchedulableNodesOrDie(c)
	for _, condition := range nodeList.Items[0].Status.Conditions {
		if condition.Type == api.NodeDiskPressure {
			return condition.Status == api.ConditionTrue
		}
	}
	return false
}

func recordContainerId(containersToCleanUp map[string]bool, containerStatuses []api.ContainerStatus) {
	for _, status := range containerStatuses {
		containersToCleanUp[status.ContainerID] = true
	}
}

func evictionOptionIsSet() bool {
	return len(framework.TestContext.EvictionHard) > 0
}
@@ -125,7 +125,7 @@ var _ = SynchronizedBeforeSuite(func() []byte {

 	shared := &SharedContext{}
 	if *startServices {
-		e2es = newE2eService(framework.TestContext.NodeName, framework.TestContext.CgroupsPerQOS, shared)
+		e2es = newE2eService(framework.TestContext.NodeName, framework.TestContext.CgroupsPerQOS, framework.TestContext.EvictionHard, shared)
 		if err := e2es.start(); err != nil {
 			Fail(fmt.Sprintf("Unable to start node services.\n%v", err))
 		}
@@ -47,6 +47,7 @@ type e2eService struct {
 	nodeName      string
 	logFiles      map[string]logFileData
 	cgroupsPerQOS bool
+	evictionHard  string
 }

 type logFileData struct {
@@ -61,7 +62,7 @@ const (
 	defaultEtcdPath = "/tmp/etcd"
 )

-func newE2eService(nodeName string, cgroupsPerQOS bool, context *SharedContext) *e2eService {
+func newE2eService(nodeName string, cgroupsPerQOS bool, evictionHard string, context *SharedContext) *e2eService {
 	// Special log files that need to be collected for additional debugging.
 	var logFiles = map[string]logFileData{
 		"kern.log": {[]string{"/var/log/kern.log"}, []string{"-k"}},
@@ -73,6 +74,7 @@ func newE2eService(nodeName string, cgroupsPerQOS bool, context *SharedContext)
 		nodeName:      nodeName,
 		logFiles:      logFiles,
 		cgroupsPerQOS: cgroupsPerQOS,
+		evictionHard:  evictionHard,
 	}
 }

@@ -263,6 +265,8 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) {
 		"--file-check-frequency", "10s", // Check file frequently so tests won't wait too long
 		"--v", LOG_VERBOSITY_LEVEL, "--logtostderr",
 		"--pod-cidr=10.180.0.0/24", // Assign a fixed CIDR to the node because there is no node controller.
+		"--eviction-hard", es.evictionHard,
+		"--eviction-pressure-transition-period", "30s",
 	)
 	if es.cgroupsPerQOS {
 		cmdArgs = append(cmdArgs,
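Taken together, the --eviction-hard value supplied to the node e2e run (registered in RegisterNodeFlags above) is carried through newE2eService into the kubelet arguments assembled here. The following self-contained sketch only illustrates that wiring; kubeletArgs is an invented helper, not a function in this patch, and unlike the patch it is shown with a single fixed argument list.

	// flag_flow_sketch.go: illustrative only, not code from this commit.
	package main

	import (
		"fmt"
		"strings"
	)

	// kubeletArgs mimics how the e2e service appends the eviction flags
	// to the kubelet command line it builds.
	func kubeletArgs(evictionHard string) []string {
		return []string{
			"kubelet",
			"--pod-cidr=10.180.0.0/24",
			"--eviction-hard", evictionHard,
			"--eviction-pressure-transition-period", "30s",
		}
	}

	func main() {
		// e.g. the value passed via TEST_ARGS="--eviction-hard=nodefs.available<15Gi"
		fmt.Println(strings.Join(kubeletArgs("nodefs.available<15Gi"), " "))
	}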