mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-09-12 20:57:20 +00:00
Merge pull request #30941 from Random-Liu/remove-fatal-in-e2e-suite
Automatic merge from submit-queue Node E2E: Remove fatal error in e2e_node_suite_test.go Addresses https://github.com/kubernetes/kubernetes/issues/30779#issuecomment-240532190. Currently we run node e2e tests in parallel, and ginkgo makes sure that we only initialize the test framework in the first test node. However, we throw fatal errors during the initialization. Once there is a fatal error, the first test node will die immediately without reporting any error, and the other nodes will exit because the first node is gone, with a meaningless error. If kubelet start fails, we'll get something like: ``` ------------------------------ Failure [132.485 seconds] [BeforeSuite] BeforeSuite /usr/local/google/home/lantaol/workspace/src/k8s.io/kubernetes/test/e2e_node/e2e_node_suite_test.go:138 BeforeSuite on Node 1 failed /usr/local/google/home/lantaol/workspace/src/k8s.io/kubernetes/test/e2e_node/e2e_node_suite_test.go:138 ------------------------------ ...... ------------------------------ Failure [132.465 seconds] [BeforeSuite] BeforeSuite /usr/local/google/home/lantaol/workspace/src/k8s.io/kubernetes/test/e2e_node/e2e_node_suite_test.go:138 BeforeSuite on Node 1 failed /usr/local/google/home/lantaol/workspace/src/k8s.io/kubernetes/test/e2e_node/e2e_node_suite_test.go:138 ``` This PR replaces these fatal errors with gomega assertions; with this PR, we'll get: ``` Failure [132.482 seconds] [BeforeSuite] BeforeSuite /usr/local/google/home/lantaol/workspace/src/k8s.io/kubernetes/test/e2e_node/e2e_node_suite_test.go:138 should be able to start node services. 
Expected success, but got an error: <*errors.errorString | 0xc8203351b0>: { s: "failed to run server start command \"/tmp/ginkgo869068712/e2e_node.test --run-services-mode --server-start-timeout 2m0s --report-dir --node-name lantaol0.mtv.corp.google.com --disable-kubenet=true --cgroups-per-qos=false --manifest-path /tmp/node-e2e-pod221291440 --eviction-hard memory.available<250Mi\": exit status 255", } failed to run server start command "/tmp/ginkgo869068712/e2e_node.test --run-services-mode --server-start-timeout 2m0s --report-dir --node-name lantaol0.mtv.corp.google.com --disable-kubenet=true --cgroups-per-qos=false --manifest-path /tmp/node-e2e-pod221291440 --eviction-hard memory.available<250Mi": exit status 255 /usr/local/google/home/lantaol/workspace/src/k8s.io/kubernetes/test/e2e_node/e2e_node_suite_test.go:117 ------------------------------ Failure [132.485 seconds] [BeforeSuite] BeforeSuite /usr/local/google/home/lantaol/workspace/src/k8s.io/kubernetes/test/e2e_node/e2e_node_suite_test.go:138 BeforeSuite on Node 1 failed /usr/local/google/home/lantaol/workspace/src/k8s.io/kubernetes/test/e2e_node/e2e_node_suite_test.go:138 ------------------------------ ...... ------------------------------ Failure [132.465 seconds] [BeforeSuite] BeforeSuite /usr/local/google/home/lantaol/workspace/src/k8s.io/kubernetes/test/e2e_node/e2e_node_suite_test.go:138 BeforeSuite on Node 1 failed /usr/local/google/home/lantaol/workspace/src/k8s.io/kubernetes/test/e2e_node/e2e_node_suite_test.go:138 ``` This is much more informative. /cc @kubernetes/sig-node
This commit is contained in:
@@ -92,9 +92,7 @@ var _ = SynchronizedBeforeSuite(func() []byte {
|
||||
// Initialize node name here, so that the following code can get right node name.
|
||||
if framework.TestContext.NodeName == "" {
|
||||
hostname, err := os.Hostname()
|
||||
if err != nil {
|
||||
glog.Fatalf("Could not get node name: %v", err)
|
||||
}
|
||||
Expect(err).NotTo(HaveOccurred(), "should be able to get node name")
|
||||
framework.TestContext.NodeName = hostname
|
||||
}
|
||||
// Pre-pull the images tests depend on so we can fail immediately if there is an image pull issue
|
||||
@@ -112,9 +110,7 @@ var _ = SynchronizedBeforeSuite(func() []byte {
|
||||
|
||||
if *startServices {
|
||||
e2es = NewE2EServices()
|
||||
if err := e2es.Start(); err != nil {
|
||||
glog.Fatalf("Unable to start node services: %v", err)
|
||||
}
|
||||
Expect(e2es.Start()).To(Succeed(), "should be able to start node services.")
|
||||
glog.Infof("Node services started. Running tests...")
|
||||
} else {
|
||||
glog.Infof("Running tests without starting services.")
|
||||
@@ -127,16 +123,13 @@ var _ = SynchronizedBeforeSuite(func() []byte {
|
||||
commontest.CurrentSuite = commontest.NodeE2E
|
||||
|
||||
data, err := json.Marshal(&framework.TestContext.NodeTestContextType)
|
||||
if err != nil {
|
||||
glog.Fatalf("Failed to serialize node test context: %v", err)
|
||||
}
|
||||
Expect(err).NotTo(HaveOccurred(), "should be able to serialize node test context.")
|
||||
|
||||
return data
|
||||
}, func(data []byte) {
|
||||
// The node test context is updated in the first function, update it on every test node.
|
||||
err := json.Unmarshal(data, &framework.TestContext.NodeTestContextType)
|
||||
if err != nil {
|
||||
glog.Fatalf("Failed to deserialize node test context: %v", err)
|
||||
}
|
||||
Expect(err).NotTo(HaveOccurred(), "should be able to deserialize node test context.")
|
||||
})
|
||||
|
||||
// Tear down the kubelet on the node
|
||||
@@ -159,9 +152,8 @@ func maskLocksmithdOnCoreos() {
|
||||
return
|
||||
}
|
||||
if bytes.Contains(data, []byte("ID=coreos")) {
|
||||
if output, err := exec.Command("sudo", "systemctl", "mask", "--now", "locksmithd").CombinedOutput(); err != nil {
|
||||
glog.Fatalf("Could not mask locksmithd: %v, output: %q", err, string(output))
|
||||
}
|
||||
output, err := exec.Command("sudo", "systemctl", "mask", "--now", "locksmithd").CombinedOutput()
|
||||
Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("should be able to mask locksmithd - output: %q", string(output)))
|
||||
glog.Infof("Locksmithd is masked successfully")
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user