Merge pull request #41240 from Random-Liu/update-npd-test

Automatic merge from submit-queue (batch tested with PRs 41844, 41803, 39116, 41129, 41240)

NPD: Update NPD test.

For https://github.com/kubernetes/node-problem-detector/issues/58.

Update NPD e2e test based on the new behavior.

Note that before merging this PR, we need to merge all pending PRs in npd, and release the v0.3.0-alpha.1 version of NPD.

/cc @dchen1107 @kubernetes/node-problem-detector-reviewers
This commit is contained in:
Kubernetes Submit Queue 2017-02-22 05:48:45 -08:00 committed by GitHub
commit eef16cf141

View File

@ -19,6 +19,7 @@ package e2e
import ( import (
"fmt" "fmt"
"path/filepath" "path/filepath"
"strconv"
"strings" "strings"
"time" "time"
@ -45,13 +46,13 @@ var _ = framework.KubeDescribe("NodeProblemDetector", func() {
pollInterval = 1 * time.Second pollInterval = 1 * time.Second
pollConsistent = 5 * time.Second pollConsistent = 5 * time.Second
pollTimeout = 1 * time.Minute pollTimeout = 1 * time.Minute
image = "gcr.io/google_containers/node-problem-detector:v0.2" image = "gcr.io/google_containers/node-problem-detector:v0.3.0-alpha.1"
) )
f := framework.NewDefaultFramework("node-problem-detector") f := framework.NewDefaultFramework("node-problem-detector")
var c clientset.Interface var c clientset.Interface
var uid string var uid string
var ns, name, configName, eventNamespace string var ns, name, configName, eventNamespace string
var nodeTime time.Time var bootTime, nodeTime time.Time
BeforeEach(func() { BeforeEach(func() {
c = f.ClientSet c = f.ClientSet
ns = f.Namespace.Name ns = f.Namespace.Name
@ -72,14 +73,13 @@ var _ = framework.KubeDescribe("NodeProblemDetector", func() {
framework.ExpectNoError(err) framework.ExpectNoError(err)
}) })
// Test kernel monitor. We may add other tests if we have more problem daemons in the future. // Test system log monitor. We may add other tests if we have more problem daemons in the future.
framework.KubeDescribe("KernelMonitor", func() { framework.KubeDescribe("SystemLogMonitor", func() {
const ( const (
// Use test condition to avoid conflict with real node problem detector // Use test condition to avoid conflict with real node problem detector
// TODO(random-liu): Now node condition could be arbitrary string, consider wether we need to // TODO(random-liu): Now node condition could be arbitrary string, consider wether we need to
// add TestCondition when switching to predefined condition list. // add TestCondition when switching to predefined condition list.
condition = v1.NodeConditionType("TestCondition") condition = v1.NodeConditionType("TestCondition")
lookback = time.Hour // Assume the test won't take more than 1 hour, in fact it usually only takes 90 seconds.
startPattern = "test reboot" startPattern = "test reboot"
// File paths used in the test. // File paths used in the test.
@ -99,10 +99,13 @@ var _ = framework.KubeDescribe("NodeProblemDetector", func() {
defaultMessage = "default message" defaultMessage = "default message"
tempReason = "Temporary" tempReason = "Temporary"
tempMessage = "temporary error" tempMessage = "temporary error"
permReason = "Permanent" permReason1 = "Permanent1"
permMessage = "permanent error" permMessage1 = "permanent error 1"
permReason2 = "Permanent2"
permMessage2 = "permanent error 2"
) )
var source, config, tmpDir string var source, config, tmpDir string
var lookback time.Duration
var node *v1.Node var node *v1.Node
var eventListOptions metav1.ListOptions var eventListOptions metav1.ListOptions
injectCommand := func(timestamp time.Time, log string, num int) string { injectCommand := func(timestamp time.Time, log string, num int) string {
@ -116,13 +119,36 @@ var _ = framework.KubeDescribe("NodeProblemDetector", func() {
BeforeEach(func() { BeforeEach(func() {
framework.SkipUnlessProviderIs(framework.ProvidersWithSSH...) framework.SkipUnlessProviderIs(framework.ProvidersWithSSH...)
By("Get a non master node to run the pod")
nodes, err := c.Core().Nodes().List(metav1.ListOptions{})
Expect(err).NotTo(HaveOccurred())
node = nil
for _, n := range nodes.Items {
if !system.IsMasterNode(n.Name) {
node = &n
break
}
}
Expect(node).NotTo(BeNil())
By("Calculate Lookback duration")
nodeTime, bootTime, err = getNodeTime(node)
Expect(err).To(BeNil())
// Set lookback duration longer than node up time.
// Assume the test won't take more than 1 hour, in fact it usually only takes 90 seconds.
lookback = nodeTime.Sub(bootTime) + time.Hour
// Randomize the source name to avoid conflict with real node problem detector // Randomize the source name to avoid conflict with real node problem detector
source = "kernel-monitor-" + uid source = "kernel-monitor-" + uid
config = ` config = `
{ {
"plugin": "filelog",
"pluginConfig": {
"timestamp": "^.{15}",
"message": "kernel: \\[.*\\] (.*)",
"timestampFormat": "` + time.Stamp + `"
},
"logPath": "` + filepath.Join(logDir, logFile) + `", "logPath": "` + filepath.Join(logDir, logFile) + `",
"lookback": "` + lookback.String() + `", "lookback": "` + lookback.String() + `",
"startPattern": "` + startPattern + `",
"bufferSize": 10, "bufferSize": 10,
"source": "` + source + `", "source": "` + source + `",
"conditions": [ "conditions": [
@ -141,22 +167,17 @@ var _ = framework.KubeDescribe("NodeProblemDetector", func() {
{ {
"type": "permanent", "type": "permanent",
"condition": "` + string(condition) + `", "condition": "` + string(condition) + `",
"reason": "` + permReason + `", "reason": "` + permReason1 + `",
"pattern": "` + permMessage + `" "pattern": "` + permMessage1 + ".*" + `"
},
{
"type": "permanent",
"condition": "` + string(condition) + `",
"reason": "` + permReason2 + `",
"pattern": "` + permMessage2 + ".*" + `"
} }
] ]
}` }`
By("Get a non master node to run the pod")
nodes, err := c.Core().Nodes().List(metav1.ListOptions{})
Expect(err).NotTo(HaveOccurred())
node = nil
for _, n := range nodes.Items {
if !system.IsMasterNode(n.Name) {
node = &n
break
}
}
Expect(node).NotTo(BeNil())
By("Generate event list options") By("Generate event list options")
selector := fields.Set{ selector := fields.Set{
"involvedObject.kind": "Node", "involvedObject.kind": "Node",
@ -184,7 +205,6 @@ var _ = framework.KubeDescribe("NodeProblemDetector", func() {
}, },
Spec: v1.PodSpec{ Spec: v1.PodSpec{
NodeName: node.Name, NodeName: node.Name,
HostNetwork: true,
SecurityContext: &v1.PodSecurityContext{}, SecurityContext: &v1.PodSecurityContext{},
Volumes: []v1.Volume{ Volumes: []v1.Volume{
{ {
@ -212,7 +232,7 @@ var _ = framework.KubeDescribe("NodeProblemDetector", func() {
{ {
Name: name, Name: name,
Image: image, Image: image,
Command: []string{"/node-problem-detector", "--kernel-monitor=" + filepath.Join(configDir, configFile)}, Command: []string{"/node-problem-detector", "--system-log-monitors=" + filepath.Join(configDir, configFile), "--logtostderr"},
ImagePullPolicy: v1.PullAlways, ImagePullPolicy: v1.PullAlways,
Env: []v1.EnvVar{ Env: []v1.EnvVar{
{ {
@ -246,13 +266,6 @@ var _ = framework.KubeDescribe("NodeProblemDetector", func() {
Expect(err).NotTo(HaveOccurred()) Expect(err).NotTo(HaveOccurred())
By("Wait for node problem detector running") By("Wait for node problem detector running")
Expect(f.WaitForPodRunning(name)).To(Succeed()) Expect(f.WaitForPodRunning(name)).To(Succeed())
// Get the node time
nodeIP := framework.GetNodeExternalIP(node)
result, err := framework.SSH("date '+%FT%T.%N%:z'", nodeIP, framework.TestContext.Provider)
Expect(err).ShouldNot(HaveOccurred())
Expect(result.Code).Should(BeZero())
nodeTime, err = time.Parse(time.RFC3339, strings.TrimSpace(result.Stdout))
Expect(err).ShouldNot(HaveOccurred())
}) })
It("should generate node condition and events for corresponding errors", func() { It("should generate node condition and events for corresponding errors", func() {
@ -274,7 +287,7 @@ var _ = framework.KubeDescribe("NodeProblemDetector", func() {
}, },
{ {
description: "should not generate events for too old log", description: "should not generate events for too old log",
timestamp: nodeTime.Add(-3 * lookback), // Assume 3*lookback is old enough timestamp: bootTime.Add(-1 * time.Minute),
message: tempMessage, message: tempMessage,
messageNum: 3, messageNum: 3,
conditionReason: defaultReason, conditionReason: defaultReason,
@ -283,8 +296,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector", func() {
}, },
{ {
description: "should not change node condition for too old log", description: "should not change node condition for too old log",
timestamp: nodeTime.Add(-3 * lookback), // Assume 3*lookback is old enough timestamp: bootTime.Add(-1 * time.Minute),
message: permMessage, message: permMessage1,
messageNum: 1, messageNum: 1,
conditionReason: defaultReason, conditionReason: defaultReason,
conditionMessage: defaultMessage, conditionMessage: defaultMessage,
@ -292,7 +305,7 @@ var _ = framework.KubeDescribe("NodeProblemDetector", func() {
}, },
{ {
description: "should generate event for old log within lookback duration", description: "should generate event for old log within lookback duration",
timestamp: nodeTime.Add(-1 * time.Minute), timestamp: nodeTime,
message: tempMessage, message: tempMessage,
messageNum: 3, messageNum: 3,
events: 3, events: 3,
@ -302,23 +315,13 @@ var _ = framework.KubeDescribe("NodeProblemDetector", func() {
}, },
{ {
description: "should change node condition for old log within lookback duration", description: "should change node condition for old log within lookback duration",
timestamp: nodeTime.Add(-1 * time.Minute),
message: permMessage,
messageNum: 1,
events: 3, // event number should not change
conditionReason: permReason,
conditionMessage: permMessage,
conditionType: v1.ConditionTrue,
},
{
description: "should reset node condition if the node is reboot",
timestamp: nodeTime, timestamp: nodeTime,
message: startPattern, message: permMessage1,
messageNum: 1, messageNum: 1,
events: 3, // event number should not change events: 3, // event number should not change
conditionReason: defaultReason, conditionReason: permReason1,
conditionMessage: defaultMessage, conditionMessage: permMessage1,
conditionType: v1.ConditionFalse, conditionType: v1.ConditionTrue,
}, },
{ {
description: "should generate event for new log", description: "should generate event for new log",
@ -326,18 +329,28 @@ var _ = framework.KubeDescribe("NodeProblemDetector", func() {
message: tempMessage, message: tempMessage,
messageNum: 3, messageNum: 3,
events: 6, events: 6,
conditionReason: defaultReason, conditionReason: permReason1,
conditionMessage: defaultMessage, conditionMessage: permMessage1,
conditionType: v1.ConditionFalse, conditionType: v1.ConditionTrue,
},
{
description: "should not update node condition with the same reason",
timestamp: nodeTime.Add(5 * time.Minute),
message: permMessage1 + "different message",
messageNum: 1,
events: 6, // event number should not change
conditionReason: permReason1,
conditionMessage: permMessage1,
conditionType: v1.ConditionTrue,
}, },
{ {
description: "should change node condition for new log", description: "should change node condition for new log",
timestamp: nodeTime.Add(5 * time.Minute), timestamp: nodeTime.Add(5 * time.Minute),
message: permMessage, message: permMessage2,
messageNum: 1, messageNum: 1,
events: 6, // event number should not change events: 6, // event number should not change
conditionReason: permReason, conditionReason: permReason2,
conditionMessage: permMessage, conditionMessage: permMessage2,
conditionType: v1.ConditionTrue, conditionType: v1.ConditionTrue,
}, },
} { } {
@ -392,6 +405,45 @@ var _ = framework.KubeDescribe("NodeProblemDetector", func() {
}) })
}) })
// getNodeTime gets node boot time and current time by running ssh command on the node.
func getNodeTime(node *v1.Node) (time.Time, time.Time, error) {
nodeIP := framework.GetNodeExternalIP(node)
// Get node current time.
result, err := framework.SSH("date '+%FT%T.%N%:z'", nodeIP, framework.TestContext.Provider)
if err != nil {
return time.Time{}, time.Time{}, fmt.Errorf("failed to run ssh command to get node time: %v", err)
}
if result.Code != 0 {
return time.Time{}, time.Time{}, fmt.Errorf("failed to run ssh command with error code: %d", result.Code)
}
timestamp := strings.TrimSpace(result.Stdout)
nodeTime, err := time.Parse(time.RFC3339, timestamp)
if err != nil {
return time.Time{}, time.Time{}, fmt.Errorf("failed to parse node time %q: %v", timestamp, err)
}
// Get system uptime.
result, err = framework.SSH(`cat /proc/uptime | cut -d " " -f 1`, nodeIP, framework.TestContext.Provider)
if err != nil {
return time.Time{}, time.Time{}, fmt.Errorf("failed to run ssh command to get node boot time: %v", err)
}
if result.Code != 0 {
return time.Time{}, time.Time{}, fmt.Errorf("failed to run ssh command with error code: %d, stdout: %q, stderr: %q",
result.Code, result.Stdout, result.Stderr)
}
uptime, err := strconv.ParseFloat(strings.TrimSpace(result.Stdout), 64)
if err != nil {
return time.Time{}, time.Time{}, fmt.Errorf("failed to parse node uptime %q: %v", result.Stdout, err)
}
// Get node boot time. NOTE that because we get node current time before uptime, the boot time
// calculated will be a little earlier than the real boot time. This won't affect the correctness
// of the test result.
bootTime := nodeTime.Add(-time.Duration(uptime * float64(time.Second)))
return nodeTime, bootTime, nil
}
// verifyEvents verifies there are num specific events generated // verifyEvents verifies there are num specific events generated
func verifyEvents(e coreclientset.EventInterface, options metav1.ListOptions, num int, reason, message string) error { func verifyEvents(e coreclientset.EventInterface, options metav1.ListOptions, num int, reason, message string) error {
events, err := e.List(options) events, err := e.List(options)