Merge pull request #35740 from Random-Liu/update-npd-e2e-test

Automatic merge from submit-queue

NPD: Add e2e test for NPD v0.2.

Node problem detector has been updated since v0.1. The changes include:
1. Add lookback support. It looks back over a configured duration to search for possible kernel panics that happened before a node reboot.
2. Get the node name via the Downward API.

This PR updates the e2e test to cover the new NPD behavior.

@dchen1107 
/cc @kubernetes/sig-node
This commit is contained in:
Kubernetes Submit Queue 2016-11-05 10:42:07 -07:00 committed by GitHub
commit b9e3b0ffa3

View File

@ -18,6 +18,7 @@ package e2e
import ( import (
"fmt" "fmt"
"path/filepath"
"strings" "strings"
"time" "time"
@ -39,12 +40,13 @@ var _ = framework.KubeDescribe("NodeProblemDetector", func() {
pollInterval = 1 * time.Second pollInterval = 1 * time.Second
pollConsistent = 5 * time.Second pollConsistent = 5 * time.Second
pollTimeout = 1 * time.Minute pollTimeout = 1 * time.Minute
image = "gcr.io/google_containers/node-problem-detector:v0.1" image = "gcr.io/google_containers/node-problem-detector:v0.2"
) )
f := framework.NewDefaultFramework("node-problem-detector") f := framework.NewDefaultFramework("node-problem-detector")
var c clientset.Interface var c clientset.Interface
var uid string var uid string
var ns, name, configName, eventNamespace string var ns, name, configName, eventNamespace string
var nodeTime time.Time
BeforeEach(func() { BeforeEach(func() {
c = f.ClientSet c = f.ClientSet
ns = f.Namespace.Name ns = f.Namespace.Name
@ -61,27 +63,38 @@ var _ = framework.KubeDescribe("NodeProblemDetector", func() {
// Use test condition to avoid conflict with real node problem detector // Use test condition to avoid conflict with real node problem detector
// TODO(random-liu): Now node condition could be arbitrary string, consider wether we need to // TODO(random-liu): Now node condition could be arbitrary string, consider wether we need to
// add TestCondition when switching to predefined condition list. // add TestCondition when switching to predefined condition list.
condition = api.NodeConditionType("TestCondition") condition = api.NodeConditionType("TestCondition")
lookback = time.Hour // Assume the test won't take more than 1 hour, in fact it usually only takes 90 seconds.
startPattern = "test reboot"
// File paths used in the test.
logDir = "/log"
logFile = "test.log"
configDir = "/config"
configFile = "testconfig.json"
etcLocaltime = "/etc/localtime"
// Volumes used in the test.
configVolume = "config"
logVolume = "log"
localtimeVolume = "localtime"
// Reasons and messages used in the test.
defaultReason = "Default" defaultReason = "Default"
defaultMessage = "default message" defaultMessage = "default message"
logDir = "/log"
logFile = "test.log"
configDir = "/config"
configFile = "testconfig.json"
tempReason = "Temporary" tempReason = "Temporary"
tempMessage = "temporary error" tempMessage = "temporary error"
permReason = "Permanent" permReason = "Permanent"
permMessage = "permanent error" permMessage = "permanent error"
configVolume = "config"
logVolume = "log"
) )
var source, config, tmpDir string var source, config, tmpDir string
var node *api.Node var node *api.Node
var eventListOptions api.ListOptions var eventListOptions api.ListOptions
injectCommand := func(err string, num int) string { injectCommand := func(timestamp time.Time, log string, num int) string {
var commands []string var commands []string
for i := 0; i < num; i++ { for i := 0; i < num; i++ {
commands = append(commands, fmt.Sprintf("echo kernel: [%d.000000] %s >> %s/%s", i, err, tmpDir, logFile)) commands = append(commands, fmt.Sprintf("echo \"%s kernel: [0.000000] %s\" >> %s/%s",
timestamp.Format(time.Stamp), log, tmpDir, logFile))
} }
return strings.Join(commands, ";") return strings.Join(commands, ";")
} }
@ -92,7 +105,9 @@ var _ = framework.KubeDescribe("NodeProblemDetector", func() {
source = "kernel-monitor-" + uid source = "kernel-monitor-" + uid
config = ` config = `
{ {
"logPath": "` + logDir + "/" + logFile + `", "logPath": "` + filepath.Join(logDir, logFile) + `",
"lookback": "` + lookback.String() + `",
"startPattern": "` + startPattern + `",
"bufferSize": 10, "bufferSize": 10,
"source": "` + source + `", "source": "` + source + `",
"conditions": [ "conditions": [
@ -170,17 +185,39 @@ var _ = framework.KubeDescribe("NodeProblemDetector", func() {
HostPath: &api.HostPathVolumeSource{Path: tmpDir}, HostPath: &api.HostPathVolumeSource{Path: tmpDir},
}, },
}, },
{
Name: localtimeVolume,
VolumeSource: api.VolumeSource{
HostPath: &api.HostPathVolumeSource{Path: etcLocaltime},
},
},
}, },
Containers: []api.Container{ Containers: []api.Container{
{ {
Name: name, Name: name,
Image: image, Image: image,
Command: []string{"/node-problem-detector", "--kernel-monitor=" + configDir + "/" + configFile}, Command: []string{"/node-problem-detector", "--kernel-monitor=" + filepath.Join(configDir, configFile)},
ImagePullPolicy: api.PullAlways,
Env: []api.EnvVar{
{
Name: "NODE_NAME",
ValueFrom: &api.EnvVarSource{
FieldRef: &api.ObjectFieldSelector{
APIVersion: "v1",
FieldPath: "spec.nodeName",
},
},
},
},
VolumeMounts: []api.VolumeMount{ VolumeMounts: []api.VolumeMount{
{ {
Name: logVolume, Name: logVolume,
MountPath: logDir, MountPath: logDir,
}, },
{
Name: localtimeVolume,
MountPath: etcLocaltime,
},
{ {
Name: configVolume, Name: configVolume,
MountPath: configDir, MountPath: configDir,
@ -193,45 +230,135 @@ var _ = framework.KubeDescribe("NodeProblemDetector", func() {
Expect(err).NotTo(HaveOccurred()) Expect(err).NotTo(HaveOccurred())
By("Wait for node problem detector running") By("Wait for node problem detector running")
Expect(f.WaitForPodRunning(name)).To(Succeed()) Expect(f.WaitForPodRunning(name)).To(Succeed())
// Get the node time
nodeIP := framework.GetNodeExternalIP(node)
result, err := framework.SSH("date '+%FT%T.%N%:z'", nodeIP, framework.TestContext.Provider)
Expect(err).ShouldNot(HaveOccurred())
Expect(result.Code).Should(BeZero())
nodeTime, err = time.Parse(time.RFC3339, strings.TrimSpace(result.Stdout))
Expect(err).ShouldNot(HaveOccurred())
}) })
It("should generate node condition and events for corresponding errors", func() { It("should generate node condition and events for corresponding errors", func() {
By("Make sure no events are generated") for _, test := range []struct {
Consistently(func() error { description string
return verifyNoEvents(c.Core().Events(eventNamespace), eventListOptions) timestamp time.Time
}, pollConsistent, pollInterval).Should(Succeed()) message string
By("Make sure the default node condition is generated") messageNum int
Eventually(func() error { events int
return verifyCondition(c.Core().Nodes(), node.Name, condition, api.ConditionFalse, defaultReason, defaultMessage) conditionReason string
}, pollTimeout, pollInterval).Should(Succeed()) conditionMessage string
conditionType api.ConditionStatus
}{
{
description: "should generate default node condition",
conditionReason: defaultReason,
conditionMessage: defaultMessage,
conditionType: api.ConditionFalse,
},
{
description: "should not generate events for too old log",
timestamp: nodeTime.Add(-3 * lookback), // Assume 3*lookback is old enough
message: tempMessage,
messageNum: 3,
conditionReason: defaultReason,
conditionMessage: defaultMessage,
conditionType: api.ConditionFalse,
},
{
description: "should not change node condition for too old log",
timestamp: nodeTime.Add(-3 * lookback), // Assume 3*lookback is old enough
message: permMessage,
messageNum: 1,
conditionReason: defaultReason,
conditionMessage: defaultMessage,
conditionType: api.ConditionFalse,
},
{
description: "should generate event for old log within lookback duration",
timestamp: nodeTime.Add(-1 * time.Minute),
message: tempMessage,
messageNum: 3,
events: 3,
conditionReason: defaultReason,
conditionMessage: defaultMessage,
conditionType: api.ConditionFalse,
},
{
description: "should change node condition for old log within lookback duration",
timestamp: nodeTime.Add(-1 * time.Minute),
message: permMessage,
messageNum: 1,
events: 3, // event number should not change
conditionReason: permReason,
conditionMessage: permMessage,
conditionType: api.ConditionTrue,
},
{
description: "should reset node condition if the node is reboot",
timestamp: nodeTime,
message: startPattern,
messageNum: 1,
events: 3, // event number should not change
conditionReason: defaultReason,
conditionMessage: defaultMessage,
conditionType: api.ConditionFalse,
},
{
description: "should generate event for new log",
timestamp: nodeTime.Add(5 * time.Minute),
message: tempMessage,
messageNum: 3,
events: 6,
conditionReason: defaultReason,
conditionMessage: defaultMessage,
conditionType: api.ConditionFalse,
},
{
description: "should change node condition for new log",
timestamp: nodeTime.Add(5 * time.Minute),
message: permMessage,
messageNum: 1,
events: 6, // event number should not change
conditionReason: permReason,
conditionMessage: permMessage,
conditionType: api.ConditionTrue,
},
} {
By(test.description)
if test.messageNum > 0 {
By(fmt.Sprintf("Inject %d logs: %q", test.messageNum, test.message))
cmd := injectCommand(test.timestamp, test.message, test.messageNum)
Expect(framework.IssueSSHCommand(cmd, framework.TestContext.Provider, node)).To(Succeed())
}
num := 3 By(fmt.Sprintf("Wait for %d events generated", test.events))
By(fmt.Sprintf("Inject %d temporary errors", num)) Eventually(func() error {
Expect(framework.IssueSSHCommand(injectCommand(tempMessage, num), framework.TestContext.Provider, node)).To(Succeed()) return verifyEvents(c.Core().Events(eventNamespace), eventListOptions, test.events, tempReason, tempMessage)
By(fmt.Sprintf("Wait for %d events generated", num)) }, pollTimeout, pollInterval).Should(Succeed())
Eventually(func() error { By(fmt.Sprintf("Make sure only %d events generated", test.events))
return verifyEvents(c.Core().Events(eventNamespace), eventListOptions, num, tempReason, tempMessage) Consistently(func() error {
}, pollTimeout, pollInterval).Should(Succeed()) return verifyEvents(c.Core().Events(eventNamespace), eventListOptions, test.events, tempReason, tempMessage)
By(fmt.Sprintf("Make sure only %d events generated", num)) }, pollConsistent, pollInterval).Should(Succeed())
Consistently(func() error {
return verifyEvents(c.Core().Events(eventNamespace), eventListOptions, num, tempReason, tempMessage)
}, pollConsistent, pollInterval).Should(Succeed())
By("Make sure the node condition is still false")
Expect(verifyCondition(c.Core().Nodes(), node.Name, condition, api.ConditionFalse, defaultReason, defaultMessage)).To(Succeed())
By("Inject 1 permanent error") By(fmt.Sprintf("Make sure node condition %q is set", condition))
Expect(framework.IssueSSHCommand(injectCommand(permMessage, 1), framework.TestContext.Provider, node)).To(Succeed()) Eventually(func() error {
By("Make sure the corresponding node condition is generated") return verifyCondition(c.Core().Nodes(), node.Name, condition, test.conditionType, test.conditionReason, test.conditionMessage)
Eventually(func() error { }, pollTimeout, pollInterval).Should(Succeed())
return verifyCondition(c.Core().Nodes(), node.Name, condition, api.ConditionTrue, permReason, permMessage) By(fmt.Sprintf("Make sure node condition %q is stable", condition))
}, pollTimeout, pollInterval).Should(Succeed()) Consistently(func() error {
By("Make sure no new events are generated") return verifyCondition(c.Core().Nodes(), node.Name, condition, test.conditionType, test.conditionReason, test.conditionMessage)
Consistently(func() error { }, pollConsistent, pollInterval).Should(Succeed())
return verifyEvents(c.Core().Events(eventNamespace), eventListOptions, num, tempReason, tempMessage) }
}, pollConsistent, pollInterval).Should(Succeed())
}) })
AfterEach(func() { AfterEach(func() {
if CurrentGinkgoTestDescription().Failed && framework.TestContext.DumpLogsOnFailure {
By("Get node problem detector log")
log, err := framework.GetPodLogs(c, ns, name, name)
Expect(err).ShouldNot(HaveOccurred())
framework.Logf("Node Problem Detector logs:\n %s", log)
}
By("Delete the node problem detector") By("Delete the node problem detector")
c.Core().Pods(ns).Delete(name, api.NewDeleteOptions(0)) c.Core().Pods(ns).Delete(name, api.NewDeleteOptions(0))
By("Wait for the node problem detector to disappear") By("Wait for the node problem detector to disappear")