allows configuring NPD image version in node e2e test and fixes the test

This commit is contained in:
Zhen Wang 2019-02-07 15:41:16 -08:00
parent efa96f7eb8
commit 6df207bdaa
15 changed files with 93 additions and 38 deletions

View File

@ -34,6 +34,7 @@ image_service_endpoint=${IMAGE_SERVICE_ENDPOINT:-""}
run_until_failure=${RUN_UNTIL_FAILURE:-"false"}
test_args=${TEST_ARGS:-""}
system_spec_name=${SYSTEM_SPEC_NAME:-}
extra_envs=${EXTRA_ENVS:-}
# Parse the flags to pass to ginkgo
ginkgoflags=""
@ -148,7 +149,7 @@ if [ ${remote} = true ] ; then
--image-project="${image_project}" --instance-name-prefix="${instance_prefix}" \
--delete-instances="${delete_instances}" --test_args="${test_args}" --instance-metadata="${metadata}" \
--image-config-file="${image_config_file}" --system-spec-name="${system_spec_name}" \
--test-suite="${test_suite}" \
--extra-envs="${extra_envs}" --test-suite="${test_suite}" \
2>&1 | tee -i "${artifacts}/build-log.txt"
exit $?
@ -169,8 +170,8 @@ else
# Test using the host the script was run on
# Provided for backwards compatibility
go run test/e2e_node/runner/local/run_local.go \
--system-spec-name="${system_spec_name}" --ginkgo-flags="${ginkgoflags}" \
--test-flags="--container-runtime=${runtime} \
--system-spec-name="${system_spec_name}" --extra-envs="${extra_envs}" \
--ginkgo-flags="${ginkgoflags}" --test-flags="--container-runtime=${runtime} \
--alsologtostderr --v 4 --report-dir=${artifacts} --node-name $(hostname) \
${test_args}" --build-dependencies=true 2>&1 | tee -i "${artifacts}/build-log.txt"
exit $?
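With extra_envs now read from EXTRA_ENVS and forwarded to both the remote and local runners, the node-problem-detector image can be chosen from the command line. A minimal sketch of such a run, assuming the usual make test-e2e-node entry point for this script; the focus regex is illustrative and the image tag matches the default set later in this commit:

# Sketch only: run the NPD node e2e test against a specific node-problem-detector image.
make test-e2e-node REMOTE=true \
  FOCUS="NodeProblemDetector" \
  EXTRA_ENVS=NODE_PROBLEM_DETECTOR_IMAGE=k8s.gcr.io/node-problem-detector:v0.6.2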

View File

@ -193,6 +193,8 @@ type NodeTestContextType struct {
// the node e2e test. If empty, the default one (system.DefaultSpec) is
// used. The system specs are in test/e2e_node/system/specs/.
SystemSpecName string
// ExtraEnvs is a map of environment variable names to values.
ExtraEnvs map[string]string
}
type CloudConfig struct {
@ -332,6 +334,7 @@ func RegisterNodeFlags() {
flag.BoolVar(&TestContext.PrepullImages, "prepull-images", true, "If true, prepull images so image pull failures do not cause test failures.")
flag.StringVar(&TestContext.ImageDescription, "image-description", "", "The description of the image which the test will be running on.")
flag.StringVar(&TestContext.SystemSpecName, "system-spec-name", "", "The name of the system spec (e.g., gke) that's used in the node e2e test. The system specs are in test/e2e_node/system/specs/. This is used by the test framework to determine which tests to run for validating the system requirements.")
flag.Var(cliflag.NewMapStringString(&TestContext.ExtraEnvs), "extra-envs", "The extra environment variables needed for node e2e tests. Format: a list of key=value pairs, e.g., env1=val1,env2=val2")
}
// HandleFlags sets up all flags and parses the command line.
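The new flag takes a comma-separated key=value list, which cliflag parses into the ExtraEnvs map. A format sketch as it would appear on the test binary itself; the runners changed later in this commit normally assemble this command line, and SOME_OTHER_ENV is a hypothetical second pair shown only to illustrate the syntax:

# Format sketch only; in practice the remote/local runners build this invocation.
./e2e_node.test --extra-envs=NODE_PROBLEM_DETECTOR_IMAGE=k8s.gcr.io/node-problem-detector:v0.6.2,SOME_OTHER_ENV=value \
  --node-name="$(hostname)" --report-dir=/tmp/artifacts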

View File

@ -27,12 +27,14 @@ COPY_SYSTEM_SPEC_FILE
# REPORT_PATH is the path in the container to save test results and logs.
# FLAKE_ATTEMPTS is the number of times to retry when there is a test failure. By default 2.
# TEST_ARGS is the test arguments passed into the test.
# EXTRA_ENVS is the set of extra environment variables needed for node e2e tests.
ENV FOCUS="\[Conformance\]" \
SKIP="\[Flaky\]|\[Serial\]" \
PARALLELISM=8 \
REPORT_PATH="/var/result" \
FLAKE_ATTEMPTS=2 \
TEST_ARGS=""
TEST_ARGS="" \
EXTRA_ENVS=""
ENTRYPOINT ginkgo --focus="$FOCUS" \
--skip="$SKIP" \
@ -46,4 +48,5 @@ ENTRYPOINT ginkgo --focus="$FOCUS" \
--system-spec-name=SYSTEM_SPEC_NAME \
# This is a placeholder that will be substituted in the Makefile.
--system-spec-file=SYSTEM_SPEC_FILE_PATH \
--extra-envs=$EXTRA_ENVS \
$TEST_ARGS
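The EXTRA_ENVS variable is forwarded to ginkgo as --extra-envs, so the conformance image accepts the same key=value list as the test binary. A hedged sketch of running the container by hand, mirroring the docker command that ConformanceRemote assembles later in this commit; the host result path and the image name are placeholders:

# Illustrative only; $CONFORMANCE_IMAGE stands in for the image built from this Dockerfile
# (its real name comes from getConformanceTestImageName).
docker run --rm --privileged=true --net=host \
  -v /:/rootfs -v /tmp/results:/var/result \
  -e EXTRA_ENVS=NODE_PROBLEM_DETECTOR_IMAGE=k8s.gcr.io/node-problem-detector:v0.6.2 \
  "$CONFORMANCE_IMAGE"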

View File

@ -77,6 +77,7 @@ func TestMain(m *testing.M) {
rand.Seed(time.Now().UnixNano())
pflag.Parse()
framework.AfterReadingAllFlags(&framework.TestContext)
setExtraEnvs()
os.Exit(m.Run())
}
@ -146,6 +147,7 @@ var _ = SynchronizedBeforeSuite(func() []byte {
// This helps with debugging test flakes since it is hard to tell when a test failure is due to image pulling.
if framework.TestContext.PrepullImages {
klog.Infof("Pre-pulling images so that they are cached for the tests.")
updateImageWhiteList()
err := PrePullAllImages()
Expect(err).ShouldNot(HaveOccurred())
}
@ -244,6 +246,9 @@ func waitForNodeReady() {
// TODO(random-liu): Using dynamic kubelet configuration feature to
// update test context with node configuration.
func updateTestContext() error {
setExtraEnvs()
updateImageWhiteList()
client, err := getAPIServerClient()
if err != nil {
return fmt.Errorf("failed to get apiserver client: %v", err)
@ -261,7 +266,7 @@ func updateTestContext() error {
if err != nil {
return fmt.Errorf("failed to get kubelet configuration: %v", err)
}
framework.TestContext.KubeletConfig = *kubeletCfg // Set kubelet config.
framework.TestContext.KubeletConfig = *kubeletCfg // Set kubelet config
return nil
}
@ -319,3 +324,9 @@ func isNodeReady(node *v1.Node) bool {
}
return false
}
func setExtraEnvs() {
for name, value := range framework.TestContext.ExtraEnvs {
os.Setenv(name, value)
}
}

View File

@ -18,6 +18,7 @@ package e2e_node
import (
"fmt"
"os"
"os/exec"
"os/user"
"time"
@ -46,7 +47,6 @@ var NodeImageWhiteList = sets.NewString(
"k8s.gcr.io/stress:v1",
busyboxImage,
"k8s.gcr.io/busybox@sha256:4bdd623e848417d96127e16037743f0cd8b528c026e9175e22a84f639eca58ff",
"k8s.gcr.io/node-problem-detector:v0.4.1",
imageutils.GetE2EImage(imageutils.Nginx),
imageutils.GetE2EImage(imageutils.ServeHostname),
imageutils.GetE2EImage(imageutils.Netexec),
@ -58,9 +58,24 @@ var NodeImageWhiteList = sets.NewString(
"gcr.io/kubernetes-e2e-test-images/node-perf/tf-wide-deep-amd64:1.0",
)
func init() {
// updateImageWhiteList updates framework.ImageWhiteList with:
// 1. the hard-coded lists
// 2. the images passed in via framework.TestContext.ExtraEnvs
// Therefore this function needs to be called after the extra envs are applied.
func updateImageWhiteList() {
// Union NodeImageWhiteList and CommonImageWhiteList into the framework image white list.
framework.ImageWhiteList = NodeImageWhiteList.Union(commontest.CommonImageWhiteList)
// Images from extra envs
framework.ImageWhiteList.Insert(getNodeProblemDetectorImage())
}
func getNodeProblemDetectorImage() string {
const defaultImage string = "k8s.gcr.io/node-problem-detector:v0.6.2"
image := os.Getenv("NODE_PROBLEM_DETECTOR_IMAGE")
if image == "" {
image = defaultImage
}
return image
}
// puller represents a generic image puller
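getNodeProblemDetectorImage reads NODE_PROBLEM_DETECTOR_IMAGE from the process environment, falling back to the v0.6.2 default, so the override only takes effect after setExtraEnvs has exported the --extra-envs values and before updateImageWhiteList runs; that is the order the suite uses before pre-pulling. A minimal local-run sketch, with a hypothetical custom image and an illustrative focus regex; the other flags mirror run_local.go's existing ones:

# Illustrative local run; the image and focus are placeholders.
go run test/e2e_node/runner/local/run_local.go \
  --extra-envs=NODE_PROBLEM_DETECTOR_IMAGE=gcr.io/my-project/node-problem-detector:test \
  --ginkgo-flags='--focus="NodeProblemDetector"' \
  --test-flags='--container-runtime=docker' --build-dependencies=true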

View File

@ -40,4 +40,5 @@ go run test/e2e_node/runner/remote/run_remote.go --test-suite=conformance \
--results-dir="$ARTIFACTS" --test-timeout="$TIMEOUT" \
--test_args="--kubelet-flags=\"$KUBELET_ARGS\"" \
--instance-metadata="$GCE_INSTANCE_METADATA" \
--system-spec-name="$SYSTEM_SPEC_NAME"
--system-spec-name="$SYSTEM_SPEC_NAME" \
--extra-envs="$EXTRA_ENVS"

View File

@ -47,4 +47,5 @@ go run test/e2e_node/runner/remote/run_remote.go --logtostderr --vmodule=*=4 \
--image-config-file="$GCE_IMAGE_CONFIG_PATH" --cleanup="$CLEANUP" \
--results-dir="$ARTIFACTS" --ginkgo-flags="--nodes=$PARALLELISM $GINKGO_FLAGS" \
--test-timeout="$TIMEOUT" --test_args="$TEST_ARGS --kubelet-flags=\"$KUBELET_ARGS\"" \
--instance-metadata="$GCE_INSTANCE_METADATA" --system-spec-name="$SYSTEM_SPEC_NAME"
--instance-metadata="$GCE_INSTANCE_METADATA" --system-spec-name="$SYSTEM_SPEC_NAME" \
--extra-envs="$EXTRA_ENVS"

View File

@ -45,13 +45,14 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete
pollInterval = 1 * time.Second
pollConsistent = 5 * time.Second
pollTimeout = 1 * time.Minute
image = "k8s.gcr.io/node-problem-detector:v0.4.1"
)
f := framework.NewDefaultFramework("node-problem-detector")
var c clientset.Interface
var uid string
var ns, name, configName, eventNamespace string
var bootTime, nodeTime time.Time
var image string
BeforeEach(func() {
c = f.ClientSet
ns = f.Namespace.Name
@ -60,6 +61,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete
configName = "node-problem-detector-config-" + uid
// There is no namespace for Node; the event recorder will set the default namespace for node events.
eventNamespace = metav1.NamespaceDefault
image = getNodeProblemDetectorImage()
By(fmt.Sprintf("Using node-problem-detector image: %s", image))
})
// Test system log monitor. We may add other tests if we have more problem daemons in the future.
@ -245,7 +248,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete
timestamp time.Time
message string
messageNum int
events int
tempEvents int // Events for temp errors
totalEvents int // Events for both temp errors and condition changes
conditionReason string
conditionMessage string
conditionType v1.ConditionStatus
@ -279,7 +283,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete
timestamp: nodeTime,
message: tempMessage,
messageNum: 3,
events: 3,
tempEvents: 3,
totalEvents: 3,
conditionReason: defaultReason,
conditionMessage: defaultMessage,
conditionType: v1.ConditionFalse,
@ -289,7 +294,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete
timestamp: nodeTime,
message: permMessage1,
messageNum: 1,
events: 3, // event number should not change
tempEvents: 3, // event number for temp errors should not change
totalEvents: 4, // add 1 event for condition change
conditionReason: permReason1,
conditionMessage: permMessage1,
conditionType: v1.ConditionTrue,
@ -299,7 +305,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete
timestamp: nodeTime.Add(5 * time.Minute),
message: tempMessage,
messageNum: 3,
events: 6,
tempEvents: 6, // add 3 events for temp errors
totalEvents: 7, // add 3 events for temp errors
conditionReason: permReason1,
conditionMessage: permMessage1,
conditionType: v1.ConditionTrue,
@ -309,7 +316,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete
timestamp: nodeTime.Add(5 * time.Minute),
message: permMessage1 + "different message",
messageNum: 1,
events: 6, // event number should not change
tempEvents: 6, // event number should not change
totalEvents: 7, // event number should not change
conditionReason: permReason1,
conditionMessage: permMessage1,
conditionType: v1.ConditionTrue,
@ -319,7 +327,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete
timestamp: nodeTime.Add(5 * time.Minute),
message: permMessage2,
messageNum: 1,
events: 6, // event number should not change
tempEvents: 6, // event number for temp errors should not change
totalEvents: 8, // add 1 event for condition change
conditionReason: permReason2,
conditionMessage: permMessage2,
conditionType: v1.ConditionTrue,
@ -332,13 +341,17 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete
Expect(err).NotTo(HaveOccurred())
}
By(fmt.Sprintf("Wait for %d events generated", test.events))
By(fmt.Sprintf("Wait for %d temp events generated", test.tempEvents))
Eventually(func() error {
return verifyEvents(c.CoreV1().Events(eventNamespace), eventListOptions, test.events, tempReason, tempMessage)
return verifyEvents(c.CoreV1().Events(eventNamespace), eventListOptions, test.tempEvents, tempReason, tempMessage)
}, pollTimeout, pollInterval).Should(Succeed())
By(fmt.Sprintf("Make sure only %d events generated", test.events))
By(fmt.Sprintf("Wait for %d total events generated", test.totalEvents))
Eventually(func() error {
return verifyTotalEvents(c.CoreV1().Events(eventNamespace), eventListOptions, test.totalEvents)
}, pollTimeout, pollInterval).Should(Succeed())
By(fmt.Sprintf("Make sure only %d total events generated", test.totalEvents))
Consistently(func() error {
return verifyEvents(c.CoreV1().Events(eventNamespace), eventListOptions, test.events, tempReason, tempMessage)
return verifyTotalEvents(c.CoreV1().Events(eventNamespace), eventListOptions, test.totalEvents)
}, pollConsistent, pollInterval).Should(Succeed())
By(fmt.Sprintf("Make sure node condition %q is set", condition))
@ -390,7 +403,7 @@ func injectLog(file string, timestamp time.Time, log string, num int) error {
return nil
}
// verifyEvents verifies there are num specific events generated
// verifyEvents verifies there are num specific events generated with the given reason and message.
func verifyEvents(e coreclientset.EventInterface, options metav1.ListOptions, num int, reason, message string) error {
events, err := e.List(options)
if err != nil {
@ -399,7 +412,7 @@ func verifyEvents(e coreclientset.EventInterface, options metav1.ListOptions, nu
count := 0
for _, event := range events.Items {
if event.Reason != reason || event.Message != message {
return fmt.Errorf("unexpected event: %v", event)
continue
}
count += int(event.Count)
}
@ -409,14 +422,18 @@ func verifyEvents(e coreclientset.EventInterface, options metav1.ListOptions, nu
return nil
}
// verifyNoEvents verifies there is no event generated
func verifyNoEvents(e coreclientset.EventInterface, options metav1.ListOptions) error {
// verifyTotalEvents verifies there are num events in total.
func verifyTotalEvents(e coreclientset.EventInterface, options metav1.ListOptions, num int) error {
events, err := e.List(options)
if err != nil {
return err
}
if len(events.Items) != 0 {
return fmt.Errorf("unexpected events: %v", events.Items)
count := 0
for _, event := range events.Items {
count += int(event.Count)
}
if count != num {
return fmt.Errorf("expect event number %d, got %d: %v", num, count, events.Items)
}
return nil
}

View File

@ -63,7 +63,7 @@ func runCommand(command string, args ...string) error {
}
// RunTest implements TestSuite.RunTest
func (n *CAdvisorE2ERemote) RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName string, timeout time.Duration) (string, error) {
func (n *CAdvisorE2ERemote) RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName, extraEnvs string, timeout time.Duration) (string, error) {
// Kill any running node processes
cleanupNodeProcesses(host)

View File

@ -259,7 +259,7 @@ func stopKubelet(host, workspace string) error {
}
// RunTest runs test on the node.
func (c *ConformanceRemote) RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, _, systemSpecName string, timeout time.Duration) (string, error) {
func (c *ConformanceRemote) RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, _, systemSpecName, extraEnvs string, timeout time.Duration) (string, error) {
// Install the cni plugins and add a basic CNI configuration.
if err := setupCNI(host, workspace); err != nil {
return "", err
@ -293,8 +293,8 @@ func (c *ConformanceRemote) RunTest(host, workspace, results, imageDesc, junitFi
// Run the tests
klog.V(2).Infof("Starting tests on %q", host)
podManifestPath := getPodPath(workspace)
cmd := fmt.Sprintf("'timeout -k 30s %fs docker run --rm --privileged=true --net=host -v /:/rootfs -v %s:%s -v %s:/var/result -e TEST_ARGS=--report-prefix=%s %s'",
timeout.Seconds(), podManifestPath, podManifestPath, results, junitFilePrefix, getConformanceTestImageName(systemSpecName))
cmd := fmt.Sprintf("'timeout -k 30s %fs docker run --rm --privileged=true --net=host -v /:/rootfs -v %s:%s -v %s:/var/result -e TEST_ARGS=--report-prefix=%s -e EXTRA_ENVS=%s %s'",
timeout.Seconds(), podManifestPath, podManifestPath, results, junitFilePrefix, extraEnvs, getConformanceTestImageName(systemSpecName))
testOutput, err := SSH(host, "sh", "-c", cmd)
if err != nil {
return testOutput, err

View File

@ -135,7 +135,7 @@ func updateOSSpecificKubeletFlags(args, host, workspace string) (string, error)
}
// RunTest runs test on the node.
func (n *NodeE2ERemote) RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName string, timeout time.Duration) (string, error) {
func (n *NodeE2ERemote) RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName, extraEnvs string, timeout time.Duration) (string, error) {
// Install the cni plugins and add a basic CNI configuration.
// TODO(random-liu): Do this in cloud init after we remove containervm test.
if err := setupCNI(host, workspace); err != nil {
@ -164,8 +164,8 @@ func (n *NodeE2ERemote) RunTest(host, workspace, results, imageDesc, junitFilePr
klog.V(2).Infof("Starting tests on %q", host)
cmd := getSSHCommand(" && ",
fmt.Sprintf("cd %s", workspace),
fmt.Sprintf("timeout -k 30s %fs ./ginkgo %s ./e2e_node.test -- --system-spec-name=%s --system-spec-file=%s --logtostderr --v 4 --node-name=%s --report-dir=%s --report-prefix=%s --image-description=\"%s\" %s",
timeout.Seconds(), ginkgoArgs, systemSpecName, systemSpecFile, host, results, junitFilePrefix, imageDesc, testArgs),
fmt.Sprintf("timeout -k 30s %fs ./ginkgo %s ./e2e_node.test -- --system-spec-name=%s --system-spec-file=%s --extra-envs=%s --logtostderr --v 4 --node-name=%s --report-dir=%s --report-prefix=%s --image-description=\"%s\" %s",
timeout.Seconds(), ginkgoArgs, systemSpecName, systemSpecFile, extraEnvs, host, results, junitFilePrefix, imageDesc, testArgs),
)
return SSH(host, "sh", "-c", cmd)
}

View File

@ -65,7 +65,7 @@ func CreateTestArchive(suite TestSuite, systemSpecName string) (string, error) {
// Returns the command output, whether the exit was ok, and any errors
// TODO(random-liu): junitFilePrefix is not actually a prefix; the file name is junit-junitFilePrefix.xml. Change the variable name.
func RunRemote(suite TestSuite, archive string, host string, cleanup bool, imageDesc, junitFilePrefix string, testArgs string, ginkgoArgs string, systemSpecName string) (string, bool, error) {
func RunRemote(suite TestSuite, archive string, host string, cleanup bool, imageDesc, junitFilePrefix string, testArgs string, ginkgoArgs string, systemSpecName string, extraEnvs string) (string, bool, error) {
// Create the temp staging directory
klog.V(2).Infof("Staging test binaries on %q", host)
workspace := newWorkspaceDir()
@ -110,7 +110,7 @@ func RunRemote(suite TestSuite, archive string, host string, cleanup bool, image
}
klog.V(2).Infof("Running test on %q", host)
output, err := suite.RunTest(host, workspace, resultDir, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName, *testTimeoutSeconds)
output, err := suite.RunTest(host, workspace, resultDir, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName, extraEnvs, *testTimeoutSeconds)
aggErrs := []error{}
// Do not log the output here, let the caller deal with the test output.

View File

@ -46,6 +46,7 @@ type TestSuite interface {
// * ginkgoArgs is the arguments passed to ginkgo.
// * systemSpecName is the name of the system spec used for validating the
// image on which the test runs.
// * extraEnvs is the set of extra environment variables needed for node e2e tests.
// * timeout is the test timeout.
RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName string, timeout time.Duration) (string, error)
RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName, extraEnvs string, timeout time.Duration) (string, error)
}

View File

@ -35,6 +35,7 @@ var buildDependencies = flag.Bool("build-dependencies", true, "If true, build al
var ginkgoFlags = flag.String("ginkgo-flags", "", "Space-separated list of arguments to pass to Ginkgo test runner.")
var testFlags = flag.String("test-flags", "", "Space-separated list of arguments to pass to node e2e test.")
var systemSpecName = flag.String("system-spec-name", "", fmt.Sprintf("The name of the system spec used for validating the image in the node conformance test. The specs are at %s. If unspecified, the default built-in spec (system.DefaultSpec) will be used.", system.SystemSpecPath))
var extraEnvs = flag.String("extra-envs", "", "The extra environment variables needed for node e2e tests. Format: a list of key=value pairs, e.g., env1=val1,env2=val2")
func main() {
klog.InitFlags(nil)
@ -63,7 +64,7 @@ func main() {
klog.Fatalf("Failed to get k8s root directory: %v", err)
}
systemSpecFile := filepath.Join(rootDir, system.SystemSpecPath, *systemSpecName+".yaml")
args = append(args, fmt.Sprintf("--system-spec-name=%s --system-spec-file=%s", *systemSpecName, systemSpecFile))
args = append(args, fmt.Sprintf("--system-spec-name=%s --system-spec-file=%s --extra-envs=%s", *systemSpecName, systemSpecFile, *extraEnvs))
}
if err := runCommand(ginkgo, args...); err != nil {
klog.Exitf("Test failed: %v", err)

View File

@ -63,6 +63,7 @@ var instanceMetadata = flag.String("instance-metadata", "", "key/value metadata
var gubernator = flag.Bool("gubernator", false, "If true, output Gubernator link to view logs")
var ginkgoFlags = flag.String("ginkgo-flags", "", "Passed to ginkgo to specify additional flags such as --skip=.")
var systemSpecName = flag.String("system-spec-name", "", fmt.Sprintf("The name of the system spec used for validating the image in the node conformance test. The specs are at %s. If unspecified, the default built-in spec (system.DefaultSpec) will be used.", system.SystemSpecPath))
var extraEnvs = flag.String("extra-envs", "", "The extra environment variables needed for node e2e tests. Format: a list of key=value pairs, e.g., env1=val1,env2=val2")
// envs is the type used to collect all node envs. The key is the env name,
// and the value is the env value
@ -442,7 +443,7 @@ func testHost(host string, deleteFiles bool, imageDesc, junitFilePrefix, ginkgoF
}
}
output, exitOk, err := remote.RunRemote(suite, path, host, deleteFiles, imageDesc, junitFilePrefix, *testArgs, ginkgoFlagsStr, *systemSpecName)
output, exitOk, err := remote.RunRemote(suite, path, host, deleteFiles, imageDesc, junitFilePrefix, *testArgs, ginkgoFlagsStr, *systemSpecName, *extraEnvs)
return &TestResult{
output: output,
err: err,