diff --git a/docs/devel/e2e-node-tests.md b/docs/devel/e2e-node-tests.md index 04b82799686..54b0ac9e9b3 100644 --- a/docs/devel/e2e-node-tests.md +++ b/docs/devel/e2e-node-tests.md @@ -205,6 +205,18 @@ less useful for catching flakes related creating the instance from an image.** make test-e2e-node REMOTE=true RUN_UNTIL_FAILURE=true ``` +## Run tests in parallel + +Running tests in parallel can usually shorten the test duration. By default the node +e2e test runs with `--nodes=8` (see ginkgo flag +[--nodes](https://onsi.github.io/ginkgo/#parallel-specs)). You can use the +`PARALLELISM` option to change the parallelism. + +```sh +make test-e2e-node PARALLELISM=4 # run tests with 4 parallel nodes +make test-e2e-node PARALLELISM=1 # run tests sequentially +``` + ## Run tests with kubenet network plugin [kubenet](http://kubernetes.io/docs/admin/network-plugins/#kubenet) is diff --git a/hack/make-rules/test-e2e-node.sh b/hack/make-rules/test-e2e-node.sh index 721c13cc2ce..5090ee2f736 100755 --- a/hack/make-rules/test-e2e-node.sh +++ b/hack/make-rules/test-e2e-node.sh @@ -19,6 +19,13 @@ source "${KUBE_ROOT}/hack/lib/init.sh" focus=${FOCUS:-""} skip=${SKIP:-""} +# The number of tests that can run in parallel depends on what tests +# are running and on the size of the node. Too many, and tests will +# fail due to resource contention. 8 is a reasonable default for a +# n1-standard-1 node. +# Currently, parallelism only affects when REMOTE=true. For local test, +# ginkgo default parallelism (cores - 1) is used. 
+parallelism=${PARALLELISM:-8} report=${REPORT:-"/tmp/"} artifacts=${ARTIFACTS:-"/tmp/_artifacts"} remote=${REMOTE:-"false"} @@ -46,6 +53,25 @@ if [[ -z "${ginkgo}" ]]; then exit 1 fi +# Parse the flags to pass to ginkgo +ginkgoflags="" +if [[ $parallelism > 1 ]]; then + ginkgoflags="$ginkgoflags -nodes=$parallelism " +fi + +if [[ $focus != "" ]]; then + ginkgoflags="$ginkgoflags -focus=$focus " +fi + +if [[ $skip != "" ]]; then + ginkgoflags="$ginkgoflags -skip=$skip " +fi + +if [[ $run_until_failure != "" ]]; then + ginkgoflags="$ginkgoflags -untilItFails=$run_until_failure " +fi + + if [ $remote = true ] ; then # Setup the directory to copy test artifacts (logs, junit.xml, etc) from remote host to local host if [ ! -d "${artifacts}" ]; then @@ -89,20 +115,6 @@ if [ $remote = true ] ; then done fi - # Parse the flags to pass to ginkgo - ginkgoflags="" - if [[ $focus != "" ]]; then - ginkgoflags="$ginkgoflags -focus=$focus " - fi - - if [[ $skip != "" ]]; then - ginkgoflags="$ginkgoflags -skip=$skip " - fi - - if [[ $run_until_failure != "" ]]; then - ginkgoflags="$ginkgoflags -untilItFails=$run_until_failure " - fi - # Output the configuration we will try to run echo "Running tests remotely using" echo "Project: $project" @@ -133,7 +145,7 @@ else fi # Test using the host the script was run on # Provided for backwards compatibility - "${ginkgo}" --focus=$focus --skip=$skip "${KUBE_ROOT}/test/e2e_node/" --report-dir=${report} \ + "${ginkgo}" $ginkgoflags "${KUBE_ROOT}/test/e2e_node/" --report-dir=${report} \ -- --alsologtostderr --v 2 --node-name $(hostname) --build-services=true \ --start-services=true --stop-services=true $test_args exit $? 
diff --git a/test/e2e_node/e2e_node_suite_test.go b/test/e2e_node/e2e_node_suite_test.go index f99b9f25b8d..a8ea855123b 100644 --- a/test/e2e_node/e2e_node_suite_test.go +++ b/test/e2e_node/e2e_node_suite_test.go @@ -20,6 +20,7 @@ package e2e_node import ( "bytes" + "encoding/json" "flag" "fmt" "io/ioutil" @@ -27,7 +28,6 @@ import ( "os" "os/exec" "path" - "strings" "testing" "time" @@ -48,6 +48,27 @@ import ( var e2es *e2eService +// context is the test context shared by all parallel nodes. +// Originally we set up the test environment and initialized global variables +// in BeforeSuite, and then used the global variables in the tests. +// However, after we made the tests parallel, ginkgo will run all tests +// in several parallel test nodes. And for each test node, the BeforeSuite +// and AfterSuite will be run. +// We don't want to start services (kubelet, apiserver and etcd) for all +// parallel nodes, but we do want to set some globally shared variables which +// can be used in tests. +// We have to use SynchronizedBeforeSuite to achieve that. The first +// function of SynchronizedBeforeSuite is only called once, and the second +// function is called in each parallel test node. The result returned by +// the first function will be the parameter of the second function. +// So we'll start all services and initialize the shared context in the first +// function, and propagate the context to all parallel test nodes in the +// second function. +// Note that no lock is needed for the shared context, because it should only be +// initialized in the first function in SynchronizedBeforeSuite. After that +// it should never be modified. +var context SharedContext + var prePullImages = flag.Bool("prepull-images", true, "If true, prepull images so image pull failures do not cause test failures.") var junitFileNumber = flag.Int("junit-file-number", 1, "Used to create junit filename - e.g. 
junit_01.xml.") @@ -77,17 +98,10 @@ func TestE2eNode(t *testing.T) { } // Setup the kubelet on the node -var _ = BeforeSuite(func() { +var _ = SynchronizedBeforeSuite(func() []byte { if *buildServices { buildGo() } - if framework.TestContext.NodeName == "" { - output, err := exec.Command("hostname").CombinedOutput() - if err != nil { - glog.Fatalf("Could not get node name from hostname %v. Output:\n%s", err, output) - } - framework.TestContext.NodeName = strings.TrimSpace(fmt.Sprintf("%s", output)) - } // Pre-pull the images tests depend on so we can fail immediately if there is an image pull issue // This helps with debugging test flakes since it is hard to tell when a test failure is due to image pulling. @@ -102,8 +116,9 @@ var _ = BeforeSuite(func() { // We should mask locksmithd when provisioning the machine. maskLocksmithdOnCoreos() + shared := &SharedContext{} if *startServices { - e2es = newE2eService(framework.TestContext.NodeName, framework.TestContext.CgroupsPerQOS) + e2es = newE2eService(framework.TestContext.NodeName, framework.TestContext.CgroupsPerQOS, shared) if err := e2es.start(); err != nil { Fail(fmt.Sprintf("Unable to start node services.\n%v", err)) } @@ -117,10 +132,28 @@ var _ = BeforeSuite(func() { // Reference common test to make the import valid. 
commontest.CurrentSuite = commontest.NodeE2E + + data, err := json.Marshal(shared) + Expect(err).NotTo(HaveOccurred()) + + return data +}, func(data []byte) { + // Set the shared context got from the synchronized initialize function + shared := &SharedContext{} + Expect(json.Unmarshal(data, shared)).To(Succeed()) + context = *shared + + if framework.TestContext.NodeName == "" { + hostname, err := os.Hostname() + if err != nil { + glog.Fatalf("Could not get node name: %v", err) + } + framework.TestContext.NodeName = hostname + } }) // Tear down the kubelet on the node -var _ = AfterSuite(func() { +var _ = SynchronizedAfterSuite(func() {}, func() { if e2es != nil { e2es.getLogFiles() if *startServices && *stopServices { diff --git a/test/e2e_node/e2e_service.go b/test/e2e_node/e2e_service.go index 76f185756ca..fa150cf1cda 100644 --- a/test/e2e_node/e2e_service.go +++ b/test/e2e_node/e2e_service.go @@ -42,11 +42,11 @@ type e2eService struct { killCmds []*killCmd rmDirs []string - etcdDataDir string - kubeletStaticPodDir string - nodeName string - logFiles map[string]logFileData - cgroupsPerQOS bool + context *SharedContext + etcdDataDir string + nodeName string + logFiles map[string]logFileData + cgroupsPerQOS bool } type logFileData struct { @@ -59,7 +59,7 @@ const ( LOG_VERBOSITY_LEVEL = "4" ) -func newE2eService(nodeName string, cgroupsPerQOS bool) *e2eService { +func newE2eService(nodeName string, cgroupsPerQOS bool, context *SharedContext) *e2eService { // Special log files that need to be collected for additional debugging. 
var logFiles = map[string]logFileData{ "kern.log": {[]string{"/var/log/kern.log"}, []string{"-k"}}, @@ -67,6 +67,7 @@ func newE2eService(nodeName string, cgroupsPerQOS bool) *e2eService { } return &e2eService{ + context: context, nodeName: nodeName, logFiles: logFiles, cgroupsPerQOS: cgroupsPerQOS, @@ -99,7 +100,7 @@ func (es *e2eService) start() error { return err } es.killCmds = append(es.killCmds, cmd) - es.rmDirs = append(es.rmDirs, es.kubeletStaticPodDir) + es.rmDirs = append(es.rmDirs, es.context.PodConfigPath) return nil } @@ -211,7 +212,7 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) { if err != nil { return nil, err } - es.kubeletStaticPodDir = dataDir + es.context.PodConfigPath = dataDir var killOverride *exec.Cmd cmdArgs := []string{} if systemdRun, err := exec.LookPath("systemd-run"); err == nil { @@ -236,7 +237,7 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) { "--volume-stats-agg-period", "10s", // Aggregate volumes frequently so tests don't need to wait as long "--allow-privileged", "true", "--serialize-image-pulls", "false", - "--config", es.kubeletStaticPodDir, + "--config", es.context.PodConfigPath, "--file-check-frequency", "10s", // Check file frequently so tests won't wait too long "--v", LOG_VERBOSITY_LEVEL, "--logtostderr", "--pod-cidr=10.180.0.0/24", // Assign a fixed CIDR to the node because there is no node controller. 
diff --git a/test/e2e_node/jenkins/e2e-node-jenkins.sh b/test/e2e_node/jenkins/e2e-node-jenkins.sh index d59f739b14a..ebd3f0b87f6 100755 --- a/test/e2e_node/jenkins/e2e-node-jenkins.sh +++ b/test/e2e_node/jenkins/e2e-node-jenkins.sh @@ -31,6 +31,7 @@ set -x make generated_files go build test/e2e_node/environment/conformance.go +PARALLELISM=${PARALLELISM:-8} WORKSPACE=${WORKSPACE:-"/tmp/"} ARTIFACTS=${WORKSPACE}/_artifacts @@ -39,5 +40,5 @@ go run test/e2e_node/runner/run_e2e.go --logtostderr --vmodule=*=2 --ssh-env="g --zone="$GCE_ZONE" --project="$GCE_PROJECT" --hosts="$GCE_HOSTS" \ --images="$GCE_IMAGES" --image-project="$GCE_IMAGE_PROJECT" \ --image-config-file="$GCE_IMAGE_CONFIG_PATH" --cleanup="$CLEANUP" \ - --results-dir="$ARTIFACTS" --ginkgo-flags="$GINKGO_FLAGS" \ + --results-dir="$ARTIFACTS" --ginkgo-flags="--nodes=$PARALLELISM $GINKGO_FLAGS" \ --setup-node="$SETUP_NODE" --test_args="$TEST_ARGS" --instance-metadata="$GCE_INSTANCE_METADATA" diff --git a/test/e2e_node/mirror_pod_test.go b/test/e2e_node/mirror_pod_test.go index 23a32400695..2e65a796979 100644 --- a/test/e2e_node/mirror_pod_test.go +++ b/test/e2e_node/mirror_pod_test.go @@ -37,14 +37,14 @@ import ( var _ = framework.KubeDescribe("MirrorPod", func() { f := framework.NewDefaultFramework("mirror-pod") Context("when create a mirror pod ", func() { - var staticPodName, mirrorPodName string + var ns, staticPodName, mirrorPodName string BeforeEach(func() { - ns := f.Namespace.Name + ns = f.Namespace.Name staticPodName = "static-pod-" + string(util.NewUUID()) - mirrorPodName = staticPodName + "-" + e2es.nodeName + mirrorPodName = staticPodName + "-" + framework.TestContext.NodeName By("create the static pod") - err := createStaticPod(e2es.kubeletStaticPodDir, staticPodName, ns, ImageRegistry[nginxImage], api.RestartPolicyAlways) + err := createStaticPod(context.PodConfigPath, staticPodName, ns, ImageRegistry[nginxImage], api.RestartPolicyAlways) Expect(err).ShouldNot(HaveOccurred()) By("wait for the 
mirror pod to be running") @@ -53,7 +53,6 @@ var _ = framework.KubeDescribe("MirrorPod", func() { }, 2*time.Minute, time.Second*4).Should(BeNil()) }) It("should be updated when static pod updated", func() { - ns := f.Namespace.Name By("get mirror pod uid") pod, err := f.Client.Pods(ns).Get(mirrorPodName) Expect(err).ShouldNot(HaveOccurred()) @@ -61,7 +60,7 @@ var _ = framework.KubeDescribe("MirrorPod", func() { By("update the static pod container image") image := ImageRegistry[pauseImage] - err = createStaticPod(e2es.kubeletStaticPodDir, staticPodName, ns, image, api.RestartPolicyAlways) + err = createStaticPod(context.PodConfigPath, staticPodName, ns, image, api.RestartPolicyAlways) Expect(err).ShouldNot(HaveOccurred()) By("wait for the mirror pod to be updated") @@ -76,7 +75,6 @@ var _ = framework.KubeDescribe("MirrorPod", func() { Expect(pod.Spec.Containers[0].Image).Should(Equal(image)) }) It("should be recreated when mirror pod gracefully deleted", func() { - ns := f.Namespace.Name By("get mirror pod uid") pod, err := f.Client.Pods(ns).Get(mirrorPodName) Expect(err).ShouldNot(HaveOccurred()) @@ -92,7 +90,6 @@ var _ = framework.KubeDescribe("MirrorPod", func() { }, 2*time.Minute, time.Second*4).Should(BeNil()) }) It("should be recreated when mirror pod forcibly deleted", func() { - ns := f.Namespace.Name By("get mirror pod uid") pod, err := f.Client.Pods(ns).Get(mirrorPodName) Expect(err).ShouldNot(HaveOccurred()) @@ -108,9 +105,8 @@ var _ = framework.KubeDescribe("MirrorPod", func() { }, 2*time.Minute, time.Second*4).Should(BeNil()) }) AfterEach(func() { - ns := f.Namespace.Name By("delete the static pod") - err := deleteStaticPod(e2es.kubeletStaticPodDir, staticPodName, ns) + err := deleteStaticPod(context.PodConfigPath, staticPodName, ns) Expect(err).ShouldNot(HaveOccurred()) By("wait for the mirror pod to disappear") diff --git a/test/e2e_node/util.go b/test/e2e_node/util.go index d09eea6770f..21d2c321cb4 100644 --- a/test/e2e_node/util.go +++ 
b/test/e2e_node/util.go @@ -26,3 +26,7 @@ var disableKubenet = flag.Bool("disable-kubenet", false, "If true, start kubelet var buildServices = flag.Bool("build-services", true, "If true, build local executables") var startServices = flag.Bool("start-services", true, "If true, start local node services") var stopServices = flag.Bool("stop-services", true, "If true, stop local node services after running tests") + +type SharedContext struct { + PodConfigPath string +}