Make the node e2e test run in parallel.

This commit is contained in:
Random-Liu 2016-07-18 00:52:39 -07:00
parent 17e31bacbc
commit 9d48c76361
7 changed files with 105 additions and 46 deletions

View File

@ -205,6 +205,18 @@ less useful for catching flakes related to creating the instance from an image.**
make test-e2e-node REMOTE=true RUN_UNTIL_FAILURE=true
```
## Run tests in parallel
Running tests in parallel can usually shorten the test duration. By default, the node
e2e test runs with `--nodes=8` (see the ginkgo flag
[--nodes](https://onsi.github.io/ginkgo/#parallel-specs)). You can use the
`PARALLELISM` option to change the parallelism.
```sh
make test-e2e-node PARALLELISM=4 # run tests with 4 parallel nodes
make test-e2e-node PARALLELISM=1 # run tests sequentially
```
## Run tests with kubenet network plugin
[kubenet](http://kubernetes.io/docs/admin/network-plugins/#kubenet) is

View File

@ -19,6 +19,13 @@ source "${KUBE_ROOT}/hack/lib/init.sh"
focus=${FOCUS:-""}
skip=${SKIP:-""}
# The number of tests that can run in parallel depends on which tests
# are running and on the size of the node. Too much parallelism, and tests
# will fail due to resource contention. 8 is a reasonable default for an
# n1-standard-1 node.
# Currently, parallelism only takes effect when REMOTE=true. For local
# tests, ginkgo's default parallelism (number of cores - 1) is used.
parallelism=${PARALLELISM:-8}
report=${REPORT:-"/tmp/"}
artifacts=${ARTIFACTS:-"/tmp/_artifacts"}
remote=${REMOTE:-"false"}
@ -46,6 +53,25 @@ if [[ -z "${ginkgo}" ]]; then
exit 1
fi
# Parse the flags to pass to ginkgo
ginkgoflags=""
if [[ $parallelism -gt 1 ]]; then
ginkgoflags="$ginkgoflags -nodes=$parallelism "
fi
if [[ $focus != "" ]]; then
ginkgoflags="$ginkgoflags -focus=$focus "
fi
if [[ $skip != "" ]]; then
ginkgoflags="$ginkgoflags -skip=$skip "
fi
if [[ $run_until_failure != "" ]]; then
ginkgoflags="$ginkgoflags -untilItFails=$run_until_failure "
fi
if [ $remote = true ] ; then
# Set up the directory into which test artifacts (logs, junit.xml, etc.) are copied from the remote host
if [ ! -d "${artifacts}" ]; then
@ -89,20 +115,6 @@ if [ $remote = true ] ; then
done
fi
# Parse the flags to pass to ginkgo
ginkgoflags=""
if [[ $focus != "" ]]; then
ginkgoflags="$ginkgoflags -focus=$focus "
fi
if [[ $skip != "" ]]; then
ginkgoflags="$ginkgoflags -skip=$skip "
fi
if [[ $run_until_failure != "" ]]; then
ginkgoflags="$ginkgoflags -untilItFails=$run_until_failure "
fi
# Output the configuration we will try to run
echo "Running tests remotely using"
echo "Project: $project"
@ -133,7 +145,7 @@ else
fi
# Test using the host the script was run on
# Provided for backwards compatibility
"${ginkgo}" --focus=$focus --skip=$skip "${KUBE_ROOT}/test/e2e_node/" --report-dir=${report} \
"${ginkgo}" $ginkgoflags "${KUBE_ROOT}/test/e2e_node/" --report-dir=${report} \
-- --alsologtostderr --v 2 --node-name $(hostname) --build-services=true \
--start-services=true --stop-services=true $test_args
exit $?

View File

@ -20,6 +20,7 @@ package e2e_node
import (
"bytes"
"encoding/json"
"flag"
"fmt"
"io/ioutil"
@ -27,7 +28,6 @@ import (
"os"
"os/exec"
"path"
"strings"
"testing"
"time"
@ -48,6 +48,27 @@ import (
var e2es *e2eService
// context is the test context shared by all parallel nodes.
// Originally we set up the test environment and initialized global variables
// in BeforeSuite, then used those globals in the tests.
// However, once the tests run in parallel, ginkgo distributes them across
// several parallel test nodes, and BeforeSuite and AfterSuite run on each
// node.
// We don't want to start the services (kubelet, apiserver and etcd) on every
// parallel node, but we do want to set some globally shared variables that
// the tests can use.
// SynchronizedBeforeSuite achieves exactly that: its first function is
// called only once, its second function is called on each parallel test
// node, and the result returned by the first function is passed to the
// second as its parameter.
// So we start all services and initialize the shared context in the first
// function, and propagate the context to all parallel test nodes in the
// second function.
// Note that no lock is needed for the shared context, because it is only
// initialized in the first function of SynchronizedBeforeSuite and never
// modified afterwards.
var context SharedContext
var prePullImages = flag.Bool("prepull-images", true, "If true, prepull images so image pull failures do not cause test failures.")
var junitFileNumber = flag.Int("junit-file-number", 1, "Used to create junit filename - e.g. junit_01.xml.")
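
The comment above describes ginkgo's standard mechanism for one-time setup in parallel suites. Below is a minimal, self-contained sketch of that pattern, using a hypothetical `Config` type in place of the suite's `SharedContext`:

```go
package example

import (
	"encoding/json"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)

// Config is a hypothetical stand-in for the suite's shared state.
type Config struct {
	ServiceAddr string
}

var cfg Config

var _ = SynchronizedBeforeSuite(func() []byte {
	// Runs exactly once, on parallel node 1: start shared services here
	// and serialize whatever the other nodes need to know.
	data, err := json.Marshal(Config{ServiceAddr: "127.0.0.1:8080"})
	Expect(err).NotTo(HaveOccurred())
	return data
}, func(data []byte) {
	// Runs on every parallel node, receiving node 1's serialized result.
	Expect(json.Unmarshal(data, &cfg)).To(Succeed())
})

var _ = SynchronizedAfterSuite(func() {
	// Runs on every node after its specs finish.
}, func() {
	// Runs once, on node 1, after all other nodes are done: stop services.
})
```

Specs on every node can then read `cfg` safely, since it is written once during suite setup and never mutated afterwards.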
@ -77,17 +98,10 @@ func TestE2eNode(t *testing.T) {
}
// Set up the kubelet on the node
var _ = BeforeSuite(func() {
var _ = SynchronizedBeforeSuite(func() []byte {
if *buildServices {
buildGo()
}
if framework.TestContext.NodeName == "" {
output, err := exec.Command("hostname").CombinedOutput()
if err != nil {
glog.Fatalf("Could not get node name from hostname %v. Output:\n%s", err, output)
}
framework.TestContext.NodeName = strings.TrimSpace(fmt.Sprintf("%s", output))
}
// Pre-pull the images the tests depend on so we can fail immediately if there is an image pull issue.
// This helps with debugging test flakes, since it is hard to tell whether a test failure is due to image pulling.
@ -102,8 +116,9 @@ var _ = BeforeSuite(func() {
// We should mask locksmithd when provisioning the machine.
maskLocksmithdOnCoreos()
shared := &SharedContext{}
if *startServices {
e2es = newE2eService(framework.TestContext.NodeName, framework.TestContext.CgroupsPerQOS)
e2es = newE2eService(framework.TestContext.NodeName, framework.TestContext.CgroupsPerQOS, shared)
if err := e2es.start(); err != nil {
Fail(fmt.Sprintf("Unable to start node services.\n%v", err))
}
@ -117,10 +132,28 @@ var _ = BeforeSuite(func() {
// Reference common test to make the import valid.
commontest.CurrentSuite = commontest.NodeE2E
data, err := json.Marshal(shared)
Expect(err).NotTo(HaveOccurred())
return data
}, func(data []byte) {
// Set the shared context received from the synchronized initialization function
shared := &SharedContext{}
Expect(json.Unmarshal(data, shared)).To(Succeed())
context = *shared
if framework.TestContext.NodeName == "" {
hostname, err := os.Hostname()
if err != nil {
glog.Fatalf("Could not get node name: %v", err)
}
framework.TestContext.NodeName = hostname
}
})
// Tear down the kubelet on the node
var _ = AfterSuite(func() {
var _ = SynchronizedAfterSuite(func() {}, func() {
if e2es != nil {
e2es.getLogFiles()
if *startServices && *stopServices {

View File

@ -42,11 +42,11 @@ type e2eService struct {
killCmds []*killCmd
rmDirs []string
etcdDataDir string
kubeletStaticPodDir string
nodeName string
logFiles map[string]logFileData
cgroupsPerQOS bool
context *SharedContext
etcdDataDir string
nodeName string
logFiles map[string]logFileData
cgroupsPerQOS bool
}
type logFileData struct {
@ -59,7 +59,7 @@ const (
LOG_VERBOSITY_LEVEL = "4"
)
func newE2eService(nodeName string, cgroupsPerQOS bool) *e2eService {
func newE2eService(nodeName string, cgroupsPerQOS bool, context *SharedContext) *e2eService {
// Special log files that need to be collected for additional debugging.
var logFiles = map[string]logFileData{
"kern.log": {[]string{"/var/log/kern.log"}, []string{"-k"}},
@ -67,6 +67,7 @@ func newE2eService(nodeName string, cgroupsPerQOS bool) *e2eService {
}
return &e2eService{
context: context,
nodeName: nodeName,
logFiles: logFiles,
cgroupsPerQOS: cgroupsPerQOS,
@ -99,7 +100,7 @@ func (es *e2eService) start() error {
return err
}
es.killCmds = append(es.killCmds, cmd)
es.rmDirs = append(es.rmDirs, es.kubeletStaticPodDir)
es.rmDirs = append(es.rmDirs, es.context.PodConfigPath)
return nil
}
@ -211,7 +212,7 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) {
if err != nil {
return nil, err
}
es.kubeletStaticPodDir = dataDir
es.context.PodConfigPath = dataDir
var killOverride *exec.Cmd
cmdArgs := []string{}
if systemdRun, err := exec.LookPath("systemd-run"); err == nil {
@ -236,7 +237,7 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) {
"--volume-stats-agg-period", "10s", // Aggregate volumes frequently so tests don't need to wait as long
"--allow-privileged", "true",
"--serialize-image-pulls", "false",
"--config", es.kubeletStaticPodDir,
"--config", es.context.PodConfigPath,
"--file-check-frequency", "10s", // Check file frequently so tests won't wait too long
"--v", LOG_VERBOSITY_LEVEL, "--logtostderr",
"--pod-cidr=10.180.0.0/24", // Assign a fixed CIDR to the node because there is no node controller.

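
The `SharedContext` threading above is worth spelling out: `newE2eService` keeps a pointer to a context owned by the test suite, `startKubeletServer` publishes the kubelet's pod-config directory through that pointer, and the suite then serializes the populated struct for the other ginkgo nodes. A rough, self-contained sketch of that hand-off (the `service` type and the path are illustrative, not the suite's actual code):

```go
package example

import (
	"encoding/json"
	"fmt"
	"io/ioutil"
)

// SharedContext mirrors the struct added in this commit: exported fields
// only, so it survives the JSON round-trip between ginkgo nodes.
type SharedContext struct {
	PodConfigPath string
}

// service stands in for e2eService: it fills in fields on a context it
// does not own, so the caller sees the values after start() returns.
type service struct {
	context *SharedContext
}

func (s *service) start() error {
	dir, err := ioutil.TempDir("", "static-pods")
	if err != nil {
		return err
	}
	s.context.PodConfigPath = dir // discovered at startup, published via the pointer
	return nil
}

func main() {
	shared := &SharedContext{}
	svc := &service{context: shared}
	if err := svc.start(); err != nil {
		panic(err)
	}
	// Node 1 serializes the populated context for the other parallel nodes.
	data, err := json.Marshal(shared)
	if err != nil {
		panic(err)
	}
	fmt.Printf("propagated to parallel nodes: %s\n", data)
}
```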
View File

@ -31,6 +31,7 @@ set -x
make generated_files
go build test/e2e_node/environment/conformance.go
PARALLELISM=${PARALLELISM:-8}
WORKSPACE=${WORKSPACE:-"/tmp/"}
ARTIFACTS=${WORKSPACE}/_artifacts
@ -39,5 +40,5 @@ go run test/e2e_node/runner/run_e2e.go --logtostderr --vmodule=*=2 --ssh-env="g
--zone="$GCE_ZONE" --project="$GCE_PROJECT" --hosts="$GCE_HOSTS" \
--images="$GCE_IMAGES" --image-project="$GCE_IMAGE_PROJECT" \
--image-config-file="$GCE_IMAGE_CONFIG_PATH" --cleanup="$CLEANUP" \
--results-dir="$ARTIFACTS" --ginkgo-flags="$GINKGO_FLAGS" \
--results-dir="$ARTIFACTS" --ginkgo-flags="--nodes=$PARALLELISM $GINKGO_FLAGS" \
--setup-node="$SETUP_NODE" --test_args="$TEST_ARGS" --instance-metadata="$GCE_INSTANCE_METADATA"

View File

@ -37,14 +37,14 @@ import (
var _ = framework.KubeDescribe("MirrorPod", func() {
f := framework.NewDefaultFramework("mirror-pod")
Context("when create a mirror pod ", func() {
var staticPodName, mirrorPodName string
var ns, staticPodName, mirrorPodName string
BeforeEach(func() {
ns := f.Namespace.Name
ns = f.Namespace.Name
staticPodName = "static-pod-" + string(util.NewUUID())
mirrorPodName = staticPodName + "-" + e2es.nodeName
mirrorPodName = staticPodName + "-" + framework.TestContext.NodeName
By("create the static pod")
err := createStaticPod(e2es.kubeletStaticPodDir, staticPodName, ns, ImageRegistry[nginxImage], api.RestartPolicyAlways)
err := createStaticPod(context.PodConfigPath, staticPodName, ns, ImageRegistry[nginxImage], api.RestartPolicyAlways)
Expect(err).ShouldNot(HaveOccurred())
By("wait for the mirror pod to be running")
@ -53,7 +53,6 @@ var _ = framework.KubeDescribe("MirrorPod", func() {
}, 2*time.Minute, time.Second*4).Should(BeNil())
})
It("should be updated when static pod updated", func() {
ns := f.Namespace.Name
By("get mirror pod uid")
pod, err := f.Client.Pods(ns).Get(mirrorPodName)
Expect(err).ShouldNot(HaveOccurred())
@ -61,7 +60,7 @@ var _ = framework.KubeDescribe("MirrorPod", func() {
By("update the static pod container image")
image := ImageRegistry[pauseImage]
err = createStaticPod(e2es.kubeletStaticPodDir, staticPodName, ns, image, api.RestartPolicyAlways)
err = createStaticPod(context.PodConfigPath, staticPodName, ns, image, api.RestartPolicyAlways)
Expect(err).ShouldNot(HaveOccurred())
By("wait for the mirror pod to be updated")
@ -76,7 +75,6 @@ var _ = framework.KubeDescribe("MirrorPod", func() {
Expect(pod.Spec.Containers[0].Image).Should(Equal(image))
})
It("should be recreated when mirror pod gracefully deleted", func() {
ns := f.Namespace.Name
By("get mirror pod uid")
pod, err := f.Client.Pods(ns).Get(mirrorPodName)
Expect(err).ShouldNot(HaveOccurred())
@ -92,7 +90,6 @@ var _ = framework.KubeDescribe("MirrorPod", func() {
}, 2*time.Minute, time.Second*4).Should(BeNil())
})
It("should be recreated when mirror pod forcibly deleted", func() {
ns := f.Namespace.Name
By("get mirror pod uid")
pod, err := f.Client.Pods(ns).Get(mirrorPodName)
Expect(err).ShouldNot(HaveOccurred())
@ -108,9 +105,8 @@ var _ = framework.KubeDescribe("MirrorPod", func() {
}, 2*time.Minute, time.Second*4).Should(BeNil())
})
AfterEach(func() {
ns := f.Namespace.Name
By("delete the static pod")
err := deleteStaticPod(e2es.kubeletStaticPodDir, staticPodName, ns)
err := deleteStaticPod(context.PodConfigPath, staticPodName, ns)
Expect(err).ShouldNot(HaveOccurred())
By("wait for the mirror pod to disappear")

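The mirror-pod test changes follow from the same split: `e2es` is only non-nil on the ginkgo node that started the services, so the tests now read the pod-config directory from the propagated `context` and the node name from `framework.TestContext` instead. The `createStaticPod`/`deleteStaticPod` helpers are not shown in this diff; below is a rough sketch of the file-based mechanism they rely on (the manifest layout here is illustrative):

```go
package example

import (
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
)

// writeStaticPod is an illustrative analogue of createStaticPod: the
// kubelet watches its --config directory and starts any pod manifest
// dropped into it, then reports a "mirror pod" named
// <pod-name>-<node-name> to the apiserver.
func writeStaticPod(dir, name, ns, image string) error {
	manifest := fmt.Sprintf(`{
  "apiVersion": "v1",
  "kind": "Pod",
  "metadata": {"name": %q, "namespace": %q},
  "spec": {"containers": [{"name": "test", "image": %q}]}
}`, name, ns, image)
	return ioutil.WriteFile(filepath.Join(dir, name+".json"), []byte(manifest), 0644)
}

// removeStaticPod is the analogue of deleteStaticPod: removing the file
// makes the kubelet stop the pod. While the file is still present, a
// deleted mirror pod is recreated, which is what these tests verify.
func removeStaticPod(dir, name string) error {
	return os.Remove(filepath.Join(dir, name+".json"))
}
```
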
View File

@ -26,3 +26,7 @@ var disableKubenet = flag.Bool("disable-kubenet", false, "If true, start kubelet
var buildServices = flag.Bool("build-services", true, "If true, build local executables")
var startServices = flag.Bool("start-services", true, "If true, start local node services")
var stopServices = flag.Bool("stop-services", true, "If true, stop local node services after running tests")
type SharedContext struct {
PodConfigPath string
}