mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-26 05:03:09 +00:00
Merge pull request #73288 from wangzhen127/npd-config
Decouple node-problem-detector release from kubernetes
This commit is contained in:
commit
02b8056efb
@ -296,6 +296,8 @@ else
|
|||||||
fi
|
fi
|
||||||
NODE_PROBLEM_DETECTOR_VERSION="${NODE_PROBLEM_DETECTOR_VERSION:-}"
|
NODE_PROBLEM_DETECTOR_VERSION="${NODE_PROBLEM_DETECTOR_VERSION:-}"
|
||||||
NODE_PROBLEM_DETECTOR_TAR_HASH="${NODE_PROBLEM_DETECTOR_TAR_HASH:-}"
|
NODE_PROBLEM_DETECTOR_TAR_HASH="${NODE_PROBLEM_DETECTOR_TAR_HASH:-}"
|
||||||
|
NODE_PROBLEM_DETECTOR_RELEASE_PATH="${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-}"
|
||||||
|
NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS="${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-}"
|
||||||
|
|
||||||
# Optional: Create autoscaler for cluster's nodes.
|
# Optional: Create autoscaler for cluster's nodes.
|
||||||
ENABLE_CLUSTER_AUTOSCALER="${KUBE_ENABLE_CLUSTER_AUTOSCALER:-false}"
|
ENABLE_CLUSTER_AUTOSCALER="${KUBE_ENABLE_CLUSTER_AUTOSCALER:-false}"
|
||||||
|
@ -308,6 +308,8 @@ else
|
|||||||
fi
|
fi
|
||||||
NODE_PROBLEM_DETECTOR_VERSION="${NODE_PROBLEM_DETECTOR_VERSION:-}"
|
NODE_PROBLEM_DETECTOR_VERSION="${NODE_PROBLEM_DETECTOR_VERSION:-}"
|
||||||
NODE_PROBLEM_DETECTOR_TAR_HASH="${NODE_PROBLEM_DETECTOR_TAR_HASH:-}"
|
NODE_PROBLEM_DETECTOR_TAR_HASH="${NODE_PROBLEM_DETECTOR_TAR_HASH:-}"
|
||||||
|
NODE_PROBLEM_DETECTOR_RELEASE_PATH="${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-}"
|
||||||
|
NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS="${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-}"
|
||||||
|
|
||||||
# Optional: Create autoscaler for cluster's nodes.
|
# Optional: Create autoscaler for cluster's nodes.
|
||||||
ENABLE_CLUSTER_AUTOSCALER="${KUBE_ENABLE_CLUSTER_AUTOSCALER:-false}"
|
ENABLE_CLUSTER_AUTOSCALER="${KUBE_ENABLE_CLUSTER_AUTOSCALER:-false}"
|
||||||
|
@ -1257,21 +1257,25 @@ EOF
|
|||||||
function start-node-problem-detector {
|
function start-node-problem-detector {
|
||||||
echo "Start node problem detector"
|
echo "Start node problem detector"
|
||||||
local -r npd_bin="${KUBE_HOME}/bin/node-problem-detector"
|
local -r npd_bin="${KUBE_HOME}/bin/node-problem-detector"
|
||||||
local -r km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor.json"
|
|
||||||
# TODO(random-liu): Handle this for alternative container runtime.
|
|
||||||
local -r dm_config="${KUBE_HOME}/node-problem-detector/config/docker-monitor.json"
|
|
||||||
local -r custom_km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor-counter.json,${KUBE_HOME}/node-problem-detector/config/systemd-monitor-counter.json,${KUBE_HOME}/node-problem-detector/config/docker-monitor-counter.json"
|
|
||||||
echo "Using node problem detector binary at ${npd_bin}"
|
echo "Using node problem detector binary at ${npd_bin}"
|
||||||
local flags="${NPD_TEST_LOG_LEVEL:-"--v=2"} ${NPD_TEST_ARGS:-}"
|
|
||||||
flags+=" --logtostderr"
|
local flags="${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-}"
|
||||||
flags+=" --system-log-monitors=${km_config},${dm_config}"
|
if [[ -z "${flags}" ]]; then
|
||||||
flags+=" --custom-plugin-monitors=${custom_km_config}"
|
local -r km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor.json"
|
||||||
flags+=" --apiserver-override=https://${KUBERNETES_MASTER_NAME}?inClusterConfig=false&auth=/var/lib/node-problem-detector/kubeconfig"
|
# TODO(random-liu): Handle this for alternative container runtime.
|
||||||
local -r npd_port=${NODE_PROBLEM_DETECTOR_PORT:-20256}
|
local -r dm_config="${KUBE_HOME}/node-problem-detector/config/docker-monitor.json"
|
||||||
flags+=" --port=${npd_port}"
|
local -r custom_km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor-counter.json,${KUBE_HOME}/node-problem-detector/config/systemd-monitor-counter.json,${KUBE_HOME}/node-problem-detector/config/docker-monitor-counter.json"
|
||||||
if [[ -n "${EXTRA_NPD_ARGS:-}" ]]; then
|
flags="${NPD_TEST_LOG_LEVEL:-"--v=2"} ${NPD_TEST_ARGS:-}"
|
||||||
flags+=" ${EXTRA_NPD_ARGS}"
|
flags+=" --logtostderr"
|
||||||
|
flags+=" --system-log-monitors=${km_config},${dm_config}"
|
||||||
|
flags+=" --custom-plugin-monitors=${custom_km_config}"
|
||||||
|
local -r npd_port=${NODE_PROBLEM_DETECTOR_PORT:-20256}
|
||||||
|
flags+=" --port=${npd_port}"
|
||||||
|
if [[ -n "${EXTRA_NPD_ARGS:-}" ]]; then
|
||||||
|
flags+=" ${EXTRA_NPD_ARGS}"
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
flags+=" --apiserver-override=https://${KUBERNETES_MASTER_NAME}?inClusterConfig=false&auth=/var/lib/node-problem-detector/kubeconfig"
|
||||||
|
|
||||||
# Write the systemd service file for node problem detector.
|
# Write the systemd service file for node problem detector.
|
||||||
cat <<EOF >/etc/systemd/system/node-problem-detector.service
|
cat <<EOF >/etc/systemd/system/node-problem-detector.service
|
||||||
|
@ -213,12 +213,12 @@ function install-node-problem-detector {
|
|||||||
local -r npd_tar="node-problem-detector-${npd_version}.tar.gz"
|
local -r npd_tar="node-problem-detector-${npd_version}.tar.gz"
|
||||||
|
|
||||||
if is-preloaded "${npd_tar}" "${npd_sha1}"; then
|
if is-preloaded "${npd_tar}" "${npd_sha1}"; then
|
||||||
echo "node-problem-detector is preloaded."
|
echo "${npd_tar} is preloaded."
|
||||||
return
|
return
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Downloading node problem detector."
|
echo "Downloading ${npd_tar}."
|
||||||
local -r npd_release_path="https://storage.googleapis.com/kubernetes-release"
|
local -r npd_release_path="${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-https://storage.googleapis.com/kubernetes-release}"
|
||||||
download-or-bust "${npd_sha1}" "${npd_release_path}/node-problem-detector/${npd_tar}"
|
download-or-bust "${npd_sha1}" "${npd_release_path}/node-problem-detector/${npd_tar}"
|
||||||
local -r npd_dir="${KUBE_HOME}/node-problem-detector"
|
local -r npd_dir="${KUBE_HOME}/node-problem-detector"
|
||||||
mkdir -p "${npd_dir}"
|
mkdir -p "${npd_dir}"
|
||||||
|
@ -1077,6 +1077,8 @@ ENABLE_CLUSTER_UI: $(yaml-quote ${ENABLE_CLUSTER_UI:-false})
|
|||||||
ENABLE_NODE_PROBLEM_DETECTOR: $(yaml-quote ${ENABLE_NODE_PROBLEM_DETECTOR:-none})
|
ENABLE_NODE_PROBLEM_DETECTOR: $(yaml-quote ${ENABLE_NODE_PROBLEM_DETECTOR:-none})
|
||||||
NODE_PROBLEM_DETECTOR_VERSION: $(yaml-quote ${NODE_PROBLEM_DETECTOR_VERSION:-})
|
NODE_PROBLEM_DETECTOR_VERSION: $(yaml-quote ${NODE_PROBLEM_DETECTOR_VERSION:-})
|
||||||
NODE_PROBLEM_DETECTOR_TAR_HASH: $(yaml-quote ${NODE_PROBLEM_DETECTOR_TAR_HASH:-})
|
NODE_PROBLEM_DETECTOR_TAR_HASH: $(yaml-quote ${NODE_PROBLEM_DETECTOR_TAR_HASH:-})
|
||||||
|
NODE_PROBLEM_DETECTOR_RELEASE_PATH: $(yaml-quote ${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-})
|
||||||
|
NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS: $(yaml-quote ${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-})
|
||||||
ENABLE_NODE_LOGGING: $(yaml-quote ${ENABLE_NODE_LOGGING:-false})
|
ENABLE_NODE_LOGGING: $(yaml-quote ${ENABLE_NODE_LOGGING:-false})
|
||||||
LOGGING_DESTINATION: $(yaml-quote ${LOGGING_DESTINATION:-})
|
LOGGING_DESTINATION: $(yaml-quote ${LOGGING_DESTINATION:-})
|
||||||
ELASTICSEARCH_LOGGING_REPLICAS: $(yaml-quote ${ELASTICSEARCH_LOGGING_REPLICAS:-})
|
ELASTICSEARCH_LOGGING_REPLICAS: $(yaml-quote ${ELASTICSEARCH_LOGGING_REPLICAS:-})
|
||||||
|
@ -34,6 +34,7 @@ image_service_endpoint=${IMAGE_SERVICE_ENDPOINT:-""}
|
|||||||
run_until_failure=${RUN_UNTIL_FAILURE:-"false"}
|
run_until_failure=${RUN_UNTIL_FAILURE:-"false"}
|
||||||
test_args=${TEST_ARGS:-""}
|
test_args=${TEST_ARGS:-""}
|
||||||
system_spec_name=${SYSTEM_SPEC_NAME:-}
|
system_spec_name=${SYSTEM_SPEC_NAME:-}
|
||||||
|
extra_envs=${EXTRA_ENVS:-}
|
||||||
|
|
||||||
# Parse the flags to pass to ginkgo
|
# Parse the flags to pass to ginkgo
|
||||||
ginkgoflags=""
|
ginkgoflags=""
|
||||||
@ -148,7 +149,7 @@ if [ ${remote} = true ] ; then
|
|||||||
--image-project="${image_project}" --instance-name-prefix="${instance_prefix}" \
|
--image-project="${image_project}" --instance-name-prefix="${instance_prefix}" \
|
||||||
--delete-instances="${delete_instances}" --test_args="${test_args}" --instance-metadata="${metadata}" \
|
--delete-instances="${delete_instances}" --test_args="${test_args}" --instance-metadata="${metadata}" \
|
||||||
--image-config-file="${image_config_file}" --system-spec-name="${system_spec_name}" \
|
--image-config-file="${image_config_file}" --system-spec-name="${system_spec_name}" \
|
||||||
--test-suite="${test_suite}" \
|
--extra-envs="${extra_envs}" --test-suite="${test_suite}" \
|
||||||
2>&1 | tee -i "${artifacts}/build-log.txt"
|
2>&1 | tee -i "${artifacts}/build-log.txt"
|
||||||
exit $?
|
exit $?
|
||||||
|
|
||||||
@ -169,8 +170,8 @@ else
|
|||||||
# Test using the host the script was run on
|
# Test using the host the script was run on
|
||||||
# Provided for backwards compatibility
|
# Provided for backwards compatibility
|
||||||
go run test/e2e_node/runner/local/run_local.go \
|
go run test/e2e_node/runner/local/run_local.go \
|
||||||
--system-spec-name="${system_spec_name}" --ginkgo-flags="${ginkgoflags}" \
|
--system-spec-name="${system_spec_name}" --extra-envs="${extra_envs}" \
|
||||||
--test-flags="--container-runtime=${runtime} \
|
--ginkgo-flags="${ginkgoflags}" --test-flags="--container-runtime=${runtime} \
|
||||||
--alsologtostderr --v 4 --report-dir=${artifacts} --node-name $(hostname) \
|
--alsologtostderr --v 4 --report-dir=${artifacts} --node-name $(hostname) \
|
||||||
${test_args}" --build-dependencies=true 2>&1 | tee -i "${artifacts}/build-log.txt"
|
${test_args}" --build-dependencies=true 2>&1 | tee -i "${artifacts}/build-log.txt"
|
||||||
exit $?
|
exit $?
|
||||||
|
@ -193,6 +193,8 @@ type NodeTestContextType struct {
|
|||||||
// the node e2e test. If empty, the default one (system.DefaultSpec) is
|
// the node e2e test. If empty, the default one (system.DefaultSpec) is
|
||||||
// used. The system specs are in test/e2e_node/system/specs/.
|
// used. The system specs are in test/e2e_node/system/specs/.
|
||||||
SystemSpecName string
|
SystemSpecName string
|
||||||
|
// ExtraEnvs is a map of environment names to values.
|
||||||
|
ExtraEnvs map[string]string
|
||||||
}
|
}
|
||||||
|
|
||||||
type CloudConfig struct {
|
type CloudConfig struct {
|
||||||
@ -332,6 +334,7 @@ func RegisterNodeFlags() {
|
|||||||
flag.BoolVar(&TestContext.PrepullImages, "prepull-images", true, "If true, prepull images so image pull failures do not cause test failures.")
|
flag.BoolVar(&TestContext.PrepullImages, "prepull-images", true, "If true, prepull images so image pull failures do not cause test failures.")
|
||||||
flag.StringVar(&TestContext.ImageDescription, "image-description", "", "The description of the image which the test will be running on.")
|
flag.StringVar(&TestContext.ImageDescription, "image-description", "", "The description of the image which the test will be running on.")
|
||||||
flag.StringVar(&TestContext.SystemSpecName, "system-spec-name", "", "The name of the system spec (e.g., gke) that's used in the node e2e test. The system specs are in test/e2e_node/system/specs/. This is used by the test framework to determine which tests to run for validating the system requirements.")
|
flag.StringVar(&TestContext.SystemSpecName, "system-spec-name", "", "The name of the system spec (e.g., gke) that's used in the node e2e test. The system specs are in test/e2e_node/system/specs/. This is used by the test framework to determine which tests to run for validating the system requirements.")
|
||||||
|
flag.Var(cliflag.NewMapStringString(&TestContext.ExtraEnvs), "extra-envs", "The extra environment variables needed for node e2e tests. Format: a list of key=value pairs, e.g., env1=val1,env2=val2")
|
||||||
}
|
}
|
||||||
|
|
||||||
// HandleFlags sets up all flags and parses the command line.
|
// HandleFlags sets up all flags and parses the command line.
|
||||||
|
@ -10,6 +10,7 @@ go_library(
|
|||||||
"kubelet.go",
|
"kubelet.go",
|
||||||
"kubelet_perf.go",
|
"kubelet_perf.go",
|
||||||
"mount_propagation.go",
|
"mount_propagation.go",
|
||||||
|
"node_problem_detector.go",
|
||||||
"pod_gc.go",
|
"pod_gc.go",
|
||||||
"pods.go",
|
"pods.go",
|
||||||
"pre_stop.go",
|
"pre_stop.go",
|
||||||
|
232
test/e2e/node/node_problem_detector.go
Normal file
232
test/e2e/node/node_problem_detector.go
Normal file
@ -0,0 +1,232 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2019 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package node
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"k8s.io/api/core/v1"
|
||||||
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
"k8s.io/apimachinery/pkg/fields"
|
||||||
|
"k8s.io/kubernetes/test/e2e/framework"
|
||||||
|
testutils "k8s.io/kubernetes/test/utils"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
// This test checks if node-problem-detector (NPD) runs fine without error on
|
||||||
|
// the nodes in the cluster. NPD's functionality is tested in e2e_node tests.
|
||||||
|
var _ = SIGDescribe("NodeProblemDetector", func() {
|
||||||
|
const (
|
||||||
|
pollInterval = 1 * time.Second
|
||||||
|
pollTimeout = 1 * time.Minute
|
||||||
|
)
|
||||||
|
f := framework.NewDefaultFramework("node-problem-detector")
|
||||||
|
|
||||||
|
BeforeEach(func() {
|
||||||
|
framework.SkipUnlessSSHKeyPresent()
|
||||||
|
framework.SkipUnlessProviderIs(framework.ProvidersWithSSH...)
|
||||||
|
framework.SkipUnlessProviderIs("gce", "gke")
|
||||||
|
framework.SkipUnlessNodeOSDistroIs("gci", "ubuntu")
|
||||||
|
framework.WaitForAllNodesHealthy(f.ClientSet, time.Minute)
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should run without error", func() {
|
||||||
|
By("Getting all nodes' SSH-able IP addresses")
|
||||||
|
hosts, err := framework.NodeSSHHosts(f.ClientSet)
|
||||||
|
if err != nil {
|
||||||
|
framework.Failf("Error getting node hostnames: %v", err)
|
||||||
|
}
|
||||||
|
Expect(len(hosts)).NotTo(BeZero())
|
||||||
|
|
||||||
|
cpuUsageStats := make(map[string][]float64)
|
||||||
|
uptimeStats := make(map[string][]float64)
|
||||||
|
rssStats := make(map[string][]float64)
|
||||||
|
workingSetStats := make(map[string][]float64)
|
||||||
|
|
||||||
|
for _, host := range hosts {
|
||||||
|
cpuUsageStats[host] = []float64{}
|
||||||
|
uptimeStats[host] = []float64{}
|
||||||
|
rssStats[host] = []float64{}
|
||||||
|
workingSetStats[host] = []float64{}
|
||||||
|
|
||||||
|
By(fmt.Sprintf("Check node %q has node-problem-detector process", host))
|
||||||
|
psCmd := "ps aux | grep node-problem-detector"
|
||||||
|
result, err := framework.SSH(psCmd, host, framework.TestContext.Provider)
|
||||||
|
framework.ExpectNoError(err)
|
||||||
|
Expect(result.Code).To(BeZero())
|
||||||
|
Expect(result.Stdout).To(ContainSubstring("/home/kubernetes/bin/node-problem-detector"))
|
||||||
|
|
||||||
|
By(fmt.Sprintf("Check node-problem-detector is running fine on node %q", host))
|
||||||
|
journalctlCmd := "sudo journalctl -u node-problem-detector"
|
||||||
|
result, err = framework.SSH(journalctlCmd, host, framework.TestContext.Provider)
|
||||||
|
framework.ExpectNoError(err)
|
||||||
|
Expect(result.Code).To(BeZero())
|
||||||
|
Expect(result.Stdout).NotTo(ContainSubstring("node-problem-detector.service: Failed"))
|
||||||
|
|
||||||
|
cpuUsage, uptime := getCpuStat(f, host)
|
||||||
|
cpuUsageStats[host] = append(cpuUsageStats[host], cpuUsage)
|
||||||
|
uptimeStats[host] = append(uptimeStats[host], uptime)
|
||||||
|
|
||||||
|
By(fmt.Sprintf("Inject log to trigger AUFSUmountHung on node %q", host))
|
||||||
|
log := "INFO: task umount.aufs:21568 blocked for more than 120 seconds."
|
||||||
|
injectLogCmd := "sudo sh -c \"echo 'kernel: " + log + "' >> /dev/kmsg\""
|
||||||
|
_, err = framework.SSH(injectLogCmd, host, framework.TestContext.Provider)
|
||||||
|
framework.ExpectNoError(err)
|
||||||
|
Expect(result.Code).To(BeZero())
|
||||||
|
}
|
||||||
|
|
||||||
|
By("Check node-problem-detector can post conditions and events to API server")
|
||||||
|
nodes := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
|
||||||
|
Expect(len(nodes.Items)).To(Equal(len(hosts)))
|
||||||
|
for _, node := range nodes.Items {
|
||||||
|
By(fmt.Sprintf("Check node-problem-detector posted KernelDeadlock condition on node %q", node.Name))
|
||||||
|
Eventually(func() error {
|
||||||
|
return verifyNodeCondition(f, "KernelDeadlock", v1.ConditionTrue, "AUFSUmountHung", node.Name)
|
||||||
|
}, pollTimeout, pollInterval).Should(Succeed())
|
||||||
|
|
||||||
|
By(fmt.Sprintf("Check node-problem-detector posted AUFSUmountHung event on node %q", node.Name))
|
||||||
|
eventListOptions := metav1.ListOptions{FieldSelector: fields.Set{"involvedObject.kind": "Node"}.AsSelector().String()}
|
||||||
|
Eventually(func() error {
|
||||||
|
return verifyEvents(f, eventListOptions, 1, "AUFSUmountHung", node.Name)
|
||||||
|
}, pollTimeout, pollInterval).Should(Succeed())
|
||||||
|
}
|
||||||
|
|
||||||
|
By("Gather node-problem-detector cpu and memory stats")
|
||||||
|
numIterations := 60
|
||||||
|
for i := 1; i <= numIterations; i++ {
|
||||||
|
for _, host := range hosts {
|
||||||
|
rss, workingSet := getMemoryStat(f, host)
|
||||||
|
rssStats[host] = append(rssStats[host], rss)
|
||||||
|
workingSetStats[host] = append(workingSetStats[host], workingSet)
|
||||||
|
if i == numIterations {
|
||||||
|
cpuUsage, uptime := getCpuStat(f, host)
|
||||||
|
cpuUsageStats[host] = append(cpuUsageStats[host], cpuUsage)
|
||||||
|
uptimeStats[host] = append(uptimeStats[host], uptime)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
time.Sleep(time.Second)
|
||||||
|
}
|
||||||
|
|
||||||
|
cpuStatsMsg := "CPU (core):"
|
||||||
|
rssStatsMsg := "RSS (MB):"
|
||||||
|
workingSetStatsMsg := "WorkingSet (MB):"
|
||||||
|
for i, host := range hosts {
|
||||||
|
cpuUsage := cpuUsageStats[host][1] - cpuUsageStats[host][0]
|
||||||
|
totaltime := uptimeStats[host][1] - uptimeStats[host][0]
|
||||||
|
cpuStatsMsg += fmt.Sprintf(" Node%d[%.3f];", i, cpuUsage/totaltime)
|
||||||
|
|
||||||
|
sort.Float64s(rssStats[host])
|
||||||
|
rssStatsMsg += fmt.Sprintf(" Node%d[%.1f|%.1f|%.1f];", i,
|
||||||
|
rssStats[host][0], rssStats[host][len(rssStats[host])/2], rssStats[host][len(rssStats[host])-1])
|
||||||
|
|
||||||
|
sort.Float64s(workingSetStats[host])
|
||||||
|
workingSetStatsMsg += fmt.Sprintf(" Node%d[%.1f|%.1f|%.1f];", i,
|
||||||
|
workingSetStats[host][0], workingSetStats[host][len(workingSetStats[host])/2], workingSetStats[host][len(workingSetStats[host])-1])
|
||||||
|
}
|
||||||
|
framework.Logf("Node-Problem-Detector CPU and Memory Stats:\n\t%s\n\t%s\n\t%s", cpuStatsMsg, rssStatsMsg, workingSetStatsMsg)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
func verifyEvents(f *framework.Framework, options metav1.ListOptions, num int, reason, nodeName string) error {
|
||||||
|
events, err := f.ClientSet.CoreV1().Events(metav1.NamespaceDefault).List(options)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
count := 0
|
||||||
|
for _, event := range events.Items {
|
||||||
|
if event.Reason != reason || event.Source.Host != nodeName {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
count += int(event.Count)
|
||||||
|
}
|
||||||
|
if count != num {
|
||||||
|
return fmt.Errorf("expect event number %d, got %d: %v", num, count, events.Items)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func verifyNodeCondition(f *framework.Framework, condition v1.NodeConditionType, status v1.ConditionStatus, reason, nodeName string) error {
|
||||||
|
node, err := f.ClientSet.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
_, c := testutils.GetNodeCondition(&node.Status, condition)
|
||||||
|
if c == nil {
|
||||||
|
return fmt.Errorf("node condition %q not found", condition)
|
||||||
|
}
|
||||||
|
if c.Status != status || c.Reason != reason {
|
||||||
|
return fmt.Errorf("unexpected node condition %q: %+v", condition, c)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getMemoryStat(f *framework.Framework, host string) (rss, workingSet float64) {
|
||||||
|
memCmd := "cat /sys/fs/cgroup/memory/system.slice/node-problem-detector.service/memory.usage_in_bytes && cat /sys/fs/cgroup/memory/system.slice/node-problem-detector.service/memory.stat"
|
||||||
|
result, err := framework.SSH(memCmd, host, framework.TestContext.Provider)
|
||||||
|
framework.ExpectNoError(err)
|
||||||
|
Expect(result.Code).To(BeZero())
|
||||||
|
lines := strings.Split(result.Stdout, "\n")
|
||||||
|
|
||||||
|
memoryUsage, err := strconv.ParseFloat(lines[0], 64)
|
||||||
|
Expect(err).To(BeNil())
|
||||||
|
|
||||||
|
var totalInactiveFile float64
|
||||||
|
for _, line := range lines[1:] {
|
||||||
|
tokens := strings.Split(line, " ")
|
||||||
|
if tokens[0] == "total_rss" {
|
||||||
|
rss, err = strconv.ParseFloat(tokens[1], 64)
|
||||||
|
Expect(err).To(BeNil())
|
||||||
|
}
|
||||||
|
if tokens[0] == "total_inactive_file" {
|
||||||
|
totalInactiveFile, err = strconv.ParseFloat(tokens[1], 64)
|
||||||
|
Expect(err).To(BeNil())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
workingSet = memoryUsage
|
||||||
|
if workingSet < totalInactiveFile {
|
||||||
|
workingSet = 0
|
||||||
|
} else {
|
||||||
|
workingSet -= totalInactiveFile
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert to MB
|
||||||
|
rss = rss / 1024 / 1024
|
||||||
|
workingSet = workingSet / 1024 / 1024
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func getCpuStat(f *framework.Framework, host string) (usage, uptime float64) {
|
||||||
|
cpuCmd := "cat /sys/fs/cgroup/cpu/system.slice/node-problem-detector.service/cpuacct.usage && cat /proc/uptime | awk '{print $1}'"
|
||||||
|
result, err := framework.SSH(cpuCmd, host, framework.TestContext.Provider)
|
||||||
|
framework.ExpectNoError(err)
|
||||||
|
Expect(result.Code).To(BeZero())
|
||||||
|
lines := strings.Split(result.Stdout, "\n")
|
||||||
|
|
||||||
|
usage, err = strconv.ParseFloat(lines[0], 64)
|
||||||
|
uptime, err = strconv.ParseFloat(lines[1], 64)
|
||||||
|
|
||||||
|
// Convert from nanoseconds to seconds
|
||||||
|
usage *= 1e-9
|
||||||
|
return
|
||||||
|
}
|
@ -27,12 +27,14 @@ COPY_SYSTEM_SPEC_FILE
|
|||||||
# REPORT_PATH is the path in the container to save test result and logs.
|
# REPORT_PATH is the path in the container to save test result and logs.
|
||||||
# FLAKE_ATTEMPTS is the time to retry when there is a test failure. By default 2.
|
# FLAKE_ATTEMPTS is the time to retry when there is a test failure. By default 2.
|
||||||
# TEST_ARGS is the test arguments passed into the test.
|
# TEST_ARGS is the test arguments passed into the test.
|
||||||
|
# EXTRA_ENVS is the extra environment variables needed for node e2e tests.
|
||||||
ENV FOCUS="\[Conformance\]" \
|
ENV FOCUS="\[Conformance\]" \
|
||||||
SKIP="\[Flaky\]|\[Serial\]" \
|
SKIP="\[Flaky\]|\[Serial\]" \
|
||||||
PARALLELISM=8 \
|
PARALLELISM=8 \
|
||||||
REPORT_PATH="/var/result" \
|
REPORT_PATH="/var/result" \
|
||||||
FLAKE_ATTEMPTS=2 \
|
FLAKE_ATTEMPTS=2 \
|
||||||
TEST_ARGS=""
|
TEST_ARGS="" \
|
||||||
|
EXTRA_ENVS=""
|
||||||
|
|
||||||
ENTRYPOINT ginkgo --focus="$FOCUS" \
|
ENTRYPOINT ginkgo --focus="$FOCUS" \
|
||||||
--skip="$SKIP" \
|
--skip="$SKIP" \
|
||||||
@ -46,4 +48,5 @@ ENTRYPOINT ginkgo --focus="$FOCUS" \
|
|||||||
--system-spec-name=SYSTEM_SPEC_NAME \
|
--system-spec-name=SYSTEM_SPEC_NAME \
|
||||||
# This is a placeholder that will be substituted in the Makefile.
|
# This is a placeholder that will be substituted in the Makefile.
|
||||||
--system-spec-file=SYSTEM_SPEC_FILE_PATH \
|
--system-spec-file=SYSTEM_SPEC_FILE_PATH \
|
||||||
|
--extra-envs=$EXTRA_ENVS \
|
||||||
$TEST_ARGS
|
$TEST_ARGS
|
||||||
|
@ -77,6 +77,7 @@ func TestMain(m *testing.M) {
|
|||||||
rand.Seed(time.Now().UnixNano())
|
rand.Seed(time.Now().UnixNano())
|
||||||
pflag.Parse()
|
pflag.Parse()
|
||||||
framework.AfterReadingAllFlags(&framework.TestContext)
|
framework.AfterReadingAllFlags(&framework.TestContext)
|
||||||
|
setExtraEnvs()
|
||||||
os.Exit(m.Run())
|
os.Exit(m.Run())
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -146,6 +147,7 @@ var _ = SynchronizedBeforeSuite(func() []byte {
|
|||||||
// This helps with debugging test flakes since it is hard to tell when a test failure is due to image pulling.
|
// This helps with debugging test flakes since it is hard to tell when a test failure is due to image pulling.
|
||||||
if framework.TestContext.PrepullImages {
|
if framework.TestContext.PrepullImages {
|
||||||
klog.Infof("Pre-pulling images so that they are cached for the tests.")
|
klog.Infof("Pre-pulling images so that they are cached for the tests.")
|
||||||
|
updateImageWhiteList()
|
||||||
err := PrePullAllImages()
|
err := PrePullAllImages()
|
||||||
Expect(err).ShouldNot(HaveOccurred())
|
Expect(err).ShouldNot(HaveOccurred())
|
||||||
}
|
}
|
||||||
@ -244,6 +246,9 @@ func waitForNodeReady() {
|
|||||||
// TODO(random-liu): Using dynamic kubelet configuration feature to
|
// TODO(random-liu): Using dynamic kubelet configuration feature to
|
||||||
// update test context with node configuration.
|
// update test context with node configuration.
|
||||||
func updateTestContext() error {
|
func updateTestContext() error {
|
||||||
|
setExtraEnvs()
|
||||||
|
updateImageWhiteList()
|
||||||
|
|
||||||
client, err := getAPIServerClient()
|
client, err := getAPIServerClient()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to get apiserver client: %v", err)
|
return fmt.Errorf("failed to get apiserver client: %v", err)
|
||||||
@ -261,7 +266,7 @@ func updateTestContext() error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to get kubelet configuration: %v", err)
|
return fmt.Errorf("failed to get kubelet configuration: %v", err)
|
||||||
}
|
}
|
||||||
framework.TestContext.KubeletConfig = *kubeletCfg // Set kubelet config.
|
framework.TestContext.KubeletConfig = *kubeletCfg // Set kubelet config
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -319,3 +324,9 @@ func isNodeReady(node *v1.Node) bool {
|
|||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func setExtraEnvs() {
|
||||||
|
for name, value := range framework.TestContext.ExtraEnvs {
|
||||||
|
os.Setenv(name, value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -18,6 +18,7 @@ package e2e_node
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"os/user"
|
"os/user"
|
||||||
"time"
|
"time"
|
||||||
@ -46,7 +47,6 @@ var NodeImageWhiteList = sets.NewString(
|
|||||||
"k8s.gcr.io/stress:v1",
|
"k8s.gcr.io/stress:v1",
|
||||||
busyboxImage,
|
busyboxImage,
|
||||||
"k8s.gcr.io/busybox@sha256:4bdd623e848417d96127e16037743f0cd8b528c026e9175e22a84f639eca58ff",
|
"k8s.gcr.io/busybox@sha256:4bdd623e848417d96127e16037743f0cd8b528c026e9175e22a84f639eca58ff",
|
||||||
"k8s.gcr.io/node-problem-detector:v0.4.1",
|
|
||||||
imageutils.GetE2EImage(imageutils.Nginx),
|
imageutils.GetE2EImage(imageutils.Nginx),
|
||||||
imageutils.GetE2EImage(imageutils.ServeHostname),
|
imageutils.GetE2EImage(imageutils.ServeHostname),
|
||||||
imageutils.GetE2EImage(imageutils.Netexec),
|
imageutils.GetE2EImage(imageutils.Netexec),
|
||||||
@ -58,9 +58,24 @@ var NodeImageWhiteList = sets.NewString(
|
|||||||
"gcr.io/kubernetes-e2e-test-images/node-perf/tf-wide-deep-amd64:1.0",
|
"gcr.io/kubernetes-e2e-test-images/node-perf/tf-wide-deep-amd64:1.0",
|
||||||
)
|
)
|
||||||
|
|
||||||
func init() {
|
// updateImageWhiteList updates the framework.ImageWhiteList with
|
||||||
|
// 1. the hard coded lists
|
||||||
|
// 2. the ones passed in from framework.TestContext.ExtraEnvs
|
||||||
|
// So this function needs to be called after the extra envs are applied.
|
||||||
|
func updateImageWhiteList() {
|
||||||
// Union NodeImageWhiteList and CommonImageWhiteList into the framework image white list.
|
// Union NodeImageWhiteList and CommonImageWhiteList into the framework image white list.
|
||||||
framework.ImageWhiteList = NodeImageWhiteList.Union(commontest.CommonImageWhiteList)
|
framework.ImageWhiteList = NodeImageWhiteList.Union(commontest.CommonImageWhiteList)
|
||||||
|
// Images from extra envs
|
||||||
|
framework.ImageWhiteList.Insert(getNodeProblemDetectorImage())
|
||||||
|
}
|
||||||
|
|
||||||
|
func getNodeProblemDetectorImage() string {
|
||||||
|
const defaultImage string = "k8s.gcr.io/node-problem-detector:v0.6.2"
|
||||||
|
image := os.Getenv("NODE_PROBLEM_DETECTOR_IMAGE")
|
||||||
|
if image == "" {
|
||||||
|
image = defaultImage
|
||||||
|
}
|
||||||
|
return image
|
||||||
}
|
}
|
||||||
|
|
||||||
// puller represents a generic image puller
|
// puller represents a generic image puller
|
||||||
|
@ -40,4 +40,5 @@ go run test/e2e_node/runner/remote/run_remote.go --test-suite=conformance \
|
|||||||
--results-dir="$ARTIFACTS" --test-timeout="$TIMEOUT" \
|
--results-dir="$ARTIFACTS" --test-timeout="$TIMEOUT" \
|
||||||
--test_args="--kubelet-flags=\"$KUBELET_ARGS\"" \
|
--test_args="--kubelet-flags=\"$KUBELET_ARGS\"" \
|
||||||
--instance-metadata="$GCE_INSTANCE_METADATA" \
|
--instance-metadata="$GCE_INSTANCE_METADATA" \
|
||||||
--system-spec-name="$SYSTEM_SPEC_NAME"
|
--system-spec-name="$SYSTEM_SPEC_NAME" \
|
||||||
|
--extra-envs="$EXTRA_ENVS"
|
||||||
|
@ -47,4 +47,5 @@ go run test/e2e_node/runner/remote/run_remote.go --logtostderr --vmodule=*=4 \
|
|||||||
--image-config-file="$GCE_IMAGE_CONFIG_PATH" --cleanup="$CLEANUP" \
|
--image-config-file="$GCE_IMAGE_CONFIG_PATH" --cleanup="$CLEANUP" \
|
||||||
--results-dir="$ARTIFACTS" --ginkgo-flags="--nodes=$PARALLELISM $GINKGO_FLAGS" \
|
--results-dir="$ARTIFACTS" --ginkgo-flags="--nodes=$PARALLELISM $GINKGO_FLAGS" \
|
||||||
--test-timeout="$TIMEOUT" --test_args="$TEST_ARGS --kubelet-flags=\"$KUBELET_ARGS\"" \
|
--test-timeout="$TIMEOUT" --test_args="$TEST_ARGS --kubelet-flags=\"$KUBELET_ARGS\"" \
|
||||||
--instance-metadata="$GCE_INSTANCE_METADATA" --system-spec-name="$SYSTEM_SPEC_NAME"
|
--instance-metadata="$GCE_INSTANCE_METADATA" --system-spec-name="$SYSTEM_SPEC_NAME" \
|
||||||
|
--extra-envs="$EXTRA_ENVS"
|
||||||
|
@ -45,13 +45,14 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete
|
|||||||
pollInterval = 1 * time.Second
|
pollInterval = 1 * time.Second
|
||||||
pollConsistent = 5 * time.Second
|
pollConsistent = 5 * time.Second
|
||||||
pollTimeout = 1 * time.Minute
|
pollTimeout = 1 * time.Minute
|
||||||
image = "k8s.gcr.io/node-problem-detector:v0.4.1"
|
|
||||||
)
|
)
|
||||||
f := framework.NewDefaultFramework("node-problem-detector")
|
f := framework.NewDefaultFramework("node-problem-detector")
|
||||||
var c clientset.Interface
|
var c clientset.Interface
|
||||||
var uid string
|
var uid string
|
||||||
var ns, name, configName, eventNamespace string
|
var ns, name, configName, eventNamespace string
|
||||||
var bootTime, nodeTime time.Time
|
var bootTime, nodeTime time.Time
|
||||||
|
var image string
|
||||||
|
|
||||||
BeforeEach(func() {
|
BeforeEach(func() {
|
||||||
c = f.ClientSet
|
c = f.ClientSet
|
||||||
ns = f.Namespace.Name
|
ns = f.Namespace.Name
|
||||||
@ -60,6 +61,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete
|
|||||||
configName = "node-problem-detector-config-" + uid
|
configName = "node-problem-detector-config-" + uid
|
||||||
// There is no namespace for Node, event recorder will set default namespace for node events.
|
// There is no namespace for Node, event recorder will set default namespace for node events.
|
||||||
eventNamespace = metav1.NamespaceDefault
|
eventNamespace = metav1.NamespaceDefault
|
||||||
|
image = getNodeProblemDetectorImage()
|
||||||
|
By(fmt.Sprintf("Using node-problem-detector image: %s", image))
|
||||||
})
|
})
|
||||||
|
|
||||||
// Test system log monitor. We may add other tests if we have more problem daemons in the future.
|
// Test system log monitor. We may add other tests if we have more problem daemons in the future.
|
||||||
@ -245,7 +248,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete
|
|||||||
timestamp time.Time
|
timestamp time.Time
|
||||||
message string
|
message string
|
||||||
messageNum int
|
messageNum int
|
||||||
events int
|
tempEvents int // Events for temp errors
|
||||||
|
totalEvents int // Events for both temp errors and condition changes
|
||||||
conditionReason string
|
conditionReason string
|
||||||
conditionMessage string
|
conditionMessage string
|
||||||
conditionType v1.ConditionStatus
|
conditionType v1.ConditionStatus
|
||||||
@ -279,7 +283,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete
|
|||||||
timestamp: nodeTime,
|
timestamp: nodeTime,
|
||||||
message: tempMessage,
|
message: tempMessage,
|
||||||
messageNum: 3,
|
messageNum: 3,
|
||||||
events: 3,
|
tempEvents: 3,
|
||||||
|
totalEvents: 3,
|
||||||
conditionReason: defaultReason,
|
conditionReason: defaultReason,
|
||||||
conditionMessage: defaultMessage,
|
conditionMessage: defaultMessage,
|
||||||
conditionType: v1.ConditionFalse,
|
conditionType: v1.ConditionFalse,
|
||||||
@ -289,7 +294,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete
|
|||||||
timestamp: nodeTime,
|
timestamp: nodeTime,
|
||||||
message: permMessage1,
|
message: permMessage1,
|
||||||
messageNum: 1,
|
messageNum: 1,
|
||||||
events: 3, // event number should not change
|
tempEvents: 3, // event number for temp errors should not change
|
||||||
|
totalEvents: 4, // add 1 event for condition change
|
||||||
conditionReason: permReason1,
|
conditionReason: permReason1,
|
||||||
conditionMessage: permMessage1,
|
conditionMessage: permMessage1,
|
||||||
conditionType: v1.ConditionTrue,
|
conditionType: v1.ConditionTrue,
|
||||||
@ -299,7 +305,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete
|
|||||||
timestamp: nodeTime.Add(5 * time.Minute),
|
timestamp: nodeTime.Add(5 * time.Minute),
|
||||||
message: tempMessage,
|
message: tempMessage,
|
||||||
messageNum: 3,
|
messageNum: 3,
|
||||||
events: 6,
|
tempEvents: 6, // add 3 events for temp errors
|
||||||
|
totalEvents: 7, // add 3 events for temp errors
|
||||||
conditionReason: permReason1,
|
conditionReason: permReason1,
|
||||||
conditionMessage: permMessage1,
|
conditionMessage: permMessage1,
|
||||||
conditionType: v1.ConditionTrue,
|
conditionType: v1.ConditionTrue,
|
||||||
@ -309,7 +316,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete
|
|||||||
timestamp: nodeTime.Add(5 * time.Minute),
|
timestamp: nodeTime.Add(5 * time.Minute),
|
||||||
message: permMessage1 + "different message",
|
message: permMessage1 + "different message",
|
||||||
messageNum: 1,
|
messageNum: 1,
|
||||||
events: 6, // event number should not change
|
tempEvents: 6, // event number should not change
|
||||||
|
totalEvents: 7, // event number should not change
|
||||||
conditionReason: permReason1,
|
conditionReason: permReason1,
|
||||||
conditionMessage: permMessage1,
|
conditionMessage: permMessage1,
|
||||||
conditionType: v1.ConditionTrue,
|
conditionType: v1.ConditionTrue,
|
||||||
@ -319,7 +327,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete
|
|||||||
timestamp: nodeTime.Add(5 * time.Minute),
|
timestamp: nodeTime.Add(5 * time.Minute),
|
||||||
message: permMessage2,
|
message: permMessage2,
|
||||||
messageNum: 1,
|
messageNum: 1,
|
||||||
events: 6, // event number should not change
|
tempEvents: 6, // event number for temp errors should not change
|
||||||
|
totalEvents: 8, // add 1 event for condition change
|
||||||
conditionReason: permReason2,
|
conditionReason: permReason2,
|
||||||
conditionMessage: permMessage2,
|
conditionMessage: permMessage2,
|
||||||
conditionType: v1.ConditionTrue,
|
conditionType: v1.ConditionTrue,
|
||||||
@ -332,13 +341,17 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete
|
|||||||
Expect(err).NotTo(HaveOccurred())
|
Expect(err).NotTo(HaveOccurred())
|
||||||
}
|
}
|
||||||
|
|
||||||
By(fmt.Sprintf("Wait for %d events generated", test.events))
|
By(fmt.Sprintf("Wait for %d temp events generated", test.tempEvents))
|
||||||
Eventually(func() error {
|
Eventually(func() error {
|
||||||
return verifyEvents(c.CoreV1().Events(eventNamespace), eventListOptions, test.events, tempReason, tempMessage)
|
return verifyEvents(c.CoreV1().Events(eventNamespace), eventListOptions, test.tempEvents, tempReason, tempMessage)
|
||||||
}, pollTimeout, pollInterval).Should(Succeed())
|
}, pollTimeout, pollInterval).Should(Succeed())
|
||||||
By(fmt.Sprintf("Make sure only %d events generated", test.events))
|
By(fmt.Sprintf("Wait for %d total events generated", test.totalEvents))
|
||||||
|
Eventually(func() error {
|
||||||
|
return verifyTotalEvents(c.CoreV1().Events(eventNamespace), eventListOptions, test.totalEvents)
|
||||||
|
}, pollTimeout, pollInterval).Should(Succeed())
|
||||||
|
By(fmt.Sprintf("Make sure only %d total events generated", test.totalEvents))
|
||||||
Consistently(func() error {
|
Consistently(func() error {
|
||||||
return verifyEvents(c.CoreV1().Events(eventNamespace), eventListOptions, test.events, tempReason, tempMessage)
|
return verifyTotalEvents(c.CoreV1().Events(eventNamespace), eventListOptions, test.totalEvents)
|
||||||
}, pollConsistent, pollInterval).Should(Succeed())
|
}, pollConsistent, pollInterval).Should(Succeed())
|
||||||
|
|
||||||
By(fmt.Sprintf("Make sure node condition %q is set", condition))
|
By(fmt.Sprintf("Make sure node condition %q is set", condition))
|
||||||
@ -390,7 +403,7 @@ func injectLog(file string, timestamp time.Time, log string, num int) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// verifyEvents verifies there are num specific events generated
|
// verifyEvents verifies there are num specific events generated with given reason and message.
|
||||||
func verifyEvents(e coreclientset.EventInterface, options metav1.ListOptions, num int, reason, message string) error {
|
func verifyEvents(e coreclientset.EventInterface, options metav1.ListOptions, num int, reason, message string) error {
|
||||||
events, err := e.List(options)
|
events, err := e.List(options)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -399,7 +412,7 @@ func verifyEvents(e coreclientset.EventInterface, options metav1.ListOptions, nu
|
|||||||
count := 0
|
count := 0
|
||||||
for _, event := range events.Items {
|
for _, event := range events.Items {
|
||||||
if event.Reason != reason || event.Message != message {
|
if event.Reason != reason || event.Message != message {
|
||||||
return fmt.Errorf("unexpected event: %v", event)
|
continue
|
||||||
}
|
}
|
||||||
count += int(event.Count)
|
count += int(event.Count)
|
||||||
}
|
}
|
||||||
@ -409,14 +422,18 @@ func verifyEvents(e coreclientset.EventInterface, options metav1.ListOptions, nu
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// verifyNoEvents verifies there is no event generated
|
// verifyTotalEvents verifies there are num events in total.
|
||||||
func verifyNoEvents(e coreclientset.EventInterface, options metav1.ListOptions) error {
|
func verifyTotalEvents(e coreclientset.EventInterface, options metav1.ListOptions, num int) error {
|
||||||
events, err := e.List(options)
|
events, err := e.List(options)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if len(events.Items) != 0 {
|
count := 0
|
||||||
return fmt.Errorf("unexpected events: %v", events.Items)
|
for _, event := range events.Items {
|
||||||
|
count += int(event.Count)
|
||||||
|
}
|
||||||
|
if count != num {
|
||||||
|
return fmt.Errorf("expect event number %d, got %d: %v", num, count, events.Items)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -63,7 +63,7 @@ func runCommand(command string, args ...string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// RunTest implements TestSuite.RunTest
|
// RunTest implements TestSuite.RunTest
|
||||||
func (n *CAdvisorE2ERemote) RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName string, timeout time.Duration) (string, error) {
|
func (n *CAdvisorE2ERemote) RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName, extraEnvs string, timeout time.Duration) (string, error) {
|
||||||
// Kill any running node processes
|
// Kill any running node processes
|
||||||
cleanupNodeProcesses(host)
|
cleanupNodeProcesses(host)
|
||||||
|
|
||||||
|
@ -259,7 +259,7 @@ func stopKubelet(host, workspace string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// RunTest runs test on the node.
|
// RunTest runs test on the node.
|
||||||
func (c *ConformanceRemote) RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, _, systemSpecName string, timeout time.Duration) (string, error) {
|
func (c *ConformanceRemote) RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, _, systemSpecName, extraEnvs string, timeout time.Duration) (string, error) {
|
||||||
// Install the cni plugins and add a basic CNI configuration.
|
// Install the cni plugins and add a basic CNI configuration.
|
||||||
if err := setupCNI(host, workspace); err != nil {
|
if err := setupCNI(host, workspace); err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
@ -293,8 +293,8 @@ func (c *ConformanceRemote) RunTest(host, workspace, results, imageDesc, junitFi
|
|||||||
// Run the tests
|
// Run the tests
|
||||||
klog.V(2).Infof("Starting tests on %q", host)
|
klog.V(2).Infof("Starting tests on %q", host)
|
||||||
podManifestPath := getPodPath(workspace)
|
podManifestPath := getPodPath(workspace)
|
||||||
cmd := fmt.Sprintf("'timeout -k 30s %fs docker run --rm --privileged=true --net=host -v /:/rootfs -v %s:%s -v %s:/var/result -e TEST_ARGS=--report-prefix=%s %s'",
|
cmd := fmt.Sprintf("'timeout -k 30s %fs docker run --rm --privileged=true --net=host -v /:/rootfs -v %s:%s -v %s:/var/result -e TEST_ARGS=--report-prefix=%s -e EXTRA_ENVS=%s %s'",
|
||||||
timeout.Seconds(), podManifestPath, podManifestPath, results, junitFilePrefix, getConformanceTestImageName(systemSpecName))
|
timeout.Seconds(), podManifestPath, podManifestPath, results, junitFilePrefix, extraEnvs, getConformanceTestImageName(systemSpecName))
|
||||||
testOutput, err := SSH(host, "sh", "-c", cmd)
|
testOutput, err := SSH(host, "sh", "-c", cmd)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return testOutput, err
|
return testOutput, err
|
||||||
|
@ -135,7 +135,7 @@ func updateOSSpecificKubeletFlags(args, host, workspace string) (string, error)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// RunTest runs test on the node.
|
// RunTest runs test on the node.
|
||||||
func (n *NodeE2ERemote) RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName string, timeout time.Duration) (string, error) {
|
func (n *NodeE2ERemote) RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName, extraEnvs string, timeout time.Duration) (string, error) {
|
||||||
// Install the cni plugins and add a basic CNI configuration.
|
// Install the cni plugins and add a basic CNI configuration.
|
||||||
// TODO(random-liu): Do this in cloud init after we remove containervm test.
|
// TODO(random-liu): Do this in cloud init after we remove containervm test.
|
||||||
if err := setupCNI(host, workspace); err != nil {
|
if err := setupCNI(host, workspace); err != nil {
|
||||||
@ -164,8 +164,8 @@ func (n *NodeE2ERemote) RunTest(host, workspace, results, imageDesc, junitFilePr
|
|||||||
klog.V(2).Infof("Starting tests on %q", host)
|
klog.V(2).Infof("Starting tests on %q", host)
|
||||||
cmd := getSSHCommand(" && ",
|
cmd := getSSHCommand(" && ",
|
||||||
fmt.Sprintf("cd %s", workspace),
|
fmt.Sprintf("cd %s", workspace),
|
||||||
fmt.Sprintf("timeout -k 30s %fs ./ginkgo %s ./e2e_node.test -- --system-spec-name=%s --system-spec-file=%s --logtostderr --v 4 --node-name=%s --report-dir=%s --report-prefix=%s --image-description=\"%s\" %s",
|
fmt.Sprintf("timeout -k 30s %fs ./ginkgo %s ./e2e_node.test -- --system-spec-name=%s --system-spec-file=%s --extra-envs=%s --logtostderr --v 4 --node-name=%s --report-dir=%s --report-prefix=%s --image-description=\"%s\" %s",
|
||||||
timeout.Seconds(), ginkgoArgs, systemSpecName, systemSpecFile, host, results, junitFilePrefix, imageDesc, testArgs),
|
timeout.Seconds(), ginkgoArgs, systemSpecName, systemSpecFile, extraEnvs, host, results, junitFilePrefix, imageDesc, testArgs),
|
||||||
)
|
)
|
||||||
return SSH(host, "sh", "-c", cmd)
|
return SSH(host, "sh", "-c", cmd)
|
||||||
}
|
}
|
||||||
|
@ -65,7 +65,7 @@ func CreateTestArchive(suite TestSuite, systemSpecName string) (string, error) {
|
|||||||
|
|
||||||
// Returns the command output, whether the exit was ok, and any errors
|
// Returns the command output, whether the exit was ok, and any errors
|
||||||
// TODO(random-liu): junitFilePrefix is not prefix actually, the file name is junit-junitFilePrefix.xml. Change the variable name.
|
// TODO(random-liu): junitFilePrefix is not prefix actually, the file name is junit-junitFilePrefix.xml. Change the variable name.
|
||||||
func RunRemote(suite TestSuite, archive string, host string, cleanup bool, imageDesc, junitFilePrefix string, testArgs string, ginkgoArgs string, systemSpecName string) (string, bool, error) {
|
func RunRemote(suite TestSuite, archive string, host string, cleanup bool, imageDesc, junitFilePrefix string, testArgs string, ginkgoArgs string, systemSpecName string, extraEnvs string) (string, bool, error) {
|
||||||
// Create the temp staging directory
|
// Create the temp staging directory
|
||||||
klog.V(2).Infof("Staging test binaries on %q", host)
|
klog.V(2).Infof("Staging test binaries on %q", host)
|
||||||
workspace := newWorkspaceDir()
|
workspace := newWorkspaceDir()
|
||||||
@ -110,7 +110,7 @@ func RunRemote(suite TestSuite, archive string, host string, cleanup bool, image
|
|||||||
}
|
}
|
||||||
|
|
||||||
klog.V(2).Infof("Running test on %q", host)
|
klog.V(2).Infof("Running test on %q", host)
|
||||||
output, err := suite.RunTest(host, workspace, resultDir, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName, *testTimeoutSeconds)
|
output, err := suite.RunTest(host, workspace, resultDir, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName, extraEnvs, *testTimeoutSeconds)
|
||||||
|
|
||||||
aggErrs := []error{}
|
aggErrs := []error{}
|
||||||
// Do not log the output here, let the caller deal with the test output.
|
// Do not log the output here, let the caller deal with the test output.
|
||||||
|
@ -46,6 +46,7 @@ type TestSuite interface {
|
|||||||
// * ginkgoArgs is the arguments passed to ginkgo.
|
// * ginkgoArgs is the arguments passed to ginkgo.
|
||||||
// * systemSpecName is the name of the system spec used for validating the
|
// * systemSpecName is the name of the system spec used for validating the
|
||||||
// image on which the test runs.
|
// image on which the test runs.
|
||||||
|
// * extraEnvs is the extra environment variables needed for node e2e tests.
|
||||||
// * timeout is the test timeout.
|
// * timeout is the test timeout.
|
||||||
RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName string, timeout time.Duration) (string, error)
|
RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName, extraEnvs string, timeout time.Duration) (string, error)
|
||||||
}
|
}
|
||||||
|
@ -35,6 +35,7 @@ var buildDependencies = flag.Bool("build-dependencies", true, "If true, build al
|
|||||||
var ginkgoFlags = flag.String("ginkgo-flags", "", "Space-separated list of arguments to pass to Ginkgo test runner.")
|
var ginkgoFlags = flag.String("ginkgo-flags", "", "Space-separated list of arguments to pass to Ginkgo test runner.")
|
||||||
var testFlags = flag.String("test-flags", "", "Space-separated list of arguments to pass to node e2e test.")
|
var testFlags = flag.String("test-flags", "", "Space-separated list of arguments to pass to node e2e test.")
|
||||||
var systemSpecName = flag.String("system-spec-name", "", fmt.Sprintf("The name of the system spec used for validating the image in the node conformance test. The specs are at %s. If unspecified, the default built-in spec (system.DefaultSpec) will be used.", system.SystemSpecPath))
|
var systemSpecName = flag.String("system-spec-name", "", fmt.Sprintf("The name of the system spec used for validating the image in the node conformance test. The specs are at %s. If unspecified, the default built-in spec (system.DefaultSpec) will be used.", system.SystemSpecPath))
|
||||||
|
var extraEnvs = flag.String("extra-envs", "", "The extra environment variables needed for node e2e tests. Format: a list of key=value pairs, e.g., env1=val1,env2=val2")
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
klog.InitFlags(nil)
|
klog.InitFlags(nil)
|
||||||
@ -63,7 +64,7 @@ func main() {
|
|||||||
klog.Fatalf("Failed to get k8s root directory: %v", err)
|
klog.Fatalf("Failed to get k8s root directory: %v", err)
|
||||||
}
|
}
|
||||||
systemSpecFile := filepath.Join(rootDir, system.SystemSpecPath, *systemSpecName+".yaml")
|
systemSpecFile := filepath.Join(rootDir, system.SystemSpecPath, *systemSpecName+".yaml")
|
||||||
args = append(args, fmt.Sprintf("--system-spec-name=%s --system-spec-file=%s", *systemSpecName, systemSpecFile))
|
args = append(args, fmt.Sprintf("--system-spec-name=%s --system-spec-file=%s --extra-envs=%s", *systemSpecName, systemSpecFile, *extraEnvs))
|
||||||
}
|
}
|
||||||
if err := runCommand(ginkgo, args...); err != nil {
|
if err := runCommand(ginkgo, args...); err != nil {
|
||||||
klog.Exitf("Test failed: %v", err)
|
klog.Exitf("Test failed: %v", err)
|
||||||
|
@ -63,6 +63,7 @@ var instanceMetadata = flag.String("instance-metadata", "", "key/value metadata
|
|||||||
var gubernator = flag.Bool("gubernator", false, "If true, output Gubernator link to view logs")
|
var gubernator = flag.Bool("gubernator", false, "If true, output Gubernator link to view logs")
|
||||||
var ginkgoFlags = flag.String("ginkgo-flags", "", "Passed to ginkgo to specify additional flags such as --skip=.")
|
var ginkgoFlags = flag.String("ginkgo-flags", "", "Passed to ginkgo to specify additional flags such as --skip=.")
|
||||||
var systemSpecName = flag.String("system-spec-name", "", fmt.Sprintf("The name of the system spec used for validating the image in the node conformance test. The specs are at %s. If unspecified, the default built-in spec (system.DefaultSpec) will be used.", system.SystemSpecPath))
|
var systemSpecName = flag.String("system-spec-name", "", fmt.Sprintf("The name of the system spec used for validating the image in the node conformance test. The specs are at %s. If unspecified, the default built-in spec (system.DefaultSpec) will be used.", system.SystemSpecPath))
|
||||||
|
var extraEnvs = flag.String("extra-envs", "", "The extra environment variables needed for node e2e tests. Format: a list of key=value pairs, e.g., env1=val1,env2=val2")
|
||||||
|
|
||||||
// envs is the type used to collect all node envs. The key is the env name,
|
// envs is the type used to collect all node envs. The key is the env name,
|
||||||
// and the value is the env value
|
// and the value is the env value
|
||||||
@ -442,7 +443,7 @@ func testHost(host string, deleteFiles bool, imageDesc, junitFilePrefix, ginkgoF
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
output, exitOk, err := remote.RunRemote(suite, path, host, deleteFiles, imageDesc, junitFilePrefix, *testArgs, ginkgoFlagsStr, *systemSpecName)
|
output, exitOk, err := remote.RunRemote(suite, path, host, deleteFiles, imageDesc, junitFilePrefix, *testArgs, ginkgoFlagsStr, *systemSpecName, *extraEnvs)
|
||||||
return &TestResult{
|
return &TestResult{
|
||||||
output: output,
|
output: output,
|
||||||
err: err,
|
err: err,
|
||||||
|
Loading…
Reference in New Issue
Block a user