Merge pull request #12319 from mbforbes/nodesNetworkRedux

Implement 'Nodes Network' test for GKE; add optional verbose SSH.
This commit is contained in:
Dawn Chen 2015-08-06 15:30:13 -07:00
commit 5fff8e935e
4 changed files with 54 additions and 27 deletions

View File

@ -210,18 +210,17 @@ function get-password() {
| grep password | cut -f 4 -d ' ')
}
# Detect the instance name and IP for the master
# Detect the IP for the master. Note that on GKE, we don't know the name of the
# master, so KUBE_MASTER is not set.
#
# Assumed vars:
# ZONE
# CLUSTER_NAME
# Vars set:
# KUBE_MASTER
# KUBE_MASTER_IP
function detect-master() {
echo "... in gke:detect-master()" >&2
detect-project >&2
KUBE_MASTER="k8s-${CLUSTER_NAME}-master"
KUBE_MASTER_IP=$("${GCLOUD}" "${CMD_GROUP}" container clusters describe \
--project="${PROJECT}" --zone="${ZONE}" "${CLUSTER_NAME}" \
| grep endpoint | cut -f 2 -d ' ')

View File

@ -89,7 +89,7 @@ fi
export PATH=$(dirname "${e2e_test}"):"${PATH}"
"${ginkgo}" "${ginkgo_args[@]:+${ginkgo_args[@]}}" "${e2e_test}" -- \
"${auth_config[@]:+${auth_config[@]}}" \
--host="https://${KUBE_MASTER_IP-}" \
--host="https://${KUBE_MASTER_IP:-}" \
--provider="${KUBERNETES_PROVIDER}" \
--gce-project="${PROJECT:-}" \
--gce-zone="${ZONE:-}" \

View File

@ -20,6 +20,7 @@ import (
"fmt"
"os/exec"
"regexp"
"strings"
"time"
"k8s.io/kubernetes/pkg/api"
@ -35,10 +36,11 @@ import (
"k8s.io/kubernetes/pkg/cloudprovider/aws"
)
// Constants shared by the resize tests in this file. They are declared once,
// in a single grouped block, so that each name has exactly one definition.
const (
	// serveHostnameImage is the container image reference used by these tests.
	serveHostnameImage = "gcr.io/google_containers/serve_hostname:1.1"
	// resizeNodeReadyTimeout and resizeNodeNotReadyTimeout are the time limits
	// applied after a resize; presumably they bound the wait for a node to
	// become Ready / NotReady respectively (confirm against the callers).
	resizeNodeReadyTimeout    = 2 * time.Minute
	resizeNodeNotReadyTimeout = 2 * time.Minute
)
func resizeGroup(size int) error {
if testContext.Provider == "gce" || testContext.Provider == "gke" {
@ -312,23 +314,25 @@ func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replica
if host == "" {
Failf("Couldn't get the external IP of host %s with addresses %v", node.Name, node.Status.Addresses)
}
By(fmt.Sprintf("block network traffic from node %s to the master", node.Name))
// TODO marekbiskup 2015-06-19 #10085
// The use of MasterName will cause iptables to do a DNS lookup to
// resolve the name to an IP address, which will slow down the test
// and cause it to fail if DNS is absent or broken.
// Use the IP address instead.
destination := testContext.CloudConfig.MasterName
if providerIs("aws") {
// This is the (internal) IP address used on AWS for the master
// TODO: Use IP address for all clouds?
// TODO: Avoid hard-coding this
destination = "172.20.0.9"
master := ""
switch testContext.Provider {
case "gce":
// TODO(#10085): The use of MasterName will cause iptables to do a DNS
// lookup to resolve the name to an IP address, which will slow down the
// test and cause it to fail if DNS is absent or broken. Use the
// internal IP address instead (i.e. NOT the one in testContext.Host).
master = testContext.CloudConfig.MasterName
case "gke":
master = strings.TrimPrefix(testContext.Host, "https://")
case "aws":
// TODO(justinsb): Avoid hardcoding this.
master = "172.20.0.9"
default:
Failf("This test is not supported for provider %s and should be disabled", testContext.Provider)
}
iptablesRule := fmt.Sprintf("OUTPUT --destination %s --jump DROP", destination)
iptablesRule := fmt.Sprintf("OUTPUT --destination %s --jump DROP", master)
defer func() {
// This code will execute even if setting the iptables rule failed.
// It is on purpose because we may have an error even if the new rule
@ -344,7 +348,7 @@ func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replica
// may fail). Manual intervention is required in such case (recreating the
// cluster solves the problem too).
err := wait.Poll(time.Millisecond*100, time.Second*30, func() (bool, error) {
_, _, code, err := SSH(undropCmd, host, testContext.Provider)
_, _, code, err := SSHVerbose(undropCmd, host, testContext.Provider)
if code == 0 && err == nil {
return true, nil
} else {
@ -370,7 +374,7 @@ func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replica
// We could also block network traffic from the master(s) to this node,
// but blocking it one way is sufficient for this test.
dropCmd := fmt.Sprintf("sudo iptables --insert %s", iptablesRule)
if _, _, code, err := SSH(dropCmd, host, testContext.Provider); code != 0 || err != nil {
if _, _, code, err := SSHVerbose(dropCmd, host, testContext.Provider); code != 0 || err != nil {
Failf("Expected 0 exit code and nil error when running %s on %s, got %d and %v",
dropCmd, node.Name, code, err)
}

View File

@ -1480,15 +1480,39 @@ func NodeSSHHosts(c *client.Client) ([]string, error) {
// is no error performing the SSH, the stdout, stderr, and exit code are
// returned.
func SSH(cmd, host, provider string) (string, string, int, error) {
	// The quiet variant: delegate to the shared implementation with
	// per-command logging switched off.
	const verbose = false
	return sshCore(cmd, host, provider, verbose)
}
// SSHVerbose behaves exactly like SSH, except that it also logs the command,
// user, host, stdout, stderr, exit code, and error of the invocation.
func SSHVerbose(cmd, host, provider string) (string, string, int, error) {
	// Same shared implementation as SSH, with logging switched on.
	const verbose = true
	return sshCore(cmd, host, provider, verbose)
}
// sshCore is the shared implementation behind SSH and SSHVerbose. It obtains
// a signer for provider, runs cmd on host via util.RunSSHCommand, and returns
// the resulting stdout, stderr, exit code, and error. If the signer cannot be
// obtained it returns empty output, exit code 0, and a descriptive error
// without attempting the connection. When verbose is true, the command and
// every part of its result are logged.
func sshCore(cmd, host, provider string, verbose bool) (string, string, int, error) {
	// Get a signer for the provider.
	signer, err := getSigner(provider)
	if err != nil {
		return "", "", 0, fmt.Errorf("error getting signer for provider %s: '%v'", provider, err)
	}

	// RunSSHCommand will default to Getenv("USER") if user == "", but we're
	// defaulting here as well for logging clarity.
	user := os.Getenv("KUBE_SSH_USER")
	if user == "" {
		user = os.Getenv("USER")
	}

	// Run the command exactly once; the result is logged below (when
	// requested) and then handed back to the caller unchanged.
	stdout, stderr, code, err := util.RunSSHCommand(cmd, user, host, signer)
	if verbose {
		remote := fmt.Sprintf("%s@%s", user, host)
		Logf("[%s] Running `%s`", remote, cmd)
		Logf("[%s] stdout: %q", remote, stdout)
		Logf("[%s] stderr: %q", remote, stderr)
		Logf("[%s] exit code: %d", remote, code)
		Logf("[%s] error: %v", remote, err)
	}
	return stdout, stderr, code, err
}
// getSigner returns an ssh.Signer for the provider ("gce", etc.) that can be