diff --git a/test/e2e/common/networking.go b/test/e2e/common/networking.go
index 97010b2d734..f08520f9e9d 100644
--- a/test/e2e/common/networking.go
+++ b/test/e2e/common/networking.go
@@ -18,6 +18,7 @@ package common
 
 import (
 	"github.com/onsi/ginkgo"
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2enetwork "k8s.io/kubernetes/test/e2e/framework/network"
@@ -28,6 +29,45 @@ var _ = ginkgo.Describe("[sig-network] Networking", func() {
 
 	ginkgo.Describe("Granular Checks: Pods", func() {
 
+		checkNodeConnectivity := func(config *e2enetwork.NetworkingTestConfig, protocol string, port int) {
+			// First pass: probe every endpoint pod once, breadth first, to quickly
+			// estimate failure. Pods that fail are grouped by host for the retry pass.
+			failedPodsByHost := map[string][]*v1.Pod{}
+			failedPodCount := 0
+			for _, endpointPod := range config.EndpointPods {
+				framework.Logf("Breadth first check of %v on host %v...", endpointPod.Status.PodIP, endpointPod.Status.HostIP)
+				if err := config.DialFromTestContainer(protocol, endpointPod.Status.PodIP, port, 1, 0, sets.NewString(endpointPod.Name)); err != nil {
+					// append on a missing map entry allocates the slice; no explicit init needed.
+					failedPodsByHost[endpointPod.Status.HostIP] = append(failedPodsByHost[endpointPod.Status.HostIP], endpointPod)
+					failedPodCount++
+					framework.Logf("...failed...will try again in next pass")
+				}
+			}
+			// Second pass: retry only the failed pods, host by host, with up to config.MaxTries attempts each.
+			var dialErrs []error
+			framework.Logf("Going to retry %v out of %v pods....", failedPodCount, len(config.EndpointPods))
+			for host, failedPods := range failedPodsByHost {
+				framework.Logf("Doublechecking %v pods in host %v which werent seen the first time.", len(failedPods), host)
+				for _, endpointPod := range failedPods {
+					framework.Logf("Now attempting to probe pod [[[ %v ]]]", endpointPod.Status.PodIP)
+					if err := config.DialFromTestContainer(protocol, endpointPod.Status.PodIP, port, config.MaxTries, 0, sets.NewString(endpointPod.Name)); err != nil {
+						dialErrs = append(dialErrs, err)
+					} else {
+						framework.Logf("Was able to reach %v on %v ", endpointPod.Status.PodIP, endpointPod.Status.HostIP)
+					}
+					framework.Logf("... Done probing pod [[[ %v ]]]", endpointPod.Status.PodIP)
+				}
+				framework.Logf("succeeded at polling %v out of %v connections", len(config.EndpointPods)-len(dialErrs), len(config.EndpointPods))
+			}
+			if len(dialErrs) > 0 {
+				framework.Logf("pod polling failure summary:")
+				for _, e := range dialErrs {
+					framework.Logf("Collected error: %v", e)
+				}
+				framework.Failf("failed, %v out of %v connections failed", len(dialErrs), len(config.EndpointPods))
+			}
+		}
+
 		// Try to hit all endpoints through a test container, retry 5 times,
 		// expect exactly one unique hostname. Each of these endpoints reports
 		// its own hostname.
@@ -39,9 +79,7 @@ var _ = ginkgo.Describe("[sig-network] Networking", func() {
 		*/
 		framework.ConformanceIt("should function for intra-pod communication: http [NodeConformance]", func() {
 			config := e2enetwork.NewCoreNetworkingTestConfig(f, false)
-			for _, endpointPod := range config.EndpointPods {
-				config.DialFromTestContainer("http", endpointPod.Status.PodIP, e2enetwork.EndpointHTTPPort, config.MaxTries, 0, sets.NewString(endpointPod.Name))
-			}
+			checkNodeConnectivity(config, "http", e2enetwork.EndpointHTTPPort)
 		})
 
 		/*
@@ -52,9 +90,7 @@ var _ = ginkgo.Describe("[sig-network] Networking", func() {
 		*/
 		framework.ConformanceIt("should function for intra-pod communication: udp [NodeConformance]", func() {
 			config := e2enetwork.NewCoreNetworkingTestConfig(f, false)
-			for _, endpointPod := range config.EndpointPods {
-				config.DialFromTestContainer("udp", endpointPod.Status.PodIP, e2enetwork.EndpointUDPPort, config.MaxTries, 0, sets.NewString(endpointPod.Name))
-			}
+			checkNodeConnectivity(config, "udp", e2enetwork.EndpointUDPPort)
 		})
 
 		/*
diff --git a/test/e2e/framework/network/utils.go b/test/e2e/framework/network/utils.go
index c66cb3cf47a..0ca63f3f6e0 100644
--- a/test/e2e/framework/network/utils.go
+++ b/test/e2e/framework/network/utils.go
@@ -167,17 +167,17 @@ type NetexecDialResponse struct {
 }
 
-// DialFromEndpointContainer executes a curl via kubectl exec in an endpoint container.
-func (config *NetworkingTestConfig) DialFromEndpointContainer(protocol, targetIP string, targetPort, maxTries, minTries int, expectedEps sets.String) {
-	config.DialFromContainer(protocol, echoHostname, config.EndpointPods[0].Status.PodIP, targetIP, EndpointHTTPPort, targetPort, maxTries, minTries, expectedEps)
+// DialFromEndpointContainer executes a curl via kubectl exec in an endpoint container and returns DialFromContainer's error, if any.
+func (config *NetworkingTestConfig) DialFromEndpointContainer(protocol, targetIP string, targetPort, maxTries, minTries int, expectedEps sets.String) error {
+	return config.DialFromContainer(protocol, echoHostname, config.EndpointPods[0].Status.PodIP, targetIP, EndpointHTTPPort, targetPort, maxTries, minTries, expectedEps)
 }
 
-// DialFromTestContainer executes a curl via kubectl exec in a test container.
-func (config *NetworkingTestConfig) DialFromTestContainer(protocol, targetIP string, targetPort, maxTries, minTries int, expectedEps sets.String) {
-	config.DialFromContainer(protocol, echoHostname, config.TestContainerPod.Status.PodIP, targetIP, testContainerHTTPPort, targetPort, maxTries, minTries, expectedEps)
+// DialFromTestContainer executes a curl via kubectl exec in a test container and returns DialFromContainer's error, if any.
+func (config *NetworkingTestConfig) DialFromTestContainer(protocol, targetIP string, targetPort, maxTries, minTries int, expectedEps sets.String) error {
+	return config.DialFromContainer(protocol, echoHostname, config.TestContainerPod.Status.PodIP, targetIP, testContainerHTTPPort, targetPort, maxTries, minTries, expectedEps)
 }
 
-// DialEchoFromTestContainer executes a curl via kubectl exec in a test container. The response is expected to match the echoMessage.
-func (config *NetworkingTestConfig) DialEchoFromTestContainer(protocol, targetIP string, targetPort, maxTries, minTries int, echoMessage string) {
+// DialEchoFromTestContainer executes a curl via kubectl exec in a test container. The response is expected to match the echoMessage. Returns DialFromContainer's error, if any.
+func (config *NetworkingTestConfig) DialEchoFromTestContainer(protocol, targetIP string, targetPort, maxTries, minTries int, echoMessage string) error {
 	expectedResponse := sets.NewString()
 	expectedResponse.Insert(echoMessage)
 	var dialCommand string
@@ -191,7 +191,7 @@ func (config *NetworkingTestConfig) DialEchoFromTestContainer(protocol, targetIP
 	} else {
 		dialCommand = fmt.Sprintf("echo%%20%s", echoMessage)
 	}
-	config.DialFromContainer(protocol, dialCommand, config.TestContainerPod.Status.PodIP, targetIP, testContainerHTTPPort, targetPort, maxTries, minTries, expectedResponse)
+	return config.DialFromContainer(protocol, dialCommand, config.TestContainerPod.Status.PodIP, targetIP, testContainerHTTPPort, targetPort, maxTries, minTries, expectedResponse)
 }
 
 // diagnoseMissingEndpoints prints debug information about the endpoints that
@@ -248,7 +248,8 @@ func makeCURLDialCommand(ipPort, dialCmd, protocol, targetIP string, targetPort
 // maxTries == minTries will confirm that we see the expected endpoints and no
 // more for maxTries. Use this if you want to eg: fail a readiness check on a
 // pod and confirm it doesn't show up as an endpoint.
-func (config *NetworkingTestConfig) DialFromContainer(protocol, dialCommand, containerIP, targetIP string, containerHTTPPort, targetPort, maxTries, minTries int, expectedResponses sets.String) {
+// Returns nil on success, or an error describing the mismatch if the expected responses were not seen after maxTries.
+func (config *NetworkingTestConfig) DialFromContainer(protocol, dialCommand, containerIP, targetIP string, containerHTTPPort, targetPort, maxTries, minTries int, expectedResponses sets.String) error {
 	ipPort := net.JoinHostPort(containerIP, strconv.Itoa(containerHTTPPort))
 	cmd := makeCURLDialCommand(ipPort, dialCommand, protocol, targetIP, targetPort)
 
@@ -273,16 +274,20 @@ func (config *NetworkingTestConfig) DialFromContainer(protocol, dialCommand, con
 
 		// Check against i+1 so we exit if minTries == maxTries.
 		if (responses.Equal(expectedResponses) || responses.Len() == 0 && expectedResponses.Len() == 0) && i+1 >= minTries {
-			return
+			framework.Logf("reached %v after %v/%v tries", targetIP, i+1, maxTries)
+			return nil
 		}
 		// TODO: get rid of this delay #36281
 		time.Sleep(hitEndpointRetryDelay)
 	}
 
 	if dialCommand == echoHostname {
 		config.diagnoseMissingEndpoints(responses)
 	}
-	framework.Failf("Failed to find expected responses:\nTries %d\nCommand %v\nretrieved %v\nexpected %v\n", maxTries, cmd, responses, expectedResponses)
+	dialErr := fmt.Errorf("did not find expected responses... \nTries %d\nCommand %v\nretrieved %v\nexpected %v", maxTries, cmd, responses, expectedResponses)
+	// Logged here as well so the failure is visible even if a caller drops the returned error.
+	framework.Logf("encountered error during dial (%v)", dialErr)
+	return dialErr
 }
 
 // GetEndpointsFromTestContainer executes a curl via kubectl exec in a test container.
@@ -676,6 +681,7 @@ func (config *NetworkingTestConfig) setupCore(selector map[string]string) {
 
 	epCount := len(config.EndpointPods)
 	config.MaxTries = epCount*epCount + testTries
+	framework.Logf("Setting MaxTries for pod polling to %v for networking test based on endpoint count %v", config.MaxTries, epCount)
 }
 
 // setup includes setupCore and also sets up services