Merge pull request #124725 from aojea/e2e_network_test

Debug E2e network test flake
This commit is contained in:
Kubernetes Prow Robot 2024-05-08 04:15:45 -07:00 committed by GitHub
commit 7ac9237b89
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 14 additions and 32 deletions

View File

@ -46,6 +46,7 @@ import (
e2epodoutput "k8s.io/kubernetes/test/e2e/framework/pod/output"
e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
e2essh "k8s.io/kubernetes/test/e2e/framework/ssh"
storageutils "k8s.io/kubernetes/test/e2e/storage/utils"
imageutils "k8s.io/kubernetes/test/utils/image"
netutils "k8s.io/utils/net"
)
@ -327,7 +328,20 @@ func (config *NetworkingTestConfig) DialFromContainer(ctx context.Context, proto
}
if responses.Difference(expectedResponses).Len() > 0 {
returnMsg := fmt.Errorf("received unexpected responses... \nAttempt %d\nCommand %v\nretrieved %v\nexpected %v", i, cmd, responses, expectedResponses)
// TODO(aojea) Remove once issues.k8s.io/123760 is solved
// Dump each node's IP routes, IP addresses and open TCP sockets for troubleshooting #123760
framework.Logf("encountered error during dial (%v)", returnMsg)
hostExec := storageutils.NewHostExec(config.f)
ginkgo.DeferCleanup(hostExec.Cleanup)
cmd := `echo "IP routes: " && ip route && echo "IP addresses:" && ip addr && echo "Open sockets: " && ss -anp --socket=tcp`
for _, node := range config.Nodes {
result, err := hostExec.IssueCommandWithResult(ctx, cmd, &node)
if err != nil {
framework.Logf("error occurred while executing command %s on node: %v", cmd, err)
continue
}
framework.Logf("Dump network information for node %s:\n%s", node.Name, result)
}
return returnMsg
}

View File

@ -17,7 +17,6 @@ limitations under the License.
package network
import (
"bufio"
"context"
"fmt"
"strconv"
@ -328,37 +327,6 @@ var _ = common.SIGDescribe("Networking", func() {
ginkgo.It("should update endpoints: http", func(ctx context.Context) {
config := e2enetwork.NewNetworkingTestConfig(ctx, f)
// start of intermittent code snippet to understand the reason for flaky behaviour
// TODO @aroradaman @aojea remove this once issue #123760 is resolved
// streaming logs for netserver-0 which will be deleted during the test
// (ref: https://github.com/kubernetes/kubernetes/issues/123760)
pod0name := config.EndpointPods[0].Name
go func() {
defer ginkgo.GinkgoRecover()
readCloser, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).GetLogs(pod0name, &v1.PodLogOptions{
Follow: true,
}).Stream(ctx)
// silently ignoring error, we don't want to disturb the original test
if err != nil {
return
}
defer func() {
_ = readCloser.Close()
}()
scanner := bufio.NewScanner(readCloser)
var lines []string
for scanner.Scan() {
lines = append(lines, "\t\t"+scanner.Text())
}
framework.Logf("================ start of pod log for %s ================", pod0name)
framework.Logf("\n%s", strings.Join(lines, "\n"))
framework.Logf("================ end of pod log for %s ================", pod0name)
}()
// end of intermittent code snippet
ginkgo.By(fmt.Sprintf("dialing(http) %v --> %v:%v (config.clusterIP)", config.TestContainerPod.Name, config.ClusterIP, e2enetwork.ClusterHTTPPort))
err := config.DialFromTestContainer(ctx, "http", config.ClusterIP, e2enetwork.ClusterHTTPPort, config.MaxTries, 0, config.EndpointHostnames())
if err != nil {