From b9d865a8185b62d83e9ff81b0e3499a26ac6960d Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Fri, 10 Sep 2021 15:27:23 +0200 Subject: [PATCH 1/2] CloseIdleConnections for wrapped Transport It iterates over the wrapped transports until it finds one that implements the CloseIdleConnections method and executes it. add test for closeidle http1 connections add test for http1.1 reconnect with inflight request add test to reuse connection request add test for request connect after timeout add test for client-go request concurrency --- .../k8s.io/apimachinery/pkg/util/net/http.go | 23 +++ .../k8s.io/client-go/rest/connection_test.go | 184 +++++++++++++++++ .../src/k8s.io/client-go/rest/request_test.go | 186 +++++++++++++++++- 3 files changed, 392 insertions(+), 1 deletion(-) diff --git a/staging/src/k8s.io/apimachinery/pkg/util/net/http.go b/staging/src/k8s.io/apimachinery/pkg/util/net/http.go index 42d66d3164a..04432b175d6 100644 --- a/staging/src/k8s.io/apimachinery/pkg/util/net/http.go +++ b/staging/src/k8s.io/apimachinery/pkg/util/net/http.go @@ -238,6 +238,29 @@ func DialerFor(transport http.RoundTripper) (DialFunc, error) { } } +// CloseIdleConnectionsFor close idles connections for the Transport. +// If the Transport is wrapped it iterates over the wrapped round trippers +// until it finds one that implements the CloseIdleConnections method. +// If the Transport does not have a CloseIdleConnections method +// then this function does nothing. +func CloseIdleConnectionsFor(transport http.RoundTripper) { + if transport == nil { + return + } + type closeIdler interface { + CloseIdleConnections() + } + + switch transport := transport.(type) { + case closeIdler: + transport.CloseIdleConnections() + case RoundTripperWrapper: + CloseIdleConnectionsFor(transport.WrappedRoundTripper()) + default: + klog.Warningf("unknown transport type: %T", transport) + } +} + type TLSClientConfigHolder interface { TLSClientConfig() *tls.Config } diff --git a/staging/src/k8s.io/client-go/rest/connection_test.go b/staging/src/k8s.io/client-go/rest/connection_test.go index 70fd2aa1ba4..89d1b3512fe 100644 --- a/staging/src/k8s.io/client-go/rest/connection_test.go +++ b/staging/src/k8s.io/client-go/rest/connection_test.go @@ -34,6 +34,7 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/runtime/serializer" utilnet "k8s.io/apimachinery/pkg/util/net" + "k8s.io/apimachinery/pkg/util/wait" ) type tcpLB struct { @@ -164,6 +165,189 @@ func TestReconnectBrokenTCP(t *testing.T) { } } +// 1. connect to https server with http1.1 using a TCP proxy +// 2. the connection has keepalive enabled so it will be reused +// 3. break the TCP connection stopping the proxy +// 4. close the idle connection to force creating a new connection +// 5. count that there are 2 connection to the server (we didn't reuse the original connection) +func TestReconnectBrokenTCP_HTTP1(t *testing.T) { + ts := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprintf(w, "Hello, %s", r.Proto) + })) + ts.EnableHTTP2 = false + ts.StartTLS() + defer ts.Close() + + u, err := url.Parse(ts.URL) + if err != nil { + t.Fatalf("failed to parse URL from %q: %v", ts.URL, err) + } + lb := newLB(t, u.Host) + defer lb.ln.Close() + stopCh := make(chan struct{}) + go lb.serve(stopCh) + transport, ok := ts.Client().Transport.(*http.Transport) + if !ok { + t.Fatal("failed to assert *http.Transport") + } + config := &Config{ + Host: "https://" + lb.ln.Addr().String(), + Transport: utilnet.SetTransportDefaults(transport), + // large timeout, otherwise the broken connection will be cleaned by it + Timeout: wait.ForeverTestTimeout, + // These fields are required to create a REST client. + ContentConfig: ContentConfig{ + GroupVersion: &schema.GroupVersion{}, + NegotiatedSerializer: &serializer.CodecFactory{}, + }, + } + config.TLSClientConfig.NextProtos = []string{"http/1.1"} + client, err := RESTClientFor(config) + if err != nil { + t.Fatalf("failed to create REST client: %v", err) + } + + data, err := client.Get().AbsPath("/").DoRaw(context.TODO()) + if err != nil { + t.Fatalf("unexpected err: %s: %v", data, err) + } + if string(data) != "Hello, HTTP/1.1" { + t.Fatalf("unexpected response: %s", data) + } + + // Deliberately let the LB stop proxying traffic for the current + // connection. This mimics a broken TCP connection that's not properly + // closed. + close(stopCh) + + stopCh = make(chan struct{}) + go lb.serve(stopCh) + // Close the idle connections + utilnet.CloseIdleConnectionsFor(client.Client.Transport) + + // If the client didn't close the idle connections, the broken connection + // would still be in the connection pool, the following request would + // then reuse the broken connection instead of creating a new one, and + // thus would fail. + data, err = client.Get().AbsPath("/").DoRaw(context.TODO()) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if string(data) != "Hello, HTTP/1.1" { + t.Fatalf("unexpected response: %s", data) + } + dials := atomic.LoadInt32(&lb.dials) + if dials != 2 { + t.Fatalf("expected %d dials, got %d", 2, dials) + } +} + +// 1. connect to https server with http1.1 using a TCP proxy making the connection to timeout +// 2. the connection has keepalive enabled so it will be reused +// 3. close the in-flight connection to force creating a new connection +// 4. count that there are 2 connection on the LB but only one succeeds +func TestReconnectBrokenTCPInFlight_HTTP1(t *testing.T) { + done := make(chan struct{}) + defer close(done) + received := make(chan struct{}) + + ts := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/hang" { + conn, _, _ := w.(http.Hijacker).Hijack() + close(received) + <-done + conn.Close() + } + fmt.Fprintf(w, "Hello, %s", r.Proto) + })) + ts.EnableHTTP2 = false + ts.StartTLS() + defer ts.Close() + + u, err := url.Parse(ts.URL) + if err != nil { + t.Fatalf("failed to parse URL from %q: %v", ts.URL, err) + } + + lb := newLB(t, u.Host) + defer lb.ln.Close() + stopCh := make(chan struct{}) + go lb.serve(stopCh) + + transport, ok := ts.Client().Transport.(*http.Transport) + if !ok { + t.Fatal("failed to assert *http.Transport") + } + config := &Config{ + Host: "https://" + lb.ln.Addr().String(), + Transport: utilnet.SetTransportDefaults(transport), + // Use something extraordinary large to not hit the timeout + Timeout: wait.ForeverTestTimeout, + // These fields are required to create a REST client. + ContentConfig: ContentConfig{ + GroupVersion: &schema.GroupVersion{}, + NegotiatedSerializer: &serializer.CodecFactory{}, + }, + } + config.TLSClientConfig.NextProtos = []string{"http/1.1"} + + client, err := RESTClientFor(config) + if err != nil { + t.Fatalf("failed to create REST client: %v", err) + } + + // The request will connect, hang and eventually time out + // but we can use a context to close once the test is done + // we are only interested in have an inflight connection + ctx, cancel := context.WithCancel(context.Background()) + reqErrCh := make(chan error, 1) + defer close(reqErrCh) + go func() { + _, err = client.Get().AbsPath("/hang").DoRaw(ctx) + reqErrCh <- err + }() + + // wait until it connect to the server + select { + case <-received: + case <-time.After(wait.ForeverTestTimeout): + t.Fatal("Test timed out waiting for first request to fail") + } + + // Deliberately let the LB stop proxying traffic for the current + // connection. This mimics a broken TCP connection that's not properly + // closed. + close(stopCh) + + stopCh = make(chan struct{}) + go lb.serve(stopCh) + + // New request will fail if tries to reuse the connection + data, err := client.Get().AbsPath("/").DoRaw(context.Background()) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if string(data) != "Hello, HTTP/1.1" { + t.Fatalf("unexpected response: %s", data) + } + dials := atomic.LoadInt32(&lb.dials) + if dials != 2 { + t.Fatalf("expected %d dials, got %d", 2, dials) + } + + // cancel the in-flight connection + cancel() + select { + case <-reqErrCh: + if err == nil { + t.Fatal("Connection succeeded but was expected to timeout") + } + case <-time.After(10 * time.Second): + t.Fatal("Test timed out waiting for the request to fail") + } + +} + func TestRestClientTimeout(t *testing.T) { ts := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { time.Sleep(2 * time.Second) diff --git a/staging/src/k8s.io/client-go/rest/request_test.go b/staging/src/k8s.io/client-go/rest/request_test.go index ab2df1b5339..7b1d83ad348 100644 --- a/staging/src/k8s.io/client-go/rest/request_test.go +++ b/staging/src/k8s.io/client-go/rest/request_test.go @@ -44,10 +44,12 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/runtime/serializer" "k8s.io/apimachinery/pkg/runtime/serializer/streaming" "k8s.io/apimachinery/pkg/util/diff" "k8s.io/apimachinery/pkg/util/httpstream" "k8s.io/apimachinery/pkg/util/intstr" + utilnet "k8s.io/apimachinery/pkg/util/net" "k8s.io/apimachinery/pkg/watch" "k8s.io/client-go/kubernetes/scheme" restclientwatch "k8s.io/client-go/rest/watch" @@ -2273,15 +2275,17 @@ func TestStream(t *testing.T) { func testRESTClientWithConfig(t testing.TB, srv *httptest.Server, contentConfig ClientContentConfig) *RESTClient { base, _ := url.Parse("http://localhost") + var c *http.Client if srv != nil { var err error base, err = url.Parse(srv.URL) if err != nil { t.Fatalf("failed to parse test URL: %v", err) } + c = srv.Client() } versionedAPIPath := defaultResourcePathWithPrefix("", "", "", "") - client, err := NewRESTClient(base, versionedAPIPath, contentConfig, nil, nil) + client, err := NewRESTClient(base, versionedAPIPath, contentConfig, nil, c) if err != nil { t.Fatalf("failed to create a client: %v", err) } @@ -2944,3 +2948,183 @@ func testRequestWithRetry(t *testing.T, key string, doFunc func(ctx context.Cont }) } } + +func TestReuseRequest(t *testing.T) { + var tests = []struct { + name string + enableHTTP2 bool + }{ + {"HTTP1", false}, + {"HTTP2", true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + + ts := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(r.RemoteAddr)) + })) + ts.EnableHTTP2 = tt.enableHTTP2 + ts.StartTLS() + defer ts.Close() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + c := testRESTClient(t, ts) + + req1, err := c.Verb("GET"). + Prefix("foo"). + DoRaw(ctx) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + req2, err := c.Verb("GET"). + Prefix("foo"). + DoRaw(ctx) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + if string(req1) != string(req2) { + t.Fatalf("Expected %v to be equal to %v", string(req1), string(req2)) + } + + }) + } +} + +func TestHTTP1DoNotReuseRequestAfterTimeout(t *testing.T) { + var tests = []struct { + name string + enableHTTP2 bool + }{ + {"HTTP1", false}, + {"HTTP2", true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + done := make(chan struct{}) + ts := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + t.Logf("TEST Connected from %v on %v\n", r.RemoteAddr, r.URL.Path) + if r.URL.Path == "/hang" { + t.Logf("TEST hanging %v\n", r.RemoteAddr) + <-done + } + w.Write([]byte(r.RemoteAddr)) + })) + ts.EnableHTTP2 = tt.enableHTTP2 + ts.StartTLS() + defer ts.Close() + // close hanging connection before shutting down the http server + defer close(done) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + transport, ok := ts.Client().Transport.(*http.Transport) + if !ok { + t.Fatalf("failed to assert *http.Transport") + } + + config := &Config{ + Host: ts.URL, + Transport: utilnet.SetTransportDefaults(transport), + Timeout: 100 * time.Millisecond, + // These fields are required to create a REST client. + ContentConfig: ContentConfig{ + GroupVersion: &schema.GroupVersion{}, + NegotiatedSerializer: &serializer.CodecFactory{}, + }, + } + if !tt.enableHTTP2 { + config.TLSClientConfig.NextProtos = []string{"http/1.1"} + } + c, err := RESTClientFor(config) + if err != nil { + t.Fatalf("failed to create REST client: %v", err) + } + req1, err := c.Verb("GET"). + Prefix("foo"). + DoRaw(ctx) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + _, err = c.Verb("GET"). + Prefix("/hang"). + DoRaw(ctx) + if err == nil { + t.Fatalf("Expected error") + } + + req2, err := c.Verb("GET"). + Prefix("foo"). + DoRaw(ctx) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + // http1 doesn't reuse the connection after it times + if tt.enableHTTP2 != (string(req1) == string(req2)) { + if tt.enableHTTP2 { + t.Fatalf("Expected %v to be the same as %v", string(req1), string(req2)) + } else { + t.Fatalf("Expected %v to be different to %v", string(req1), string(req2)) + } + } + }) + } +} + +func TestTransportConcurrency(t *testing.T) { + const numReqs = 10 + var tests = []struct { + name string + enableHTTP2 bool + }{ + {"HTTP1", false}, + {"HTTP2", true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + + ts := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + t.Logf("Connected from %v %v", r.RemoteAddr, r.URL) + fmt.Fprintf(w, "%v", r.FormValue("echo")) + })) + ts.EnableHTTP2 = tt.enableHTTP2 + ts.StartTLS() + defer ts.Close() + var wg sync.WaitGroup + + wg.Add(numReqs) + c := testRESTClient(t, ts) + reqs := make(chan string) + defer close(reqs) + + for i := 0; i < 4; i++ { + go func() { + for req := range reqs { + res, err := c.Get().Param("echo", req).DoRaw(context.Background()) + if err != nil { + t.Errorf("error on req %s: %v", req, err) + wg.Done() + continue + } + + if string(res) != req { + t.Errorf("body of req %s = %q; want %q", req, res, req) + } + + wg.Done() + } + }() + } + for i := 0; i < numReqs; i++ { + reqs <- fmt.Sprintf("request-%d", i) + } + wg.Wait() + }) + } +} From 15ea255457f48cb3f06f1aadbccaeafed1099a98 Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Tue, 21 Sep 2021 23:38:47 +0200 Subject: [PATCH 2/2] don't use a custom dialer for the kubelet Don't use a custom dialer for the kubelet if is not rotating certificates, so we can reuse TCP connections because we don't need a customer dialer. Kubelet needs to be able to recover from stale http connections. HTTP2 has a mechanism to detect broken connections by sending periodical pings. HTTP1 only can have one persistent connection, and it will close all Idle connections once the Kubelet heartbet fails. However, since there are many edge cases that we can't control, users can still opt-in to the previous behavior for closing the connections by setting the environment variable DISABLE_HTTP2. --- cmd/kubelet/app/server.go | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index 0b8d8e67fce..897fa25ff6f 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -44,6 +44,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + utilnet "k8s.io/apimachinery/pkg/util/net" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" @@ -933,9 +934,23 @@ func buildKubeletClientConfig(ctx context.Context, s *options.KubeletServer, nod } kubeClientConfigOverrides(s, clientConfig) - closeAllConns, err := updateDialer(clientConfig) - if err != nil { - return nil, nil, err + // Kubelet needs to be able to recover from stale http connections. + // HTTP2 has a mechanism to detect broken connections by sending periodical pings. + // HTTP1 only can have one persistent connection, and it will close all Idle connections + // once the Kubelet heartbeat fails. However, since there are many edge cases that we can't + // control, users can still opt-in to the previous behavior for closing the connections by + // setting the environment variable DISABLE_HTTP2. + var closeAllConns func() + if s := os.Getenv("DISABLE_HTTP2"); len(s) > 0 { + klog.InfoS("HTTP2 has been explicitly disabled, updating Kubelet client Dialer to forcefully close active connections on heartbeat failures") + closeAllConns, err = updateDialer(clientConfig) + if err != nil { + return nil, nil, err + } + } else { + closeAllConns = func() { + utilnet.CloseIdleConnectionsFor(clientConfig.Transport) + } } return clientConfig, closeAllConns, nil }