Merge pull request #104844 from aojea/clientgo_close_idle

CloseIdleConnections of wrapped Transport RoundTrippers
This commit is contained in:
Kubernetes Prow Robot 2021-11-16 08:45:27 -08:00 committed by GitHub
commit 45f77ca4ba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 410 additions and 4 deletions

View File

@ -44,6 +44,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
utilnet "k8s.io/apimachinery/pkg/util/net"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
@ -936,9 +937,23 @@ func buildKubeletClientConfig(ctx context.Context, s *options.KubeletServer, nod
}
kubeClientConfigOverrides(s, clientConfig)
closeAllConns, err := updateDialer(clientConfig)
if err != nil {
return nil, nil, err
// Kubelet needs to be able to recover from stale http connections.
// HTTP2 has a mechanism to detect broken connections by sending periodical pings.
// HTTP1 only can have one persistent connection, and it will close all Idle connections
// once the Kubelet heartbeat fails. However, since there are many edge cases that we can't
// control, users can still opt-in to the previous behavior for closing the connections by
// setting the environment variable DISABLE_HTTP2.
var closeAllConns func()
if s := os.Getenv("DISABLE_HTTP2"); len(s) > 0 {
klog.InfoS("HTTP2 has been explicitly disabled, updating Kubelet client Dialer to forcefully close active connections on heartbeat failures")
closeAllConns, err = updateDialer(clientConfig)
if err != nil {
return nil, nil, err
}
} else {
closeAllConns = func() {
utilnet.CloseIdleConnectionsFor(clientConfig.Transport)
}
}
return clientConfig, closeAllConns, nil
}

View File

@ -238,6 +238,29 @@ func DialerFor(transport http.RoundTripper) (DialFunc, error) {
}
}
// CloseIdleConnectionsFor close idles connections for the Transport.
// If the Transport is wrapped it iterates over the wrapped round trippers
// until it finds one that implements the CloseIdleConnections method.
// If the Transport does not have a CloseIdleConnections method
// then this function does nothing.
func CloseIdleConnectionsFor(transport http.RoundTripper) {
if transport == nil {
return
}
type closeIdler interface {
CloseIdleConnections()
}
switch transport := transport.(type) {
case closeIdler:
transport.CloseIdleConnections()
case RoundTripperWrapper:
CloseIdleConnectionsFor(transport.WrappedRoundTripper())
default:
klog.Warningf("unknown transport type: %T", transport)
}
}
type TLSClientConfigHolder interface {
TLSClientConfig() *tls.Config
}

View File

@ -34,6 +34,7 @@ import (
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/runtime/serializer"
utilnet "k8s.io/apimachinery/pkg/util/net"
"k8s.io/apimachinery/pkg/util/wait"
)
type tcpLB struct {
@ -164,6 +165,189 @@ func TestReconnectBrokenTCP(t *testing.T) {
}
}
// 1. connect to https server with http1.1 using a TCP proxy
// 2. the connection has keepalive enabled so it will be reused
// 3. break the TCP connection stopping the proxy
// 4. close the idle connection to force creating a new connection
// 5. count that there are 2 connection to the server (we didn't reuse the original connection)
func TestReconnectBrokenTCP_HTTP1(t *testing.T) {
ts := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "Hello, %s", r.Proto)
}))
ts.EnableHTTP2 = false
ts.StartTLS()
defer ts.Close()
u, err := url.Parse(ts.URL)
if err != nil {
t.Fatalf("failed to parse URL from %q: %v", ts.URL, err)
}
lb := newLB(t, u.Host)
defer lb.ln.Close()
stopCh := make(chan struct{})
go lb.serve(stopCh)
transport, ok := ts.Client().Transport.(*http.Transport)
if !ok {
t.Fatal("failed to assert *http.Transport")
}
config := &Config{
Host: "https://" + lb.ln.Addr().String(),
Transport: utilnet.SetTransportDefaults(transport),
// large timeout, otherwise the broken connection will be cleaned by it
Timeout: wait.ForeverTestTimeout,
// These fields are required to create a REST client.
ContentConfig: ContentConfig{
GroupVersion: &schema.GroupVersion{},
NegotiatedSerializer: &serializer.CodecFactory{},
},
}
config.TLSClientConfig.NextProtos = []string{"http/1.1"}
client, err := RESTClientFor(config)
if err != nil {
t.Fatalf("failed to create REST client: %v", err)
}
data, err := client.Get().AbsPath("/").DoRaw(context.TODO())
if err != nil {
t.Fatalf("unexpected err: %s: %v", data, err)
}
if string(data) != "Hello, HTTP/1.1" {
t.Fatalf("unexpected response: %s", data)
}
// Deliberately let the LB stop proxying traffic for the current
// connection. This mimics a broken TCP connection that's not properly
// closed.
close(stopCh)
stopCh = make(chan struct{})
go lb.serve(stopCh)
// Close the idle connections
utilnet.CloseIdleConnectionsFor(client.Client.Transport)
// If the client didn't close the idle connections, the broken connection
// would still be in the connection pool, the following request would
// then reuse the broken connection instead of creating a new one, and
// thus would fail.
data, err = client.Get().AbsPath("/").DoRaw(context.TODO())
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
if string(data) != "Hello, HTTP/1.1" {
t.Fatalf("unexpected response: %s", data)
}
dials := atomic.LoadInt32(&lb.dials)
if dials != 2 {
t.Fatalf("expected %d dials, got %d", 2, dials)
}
}
// 1. connect to https server with http1.1 using a TCP proxy making the connection to timeout
// 2. the connection has keepalive enabled so it will be reused
// 3. close the in-flight connection to force creating a new connection
// 4. count that there are 2 connection on the LB but only one succeeds
func TestReconnectBrokenTCPInFlight_HTTP1(t *testing.T) {
done := make(chan struct{})
defer close(done)
received := make(chan struct{})
ts := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/hang" {
conn, _, _ := w.(http.Hijacker).Hijack()
close(received)
<-done
conn.Close()
}
fmt.Fprintf(w, "Hello, %s", r.Proto)
}))
ts.EnableHTTP2 = false
ts.StartTLS()
defer ts.Close()
u, err := url.Parse(ts.URL)
if err != nil {
t.Fatalf("failed to parse URL from %q: %v", ts.URL, err)
}
lb := newLB(t, u.Host)
defer lb.ln.Close()
stopCh := make(chan struct{})
go lb.serve(stopCh)
transport, ok := ts.Client().Transport.(*http.Transport)
if !ok {
t.Fatal("failed to assert *http.Transport")
}
config := &Config{
Host: "https://" + lb.ln.Addr().String(),
Transport: utilnet.SetTransportDefaults(transport),
// Use something extraordinary large to not hit the timeout
Timeout: wait.ForeverTestTimeout,
// These fields are required to create a REST client.
ContentConfig: ContentConfig{
GroupVersion: &schema.GroupVersion{},
NegotiatedSerializer: &serializer.CodecFactory{},
},
}
config.TLSClientConfig.NextProtos = []string{"http/1.1"}
client, err := RESTClientFor(config)
if err != nil {
t.Fatalf("failed to create REST client: %v", err)
}
// The request will connect, hang and eventually time out
// but we can use a context to close once the test is done
// we are only interested in have an inflight connection
ctx, cancel := context.WithCancel(context.Background())
reqErrCh := make(chan error, 1)
defer close(reqErrCh)
go func() {
_, err = client.Get().AbsPath("/hang").DoRaw(ctx)
reqErrCh <- err
}()
// wait until it connect to the server
select {
case <-received:
case <-time.After(wait.ForeverTestTimeout):
t.Fatal("Test timed out waiting for first request to fail")
}
// Deliberately let the LB stop proxying traffic for the current
// connection. This mimics a broken TCP connection that's not properly
// closed.
close(stopCh)
stopCh = make(chan struct{})
go lb.serve(stopCh)
// New request will fail if tries to reuse the connection
data, err := client.Get().AbsPath("/").DoRaw(context.Background())
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
if string(data) != "Hello, HTTP/1.1" {
t.Fatalf("unexpected response: %s", data)
}
dials := atomic.LoadInt32(&lb.dials)
if dials != 2 {
t.Fatalf("expected %d dials, got %d", 2, dials)
}
// cancel the in-flight connection
cancel()
select {
case <-reqErrCh:
if err == nil {
t.Fatal("Connection succeeded but was expected to timeout")
}
case <-time.After(10 * time.Second):
t.Fatal("Test timed out waiting for the request to fail")
}
}
func TestRestClientTimeout(t *testing.T) {
ts := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
time.Sleep(2 * time.Second)

View File

@ -44,10 +44,12 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/runtime/serializer"
"k8s.io/apimachinery/pkg/runtime/serializer/streaming"
"k8s.io/apimachinery/pkg/util/diff"
"k8s.io/apimachinery/pkg/util/httpstream"
"k8s.io/apimachinery/pkg/util/intstr"
utilnet "k8s.io/apimachinery/pkg/util/net"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/kubernetes/scheme"
restclientwatch "k8s.io/client-go/rest/watch"
@ -2273,15 +2275,17 @@ func TestStream(t *testing.T) {
func testRESTClientWithConfig(t testing.TB, srv *httptest.Server, contentConfig ClientContentConfig) *RESTClient {
base, _ := url.Parse("http://localhost")
var c *http.Client
if srv != nil {
var err error
base, err = url.Parse(srv.URL)
if err != nil {
t.Fatalf("failed to parse test URL: %v", err)
}
c = srv.Client()
}
versionedAPIPath := defaultResourcePathWithPrefix("", "", "", "")
client, err := NewRESTClient(base, versionedAPIPath, contentConfig, nil, nil)
client, err := NewRESTClient(base, versionedAPIPath, contentConfig, nil, c)
if err != nil {
t.Fatalf("failed to create a client: %v", err)
}
@ -2944,3 +2948,183 @@ func testRequestWithRetry(t *testing.T, key string, doFunc func(ctx context.Cont
})
}
}
func TestReuseRequest(t *testing.T) {
var tests = []struct {
name string
enableHTTP2 bool
}{
{"HTTP1", false},
{"HTTP2", true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ts := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Write([]byte(r.RemoteAddr))
}))
ts.EnableHTTP2 = tt.enableHTTP2
ts.StartTLS()
defer ts.Close()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
c := testRESTClient(t, ts)
req1, err := c.Verb("GET").
Prefix("foo").
DoRaw(ctx)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
req2, err := c.Verb("GET").
Prefix("foo").
DoRaw(ctx)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
if string(req1) != string(req2) {
t.Fatalf("Expected %v to be equal to %v", string(req1), string(req2))
}
})
}
}
func TestHTTP1DoNotReuseRequestAfterTimeout(t *testing.T) {
var tests = []struct {
name string
enableHTTP2 bool
}{
{"HTTP1", false},
{"HTTP2", true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
done := make(chan struct{})
ts := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
t.Logf("TEST Connected from %v on %v\n", r.RemoteAddr, r.URL.Path)
if r.URL.Path == "/hang" {
t.Logf("TEST hanging %v\n", r.RemoteAddr)
<-done
}
w.Write([]byte(r.RemoteAddr))
}))
ts.EnableHTTP2 = tt.enableHTTP2
ts.StartTLS()
defer ts.Close()
// close hanging connection before shutting down the http server
defer close(done)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
transport, ok := ts.Client().Transport.(*http.Transport)
if !ok {
t.Fatalf("failed to assert *http.Transport")
}
config := &Config{
Host: ts.URL,
Transport: utilnet.SetTransportDefaults(transport),
Timeout: 100 * time.Millisecond,
// These fields are required to create a REST client.
ContentConfig: ContentConfig{
GroupVersion: &schema.GroupVersion{},
NegotiatedSerializer: &serializer.CodecFactory{},
},
}
if !tt.enableHTTP2 {
config.TLSClientConfig.NextProtos = []string{"http/1.1"}
}
c, err := RESTClientFor(config)
if err != nil {
t.Fatalf("failed to create REST client: %v", err)
}
req1, err := c.Verb("GET").
Prefix("foo").
DoRaw(ctx)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
_, err = c.Verb("GET").
Prefix("/hang").
DoRaw(ctx)
if err == nil {
t.Fatalf("Expected error")
}
req2, err := c.Verb("GET").
Prefix("foo").
DoRaw(ctx)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
// http1 doesn't reuse the connection after it times
if tt.enableHTTP2 != (string(req1) == string(req2)) {
if tt.enableHTTP2 {
t.Fatalf("Expected %v to be the same as %v", string(req1), string(req2))
} else {
t.Fatalf("Expected %v to be different to %v", string(req1), string(req2))
}
}
})
}
}
func TestTransportConcurrency(t *testing.T) {
const numReqs = 10
var tests = []struct {
name string
enableHTTP2 bool
}{
{"HTTP1", false},
{"HTTP2", true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ts := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
t.Logf("Connected from %v %v", r.RemoteAddr, r.URL)
fmt.Fprintf(w, "%v", r.FormValue("echo"))
}))
ts.EnableHTTP2 = tt.enableHTTP2
ts.StartTLS()
defer ts.Close()
var wg sync.WaitGroup
wg.Add(numReqs)
c := testRESTClient(t, ts)
reqs := make(chan string)
defer close(reqs)
for i := 0; i < 4; i++ {
go func() {
for req := range reqs {
res, err := c.Get().Param("echo", req).DoRaw(context.Background())
if err != nil {
t.Errorf("error on req %s: %v", req, err)
wg.Done()
continue
}
if string(res) != req {
t.Errorf("body of req %s = %q; want %q", req, res, req)
}
wg.Done()
}
}()
}
for i := 0; i < numReqs; i++ {
reqs <- fmt.Sprintf("request-%d", i)
}
wg.Wait()
})
}
}