diff --git a/Gopkg.lock b/Gopkg.lock index 88a52418b1..8c4a4b66f5 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -419,7 +419,7 @@ revision = "3700c55dd766d37e17af354fb9975dc801619d62" [[projects]] - digest = "1:87d8533d926ef2bf1f84ef08589a753104e343e49a6e63a446eba17f59a0ce37" + digest = "1:d6e9b99fe0150d4c26d81612676e8d59ad045642e4cbc8646e494b50d4f245ef" name = "github.com/kata-containers/agent" packages = [ "pkg/types", @@ -427,7 +427,7 @@ "protocols/grpc", ] pruneopts = "NUT" - revision = "cc9502795e22cb38e75460adc5f4c87a36e5c3dc" + revision = "d26a505efd336e966636f9aa30eaacd29cf8a58f" [[projects]] digest = "1:58999a98719fddbac6303cb17e8d85b945f60b72f48e3a2df6b950b97fa926f1" diff --git a/Gopkg.toml b/Gopkg.toml index b0e99c7997..cb981258c0 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -52,7 +52,7 @@ [[constraint]] name = "github.com/kata-containers/agent" - revision = "cc9502795e22cb38e75460adc5f4c87a36e5c3dc" + revision = "d26a505efd336e966636f9aa30eaacd29cf8a58f" [[constraint]] name = "github.com/containerd/cri-containerd" diff --git a/vendor/github.com/kata-containers/agent/protocols/client/client.go b/vendor/github.com/kata-containers/agent/protocols/client/client.go index fb04cbab2b..aac0dd1a2d 100644 --- a/vendor/github.com/kata-containers/agent/protocols/client/client.go +++ b/vendor/github.com/kata-containers/agent/protocols/client/client.go @@ -9,6 +9,7 @@ package client import ( "bufio" "context" + "errors" "fmt" "net" "net/url" @@ -400,6 +401,7 @@ func HybridVSockDialer(sock string, timeout time.Duration) (net.Conn, error) { } dialFunc := func() (net.Conn, error) { + handshakeTimeout := 10 * time.Second conn, err := net.DialTimeout("unix", udsPath, timeout) if err != nil { return nil, err @@ -418,26 +420,41 @@ func HybridVSockDialer(sock string, timeout time.Duration) (net.Conn, error) { return nil, err } - // A trivial handshake is included in the host-initiated vsock connection protocol. - // It looks like this: - // - [host] CONNECT - // - [guest/success] OK - reader := bufio.NewReader(conn) - response, err := reader.ReadString('\n') - if err != nil { - conn.Close() - agentClientLog.WithField("Error", err).Debug("HybridVsock trivial handshake failed") - // for now, we temporarily rely on the backoff strategy from GRPC for more stable CI. - return conn, nil - } else if !strings.Contains(response, "OK") { - conn.Close() - agentClientLog.WithField("response", response).Debug("HybridVsock trivial handshake failed with malformd response code") - // for now, we temporarily rely on the backoff strategy from GRPC for more stable CI. - return conn, nil - } - agentClientLog.WithField("response", response).Debug("HybridVsock trivial handshake") + errChan := make(chan error) - return conn, nil + go func() { + reader := bufio.NewReader(conn) + response, err := reader.ReadString('\n') + if err != nil { + errChan <- err + return + } + + agentClientLog.WithField("response", response).Debug("HybridVsock trivial handshake") + + if strings.Contains(response, "OK") { + errChan <- nil + } else { + errChan <- errors.New("HybridVsock trivial handshake failed with malformed response code") + } + }() + + select { + case err = <-errChan: + if err != nil { + conn.Close() + agentClientLog.WithField("Error", err).Debug("HybridVsock trivial handshake failed") + return nil, err + + } + return conn, nil + case <-time.After(handshakeTimeout): + // Timeout: kernel vsock implementation has a race condition, where no response is given + // Instead of waiting forever for a response, timeout after a fair amount of time. + // See: https://lore.kernel.org/netdev/668b0eda8823564cd604b1663dc53fbaece0cd4e.camel@intel.com/ + conn.Close() + return nil, errors.New("timeout waiting for hybrid vsocket handshake") + } } timeoutErr := grpcStatus.Errorf(codes.DeadlineExceeded, "timed out connecting to hybrid vsocket %s", sock)