Merge pull request #110075 from luckerby/104584-retry-dial-on-socket-windows-base

Retry Unix domain sockets on Windows nodes for the plugin registration mechanism
This commit is contained in:
Kubernetes Prow Robot 2022-06-27 19:16:16 -07:00 committed by GitHub
commit b19d50d68e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 78 additions and 5 deletions

View File

@ -29,11 +29,19 @@ import (
"time" "time"
"github.com/Microsoft/go-winio" "github.com/Microsoft/go-winio"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/klog/v2"
) )
const ( const (
tcpProtocol = "tcp" tcpProtocol = "tcp"
npipeProtocol = "npipe" npipeProtocol = "npipe"
// Amount of time to wait between attempting to use a Unix domain socket.
// As detailed in https://github.com/kubernetes/kubernetes/issues/104584
// the first attempt will most likely fail, hence the need to retry
socketDialRetryPeriod = 1 * time.Second
// Overall timeout value to dial a Unix domain socket, including retries
socketDialTimeout = 4 * time.Second
) )
// CreateListener creates a listener on the specified endpoint. // CreateListener creates a listener on the specified endpoint.
@ -127,6 +135,8 @@ func GetBootTime() (time.Time, error) {
} }
// IsUnixDomainSocket returns whether a given file is a AF_UNIX socket file // IsUnixDomainSocket returns whether a given file is a AF_UNIX socket file
// Note that due to the retry logic inside, it could take up to 4 seconds
// to determine whether or not the file path supplied is a Unix domain socket
func IsUnixDomainSocket(filePath string) (bool, error) { func IsUnixDomainSocket(filePath string) (bool, error) {
// Due to the absence of golang support for os.ModeSocket in Windows (https://github.com/golang/go/issues/33357) // Due to the absence of golang support for os.ModeSocket in Windows (https://github.com/golang/go/issues/33357)
// we need to dial the file and check if we receive an error to determine if a file is Unix Domain Socket file. // we need to dial the file and check if we receive an error to determine if a file is Unix Domain Socket file.
@ -136,12 +146,35 @@ func IsUnixDomainSocket(filePath string) (bool, error) {
// does NOT work in 1809 if the socket file is created within a bind mounted directory by a container // does NOT work in 1809 if the socket file is created within a bind mounted directory by a container
// and the FSCTL is issued in the host by the kubelet. // and the FSCTL is issued in the host by the kubelet.
c, err := net.Dial("unix", filePath) klog.V(6).InfoS("Function IsUnixDomainSocket starts", "filePath", filePath)
if err == nil { // As detailed in https://github.com/kubernetes/kubernetes/issues/104584 we cannot rely
// on the Unix Domain socket working on the very first try, hence the potential need to
// dial multiple times
var lastSocketErr error
err := wait.PollImmediate(socketDialRetryPeriod, socketDialTimeout,
func() (bool, error) {
klog.V(6).InfoS("Dialing the socket", "filePath", filePath)
var c net.Conn
c, lastSocketErr = net.Dial("unix", filePath)
if lastSocketErr == nil {
c.Close() c.Close()
klog.V(6).InfoS("Socket dialed successfully", "filePath", filePath)
return true, nil return true, nil
} }
klog.V(6).InfoS("Failed the current attempt to dial the socket, so pausing before retry",
"filePath", filePath, "err", lastSocketErr, "socketDialRetryPeriod",
socketDialRetryPeriod)
return false, nil return false, nil
})
// PollImmediate will return "timed out waiting for the condition" if the function it
// invokes never returns true
if err != nil {
klog.V(2).InfoS("Failed all attempts to dial the socket so marking it as a non-Unix Domain socket. Last socket error along with the error from PollImmediate follow",
"filePath", filePath, "lastSocketErr", lastSocketErr, "err", err)
return false, nil
}
return true, nil
} }
// NormalizePath converts FS paths returned by certain go frameworks (like fsnotify) // NormalizePath converts FS paths returned by certain go frameworks (like fsnotify)

View File

@ -27,6 +27,7 @@ import (
"os" "os"
"reflect" "reflect"
"runtime" "runtime"
"sync"
"testing" "testing"
"time" "time"
@ -232,11 +233,50 @@ func testUnixDomainSocket(t *testing.T, label string) {
assert.True(t, result, "Unexpected result: false from IsUnixDomainSocket: %v for %s", result, label) assert.True(t, result, "Unexpected result: false from IsUnixDomainSocket: %v for %s", result, label)
} }
// This is required as on Windows it's possible for the socket file backing a Unix domain socket to
// exist but not be ready for socket communications yet as per
// https://github.com/kubernetes/kubernetes/issues/104584
func testPendingUnixDomainSocket(t *testing.T, label string) {
// Create a temporary file that will simulate the Unix domain socket file in a
// not-yet-ready state. We need this because the Kubelet keeps an eye on file
// changes and acts on them, leading to potential race issues as described in
// the referenced issue above
f, err := ioutil.TempFile("", "test-domain-socket")
require.NoErrorf(t, err, "Failed to create file for test purposes: %v while setting up: %s", err, label)
testFile := f.Name()
f.Close()
// Start the check at this point
wg := sync.WaitGroup{}
wg.Add(1)
go func() {
result, err := IsUnixDomainSocket(testFile)
assert.Nil(t, err, "Unexpected error: %v from IsUnixDomainSocket for %s", err, label)
assert.True(t, result, "Unexpected result: false from IsUnixDomainSocket: %v for %s", result, label)
wg.Done()
}()
// Wait a sufficient amount of time to make sure the retry logic kicks in
time.Sleep(socketDialRetryPeriod)
// Replace the temporary file with an actual Unix domain socket file
os.Remove(testFile)
ta, err := net.ResolveUnixAddr("unix", testFile)
require.NoErrorf(t, err, "Failed to ResolveUnixAddr: %v while setting up: %s", err, label)
unixln, err := net.ListenUnix("unix", ta)
require.NoErrorf(t, err, "Failed to ListenUnix: %v while setting up: %s", err, label)
// Wait for the goroutine to finish, then close the socket
wg.Wait()
unixln.Close()
}
func TestIsUnixDomainSocket(t *testing.T) { func TestIsUnixDomainSocket(t *testing.T) {
testPipe(t, "Named Pipe") testPipe(t, "Named Pipe")
testRegularFile(t, "Regular File that Exists", true) testRegularFile(t, "Regular File that Exists", true)
testRegularFile(t, "Regular File that Does Not Exist", false) testRegularFile(t, "Regular File that Does Not Exist", false)
testUnixDomainSocket(t, "Unix Domain Socket File") testUnixDomainSocket(t, "Unix Domain Socket File")
testPendingUnixDomainSocket(t, "Pending Unix Domain Socket File")
} }
func TestNormalizePath(t *testing.T) { func TestNormalizePath(t *testing.T) {