mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-25 20:53:33 +00:00
Merge pull request #110075 from luckerby/104584-retry-dial-on-socket-windows-base
Retry Unix domain sockets on Windows nodes for the plugin registration mechanism
This commit is contained in:
commit
b19d50d68e
@ -29,11 +29,19 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/Microsoft/go-winio"
|
"github.com/Microsoft/go-winio"
|
||||||
|
"k8s.io/apimachinery/pkg/util/wait"
|
||||||
|
"k8s.io/klog/v2"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
tcpProtocol = "tcp"
|
tcpProtocol = "tcp"
|
||||||
npipeProtocol = "npipe"
|
npipeProtocol = "npipe"
|
||||||
|
// Amount of time to wait between attempting to use a Unix domain socket.
|
||||||
|
// As detailed in https://github.com/kubernetes/kubernetes/issues/104584
|
||||||
|
// the first attempt will most likely fail, hence the need to retry
|
||||||
|
socketDialRetryPeriod = 1 * time.Second
|
||||||
|
// Overall timeout value to dial a Unix domain socket, including retries
|
||||||
|
socketDialTimeout = 4 * time.Second
|
||||||
)
|
)
|
||||||
|
|
||||||
// CreateListener creates a listener on the specified endpoint.
|
// CreateListener creates a listener on the specified endpoint.
|
||||||
@ -127,6 +135,8 @@ func GetBootTime() (time.Time, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// IsUnixDomainSocket returns whether a given file is a AF_UNIX socket file
|
// IsUnixDomainSocket returns whether a given file is a AF_UNIX socket file
|
||||||
|
// Note that due to the retry logic inside, it could take up to 4 seconds
|
||||||
|
// to determine whether or not the file path supplied is a Unix domain socket
|
||||||
func IsUnixDomainSocket(filePath string) (bool, error) {
|
func IsUnixDomainSocket(filePath string) (bool, error) {
|
||||||
// Due to the absence of golang support for os.ModeSocket in Windows (https://github.com/golang/go/issues/33357)
|
// Due to the absence of golang support for os.ModeSocket in Windows (https://github.com/golang/go/issues/33357)
|
||||||
// we need to dial the file and check if we receive an error to determine if a file is Unix Domain Socket file.
|
// we need to dial the file and check if we receive an error to determine if a file is Unix Domain Socket file.
|
||||||
@ -136,12 +146,35 @@ func IsUnixDomainSocket(filePath string) (bool, error) {
|
|||||||
// does NOT work in 1809 if the socket file is created within a bind mounted directory by a container
|
// does NOT work in 1809 if the socket file is created within a bind mounted directory by a container
|
||||||
// and the FSCTL is issued in the host by the kubelet.
|
// and the FSCTL is issued in the host by the kubelet.
|
||||||
|
|
||||||
c, err := net.Dial("unix", filePath)
|
klog.V(6).InfoS("Function IsUnixDomainSocket starts", "filePath", filePath)
|
||||||
if err == nil {
|
// As detailed in https://github.com/kubernetes/kubernetes/issues/104584 we cannot rely
|
||||||
c.Close()
|
// on the Unix Domain socket working on the very first try, hence the potential need to
|
||||||
return true, nil
|
// dial multiple times
|
||||||
|
var lastSocketErr error
|
||||||
|
err := wait.PollImmediate(socketDialRetryPeriod, socketDialTimeout,
|
||||||
|
func() (bool, error) {
|
||||||
|
klog.V(6).InfoS("Dialing the socket", "filePath", filePath)
|
||||||
|
var c net.Conn
|
||||||
|
c, lastSocketErr = net.Dial("unix", filePath)
|
||||||
|
if lastSocketErr == nil {
|
||||||
|
c.Close()
|
||||||
|
klog.V(6).InfoS("Socket dialed successfully", "filePath", filePath)
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
klog.V(6).InfoS("Failed the current attempt to dial the socket, so pausing before retry",
|
||||||
|
"filePath", filePath, "err", lastSocketErr, "socketDialRetryPeriod",
|
||||||
|
socketDialRetryPeriod)
|
||||||
|
return false, nil
|
||||||
|
})
|
||||||
|
|
||||||
|
// PollImmediate will return "timed out waiting for the condition" if the function it
|
||||||
|
// invokes never returns true
|
||||||
|
if err != nil {
|
||||||
|
klog.V(2).InfoS("Failed all attempts to dial the socket so marking it as a non-Unix Domain socket. Last socket error along with the error from PollImmediate follow",
|
||||||
|
"filePath", filePath, "lastSocketErr", lastSocketErr, "err", err)
|
||||||
|
return false, nil
|
||||||
}
|
}
|
||||||
return false, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// NormalizePath converts FS paths returned by certain go frameworks (like fsnotify)
|
// NormalizePath converts FS paths returned by certain go frameworks (like fsnotify)
|
||||||
|
@ -27,6 +27,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"reflect"
|
"reflect"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"sync"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -232,11 +233,50 @@ func testUnixDomainSocket(t *testing.T, label string) {
|
|||||||
assert.True(t, result, "Unexpected result: false from IsUnixDomainSocket: %v for %s", result, label)
|
assert.True(t, result, "Unexpected result: false from IsUnixDomainSocket: %v for %s", result, label)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This is required as on Windows it's possible for the socket file backing a Unix domain socket to
|
||||||
|
// exist but not be ready for socket communications yet as per
|
||||||
|
// https://github.com/kubernetes/kubernetes/issues/104584
|
||||||
|
func testPendingUnixDomainSocket(t *testing.T, label string) {
|
||||||
|
// Create a temporary file that will simulate the Unix domain socket file in a
|
||||||
|
// not-yet-ready state. We need this because the Kubelet keeps an eye on file
|
||||||
|
// changes and acts on them, leading to potential race issues as described in
|
||||||
|
// the referenced issue above
|
||||||
|
f, err := ioutil.TempFile("", "test-domain-socket")
|
||||||
|
require.NoErrorf(t, err, "Failed to create file for test purposes: %v while setting up: %s", err, label)
|
||||||
|
testFile := f.Name()
|
||||||
|
f.Close()
|
||||||
|
|
||||||
|
// Start the check at this point
|
||||||
|
wg := sync.WaitGroup{}
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
result, err := IsUnixDomainSocket(testFile)
|
||||||
|
assert.Nil(t, err, "Unexpected error: %v from IsUnixDomainSocket for %s", err, label)
|
||||||
|
assert.True(t, result, "Unexpected result: false from IsUnixDomainSocket: %v for %s", result, label)
|
||||||
|
wg.Done()
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Wait a sufficient amount of time to make sure the retry logic kicks in
|
||||||
|
time.Sleep(socketDialRetryPeriod)
|
||||||
|
|
||||||
|
// Replace the temporary file with an actual Unix domain socket file
|
||||||
|
os.Remove(testFile)
|
||||||
|
ta, err := net.ResolveUnixAddr("unix", testFile)
|
||||||
|
require.NoErrorf(t, err, "Failed to ResolveUnixAddr: %v while setting up: %s", err, label)
|
||||||
|
unixln, err := net.ListenUnix("unix", ta)
|
||||||
|
require.NoErrorf(t, err, "Failed to ListenUnix: %v while setting up: %s", err, label)
|
||||||
|
|
||||||
|
// Wait for the goroutine to finish, then close the socket
|
||||||
|
wg.Wait()
|
||||||
|
unixln.Close()
|
||||||
|
}
|
||||||
|
|
||||||
func TestIsUnixDomainSocket(t *testing.T) {
|
func TestIsUnixDomainSocket(t *testing.T) {
|
||||||
testPipe(t, "Named Pipe")
|
testPipe(t, "Named Pipe")
|
||||||
testRegularFile(t, "Regular File that Exists", true)
|
testRegularFile(t, "Regular File that Exists", true)
|
||||||
testRegularFile(t, "Regular File that Does Not Exist", false)
|
testRegularFile(t, "Regular File that Does Not Exist", false)
|
||||||
testUnixDomainSocket(t, "Unix Domain Socket File")
|
testUnixDomainSocket(t, "Unix Domain Socket File")
|
||||||
|
testPendingUnixDomainSocket(t, "Pending Unix Domain Socket File")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNormalizePath(t *testing.T) {
|
func TestNormalizePath(t *testing.T) {
|
||||||
|
Loading…
Reference in New Issue
Block a user