devicemanager testing: dynamically choose tmp dir

Hard-coding the tests to use /tmp/device_plugin for sockets is
problematic because it prevents running tests in parallel on the same
machine (perhaps because there are multiple developers, perhaps
because testing is done independently on different code checkouts).
/tmp/device_plugin also was not removed after testing.

This is probably not that relevant. But more importantly, this change
also fixes https://github.com/kubernetes/kubernetes/issues/59488.
"make test" failed in TestDevicePluginReRegistration because something
removed /tmp/device_plugin/device-plugin.sock while something else
tried to connect to it:

2018/02/07 14:34:39 Starting to serve on /tmp/device_plugin/device-plugin.sock
[pid 29568] connect(14, {sa_family=AF_UNIX, sun_path="/tmp/device_plugin/server.sock"}, 33) = 0
[pid 29568] unlinkat(AT_FDCWD, "/tmp/device_plugin/server.sock", 0) = 0
[pid 29568] unlinkat(AT_FDCWD, "/tmp/device_plugin/device-plugin.sock", 0) = 0
[pid 29568] --- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=29568, si_uid=1000} ---
[pid 29568] connect(6, {sa_family=AF_UNIX, sun_path="/tmp/device_plugin/device-plugin.sock"}, 40) = -1 ENOENT (No such file or directory)
E0207 14:34:39.961321   29568 endpoint.go:117] listAndWatch ended unexpectedly for device plugin mock with error rpc error: code = Unavailable desc = transport is closing
strace: Process 29623 attached
[pid 29574] connect(3, {sa_family=AF_UNIX, sun_path="/tmp/device_plugin/device-plugin.sock"}, 40) = -1 ENOENT (No such file or directory)
[pid 29623] connect(3, {sa_family=AF_UNIX, sun_path="/tmp/device_plugin/device-plugin.sock"}, 40) = -1 ENOENT (No such file or directory)
[pid 29574] connect(3, {sa_family=AF_UNIX, sun_path="/tmp/device_plugin/device-plugin.sock"}, 40) = -1 ENOENT (No such file or directory)
E0207 14:34:49.961324   29568 endpoint.go:60] Can't create new endpoint with path /tmp/device_plugin/device-plugin.sock err failed to dial device plugin: context deadline exceeded
E0207 14:34:49.961390   29568 manager.go:340] Failed to dial device plugin with request &RegisterRequest{Version:v1alpha2,Endpoint:device-plugin.sock,ResourceName:fake-domain/resource,}: failed to dial device plugin: context deadline exceeded
panic: test timed out after 2m0s

It's not entirely certain which code was to blame for this unlinkat()
calls (perhaps some cleanup code from a previous test running in a
goroutine?) but this no longer happened after switching to per-test
socket directories.
This commit is contained in:
Patrick Ohly 2018-02-07 17:24:43 +01:00
parent 117780b908
commit 1325c2f8be

View File

@ -40,18 +40,32 @@ import (
)
const (
socketName = "/tmp/device_plugin/server.sock"
pluginSocketName = "/tmp/device_plugin/device-plugin.sock"
testResourceName = "fake-domain/resource"
)
func tmpSocketDir() (socketDir, socketName, pluginSocketName string, err error) {
socketDir, err = ioutil.TempDir("", "device_plugin")
if err != nil {
return
}
socketName = socketDir + "/server.sock"
pluginSocketName = socketDir + "/device-plugin.sock"
return
}
func TestNewManagerImpl(t *testing.T) {
_, err := newManagerImpl(socketName)
socketDir, socketName, _, err := tmpSocketDir()
require.NoError(t, err)
defer os.RemoveAll(socketDir)
_, err = newManagerImpl(socketName)
require.NoError(t, err)
}
func TestNewManagerImplStart(t *testing.T) {
m, p := setup(t, []*pluginapi.Device{}, func(n string, a, u, r []pluginapi.Device) {})
socketDir, socketName, pluginSocketName, err := tmpSocketDir()
require.NoError(t, err)
defer os.RemoveAll(socketDir)
m, p := setup(t, []*pluginapi.Device{}, func(n string, a, u, r []pluginapi.Device) {}, socketName, pluginSocketName)
cleanup(t, m, p)
}
@ -59,6 +73,9 @@ func TestNewManagerImplStart(t *testing.T) {
// making sure that after registration, devices are correctly updated and if a re-registration
// happens, we will NOT delete devices; and no orphaned devices left.
func TestDevicePluginReRegistration(t *testing.T) {
socketDir, socketName, pluginSocketName, err := tmpSocketDir()
require.NoError(t, err)
defer os.RemoveAll(socketDir)
devs := []*pluginapi.Device{
{ID: "Dev1", Health: pluginapi.Healthy},
{ID: "Dev2", Health: pluginapi.Healthy},
@ -77,7 +94,7 @@ func TestDevicePluginReRegistration(t *testing.T) {
}
callbackChan <- callbackCount
}
m, p1 := setup(t, devs, callback)
m, p1 := setup(t, devs, callback, socketName, pluginSocketName)
atomic.StoreInt32(&expCallbackCount, 1)
p1.Register(socketName, testResourceName)
// Wait for the first callback to be issued.
@ -87,7 +104,7 @@ func TestDevicePluginReRegistration(t *testing.T) {
require.Equal(t, 2, len(devices[testResourceName]), "Devices are not updated.")
p2 := NewDevicePluginStub(devs, pluginSocketName+".new")
err := p2.Start()
err = p2.Start()
require.NoError(t, err)
atomic.StoreInt32(&expCallbackCount, 2)
p2.Register(socketName, testResourceName)
@ -114,7 +131,7 @@ func TestDevicePluginReRegistration(t *testing.T) {
close(callbackChan)
}
func setup(t *testing.T, devs []*pluginapi.Device, callback monitorCallback) (Manager, *Stub) {
func setup(t *testing.T, devs []*pluginapi.Device, callback monitorCallback, socketName string, pluginSocketName string) (Manager, *Stub) {
m, err := newManagerImpl(socketName)
require.NoError(t, err)
@ -139,6 +156,9 @@ func cleanup(t *testing.T, m Manager, p *Stub) {
}
func TestUpdateCapacityAllocatable(t *testing.T) {
socketDir, socketName, _, err := tmpSocketDir()
require.NoError(t, err)
defer os.RemoveAll(socketDir)
testManager, err := newManagerImpl(socketName)
as := assert.New(t)
as.NotNil(testManager)