From 5f3929f75d26c8867461f6947cdf1f8fb64a59ee Mon Sep 17 00:00:00 2001
From: CJ Cullen
Date: Thu, 17 Mar 2016 17:57:04 -0700
Subject: [PATCH] Add a SSHKey sync check to the master's healthz (when using SSHTunnels).

---
Reviewer notes (text in this section is not part of the commit message):

* What the patch does: Tunneler gains SecondsSinceSSHKeySync(); SSHTunneler
  records lastSSHKeySync (seconds since epoch) when Run starts and after each
  InstallSSHKey call that returns no error; IsTunnelSyncHealthy now also
  fails once that lag exceeds 600 seconds.
* The install loop now returns right after logging an InstallSSHKey error, so
  a failed install never refreshes lastSSHKeySync.
* Changed relative to the original submission: the new error string read
  "taking to long"; it now reads "taking too long". The same typo on the
  pre-existing "Tunnel sync" line is context and is left untouched here.
  The post-image blob id on the master.go index line was not recomputed.
* NOTE(review): Run stores lastSSHKeySync with a plain assignment while
  readers use atomic.LoadInt64 - presumably safe because the store precedes
  the sync goroutine; confirm healthz cannot be served before Run.
* NOTE(review): the test comment says "540 second lag" but the two
  timestamps shown are 480 seconds apart (01:01:01 vs 01:09:01).
* NOTE(review): only the pass case sets lastSSHKeySync in the hunk shown; a
  case where only the SSH key lag exceeds 600 seconds would pin the new check.

 pkg/master/master.go        |  4 ++++
 pkg/master/tunneler.go      | 11 +++++++++++
 pkg/master/tunneler_test.go |  1 +
 3 files changed, 16 insertions(+)

diff --git a/pkg/master/master.go b/pkg/master/master.go
index 332f3ada66d..d062c570b0e 100644
--- a/pkg/master/master.go
+++ b/pkg/master/master.go
@@ -858,5 +858,9 @@ func (m *Master) IsTunnelSyncHealthy(req *http.Request) error {
 	if lag > 600 {
 		return fmt.Errorf("Tunnel sync is taking to long: %d", lag)
 	}
+	sshKeyLag := m.tunneler.SecondsSinceSSHKeySync()
+	if sshKeyLag > 600 {
+		return fmt.Errorf("SSHKey sync is taking too long: %d", sshKeyLag)
+	}
 	return nil
 }
diff --git a/pkg/master/tunneler.go b/pkg/master/tunneler.go
index 8a23ce3ff42..2da04849add 100644
--- a/pkg/master/tunneler.go
+++ b/pkg/master/tunneler.go
@@ -41,6 +41,7 @@ type Tunneler interface {
 	Stop()
 	Dial(net, addr string) (net.Conn, error)
 	SecondsSinceSync() int64
+	SecondsSinceSSHKeySync() int64
 }
 
 type SSHTunneler struct {
@@ -51,6 +52,7 @@ type SSHTunneler struct {
 
 	tunnels        *ssh.SSHTunnelList
 	lastSync       int64 // Seconds since Epoch
+	lastSSHKeySync int64 // Seconds since Epoch
 	lastSyncMetric prometheus.GaugeFunc
 	clock          util.Clock
 
@@ -101,6 +103,7 @@ func (c *SSHTunneler) Run(getAddresses AddressFunc) {
 
 	c.tunnels = ssh.NewSSHTunnelList(c.SSHUser, c.SSHKeyfile, c.HealthCheckURL, c.stopChan)
 	// Sync loop to ensure that the SSH key has been installed.
+	c.lastSSHKeySync = c.clock.Now().Unix()
 	c.installSSHKeySyncLoop(c.SSHUser, publicKeyFile)
 	// Sync tunnelList w/ nodes.
 	c.lastSync = c.clock.Now().Unix()
@@ -125,6 +128,12 @@ func (c *SSHTunneler) SecondsSinceSync() int64 {
 	return now - then
 }
 
+func (c *SSHTunneler) SecondsSinceSSHKeySync() int64 {
+	now := c.clock.Now().Unix()
+	then := atomic.LoadInt64(&c.lastSSHKeySync)
+	return now - then
+}
+
 func (c *SSHTunneler) installSSHKeySyncLoop(user, publicKeyfile string) {
 	go wait.Until(func() {
 		if c.InstallSSHKey == nil {
@@ -143,7 +152,9 @@ func (c *SSHTunneler) installSSHKeySyncLoop(user, publicKeyfile string) {
 		}
 		if err := c.InstallSSHKey(user, keyData); err != nil {
 			glog.Errorf("Failed to install ssh key: %v", err)
+			return
 		}
+		atomic.StoreInt64(&c.lastSSHKeySync, c.clock.Now().Unix())
 	}, 5*time.Minute, c.stopChan)
 }
 
diff --git a/pkg/master/tunneler_test.go b/pkg/master/tunneler_test.go
index 665292607f6..3f208ee122c 100644
--- a/pkg/master/tunneler_test.go
+++ b/pkg/master/tunneler_test.go
@@ -76,6 +76,7 @@ func TestIsTunnelSyncHealthy(t *testing.T) {
 
 	// Pass case: 540 second lag
 	tunneler.lastSync = time.Date(2015, time.January, 1, 1, 1, 1, 1, time.UTC).Unix()
+	tunneler.lastSSHKeySync = time.Date(2015, time.January, 1, 1, 1, 1, 1, time.UTC).Unix()
 	tunneler.clock = util.NewFakeClock(time.Date(2015, time.January, 1, 1, 9, 1, 1, time.UTC))
 	err := master.IsTunnelSyncHealthy(nil)
 	assert.NoError(err, "IsTunnelSyncHealthy() should not have returned an error.")