diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index 60940935..a2ca311b 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -344,11 +344,11 @@ }, { "ImportPath": "k8s.io/api", - "Rev": "b98ecd433b91" + "Rev": "86199361f0af" }, { "ImportPath": "k8s.io/apimachinery", - "Rev": "50aa20a7b23f" + "Rev": "a55cb1b94bd3" }, { "ImportPath": "k8s.io/gengo", diff --git a/go.mod b/go.mod index 4fcc5fe8..cfff61a9 100644 --- a/go.mod +++ b/go.mod @@ -28,8 +28,8 @@ require ( golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 golang.org/x/time v0.0.0-20181108054448-85acf8d2951c google.golang.org/appengine v1.5.0 // indirect - k8s.io/api v0.0.0-20191102065807-b98ecd433b91 - k8s.io/apimachinery v0.0.0-20191102025618-50aa20a7b23f + k8s.io/api v0.0.0-20191104185821-86199361f0af + k8s.io/apimachinery v0.0.0-20191104185628-a55cb1b94bd3 k8s.io/klog v1.0.0 k8s.io/utils v0.0.0-20191030222137-2b95a09bc58d sigs.k8s.io/yaml v1.1.0 @@ -44,6 +44,6 @@ replace ( golang.org/x/sys => golang.org/x/sys v0.0.0-20190209173611-3b5209105503 golang.org/x/text => golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db golang.org/x/time => golang.org/x/time v0.0.0-20161028155119-f51c12702a4d - k8s.io/api => k8s.io/api v0.0.0-20191102065807-b98ecd433b91 - k8s.io/apimachinery => k8s.io/apimachinery v0.0.0-20191102025618-50aa20a7b23f + k8s.io/api => k8s.io/api v0.0.0-20191104185821-86199361f0af + k8s.io/apimachinery => k8s.io/apimachinery v0.0.0-20191104185628-a55cb1b94bd3 ) diff --git a/go.sum b/go.sum index 9a3226cb..2933cd1c 100644 --- a/go.sum +++ b/go.sum @@ -174,8 +174,8 @@ gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -k8s.io/api v0.0.0-20191102065807-b98ecd433b91/go.mod h1:o5KeABD7wGV2jASWXz27PT4gGWdB5zA9kOg3iFzXTcY= -k8s.io/apimachinery v0.0.0-20191102025618-50aa20a7b23f/go.mod h1:gA1T9z4LIup7PIegBwxkF2UYXUNVKhOAPvQWWnAc34k= +k8s.io/api v0.0.0-20191104185821-86199361f0af/go.mod h1:6Z4OXN69aR5stkJdnH2bnMwbWCUtnz8QjNPM7vjy/rk= +k8s.io/apimachinery v0.0.0-20191104185628-a55cb1b94bd3/go.mod h1:gA1T9z4LIup7PIegBwxkF2UYXUNVKhOAPvQWWnAc34k= k8s.io/gengo v0.0.0-20190128074634-0689ccc1d7d6/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= k8s.io/klog v0.0.0-20181102134211-b9b56d5dfc92/go.mod h1:Gq+BEi5rUBO/HRz0bTSXDUcqjScdoY3a9IHpCEIOOfk= k8s.io/klog v0.3.0/go.mod h1:Gq+BEi5rUBO/HRz0bTSXDUcqjScdoY3a9IHpCEIOOfk= diff --git a/util/certificate/certificate_manager.go b/util/certificate/certificate_manager.go index ef387915..55f5f5cb 100644 --- a/util/certificate/certificate_manager.go +++ b/util/certificate/certificate_manager.go @@ -113,8 +113,17 @@ type Config struct { // quickly replaced with a unique cert/key pair. BootstrapKeyPEM []byte // CertificateExpiration will record a metric that shows the remaining - // lifetime of the certificate. + // lifetime of the certificate. This metric is a gauge because only the + // current cert expiry time is really useful. Reading this metric at any + // time simply gives the next expiration date, no need to keep some + // history (histogram) of all previous expiry dates. CertificateExpiration Gauge + // CertificateRotation will record a metric showing the time in seconds + // that certificates lived before being rotated. This metric is a histogram + // because there is value in keeping a history of rotation cadences. It + // allows one to setup monitoring and alerting of unexpected rotation + // behavior and track trends in rotation frequency. + CertificateRotation Histogram } // Store is responsible for getting and updating the current certificate. @@ -139,6 +148,12 @@ type Gauge interface { Set(float64) } +// Histogram will record the time a rotated certificate was used before being +// rotated. +type Histogram interface { + Observe(float64) +} + // NoCertKeyError indicates there is no cert/key currently available. type NoCertKeyError string @@ -163,6 +178,7 @@ type manager struct { certStore Store certificateExpiration Gauge + certificateRotation Histogram // the following variables must only be accessed under certAccessLock certAccessLock sync.RWMutex @@ -174,6 +190,9 @@ type manager struct { clientFn CSRClientFunc stopCh chan struct{} stopped bool + + // Set to time.Now but can be stubbed out for testing + now func() time.Time } // NewManager returns a new certificate manager. A certificate manager is @@ -203,6 +222,8 @@ func NewManager(config *Config) (Manager, error) { cert: cert, forceRotation: forceRotation, certificateExpiration: config.CertificateExpiration, + certificateRotation: config.CertificateRotation, + now: time.Now, } return &m, nil @@ -215,7 +236,7 @@ func NewManager(config *Config) (Manager, error) { func (m *manager) Current() *tls.Certificate { m.certAccessLock.RLock() defer m.certAccessLock.RUnlock() - if m.cert != nil && m.cert.Leaf != nil && time.Now().After(m.cert.Leaf.NotAfter) { + if m.cert != nil && m.cert.Leaf != nil && m.now().After(m.cert.Leaf.NotAfter) { klog.V(2).Infof("Current certificate is expired.") return nil } @@ -256,7 +277,7 @@ func (m *manager) Start() { templateChanged := make(chan struct{}) go wait.Until(func() { deadline := m.nextRotationDeadline() - if sleepInterval := deadline.Sub(time.Now()); sleepInterval > 0 { + if sleepInterval := deadline.Sub(m.now()); sleepInterval > 0 { klog.V(2).Infof("Waiting %v for next certificate rotation", sleepInterval) timer := time.NewTimer(sleepInterval) @@ -421,7 +442,10 @@ func (m *manager) rotateCerts() (bool, error) { return false, nil } - m.updateCached(cert) + if old := m.updateCached(cert); old != nil && m.certificateRotation != nil { + m.certificateRotation.Observe(m.now().Sub(old.Leaf.NotBefore).Seconds()) + } + return true, nil } @@ -490,14 +514,14 @@ func (m *manager) nextRotationDeadline() time.Time { // forceRotation is not protected by locks if m.forceRotation { m.forceRotation = false - return time.Now() + return m.now() } m.certAccessLock.RLock() defer m.certAccessLock.RUnlock() if !m.certSatisfiesTemplateLocked() { - return time.Now() + return m.now() } notAfter := m.cert.Leaf.NotAfter @@ -523,13 +547,15 @@ var jitteryDuration = func(totalDuration float64) time.Duration { return wait.Jitter(time.Duration(totalDuration), 0.2) - time.Duration(totalDuration*0.3) } -// updateCached sets the most recent retrieved cert. It also sets the server -// as assumed healthy. -func (m *manager) updateCached(cert *tls.Certificate) { +// updateCached sets the most recent retrieved cert and returns the old cert. +// It also sets the server as assumed healthy. +func (m *manager) updateCached(cert *tls.Certificate) *tls.Certificate { m.certAccessLock.Lock() defer m.certAccessLock.Unlock() m.serverHealth = true + old := m.cert m.cert = cert + return old } // updateServerError takes an error returned by the server and infers diff --git a/util/certificate/certificate_manager_test.go b/util/certificate/certificate_manager_test.go index 671cffc4..85969857 100644 --- a/util/certificate/certificate_manager_test.go +++ b/util/certificate/certificate_manager_test.go @@ -163,12 +163,17 @@ func TestNewManagerNoRotation(t *testing.T) { } } -type gaugeMock struct { +type metricMock struct { calls int lastValue float64 } -func (g *gaugeMock) Set(v float64) { +func (g *metricMock) Set(v float64) { + g.calls++ + g.lastValue = v +} + +func (g *metricMock) Observe(v float64) { g.calls++ g.lastValue = v } @@ -195,7 +200,7 @@ func TestSetRotationDeadline(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - g := gaugeMock{} + g := metricMock{} m := manager{ cert: &tls.Certificate{ Leaf: &x509.Certificate{ @@ -206,6 +211,7 @@ func TestSetRotationDeadline(t *testing.T) { getTemplate: func() *x509.CertificateRequest { return &x509.CertificateRequest{} }, usages: []certificates.KeyUsage{}, certificateExpiration: &g, + now: func() time.Time { return now }, } jitteryDuration = func(float64) time.Duration { return time.Duration(float64(tc.notAfter.Sub(tc.notBefore)) * 0.7) } lowerBound := tc.notBefore.Add(time.Duration(float64(tc.notAfter.Sub(tc.notBefore)) * 0.7)) @@ -383,6 +389,7 @@ func TestCertSatisfiesTemplate(t *testing.T) { m := manager{ cert: tlsCert, getTemplate: func() *x509.CertificateRequest { return tc.template }, + now: time.Now, } result := m.certSatisfiesTemplate() @@ -407,6 +414,7 @@ func TestRotateCertCreateCSRError(t *testing.T) { clientFn: func(_ *tls.Certificate) (certificatesclient.CertificateSigningRequestInterface, error) { return fakeClient{failureType: createError}, nil }, + now: func() time.Time { return now }, } if success, err := m.rotateCerts(); success { @@ -430,6 +438,7 @@ func TestRotateCertWaitingForResultError(t *testing.T) { clientFn: func(_ *tls.Certificate) (certificatesclient.CertificateSigningRequestInterface, error) { return fakeClient{failureType: watchError}, nil }, + now: func() time.Time { return now }, } defer func(t time.Duration) { certificateWaitTimeout = t }(certificateWaitTimeout) @@ -945,6 +954,40 @@ func TestServerHealth(t *testing.T) { } } +func TestRotationLogsDuration(t *testing.T) { + h := metricMock{} + now := time.Now() + certIss := now.Add(-2 * time.Hour) + m := manager{ + cert: &tls.Certificate{ + Leaf: &x509.Certificate{ + NotBefore: certIss, + NotAfter: now.Add(-1 * time.Hour), + }, + }, + certStore: &fakeStore{cert: expiredStoreCertData.certificate}, + getTemplate: func() *x509.CertificateRequest { return &x509.CertificateRequest{} }, + clientFn: func(_ *tls.Certificate) (certificatesclient.CertificateSigningRequestInterface, error) { + return &fakeClient{ + certificatePEM: apiServerCertData.certificatePEM, + }, nil + }, + certificateRotation: &h, + now: func() time.Time { return now }, + } + ok, err := m.rotateCerts() + if err != nil || !ok { + t.Errorf("failed to rotate certs: %v", err) + } + if h.calls != 1 { + t.Errorf("rotation metric was not called") + } + if h.lastValue != now.Sub(certIss).Seconds() { + t.Errorf("rotation metric did not record the right value got: %f; want %f", h.lastValue, now.Sub(certIss).Seconds()) + } + +} + type fakeClientFailureType int const (