From 3f65b3827953a370babccc5f63f54c36448c7dbe Mon Sep 17 00:00:00 2001 From: Clayton Coleman Date: Sun, 28 Jan 2018 14:28:28 -0500 Subject: [PATCH] Only rotate certificates in the background The certificate manager originally had a "block on startup" rotation behavior to ensure at least one rotation happened on startup. However, since rotation may not succeed within the first time window the code was changed to simply print the error rather than return it. This meant that the blocking rotation has no purpose - it cannot cause the kubelet to fail, and it *does* block the kubelet from starting static pods before the api server becomes available. The current block behavior causes a bootstrapped kubelet that is also set to run static pods to wait several minutes before actually launching the static pods, which means self-hosted masters using static pods have a pointless delay on startup. Since blocking rotation has no benefit and can't actually fail startup, this commit removes the blocking behavior and simplifies the code at the same time. The goroutine for rotation now completely owns the deadline, the shouldRotate() method is removed, and the method that sets rotationDeadline now returns it. We also explicitly guard against a negative sleep interval and omit the message. Should have no impact on bootstrapping except the removal of a long delay on startup before static pods start. Also add a guard condition where if the current cert in the store is expired, we fall back to the bootstrap cert initially (we use the bootstrap cert to communicate with the server). This is consistent with when we don't have a cert yet. Kubernetes-commit: 44493de195d89ec43cc7246af921e626e0002c16 --- util/certificate/certificate_manager.go | 82 ++++++++------------ util/certificate/certificate_manager_test.go | 59 ++------------ 2 files changed, 40 insertions(+), 101 deletions(-) diff --git a/util/certificate/certificate_manager.go b/util/certificate/certificate_manager.go index 7ea05ba7..68d90aa7 100644 --- a/util/certificate/certificate_manager.go +++ b/util/certificate/certificate_manager.go @@ -141,7 +141,6 @@ type manager struct { certStore Store certAccessLock sync.RWMutex cert *tls.Certificate - rotationDeadline time.Time forceRotation bool certificateExpiration Gauge serverHealth bool @@ -208,8 +207,7 @@ func (m *manager) SetCertificateSigningRequestClient(certSigningRequestClient ce func (m *manager) Start() { // Certificate rotation depends on access to the API server certificate // signing API, so don't start the certificate manager if we don't have a - // client. This will happen on the cluster master, where the kubelet is - // responsible for bootstrapping the pods of the master components. + // client. if m.certSigningRequestClient == nil { glog.V(2).Infof("Certificate rotation is not enabled, no connection to the apiserver.") return @@ -217,27 +215,18 @@ func (m *manager) Start() { glog.V(2).Infof("Certificate rotation is enabled.") - m.setRotationDeadline() - - // Synchronously request a certificate before entering the background - // loop to allow bootstrap scenarios, where the certificate manager - // doesn't have a certificate at all yet. - if m.shouldRotate() { - glog.V(1).Infof("shouldRotate() is true, forcing immediate rotation") - if _, err := m.rotateCerts(); err != nil { - utilruntime.HandleError(fmt.Errorf("Could not rotate certificates: %v", err)) - } - } - backoff := wait.Backoff{ - Duration: 2 * time.Second, - Factor: 2, - Jitter: 0.1, - Steps: 5, - } go wait.Forever(func() { - sleepInterval := m.rotationDeadline.Sub(time.Now()) - glog.V(2).Infof("Waiting %v for next certificate rotation", sleepInterval) - time.Sleep(sleepInterval) + deadline := m.nextRotationDeadline() + if sleepInterval := deadline.Sub(time.Now()); sleepInterval > 0 { + glog.V(2).Infof("Waiting %v for next certificate rotation", sleepInterval) + time.Sleep(sleepInterval) + } + backoff := wait.Backoff{ + Duration: 2 * time.Second, + Factor: 2, + Jitter: 0.1, + Steps: 5, + } if err := wait.ExponentialBackoff(backoff, m.rotateCerts); err != nil { utilruntime.HandleError(fmt.Errorf("Reached backoff limit, still unable to rotate certs: %v", err)) wait.PollInfinite(32*time.Second, m.rotateCerts) @@ -252,11 +241,14 @@ func getCurrentCertificateOrBootstrap( currentCert, err := store.Current() if err == nil { - return currentCert, false, nil - } - - if _, ok := err.(*NoCertKeyError); !ok { - return nil, false, err + // if the current cert is expired, fall back to the bootstrap cert + if currentCert.Leaf != nil && time.Now().Before(currentCert.Leaf.NotAfter) { + return currentCert, false, nil + } + } else { + if _, ok := err.(*NoCertKeyError); !ok { + return nil, false, err + } } if bootstrapCertificatePEM == nil || bootstrapKeyPEM == nil { @@ -279,20 +271,6 @@ func getCurrentCertificateOrBootstrap( return &bootstrapCert, true, nil } -// shouldRotate looks at how close the current certificate is to expiring and -// decides if it is time to rotate or not. -func (m *manager) shouldRotate() bool { - m.certAccessLock.RLock() - defer m.certAccessLock.RUnlock() - if m.cert == nil { - return true - } - if m.forceRotation { - return true - } - return time.Now().After(m.rotationDeadline) -} - // rotateCerts attempts to request a client cert from the server, wait a reasonable // period of time for it to be signed, and then update the cert on disk. If it cannot // retrieve a cert, it will return false. It will only return error in exceptional cases. @@ -349,30 +327,34 @@ func (m *manager) rotateCerts() (bool, error) { } m.updateCached(cert) - m.setRotationDeadline() - m.forceRotation = false return true, nil } -// setRotationDeadline sets a cached value for the threshold at which the +// nextRotationDeadline returns a value for the threshold at which the // current certificate should be rotated, 80%+/-10% of the expiration of the // certificate. -func (m *manager) setRotationDeadline() { +func (m *manager) nextRotationDeadline() time.Time { + // forceRotation is not protected by locks + if m.forceRotation { + m.forceRotation = false + return time.Now() + } + m.certAccessLock.RLock() defer m.certAccessLock.RUnlock() if m.cert == nil { - m.rotationDeadline = time.Now() - return + return time.Now() } notAfter := m.cert.Leaf.NotAfter totalDuration := float64(notAfter.Sub(m.cert.Leaf.NotBefore)) + deadline := m.cert.Leaf.NotBefore.Add(jitteryDuration(totalDuration)) - m.rotationDeadline = m.cert.Leaf.NotBefore.Add(jitteryDuration(totalDuration)) - glog.V(2).Infof("Certificate expiration is %v, rotation deadline is %v", notAfter, m.rotationDeadline) + glog.V(2).Infof("Certificate expiration is %v, rotation deadline is %v", notAfter, deadline) if m.certificateExpiration != nil { m.certificateExpiration.Set(float64(notAfter.Unix())) } + return deadline } // jitteryDuration uses some jitter to set the rotation threshold so each node diff --git a/util/certificate/certificate_manager_test.go b/util/certificate/certificate_manager_test.go index ab19e37b..c48f7a72 100644 --- a/util/certificate/certificate_manager_test.go +++ b/util/certificate/certificate_manager_test.go @@ -146,46 +146,6 @@ func TestNewManagerNoRotation(t *testing.T) { } } -func TestShouldRotate(t *testing.T) { - now := time.Now() - tests := []struct { - name string - notBefore time.Time - notAfter time.Time - shouldRotate bool - }{ - {"just issued, still good", now.Add(-1 * time.Hour), now.Add(99 * time.Hour), false}, - {"half way expired, still good", now.Add(-24 * time.Hour), now.Add(24 * time.Hour), false}, - {"mostly expired, still good", now.Add(-69 * time.Hour), now.Add(31 * time.Hour), false}, - {"just about expired, should rotate", now.Add(-91 * time.Hour), now.Add(9 * time.Hour), true}, - {"nearly expired, should rotate", now.Add(-99 * time.Hour), now.Add(1 * time.Hour), true}, - {"already expired, should rotate", now.Add(-10 * time.Hour), now.Add(-1 * time.Hour), true}, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - m := manager{ - cert: &tls.Certificate{ - Leaf: &x509.Certificate{ - NotBefore: test.notBefore, - NotAfter: test.notAfter, - }, - }, - template: &x509.CertificateRequest{}, - usages: []certificates.KeyUsage{}, - } - m.setRotationDeadline() - if m.shouldRotate() != test.shouldRotate { - t.Errorf("Time %v, a certificate issued for (%v, %v) should rotate should be %t.", - now, - m.cert.Leaf.NotBefore, - m.cert.Leaf.NotAfter, - test.shouldRotate) - } - }) - } -} - type gaugeMock struct { calls int lastValue float64 @@ -233,13 +193,13 @@ func TestSetRotationDeadline(t *testing.T) { jitteryDuration = func(float64) time.Duration { return time.Duration(float64(tc.notAfter.Sub(tc.notBefore)) * 0.7) } lowerBound := tc.notBefore.Add(time.Duration(float64(tc.notAfter.Sub(tc.notBefore)) * 0.7)) - m.setRotationDeadline() + deadline := m.nextRotationDeadline() - if !m.rotationDeadline.Equal(lowerBound) { + if !deadline.Equal(lowerBound) { t.Errorf("For notBefore %v, notAfter %v, the rotationDeadline %v should be %v.", tc.notBefore, tc.notAfter, - m.rotationDeadline, + deadline, lowerBound) } if g.calls != 1 { @@ -321,7 +281,7 @@ func TestNewManagerBootstrap(t *testing.T) { } if m, ok := cm.(*manager); !ok { t.Errorf("Expected a '*manager' from 'NewManager'") - } else if !m.shouldRotate() { + } else if !m.forceRotation { t.Errorf("Expected rotation should happen during bootstrap, but it won't.") } } @@ -360,9 +320,8 @@ func TestNewManagerNoBootstrap(t *testing.T) { if m, ok := cm.(*manager); !ok { t.Errorf("Expected a '*manager' from 'NewManager'") } else { - m.setRotationDeadline() - if m.shouldRotate() { - t.Errorf("Expected rotation should happen during bootstrap, but it won't.") + if m.forceRotation { + t.Errorf("Expected rotation should not happen during bootstrap, but it won't.") } } } @@ -515,8 +474,7 @@ func TestInitializeCertificateSigningRequestClient(t *testing.T) { if m, ok := certificateManager.(*manager); !ok { t.Errorf("Expected a '*manager' from 'NewManager'") } else { - m.setRotationDeadline() - if m.shouldRotate() { + if m.forceRotation { if success, err := m.rotateCerts(); !success { t.Errorf("Got failure from 'rotateCerts', wanted success.") } else if err != nil { @@ -614,8 +572,7 @@ func TestInitializeOtherRESTClients(t *testing.T) { if m, ok := certificateManager.(*manager); !ok { t.Errorf("Expected a '*manager' from 'NewManager'") } else { - m.setRotationDeadline() - if m.shouldRotate() { + if m.forceRotation { success, err := certificateManager.(*manager).rotateCerts() if err != nil { t.Errorf("Got error %v, expected none.", err)