Only rotate certificates in the background

The certificate manager originally had a "block on startup" rotation
behavior to ensure at least one rotation happened on startup. However,
since rotation may not succeed within the first time window the code was
changed to simply print the error rather than return it. This meant that
the blocking rotation has no purpose - it cannot cause the kubelet to
fail, and it *does* block the kubelet from starting static pods before
the api server becomes available.

The current block behavior causes a bootstrapped kubelet that is also
set to run static pods to wait several minutes before actually launching
the static pods, which means self-hosted masters using static pods have
a pointless delay on startup.

Since blocking rotation has no benefit and can't actually fail startup,
this commit removes the blocking behavior and simplifies the code at the
same time. The goroutine for rotation now completely owns the deadline,
the shouldRotate() method is removed, and the method that sets
rotationDeadline now returns it. We also explicitly guard against a
negative sleep interval and omit the message.

Should have no impact on bootstrapping except the removal of a long
delay on startup before static pods start.

Also add a guard condition where if the current cert in the store is
expired, we fall back to the bootstrap cert initially (we use the
bootstrap cert to communicate with the server). This is consistent with
when we don't have a cert yet.

Kubernetes-commit: 44493de195d89ec43cc7246af921e626e0002c16
This commit is contained in:
Clayton Coleman 2018-01-28 14:28:28 -05:00 committed by Kubernetes Publisher
parent 4b76cf9824
commit 3f65b38279
2 changed files with 40 additions and 101 deletions

View File

@ -141,7 +141,6 @@ type manager struct {
certStore Store
certAccessLock sync.RWMutex
cert *tls.Certificate
rotationDeadline time.Time
forceRotation bool
certificateExpiration Gauge
serverHealth bool
@ -208,8 +207,7 @@ func (m *manager) SetCertificateSigningRequestClient(certSigningRequestClient ce
func (m *manager) Start() {
// Certificate rotation depends on access to the API server certificate
// signing API, so don't start the certificate manager if we don't have a
// client. This will happen on the cluster master, where the kubelet is
// responsible for bootstrapping the pods of the master components.
// client.
if m.certSigningRequestClient == nil {
glog.V(2).Infof("Certificate rotation is not enabled, no connection to the apiserver.")
return
@ -217,16 +215,11 @@ func (m *manager) Start() {
glog.V(2).Infof("Certificate rotation is enabled.")
m.setRotationDeadline()
// Synchronously request a certificate before entering the background
// loop to allow bootstrap scenarios, where the certificate manager
// doesn't have a certificate at all yet.
if m.shouldRotate() {
glog.V(1).Infof("shouldRotate() is true, forcing immediate rotation")
if _, err := m.rotateCerts(); err != nil {
utilruntime.HandleError(fmt.Errorf("Could not rotate certificates: %v", err))
}
go wait.Forever(func() {
deadline := m.nextRotationDeadline()
if sleepInterval := deadline.Sub(time.Now()); sleepInterval > 0 {
glog.V(2).Infof("Waiting %v for next certificate rotation", sleepInterval)
time.Sleep(sleepInterval)
}
backoff := wait.Backoff{
Duration: 2 * time.Second,
@ -234,10 +227,6 @@ func (m *manager) Start() {
Jitter: 0.1,
Steps: 5,
}
go wait.Forever(func() {
sleepInterval := m.rotationDeadline.Sub(time.Now())
glog.V(2).Infof("Waiting %v for next certificate rotation", sleepInterval)
time.Sleep(sleepInterval)
if err := wait.ExponentialBackoff(backoff, m.rotateCerts); err != nil {
utilruntime.HandleError(fmt.Errorf("Reached backoff limit, still unable to rotate certs: %v", err))
wait.PollInfinite(32*time.Second, m.rotateCerts)
@ -252,12 +241,15 @@ func getCurrentCertificateOrBootstrap(
currentCert, err := store.Current()
if err == nil {
// if the current cert is expired, fall back to the bootstrap cert
if currentCert.Leaf != nil && time.Now().Before(currentCert.Leaf.NotAfter) {
return currentCert, false, nil
}
} else {
if _, ok := err.(*NoCertKeyError); !ok {
return nil, false, err
}
}
if bootstrapCertificatePEM == nil || bootstrapKeyPEM == nil {
return nil, true, nil
@ -279,20 +271,6 @@ func getCurrentCertificateOrBootstrap(
return &bootstrapCert, true, nil
}
// shouldRotate looks at how close the current certificate is to expiring and
// decides if it is time to rotate or not.
func (m *manager) shouldRotate() bool {
m.certAccessLock.RLock()
defer m.certAccessLock.RUnlock()
if m.cert == nil {
return true
}
if m.forceRotation {
return true
}
return time.Now().After(m.rotationDeadline)
}
// rotateCerts attempts to request a client cert from the server, wait a reasonable
// period of time for it to be signed, and then update the cert on disk. If it cannot
// retrieve a cert, it will return false. It will only return error in exceptional cases.
@ -349,30 +327,34 @@ func (m *manager) rotateCerts() (bool, error) {
}
m.updateCached(cert)
m.setRotationDeadline()
m.forceRotation = false
return true, nil
}
// setRotationDeadline sets a cached value for the threshold at which the
// nextRotationDeadline returns a value for the threshold at which the
// current certificate should be rotated, 80%+/-10% of the expiration of the
// certificate.
func (m *manager) setRotationDeadline() {
func (m *manager) nextRotationDeadline() time.Time {
// forceRotation is not protected by locks
if m.forceRotation {
m.forceRotation = false
return time.Now()
}
m.certAccessLock.RLock()
defer m.certAccessLock.RUnlock()
if m.cert == nil {
m.rotationDeadline = time.Now()
return
return time.Now()
}
notAfter := m.cert.Leaf.NotAfter
totalDuration := float64(notAfter.Sub(m.cert.Leaf.NotBefore))
deadline := m.cert.Leaf.NotBefore.Add(jitteryDuration(totalDuration))
m.rotationDeadline = m.cert.Leaf.NotBefore.Add(jitteryDuration(totalDuration))
glog.V(2).Infof("Certificate expiration is %v, rotation deadline is %v", notAfter, m.rotationDeadline)
glog.V(2).Infof("Certificate expiration is %v, rotation deadline is %v", notAfter, deadline)
if m.certificateExpiration != nil {
m.certificateExpiration.Set(float64(notAfter.Unix()))
}
return deadline
}
// jitteryDuration uses some jitter to set the rotation threshold so each node

View File

@ -146,46 +146,6 @@ func TestNewManagerNoRotation(t *testing.T) {
}
}
func TestShouldRotate(t *testing.T) {
now := time.Now()
tests := []struct {
name string
notBefore time.Time
notAfter time.Time
shouldRotate bool
}{
{"just issued, still good", now.Add(-1 * time.Hour), now.Add(99 * time.Hour), false},
{"half way expired, still good", now.Add(-24 * time.Hour), now.Add(24 * time.Hour), false},
{"mostly expired, still good", now.Add(-69 * time.Hour), now.Add(31 * time.Hour), false},
{"just about expired, should rotate", now.Add(-91 * time.Hour), now.Add(9 * time.Hour), true},
{"nearly expired, should rotate", now.Add(-99 * time.Hour), now.Add(1 * time.Hour), true},
{"already expired, should rotate", now.Add(-10 * time.Hour), now.Add(-1 * time.Hour), true},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
m := manager{
cert: &tls.Certificate{
Leaf: &x509.Certificate{
NotBefore: test.notBefore,
NotAfter: test.notAfter,
},
},
template: &x509.CertificateRequest{},
usages: []certificates.KeyUsage{},
}
m.setRotationDeadline()
if m.shouldRotate() != test.shouldRotate {
t.Errorf("Time %v, a certificate issued for (%v, %v) should rotate should be %t.",
now,
m.cert.Leaf.NotBefore,
m.cert.Leaf.NotAfter,
test.shouldRotate)
}
})
}
}
type gaugeMock struct {
calls int
lastValue float64
@ -233,13 +193,13 @@ func TestSetRotationDeadline(t *testing.T) {
jitteryDuration = func(float64) time.Duration { return time.Duration(float64(tc.notAfter.Sub(tc.notBefore)) * 0.7) }
lowerBound := tc.notBefore.Add(time.Duration(float64(tc.notAfter.Sub(tc.notBefore)) * 0.7))
m.setRotationDeadline()
deadline := m.nextRotationDeadline()
if !m.rotationDeadline.Equal(lowerBound) {
if !deadline.Equal(lowerBound) {
t.Errorf("For notBefore %v, notAfter %v, the rotationDeadline %v should be %v.",
tc.notBefore,
tc.notAfter,
m.rotationDeadline,
deadline,
lowerBound)
}
if g.calls != 1 {
@ -321,7 +281,7 @@ func TestNewManagerBootstrap(t *testing.T) {
}
if m, ok := cm.(*manager); !ok {
t.Errorf("Expected a '*manager' from 'NewManager'")
} else if !m.shouldRotate() {
} else if !m.forceRotation {
t.Errorf("Expected rotation should happen during bootstrap, but it won't.")
}
}
@ -360,9 +320,8 @@ func TestNewManagerNoBootstrap(t *testing.T) {
if m, ok := cm.(*manager); !ok {
t.Errorf("Expected a '*manager' from 'NewManager'")
} else {
m.setRotationDeadline()
if m.shouldRotate() {
t.Errorf("Expected rotation should happen during bootstrap, but it won't.")
if m.forceRotation {
t.Errorf("Expected rotation should not happen during bootstrap, but it won't.")
}
}
}
@ -515,8 +474,7 @@ func TestInitializeCertificateSigningRequestClient(t *testing.T) {
if m, ok := certificateManager.(*manager); !ok {
t.Errorf("Expected a '*manager' from 'NewManager'")
} else {
m.setRotationDeadline()
if m.shouldRotate() {
if m.forceRotation {
if success, err := m.rotateCerts(); !success {
t.Errorf("Got failure from 'rotateCerts', wanted success.")
} else if err != nil {
@ -614,8 +572,7 @@ func TestInitializeOtherRESTClients(t *testing.T) {
if m, ok := certificateManager.(*manager); !ok {
t.Errorf("Expected a '*manager' from 'NewManager'")
} else {
m.setRotationDeadline()
if m.shouldRotate() {
if m.forceRotation {
success, err := certificateManager.(*manager).rotateCerts()
if err != nil {
t.Errorf("Got error %v, expected none.", err)