From 39159c379b2e5c500b8aad9a47e661ffec2e40d5 Mon Sep 17 00:00:00 2001 From: Clayton Coleman Date: Tue, 16 Oct 2018 12:52:47 -0400 Subject: [PATCH 1/2] Make bootstrap client cert loading part of rotation Ensure that bootstrap+clientcert-rotation in the Kubelet can: 1. happen in the background so that static pods aren't blocked by bootstrap 2. collapse down to a single call path for requesting a CSR 3. reorganize the code to allow future flexibility in retrieving bootstrap creds Fetching the first certificate and later certificates when the kubelet is using client rotation and bootstrapping should share the same code path. We also want to start the Kubelet static pod loop before bootstrapping completes. Finally, we want to take an incremental step towards improving how the bootstrap credentials are loaded from disk (potentially allowing for a CLI call to get credentials, or a remote plugin that better integrates with cloud providers or KSMs). Reorganize how the kubelet client config is determined. If rotation is off, simplify the code path. If rotation is on, load the config from disk, and then pass that into the cert manager. The cert manager creates a client each time it tries to request a new cert. Preserve existing behavior where: 1. bootstrap kubeconfig is used if the current kubeconfig is invalid/expired 2. we create the kubeconfig file based on the bootstrap kubeconfig, pointing to the location that new client certs will be placed 3. the newest client cert is used once it has been loaded Kubernetes-commit: 0af19875add7deb562b2cf7bf6b1d273c44bab1b --- util/certificate/certificate_manager.go | 104 +++++++++++-------- util/certificate/certificate_manager_test.go | 75 ++++++++++--- 2 files changed, 116 insertions(+), 63 deletions(-) diff --git a/util/certificate/certificate_manager.go b/util/certificate/certificate_manager.go index ed74559e..a4b427ee 100644 --- a/util/certificate/certificate_manager.go +++ b/util/certificate/certificate_manager.go @@ -48,9 +48,6 @@ var certificateWaitBackoff = wait.Backoff{Duration: 30 * time.Second, Steps: 4, // manager. In the background it communicates with the API server to get new // certificates for certificates about to expire. type Manager interface { - // CertificateSigningRequestClient sets the client interface that is used for - // signing new certificates generated as part of rotation. - SetCertificateSigningRequestClient(certificatesclient.CertificateSigningRequestInterface) error // Start the API server status sync loop. Start() // Current returns the currently selected certificate from the @@ -67,11 +64,11 @@ type Manager interface { // Config is the set of configuration parameters available for a new Manager. type Config struct { - // CertificateSigningRequestClient will be used for signing new certificate - // requests generated when a key rotation occurs. It must be set either at - // initialization or by using CertificateSigningRequestClient before - // Manager.Start() is called. - CertificateSigningRequestClient certificatesclient.CertificateSigningRequestInterface + // ClientFn will be used to create a client for + // signing new certificate requests generated when a key rotation occurs. + // It must be set at initialization. The function will never be invoked + // in parallel. It is passed the current client certificate if one exists. + ClientFn CSRClientFunc // Template is the CertificateRequest that will be used as a template for // generating certificate signing requests for all new keys generated as // part of rotation. It follows the same rules as the template parameter of @@ -141,21 +138,32 @@ type Gauge interface { // NoCertKeyError indicates there is no cert/key currently available. type NoCertKeyError string +// CSRClientFunc returns a new client for requesting CSRs. It passes the +// current certificate if one is available and valid. +type CSRClientFunc func(current *tls.Certificate) (certificatesclient.CertificateSigningRequestInterface, error) + func (e *NoCertKeyError) Error() string { return string(*e) } type manager struct { - certSigningRequestClient certificatesclient.CertificateSigningRequestInterface - getTemplate func() *x509.CertificateRequest - lastRequestLock sync.Mutex - lastRequest *x509.CertificateRequest - dynamicTemplate bool - usages []certificates.KeyUsage - certStore Store - certAccessLock sync.RWMutex - cert *tls.Certificate - forceRotation bool - certificateExpiration Gauge - serverHealth bool + getTemplate func() *x509.CertificateRequest + lastRequestLock sync.Mutex + lastRequest *x509.CertificateRequest + dynamicTemplate bool + usages []certificates.KeyUsage + forceRotation bool + + certStore Store + + certificateExpiration Gauge + + // the following variables must only be accessed under certAccessLock + certAccessLock sync.RWMutex + cert *tls.Certificate + serverHealth bool + + // the clientFn must only be accessed under the clientAccessLock + clientAccessLock sync.Mutex + clientFn CSRClientFunc } // NewManager returns a new certificate manager. A certificate manager is @@ -176,14 +184,14 @@ func NewManager(config *Config) (Manager, error) { } m := manager{ - certSigningRequestClient: config.CertificateSigningRequestClient, - getTemplate: getTemplate, - dynamicTemplate: config.GetTemplate != nil, - usages: config.Usages, - certStore: config.CertificateStore, - cert: cert, - forceRotation: forceRotation, - certificateExpiration: config.CertificateExpiration, + clientFn: config.ClientFn, + getTemplate: getTemplate, + dynamicTemplate: config.GetTemplate != nil, + usages: config.Usages, + certStore: config.CertificateStore, + cert: cert, + forceRotation: forceRotation, + certificateExpiration: config.CertificateExpiration, } return &m, nil @@ -192,10 +200,14 @@ func NewManager(config *Config) (Manager, error) { // Current returns the currently selected certificate from the certificate // manager. This can be nil if the manager was initialized without a // certificate and has not yet received one from the -// CertificateSigningRequestClient. +// CertificateSigningRequestClient, or if the current cert has expired. func (m *manager) Current() *tls.Certificate { m.certAccessLock.RLock() defer m.certAccessLock.RUnlock() + if m.cert != nil && m.cert.Leaf != nil && time.Now().After(m.cert.Leaf.NotAfter) { + klog.V(2).Infof("Current certificate is expired.") + return nil + } return m.cert } @@ -207,26 +219,12 @@ func (m *manager) ServerHealthy() bool { return m.serverHealth } -// SetCertificateSigningRequestClient sets the client interface that is used -// for signing new certificates generated as part of rotation. It must be -// called before Start() and can not be used to change the -// CertificateSigningRequestClient that has already been set. This method is to -// support the one specific scenario where the CertificateSigningRequestClient -// uses the CertificateManager. -func (m *manager) SetCertificateSigningRequestClient(certSigningRequestClient certificatesclient.CertificateSigningRequestInterface) error { - if m.certSigningRequestClient == nil { - m.certSigningRequestClient = certSigningRequestClient - return nil - } - return fmt.Errorf("property CertificateSigningRequestClient is already set") -} - // Start will start the background work of rotating the certificates. func (m *manager) Start() { // Certificate rotation depends on access to the API server certificate // signing API, so don't start the certificate manager if we don't have a // client. - if m.certSigningRequestClient == nil { + if m.clientFn == nil { klog.V(2).Infof("Certificate rotation is not enabled, no connection to the apiserver.") return } @@ -327,6 +325,13 @@ func getCurrentCertificateOrBootstrap( return &bootstrapCert, true, nil } +func (m *manager) getClient() (certificatesclient.CertificateSigningRequestInterface, error) { + current := m.Current() + m.clientAccessLock.Lock() + defer m.clientAccessLock.Unlock() + return m.clientFn(current) +} + // rotateCerts attempts to request a client cert from the server, wait a reasonable // period of time for it to be signed, and then update the cert on disk. If it cannot // retrieve a cert, it will return false. It will only return error in exceptional cases. @@ -341,9 +346,16 @@ func (m *manager) rotateCerts() (bool, error) { return false, nil } + // request the client each time + client, err := m.getClient() + if err != nil { + utilruntime.HandleError(fmt.Errorf("Unable to load a client to request certificates: %v", err)) + return false, nil + } + // Call the Certificate Signing Request API to get a certificate for the // new private key. - req, err := csr.RequestCertificate(m.certSigningRequestClient, csrPEM, "", m.usages, privateKey) + req, err := csr.RequestCertificate(client, csrPEM, "", m.usages, privateKey) if err != nil { utilruntime.HandleError(fmt.Errorf("Failed while requesting a signed certificate from the master: %v", err)) return false, m.updateServerError(err) @@ -359,7 +371,7 @@ func (m *manager) rotateCerts() (bool, error) { var crtPEM []byte watchDuration := time.Minute if err := wait.ExponentialBackoff(certificateWaitBackoff, func() (bool, error) { - data, err := csr.WaitForCertificate(m.certSigningRequestClient, req, watchDuration) + data, err := csr.WaitForCertificate(client, req, watchDuration) switch { case err == nil: crtPEM = data diff --git a/util/certificate/certificate_manager_test.go b/util/certificate/certificate_manager_test.go index 545097ea..3ca7f767 100644 --- a/util/certificate/certificate_manager_test.go +++ b/util/certificate/certificate_manager_test.go @@ -60,6 +60,23 @@ iQIgZX08DA8VfvcA5/Xj1Zjdey9FVY6POLXen6RPiabE97UCICp6eUW7ht+2jjar e35EltCRCjoejRHTuN9TC0uCoVipAiAXaJIx/Q47vGwiw6Y8KXsNU6y54gTbOSxX 54LzHNk/+Q== -----END RSA PRIVATE KEY-----`) +var expiredStoreCertData = newCertificateData(`-----BEGIN CERTIFICATE----- +MIIBFzCBwgIJALhygXnxXmN1MA0GCSqGSIb3DQEBCwUAMBMxETAPBgNVBAMMCGhv +c3QtMTIzMB4XDTE4MTEwNDIzNTc1NFoXDTE4MTEwNTIzNTc1NFowEzERMA8GA1UE +AwwIaG9zdC0xMjMwXDANBgkqhkiG9w0BAQEFAANLADBIAkEAtBMa7NWpv3BVlKTC +PGO/LEsguKqWHBtKzweMY2CVtAL1rQm913huhxF9w+ai76KQ3MHK5IVnLJjYYA5M +zP2H5QIDAQABMA0GCSqGSIb3DQEBCwUAA0EAN2DPFUtCzqnidL+5nh+46Sk6dkMI +T5DD11UuuIjZusKvThsHKVCIsyJ2bDo7cTbI+/nklLRP+FcC2wESFUgXbA== +-----END CERTIFICATE-----`, `-----BEGIN RSA PRIVATE KEY----- +MIIBUwIBADANBgkqhkiG9w0BAQEFAASCAT0wggE5AgEAAkEAtBMa7NWpv3BVlKTC +PGO/LEsguKqWHBtKzweMY2CVtAL1rQm913huhxF9w+ai76KQ3MHK5IVnLJjYYA5M +zP2H5QIDAQABAkAS9BfXab3OKpK3bIgNNyp+DQJKrZnTJ4Q+OjsqkpXvNltPJosf +G8GsiKu/vAt4HGqI3eU77NvRI+mL4MnHRmXBAiEA3qM4FAtKSRBbcJzPxxLEUSwg +XSCcosCktbkXvpYrS30CIQDPDxgqlwDEJQ0uKuHkZI38/SPWWqfUmkecwlbpXABK +iQIgZX08DA8VfvcA5/Xj1Zjdey9FVY6POLXen6RPiabE97UCICp6eUW7ht+2jjar +e35EltCRCjoejRHTuN9TC0uCoVipAiAXaJIx/Q47vGwiw6Y8KXsNU6y54gTbOSxX +54LzHNk/+Q== +-----END RSA PRIVATE KEY-----`) var bootstrapCertData = newCertificateData( `-----BEGIN CERTIFICATE----- MIICRzCCAfGgAwIBAgIJANXr+UzRFq4TMA0GCSqGSIb3DQEBCwUAMH4xCzAJBgNV @@ -388,8 +405,8 @@ func TestRotateCertCreateCSRError(t *testing.T) { }, getTemplate: func() *x509.CertificateRequest { return &x509.CertificateRequest{} }, usages: []certificates.KeyUsage{}, - certSigningRequestClient: fakeClient{ - failureType: createError, + clientFn: func(_ *tls.Certificate) (certificatesclient.CertificateSigningRequestInterface, error) { + return fakeClient{failureType: createError}, nil }, } @@ -411,8 +428,8 @@ func TestRotateCertWaitingForResultError(t *testing.T) { }, getTemplate: func() *x509.CertificateRequest { return &x509.CertificateRequest{} }, usages: []certificates.KeyUsage{}, - certSigningRequestClient: fakeClient{ - failureType: watchError, + clientFn: func(_ *tls.Certificate) (certificatesclient.CertificateSigningRequestInterface, error) { + return fakeClient{failureType: watchError}, nil }, } @@ -598,6 +615,14 @@ func TestInitializeCertificateSigningRequestClient(t *testing.T) { expectedCertBeforeStart: storeCertData, expectedCertAfterStart: storeCertData, }, + { + description: "Current certificate expired, no bootstrap certificate", + storeCert: expiredStoreCertData, + bootstrapCert: nilCertificate, + apiCert: apiServerCertData, + expectedCertBeforeStart: nil, + expectedCertAfterStart: apiServerCertData, + }, } for _, tc := range testCases { @@ -621,19 +646,25 @@ func TestInitializeCertificateSigningRequestClient(t *testing.T) { CertificateStore: certificateStore, BootstrapCertificatePEM: tc.bootstrapCert.certificatePEM, BootstrapKeyPEM: tc.bootstrapCert.keyPEM, + ClientFn: func(_ *tls.Certificate) (certificatesclient.CertificateSigningRequestInterface, error) { + return &fakeClient{ + certificatePEM: tc.apiCert.certificatePEM, + }, nil + }, }) if err != nil { t.Errorf("Got %v, wanted no error.", err) } certificate := certificateManager.Current() - if !certificatesEqual(certificate, tc.expectedCertBeforeStart.certificate) { - t.Errorf("Got %v, wanted %v", certificateString(certificate), certificateString(tc.expectedCertBeforeStart.certificate)) - } - if err := certificateManager.SetCertificateSigningRequestClient(&fakeClient{ - certificatePEM: tc.apiCert.certificatePEM, - }); err != nil { - t.Errorf("Got error %v, expected none.", err) + if tc.expectedCertBeforeStart == nil { + if certificate != nil { + t.Errorf("Expected certificate to be nil, was %s", certificate.Leaf.NotAfter) + } + } else { + if !certificatesEqual(certificate, tc.expectedCertBeforeStart.certificate) { + t.Errorf("Got %v, wanted %v", certificateString(certificate), certificateString(tc.expectedCertBeforeStart.certificate)) + } } if m, ok := certificateManager.(*manager); !ok { @@ -649,6 +680,12 @@ func TestInitializeCertificateSigningRequestClient(t *testing.T) { } certificate = certificateManager.Current() + if tc.expectedCertAfterStart == nil { + if certificate != nil { + t.Errorf("Expected certificate to be nil, was %s", certificate.Leaf.NotAfter) + } + return + } if !certificatesEqual(certificate, tc.expectedCertAfterStart.certificate) { t.Errorf("Got %v, wanted %v", certificateString(certificate), certificateString(tc.expectedCertAfterStart.certificate)) } @@ -721,8 +758,10 @@ func TestInitializeOtherRESTClients(t *testing.T) { CertificateStore: certificateStore, BootstrapCertificatePEM: tc.bootstrapCert.certificatePEM, BootstrapKeyPEM: tc.bootstrapCert.keyPEM, - CertificateSigningRequestClient: &fakeClient{ - certificatePEM: tc.apiCert.certificatePEM, + ClientFn: func(_ *tls.Certificate) (certificatesclient.CertificateSigningRequestInterface, error) { + return &fakeClient{ + certificatePEM: tc.apiCert.certificatePEM, + }, nil }, }) if err != nil { @@ -873,10 +912,12 @@ func TestServerHealth(t *testing.T) { CertificateStore: certificateStore, BootstrapCertificatePEM: tc.bootstrapCert.certificatePEM, BootstrapKeyPEM: tc.bootstrapCert.keyPEM, - CertificateSigningRequestClient: &fakeClient{ - certificatePEM: tc.apiCert.certificatePEM, - failureType: tc.failureType, - err: tc.clientErr, + ClientFn: func(_ *tls.Certificate) (certificatesclient.CertificateSigningRequestInterface, error) { + return &fakeClient{ + certificatePEM: tc.apiCert.certificatePEM, + failureType: tc.failureType, + err: tc.clientErr, + }, nil }, }) if err != nil { From cbb80ab872b0f918e8d0d855255fa81158d189e4 Mon Sep 17 00:00:00 2001 From: Clayton Coleman Date: Thu, 15 Nov 2018 17:21:02 -0500 Subject: [PATCH 2/2] Ensure the bootstrap rotation code is tested by forcing rotation Expose both a Stop() method (for cleanup) and a method to force cert rotation, but only expose Stop() on the interface. Verify that we choose the correct client. Kubernetes-commit: de293b2d7ddb687850258370f2a7f30f224f0ec1 --- util/certificate/certificate_manager.go | 32 +++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/util/certificate/certificate_manager.go b/util/certificate/certificate_manager.go index a4b427ee..4aa9f3ab 100644 --- a/util/certificate/certificate_manager.go +++ b/util/certificate/certificate_manager.go @@ -50,6 +50,8 @@ var certificateWaitBackoff = wait.Backoff{Duration: 30 * time.Second, Steps: 4, type Manager interface { // Start the API server status sync loop. Start() + // Stop the cert manager loop. + Stop() // Current returns the currently selected certificate from the // certificate manager, as well as the associated certificate and key data // in PEM format. @@ -164,6 +166,8 @@ type manager struct { // the clientFn must only be accessed under the clientAccessLock clientAccessLock sync.Mutex clientFn CSRClientFunc + stopCh chan struct{} + stopped bool } // NewManager returns a new certificate manager. A certificate manager is @@ -184,6 +188,7 @@ func NewManager(config *Config) (Manager, error) { } m := manager{ + stopCh: make(chan struct{}), clientFn: config.ClientFn, getTemplate: getTemplate, dynamicTemplate: config.GetTemplate != nil, @@ -219,6 +224,17 @@ func (m *manager) ServerHealthy() bool { return m.serverHealth } +// Stop terminates the manager. +func (m *manager) Stop() { + m.clientAccessLock.Lock() + defer m.clientAccessLock.Unlock() + if m.stopped { + return + } + close(m.stopCh) + m.stopped = true +} + // Start will start the background work of rotating the certificates. func (m *manager) Start() { // Certificate rotation depends on access to the API server certificate @@ -232,7 +248,7 @@ func (m *manager) Start() { klog.V(2).Infof("Certificate rotation is enabled.") templateChanged := make(chan struct{}) - go wait.Forever(func() { + go wait.Until(func() { deadline := m.nextRotationDeadline() if sleepInterval := deadline.Sub(time.Now()); sleepInterval > 0 { klog.V(2).Infof("Waiting %v for next certificate rotation", sleepInterval) @@ -267,17 +283,17 @@ func (m *manager) Start() { utilruntime.HandleError(fmt.Errorf("Reached backoff limit, still unable to rotate certs: %v", err)) wait.PollInfinite(32*time.Second, m.rotateCerts) } - }, time.Second) + }, time.Second, m.stopCh) if m.dynamicTemplate { - go wait.Forever(func() { + go wait.Until(func() { // check if the current template matches what we last requested if !m.certSatisfiesTemplate() && !reflect.DeepEqual(m.getLastRequest(), m.getTemplate()) { // if the template is different, queue up an interrupt of the rotation deadline loop. // if we've requested a CSR that matches the new template by the time the interrupt is handled, the interrupt is disregarded. templateChanged <- struct{}{} } - }, time.Second) + }, time.Second, m.stopCh) } } @@ -332,11 +348,19 @@ func (m *manager) getClient() (certificatesclient.CertificateSigningRequestInter return m.clientFn(current) } +// RotateCerts is exposed for testing only and is not a part of the public interface. +// Returns true if it changed the cert, false otherwise. Error is only returned in +// exceptional cases. +func (m *manager) RotateCerts() (bool, error) { + return m.rotateCerts() +} + // rotateCerts attempts to request a client cert from the server, wait a reasonable // period of time for it to be signed, and then update the cert on disk. If it cannot // retrieve a cert, it will return false. It will only return error in exceptional cases. // This method also keeps track of "server health" by interpreting the responses it gets // from the server on the various calls it makes. +// TODO: return errors, have callers handle and log them correctly func (m *manager) rotateCerts() (bool, error) { klog.V(2).Infof("Rotating certificates")