From 627c414c1bfffe77567e2f335c7d4ae68a18f665 Mon Sep 17 00:00:00 2001 From: Mike Danese Date: Wed, 21 Jun 2017 19:01:02 +0200 Subject: [PATCH] kubelet should resume csr bootstrap Right now the kubelet creates a new csr object with the same key every time it restarts during the bootstrap process. It should resume with the old csr object if it exists. To do this the name of the csr object must be stable. Also using a list watch here eliminates a race condition where a watch event is missed and the kubelet stalls. --- cmd/kubelet/app/bootstrap.go | 11 +- pkg/kubelet/util/csr/BUILD | 5 + pkg/kubelet/util/csr/csr.go | 173 +++++++++++++++++++++++-------- pkg/kubelet/util/csr/csr_test.go | 4 + 4 files changed, 142 insertions(+), 51 deletions(-) diff --git a/cmd/kubelet/app/bootstrap.go b/cmd/kubelet/app/bootstrap.go index 21aab54a3b9..18ac878f948 100644 --- a/cmd/kubelet/app/bootstrap.go +++ b/cmd/kubelet/app/bootstrap.go @@ -73,19 +73,10 @@ func bootstrapClientCert(kubeconfigPath string, bootstrapPath string, certDir st if err != nil { return fmt.Errorf("unable to build bootstrap key path: %v", err) } - keyData, generatedKeyFile, err := certutil.LoadOrGenerateKeyFile(keyPath) + keyData, _, err := certutil.LoadOrGenerateKeyFile(keyPath) if err != nil { return err } - if generatedKeyFile { - defer func() { - if !success { - if err := os.Remove(keyPath); err != nil { - glog.Warningf("Cannot clean up the key file %q: %v", keyPath, err) - } - } - }() - } // Get the cert. certPath, err := filepath.Abs(filepath.Join(certDir, defaultKubeletClientCertificateFile)) diff --git a/pkg/kubelet/util/csr/BUILD b/pkg/kubelet/util/csr/BUILD index afc9ba669b1..9b794039a8c 100644 --- a/pkg/kubelet/util/csr/BUILD +++ b/pkg/kubelet/util/csr/BUILD @@ -13,12 +13,17 @@ go_library( srcs = ["csr.go"], tags = ["automanaged"], deps = [ + "//pkg/apis/certificates/v1beta1:go_default_library", + "//vendor/github.com/golang/glog:go_default_library", "//vendor/k8s.io/api/certificates/v1beta1:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library", "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//vendor/k8s.io/apimachinery/pkg/fields:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library", "//vendor/k8s.io/apimachinery/pkg/types:go_default_library", "//vendor/k8s.io/apimachinery/pkg/watch:go_default_library", "//vendor/k8s.io/client-go/kubernetes/typed/certificates/v1beta1:go_default_library", + "//vendor/k8s.io/client-go/tools/cache:go_default_library", "//vendor/k8s.io/client-go/util/cert:go_default_library", ], ) diff --git a/pkg/kubelet/util/csr/csr.go b/pkg/kubelet/util/csr/csr.go index 46fbc052fd3..316de1abe5f 100644 --- a/pkg/kubelet/util/csr/csr.go +++ b/pkg/kubelet/util/csr/csr.go @@ -17,16 +17,27 @@ limitations under the License. package csr import ( + "crypto" + "crypto/sha512" "crypto/x509/pkix" + "encoding/base64" "fmt" + "reflect" + "time" + + "github.com/golang/glog" certificates "k8s.io/api/certificates/v1beta1" + "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/watch" certificatesclient "k8s.io/client-go/kubernetes/typed/certificates/v1beta1" + "k8s.io/client-go/tools/cache" certutil "k8s.io/client-go/util/cert" + certhelper "k8s.io/kubernetes/pkg/apis/certificates/v1beta1" ) // RequestNodeCertificate will create a certificate signing request for a node @@ -39,7 +50,7 @@ import ( func RequestNodeCertificate(client certificatesclient.CertificateSigningRequestInterface, privateKeyData []byte, nodeName types.NodeName) (certData []byte, err error) { subject := &pkix.Name{ Organization: []string{"system:nodes"}, - CommonName: fmt.Sprintf("system:node:%s", nodeName), + CommonName: "system:node:" + string(nodeName), } privateKey, err := certutil.ParsePrivateKeyPEM(privateKeyData) @@ -50,67 +61,147 @@ func RequestNodeCertificate(client certificatesclient.CertificateSigningRequestI if err != nil { return nil, fmt.Errorf("unable to generate certificate request: %v", err) } - return RequestCertificate(client, csrData, []certificates.KeyUsage{ + + usages := []certificates.KeyUsage{ certificates.UsageDigitalSignature, certificates.UsageKeyEncipherment, certificates.UsageClientAuth, - }) + } + name := digestedName(privateKeyData, subject, usages) + return requestCertificate(client, csrData, name, usages, privateKey) } -// RequestCertificate will create a certificate signing request using the PEM -// encoded CSR and send it to API server, then it will watch the object's +// requestCertificate will either use an existing (if this process has run +// before but not to completion) or create a certificate signing request using the +// PEM encoded CSR and send it to API server, then it will watch the object's // status, once approved by API server, it will return the API server's issued // certificate (pem-encoded). If there is any errors, or the watch timeouts, it // will return an error. -func RequestCertificate(client certificatesclient.CertificateSigningRequestInterface, csrData []byte, usages []certificates.KeyUsage) (certData []byte, err error) { - req, err := client.Create(&certificates.CertificateSigningRequest{ +func requestCertificate(client certificatesclient.CertificateSigningRequestInterface, csrData []byte, name string, usages []certificates.KeyUsage, privateKey interface{}) (certData []byte, err error) { + csr := &certificates.CertificateSigningRequest{ // Username, UID, Groups will be injected by API server. - TypeMeta: metav1.TypeMeta{Kind: "CertificateSigningRequest"}, - ObjectMeta: metav1.ObjectMeta{GenerateName: "csr-"}, - + TypeMeta: metav1.TypeMeta{Kind: "CertificateSigningRequest"}, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, Spec: certificates.CertificateSigningRequestSpec{ Request: csrData, Usages: usages, }, - }) - if err != nil { + } + + req, err := client.Create(csr) + switch { + case err == nil: + case errors.IsAlreadyExists(err): + glog.Infof("csr for this node already exists, reusing") + req, err = client.Get(name, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("cannot retrieve certificate signing request: %v", err) + } + if err := ensureCompatible(req, csr, privateKey); err != nil { + return nil, fmt.Errorf("retrieved csr is not compatible: %v", err) + } + glog.Infof("csr for this node is still valid") + default: return nil, fmt.Errorf("cannot create certificate signing request: %v", err) } - // Make a default timeout = 3600s. - var defaultTimeoutSeconds int64 = 3600 - certWatch, err := client.Watch(metav1.ListOptions{ - Watch: true, - TimeoutSeconds: &defaultTimeoutSeconds, - FieldSelector: fields.OneTermEqualSelector("metadata.name", req.Name).String(), - }) + fieldSelector := fields.OneTermEqualSelector("metadata.name", req.Name).String() + + event, err := cache.ListWatchUntil( + 3600*time.Second, + &cache.ListWatch{ + ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { + options.FieldSelector = fieldSelector + return client.List(options) + }, + WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { + options.FieldSelector = fieldSelector + return client.Watch(options) + }, + }, + func(event watch.Event) (bool, error) { + switch event.Type { + case watch.Modified, watch.Added: + default: + return false, nil + } + csr := event.Object.(*certificates.CertificateSigningRequest) + if csr.UID != req.UID { + return false, fmt.Errorf("csr %q changed UIDs", csr.Name) + } + for _, c := range csr.Status.Conditions { + if c.Type == certificates.CertificateDenied { + return false, fmt.Errorf("certificate signing request is not approved, reason: %v, message: %v", c.Reason, c.Message) + } + if c.Type == certificates.CertificateApproved && csr.Status.Certificate != nil { + return true, nil + } + } + return false, nil + }, + ) if err != nil { return nil, fmt.Errorf("cannot watch on the certificate signing request: %v", err) } - defer certWatch.Stop() - ch := certWatch.ResultChan() - for { - event, ok := <-ch - if !ok { - break - } + return event.Object.(*certificates.CertificateSigningRequest).Status.Certificate, nil - if event.Type == watch.Modified || event.Type == watch.Added { - if event.Object.(*certificates.CertificateSigningRequest).UID != req.UID { - continue - } - status := event.Object.(*certificates.CertificateSigningRequest).Status - for _, c := range status.Conditions { - if c.Type == certificates.CertificateDenied { - return nil, fmt.Errorf("certificate signing request is not approved, reason: %v, message: %v", c.Reason, c.Message) - } - if c.Type == certificates.CertificateApproved && status.Certificate != nil { - return status.Certificate, nil - } - } - } +} + +// This digest should include all the relevant pieces of the CSR we care about. +// We can't direcly hash the serialized CSR because of random padding that we +// regenerate every loop and we include usages which are not contained in the +// CSR. This needs to be kept up to date as we add new fields to the node +// certificates and with ensureCompatible. +func digestedName(privateKeyData []byte, subject *pkix.Name, usages []certificates.KeyUsage) string { + hash := sha512.New512_256() + + // Here we make sure two different inputs can't write the same stream + // to the hash. This delimiter is not in the base64.URLEncoding + // alphabet so there is no way to have spill over collisions. Without + // it 'CN:foo,ORG:bar' hashes to the same value as 'CN:foob,ORG:ar' + const delimiter = '|' + encode := base64.RawURLEncoding.EncodeToString + + write := func(data []byte) { + hash.Write([]byte(encode(data))) + hash.Write([]byte{delimiter}) } - return nil, fmt.Errorf("watch channel closed") + write(privateKeyData) + write([]byte(subject.CommonName)) + for _, v := range subject.Organization { + write([]byte(v)) + } + for _, v := range usages { + write([]byte(v)) + } + + return "node-csr-" + encode(hash.Sum(nil)) +} + +// ensureCompatible ensures that a CSR object is compatible with an original CSR +func ensureCompatible(new, orig *certificates.CertificateSigningRequest, privateKey interface{}) error { + newCsr, err := certhelper.ParseCSR(new) + if err != nil { + return fmt.Errorf("unable to parse new csr: %v", err) + } + origCsr, err := certhelper.ParseCSR(orig) + if err != nil { + return fmt.Errorf("unable to parse original csr: %v", err) + } + if !reflect.DeepEqual(newCsr.Subject, origCsr.Subject) { + return fmt.Errorf("csr subjects differ: new: %#v, orig: %#v", newCsr.Subject, origCsr.Subject) + } + signer, ok := privateKey.(crypto.Signer) + if !ok { + return fmt.Errorf("privateKey is not a signer") + } + newCsr.PublicKey = signer.Public() + if err := newCsr.CheckSignature(); err != nil { + return fmt.Errorf("error validating signature new CSR against old key: %v", err) + } + return nil } diff --git a/pkg/kubelet/util/csr/csr_test.go b/pkg/kubelet/util/csr/csr_test.go index 6ad15fec3e0..03cff8ac448 100644 --- a/pkg/kubelet/util/csr/csr_test.go +++ b/pkg/kubelet/util/csr/csr_test.go @@ -98,6 +98,10 @@ func (c *fakeClient) Create(*certificates.CertificateSigningRequest) (*certifica return &csr, nil } +func (c *fakeClient) List(opts v1.ListOptions) (*certificates.CertificateSigningRequestList, error) { + return &certificates.CertificateSigningRequestList{}, nil +} + func (c *fakeClient) Watch(opts v1.ListOptions) (watch.Interface, error) { c.watch = watch.NewFakeWithChanSize(1, false) c.watch.Add(c.generateCSR())