mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-09-15 14:14:39 +00:00
Cap how long the kubelet waits when it has no client cert
If we go a certain amount of time without being able to create a client cert and we have no current client cert from the store, exit. This prevents a corrupted local copy of the cert from leaving the Kubelet in a zombie state forever. Exiting allows a config loop outside the Kubelet to clean up the file, or allows the bootstrap client cert to be used to get another client cert.
This commit is contained in:
@@ -528,9 +528,11 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.Dependencies) (err error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// we set exitIfExpired to true because we use this client configuration to request new certs - if we are unable
|
|
||||||
// to request new certs, we will be unable to continue normal operation
|
// we set exitAfter to five minutes because we use this client configuration to request new certs - if we are unable
|
||||||
if err := kubeletcertificate.UpdateTransport(wait.NeverStop, clientConfig, clientCertificateManager, true); err != nil {
|
// to request new certs, we will be unable to continue normal operation. Exiting the process allows a wrapper
|
||||||
|
// or the bootstrapping credentials to potentially lay down new initial config.
|
||||||
|
if err := kubeletcertificate.UpdateTransport(wait.NeverStop, clientConfig, clientCertificateManager, 5*time.Minute); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -43,15 +43,21 @@ import (
|
|||||||
// connections, forcing the client to re-handshake with the server and use the
|
// connections, forcing the client to re-handshake with the server and use the
|
||||||
// new certificate.
|
// new certificate.
|
||||||
//
|
//
|
||||||
|
// The exitAfter duration, if set, will terminate the current process if a certificate
|
||||||
|
// is not available from the store (because it has been deleted on disk or is corrupt)
|
||||||
|
// or if the certificate has expired and the server is responsive. This allows the
|
||||||
|
// process parent or the bootstrap credentials an opportunity to retrieve a new initial
|
||||||
|
// certificate.
|
||||||
|
//
|
||||||
// stopCh should be used to indicate when the transport is unused and doesn't need
|
// stopCh should be used to indicate when the transport is unused and doesn't need
|
||||||
// to continue checking the manager.
|
// to continue checking the manager.
|
||||||
func UpdateTransport(stopCh <-chan struct{}, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitIfExpired bool) error {
|
func UpdateTransport(stopCh <-chan struct{}, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitAfter time.Duration) error {
|
||||||
return updateTransport(stopCh, 10*time.Second, clientConfig, clientCertificateManager, exitIfExpired)
|
return updateTransport(stopCh, 10*time.Second, clientConfig, clientCertificateManager, exitAfter)
|
||||||
}
|
}
|
||||||
|
|
||||||
// updateTransport is an internal method that exposes how often this method checks that the
|
// updateTransport is an internal method that exposes how often this method checks that the
|
||||||
// client cert has changed. Intended for testing.
|
// client cert has changed.
|
||||||
func updateTransport(stopCh <-chan struct{}, period time.Duration, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitIfExpired bool) error {
|
func updateTransport(stopCh <-chan struct{}, period time.Duration, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitAfter time.Duration) error {
|
||||||
if clientConfig.Transport != nil {
|
if clientConfig.Transport != nil {
|
||||||
return fmt.Errorf("there is already a transport configured")
|
return fmt.Errorf("there is already a transport configured")
|
||||||
}
|
}
|
||||||
@@ -77,16 +83,35 @@ func updateTransport(stopCh <-chan struct{}, period time.Duration, clientConfig
|
|||||||
conns: make(map[*closableConn]struct{}),
|
conns: make(map[*closableConn]struct{}),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
lastCertAvailable := time.Now()
|
||||||
lastCert := clientCertificateManager.Current()
|
lastCert := clientCertificateManager.Current()
|
||||||
go wait.Until(func() {
|
go wait.Until(func() {
|
||||||
curr := clientCertificateManager.Current()
|
curr := clientCertificateManager.Current()
|
||||||
if exitIfExpired && curr != nil && time.Now().After(curr.Leaf.NotAfter) {
|
|
||||||
if clientCertificateManager.ServerHealthy() {
|
if exitAfter > 0 {
|
||||||
glog.Fatalf("The currently active client certificate has expired and the server is responsive, exiting.")
|
now := time.Now()
|
||||||
|
if curr == nil {
|
||||||
|
// the certificate has been deleted from disk or is otherwise corrupt
|
||||||
|
if now.After(lastCertAvailable.Add(exitAfter)) {
|
||||||
|
if clientCertificateManager.ServerHealthy() {
|
||||||
|
glog.Fatalf("It has been %s since a valid client cert was found and the server is responsive, exiting.", exitAfter)
|
||||||
|
} else {
|
||||||
|
glog.Errorf("It has been %s since a valid client cert was found, but the server is not responsive. A restart may be necessary to retrieve new initial credentials.", exitAfter)
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
glog.Errorf("The currently active client certificate has expired, but the server is not responsive. A restart may be necessary to retrieve new initial credentials.")
|
// the certificate is expired
|
||||||
|
if now.After(curr.Leaf.NotAfter) {
|
||||||
|
if clientCertificateManager.ServerHealthy() {
|
||||||
|
glog.Fatalf("The currently active client certificate has expired and the server is responsive, exiting.")
|
||||||
|
} else {
|
||||||
|
glog.Errorf("The currently active client certificate has expired, but the server is not responsive. A restart may be necessary to retrieve new initial credentials.")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lastCertAvailable = now
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if curr == nil || lastCert == curr {
|
if curr == nil || lastCert == curr {
|
||||||
// Cert hasn't been rotated.
|
// Cert hasn't been rotated.
|
||||||
return
|
return
|
||||||
|
@@ -187,7 +187,7 @@ func TestRotateShutsDownConnections(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check for a new cert every 10 milliseconds
|
// Check for a new cert every 10 milliseconds
|
||||||
if err := updateTransport(stop, 10*time.Millisecond, c, m, false); err != nil {
|
if err := updateTransport(stop, 10*time.Millisecond, c, m, 0); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user