Merge pull request #84614 from rphillips/fixes/add_cert_rotation_failure_metric

kubelet: add certificate rotation error metric
This commit is contained in:
Kubernetes Prow Robot 2019-12-03 11:40:57 -08:00 committed by GitHub
commit 205570eb75
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 64 additions and 18 deletions

View File

@ -61,6 +61,15 @@ func NewKubeletServerCertificateManager(kubeClient clientset.Interface, kubeCfg
},
)
legacyregistry.MustRegister(certificateExpiration)
var certificateRenewFailure = compbasemetrics.NewCounter(
&compbasemetrics.CounterOpts{
Subsystem: metrics.KubeletSubsystem,
Name: "server_expiration_renew_errors",
Help: "Counter of certificate renewal errors.",
StabilityLevel: compbasemetrics.ALPHA,
},
)
legacyregistry.MustRegister(certificateRenewFailure)
certificateRotationAge := compbasemetrics.NewHistogram(
&compbasemetrics.HistogramOpts{
@ -119,9 +128,10 @@ func NewKubeletServerCertificateManager(kubeClient clientset.Interface, kubeCfg
// authenticate itself to a TLS client.
certificates.UsageServerAuth,
},
CertificateStore: certificateStore,
CertificateExpiration: certificateExpiration,
CertificateRotation: certificateRotationAge,
CertificateStore: certificateStore,
CertificateExpiration: certificateExpiration,
CertificateRotation: certificateRotationAge,
CertificateRenewFailure: certificateRenewFailure,
})
if err != nil {
return nil, fmt.Errorf("failed to initialize server certificate manager: %v", err)
@ -199,6 +209,16 @@ func NewKubeletClientCertificateManager(
},
)
legacyregistry.Register(certificateExpiration)
var certificateRenewFailure = compbasemetrics.NewCounter(
&compbasemetrics.CounterOpts{
Namespace: metrics.KubeletSubsystem,
Subsystem: "certificate_manager",
Name: "client_expiration_renew_errors",
Help: "Counter of certificate renewal errors.",
StabilityLevel: compbasemetrics.ALPHA,
},
)
legacyregistry.Register(certificateRenewFailure)
m, err := certificate.NewManager(&certificate.Config{
ClientFn: clientFn,
@ -231,8 +251,9 @@ func NewKubeletClientCertificateManager(
BootstrapCertificatePEM: bootstrapCertData,
BootstrapKeyPEM: bootstrapKeyData,
CertificateStore: certificateStore,
CertificateExpiration: certificateExpiration,
CertificateStore: certificateStore,
CertificateExpiration: certificateExpiration,
CertificateRenewFailure: certificateRenewFailure,
})
if err != nil {
return nil, fmt.Errorf("failed to initialize client certificate manager: %v", err)

View File

@ -124,6 +124,9 @@ type Config struct {
// allows one to setup monitoring and alerting of unexpected rotation
// behavior and track trends in rotation frequency.
CertificateRotation Histogram
// CertificateRenewFailure will record a metric that keeps track of
// certificate renewal failures. It is optional: when nil, no failure
// metric is recorded.
CertificateRenewFailure Counter
}
// Store is responsible for getting and updating the current certificate.
@ -154,6 +157,11 @@ type Histogram interface {
Observe(float64)
}
// Counter is the minimal counter-metric interface the certificate manager
// depends on. It is the type of Config.CertificateRenewFailure; the manager
// checks the value for nil before use, so supplying no Counter is allowed.
type Counter interface {
// Inc is called once for each certificate renewal failure the manager
// observes while rotating certificates.
Inc()
}
// NoCertKeyError indicates there is no cert/key currently available.
type NoCertKeyError string
@ -177,8 +185,9 @@ type manager struct {
certStore Store
certificateExpiration Gauge
certificateRotation Histogram
certificateExpiration Gauge
certificateRotation Histogram
certificateRenewFailure Counter
// the following variables must only be accessed under certAccessLock
certAccessLock sync.RWMutex
@ -213,17 +222,18 @@ func NewManager(config *Config) (Manager, error) {
}
m := manager{
stopCh: make(chan struct{}),
clientFn: config.ClientFn,
getTemplate: getTemplate,
dynamicTemplate: config.GetTemplate != nil,
usages: config.Usages,
certStore: config.CertificateStore,
cert: cert,
forceRotation: forceRotation,
certificateExpiration: config.CertificateExpiration,
certificateRotation: config.CertificateRotation,
now: time.Now,
stopCh: make(chan struct{}),
clientFn: config.ClientFn,
getTemplate: getTemplate,
dynamicTemplate: config.GetTemplate != nil,
usages: config.Usages,
certStore: config.CertificateStore,
cert: cert,
forceRotation: forceRotation,
certificateExpiration: config.CertificateExpiration,
certificateRotation: config.CertificateRotation,
certificateRenewFailure: config.CertificateRenewFailure,
now: time.Now,
}
return &m, nil
@ -404,6 +414,9 @@ func (m *manager) rotateCerts() (bool, error) {
template, csrPEM, keyPEM, privateKey, err := m.generateCSR()
if err != nil {
utilruntime.HandleError(fmt.Errorf("Unable to generate a certificate signing request: %v", err))
if m.certificateRenewFailure != nil {
m.certificateRenewFailure.Inc()
}
return false, nil
}
@ -411,6 +424,9 @@ func (m *manager) rotateCerts() (bool, error) {
client, err := m.getClient()
if err != nil {
utilruntime.HandleError(fmt.Errorf("Unable to load a client to request certificates: %v", err))
if m.certificateRenewFailure != nil {
m.certificateRenewFailure.Inc()
}
return false, nil
}
@ -419,6 +435,9 @@ func (m *manager) rotateCerts() (bool, error) {
req, err := csr.RequestCertificate(client, csrPEM, "", m.usages, privateKey)
if err != nil {
utilruntime.HandleError(fmt.Errorf("Failed while requesting a signed certificate from the master: %v", err))
if m.certificateRenewFailure != nil {
m.certificateRenewFailure.Inc()
}
return false, m.updateServerError(err)
}
@ -433,12 +452,18 @@ func (m *manager) rotateCerts() (bool, error) {
crtPEM, err := csr.WaitForCertificate(ctx, client, req)
if err != nil {
utilruntime.HandleError(fmt.Errorf("certificate request was not signed: %v", err))
if m.certificateRenewFailure != nil {
m.certificateRenewFailure.Inc()
}
return false, nil
}
cert, err := m.certStore.Update(crtPEM, keyPEM)
if err != nil {
utilruntime.HandleError(fmt.Errorf("Unable to store the new cert/key pair: %v", err))
if m.certificateRenewFailure != nil {
m.certificateRenewFailure.Inc()
}
return false, nil
}