diff --git a/pkg/controlplane/apiserver/config.go b/pkg/controlplane/apiserver/config.go index a0134b16f4a..5ffa9950e87 100644 --- a/pkg/controlplane/apiserver/config.go +++ b/pkg/controlplane/apiserver/config.go @@ -147,7 +147,7 @@ func BuildGenericConfig( ctx := wait.ContextForChannel(genericConfig.DrainedNotify()) // Authentication.ApplyTo requires already applied OpenAPIConfig and EgressSelector if present - if lastErr = s.Authentication.ApplyTo(ctx, &genericConfig.Authentication, genericConfig.SecureServing, genericConfig.EgressSelector, genericConfig.OpenAPIConfig, genericConfig.OpenAPIV3Config, clientgoExternalClient, versionedInformers); lastErr != nil { + if lastErr = s.Authentication.ApplyTo(ctx, &genericConfig.Authentication, genericConfig.SecureServing, genericConfig.EgressSelector, genericConfig.OpenAPIConfig, genericConfig.OpenAPIV3Config, clientgoExternalClient, versionedInformers, genericConfig.APIServerID); lastErr != nil { return } diff --git a/pkg/kubeapiserver/options/authentication.go b/pkg/kubeapiserver/options/authentication.go index 6d5e684660c..4b24f28e01f 100644 --- a/pkg/kubeapiserver/options/authentication.go +++ b/pkg/kubeapiserver/options/authentication.go @@ -41,6 +41,7 @@ import ( genericapiserver "k8s.io/apiserver/pkg/server" "k8s.io/apiserver/pkg/server/egressselector" genericoptions "k8s.io/apiserver/pkg/server/options" + authenticationconfigmetrics "k8s.io/apiserver/pkg/server/options/authenticationconfig/metrics" utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/apiserver/plugin/pkg/authenticator/token/oidc" "k8s.io/client-go/informers" @@ -588,7 +589,16 @@ func (o *BuiltInAuthenticationOptions) ToAuthenticationConfig() (kubeauthenticat // ApplyTo requires already applied OpenAPIConfig and EgressSelector if present. // The input context controls the lifecycle of background goroutines started to reload the authentication config file. -func (o *BuiltInAuthenticationOptions) ApplyTo(ctx context.Context, authInfo *genericapiserver.AuthenticationInfo, secureServing *genericapiserver.SecureServingInfo, egressSelector *egressselector.EgressSelector, openAPIConfig *openapicommon.Config, openAPIV3Config *openapicommon.OpenAPIV3Config, extclient kubernetes.Interface, versionedInformer informers.SharedInformerFactory) error { +func (o *BuiltInAuthenticationOptions) ApplyTo( + ctx context.Context, + authInfo *genericapiserver.AuthenticationInfo, + secureServing *genericapiserver.SecureServingInfo, + egressSelector *egressselector.EgressSelector, + openAPIConfig *openapicommon.Config, + openAPIV3Config *openapicommon.OpenAPIV3Config, + extclient kubernetes.Interface, + versionedInformer informers.SharedInformerFactory, + apiServerID string) error { if o == nil { return nil } @@ -654,6 +664,7 @@ func (o *BuiltInAuthenticationOptions) ApplyTo(ctx context.Context, authInfo *ge authInfo.Authenticator = authenticator if len(o.AuthenticationConfigFile) > 0 { + authenticationconfigmetrics.RegisterMetrics() trackedAuthenticationConfigData := authenticatorConfig.AuthenticationConfigData var mu sync.Mutex go filesystem.WatchUntil( @@ -661,7 +672,6 @@ func (o *BuiltInAuthenticationOptions) ApplyTo(ctx context.Context, authInfo *ge time.Minute, o.AuthenticationConfigFile, func() { - // TODO add metrics // TODO collapse onto shared logic with DynamicEncryptionConfigContent controller mu.Lock() @@ -670,6 +680,7 @@ func (o *BuiltInAuthenticationOptions) ApplyTo(ctx context.Context, authInfo *ge authConfigBytes, err := os.ReadFile(o.AuthenticationConfigFile) if err != nil { klog.ErrorS(err, "failed to read authentication config file") + authenticationconfigmetrics.RecordAuthenticationConfigAutomaticReloadFailure(apiServerID) // we do not update the tracker here because this error could eventually resolve as we keep retrying return } @@ -683,6 +694,7 @@ func (o *BuiltInAuthenticationOptions) ApplyTo(ctx context.Context, authInfo *ge authConfig, err := loadAuthenticationConfigFromData(authConfigBytes) if err != nil { klog.ErrorS(err, "failed to load authentication config") + authenticationconfigmetrics.RecordAuthenticationConfigAutomaticReloadFailure(apiServerID) // this config is not structurally valid and never will be, update the tracker so we stop retrying trackedAuthenticationConfigData = authConfigData return @@ -690,6 +702,7 @@ func (o *BuiltInAuthenticationOptions) ApplyTo(ctx context.Context, authInfo *ge if err := apiservervalidation.ValidateAuthenticationConfiguration(authConfig, authenticatorConfig.ServiceAccountIssuers).ToAggregate(); err != nil { klog.ErrorS(err, "failed to validate authentication config") + authenticationconfigmetrics.RecordAuthenticationConfigAutomaticReloadFailure(apiServerID) // this config is not semantically valid and never will be, update the tracker so we stop retrying trackedAuthenticationConfigData = authConfigData return @@ -699,11 +712,14 @@ func (o *BuiltInAuthenticationOptions) ApplyTo(ctx context.Context, authInfo *ge defer timeoutCancel() if err := updateAuthenticationConfig(timeoutCtx, authConfig); err != nil { klog.ErrorS(err, "failed to update authentication config") + authenticationconfigmetrics.RecordAuthenticationConfigAutomaticReloadFailure(apiServerID) // we do not update the tracker here because this error could eventually resolve as we keep retrying return } trackedAuthenticationConfigData = authConfigData + klog.InfoS("reloaded authentication config") + authenticationconfigmetrics.RecordAuthenticationConfigAutomaticReloadSuccess(apiServerID) }, func(err error) { klog.ErrorS(err, "watching authentication config file") }, ) diff --git a/staging/src/k8s.io/apiserver/pkg/server/options/authenticationconfig/metrics/metrics.go b/staging/src/k8s.io/apiserver/pkg/server/options/authenticationconfig/metrics/metrics.go new file mode 100644 index 00000000000..e57ba55631f --- /dev/null +++ b/staging/src/k8s.io/apiserver/pkg/server/options/authenticationconfig/metrics/metrics.go @@ -0,0 +1,88 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "crypto/sha256" + "fmt" + "sync" + + "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" +) + +const ( + namespace = "apiserver" + subsystem = "authentication_config_controller" +) + +var ( + authenticationConfigAutomaticReloadsTotal = metrics.NewCounterVec( + &metrics.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "automatic_reloads_total", + Help: "Total number of automatic reloads of authentication configuration split by status and apiserver identity.", + StabilityLevel: metrics.ALPHA, + }, + []string{"status", "apiserver_id_hash"}, + ) + + authenticationConfigAutomaticReloadLastTimestampSeconds = metrics.NewGaugeVec( + &metrics.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "automatic_reload_last_timestamp_seconds", + Help: "Timestamp of the last automatic reload of authentication configuration split by status and apiserver identity.", + StabilityLevel: metrics.ALPHA, + }, + []string{"status", "apiserver_id_hash"}, + ) +) + +var registerMetrics sync.Once + +func RegisterMetrics() { + registerMetrics.Do(func() { + legacyregistry.MustRegister(authenticationConfigAutomaticReloadsTotal) + legacyregistry.MustRegister(authenticationConfigAutomaticReloadLastTimestampSeconds) + }) +} + +func ResetMetricsForTest() { + authenticationConfigAutomaticReloadsTotal.Reset() + authenticationConfigAutomaticReloadLastTimestampSeconds.Reset() +} + +func RecordAuthenticationConfigAutomaticReloadFailure(apiServerID string) { + apiServerIDHash := getHash(apiServerID) + authenticationConfigAutomaticReloadsTotal.WithLabelValues("failure", apiServerIDHash).Inc() + authenticationConfigAutomaticReloadLastTimestampSeconds.WithLabelValues("failure", apiServerIDHash).SetToCurrentTime() +} + +func RecordAuthenticationConfigAutomaticReloadSuccess(apiServerID string) { + apiServerIDHash := getHash(apiServerID) + authenticationConfigAutomaticReloadsTotal.WithLabelValues("success", apiServerIDHash).Inc() + authenticationConfigAutomaticReloadLastTimestampSeconds.WithLabelValues("success", apiServerIDHash).SetToCurrentTime() +} + +func getHash(data string) string { + if len(data) == 0 { + return "" + } + return fmt.Sprintf("sha256:%x", sha256.Sum256([]byte(data))) +} diff --git a/staging/src/k8s.io/apiserver/pkg/server/options/authenticationconfig/metrics/metrics_test.go b/staging/src/k8s.io/apiserver/pkg/server/options/authenticationconfig/metrics/metrics_test.go new file mode 100644 index 00000000000..75ef1868dfa --- /dev/null +++ b/staging/src/k8s.io/apiserver/pkg/server/options/authenticationconfig/metrics/metrics_test.go @@ -0,0 +1,109 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "strings" + "testing" + + "k8s.io/component-base/metrics/legacyregistry" + "k8s.io/component-base/metrics/testutil" +) + +const ( + testAPIServerID = "testAPIServerID" + testAPIServerIDHash = "sha256:14f9d63e669337ac6bfda2e2162915ee6a6067743eddd4e5c374b572f951ff37" +) + +func TestRecordAuthenticationConfigAutomaticReloadFailure(t *testing.T) { + expectedValue := ` + # HELP apiserver_authentication_config_controller_automatic_reloads_total [ALPHA] Total number of automatic reloads of authentication configuration split by status and apiserver identity. + # TYPE apiserver_authentication_config_controller_automatic_reloads_total counter + apiserver_authentication_config_controller_automatic_reloads_total {apiserver_id_hash="sha256:14f9d63e669337ac6bfda2e2162915ee6a6067743eddd4e5c374b572f951ff37",status="failure"} 1 + ` + metrics := []string{ + namespace + "_" + subsystem + "_automatic_reloads_total", + } + + authenticationConfigAutomaticReloadsTotal.Reset() + RegisterMetrics() + + RecordAuthenticationConfigAutomaticReloadFailure(testAPIServerID) + if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expectedValue), metrics...); err != nil { + t.Fatal(err) + } +} + +func TestRecordAuthenticationConfigAutomaticReloadSuccess(t *testing.T) { + expectedValue := ` + # HELP apiserver_authentication_config_controller_automatic_reloads_total [ALPHA] Total number of automatic reloads of authentication configuration split by status and apiserver identity. + # TYPE apiserver_authentication_config_controller_automatic_reloads_total counter + apiserver_authentication_config_controller_automatic_reloads_total {apiserver_id_hash="sha256:14f9d63e669337ac6bfda2e2162915ee6a6067743eddd4e5c374b572f951ff37",status="success"} 1 + ` + metrics := []string{ + namespace + "_" + subsystem + "_automatic_reloads_total", + } + + authenticationConfigAutomaticReloadsTotal.Reset() + RegisterMetrics() + + RecordAuthenticationConfigAutomaticReloadSuccess(testAPIServerID) + if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expectedValue), metrics...); err != nil { + t.Fatal(err) + } +} + +func TestAuthenticationConfigAutomaticReloadLastTimestampSeconds(t *testing.T) { + testCases := []struct { + expectedValue string + resultLabel string + timestamp int64 + }{ + { + expectedValue: ` + # HELP apiserver_authentication_config_controller_automatic_reload_last_timestamp_seconds [ALPHA] Timestamp of the last automatic reload of authentication configuration split by status and apiserver identity. + # TYPE apiserver_authentication_config_controller_automatic_reload_last_timestamp_seconds gauge + apiserver_authentication_config_controller_automatic_reload_last_timestamp_seconds{apiserver_id_hash="sha256:14f9d63e669337ac6bfda2e2162915ee6a6067743eddd4e5c374b572f951ff37",status="failure"} 1.689101941e+09 + `, + resultLabel: "failure", + timestamp: 1689101941, + }, + { + expectedValue: ` + # HELP apiserver_authentication_config_controller_automatic_reload_last_timestamp_seconds [ALPHA] Timestamp of the last automatic reload of authentication configuration split by status and apiserver identity. + # TYPE apiserver_authentication_config_controller_automatic_reload_last_timestamp_seconds gauge + apiserver_authentication_config_controller_automatic_reload_last_timestamp_seconds{apiserver_id_hash="sha256:14f9d63e669337ac6bfda2e2162915ee6a6067743eddd4e5c374b572f951ff37",status="success"} 1.689101941e+09 + `, + resultLabel: "success", + timestamp: 1689101941, + }, + } + + metrics := []string{ + namespace + "_" + subsystem + "_automatic_reload_last_timestamp_seconds", + } + RegisterMetrics() + + for _, tc := range testCases { + authenticationConfigAutomaticReloadLastTimestampSeconds.Reset() + authenticationConfigAutomaticReloadLastTimestampSeconds.WithLabelValues(tc.resultLabel, testAPIServerIDHash).Set(float64(tc.timestamp)) + + if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(tc.expectedValue), metrics...); err != nil { + t.Fatal(err) + } + } +} diff --git a/test/integration/apiserver/oidc/oidc_test.go b/test/integration/apiserver/oidc/oidc_test.go index 97381fb3731..f26858d77f8 100644 --- a/test/integration/apiserver/oidc/oidc_test.go +++ b/test/integration/apiserver/oidc/oidc_test.go @@ -31,6 +31,7 @@ import ( "net/url" "os" "path/filepath" + "regexp" "strings" "testing" "time" @@ -47,6 +48,8 @@ import ( utilrand "k8s.io/apimachinery/pkg/util/rand" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/apiserver/pkg/features" + genericapiserver "k8s.io/apiserver/pkg/server" + authenticationconfigmetrics "k8s.io/apiserver/pkg/server/options/authenticationconfig/metrics" utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/kubernetes" _ "k8s.io/client-go/plugin/pkg/client/auth/oidc" @@ -963,6 +966,7 @@ jwt: } func TestStructuredAuthenticationConfigReload(t *testing.T) { + genericapiserver.SetHostnameFuncForTests("testAPIServerID") const hardCodedTokenCacheTTLAndPollInterval = 10 * time.Second origUpdateAuthenticationConfigTimeout := options.UpdateAuthenticationConfigTimeout @@ -978,6 +982,7 @@ func TestStructuredAuthenticationConfigReload(t *testing.T) { wantUser, newWantUser *authenticationv1.UserInfo ignoreTransitionErrFn func(error) bool waitAfterConfigSwap bool + wantMetricStrings []string }{ { name: "old valid config to new valid config", @@ -1036,6 +1041,10 @@ jwt: Username: "panda-john_doe", Groups: []string{"system:authenticated"}, }, + wantMetricStrings: []string{ + `apiserver_authentication_config_controller_automatic_reload_last_timestamp_seconds{apiserver_id_hash="sha256:3c607df3b2bf22c9d9f01d5314b4bbf411c48ef43ff44ff29b1d55b41367c795",status="success"} FP`, + `apiserver_authentication_config_controller_automatic_reloads_total{apiserver_id_hash="sha256:3c607df3b2bf22c9d9f01d5314b4bbf411c48ef43ff44ff29b1d55b41367c795",status="success"} 1`, + }, }, { name: "old empty config to new valid config", @@ -1080,6 +1089,10 @@ jwt: Username: "snorlax-john_doe", Groups: []string{"system:authenticated"}, }, + wantMetricStrings: []string{ + `apiserver_authentication_config_controller_automatic_reload_last_timestamp_seconds{apiserver_id_hash="sha256:3c607df3b2bf22c9d9f01d5314b4bbf411c48ef43ff44ff29b1d55b41367c795",status="success"} FP`, + `apiserver_authentication_config_controller_automatic_reloads_total{apiserver_id_hash="sha256:3c607df3b2bf22c9d9f01d5314b4bbf411c48ef43ff44ff29b1d55b41367c795",status="success"} 1`, + }, }, { name: "old invalid config to new valid config", @@ -1131,6 +1144,10 @@ jwt: Username: "k8s-john_doe", Groups: []string{"system:authenticated"}, }, + wantMetricStrings: []string{ + `apiserver_authentication_config_controller_automatic_reload_last_timestamp_seconds{apiserver_id_hash="sha256:3c607df3b2bf22c9d9f01d5314b4bbf411c48ef43ff44ff29b1d55b41367c795",status="success"} FP`, + `apiserver_authentication_config_controller_automatic_reloads_total{apiserver_id_hash="sha256:3c607df3b2bf22c9d9f01d5314b4bbf411c48ef43ff44ff29b1d55b41367c795",status="success"} 1`, + }, }, { name: "old valid config to new structurally invalid config (should be ignored)", @@ -1185,6 +1202,10 @@ jwt: Groups: []string{"system:authenticated"}, }, waitAfterConfigSwap: true, + wantMetricStrings: []string{ + `apiserver_authentication_config_controller_automatic_reload_last_timestamp_seconds{apiserver_id_hash="sha256:3c607df3b2bf22c9d9f01d5314b4bbf411c48ef43ff44ff29b1d55b41367c795",status="failure"} FP`, + `apiserver_authentication_config_controller_automatic_reloads_total{apiserver_id_hash="sha256:3c607df3b2bf22c9d9f01d5314b4bbf411c48ef43ff44ff29b1d55b41367c795",status="failure"} 1`, + }, }, { name: "old valid config to new valid empty config (should cause tokens to stop working)", @@ -1224,6 +1245,10 @@ kind: AuthenticationConfiguration }, newWantUser: nil, waitAfterConfigSwap: true, + wantMetricStrings: []string{ + `apiserver_authentication_config_controller_automatic_reload_last_timestamp_seconds{apiserver_id_hash="sha256:3c607df3b2bf22c9d9f01d5314b4bbf411c48ef43ff44ff29b1d55b41367c795",status="success"} FP`, + `apiserver_authentication_config_controller_automatic_reloads_total{apiserver_id_hash="sha256:3c607df3b2bf22c9d9f01d5314b4bbf411c48ef43ff44ff29b1d55b41367c795",status="success"} 1`, + }, }, { name: "old valid config to new valid config with typo (should be ignored)", @@ -1277,11 +1302,18 @@ jwt: Groups: []string{"system:authenticated"}, }, waitAfterConfigSwap: true, + wantMetricStrings: []string{ + `apiserver_authentication_config_controller_automatic_reload_last_timestamp_seconds{apiserver_id_hash="sha256:3c607df3b2bf22c9d9f01d5314b4bbf411c48ef43ff44ff29b1d55b41367c795",status="failure"} FP`, + `apiserver_authentication_config_controller_automatic_reloads_total{apiserver_id_hash="sha256:3c607df3b2bf22c9d9f01d5314b4bbf411c48ef43ff44ff29b1d55b41367c795",status="failure"} 1`, + }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + authenticationconfigmetrics.ResetMetricsForTest() + defer authenticationconfigmetrics.ResetMetricsForTest() + ctx := testContext(t) oidcServer, apiServer, caCert, certPath := configureBasicTestInfrastructureWithRandomKeyType(t, tt.authConfigFn) @@ -1330,6 +1362,23 @@ jwt: _, err = client.CoreV1().Pods(defaultNamespace).List(ctx, metav1.ListOptions{}) tt.newAssertErrFn(t, err) + + adminClient := kubernetes.NewForConfigOrDie(apiServer.ClientConfig) + body, err := adminClient.RESTClient().Get().AbsPath("/metrics").DoRaw(ctx) + require.NoError(t, err) + var gotMetricStrings []string + trimFP := regexp.MustCompile(`(.*)(} \d+\.\d+.*)`) + for _, line := range strings.Split(string(body), "\n") { + if strings.HasPrefix(line, "apiserver_authentication_config_controller_") { + if strings.Contains(line, "_seconds") { + line = trimFP.ReplaceAllString(line, `$1`) + "} FP" // ignore floating point metric values + } + gotMetricStrings = append(gotMetricStrings, line) + } + } + if diff := cmp.Diff(tt.wantMetricStrings, gotMetricStrings); diff != "" { + t.Errorf("unexpected metrics diff (-want +got): %s", diff) + } }) } }