diff --git a/pkg/kubelet/volumemanager/cache/desired_state_of_wold_selinux_metrics.go b/pkg/kubelet/volumemanager/cache/desired_state_of_wold_selinux_metrics.go index 4dd5ecd2eff..b7b6a94f8a1 100644 --- a/pkg/kubelet/volumemanager/cache/desired_state_of_wold_selinux_metrics.go +++ b/pkg/kubelet/volumemanager/cache/desired_state_of_wold_selinux_metrics.go @@ -25,37 +25,45 @@ import ( var ( // TODO: add plugin name + access mode labels to all these metrics - seLinuxContainerContextErrors = compbasemetrics.NewGauge( + seLinuxContainerContextErrors = compbasemetrics.NewGaugeVec( &compbasemetrics.GaugeOpts{ Name: "volume_manager_selinux_container_errors_total", Help: "Number of errors when kubelet cannot compute SELinux context for a container. Kubelet can't start such a Pod then and it will retry, therefore value of this metric may not represent the actual nr. of containers.", StabilityLevel: compbasemetrics.ALPHA, - }) - seLinuxContainerContextWarnings = compbasemetrics.NewGauge( + }, + []string{"access_mode"}, + ) + seLinuxContainerContextWarnings = compbasemetrics.NewGaugeVec( &compbasemetrics.GaugeOpts{ Name: "volume_manager_selinux_container_warnings_total", StabilityLevel: compbasemetrics.ALPHA, Help: "Number of errors when kubelet cannot compute SELinux context for a container that are ignored. They will become real errors when SELinuxMountReadWriteOncePod feature is expanded to all volume access modes.", - }) - seLinuxPodContextMismatchErrors = compbasemetrics.NewGauge( + }, + []string{"access_mode"}, + ) + seLinuxPodContextMismatchErrors = compbasemetrics.NewGaugeVec( &compbasemetrics.GaugeOpts{ Name: "volume_manager_selinux_pod_context_mismatch_errors_total", Help: "Number of errors when a Pod defines different SELinux contexts for its containers that use the same volume. Kubelet can't start such a Pod then and it will retry, therefore value of this metric may not represent the actual nr. of Pods.", StabilityLevel: compbasemetrics.ALPHA, - }) - seLinuxPodContextMismatchWarnings = compbasemetrics.NewGauge( + }, + []string{"access_mode"}, + ) + seLinuxPodContextMismatchWarnings = compbasemetrics.NewGaugeVec( &compbasemetrics.GaugeOpts{ Name: "volume_manager_selinux_pod_context_mismatch_warnings_total", Help: "Number of errors when a Pod defines different SELinux contexts for its containers that use the same volume. They are not errors yet, but they will become real errors when SELinuxMountReadWriteOncePod feature is expanded to all volume access modes.", StabilityLevel: compbasemetrics.ALPHA, - }) + }, + []string{"access_mode"}, + ) seLinuxVolumeContextMismatchErrors = compbasemetrics.NewGaugeVec( &compbasemetrics.GaugeOpts{ Name: "volume_manager_selinux_volume_context_mismatch_errors_total", Help: "Number of errors when a Pod uses a volume that is already mounted with a different SELinux context than the Pod needs. Kubelet can't start such a Pod then and it will retry, therefore value of this metric may not represent the actual nr. of Pods.", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"volume_plugin"}, + []string{"volume_plugin", "access_mode"}, ) seLinuxVolumeContextMismatchWarnings = compbasemetrics.NewGaugeVec( &compbasemetrics.GaugeOpts{ @@ -63,7 +71,7 @@ var ( Help: "Number of errors when a Pod uses a volume that is already mounted with a different SELinux context than the Pod needs. They are not errors yet, but they will become real errors when SELinuxMountReadWriteOncePod feature is expanded to all volume access modes.", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"volume_plugin"}, + []string{"volume_plugin", "access_mode"}, ) seLinuxVolumesAdmitted = compbasemetrics.NewGaugeVec( &compbasemetrics.GaugeOpts{ @@ -71,7 +79,7 @@ var ( Help: "Number of volumes whose SELinux context was fine and will be mounted with mount -o context option.", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"volume_plugin"}, + []string{"volume_plugin", "access_mode"}, ) registerMetrics sync.Once diff --git a/pkg/kubelet/volumemanager/cache/desired_state_of_world.go b/pkg/kubelet/volumemanager/cache/desired_state_of_world.go index 7c417516b24..d5a4da7f6eb 100644 --- a/pkg/kubelet/volumemanager/cache/desired_state_of_world.go +++ b/pkg/kubelet/volumemanager/cache/desired_state_of_world.go @@ -275,6 +275,7 @@ func (dsw *desiredStateOfWorld) AddPodToVolume( err) } volumePluginName := getVolumePluginNameWithDriver(volumePlugin, volumeSpec) + accessMode := getVolumeAccessMode(volumeSpec) var volumeName v1.UniqueVolumeName @@ -328,7 +329,7 @@ func (dsw *desiredStateOfWorld) AddPodToVolume( effectiveSELinuxMountLabel = "" } if seLinuxFileLabel != "" { - seLinuxVolumesAdmitted.WithLabelValues(volumePluginName).Add(1.0) + seLinuxVolumesAdmitted.WithLabelValues(volumePluginName, accessMode).Add(1.0) } vmt := volumeToMount{ volumeName: volumeName, @@ -369,8 +370,8 @@ func (dsw *desiredStateOfWorld) AddPodToVolume( err := handleSELinuxMetricError( fullErr, supported, - seLinuxVolumeContextMismatchWarnings.WithLabelValues(volumePluginName), - seLinuxVolumeContextMismatchErrors.WithLabelValues(volumePluginName)) + seLinuxVolumeContextMismatchWarnings.WithLabelValues(volumePluginName, accessMode), + seLinuxVolumeContextMismatchErrors.WithLabelValues(volumePluginName, accessMode)) if err != nil { return "", err } @@ -414,7 +415,13 @@ func (dsw *desiredStateOfWorld) getSELinuxLabel(volumeSpec *volume.Spec, seLinux newLabel, err := dsw.seLinuxTranslator.SELinuxOptionsToFileLabel(containerContext) if err != nil { fullErr := fmt.Errorf("failed to construct SELinux label from context %q: %s", containerContext, err) - if err := handleSELinuxMetricError(fullErr, seLinuxSupported, seLinuxContainerContextWarnings, seLinuxContainerContextErrors); err != nil { + accessMode := getVolumeAccessMode(volumeSpec) + err := handleSELinuxMetricError( + fullErr, + seLinuxSupported, + seLinuxContainerContextWarnings.WithLabelValues(accessMode), + seLinuxContainerContextErrors.WithLabelValues(accessMode)) + if err != nil { return "", false, err } } @@ -423,8 +430,15 @@ func (dsw *desiredStateOfWorld) getSELinuxLabel(volumeSpec *volume.Spec, seLinux continue } if seLinuxFileLabel != newLabel { + accessMode := getVolumeAccessMode(volumeSpec) + fullErr := fmt.Errorf("volume %s is used with two different SELinux contexts in the same pod: %q, %q", volumeSpec.Name(), seLinuxFileLabel, newLabel) - if err := handleSELinuxMetricError(fullErr, seLinuxSupported, seLinuxPodContextMismatchWarnings, seLinuxPodContextMismatchErrors); err != nil { + err := handleSELinuxMetricError( + fullErr, + seLinuxSupported, + seLinuxPodContextMismatchWarnings.WithLabelValues(accessMode), + seLinuxPodContextMismatchErrors.WithLabelValues(accessMode)) + if err != nil { return "", false, err } } @@ -685,3 +699,26 @@ func getVolumePluginNameWithDriver(plugin volume.VolumePlugin, spec *volume.Spec // `/` is used to separate plugin + CSI driver in util.GetUniqueVolumeName() too return pluginName + "/" + driverName } + +func getVolumeAccessMode(spec *volume.Spec) string { + if spec.PersistentVolume == nil { + // In-line volumes in pod do not have a specific access mode, using "inline". + return "inline" + } + // For purpose of this PR, report only the "highest" access mode in this order: RWX (highest priority), ROX, RWO, RWOP (lowest priority + pv := spec.PersistentVolume + if util.ContainsAccessMode(pv.Spec.AccessModes, v1.ReadWriteMany) { + return "RWX" + } + if util.ContainsAccessMode(pv.Spec.AccessModes, v1.ReadOnlyMany) { + return "ROX" + } + if util.ContainsAccessMode(pv.Spec.AccessModes, v1.ReadWriteOnce) { + return "RWO" + } + if util.ContainsAccessMode(pv.Spec.AccessModes, v1.ReadWriteOncePod) { + return "RWOP" + } + // This should not happen, validation does not allow empty or unknown AccessModes. + return "" +} diff --git a/test/e2e/storage/csi_mock/csi_selinux_mount.go b/test/e2e/storage/csi_mock/csi_selinux_mount.go index 89f28514717..606881c421c 100644 --- a/test/e2e/storage/csi_mock/csi_selinux_mount.go +++ b/test/e2e/storage/csi_mock/csi_selinux_mount.go @@ -31,6 +31,8 @@ import ( "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/component-base/metrics/testutil" + "k8s.io/kubernetes/pkg/apis/core/v1/helper" "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/kubelet/events" "k8s.io/kubernetes/test/e2e/feature" @@ -316,6 +318,24 @@ var _ = utils.SIGDescribe("CSI Mock selinux on mount", func() { }) }) +var ( + // SELinux metrics that have volume_plugin and access_mode labels + metricsWithVolumePluginLabel = sets.NewString( + "volume_manager_selinux_volume_context_mismatch_errors_total", + "volume_manager_selinux_volume_context_mismatch_warnings_total", + "volume_manager_selinux_volumes_admitted_total", + ) + // SELinuxMetrics that have only access_mode label + metricsWithoutVolumePluginLabel = sets.NewString( + "volume_manager_selinux_container_errors_total", + "volume_manager_selinux_container_warnings_total", + "volume_manager_selinux_pod_context_mismatch_errors_total", + "volume_manager_selinux_pod_context_mismatch_warnings_total", + ) + // All SELinux metrics + allMetrics = metricsWithoutVolumePluginLabel.Union(metricsWithVolumePluginLabel) +) + var _ = utils.SIGDescribe("CSI Mock selinux on mount metrics", func() { f := framework.NewDefaultFramework("csi-mock-volumes-selinux-metrics") f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged @@ -389,6 +409,17 @@ var _ = utils.SIGDescribe("CSI Mock selinux on mount metrics", func() { expectIncreases: sets.NewString("volume_manager_selinux_volume_context_mismatch_errors_total"), testTags: []interface{}{framework.WithFeatureGate(features.SELinuxMountReadWriteOncePod), framework.WithFeatureGate(features.SELinuxMount)}, }, + { + name: "error is bumped on two Pods with a different context on RWX volume and SELinuxMount enabled", + csiDriverSELinuxEnabled: true, + firstPodSELinuxOpts: &seLinuxOpts1, + secondPodSELinuxOpts: &seLinuxOpts2, + secondPodFailureEvent: "conflicting SELinux labels of volume", + volumeMode: v1.ReadWriteMany, + waitForSecondPodStart: false, + expectIncreases: sets.NewString("volume_manager_selinux_volume_context_mismatch_errors_total"), + testTags: []interface{}{framework.WithFeatureGate(features.SELinuxMountReadWriteOncePod), framework.WithFeatureGate(features.SELinuxMount)}, + }, { name: "error is bumped on two Pods with a different context on RWOP volume", csiDriverSELinuxEnabled: true, @@ -405,19 +436,7 @@ var _ = utils.SIGDescribe("CSI Mock selinux on mount metrics", func() { t := t testFunc := func(ctx context.Context) { // Some metrics use CSI driver name as a label, which is "csi-mock-" + the namespace name. - volumePluginLabel := "{volume_plugin=\"kubernetes.io/csi/csi-mock-" + f.Namespace.Name + "\"}" - - // All SELinux metrics. Unless explicitly mentioned in test.expectIncreases, these metrics must not grow during - // a test. - allMetrics := sets.NewString( - "volume_manager_selinux_container_errors_total", - "volume_manager_selinux_container_warnings_total", - "volume_manager_selinux_pod_context_mismatch_errors_total", - "volume_manager_selinux_pod_context_mismatch_warnings_total", - "volume_manager_selinux_volume_context_mismatch_errors_total"+volumePluginLabel, - "volume_manager_selinux_volume_context_mismatch_warnings_total"+volumePluginLabel, - "volume_manager_selinux_volumes_admitted_total"+volumePluginLabel, - ) + volumePluginLabel := "volume_plugin=\"kubernetes.io/csi/csi-mock-" + f.Namespace.Name + "\"" if framework.NodeOSDistroIs("windows") { e2eskipper.Skipf("SELinuxMount is only applied on linux nodes -- skipping") @@ -444,7 +463,7 @@ var _ = utils.SIGDescribe("CSI Mock selinux on mount metrics", func() { ginkgo.By("Grabbing initial metrics") pod, err = m.cs.CoreV1().Pods(pod.Namespace).Get(ctx, pod.Name, metav1.GetOptions{}) framework.ExpectNoError(err, "getting the initial pod") - metrics, err := grabMetrics(ctx, grabber, pod.Spec.NodeName, allMetrics) + metrics, err := grabMetrics(ctx, grabber, pod.Spec.NodeName, allMetrics, volumePluginLabel) framework.ExpectNoError(err, "collecting the initial metrics") dumpMetrics(metrics) @@ -472,8 +491,9 @@ var _ = utils.SIGDescribe("CSI Mock selinux on mount metrics", func() { } // Assert: count the metrics - ginkgo.By("Waiting for expected metric changes") - err = waitForMetricIncrease(ctx, grabber, pod.Spec.NodeName, allMetrics, t.expectIncreases, metrics, framework.PodStartShortTimeout) + expectIncreaseWithLabels := addLabels(t.expectIncreases, volumePluginLabel, t.volumeMode) + framework.Logf("Waiting for changes of metrics %+v", expectIncreaseWithLabels) + err = waitForMetricIncrease(ctx, grabber, pod.Spec.NodeName, volumePluginLabel, allMetrics, expectIncreaseWithLabels, metrics, framework.PodStartShortTimeout) framework.ExpectNoError(err, "waiting for metrics %s to increase", t.expectIncreases) } // t.testTags is array and it's not possible to use It("name", func(){xxx}, t.testTags...) @@ -488,7 +508,7 @@ var _ = utils.SIGDescribe("CSI Mock selinux on mount metrics", func() { }) }) -func grabMetrics(ctx context.Context, grabber *e2emetrics.Grabber, nodeName string, metricNames sets.String) (map[string]float64, error) { +func grabMetrics(ctx context.Context, grabber *e2emetrics.Grabber, nodeName string, metricNames sets.String, volumePluginLabel string) (map[string]float64, error) { response, err := grabber.GrabFromKubelet(ctx, nodeName) framework.ExpectNoError(err) @@ -497,12 +517,19 @@ func grabMetrics(ctx context.Context, grabber *e2emetrics.Grabber, nodeName stri if len(samples) == 0 { continue } - // Find the *last* sample that has the label we are interested in. - for i := len(samples) - 1; i >= 0; i-- { - metricNameWithLabels := samples[i].Metric.String() - if metricNames.Has(metricNameWithLabels) { + // For each metric + label combination, remember the last sample + for i := range samples { + // E.g. "volume_manager_selinux_pod_context_mismatch_errors_total" + metricName := samples[i].Metric[testutil.MetricNameLabel] + if metricNames.Has(string(metricName)) { + // E.g. "volume_manager_selinux_pod_context_mismatch_errors_total{access_mode="RWOP",volume_plugin="kubernetes.io/csi/csi-mock-ns"} + metricNameWithLabels := samples[i].Metric.String() + // Filter out metrics of any other volume plugin + if strings.Contains(metricNameWithLabels, "volume_plugin=") && !strings.Contains(metricNameWithLabels, volumePluginLabel) { + continue + } + // Overwrite any previous value, so only the last one is stored. metrics[metricNameWithLabels] = float64(samples[i].Value) - break } } } @@ -510,32 +537,21 @@ func grabMetrics(ctx context.Context, grabber *e2emetrics.Grabber, nodeName stri return metrics, nil } -func waitForMetricIncrease(ctx context.Context, grabber *e2emetrics.Grabber, nodeName string, allMetricNames, expectedIncreaseNames sets.String, initialValues map[string]float64, timeout time.Duration) error { +func waitForMetricIncrease(ctx context.Context, grabber *e2emetrics.Grabber, nodeName string, volumePluginLabel string, allMetricNames, expectedIncreaseNames sets.String, initialValues map[string]float64, timeout time.Duration) error { var noIncreaseMetrics sets.String var metrics map[string]float64 err := wait.Poll(time.Second, timeout, func() (bool, error) { var err error - metrics, err = grabMetrics(ctx, grabber, nodeName, allMetricNames) + metrics, err = grabMetrics(ctx, grabber, nodeName, allMetricNames, volumePluginLabel) if err != nil { return false, err } noIncreaseMetrics = sets.NewString() // Always evaluate all SELinux metrics to check that the other metrics are not unexpectedly increased. - for name := range allMetricNames { - expectIncrease := false - - // allMetricNames can include "{volume_plugin="XXX"}", while expectedIncreaseNames does not. - // Compare them properly. Value of volume_plugin="XXX" was already checked in grabMetrics(), - // we can ignore it here. - for expectedIncreaseMetricName := range expectedIncreaseNames { - if strings.HasPrefix(name, expectedIncreaseMetricName) { - expectIncrease = true - break - } - } - if expectIncrease { + for name := range metrics { + if expectedIncreaseNames.Has(name) { if metrics[name] <= initialValues[name] { noIncreaseMetrics.Insert(name) } @@ -570,3 +586,22 @@ func dumpMetrics(metrics map[string]float64) { framework.Logf("Metric %s: %v", key, metrics[key]) } } + +// Add labels to the metric name based on the current test case +func addLabels(metricNames sets.String, volumePluginLabel string, accessMode v1.PersistentVolumeAccessMode) sets.String { + ret := sets.NewString() + accessModeShortString := helper.GetAccessModesAsString([]v1.PersistentVolumeAccessMode{accessMode}) + + for metricName := range metricNames { + var metricWithLabels string + if metricsWithVolumePluginLabel.Has(metricName) { + metricWithLabels = fmt.Sprintf("%s{access_mode=\"%s\", %s}", metricName, accessModeShortString, volumePluginLabel) + } else { + metricWithLabels = fmt.Sprintf("%s{access_mode=\"%s\"}", metricName, accessModeShortString) + } + + ret.Insert(metricWithLabels) + } + + return ret +}