Mirror of https://github.com/k3s-io/kubernetes.git (synced 2026-02-22 07:03:28 +00:00)
Implement scheduler_resourceclaim_creates_total metrics for DRAExtendedResources
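This change adds a new ALPHA counter vector, scheduler_resourceclaim_creates_total, labeled by status. The DynamicResources plugin increments the "failure" or "success" child around the ResourceClaim create it issues for extended resources in bindClaim; the metric is registered only when the DRAExtendedResource feature gate is enabled, and TestPlugin gains an optional per-case metrics hook that asserts against a fresh test registry.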
@@ -53,6 +53,7 @@ import (
 	"k8s.io/kubernetes/pkg/scheduler/framework"
 	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
 	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
+	"k8s.io/kubernetes/pkg/scheduler/metrics"
 	schedutil "k8s.io/kubernetes/pkg/scheduler/util"
 	"k8s.io/kubernetes/pkg/scheduler/util/assumecache"
 	"k8s.io/kubernetes/pkg/util/slice"
@@ -1477,8 +1478,10 @@ func (pl *DynamicResources) bindClaim(ctx context.Context, state *stateData, ind
 	var err error
 	claim, err = pl.clientset.ResourceV1().ResourceClaims(claim.Namespace).Create(ctx, claim, metav1.CreateOptions{})
 	if err != nil {
+		metrics.ResourceClaimCreatesTotal.WithLabelValues("failure").Inc()
 		return nil, fmt.Errorf("create claim for extended resources %v: %w", klog.KObj(claim), err)
 	}
+	metrics.ResourceClaimCreatesTotal.WithLabelValues("success").Inc()
 	resourceClaimModified = true
 	logger.V(5).Info("created claim for extended resources", "pod", klog.KObj(pod), "node", nodeName, "resourceclaim", klog.Format(claim))
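For orientation, the two Inc() calls above are the standard component-base counter idiom: one labeled child per outcome, bumped exactly once per create attempt. A minimal, self-contained sketch of the same pattern — createClaim, doCreate, and the package layout here are illustrative, not part of the commit:

package main

import (
	"errors"
	"fmt"

	basemetrics "k8s.io/component-base/metrics"
)

var claimCreates = basemetrics.NewCounterVec(
	&basemetrics.CounterOpts{
		Subsystem:      "scheduler",
		Name:           "resourceclaim_creates_total",
		Help:           "Number of ResourceClaims creation requests within scheduler",
		StabilityLevel: basemetrics.ALPHA,
	},
	[]string{"status"})

// createClaim wraps a create call (doCreate is a stand-in for the
// clientset call in bindClaim) and records the outcome exactly once.
func createClaim(doCreate func() error) error {
	if err := doCreate(); err != nil {
		claimCreates.WithLabelValues("failure").Inc()
		return fmt.Errorf("create claim: %w", err)
	}
	claimCreates.WithLabelValues("success").Inc()
	return nil
}

func main() {
	// Register before incrementing: component-base metrics are no-ops
	// until a registry creates them.
	registry := basemetrics.NewKubeRegistry()
	registry.MustRegister(claimCreates)

	_ = createClaim(func() error { return nil })
	_ = createClaim(func() error { return errors.New("boom") })

	families, _ := registry.Gather()
	for _, f := range families {
		// Prints the family name with 2 series: success and failure.
		fmt.Println(f.GetName(), "with", len(f.GetMetric()), "series")
	}
}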
@@ -32,7 +32,6 @@ import (
 	"github.com/google/go-cmp/cmp/cmpopts"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"

 	v1 "k8s.io/api/core/v1"
 	resourceapi "k8s.io/api/resource/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -45,6 +44,8 @@ import (
 	cgotesting "k8s.io/client-go/testing"
 	"k8s.io/client-go/tools/cache"
 	"k8s.io/client-go/tools/events"
+	compbasemetrics "k8s.io/component-base/metrics"
+	"k8s.io/component-base/metrics/testutil"
 	featuregatetesting "k8s.io/component-base/featuregate/testing"
 	"k8s.io/dynamic-resource-allocation/deviceclass/extendedresourcecache"
 	resourceslicetracker "k8s.io/dynamic-resource-allocation/resourceslice/tracker"
@@ -57,12 +58,17 @@ import (
 	"k8s.io/kubernetes/pkg/scheduler/framework"
 	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
 	"k8s.io/kubernetes/pkg/scheduler/framework/runtime"
+	"k8s.io/kubernetes/pkg/scheduler/metrics"
 	st "k8s.io/kubernetes/pkg/scheduler/testing"
 	"k8s.io/kubernetes/pkg/scheduler/util/assumecache"
 	"k8s.io/kubernetes/test/utils/ktesting"
 	"k8s.io/utils/ptr"
 )
+
+func init() {
+	metrics.InitMetrics()
+}

 var (
 	podKind = v1.SchemeGroupVersion.WithKind("Pod")
@@ -777,6 +783,7 @@ func TestPlugin(t *testing.T) {
 		disableDRASchedulerFilterTimeout bool
 		skipOnWindows                    string
 		failPatch                        bool
+		metrics                          func(*testing.T, compbasemetrics.Gatherer)
 	}{
 		"empty": {
 			pod: st.MakePod().Name("foo").Namespace("default").Obj(),
@@ -1411,7 +1418,7 @@ func TestPlugin(t *testing.T) {
 				},
 			},
 		},
-		"extended-resource-name-wth-node-resource": {
+		"extended-resource-name-with-node-resource": {
 			enableDRAExtendedResource:          true,
 			enableDRADeviceBindingConditions:   true,
 			enableDRAResourceClaimDeviceStatus: true,
@@ -1419,6 +1426,10 @@ func TestPlugin(t *testing.T) {
 			pod:     podWithExtendedResourceName,
 			classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
 			want:    want{},
+			metrics: func(t *testing.T, g compbasemetrics.Gatherer) {
+				_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
+				assert.ErrorContains(t, err, "not found")
+			},
 		},
 		"extended-resource-name-with-zero-allocatable": {
 			enableDRAExtendedResource: true,
@@ -1466,6 +1477,10 @@ func TestPlugin(t *testing.T) {
 					status: fwk.NewStatus(fwk.Unschedulable, `still not schedulable`),
 				},
 			},
+			metrics: func(t *testing.T, g compbasemetrics.Gatherer) {
+				_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
+				assert.ErrorContains(t, err, "not found")
+			},
 		},
 		"extended-resource-name-with-resources": {
 			enableDRAExtendedResource: true,
@@ -1484,6 +1499,11 @@ func TestPlugin(t *testing.T) {
 					assumedClaim: reserve(extendedResourceClaim, podWithExtendedResourceName),
 				},
 			},
+			metrics: func(t *testing.T, g compbasemetrics.Gatherer) {
+				metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
+				assert.NoError(t, err)
+				assert.Equal(t, float64(1), metric["success"])
+			},
 		},
 		"implicit-extended-resource-name-with-resources": {
 			enableDRAExtendedResource: true,
@@ -1502,6 +1522,11 @@ func TestPlugin(t *testing.T) {
 					assumedClaim: reserve(implicitExtendedResourceClaim, podWithImplicitExtendedResourceName),
 				},
 			},
+			metrics: func(t *testing.T, g compbasemetrics.Gatherer) {
+				metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
+				assert.NoError(t, err)
+				assert.Equal(t, float64(1), metric["success"])
+			},
 		},
 		"implicit-extended-resource-name-two-containers-with-resources": {
 			enableDRAExtendedResource: true,
@@ -1520,6 +1545,11 @@ func TestPlugin(t *testing.T) {
 					assumedClaim: reserve(implicitExtendedResourceClaimTwoContainers, podWithImplicitExtendedResourceNameTwoContainers),
 				},
 			},
+			metrics: func(t *testing.T, g compbasemetrics.Gatherer) {
+				metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
+				assert.NoError(t, err)
+				assert.Equal(t, float64(1), metric["success"])
+			},
 		},
 		"extended-resource-name-with-resources-fail-patch": {
 			enableDRAExtendedResource: true,
@@ -1540,6 +1570,11 @@ func TestPlugin(t *testing.T) {
 					assumedClaim: reserve(extendedResourceClaim, podWithExtendedResourceName),
 				},
 			},
+			metrics: func(t *testing.T, g compbasemetrics.Gatherer) {
+				metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
+				assert.NoError(t, err)
+				assert.Equal(t, float64(1), metric["success"])
+			},
 		},
 		"extended-resource-name-with-resources-has-claim": {
 			enableDRAExtendedResource: true,
@@ -1558,6 +1593,10 @@ func TestPlugin(t *testing.T) {
 					removed: []metav1.Object{extendedResourceClaim},
 				},
 			},
+			metrics: func(t *testing.T, g compbasemetrics.Gatherer) {
+				_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
+				assert.ErrorContains(t, err, "not found")
+			},
 		},
 		"extended-resource-name-with-resources-delete-claim": {
 			enableDRAExtendedResource: true,
@@ -1576,6 +1615,10 @@ func TestPlugin(t *testing.T) {
 					removed: []metav1.Object{extendedResourceClaimNode2},
 				},
 			},
+			metrics: func(t *testing.T, g compbasemetrics.Gatherer) {
+				_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
+				assert.ErrorContains(t, err, "not found")
+			},
 		},
 		"extended-resource-name-bind-failure": {
 			enableDRAExtendedResource: true,
@@ -1594,6 +1637,11 @@ func TestPlugin(t *testing.T) {
 					removed: []metav1.Object{reserve(extendedResourceClaim, podWithExtendedResourceName)},
 				},
 			},
+			metrics: func(t *testing.T, g compbasemetrics.Gatherer) {
+				metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
+				assert.NoError(t, err)
+				assert.Equal(t, float64(1), metric["success"])
+			},
 		},
 		"extended-resource-name-skip-bind": {
 			enableDRAExtendedResource: true,
@@ -1606,6 +1654,11 @@ func TestPlugin(t *testing.T) {
 				},
 				unreserveBeforePreBind: &result{},
 			},
+			metrics: func(t *testing.T, g compbasemetrics.Gatherer) {
+				metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
+				assert.NoError(t, err)
+				assert.Equal(t, float64(1), metric["success"])
+			},
 		},
 		"canceled": {
 			cancelFilter: true,
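A note on the assertions in the new metrics hooks: testutil.GetCounterValuesFromGatherer reports a "not found" error when the gathered output contains no family with the requested name, which — given the Reset() in setupMetrics below — means no label child was ever incremented. A small sketch of both outcomes, using an illustrative metric name rather than the scheduler's:

package demo

import (
	"testing"

	"github.com/stretchr/testify/assert"
	basemetrics "k8s.io/component-base/metrics"
	"k8s.io/component-base/metrics/testutil"
)

func TestCounterVisibility(t *testing.T) {
	counter := basemetrics.NewCounterVec(
		&basemetrics.CounterOpts{Name: "demo_creates_total", Help: "demo counter"},
		[]string{"status"})
	registry := basemetrics.NewKubeRegistry()
	registry.MustRegister(counter)

	// Never incremented: the vector has no children, Gather emits no
	// family, and the lookup fails with "not found".
	_, err := testutil.GetCounterValuesFromGatherer(registry, "demo_creates_total", map[string]string{}, "status")
	assert.ErrorContains(t, err, "not found")

	// One increment later the value is visible, keyed by the "status" label.
	counter.WithLabelValues("success").Inc()
	values, err := testutil.GetCounterValuesFromGatherer(registry, "demo_creates_total", map[string]string{}, "status")
	assert.NoError(t, err)
	assert.Equal(t, float64(1), values["success"])
}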
@@ -1966,7 +2019,6 @@ func TestPlugin(t *testing.T) {
 		if len(tc.skipOnWindows) > 0 && goruntime.GOOS == "windows" {
 			t.Skipf("Skipping '%s' test case on Windows, reason: %s", name, tc.skipOnWindows)
 		}
 		// We can run in parallel because logging is per-test.
 		tc := tc
 		t.Run(name, func(t *testing.T) {
 			nodes := tc.nodes
@@ -1987,6 +2039,10 @@ func TestPlugin(t *testing.T) {
 			featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DRAExtendedResource, tc.enableDRAExtendedResource)
 			testCtx := setup(t, tc.args, nodes, tc.claims, tc.classes, tc.objs, feats, tc.failPatch)
 			initialObjects := testCtx.listAll(t)
+			var registry compbasemetrics.KubeRegistry
+			if tc.metrics != nil {
+				registry = setupMetrics(feats)
+			}

 			status := testCtx.p.PreEnqueue(testCtx.ctx, tc.pod)
 			t.Run("PreEnqueue", func(t *testing.T) {
@@ -2103,10 +2159,24 @@ func TestPlugin(t *testing.T) {
 					testCtx.verify(t, tc.want.postfilter, initialObjects, nil, status)
 				})
 			}
+			if tc.metrics != nil {
+				tc.metrics(t, registry)
+			}
 		})
 	}
 }
+
+func setupMetrics(features feature.Features) compbasemetrics.KubeRegistry {
+	// The feature gate is not set globally, so we can't use metrics.Register().
+	// Use a fresh registry instead of the global one.
+	testRegistry := compbasemetrics.NewKubeRegistry()
+	if features.EnableDRAExtendedResource {
+		testRegistry.MustRegister(metrics.ResourceClaimCreatesTotal)
+		metrics.ResourceClaimCreatesTotal.Reset()
+	}
+	return testRegistry
+}

 type testContext struct {
 	ctx    context.Context
 	client *fake.Clientset
@@ -127,6 +127,9 @@ var (
 	AsyncAPICallDuration *metrics.HistogramVec
 	AsyncAPIPendingCalls *metrics.GaugeVec

+	// ResourceClaimCreatesTotal is only available when the DRAExtendedResource feature gate is enabled.
+	ResourceClaimCreatesTotal *metrics.CounterVec
+
 	// metricsList is a list of all metrics that should be registered always, regardless of any feature gate's value.
 	metricsList []metrics.Registerable
 )
@@ -154,6 +157,9 @@ func Register() {
 				AsyncAPIPendingCalls,
 			)
 		}
+		if utilfeature.DefaultFeatureGate.Enabled(features.DRAExtendedResource) {
+			RegisterMetrics(ResourceClaimCreatesTotal)
+		}
 	})
 }
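Note that gating the registration itself, rather than only the Inc() calls, means clusters running without DRAExtendedResource expose no scheduler_resourceclaim_creates_total series at all, as opposed to a series stuck at zero; dashboards and alerts should read an absent series as "feature disabled" rather than "no create attempts".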
@@ -377,6 +383,15 @@ func InitMetrics() {
 		},
 		[]string{"call_type"})

+	ResourceClaimCreatesTotal = metrics.NewCounterVec(
+		&metrics.CounterOpts{
+			Subsystem:      SchedulerSubsystem,
+			Name:           "resourceclaim_creates_total",
+			Help:           "Number of ResourceClaims creation requests within scheduler",
+			StabilityLevel: metrics.ALPHA,
+		},
+		[]string{"status"})
+
 	metricsList = []metrics.Registerable{
 		scheduleAttempts,
 		schedulingLatency,
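End to end, the registered counter surfaces in the scheduler's /metrics output in the usual Prometheus text format. A hedged sketch of what that exposition looks like, checked with testutil.GatherAndCompare against a throwaway registry — the expected text reflects how component-base prefixes help strings with the stability level:

package main

import (
	"fmt"
	"strings"

	basemetrics "k8s.io/component-base/metrics"
	"k8s.io/component-base/metrics/testutil"
)

func main() {
	counter := basemetrics.NewCounterVec(
		&basemetrics.CounterOpts{
			Subsystem:      "scheduler",
			Name:           "resourceclaim_creates_total",
			Help:           "Number of ResourceClaims creation requests within scheduler",
			StabilityLevel: basemetrics.ALPHA,
		},
		[]string{"status"})
	registry := basemetrics.NewKubeRegistry()
	registry.MustRegister(counter)
	counter.WithLabelValues("success").Inc()

	expected := `# HELP scheduler_resourceclaim_creates_total [ALPHA] Number of ResourceClaims creation requests within scheduler
# TYPE scheduler_resourceclaim_creates_total counter
scheduler_resourceclaim_creates_total{status="success"} 1
`
	// Compare the gathered output for just this metric name.
	if err := testutil.GatherAndCompare(registry, strings.NewReader(expected), "scheduler_resourceclaim_creates_total"); err != nil {
		fmt.Println("unexpected exposition:", err)
		return
	}
	fmt.Println("metric exposed as expected")
}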