Merge pull request #59529 from wackxu/addmetricvol

Add metrics for volume scheduling operations
This commit is contained in:
k8s-ci-robot 2018-10-23 13:52:29 -07:00 committed by GitHub
commit 101d26c613
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 102 additions and 10 deletions

View File

@ -13,6 +13,7 @@ go_library(
"pv_controller.go",
"pv_controller_base.go",
"scheduler_assume_cache.go",
"scheduler_bind_cache_metrics.go",
"scheduler_binder.go",
"scheduler_binder_cache.go",
"scheduler_binder_fake.go",
@ -58,6 +59,7 @@ go_library(
"//staging/src/k8s.io/cloud-provider:go_default_library",
"//staging/src/k8s.io/csi-api/pkg/client/clientset/versioned:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
],
)

View File

@ -1,14 +1,10 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = ["metrics.go"],
importpath = "k8s.io/kubernetes/pkg/controller/volume/persistentvolume/metrics",
visibility = ["//visibility:public"],
deps = [
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
@ -27,4 +23,5 @@ filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@ -0,0 +1,60 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package persistentvolume
import (
"github.com/prometheus/client_golang/prometheus"
)
// VolumeSchedulerSubsystem is the Prometheus subsystem name set on every
// volume scheduling metric declared in this file.
const VolumeSchedulerSubsystem = "scheduler_volume"
// Metrics describing volume scheduling operations. Every metric carries a
// single "operation" label.
var (
	// VolumeBindingRequestSchedulerBinderCache counts requests made against
	// the volume binding cache, partitioned by operation.
	VolumeBindingRequestSchedulerBinderCache = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: VolumeSchedulerSubsystem,
			Name:      "binder_cache_requests_total",
			Help:      "Total number for request volume binding cache",
		},
		[]string{"operation"},
	)
	// VolumeSchedulingStageLatency records how long a volume scheduling
	// stage took, in seconds, partitioned by operation.
	VolumeSchedulingStageLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Subsystem: VolumeSchedulerSubsystem,
			Name:      "scheduling_duration_seconds",
			Help:      "Volume scheduling stage latency",
			// The metric is expressed in seconds, so the buckets must be too:
			// 15 buckets doubling from 1ms up to ~16.4s. Starting at 1000
			// would make the smallest bucket 1000 seconds and collapse every
			// realistic observation into it.
			Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
		},
		[]string{"operation"},
	)
	// VolumeSchedulingStageFailed counts volume scheduling stages that
	// ended in an error, partitioned by operation.
	VolumeSchedulingStageFailed = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: VolumeSchedulerSubsystem,
			Name:      "scheduling_stage_error_total",
			Help:      "Volume scheduling stage error count",
		},
		[]string{"operation"},
	)
)
// RegisterVolumeSchedulingMetrics registers the volume scheduling metrics with
// the default Prometheus registry. The volume binding cache is a library used
// by the scheduler process, so registration is exposed here for the scheduler
// to call.
func RegisterVolumeSchedulingMetrics() {
	collectors := []prometheus.Collector{
		VolumeBindingRequestSchedulerBinderCache,
		VolumeSchedulingStageLatency,
		VolumeSchedulingStageFailed,
	}
	// Register in declaration order; MustRegister panics on the first failure.
	for _, collector := range collectors {
		prometheus.MustRegister(collector)
	}
}

View File

@ -149,6 +149,13 @@ func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (unboundVolume
// Initialize to true for pods that don't have volumes
unboundVolumesSatisfied = true
boundVolumesSatisfied = true
start := time.Now()
defer func() {
VolumeSchedulingStageLatency.WithLabelValues("predicate").Observe(time.Since(start).Seconds())
if err != nil {
VolumeSchedulingStageFailed.WithLabelValues("predicate").Inc()
}
}()
// The pod's volumes need to be processed in one call to avoid the race condition where
// volumes can get bound/provisioned in between calls.
@ -198,6 +205,13 @@ func (b *volumeBinder) AssumePodVolumes(assumedPod *v1.Pod, nodeName string) (al
podName := getPodName(assumedPod)
glog.V(4).Infof("AssumePodVolumes for pod %q, node %q", podName, nodeName)
start := time.Now()
defer func() {
VolumeSchedulingStageLatency.WithLabelValues("assume").Observe(time.Since(start).Seconds())
if err != nil {
VolumeSchedulingStageFailed.WithLabelValues("assume").Inc()
}
}()
if allBound := b.arePodVolumesBound(assumedPod); allBound {
glog.V(4).Infof("AssumePodVolumes for pod %q, node %q: all PVCs bound and nothing to do", podName, nodeName)
@ -264,15 +278,23 @@ func (b *volumeBinder) AssumePodVolumes(assumedPod *v1.Pod, nodeName string) (al
// BindPodVolumes gets the cached bindings and PVCs to provision in podBindingCache,
// makes the API update for those PVs/PVCs, and waits for the PVCs to be completely bound
// by the PV controller.
func (b *volumeBinder) BindPodVolumes(assumedPod *v1.Pod) error {
func (b *volumeBinder) BindPodVolumes(assumedPod *v1.Pod) (err error) {
podName := getPodName(assumedPod)
glog.V(4).Infof("BindPodVolumes for pod %q, node %q", podName, assumedPod.Spec.NodeName)
start := time.Now()
defer func() {
VolumeSchedulingStageLatency.WithLabelValues("bind").Observe(time.Since(start).Seconds())
if err != nil {
VolumeSchedulingStageFailed.WithLabelValues("bind").Inc()
}
}()
bindings := b.podBindingCache.GetBindings(assumedPod, assumedPod.Spec.NodeName)
claimsToProvision := b.podBindingCache.GetProvisionedPVCs(assumedPod, assumedPod.Spec.NodeName)
// Start API operations
err := b.bindAPIUpdate(podName, bindings, claimsToProvision)
err = b.bindAPIUpdate(podName, bindings, claimsToProvision)
if err != nil {
return err
}

View File

@ -77,7 +77,11 @@ func (c *podBindingCache) DeleteBindings(pod *v1.Pod) {
defer c.rwMutex.Unlock()
podName := getPodName(pod)
delete(c.bindingDecisions, podName)
if _, ok := c.bindingDecisions[podName]; ok {
delete(c.bindingDecisions, podName)
VolumeBindingRequestSchedulerBinderCache.WithLabelValues("delete").Inc()
}
}
func (c *podBindingCache) UpdateBindings(pod *v1.Pod, node string, bindings []*bindingInfo) {
@ -95,6 +99,7 @@ func (c *podBindingCache) UpdateBindings(pod *v1.Pod, node string, bindings []*b
decision = nodeDecision{
bindings: bindings,
}
VolumeBindingRequestSchedulerBinderCache.WithLabelValues("add").Inc()
} else {
decision.bindings = bindings
}

View File

@ -9,7 +9,10 @@ go_library(
name = "go_default_library",
srcs = ["metrics.go"],
importpath = "k8s.io/kubernetes/pkg/scheduler/metrics",
deps = ["//vendor/github.com/prometheus/client_golang/prometheus:go_default_library"],
deps = [
"//pkg/controller/volume/persistentvolume:go_default_library",
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
],
)
filegroup(

View File

@ -21,6 +21,7 @@ import (
"time"
"github.com/prometheus/client_golang/prometheus"
"k8s.io/kubernetes/pkg/controller/volume/persistentvolume"
)
const (
@ -171,6 +172,8 @@ func Register() {
for _, metric := range metricsList {
prometheus.MustRegister(metric)
}
persistentvolume.RegisterVolumeSchedulingMetrics()
})
}