Add fake runtimes and CRI changes for KEP-2371

Added a new gRPC call, 'ListPodSandboxMetrics', which returns additional container stats that are currently supported by cAdvisor but are outside the scope of the /stats/summary API. Added new types to support exporting metrics to Prometheus, including Metric and its related subfields. Added the fake runtime changes associated with the CRI changes.
2025-07-30 23:15:14 +00:00 · 2022-10-12 19:08:31 +00:00 · 2022-10-12 19:08:31 +00:00 · dcc7c2f660
commit dcc7c2f660
parent 435606b109
7 changed files with 2626 additions and 404 deletions
--- a/pkg/kubelet/cri/remote/fake/fake_runtime.go
+++ b/pkg/kubelet/cri/remote/fake/fake_runtime.go
@ -336,3 +336,23 @@ func (f *RemoteRuntime) CheckpointContainer(ctx context.Context, req *kubeapi.Ch
 func (f *RemoteRuntime) GetContainerEvents(req *kubeapi.GetEventsRequest, ces kubeapi.RuntimeService_GetContainerEventsServer) error {
 	return nil
 }
+
+// ListMetricDescriptors gets the descriptors for the metrics that will be returned in ListPodSandboxMetrics.
+// It delegates to f.RuntimeService and wraps the returned descriptors in a
+// ListMetricDescriptorsResponse. The req argument is not read. If the
+// underlying call fails, the error is returned as-is together with a nil response.
+func (f *RemoteRuntime) ListMetricDescriptors(ctx context.Context, req *kubeapi.ListMetricDescriptorsRequest) (*kubeapi.ListMetricDescriptorsResponse, error) {
+	descs, err := f.RuntimeService.ListMetricDescriptors(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	return &kubeapi.ListMetricDescriptorsResponse{Descriptors: descs}, nil
+}
+
+// ListPodSandboxMetrics retrieves the metrics for all pod sandboxes.
+// It delegates to f.RuntimeService and wraps the returned slice in a
+// ListPodSandboxMetricsResponse. The req argument is not read. If the
+// underlying call fails, the error is returned as-is together with a nil response.
+func (f *RemoteRuntime) ListPodSandboxMetrics(ctx context.Context, req *kubeapi.ListPodSandboxMetricsRequest) (*kubeapi.ListPodSandboxMetricsResponse, error) {
+	podMetrics, err := f.RuntimeService.ListPodSandboxMetrics(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	return &kubeapi.ListPodSandboxMetricsResponse{PodMetrics: podMetrics}, nil
+}
--- a/pkg/kubelet/cri/remote/remote_runtime.go
+++ b/pkg/kubelet/cri/remote/remote_runtime.go
@ -815,3 +815,33 @@ func (r *remoteRuntimeService) GetContainerEvents(containerEventsCh chan *runtim
 		}
 	}
 }
+
+// ListMetricDescriptors gets the descriptors for the metrics that will be returned in ListPodSandboxMetrics.
+// The request is bounded by r.timeout in addition to any deadline already
+// carried by ctx. On failure the error is logged and returned with a nil slice;
+// on success the descriptors from the runtime's response are returned.
+func (r *remoteRuntimeService) ListMetricDescriptors(ctx context.Context) ([]*runtimeapi.MetricDescriptor, error) {
+	ctx, cancel := context.WithTimeout(ctx, r.timeout)
+	defer cancel()
+
+	resp, err := r.runtimeClient.ListMetricDescriptors(ctx, &runtimeapi.ListMetricDescriptorsRequest{})
+	if err != nil {
+		klog.ErrorS(err, "ListMetricDescriptors from runtime service failed")
+		return nil, err
+	}
+	// The payload is a list of descriptors, so log it under a key that says so
+	// (the "stats" key used by the stats calls would be misleading here).
+	klog.V(10).InfoS("[RemoteRuntimeService] ListMetricDescriptors Response", "descriptors", resp.GetDescriptors())
+
+	return resp.GetDescriptors(), nil
+}
+
+// ListPodSandboxMetrics retrieves the metrics for all pod sandboxes.
+// The request is bounded by r.timeout in addition to any deadline already
+// carried by ctx. On failure the error is logged and returned with a nil slice;
+// on success the pod metrics from the runtime's response are returned.
+func (r *remoteRuntimeService) ListPodSandboxMetrics(ctx context.Context) ([]*runtimeapi.PodSandboxMetrics, error) {
+	ctx, cancel := context.WithTimeout(ctx, r.timeout)
+	defer cancel()
+
+	resp, err := r.runtimeClient.ListPodSandboxMetrics(ctx, &runtimeapi.ListPodSandboxMetricsRequest{})
+	if err != nil {
+		klog.ErrorS(err, "ListPodSandboxMetrics from runtime service failed")
+		return nil, err
+	}
+	// The full response payload is only logged at verbosity level 10.
+	klog.V(10).InfoS("[RemoteRuntimeService] ListPodSandboxMetrics Response", "stats", resp.GetPodMetrics())
+
+	return resp.GetPodMetrics(), nil
+}
--- a/pkg/kubelet/kuberuntime/instrumented_services.go
+++ b/pkg/kubelet/kuberuntime/instrumented_services.go
@ -343,3 +343,21 @@ func (in instrumentedRuntimeService) GetContainerEvents(containerEventsCh chan *
 	recordError(operation, err)
 	return err
 }
+
+// ListMetricDescriptors forwards to the wrapped service's ListMetricDescriptors
+// and reports the call under the operation name "list_metric_descriptors".
+// The wrapped call's results are returned unchanged.
+func (in instrumentedRuntimeService) ListMetricDescriptors(ctx context.Context) ([]*runtimeapi.MetricDescriptor, error) {
+	const operation = "list_metric_descriptors"
+	// time.Now() is evaluated when the defer is registered, so the deferred
+	// recordOperation receives the time at which this call started.
+	defer recordOperation(operation, time.Now())
+
+	out, err := in.service.ListMetricDescriptors(ctx)
+	// recordError is invoked unconditionally; presumably it ignores a nil err — confirm.
+	recordError(operation, err)
+	return out, err
+}
+
+// ListPodSandboxMetrics forwards to the wrapped service's ListPodSandboxMetrics
+// and reports the call under the operation name "list_podsandbox_metrics".
+// The wrapped call's results are returned unchanged.
+func (in instrumentedRuntimeService) ListPodSandboxMetrics(ctx context.Context) ([]*runtimeapi.PodSandboxMetrics, error) {
+	const operation = "list_podsandbox_metrics"
+	// time.Now() is evaluated when the defer is registered, so the deferred
+	// recordOperation receives the time at which this call started.
+	defer recordOperation(operation, time.Now())
+
+	out, err := in.service.ListPodSandboxMetrics(ctx)
+	// recordError is invoked unconditionally; presumably it ignores a nil err — confirm.
+	recordError(operation, err)
+	return out, err
+}
--- a/staging/src/k8s.io/cri-api/pkg/apis/runtime/v1/api.pb.go
+++ b/staging/src/k8s.io/cri-api/pkg/apis/runtime/v1/api.pb.go
--- a/staging/src/k8s.io/cri-api/pkg/apis/runtime/v1/api.proto
+++ b/staging/src/k8s.io/cri-api/pkg/apis/runtime/v1/api.proto
@ -121,6 +121,16 @@ service RuntimeService {

    // GetContainerEvents gets container events from the CRI runtime
    rpc  GetContainerEvents(GetEventsRequest) returns (stream ContainerEventResponse) {}
+
+    // ListMetricDescriptors gets the descriptors for the metrics that will be returned in ListPodSandboxMetrics.
+    // This list should be static at startup: either the client and server restart together when
+    // adding or removing metrics descriptors, or they should not change.
+    // Put differently, if ListPodSandboxMetrics references a name that is not described in the initial
+    // ListMetricDescriptors call, then the metric will not be broadcast.
+    rpc ListMetricDescriptors(ListMetricDescriptorsRequest) returns (ListMetricDescriptorsResponse) {}
+
+    // ListPodSandboxMetrics gets pod sandbox metrics from the CRI runtime.
+    rpc ListPodSandboxMetrics(ListPodSandboxMetricsRequest) returns (ListPodSandboxMetricsResponse) {}
 }

 // ImageService defines the public APIs for managing images.
@ -1715,4 +1725,59 @@ enum ContainerEventType {

    // Container deleted
    CONTAINER_DELETED_EVENT = 3;
-}
+}
+
+// ListMetricDescriptorsRequest is the request message for the ListMetricDescriptors RPC.
+// It currently carries no fields.
+message ListMetricDescriptorsRequest {}
+
+// ListMetricDescriptorsResponse is the response message for the ListMetricDescriptors RPC.
+message ListMetricDescriptorsResponse {
+    // The metric descriptors reported by the runtime.
+    repeated MetricDescriptor descriptors = 1;
+}
+
+// MetricDescriptor describes a metric by name, together with its help text
+// and the label keys that metrics using this descriptor may carry.
+message MetricDescriptor {
+    // The name field will be used as a unique identifier of this MetricDescriptor,
+    // and be used in conjunction with the Metric structure to populate the full Metric.
+    string name = 1;
+    // Help text for the metric.
+    string help = 2;
+    // When a metric uses this metric descriptor, it should only define
+    // labels that have previously been declared in label_keys.
+    // It is the responsibility of the runtime to keep the label keys and the
+    // label values of each Metric correctly ordered with respect to each other.
+    // If the two slices have different length, the behavior is undefined.
+    repeated string label_keys = 3;
+}
+
+// ListPodSandboxMetricsRequest is the request message for the ListPodSandboxMetrics RPC.
+// It currently carries no fields.
+message ListPodSandboxMetricsRequest {}
+
+// ListPodSandboxMetricsResponse is the response message for the ListPodSandboxMetrics RPC.
+message ListPodSandboxMetricsResponse {
+    // One entry per pod sandbox for which the runtime reports metrics.
+    repeated PodSandboxMetrics pod_metrics = 1;
+}
+
+// PodSandboxMetrics groups the metrics reported for a single pod sandbox.
+message PodSandboxMetrics {
+    // ID of the pod sandbox these metrics belong to.
+    string pod_sandbox_id = 1;
+    // Metrics attached directly to this entry (presumably sandbox-level metrics — confirm).
+    repeated Metric metrics = 2;
+    // Metrics grouped per container.
+    repeated ContainerMetrics container_metrics = 3;
+}
+
+// ContainerMetrics groups the metrics reported for a single container.
+message ContainerMetrics {
+    // ID of the container these metrics belong to.
+    string container_id = 1;
+    // Metrics reported for the container.
+    repeated Metric metrics = 2;
+}
+
+// Metric is a single metric sample, used by both PodSandboxMetrics and ContainerMetrics.
+message Metric {
+    // Name must match a name previously returned in a ListMetricDescriptors call,
+    // otherwise, it will be ignored.
+    string name = 1;
+    // Timestamp should be 0 if the metric was gathered live.
+    // If it was cached, the Timestamp should reflect the time it was collected.
+    // NOTE(review): the unit/epoch of this value is not stated here — confirm.
+    int64 timestamp = 2;
+    // Kind of metric; see MetricType.
+    MetricType metric_type = 3;
+    // The label values corresponding to the label_keys defined in the MetricDescriptor.
+    // It is the responsibility of the runtime to keep the label keys and the
+    // label values correctly ordered with respect to each other.
+    // If the two slices have different length, the behavior is undefined.
+    repeated string label_values = 4;
+    // Value of the metric (UInt64Value is declared outside this hunk).
+    UInt64Value value = 5;
+}
+
+// MetricType identifies the kind of a Metric.
+// NOTE(review): the names presumably mirror the Prometheus counter and gauge types — confirm.
+enum MetricType {
+    COUNTER = 0;
+    GAUGE = 1;
+}
--- a/staging/src/k8s.io/cri-api/pkg/apis/services.go
+++ b/staging/src/k8s.io/cri-api/pkg/apis/services.go
@ -97,6 +97,10 @@ type ContainerStatsManager interface {
 	PodSandboxStats(ctx context.Context, podSandboxID string) (*runtimeapi.PodSandboxStats, error)
 	// ListPodSandboxStats returns stats of all running pods.
 	ListPodSandboxStats(ctx context.Context, filter *runtimeapi.PodSandboxStatsFilter) ([]*runtimeapi.PodSandboxStats, error)
+	// ListMetricDescriptors gets the descriptors for the metrics that will be returned in ListPodSandboxMetrics.
+	ListMetricDescriptors(ctx context.Context) ([]*runtimeapi.MetricDescriptor, error)
+	// ListPodSandboxMetrics returns metrics of all running pods.
+	ListPodSandboxMetrics(ctx context.Context) ([]*runtimeapi.PodSandboxMetrics, error)
 }

 // RuntimeService interface should be implemented by a container runtime.
--- a/staging/src/k8s.io/cri-api/pkg/apis/testing/fake_runtime_service.go
+++ b/staging/src/k8s.io/cri-api/pkg/apis/testing/fake_runtime_service.go
@ -64,11 +64,14 @@ type FakeRuntimeService struct {
 	Called []string
 	Errors map[string][]error

-	FakeStatus          *runtimeapi.RuntimeStatus
-	Containers          map[string]*FakeContainer
-	Sandboxes           map[string]*FakePodSandbox
-	FakeContainerStats  map[string]*runtimeapi.ContainerStats
-	FakePodSandboxStats map[string]*runtimeapi.PodSandboxStats
+	FakeStatus            *runtimeapi.RuntimeStatus
+	Containers            map[string]*FakeContainer
+	Sandboxes             map[string]*FakePodSandbox
+	FakeContainerStats    map[string]*runtimeapi.ContainerStats
+	FakePodSandboxStats   map[string]*runtimeapi.PodSandboxStats
+	FakePodSandboxMetrics map[string]*runtimeapi.PodSandboxMetrics
+	FakeMetricDescriptors map[string]*runtimeapi.MetricDescriptor
+	FakeContainerMetrics  map[string]*runtimeapi.ContainerMetrics

 	ErrorOnSandboxCreate bool
 }
@ -153,12 +156,14 @@ func (r *FakeRuntimeService) popError(f string) error {
 // NewFakeRuntimeService creates a new FakeRuntimeService.
 func NewFakeRuntimeService() *FakeRuntimeService {
 	return &FakeRuntimeService{
-		Called:              make([]string, 0),
-		Errors:              make(map[string][]error),
-		Containers:          make(map[string]*FakeContainer),
-		Sandboxes:           make(map[string]*FakePodSandbox),
-		FakeContainerStats:  make(map[string]*runtimeapi.ContainerStats),
-		FakePodSandboxStats: make(map[string]*runtimeapi.PodSandboxStats),
+		Called:                make([]string, 0),
+		Errors:                make(map[string][]error),
+		Containers:            make(map[string]*FakeContainer),
+		Sandboxes:             make(map[string]*FakePodSandbox),
+		FakeContainerStats:    make(map[string]*runtimeapi.ContainerStats),
+		FakePodSandboxStats:   make(map[string]*runtimeapi.PodSandboxStats),
+		FakePodSandboxMetrics: make(map[string]*runtimeapi.PodSandboxMetrics),
+		FakeContainerMetrics:  make(map[string]*runtimeapi.ContainerMetrics),
 	}
 }

@ -713,3 +718,65 @@ func (r *FakeRuntimeService) CheckpointContainer(_ context.Context, options *run
 func (f *FakeRuntimeService) GetContainerEvents(containerEventsCh chan *runtimeapi.ContainerEventResponse) error {
 	return nil
 }
+
+// SetFakeMetricDescriptors sets the fake metrics descriptors in the FakeRuntimeService.
+// Any previously stored descriptors are discarded. The new ones are keyed by
+// their Name, so when several descriptors share a name the last one wins.
+// Elements are dereferenced directly, so descs must not contain nil entries.
+func (r *FakeRuntimeService) SetFakeMetricDescriptors(descs []*runtimeapi.MetricDescriptor) {
+	r.Lock()
+	defer r.Unlock()
+
+	// Start from a fresh map rather than merging into the existing one.
+	r.FakeMetricDescriptors = make(map[string]*runtimeapi.MetricDescriptor)
+	for _, d := range descs {
+		r.FakeMetricDescriptors[d.Name] = d
+	}
+}
+
+// ListMetricDescriptors gets the descriptors for the metrics that will be returned in ListPodSandboxMetrics.
+// The call is appended to r.Called. If popError yields an error for
+// "ListMetricDescriptors", that error is returned with a nil slice. Otherwise
+// all stored descriptors are returned; because they are collected by ranging
+// over a map, their order is unspecified.
+func (r *FakeRuntimeService) ListMetricDescriptors(_ context.Context) ([]*runtimeapi.MetricDescriptor, error) {
+	r.Lock()
+	defer r.Unlock()
+
+	r.Called = append(r.Called, "ListMetricDescriptors")
+	if err := r.popError("ListMetricDescriptors"); err != nil {
+		return nil, err
+	}
+
+	// Always returns a non-nil (possibly empty) slice on success.
+	descs := make([]*runtimeapi.MetricDescriptor, 0, len(r.FakeMetricDescriptors))
+	for _, d := range r.FakeMetricDescriptors {
+		descs = append(descs, d)
+	}
+
+	return descs, nil
+}
+
+// SetFakePodSandboxMetrics sets the fake pod sandbox metrics in the FakeRuntimeService.
+// Any previously stored metrics are discarded. The new ones are keyed by
+// their PodSandboxId, so when several entries share an ID the last one wins.
+// Elements are dereferenced directly, so podStats must not contain nil entries.
+func (r *FakeRuntimeService) SetFakePodSandboxMetrics(podStats []*runtimeapi.PodSandboxMetrics) {
+	r.Lock()
+	defer r.Unlock()
+
+	// Start from a fresh map rather than merging into the existing one.
+	r.FakePodSandboxMetrics = make(map[string]*runtimeapi.PodSandboxMetrics)
+	for _, s := range podStats {
+		r.FakePodSandboxMetrics[s.PodSandboxId] = s
+	}
+}
+
+// ListPodSandboxMetrics returns the fake pod sandbox metrics for the sandboxes
+// known to the FakeRuntimeService.
+// The call is appended to r.Called. If popError yields an error for
+// "ListPodSandboxMetrics", that error is returned with a nil slice. Otherwise
+// the result contains the stored metrics whose ID matches a sandbox in
+// r.Sandboxes; metrics stored for IDs without a sandbox are not returned.
+// The result is nil when nothing matches, and its order is unspecified
+// because it follows map iteration.
+func (r *FakeRuntimeService) ListPodSandboxMetrics(_ context.Context) ([]*runtimeapi.PodSandboxMetrics, error) {
+	r.Lock()
+	defer r.Unlock()
+
+	r.Called = append(r.Called, "ListPodSandboxMetrics")
+	if err := r.popError("ListPodSandboxMetrics"); err != nil {
+		return nil, err
+	}
+
+	var result []*runtimeapi.PodSandboxMetrics
+	for _, sb := range r.Sandboxes {
+		s, found := r.FakePodSandboxMetrics[sb.Id]
+		if !found {
+			// No metrics were set for this sandbox; skip it.
+			continue
+		}
+		result = append(result, s)
+	}
+
+	return result, nil
+}