Add fake runtimes and CRI changes for KEP-2371

Added a new gRPC call, 'ListPodSandboxMetrics', which returns additional
container stats that are currently supported by cAdvisor but fall outside
the scope of the /stats/summary API. Added new types to support exporting
metrics to Prometheus, including Metric and its subfields. Added the fake
runtime changes associated with the CRI changes.
This commit is contained in:
Daniel Ye 2022-10-12 19:08:31 +00:00 committed by Peter Hunt
parent 435606b109
commit dcc7c2f660
7 changed files with 2626 additions and 404 deletions

View File

@ -336,3 +336,23 @@ func (f *RemoteRuntime) CheckpointContainer(ctx context.Context, req *kubeapi.Ch
// GetContainerEvents is currently a no-op: it uses neither req nor the ces
// stream and always returns nil.
func (f *RemoteRuntime) GetContainerEvents(req *kubeapi.GetEventsRequest, ces kubeapi.RuntimeService_GetContainerEventsServer) error {
return nil
}
// ListMetricDescriptors serves the ListMetricDescriptors RPC. It forwards the
// call to f.RuntimeService and wraps the returned descriptors in a response
// message. The request itself is not inspected. Any error from the underlying
// service is returned unchanged together with a nil response.
func (f *RemoteRuntime) ListMetricDescriptors(ctx context.Context, req *kubeapi.ListMetricDescriptorsRequest) (*kubeapi.ListMetricDescriptorsResponse, error) {
	descriptors, err := f.RuntimeService.ListMetricDescriptors(ctx)
	if err != nil {
		return nil, err
	}

	resp := &kubeapi.ListMetricDescriptorsResponse{Descriptors: descriptors}
	return resp, nil
}
// ListPodSandboxMetrics serves the ListPodSandboxMetrics RPC. It asks
// f.RuntimeService for the pod sandbox metrics and packs them into a response
// message. The request itself is not inspected. Any error from the underlying
// service is returned unchanged together with a nil response.
func (f *RemoteRuntime) ListPodSandboxMetrics(ctx context.Context, req *kubeapi.ListPodSandboxMetricsRequest) (*kubeapi.ListPodSandboxMetricsResponse, error) {
	metrics, err := f.RuntimeService.ListPodSandboxMetrics(ctx)
	if err != nil {
		return nil, err
	}

	resp := &kubeapi.ListPodSandboxMetricsResponse{PodMetrics: metrics}
	return resp, nil
}

View File

@ -815,3 +815,33 @@ func (r *remoteRuntimeService) GetContainerEvents(containerEventsCh chan *runtim
}
}
}
// ListMetricDescriptors gets the descriptors for the metrics that will be
// returned in ListPodSandboxMetrics.
//
// The call is bounded by r.timeout, applied on top of the caller's ctx. On
// failure the error is logged and returned with a nil slice. On success the
// descriptors from the response are logged at verbosity 10 and returned.
func (r *remoteRuntimeService) ListMetricDescriptors(ctx context.Context) ([]*runtimeapi.MetricDescriptor, error) {
	ctx, cancel := context.WithTimeout(ctx, r.timeout)
	defer cancel()

	resp, err := r.runtimeClient.ListMetricDescriptors(ctx, &runtimeapi.ListMetricDescriptorsRequest{})
	if err != nil {
		klog.ErrorS(err, "ListMetricDescriptors from runtime service failed")
		return nil, err
	}

	descriptors := resp.GetDescriptors()
	// The logged value is the descriptor list, not stats, so label it as such.
	klog.V(10).InfoS("[RemoteRuntimeService] ListMetricDescriptors Response", "descriptors", descriptors)
	return descriptors, nil
}
// ListPodSandboxMetrics retrieves the metrics for all pod sandboxes from the
// runtime client.
//
// The call is bounded by r.timeout, applied on top of the caller's ctx. On
// failure the error is logged and returned with a nil slice. On success the
// pod metrics from the response are logged at verbosity 10 and returned.
func (r *remoteRuntimeService) ListPodSandboxMetrics(ctx context.Context) ([]*runtimeapi.PodSandboxMetrics, error) {
	callCtx, cancel := context.WithTimeout(ctx, r.timeout)
	defer cancel()

	request := &runtimeapi.ListPodSandboxMetricsRequest{}
	response, err := r.runtimeClient.ListPodSandboxMetrics(callCtx, request)
	if err != nil {
		klog.ErrorS(err, "ListPodSandboxMetrics from runtime service failed")
		return nil, err
	}

	podMetrics := response.GetPodMetrics()
	klog.V(10).InfoS("[RemoteRuntimeService] ListPodSandboxMetrics Response", "stats", podMetrics)
	return podMetrics, nil
}

View File

@ -343,3 +343,21 @@ func (in instrumentedRuntimeService) GetContainerEvents(containerEventsCh chan *
recordError(operation, err)
return err
}
// ListMetricDescriptors delegates to the wrapped service and records the call
// under the "list_metric_descriptors" operation: recordError is invoked with
// the returned error, and recordOperation is invoked on return with the time
// captured before the delegated call started.
func (in instrumentedRuntimeService) ListMetricDescriptors(ctx context.Context) ([]*runtimeapi.MetricDescriptor, error) {
	const op = "list_metric_descriptors"

	startedAt := time.Now()
	defer recordOperation(op, startedAt)

	descriptors, err := in.service.ListMetricDescriptors(ctx)
	recordError(op, err)
	return descriptors, err
}
// ListPodSandboxMetrics delegates to the wrapped service and records the call
// under the "list_podsandbox_metrics" operation: recordError is invoked with
// the returned error, and recordOperation is invoked on return with the time
// captured before the delegated call started.
func (in instrumentedRuntimeService) ListPodSandboxMetrics(ctx context.Context) ([]*runtimeapi.PodSandboxMetrics, error) {
	const op = "list_podsandbox_metrics"

	startedAt := time.Now()
	defer recordOperation(op, startedAt)

	podMetrics, err := in.service.ListPodSandboxMetrics(ctx)
	recordError(op, err)
	return podMetrics, err
}

File diff suppressed because it is too large Load Diff

View File

@ -121,6 +121,16 @@ service RuntimeService {
// GetContainerEvents gets container events from the CRI runtime
rpc GetContainerEvents(GetEventsRequest) returns (stream ContainerEventResponse) {}
// ListMetricDescriptors gets the descriptors for the metrics that will be returned in ListPodSandboxMetrics.
// This list should be static at startup: either the client and server restart together when
// adding or removing metrics descriptors, or they should not change.
// Put differently, if ListPodSandboxMetrics references a name that is not described in the initial
// ListMetricDescriptors call, then the metric will not be broadcasted.
rpc ListMetricDescriptors(ListMetricDescriptorsRequest) returns (ListMetricDescriptorsResponse) {}
// ListPodSandboxMetrics gets pod sandbox metrics from CRI Runtime
rpc ListPodSandboxMetrics(ListPodSandboxMetricsRequest) returns (ListPodSandboxMetricsResponse) {}
}
// ImageService defines the public APIs for managing images.
@ -1715,4 +1725,59 @@ enum ContainerEventType {
// Container deleted
CONTAINER_DELETED_EVENT = 3;
}
}
// ListMetricDescriptorsRequest is the request message of the
// ListMetricDescriptors RPC. It carries no fields.
message ListMetricDescriptorsRequest {}
// ListMetricDescriptorsResponse is the response message of the
// ListMetricDescriptors RPC.
message ListMetricDescriptorsResponse {
// Descriptors for the metrics that will be returned in ListPodSandboxMetrics.
repeated MetricDescriptor descriptors = 1;
}
// MetricDescriptor describes one metric: its identifying name, a help string
// and the label keys that Metric messages using this descriptor may carry.
message MetricDescriptor {
// The name field will be used as a unique identifier of this MetricDescriptor,
// and be used in conjunction with the Metric structure to populate the full Metric.
string name = 1;
// NOTE(review): presumably the human-readable help text exported alongside
// the metric — confirm against the consumer of this field.
string help = 2;
// When a metric uses this metric descriptor, it should only define
// labels that have previously been declared in label_keys.
// It is the responsibility of the runtime to correctly keep sorted the keys and values.
// If the two slices have different length, the behavior is undefined.
repeated string label_keys = 3;
}
// ListPodSandboxMetricsRequest is the request message of the
// ListPodSandboxMetrics RPC. It carries no fields.
message ListPodSandboxMetricsRequest {}
// ListPodSandboxMetricsResponse is the response message of the
// ListPodSandboxMetrics RPC.
message ListPodSandboxMetricsResponse {
// One PodSandboxMetrics entry per reported pod sandbox.
repeated PodSandboxMetrics pod_metrics = 1;
}
// PodSandboxMetrics groups the metrics reported for a single pod sandbox
// together with the metrics of containers reported alongside it.
message PodSandboxMetrics {
// ID of the pod sandbox these metrics belong to.
string pod_sandbox_id = 1;
// Metrics attached to the pod sandbox itself.
repeated Metric metrics = 2;
// Per-container metrics.
// NOTE(review): presumably the containers running in this sandbox — confirm.
repeated ContainerMetrics container_metrics = 3;
}
// ContainerMetrics groups the metrics reported for a single container.
message ContainerMetrics {
// ID of the container these metrics belong to.
string container_id = 1;
// Metrics attached to the container.
repeated Metric metrics = 2;
}
// Metric is a single sample: a name that refers to a MetricDescriptor, an
// optional collection timestamp, a metric type, label values and a value.
message Metric {
// Name must match a name previously returned in a MetricDescriptors call,
// otherwise, it will be ignored.
string name = 1;
// Timestamp should be 0 if the metric was gathered live.
// If it was cached, the Timestamp should reflect the time it was collected.
// NOTE(review): the time unit is not stated here — confirm with producers/consumers.
int64 timestamp = 2;
// Kind of metric, see MetricType.
MetricType metric_type = 3;
// The corresponding LabelValues to the LabelKeys defined in the MetricDescriptor.
// It is the responsibility of the runtime to correctly keep sorted the keys and values.
// If the two slices have different length, the behavior is undefined.
repeated string label_values = 4;
// The sampled value, wrapped in a UInt64Value message.
UInt64Value value = 5;
}
// MetricType enumerates the kinds of metric a Metric can represent.
// NOTE(review): the names suggest Prometheus-style counter and gauge
// semantics — confirm against the metric exporter.
enum MetricType {
// COUNTER is the zero value and therefore the default when unset.
COUNTER = 0;
GAUGE = 1;
}

View File

@ -97,6 +97,10 @@ type ContainerStatsManager interface {
PodSandboxStats(ctx context.Context, podSandboxID string) (*runtimeapi.PodSandboxStats, error)
// ListPodSandboxStats returns stats of all running pods.
ListPodSandboxStats(ctx context.Context, filter *runtimeapi.PodSandboxStatsFilter) ([]*runtimeapi.PodSandboxStats, error)
// ListMetricDescriptors gets the descriptors for the metrics that will be returned in ListPodSandboxMetrics.
ListMetricDescriptors(ctx context.Context) ([]*runtimeapi.MetricDescriptor, error)
// ListPodSandboxMetrics returns metrics of all running pods.
ListPodSandboxMetrics(ctx context.Context) ([]*runtimeapi.PodSandboxMetrics, error)
}
// RuntimeService interface should be implemented by a container runtime.

View File

@ -64,11 +64,14 @@ type FakeRuntimeService struct {
Called []string
Errors map[string][]error
FakeStatus *runtimeapi.RuntimeStatus
Containers map[string]*FakeContainer
Sandboxes map[string]*FakePodSandbox
FakeContainerStats map[string]*runtimeapi.ContainerStats
FakePodSandboxStats map[string]*runtimeapi.PodSandboxStats
FakeStatus *runtimeapi.RuntimeStatus
Containers map[string]*FakeContainer
Sandboxes map[string]*FakePodSandbox
FakeContainerStats map[string]*runtimeapi.ContainerStats
FakePodSandboxStats map[string]*runtimeapi.PodSandboxStats
FakePodSandboxMetrics map[string]*runtimeapi.PodSandboxMetrics
FakeMetricDescriptors map[string]*runtimeapi.MetricDescriptor
FakeContainerMetrics map[string]*runtimeapi.ContainerMetrics
ErrorOnSandboxCreate bool
}
@ -153,12 +156,14 @@ func (r *FakeRuntimeService) popError(f string) error {
// NewFakeRuntimeService creates a new FakeRuntimeService.
//
// The call log and every bookkeeping map are initialized, including
// FakeMetricDescriptors, so callers can insert fixtures into any of them
// directly. FakeStatus and ErrorOnSandboxCreate keep their zero values.
func NewFakeRuntimeService() *FakeRuntimeService {
	return &FakeRuntimeService{
		Called:                make([]string, 0),
		Errors:                make(map[string][]error),
		Containers:            make(map[string]*FakeContainer),
		Sandboxes:             make(map[string]*FakePodSandbox),
		FakeContainerStats:    make(map[string]*runtimeapi.ContainerStats),
		FakePodSandboxStats:   make(map[string]*runtimeapi.PodSandboxStats),
		FakePodSandboxMetrics: make(map[string]*runtimeapi.PodSandboxMetrics),
		FakeMetricDescriptors: make(map[string]*runtimeapi.MetricDescriptor),
		FakeContainerMetrics:  make(map[string]*runtimeapi.ContainerMetrics),
	}
}
@ -713,3 +718,65 @@ func (r *FakeRuntimeService) CheckpointContainer(_ context.Context, options *run
// GetContainerEvents is a no-op in the fake runtime service: it never writes
// to containerEventsCh and always returns nil.
func (r *FakeRuntimeService) GetContainerEvents(containerEventsCh chan *runtimeapi.ContainerEventResponse) error {
	return nil
}
// SetFakeMetricDescriptors replaces the fake metric descriptors held by the
// FakeRuntimeService. Any previously stored descriptors are discarded; the new
// ones are indexed by their Name, so a later descriptor with the same name
// overwrites an earlier one. The service lock is held for the whole update.
func (r *FakeRuntimeService) SetFakeMetricDescriptors(descs []*runtimeapi.MetricDescriptor) {
	r.Lock()
	defer r.Unlock()

	r.FakeMetricDescriptors = make(map[string]*runtimeapi.MetricDescriptor)
	for i := range descs {
		descriptor := descs[i]
		r.FakeMetricDescriptors[descriptor.Name] = descriptor
	}
}
// ListMetricDescriptors returns the fake metric descriptors currently stored
// in the FakeRuntimeService.
//
// The call is appended to r.Called. If an error has been queued for
// "ListMetricDescriptors", it is popped and returned with a nil slice.
// Otherwise a non-nil slice holding every stored descriptor is returned; the
// order follows map iteration and is therefore unspecified.
func (r *FakeRuntimeService) ListMetricDescriptors(_ context.Context) ([]*runtimeapi.MetricDescriptor, error) {
	const method = "ListMetricDescriptors"

	r.Lock()
	defer r.Unlock()

	r.Called = append(r.Called, method)
	err := r.popError(method)
	if err != nil {
		return nil, err
	}

	// Fill a pre-sized slice by position instead of appending.
	all := make([]*runtimeapi.MetricDescriptor, len(r.FakeMetricDescriptors))
	next := 0
	for _, descriptor := range r.FakeMetricDescriptors {
		all[next] = descriptor
		next++
	}
	return all, nil
}
// SetFakePodSandboxMetrics replaces the fake pod sandbox metrics held by the
// FakeRuntimeService. Any previously stored metrics are discarded; the new
// entries are indexed by their PodSandboxId, so a later entry with the same ID
// overwrites an earlier one. The service lock is held for the whole update.
func (r *FakeRuntimeService) SetFakePodSandboxMetrics(podStats []*runtimeapi.PodSandboxMetrics) {
	r.Lock()
	defer r.Unlock()

	r.FakePodSandboxMetrics = make(map[string]*runtimeapi.PodSandboxMetrics)
	for i := range podStats {
		entry := podStats[i]
		r.FakePodSandboxMetrics[entry.PodSandboxId] = entry
	}
}
// ListPodSandboxMetrics returns the fake metrics of the sandboxes known to the
// FakeRuntimeService.
//
// The call is appended to r.Called. If an error has been queued for
// "ListPodSandboxMetrics", it is popped and returned with a nil slice.
// Otherwise, for every sandbox in r.Sandboxes that has an entry in
// r.FakePodSandboxMetrics under its Id, that entry is included in the result.
// Sandboxes without metrics are skipped, and metrics whose ID matches no
// sandbox are not returned. The result is nil when nothing matches, and its
// order follows map iteration and is therefore unspecified.
func (r *FakeRuntimeService) ListPodSandboxMetrics(_ context.Context) ([]*runtimeapi.PodSandboxMetrics, error) {
	const method = "ListPodSandboxMetrics"

	r.Lock()
	defer r.Unlock()

	r.Called = append(r.Called, method)
	err := r.popError(method)
	if err != nil {
		return nil, err
	}

	var metrics []*runtimeapi.PodSandboxMetrics
	for _, sandbox := range r.Sandboxes {
		if m, ok := r.FakePodSandboxMetrics[sandbox.Id]; ok {
			metrics = append(metrics, m)
		}
	}
	return metrics, nil
}