Merge pull request #102789 from haircommander/add-summary-stats-to-cri

CRI: add fields for pod level stats to satisfy the /stats/summary API
This commit is contained in:
Kubernetes Prow Robot 2021-08-04 18:59:43 -07:00 committed by GitHub
commit 4b2f2a0cd8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 9360 additions and 845 deletions

View File

@ -262,6 +262,27 @@ func (f *RemoteRuntime) ListContainerStats(ctx context.Context, req *kubeapi.Lis
return &kubeapi.ListContainerStatsResponse{Stats: stats}, nil
}
// PodSandboxStats serves the PodSandboxStats RPC by handing the pod sandbox ID
// from the request to the wrapped RuntimeService. An error from the service is
// returned unchanged together with a nil response; otherwise the stats are
// wrapped in a PodSandboxStatsResponse.
func (f *RemoteRuntime) PodSandboxStats(ctx context.Context, req *kubeapi.PodSandboxStatsRequest) (*kubeapi.PodSandboxStatsResponse, error) {
	sandboxStats, err := f.RuntimeService.PodSandboxStats(req.PodSandboxId)
	if err != nil {
		return nil, err
	}

	resp := &kubeapi.PodSandboxStatsResponse{Stats: sandboxStats}
	return resp, nil
}
// ListPodSandboxStats serves the ListPodSandboxStats RPC by passing the
// request's filter to the wrapped RuntimeService. An error from the service is
// returned unchanged together with a nil response; otherwise the resulting
// stats are wrapped in a ListPodSandboxStatsResponse.
func (f *RemoteRuntime) ListPodSandboxStats(ctx context.Context, req *kubeapi.ListPodSandboxStatsRequest) (*kubeapi.ListPodSandboxStatsResponse, error) {
	sandboxStats, err := f.RuntimeService.ListPodSandboxStats(req.Filter)
	if err != nil {
		return nil, err
	}

	resp := &kubeapi.ListPodSandboxStatsResponse{Stats: sandboxStats}
	return resp, nil
}
// UpdateRuntimeConfig updates the runtime configuration based on the given request.
func (f *RemoteRuntime) UpdateRuntimeConfig(ctx context.Context, req *kubeapi.UpdateRuntimeConfigRequest) (*kubeapi.UpdateRuntimeConfigResponse, error) {
err := f.RuntimeService.UpdateRuntimeConfig(req.RuntimeConfig)

View File

@ -569,6 +569,46 @@ func (r *remoteRuntimeService) ListContainerStats(filter *runtimeapi.ContainerSt
return resp.GetStats(), nil
}
// PodSandboxStats returns the stats of the pod sandbox identified by
// podSandboxID, as reported by the runtime client.
//
// The call is made with a context obtained from getContextWithTimeout using
// r.timeout. If the RPC fails, the error is returned as-is with nil stats; it
// is logged only when r.logReduction.ShouldMessageBePrinted allows it for this
// sandbox ID. On success the log-reduction state for the sandbox ID is cleared.
func (r *remoteRuntimeService) PodSandboxStats(podSandboxID string) (*runtimeapi.PodSandboxStats, error) {
	klog.V(10).InfoS("[RemoteRuntimeService] PodSandboxStats", "podSandboxID", podSandboxID, "timeout", r.timeout)
	ctx, cancel := getContextWithTimeout(r.timeout)
	defer cancel()

	resp, err := r.runtimeClient.PodSandboxStats(ctx, &runtimeapi.PodSandboxStatsRequest{
		PodSandboxId: podSandboxID,
	})
	if err != nil {
		// NOTE(review): logReduction presumably suppresses repeated identical
		// errors for the same sandbox — confirm against its implementation.
		if r.logReduction.ShouldMessageBePrinted(err.Error(), podSandboxID) {
			// Name the operation that actually failed (PodSandboxStats), matching
			// the entry log line above.
			klog.ErrorS(err, "PodSandboxStats from runtime service failed", "podSandboxID", podSandboxID)
		}
		return nil, err
	}
	r.logReduction.ClearID(podSandboxID)

	stats := resp.GetStats()
	klog.V(10).InfoS("[RemoteRuntimeService] PodSandboxStats Response", "podSandboxID", podSandboxID, "stats", stats)
	return stats, nil
}
// ListPodSandboxStats asks the runtime client for the stats of the pod
// sandboxes selected by filter and returns them. A failed RPC is logged and
// its error is returned unchanged with a nil slice.
func (r *remoteRuntimeService) ListPodSandboxStats(filter *runtimeapi.PodSandboxStatsFilter) ([]*runtimeapi.PodSandboxStats, error) {
	klog.V(10).InfoS("[RemoteRuntimeService] ListPodSandboxStats", "filter", filter)

	// A timeout is applied, because runtimes are able to cache disk stats results.
	ctx, cancel := getContextWithTimeout(r.timeout)
	defer cancel()

	request := &runtimeapi.ListPodSandboxStatsRequest{Filter: filter}
	response, err := r.runtimeClient.ListPodSandboxStats(ctx, request)
	if err != nil {
		klog.ErrorS(err, "ListPodSandboxStats with filter from runtime service failed", "filter", filter)
		return nil, err
	}

	podStats := response.GetStats()
	klog.V(10).InfoS("[RemoteRuntimeService] ListPodSandboxStats Response", "filter", filter, "stats", podStats)
	return podStats, nil
}
// ReopenContainerLog reopens the container log file.
func (r *remoteRuntimeService) ReopenContainerLog(containerID string) error {
klog.V(10).InfoS("[RemoteRuntimeService] ReopenContainerLog", "containerID", containerID, "timeout", r.timeout)

View File

@ -20,10 +20,13 @@ package dockershim
import (
"context"
"errors"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
)
// ErrNotImplemented is the error returned by dockershim methods that have no
// implementation (PodSandboxStats and ListPodSandboxStats in this file).
var ErrNotImplemented = errors.New("Not implemented")
// ContainerStats returns stats for a container stats request based on container id.
func (ds *dockerService) ContainerStats(_ context.Context, r *runtimeapi.ContainerStatsRequest) (*runtimeapi.ContainerStatsResponse, error) {
stats, err := ds.getContainerStats(r.ContainerId)
@ -62,3 +65,15 @@ func (ds *dockerService) ListContainerStats(ctx context.Context, r *runtimeapi.L
return &runtimeapi.ListContainerStatsResponse{Stats: stats}, nil
}
// PodSandboxStats is the entry point for retrieving the stats of a single pod
// sandbox by its ID. The dockershim does not implement it: both arguments are
// ignored and ErrNotImplemented is always returned with a nil response.
func (ds *dockerService) PodSandboxStats(_ context.Context, _ *runtimeapi.PodSandboxStatsRequest) (*runtimeapi.PodSandboxStatsResponse, error) {
	return nil, ErrNotImplemented
}
// ListPodSandboxStats is the entry point for retrieving the stats of the pod
// sandboxes selected by a filter. The dockershim does not implement it: both
// arguments are ignored and ErrNotImplemented is always returned with a nil
// response.
func (ds *dockerService) ListPodSandboxStats(_ context.Context, _ *runtimeapi.ListPodSandboxStatsRequest) (*runtimeapi.ListPodSandboxStatsResponse, error) {
	return nil, ErrNotImplemented
}

View File

@ -244,6 +244,24 @@ func (in instrumentedRuntimeService) ListContainerStats(filter *runtimeapi.Conta
return out, err
}
// PodSandboxStats forwards to the wrapped service's PodSandboxStats and
// instruments the call under the "podsandbox_stats" operation: recordOperation
// is deferred with the time taken before the call, and recordError receives
// the call's error. The wrapped service's results are returned unchanged.
func (in instrumentedRuntimeService) PodSandboxStats(podSandboxID string) (*runtimeapi.PodSandboxStats, error) {
	const operation = "podsandbox_stats"
	startedAt := time.Now()
	defer recordOperation(operation, startedAt)

	stats, err := in.service.PodSandboxStats(podSandboxID)
	recordError(operation, err)
	return stats, err
}
// ListPodSandboxStats forwards to the wrapped service's ListPodSandboxStats and
// instruments the call under the "list_podsandbox_stats" operation:
// recordOperation is deferred with the time taken before the call, and
// recordError receives the call's error. The wrapped service's results are
// returned unchanged.
func (in instrumentedRuntimeService) ListPodSandboxStats(filter *runtimeapi.PodSandboxStatsFilter) ([]*runtimeapi.PodSandboxStats, error) {
	const operation = "list_podsandbox_stats"
	startedAt := time.Now()
	defer recordOperation(operation, startedAt)

	stats, err := in.service.ListPodSandboxStats(filter)
	recordError(operation, err)
	return stats, err
}
func (in instrumentedRuntimeService) PortForward(req *runtimeapi.PortForwardRequest) (*runtimeapi.PortForwardResponse, error) {
const operation = "port_forward"
defer recordOperation(operation, time.Now())

File diff suppressed because it is too large Load Diff

View File

@ -103,6 +103,12 @@ service RuntimeService {
// ListContainerStats returns stats of all running containers.
rpc ListContainerStats(ListContainerStatsRequest) returns (ListContainerStatsResponse) {}
// PodSandboxStats returns stats of the pod sandbox. If the pod sandbox does not
// exist, the call returns an error.
rpc PodSandboxStats(PodSandboxStatsRequest) returns (PodSandboxStatsResponse) {}
// ListPodSandboxStats returns stats of the pods matching a filter.
rpc ListPodSandboxStats(ListPodSandboxStatsRequest) returns (ListPodSandboxStatsResponse) {}
// UpdateRuntimeConfig updates the runtime configuration based on the given request.
rpc UpdateRuntimeConfig(UpdateRuntimeConfigRequest) returns (UpdateRuntimeConfigResponse) {}
@ -545,6 +551,113 @@ message ListPodSandboxResponse {
repeated PodSandbox items = 1;
}
// PodSandboxStatsRequest is the request message of the PodSandboxStats RPC.
message PodSandboxStatsRequest {
// ID of the pod sandbox for which to retrieve stats.
string pod_sandbox_id = 1;
}
// PodSandboxStatsResponse is the response message of the PodSandboxStats RPC.
message PodSandboxStatsResponse {
// Stats of the pod sandbox.
PodSandboxStats stats = 1;
}
// PodSandboxStatsFilter is used to filter the list of pod sandboxes to retrieve stats for.
// All of its fields are combined with 'AND'.
message PodSandboxStatsFilter {
// ID of the pod sandbox.
string id = 1;
// LabelSelector to select matches.
// Only api.MatchLabels is supported for now and the requirements
// are ANDed. MatchExpressions is not supported yet.
map<string, string> label_selector = 2;
}
// ListPodSandboxStatsRequest is the request message of the ListPodSandboxStats RPC.
message ListPodSandboxStatsRequest {
// Filter for the list request.
PodSandboxStatsFilter filter = 1;
}
// ListPodSandboxStatsResponse is the response message of the ListPodSandboxStats RPC.
message ListPodSandboxStatsResponse {
// Stats of the pod sandboxes, one entry per pod sandbox.
repeated PodSandboxStats stats = 1;
}
// PodSandboxAttributes provides basic information of the pod sandbox.
// It is carried in the attributes field of PodSandboxStats.
message PodSandboxAttributes {
// ID of the pod sandbox.
string id = 1;
// Metadata of the pod sandbox.
PodSandboxMetadata metadata = 2;
// Key-value pairs that may be used to scope and select individual resources.
map<string,string> labels = 3;
// Unstructured key-value map holding arbitrary metadata.
// Annotations MUST NOT be altered by the runtime; the value of this field
// MUST be identical to that of the corresponding PodSandboxStatus used to
// instantiate the PodSandbox this status represents.
map<string,string> annotations = 4;
}
// PodSandboxStats provides the resource usage statistics for a pod sandbox.
// The linux or windows field will be populated depending on the platform.
message PodSandboxStats {
// Information of the pod sandbox (ID, metadata, labels, annotations).
PodSandboxAttributes attributes = 1;
// Stats from linux.
LinuxPodSandboxStats linux = 2;
// Stats from windows.
WindowsPodSandboxStats windows = 3;
}
// LinuxPodSandboxStats provides the resource usage statistics for a pod sandbox on linux.
// It is carried in the linux field of PodSandboxStats.
message LinuxPodSandboxStats {
// CPU usage gathered for the pod sandbox.
CpuUsage cpu = 1;
// Memory usage gathered for the pod sandbox.
MemoryUsage memory = 2;
// Network usage gathered for the pod sandbox.
NetworkUsage network = 3;
// Stats pertaining to processes in the pod sandbox.
ProcessUsage process = 4;
// Stats of containers in the measured pod sandbox.
repeated ContainerStats containers = 5;
}
// WindowsPodSandboxStats provides the resource usage statistics for a pod sandbox on windows.
// It is carried in the windows field of PodSandboxStats and currently declares no fields.
message WindowsPodSandboxStats {
// TODO: Add stats relevant to windows.
}
// NetworkUsage contains data about network resources.
// It is carried in the network field of LinuxPodSandboxStats.
message NetworkUsage {
// The time at which these stats were updated.
// NOTE(review): the unit of this timestamp is not stated here — confirm it
// matches the other stats timestamps in this API.
int64 timestamp = 1;
// Stats for the default network interface.
NetworkInterfaceUsage default_interface = 2;
// Stats for all found network interfaces, excluding the default.
repeated NetworkInterfaceUsage interfaces = 3;
}
// NetworkInterfaceUsage contains resource value data about a network interface.
// It is used by NetworkUsage for both the default interface and the list of
// remaining interfaces.
message NetworkInterfaceUsage {
// The name of the network interface.
string name = 1;
// Cumulative count of bytes received.
UInt64Value rx_bytes = 2;
// Cumulative count of receive errors encountered.
UInt64Value rx_errors = 3;
// Cumulative count of bytes transmitted.
UInt64Value tx_bytes = 4;
// Cumulative count of transmit errors encountered.
UInt64Value tx_errors = 5;
}
// ProcessUsage contains stats pertaining to processes.
// It is carried in the process field of LinuxPodSandboxStats.
message ProcessUsage {
// The time at which these stats were updated.
// NOTE(review): the unit of this timestamp is not stated here — confirm it
// matches the other stats timestamps in this API.
int64 timestamp = 1;
// Number of processes.
UInt64Value process_count = 2;
}
// ImageSpec is an internal representation of an image.
message ImageSpec {
// Container's Image field (e.g. imageID or imageDigest).
@ -1351,6 +1464,9 @@ message CpuUsage {
int64 timestamp = 1;
// Cumulative CPU usage (sum across all cores) since object creation.
UInt64Value usage_core_nano_seconds = 2;
// Total CPU usage (sum of all cores) averaged over the sample window.
// The "core" unit can be interpreted as CPU core-nanoseconds per second.
UInt64Value usage_nano_cores = 3;
}
// MemoryUsage provides the memory usage information.
@ -1359,6 +1475,16 @@ message MemoryUsage {
int64 timestamp = 1;
// The amount of working set memory in bytes.
UInt64Value working_set_bytes = 2;
// Available memory for use. This is defined as the memory limit - workingSetBytes.
UInt64Value available_bytes = 3;
// Total memory in use. This includes all memory regardless of when it was accessed.
UInt64Value usage_bytes = 4;
// The amount of anonymous and swap cache memory (includes transparent hugepages).
UInt64Value rss_bytes = 5;
// Cumulative number of minor page faults.
UInt64Value page_faults = 6;
// Cumulative number of major page faults.
UInt64Value major_page_faults = 7;
}
message ReopenContainerLogRequest {

File diff suppressed because it is too large Load Diff

View File

@ -102,6 +102,12 @@ service RuntimeService {
// ListContainerStats returns stats of all running containers.
rpc ListContainerStats(ListContainerStatsRequest) returns (ListContainerStatsResponse) {}
// PodSandboxStats returns stats of the pod sandbox. If the pod sandbox does not
// exist, the call returns an error.
rpc PodSandboxStats(PodSandboxStatsRequest) returns (PodSandboxStatsResponse) {}
// ListPodSandboxStats returns stats of the pod sandboxes matching a filter.
rpc ListPodSandboxStats(ListPodSandboxStatsRequest) returns (ListPodSandboxStatsResponse) {}
// UpdateRuntimeConfig updates the runtime configuration based on the given request.
rpc UpdateRuntimeConfig(UpdateRuntimeConfigRequest) returns (UpdateRuntimeConfigResponse) {}
@ -549,6 +555,113 @@ message ListPodSandboxResponse {
repeated PodSandbox items = 1;
}
// PodSandboxStatsRequest is the request message of the PodSandboxStats RPC.
message PodSandboxStatsRequest {
// ID of the pod sandbox for which to retrieve stats.
string pod_sandbox_id = 1;
}
// PodSandboxStatsResponse is the response message of the PodSandboxStats RPC.
message PodSandboxStatsResponse {
// Stats of the pod sandbox.
PodSandboxStats stats = 1;
}
// PodSandboxStatsFilter is used to filter the pod sandboxes to retrieve stats for.
// All of its fields are combined with 'AND'.
message PodSandboxStatsFilter {
// ID of the pod sandbox.
string id = 1;
// LabelSelector to select matches.
// Only api.MatchLabels is supported for now and the requirements
// are ANDed. MatchExpressions is not supported yet.
map<string, string> label_selector = 2;
}
// ListPodSandboxStatsRequest is the request message of the ListPodSandboxStats RPC.
message ListPodSandboxStatsRequest {
// Filter for the list request.
PodSandboxStatsFilter filter = 1;
}
// ListPodSandboxStatsResponse is the response message of the ListPodSandboxStats RPC.
message ListPodSandboxStatsResponse {
// Stats of the pod sandboxes, one entry per pod sandbox.
repeated PodSandboxStats stats = 1;
}
// PodSandboxAttributes provides basic information of the pod sandbox.
// It is carried in the attributes field of PodSandboxStats.
message PodSandboxAttributes {
// ID of the pod sandbox.
string id = 1;
// Metadata of the pod sandbox.
PodSandboxMetadata metadata = 2;
// Key-value pairs that may be used to scope and select individual resources.
map<string,string> labels = 3;
// Unstructured key-value map holding arbitrary metadata.
// Annotations MUST NOT be altered by the runtime; the value of this field
// MUST be identical to that of the corresponding PodSandboxStatus used to
// instantiate the PodSandbox this status represents.
map<string,string> annotations = 4;
}
// PodSandboxStats provides the resource usage statistics for a pod sandbox.
// The linux or windows field will be populated depending on the platform.
message PodSandboxStats {
// Information of the pod sandbox (ID, metadata, labels, annotations).
PodSandboxAttributes attributes = 1;
// Stats from linux.
LinuxPodSandboxStats linux = 2;
// Stats from windows.
WindowsPodSandboxStats windows = 3;
}
// LinuxPodSandboxStats provides the resource usage statistics for a pod sandbox on linux.
// It is carried in the linux field of PodSandboxStats.
message LinuxPodSandboxStats {
// CPU usage gathered for the pod sandbox.
CpuUsage cpu = 1;
// Memory usage gathered for the pod sandbox.
MemoryUsage memory = 2;
// Network usage gathered for the pod sandbox.
NetworkUsage network = 3;
// Stats pertaining to processes in the pod sandbox.
ProcessUsage process = 4;
// Stats of containers in the measured pod sandbox.
repeated ContainerStats containers = 5;
}
// WindowsPodSandboxStats provides the resource usage statistics for a pod sandbox on windows.
// It is carried in the windows field of PodSandboxStats and currently declares no fields.
message WindowsPodSandboxStats {
// TODO: Add stats relevant to windows.
}
// NetworkUsage contains data about network resources.
// It is carried in the network field of LinuxPodSandboxStats.
message NetworkUsage {
// The time at which these stats were updated.
// NOTE(review): the unit of this timestamp is not stated here — confirm it
// matches the other stats timestamps in this API.
int64 timestamp = 1;
// Stats for the default network interface.
NetworkInterfaceUsage default_interface = 2;
// Stats for all found network interfaces, excluding the default.
repeated NetworkInterfaceUsage interfaces = 3;
}
// NetworkInterfaceUsage contains resource value data about a network interface.
// It is used by NetworkUsage for both the default interface and the list of
// remaining interfaces.
message NetworkInterfaceUsage {
// The name of the network interface.
string name = 1;
// Cumulative count of bytes received.
UInt64Value rx_bytes = 2;
// Cumulative count of receive errors encountered.
UInt64Value rx_errors = 3;
// Cumulative count of bytes transmitted.
UInt64Value tx_bytes = 4;
// Cumulative count of transmit errors encountered.
UInt64Value tx_errors = 5;
}
// ProcessUsage contains stats pertaining to processes.
// It is carried in the process field of LinuxPodSandboxStats.
message ProcessUsage {
// The time at which these stats were updated.
// NOTE(review): the unit of this timestamp is not stated here — confirm it
// matches the other stats timestamps in this API.
int64 timestamp = 1;
// Number of processes.
UInt64Value process_count = 2;
}
// ImageSpec is an internal representation of an image.
message ImageSpec {
// Container's Image field (e.g. imageID or imageDigest).
@ -1359,6 +1472,9 @@ message CpuUsage {
int64 timestamp = 1;
// Cumulative CPU usage (sum across all cores) since object creation.
UInt64Value usage_core_nano_seconds = 2;
// Total CPU usage (sum of all cores) averaged over the sample window.
// The "core" unit can be interpreted as CPU core-nanoseconds per second.
UInt64Value usage_nano_cores = 3;
}
// MemoryUsage provides the memory usage information.
@ -1367,6 +1483,16 @@ message MemoryUsage {
int64 timestamp = 1;
// The amount of working set memory in bytes.
UInt64Value working_set_bytes = 2;
// Available memory for use. This is defined as the memory limit - workingSetBytes.
UInt64Value available_bytes = 3;
// Total memory in use. This includes all memory regardless of when it was accessed.
UInt64Value usage_bytes = 4;
// The amount of anonymous and swap cache memory (includes transparent hugepages).
UInt64Value rss_bytes = 5;
// Cumulative number of minor page faults.
UInt64Value page_faults = 6;
// Cumulative number of major page faults.
UInt64Value major_page_faults = 7;
}
message ReopenContainerLogRequest {

View File

@ -86,6 +86,11 @@ type ContainerStatsManager interface {
ContainerStats(containerID string) (*runtimeapi.ContainerStats, error)
// ListContainerStats returns stats of all running containers.
ListContainerStats(filter *runtimeapi.ContainerStatsFilter) ([]*runtimeapi.ContainerStats, error)
// PodSandboxStats returns stats of the pod. If the pod does not
// exist, the call returns an error.
PodSandboxStats(podSandboxID string) (*runtimeapi.PodSandboxStats, error)
// ListPodSandboxStats returns stats of the pods matching the given filter.
ListPodSandboxStats(filter *runtimeapi.PodSandboxStatsFilter) ([]*runtimeapi.PodSandboxStats, error)
}
// RuntimeService interface should be implemented by a container runtime.

View File

@ -63,10 +63,11 @@ type FakeRuntimeService struct {
Called []string
Errors map[string][]error
FakeStatus *runtimeapi.RuntimeStatus
Containers map[string]*FakeContainer
Sandboxes map[string]*FakePodSandbox
FakeContainerStats map[string]*runtimeapi.ContainerStats
FakeStatus *runtimeapi.RuntimeStatus
Containers map[string]*FakeContainer
Sandboxes map[string]*FakePodSandbox
FakeContainerStats map[string]*runtimeapi.ContainerStats
FakePodSandboxStats map[string]*runtimeapi.PodSandboxStats
ErrorOnSandboxCreate bool
}
@ -151,11 +152,12 @@ func (r *FakeRuntimeService) popError(f string) error {
// NewFakeRuntimeService creates a new FakeRuntimeService whose call log is an
// empty slice and whose error, container, sandbox, container-stats and
// pod-sandbox-stats maps are all allocated, so they can be written to without
// further initialization.
//
// Each field is set exactly once; listing the same field twice in a struct
// literal does not compile.
func NewFakeRuntimeService() *FakeRuntimeService {
	return &FakeRuntimeService{
		Called:              make([]string, 0),
		Errors:              make(map[string][]error),
		Containers:          make(map[string]*FakeContainer),
		Sandboxes:           make(map[string]*FakePodSandbox),
		FakeContainerStats:  make(map[string]*runtimeapi.ContainerStats),
		FakePodSandboxStats: make(map[string]*runtimeapi.PodSandboxStats),
	}
}
@ -621,6 +623,64 @@ func (r *FakeRuntimeService) ListContainerStats(filter *runtimeapi.ContainerStat
return result, nil
}
// SetFakePodSandboxStats replaces the fake pod sandbox stats held by the
// FakeRuntimeService with podStats, keyed by each entry's Attributes.Id.
// Any previously stored stats are discarded. The service lock is held for the
// duration of the update.
func (r *FakeRuntimeService) SetFakePodSandboxStats(podStats []*runtimeapi.PodSandboxStats) {
	r.Lock()
	defer r.Unlock()

	// Start from a fresh map so entries from an earlier call do not linger.
	r.FakePodSandboxStats = make(map[string]*runtimeapi.PodSandboxStats)
	for i := range podStats {
		entry := podStats[i]
		r.FakePodSandboxStats[entry.Attributes.Id] = entry
	}
}
// PodSandboxStats returns the fake stats stored for podSandboxID.
//
// The call is appended to r.Called under the service lock. If popError yields
// an error for "PodSandboxStats", that error is returned instead. When no
// stats are stored for the ID, an error naming the pod sandbox is returned.
func (r *FakeRuntimeService) PodSandboxStats(podSandboxID string) (*runtimeapi.PodSandboxStats, error) {
	r.Lock()
	defer r.Unlock()

	const method = "PodSandboxStats"
	r.Called = append(r.Called, method)
	if injected := r.popError(method); injected != nil {
		return nil, injected
	}

	if stats, ok := r.FakePodSandboxStats[podSandboxID]; ok {
		return stats, nil
	}
	return nil, fmt.Errorf("no stats for pod sandbox %q", podSandboxID)
}
// ListPodSandboxStats returns the fake stats of every sandbox in r.Sandboxes
// that passes filter and has stats stored in r.FakePodSandboxStats.
//
// The call is appended to r.Called under the service lock. If popError yields
// an error for "ListPodSandboxStats", that error is returned instead. A nil
// filter accepts every sandbox; otherwise a non-empty filter.Id must equal the
// sandbox ID and a non-nil filter.LabelSelector must be accepted by
// filterInLabels against the sandbox labels. Sandboxes without stored stats
// are left out. The result follows map iteration order, so it is unordered.
func (r *FakeRuntimeService) ListPodSandboxStats(filter *runtimeapi.PodSandboxStatsFilter) ([]*runtimeapi.PodSandboxStats, error) {
	r.Lock()
	defer r.Unlock()

	const method = "ListPodSandboxStats"
	r.Called = append(r.Called, method)
	if injected := r.popError(method); injected != nil {
		return nil, injected
	}

	var matched []*runtimeapi.PodSandboxStats
	for _, sandbox := range r.Sandboxes {
		if filter != nil {
			idMismatch := filter.Id != "" && filter.Id != sandbox.Id
			if idMismatch {
				continue
			}
			// Labels are only consulted once the ID check has passed and a
			// selector was actually supplied.
			if filter.LabelSelector != nil && !filterInLabels(filter.LabelSelector, sandbox.GetLabels()) {
				continue
			}
		}

		if stats, ok := r.FakePodSandboxStats[sandbox.Id]; ok {
			matched = append(matched, stats)
		}
	}
	return matched, nil
}
// ReopenContainerLog emulates call to the reopen container log in the FakeRuntimeService.
func (r *FakeRuntimeService) ReopenContainerLog(containerID string) error {
r.Lock()