Merge pull request #90477 from rphillips/feat/add_tracing_to_remote_runtime

kubelet: add remote_runtime tracing at log level 10
This commit is contained in:
Kubernetes Prow Robot 2020-05-14 00:46:22 -07:00 committed by GitHub
commit dc1934e078
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -71,6 +71,8 @@ func NewRemoteRuntimeService(endpoint string, connectionTimeout time.Duration) (
// Version returns the runtime name, runtime version and runtime API version.
func (r *RemoteRuntimeService) Version(apiVersion string) (*runtimeapi.VersionResponse, error) {
klog.V(10).Infof("[RemoteRuntimeService] Version (apiVersion=%v, timeout=%v)", apiVersion, r.timeout)
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
@ -82,6 +84,8 @@ func (r *RemoteRuntimeService) Version(apiVersion string) (*runtimeapi.VersionRe
return nil, err
}
klog.V(10).Infof("[RemoteRuntimeService] Version Response (typedVersion=%v)", typedVersion)
if typedVersion.Version == "" || typedVersion.RuntimeName == "" || typedVersion.RuntimeApiVersion == "" || typedVersion.RuntimeVersion == "" {
return nil, fmt.Errorf("not all fields are set in VersionResponse (%q)", *typedVersion)
}
@ -94,7 +98,11 @@ func (r *RemoteRuntimeService) Version(apiVersion string) (*runtimeapi.VersionRe
func (r *RemoteRuntimeService) RunPodSandbox(config *runtimeapi.PodSandboxConfig, runtimeHandler string) (string, error) {
// Use 2 times longer timeout for sandbox operation (4 mins by default)
// TODO: Make the pod sandbox timeout configurable.
ctx, cancel := getContextWithTimeout(r.timeout * 2)
timeout := r.timeout * 2
klog.V(10).Infof("[RemoteRuntimeService] RunPodSandbox (config=%v, runtimeHandler=%v, timeout=%v)", config, runtimeHandler, timeout)
ctx, cancel := getContextWithTimeout(timeout)
defer cancel()
resp, err := r.runtimeClient.RunPodSandbox(ctx, &runtimeapi.RunPodSandboxRequest{
@ -112,12 +120,16 @@ func (r *RemoteRuntimeService) RunPodSandbox(config *runtimeapi.PodSandboxConfig
return "", errors.New(errorMessage)
}
klog.V(10).Infof("[RemoteRuntimeService] RunPodSandbox Response (PodSandboxId=%v)", resp.PodSandboxId)
return resp.PodSandboxId, nil
}
// StopPodSandbox stops the sandbox. If there are any running containers in the
// sandbox, they should be forced to termination.
func (r *RemoteRuntimeService) StopPodSandbox(podSandBoxID string) error {
klog.V(10).Infof("[RemoteRuntimeService] StopPodSandbox (podSandboxID=%v, timeout=%v)", podSandBoxID, r.timeout)
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
@ -129,12 +141,15 @@ func (r *RemoteRuntimeService) StopPodSandbox(podSandBoxID string) error {
return err
}
klog.V(10).Infof("[RemoteRuntimeService] StopPodSandbox Response (podSandboxID=%v)", podSandBoxID)
return nil
}
// RemovePodSandbox removes the sandbox. If there are any containers in the
// sandbox, they should be forcibly removed.
func (r *RemoteRuntimeService) RemovePodSandbox(podSandBoxID string) error {
klog.V(10).Infof("[RemoteRuntimeService] RemovePodSandbox (podSandboxID=%v, timeout=%v)", podSandBoxID, r.timeout)
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
@ -146,11 +161,14 @@ func (r *RemoteRuntimeService) RemovePodSandbox(podSandBoxID string) error {
return err
}
klog.V(10).Infof("[RemoteRuntimeService] RemovePodSandbox Response (podSandboxID=%v)", podSandBoxID)
return nil
}
// PodSandboxStatus returns the status of the PodSandbox.
func (r *RemoteRuntimeService) PodSandboxStatus(podSandBoxID string) (*runtimeapi.PodSandboxStatus, error) {
klog.V(10).Infof("[RemoteRuntimeService] PodSandboxStatus (podSandboxID=%v, timeout=%v)", podSandBoxID, r.timeout)
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
@ -161,6 +179,8 @@ func (r *RemoteRuntimeService) PodSandboxStatus(podSandBoxID string) (*runtimeap
return nil, err
}
klog.V(10).Infof("[RemoteRuntimeService] PodSandboxStatus Response (podSandboxID=%v, status=%v)", podSandBoxID, resp.Status)
if resp.Status != nil {
if err := verifySandboxStatus(resp.Status); err != nil {
return nil, err
@ -172,6 +192,7 @@ func (r *RemoteRuntimeService) PodSandboxStatus(podSandBoxID string) (*runtimeap
// ListPodSandbox returns a list of PodSandboxes.
func (r *RemoteRuntimeService) ListPodSandbox(filter *runtimeapi.PodSandboxFilter) ([]*runtimeapi.PodSandbox, error) {
klog.V(10).Infof("[RemoteRuntimeService] ListPodSandbox (filter=%v, timeout=%v)", filter, r.timeout)
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
@ -183,11 +204,14 @@ func (r *RemoteRuntimeService) ListPodSandbox(filter *runtimeapi.PodSandboxFilte
return nil, err
}
klog.V(10).Infof("[RemoteRuntimeService] ListPodSandbox Response (filter=%v, items=%v)", filter, resp.Items)
return resp.Items, nil
}
// CreateContainer creates a new container in the specified PodSandbox.
func (r *RemoteRuntimeService) CreateContainer(podSandBoxID string, config *runtimeapi.ContainerConfig, sandboxConfig *runtimeapi.PodSandboxConfig) (string, error) {
klog.V(10).Infof("[RemoteRuntimeService] CreateContainer (podSandBoxID=%v, timeout=%v)", podSandBoxID, r.timeout)
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
@ -201,6 +225,7 @@ func (r *RemoteRuntimeService) CreateContainer(podSandBoxID string, config *runt
return "", err
}
klog.V(10).Infof("[RemoteRuntimeService] CreateContainer (podSandBoxID=%v, ContainerId=%v)", podSandBoxID, resp.ContainerId)
if resp.ContainerId == "" {
errorMessage := fmt.Sprintf("ContainerId is not set for container %q", config.GetMetadata())
klog.Errorf("CreateContainer failed: %s", errorMessage)
@ -212,6 +237,7 @@ func (r *RemoteRuntimeService) CreateContainer(podSandBoxID string, config *runt
// StartContainer starts the container.
func (r *RemoteRuntimeService) StartContainer(containerID string) error {
klog.V(10).Infof("[RemoteRuntimeService] StartContainer (containerID=%v, timeout=%v)", containerID, r.timeout)
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
@ -222,12 +248,14 @@ func (r *RemoteRuntimeService) StartContainer(containerID string) error {
klog.Errorf("StartContainer %q from runtime service failed: %v", containerID, err)
return err
}
klog.V(10).Infof("[RemoteRuntimeService] StartContainer Response (containerID=%v)", containerID)
return nil
}
// StopContainer stops a running container with a grace period (i.e., timeout).
func (r *RemoteRuntimeService) StopContainer(containerID string, timeout int64) error {
klog.V(10).Infof("[RemoteRuntimeService] StopContainer (containerID=%v, timeout=%v)", containerID, timeout)
// Use timeout + default timeout (2 minutes) as timeout to leave extra time
// for SIGKILL container and request latency.
t := r.timeout + time.Duration(timeout)*time.Second
@ -243,6 +271,7 @@ func (r *RemoteRuntimeService) StopContainer(containerID string, timeout int64)
klog.Errorf("StopContainer %q from runtime service failed: %v", containerID, err)
return err
}
klog.V(10).Infof("[RemoteRuntimeService] StopContainer Response (containerID=%v)", containerID)
return nil
}
@ -250,6 +279,7 @@ func (r *RemoteRuntimeService) StopContainer(containerID string, timeout int64)
// RemoveContainer removes the container. If the container is running, the container
// should be forced to removal.
func (r *RemoteRuntimeService) RemoveContainer(containerID string) error {
klog.V(10).Infof("[RemoteRuntimeService] RemoveContainer (containerID=%v, timeout=%v)", containerID, r.timeout)
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
@ -261,12 +291,14 @@ func (r *RemoteRuntimeService) RemoveContainer(containerID string) error {
klog.Errorf("RemoveContainer %q from runtime service failed: %v", containerID, err)
return err
}
klog.V(10).Infof("[RemoteRuntimeService] RemoveContainer Response (containerID=%v)", containerID)
return nil
}
// ListContainers lists containers by filters.
func (r *RemoteRuntimeService) ListContainers(filter *runtimeapi.ContainerFilter) ([]*runtimeapi.Container, error) {
klog.V(10).Infof("[RemoteRuntimeService] ListContainers (filter=%v, timeout=%v)", filter, r.timeout)
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
@ -277,12 +309,14 @@ func (r *RemoteRuntimeService) ListContainers(filter *runtimeapi.ContainerFilter
klog.Errorf("ListContainers with filter %+v from runtime service failed: %v", filter, err)
return nil, err
}
klog.V(10).Infof("[RemoteRuntimeService] ListContainers Response (filter=%v, containers=%v)", filter, resp.Containers)
return resp.Containers, nil
}
// ContainerStatus returns the container status.
func (r *RemoteRuntimeService) ContainerStatus(containerID string) (*runtimeapi.ContainerStatus, error) {
klog.V(10).Infof("[RemoteRuntimeService] ContainerStatus (containerID=%v, timeout=%v)", containerID, r.timeout)
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
@ -297,6 +331,7 @@ func (r *RemoteRuntimeService) ContainerStatus(containerID string) (*runtimeapi.
return nil, err
}
r.logReduction.ClearID(containerID)
klog.V(10).Infof("[RemoteRuntimeService] ContainerStatus Response (containerID=%v, status=%v)", containerID, resp.Status)
if resp.Status != nil {
if err := verifyContainerStatus(resp.Status); err != nil {
@ -310,6 +345,7 @@ func (r *RemoteRuntimeService) ContainerStatus(containerID string) (*runtimeapi.
// UpdateContainerResources updates a containers resource config
func (r *RemoteRuntimeService) UpdateContainerResources(containerID string, resources *runtimeapi.LinuxContainerResources) error {
klog.V(10).Infof("[RemoteRuntimeService] UpdateContainerResources (containerID=%v, timeout=%v)", containerID, r.timeout)
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
@ -321,6 +357,7 @@ func (r *RemoteRuntimeService) UpdateContainerResources(containerID string, reso
klog.Errorf("UpdateContainerResources %q from runtime service failed: %v", containerID, err)
return err
}
klog.V(10).Infof("[RemoteRuntimeService] UpdateContainerResources Response (containerID=%v)", containerID)
return nil
}
@ -328,6 +365,7 @@ func (r *RemoteRuntimeService) UpdateContainerResources(containerID string, reso
// ExecSync executes a command in the container, and returns the stdout output.
// If command exits with a non-zero exit code, an error is returned.
func (r *RemoteRuntimeService) ExecSync(containerID string, cmd []string, timeout time.Duration) (stdout []byte, stderr []byte, err error) {
klog.V(10).Infof("[RemoteRuntimeService] ExecSync (containerID=%v, timeout=%v)", containerID, timeout)
// Do not set timeout when timeout is 0.
var ctx context.Context
var cancel context.CancelFunc
@ -352,6 +390,7 @@ func (r *RemoteRuntimeService) ExecSync(containerID string, cmd []string, timeou
return nil, nil, err
}
klog.V(10).Infof("[RemoteRuntimeService] ExecSync Response (containerID=%v, ExitCode=%v)", containerID, resp.ExitCode)
err = nil
if resp.ExitCode != 0 {
err = utilexec.CodeExitError{
@ -365,6 +404,7 @@ func (r *RemoteRuntimeService) ExecSync(containerID string, cmd []string, timeou
// Exec prepares a streaming endpoint to execute a command in the container, and returns the address.
func (r *RemoteRuntimeService) Exec(req *runtimeapi.ExecRequest) (*runtimeapi.ExecResponse, error) {
klog.V(10).Infof("[RemoteRuntimeService] Exec (timeout=%v)", r.timeout)
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
@ -373,6 +413,7 @@ func (r *RemoteRuntimeService) Exec(req *runtimeapi.ExecRequest) (*runtimeapi.Ex
klog.Errorf("Exec %s '%s' from runtime service failed: %v", req.ContainerId, strings.Join(req.Cmd, " "), err)
return nil, err
}
klog.V(10).Info("[RemoteRuntimeService] Exec Response")
if resp.Url == "" {
errorMessage := "URL is not set"
@ -385,6 +426,7 @@ func (r *RemoteRuntimeService) Exec(req *runtimeapi.ExecRequest) (*runtimeapi.Ex
// Attach prepares a streaming endpoint to attach to a running container, and returns the address.
func (r *RemoteRuntimeService) Attach(req *runtimeapi.AttachRequest) (*runtimeapi.AttachResponse, error) {
klog.V(10).Infof("[RemoteRuntimeService] Attach (containerId=%v, timeout=%v)", req.ContainerId, r.timeout)
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
@ -393,6 +435,7 @@ func (r *RemoteRuntimeService) Attach(req *runtimeapi.AttachRequest) (*runtimeap
klog.Errorf("Attach %s from runtime service failed: %v", req.ContainerId, err)
return nil, err
}
klog.V(10).Infof("[RemoteRuntimeService] Attach Response (containerId=%v)", req.ContainerId)
if resp.Url == "" {
errorMessage := "URL is not set"
@ -404,6 +447,7 @@ func (r *RemoteRuntimeService) Attach(req *runtimeapi.AttachRequest) (*runtimeap
// PortForward prepares a streaming endpoint to forward ports from a PodSandbox, and returns the address.
func (r *RemoteRuntimeService) PortForward(req *runtimeapi.PortForwardRequest) (*runtimeapi.PortForwardResponse, error) {
klog.V(10).Infof("[RemoteRuntimeService] PortForward (podSandboxID=%v, port=%v, timeout=%v)", req.PodSandboxId, req.Port, r.timeout)
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
@ -412,6 +456,7 @@ func (r *RemoteRuntimeService) PortForward(req *runtimeapi.PortForwardRequest) (
klog.Errorf("PortForward %s from runtime service failed: %v", req.PodSandboxId, err)
return nil, err
}
klog.V(10).Infof("[RemoteRuntimeService] PortForward Response (podSandboxID=%v)", req.PodSandboxId)
if resp.Url == "" {
errorMessage := "URL is not set"
@ -426,6 +471,7 @@ func (r *RemoteRuntimeService) PortForward(req *runtimeapi.PortForwardRequest) (
// update payload currently supported is the pod CIDR assigned to a node,
// and the runtime service just proxies it down to the network plugin.
func (r *RemoteRuntimeService) UpdateRuntimeConfig(runtimeConfig *runtimeapi.RuntimeConfig) error {
klog.V(10).Infof("[RemoteRuntimeService] UpdateRuntimeConfig (runtimeConfig=%v, timeout=%v)", runtimeConfig, r.timeout)
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
@ -439,12 +485,14 @@ func (r *RemoteRuntimeService) UpdateRuntimeConfig(runtimeConfig *runtimeapi.Run
if err != nil {
return err
}
klog.V(10).Infof("[RemoteRuntimeService] UpdateRuntimeConfig Response (runtimeConfig=%v)", runtimeConfig)
return nil
}
// Status returns the status of the runtime.
func (r *RemoteRuntimeService) Status() (*runtimeapi.RuntimeStatus, error) {
klog.V(10).Infof("[RemoteRuntimeService] Status (timeout=%v)", r.timeout)
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
@ -454,6 +502,8 @@ func (r *RemoteRuntimeService) Status() (*runtimeapi.RuntimeStatus, error) {
return nil, err
}
klog.V(10).Infof("[RemoteRuntimeService] Status Response (status=%v)", resp.Status)
if resp.Status == nil || len(resp.Status.Conditions) < 2 {
errorMessage := "RuntimeReady or NetworkReady condition are not set"
klog.Errorf("Status failed: %s", errorMessage)
@ -465,6 +515,7 @@ func (r *RemoteRuntimeService) Status() (*runtimeapi.RuntimeStatus, error) {
// ContainerStats returns the stats of the container.
func (r *RemoteRuntimeService) ContainerStats(containerID string) (*runtimeapi.ContainerStats, error) {
klog.V(10).Infof("[RemoteRuntimeService] ContainerStats (containerID=%v, timeout=%v)", containerID, r.timeout)
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
@ -478,11 +529,13 @@ func (r *RemoteRuntimeService) ContainerStats(containerID string) (*runtimeapi.C
return nil, err
}
r.logReduction.ClearID(containerID)
klog.V(10).Infof("[RemoteRuntimeService] ContainerStats Response (containerID=%v, stats=%v)", containerID, resp.GetStats())
return resp.GetStats(), nil
}
func (r *RemoteRuntimeService) ListContainerStats(filter *runtimeapi.ContainerStatsFilter) ([]*runtimeapi.ContainerStats, error) {
klog.V(10).Infof("[RemoteRuntimeService] ListContainerStats (filter=%v)", filter)
// Do not set timeout, because writable layer stats collection takes time.
// TODO(random-liu): Should we assume runtime should cache the result, and set timeout here?
ctx, cancel := getContextWithCancel()
@ -495,11 +548,13 @@ func (r *RemoteRuntimeService) ListContainerStats(filter *runtimeapi.ContainerSt
klog.Errorf("ListContainerStats with filter %+v from runtime service failed: %v", filter, err)
return nil, err
}
klog.V(10).Infof("[RemoteRuntimeService] ListContainerStats Response (filter=%v, stats=%v)", filter, resp.GetStats())
return resp.GetStats(), nil
}
func (r *RemoteRuntimeService) ReopenContainerLog(containerID string) error {
klog.V(10).Infof("[RemoteRuntimeService] ReopenContainerLog (containerID=%v, timeout=%v)", containerID, r.timeout)
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
@ -508,5 +563,7 @@ func (r *RemoteRuntimeService) ReopenContainerLog(containerID string) error {
klog.Errorf("ReopenContainerLog %q from runtime service failed: %v", containerID, err)
return err
}
klog.V(10).Infof("[RemoteRuntimeService] ReopenContainerLog Response (containerID=%v)", containerID)
return nil
}