From faae51a14a317873939ebe3cf56285361818689f Mon Sep 17 00:00:00 2001 From: Yu-Ju Hong Date: Wed, 2 Nov 2016 15:48:19 -0700 Subject: [PATCH] CRI: stop sandbox before removing it Stopping a sandbox includes reclaiming the network resources. By always stopping the sandbox before removing it, we reduce the possibility of leaking resources in some corner cases. --- pkg/kubelet/api/v1alpha1/runtime/api.pb.go | 72 ++++++++++++++-------- pkg/kubelet/api/v1alpha1/runtime/api.proto | 38 ++++++++---- pkg/kubelet/kuberuntime/kuberuntime_gc.go | 7 +++ 3 files changed, 80 insertions(+), 37 deletions(-) diff --git a/pkg/kubelet/api/v1alpha1/runtime/api.pb.go b/pkg/kubelet/api/v1alpha1/runtime/api.pb.go index e7dc8e79243..ac18df5aa36 100644 --- a/pkg/kubelet/api/v1alpha1/runtime/api.pb.go +++ b/pkg/kubelet/api/v1alpha1/runtime/api.pb.go @@ -249,10 +249,10 @@ type VersionResponse struct { Version *string `protobuf:"bytes,1,opt,name=version" json:"version,omitempty"` // The name of the container runtime. RuntimeName *string `protobuf:"bytes,2,opt,name=runtime_name,json=runtimeName" json:"runtime_name,omitempty"` - // The version of the container runtime. The string should be + // The version of the container runtime. The string must be // semver-compatible. RuntimeVersion *string `protobuf:"bytes,3,opt,name=runtime_version,json=runtimeVersion" json:"runtime_version,omitempty"` - // The API version of the container runtime. The string should be + // The API version of the container runtime. The string must be // semver-compatible. RuntimeApiVersion *string `protobuf:"bytes,4,opt,name=runtime_api_version,json=runtimeApiVersion" json:"runtime_api_version,omitempty"` XXX_unrecognized []byte `json:"-"` @@ -2694,29 +2694,40 @@ const _ = grpc.SupportPackageIsVersion3 type RuntimeServiceClient interface { // Version returns the runtime name, runtime version and runtime API version Version(ctx context.Context, in *VersionRequest, opts ...grpc.CallOption) (*VersionResponse, error) - // RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure - // the sandbox is in ready state. + // RunPodSandbox creates and starts a pod-level sandbox. Runtimes must ensure + // the sandbox is in the ready state on success. RunPodSandbox(ctx context.Context, in *RunPodSandboxRequest, opts ...grpc.CallOption) (*RunPodSandboxResponse, error) - // StopPodSandbox stops the running sandbox. If there are any running - // containers in the sandbox, they should be forcibly terminated. + // StopPodSandbox stops any running process that is part of the sandbox and + // reclaims network resources (e.g., IP addresses) allocated to the sandbox. + // If there are any running containers in the sandbox, they must be forcibly + // terminated. + // This call is idempotent, and must not return an error if all relevant + // resources have already been reclaimed. kubelet will call StopPodSandbox + // at least once before calling RemovePodSandbox. It will also attempt to + // reclaim resources eagerly, as soon as a sandbox is not needed. Hence, + // multiple StopPodSandbox calls are expected. StopPodSandbox(ctx context.Context, in *StopPodSandboxRequest, opts ...grpc.CallOption) (*StopPodSandboxResponse, error) - // RemovePodSandbox removes the sandbox. If there are any running containers in the - // sandbox, they should be forcibly removed. - // It should return success if the sandbox has already been removed. + // RemovePodSandbox removes the sandbox. If there are any running containers + // in the sandbox, they must be forcibly terminated and removed. + // This call is idempotent, and must not return an error if the sandbox has + // already been removed. RemovePodSandbox(ctx context.Context, in *RemovePodSandboxRequest, opts ...grpc.CallOption) (*RemovePodSandboxResponse, error) // PodSandboxStatus returns the status of the PodSandbox. PodSandboxStatus(ctx context.Context, in *PodSandboxStatusRequest, opts ...grpc.CallOption) (*PodSandboxStatusResponse, error) - // ListPodSandbox returns a list of Sandbox. + // ListPodSandbox returns a list of PodSandboxes. ListPodSandbox(ctx context.Context, in *ListPodSandboxRequest, opts ...grpc.CallOption) (*ListPodSandboxResponse, error) // CreateContainer creates a new container in specified PodSandbox CreateContainer(ctx context.Context, in *CreateContainerRequest, opts ...grpc.CallOption) (*CreateContainerResponse, error) // StartContainer starts the container. StartContainer(ctx context.Context, in *StartContainerRequest, opts ...grpc.CallOption) (*StartContainerResponse, error) // StopContainer stops a running container with a grace period (i.e., timeout). + // This call is idempotent, and must not return an error if the container has + // already been stopped. StopContainer(ctx context.Context, in *StopContainerRequest, opts ...grpc.CallOption) (*StopContainerResponse, error) // RemoveContainer removes the container. If the container is running, the - // container should be forcibly removed. - // It should return success if the container has already been removed. + // container must be forcibly removed. + // This call is idempotent, and must not return an error if the container has + // already been removed. RemoveContainer(ctx context.Context, in *RemoveContainerRequest, opts ...grpc.CallOption) (*RemoveContainerResponse, error) // ListContainers lists all containers by filters. ListContainers(ctx context.Context, in *ListContainersRequest, opts ...grpc.CallOption) (*ListContainersResponse, error) @@ -2900,29 +2911,40 @@ func (c *runtimeServiceClient) UpdateRuntimeConfig(ctx context.Context, in *Upda type RuntimeServiceServer interface { // Version returns the runtime name, runtime version and runtime API version Version(context.Context, *VersionRequest) (*VersionResponse, error) - // RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure - // the sandbox is in ready state. + // RunPodSandbox creates and starts a pod-level sandbox. Runtimes must ensure + // the sandbox is in the ready state on success. RunPodSandbox(context.Context, *RunPodSandboxRequest) (*RunPodSandboxResponse, error) - // StopPodSandbox stops the running sandbox. If there are any running - // containers in the sandbox, they should be forcibly terminated. + // StopPodSandbox stops any running process that is part of the sandbox and + // reclaims network resources (e.g., IP addresses) allocated to the sandbox. + // If there are any running containers in the sandbox, they must be forcibly + // terminated. + // This call is idempotent, and must not return an error if all relevant + // resources have already been reclaimed. kubelet will call StopPodSandbox + // at least once before calling RemovePodSandbox. It will also attempt to + // reclaim resources eagerly, as soon as a sandbox is not needed. Hence, + // multiple StopPodSandbox calls are expected. StopPodSandbox(context.Context, *StopPodSandboxRequest) (*StopPodSandboxResponse, error) - // RemovePodSandbox removes the sandbox. If there are any running containers in the - // sandbox, they should be forcibly removed. - // It should return success if the sandbox has already been removed. + // RemovePodSandbox removes the sandbox. If there are any running containers + // in the sandbox, they must be forcibly terminated and removed. + // This call is idempotent, and must not return an error if the sandbox has + // already been removed. RemovePodSandbox(context.Context, *RemovePodSandboxRequest) (*RemovePodSandboxResponse, error) // PodSandboxStatus returns the status of the PodSandbox. PodSandboxStatus(context.Context, *PodSandboxStatusRequest) (*PodSandboxStatusResponse, error) - // ListPodSandbox returns a list of Sandbox. + // ListPodSandbox returns a list of PodSandboxes. ListPodSandbox(context.Context, *ListPodSandboxRequest) (*ListPodSandboxResponse, error) // CreateContainer creates a new container in specified PodSandbox CreateContainer(context.Context, *CreateContainerRequest) (*CreateContainerResponse, error) // StartContainer starts the container. StartContainer(context.Context, *StartContainerRequest) (*StartContainerResponse, error) // StopContainer stops a running container with a grace period (i.e., timeout). + // This call is idempotent, and must not return an error if the container has + // already been stopped. StopContainer(context.Context, *StopContainerRequest) (*StopContainerResponse, error) // RemoveContainer removes the container. If the container is running, the - // container should be forcibly removed. - // It should return success if the container has already been removed. + // container must be forcibly removed. + // This call is idempotent, and must not return an error if the container has + // already been removed. RemoveContainer(context.Context, *RemoveContainerRequest) (*RemoveContainerResponse, error) // ListContainers lists all containers by filters. ListContainers(context.Context, *ListContainersRequest) (*ListContainersResponse, error) @@ -3338,7 +3360,8 @@ type ImageServiceClient interface { // PullImage pulls an image with authentication config. PullImage(ctx context.Context, in *PullImageRequest, opts ...grpc.CallOption) (*PullImageResponse, error) // RemoveImage removes the image. - // It should return success if the image has already been removed. + // This call is idempotent, and must not return an error if the image has + // already been removed. RemoveImage(ctx context.Context, in *RemoveImageRequest, opts ...grpc.CallOption) (*RemoveImageResponse, error) } @@ -3397,7 +3420,8 @@ type ImageServiceServer interface { // PullImage pulls an image with authentication config. PullImage(context.Context, *PullImageRequest) (*PullImageResponse, error) // RemoveImage removes the image. - // It should return success if the image has already been removed. + // This call is idempotent, and must not return an error if the image has + // already been removed. RemoveImage(context.Context, *RemoveImageRequest) (*RemoveImageResponse, error) } diff --git a/pkg/kubelet/api/v1alpha1/runtime/api.proto b/pkg/kubelet/api/v1alpha1/runtime/api.proto index 14d32388acf..2bdea064bc0 100644 --- a/pkg/kubelet/api/v1alpha1/runtime/api.proto +++ b/pkg/kubelet/api/v1alpha1/runtime/api.proto @@ -8,19 +8,27 @@ service RuntimeService { // Version returns the runtime name, runtime version and runtime API version rpc Version(VersionRequest) returns (VersionResponse) {} - // RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure - // the sandbox is in ready state. + // RunPodSandbox creates and starts a pod-level sandbox. Runtimes must ensure + // the sandbox is in the ready state on success. rpc RunPodSandbox(RunPodSandboxRequest) returns (RunPodSandboxResponse) {} - // StopPodSandbox stops the running sandbox. If there are any running - // containers in the sandbox, they should be forcibly terminated. + // StopPodSandbox stops any running process that is part of the sandbox and + // reclaims network resources (e.g., IP addresses) allocated to the sandbox. + // If there are any running containers in the sandbox, they must be forcibly + // terminated. + // This call is idempotent, and must not return an error if all relevant + // resources have already been reclaimed. kubelet will call StopPodSandbox + // at least once before calling RemovePodSandbox. It will also attempt to + // reclaim resources eagerly, as soon as a sandbox is not needed. Hence, + // multiple StopPodSandbox calls are expected. rpc StopPodSandbox(StopPodSandboxRequest) returns (StopPodSandboxResponse) {} - // RemovePodSandbox removes the sandbox. If there are any running containers in the - // sandbox, they should be forcibly removed. - // It should return success if the sandbox has already been removed. + // RemovePodSandbox removes the sandbox. If there are any running containers + // in the sandbox, they must be forcibly terminated and removed. + // This call is idempotent, and must not return an error if the sandbox has + // already been removed. rpc RemovePodSandbox(RemovePodSandboxRequest) returns (RemovePodSandboxResponse) {} // PodSandboxStatus returns the status of the PodSandbox. rpc PodSandboxStatus(PodSandboxStatusRequest) returns (PodSandboxStatusResponse) {} - // ListPodSandbox returns a list of Sandbox. + // ListPodSandbox returns a list of PodSandboxes. rpc ListPodSandbox(ListPodSandboxRequest) returns (ListPodSandboxResponse) {} // CreateContainer creates a new container in specified PodSandbox @@ -28,10 +36,13 @@ service RuntimeService { // StartContainer starts the container. rpc StartContainer(StartContainerRequest) returns (StartContainerResponse) {} // StopContainer stops a running container with a grace period (i.e., timeout). + // This call is idempotent, and must not return an error if the container has + // already been stopped. rpc StopContainer(StopContainerRequest) returns (StopContainerResponse) {} // RemoveContainer removes the container. If the container is running, the - // container should be forcibly removed. - // It should return success if the container has already been removed. + // container must be forcibly removed. + // This call is idempotent, and must not return an error if the container has + // already been removed. rpc RemoveContainer(RemoveContainerRequest) returns (RemoveContainerResponse) {} // ListContainers lists all containers by filters. rpc ListContainers(ListContainersRequest) returns (ListContainersResponse) {} @@ -61,7 +72,8 @@ service ImageService { // PullImage pulls an image with authentication config. rpc PullImage(PullImageRequest) returns (PullImageResponse) {} // RemoveImage removes the image. - // It should return success if the image has already been removed. + // This call is idempotent, and must not return an error if the image has + // already been removed. rpc RemoveImage(RemoveImageRequest) returns (RemoveImageResponse) {} } @@ -75,10 +87,10 @@ message VersionResponse { optional string version = 1; // The name of the container runtime. optional string runtime_name = 2; - // The version of the container runtime. The string should be + // The version of the container runtime. The string must be // semver-compatible. optional string runtime_version = 3; - // The API version of the container runtime. The string should be + // The API version of the container runtime. The string must be // semver-compatible. optional string runtime_api_version = 4; } diff --git a/pkg/kubelet/kuberuntime/kuberuntime_gc.go b/pkg/kubelet/kuberuntime/kuberuntime_gc.go index 7e7ab9a4152..97180daa2db 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_gc.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_gc.go @@ -131,6 +131,13 @@ func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int // removeSandbox removes the sandbox by sandboxID. func (cgc *containerGC) removeSandbox(sandboxID string) { glog.V(4).Infof("Removing sandbox %q", sandboxID) + // In normal cases, kubelet should've already called StopPodSandbox before + // GC kicks in. To guard against the rare cases where this is not true, try + // stopping the sandbox before removing it. + if err := cgc.client.StopPodSandbox(sandboxID); err != nil { + glog.Errorf("Failed to stop sandbox %q before removing: %v", sandboxID, err) + return + } if err := cgc.client.RemovePodSandbox(sandboxID); err != nil { glog.Errorf("Failed to remove sandbox %q: %v", sandboxID, err) }