Merge pull request #34287 from Random-Liu/add-sandbox-gc-minage

Automatic merge from submit-queue

Add sandbox gc minage

Fixes https://github.com/kubernetes/kubernetes/issues/34272.
Fixes https://github.com/kubernetes/kubernetes/issues/33984.

This PR:
1) Change `GetPodStatus` to get the statuses of all containers in a pod instead of only the containers belonging to existing sandboxes. A sandbox may be removed by GC or by users, and kubelet should be able to deal with this case.
2) Change the CRI comment to clarify the timestamp unit (nanosecond).
3) Add MinAge for sandbox GC Policy.

@yujuhong @feiskyer @yifan-gu 
/cc @kubernetes/sig-node
This commit is contained in:
Kubernetes Submit Queue 2016-10-08 17:07:28 -07:00 committed by GitHub
commit 4b3498d2d5
7 changed files with 61 additions and 31 deletions

View File

@ -812,7 +812,7 @@ type PodSandboxStatus struct {
Metadata *PodSandboxMetadata `protobuf:"bytes,2,opt,name=metadata" json:"metadata,omitempty"`
// State of the sandbox.
State *PodSandBoxState `protobuf:"varint,3,opt,name=state,enum=runtime.PodSandBoxState" json:"state,omitempty"`
// Creation timestamp of the sandbox
// Creation timestamp of the sandbox in nanoseconds.
CreatedAt *int64 `protobuf:"varint,4,opt,name=created_at,json=createdAt" json:"created_at,omitempty"`
// Network contains network status if network is handled by the runtime.
Network *PodSandboxNetworkStatus `protobuf:"bytes,5,opt,name=network" json:"network,omitempty"`
@ -971,7 +971,7 @@ type PodSandbox struct {
Metadata *PodSandboxMetadata `protobuf:"bytes,2,opt,name=metadata" json:"metadata,omitempty"`
// The state of the PodSandbox
State *PodSandBoxState `protobuf:"varint,3,opt,name=state,enum=runtime.PodSandBoxState" json:"state,omitempty"`
// Creation timestamps of the sandbox
// Creation timestamps of the sandbox in nanoseconds
CreatedAt *int64 `protobuf:"varint,4,opt,name=created_at,json=createdAt" json:"created_at,omitempty"`
// The labels of the PodSandbox
Labels map[string]string `protobuf:"bytes,5,rep,name=labels" json:"labels,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"`
@ -1740,7 +1740,7 @@ type Container struct {
ImageRef *string `protobuf:"bytes,5,opt,name=image_ref,json=imageRef" json:"image_ref,omitempty"`
// State is the state of the container.
State *ContainerState `protobuf:"varint,6,opt,name=state,enum=runtime.ContainerState" json:"state,omitempty"`
// Creation time of the container.
// Creation time of the container in nanoseconds.
CreatedAt *int64 `protobuf:"varint,7,opt,name=created_at,json=createdAt" json:"created_at,omitempty"`
// Labels are key value pairs that may be used to scope and select individual resources.
Labels map[string]string `protobuf:"bytes,8,rep,name=labels" json:"labels,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"`
@ -1862,11 +1862,11 @@ type ContainerStatus struct {
Metadata *ContainerMetadata `protobuf:"bytes,2,opt,name=metadata" json:"metadata,omitempty"`
// Status of the container.
State *ContainerState `protobuf:"varint,3,opt,name=state,enum=runtime.ContainerState" json:"state,omitempty"`
// Creation time of the container.
// Creation time of the container in nanoseconds.
CreatedAt *int64 `protobuf:"varint,4,opt,name=created_at,json=createdAt" json:"created_at,omitempty"`
// Start time of the container.
// Start time of the container in nanoseconds.
StartedAt *int64 `protobuf:"varint,5,opt,name=started_at,json=startedAt" json:"started_at,omitempty"`
// Finish time of the container.
// Finish time of the container in nanoseconds.
FinishedAt *int64 `protobuf:"varint,6,opt,name=finished_at,json=finishedAt" json:"finished_at,omitempty"`
// Exit code of the container.
ExitCode *int32 `protobuf:"varint,7,opt,name=exit_code,json=exitCode" json:"exit_code,omitempty"`

View File

@ -251,7 +251,7 @@ message PodSandboxStatus {
optional PodSandboxMetadata metadata = 2;
// State of the sandbox.
optional PodSandBoxState state = 3;
// Creation timestamp of the sandbox
// Creation timestamp of the sandbox in nanoseconds.
optional int64 created_at = 4;
// Network contains network status if network is handled by the runtime.
optional PodSandboxNetworkStatus network = 5;
@ -296,7 +296,7 @@ message PodSandbox {
optional PodSandboxMetadata metadata = 2;
// The state of the PodSandbox
optional PodSandBoxState state = 3;
// Creation timestamps of the sandbox
// Creation timestamps of the sandbox in nanoseconds
optional int64 created_at = 4;
// The labels of the PodSandbox
map<string, string> labels = 5;
@ -533,7 +533,7 @@ message Container {
optional string image_ref = 5;
// State is the state of the container.
optional ContainerState state = 6;
// Creation time of the container.
// Creation time of the container in nanoseconds.
optional int64 created_at = 7;
// Labels are key value pairs that may be used to scope and select individual resources.
map<string, string> labels = 8;
@ -560,11 +560,11 @@ message ContainerStatus {
optional ContainerMetadata metadata = 2;
// Status of the container.
optional ContainerState state = 3;
// Creation time of the container.
// Creation time of the container in nanoseconds.
optional int64 created_at = 4;
// Start time of the container.
// Start time of the container in nanoseconds.
optional int64 started_at = 5;
// Finish time of the container.
// Finish time of the container in nanoseconds.
optional int64 finished_at = 6;
// Exit code of the container.
optional int32 exit_code = 7;

View File

@ -19,6 +19,7 @@ package dockershim
import (
"fmt"
"strings"
"time"
dockertypes "github.com/docker/engine-api/types"
@ -57,6 +58,8 @@ func toRuntimeAPIContainer(c *dockertypes.Container) (*runtimeApi.Container, err
}
labels, annotations := extractLabels(c.Labels)
sandboxID := c.Labels[sandboxIDLabelKey]
// The timestamp in dockertypes.Container is in seconds.
createdAt := c.Created * int64(time.Second)
return &runtimeApi.Container{
Id: &c.ID,
PodSandboxId: &sandboxID,
@ -64,6 +67,7 @@ func toRuntimeAPIContainer(c *dockertypes.Container) (*runtimeApi.Container, err
Image: &runtimeApi.ImageSpec{Image: &c.Image},
ImageRef: &c.ImageID,
State: &state,
CreatedAt: &createdAt,
Labels: labels,
Annotations: annotations,
}, nil
@ -117,11 +121,13 @@ func toRuntimeAPISandbox(c *dockertypes.Container) (*runtimeApi.PodSandbox, erro
return nil, err
}
labels, annotations := extractLabels(c.Labels)
// The timestamp in dockertypes.Container is in seconds.
createdAt := c.Created * int64(time.Second)
return &runtimeApi.PodSandbox{
Id: &c.ID,
Metadata: metadata,
State: &state,
CreatedAt: &c.Created,
CreatedAt: &createdAt,
Labels: labels,
Annotations: annotations,
}, nil

View File

@ -61,6 +61,7 @@ func TestListContainers(t *testing.T) {
expected := []*runtimeApi.Container{}
state := runtimeApi.ContainerState_RUNNING
var createdAt int64 = 0
for i := range configs {
// We don't care about the sandbox id; pass a bogus one.
sandboxID := fmt.Sprintf("sandboxid%d", i)
@ -77,6 +78,7 @@ func TestListContainers(t *testing.T) {
Id: &id,
PodSandboxId: &sandboxID,
State: &state,
CreatedAt: &createdAt,
Image: configs[i].Image,
ImageRef: &imageRef,
Labels: configs[i].Labels,

View File

@ -35,8 +35,9 @@ import (
"k8s.io/kubernetes/pkg/kubelet/dockershim"
"k8s.io/kubernetes/pkg/kubelet/events"
"k8s.io/kubernetes/pkg/kubelet/qos"
"k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/util/format"
"k8s.io/kubernetes/pkg/types"
kubetypes "k8s.io/kubernetes/pkg/types"
utilruntime "k8s.io/kubernetes/pkg/util/runtime"
"k8s.io/kubernetes/pkg/util/sets"
"k8s.io/kubernetes/pkg/util/term"
@ -115,7 +116,7 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
}
// getContainerLogsPath gets log path for container.
func getContainerLogsPath(containerName string, podUID types.UID) string {
func getContainerLogsPath(containerName string, podUID kubetypes.UID) string {
return path.Join(podLogsRootDirectory, string(podUID), fmt.Sprintf("%s.log", containerName))
}
@ -345,10 +346,11 @@ func getTerminationMessage(status *runtimeApi.ContainerStatus, kubeStatus *kubec
return message
}
// getKubeletContainerStatuses gets all containers' status for the pod sandbox.
func (m *kubeGenericRuntimeManager) getKubeletContainerStatuses(podSandboxID string) ([]*kubecontainer.ContainerStatus, error) {
// getPodContainerStatuses gets all containers' statuses for the pod.
func (m *kubeGenericRuntimeManager) getPodContainerStatuses(uid kubetypes.UID, name, namespace string) ([]*kubecontainer.ContainerStatus, error) {
// Select all containers of the given pod.
containers, err := m.runtimeService.ListContainers(&runtimeApi.ContainerFilter{
PodSandboxId: &podSandboxID,
LabelSelector: map[string]string{types.KubernetesPodUIDLabel: string(uid)},
})
if err != nil {
glog.Errorf("ListContainers error: %v", err)
@ -377,16 +379,16 @@ func (m *kubeGenericRuntimeManager) getKubeletContainerStatuses(podSandboxID str
Hash: annotatedInfo.Hash,
RestartCount: annotatedInfo.RestartCount,
State: toKubeContainerState(c.GetState()),
CreatedAt: time.Unix(status.GetCreatedAt(), 0),
CreatedAt: time.Unix(0, status.GetCreatedAt()),
}
if c.GetState() == runtimeApi.ContainerState_RUNNING {
cStatus.StartedAt = time.Unix(status.GetStartedAt(), 0)
cStatus.StartedAt = time.Unix(0, status.GetStartedAt())
} else {
cStatus.Reason = status.GetReason()
cStatus.Message = status.GetMessage()
cStatus.ExitCode = int(status.GetExitCode())
cStatus.FinishedAt = time.Unix(status.GetFinishedAt(), 0)
cStatus.FinishedAt = time.Unix(0, status.GetFinishedAt())
}
tMessage := getTerminationMessage(status, cStatus, annotatedInfo.TerminationMessagePath)

View File

@ -27,6 +27,20 @@ import (
"k8s.io/kubernetes/pkg/types"
)
// sandboxMinGCAge sets how old an empty sandbox must be before garbage
// collection may remove it, so that a freshly created sandbox is not
// collected before its containers have been created.
//
// A sandbox whose first container is created too late (later than
// sandboxMinGCAge) can still be garbage collected. In that case, SyncPod will
// recreate the sandbox and make sure old containers are all stopped.
//
// In the figure below, 'o' is a stopped sandbox and 'x' is a removed sandbox.
// It shows that, approximately, if a sandbox keeps crashing and
// MinAge = 1/n GC Period, there will be 1/n more sandboxes not garbage
// collected.
//
//	oooooo|xxxxxx|xxxxxx|   <--- MinAge = 0
//	gc    gc     gc     gc
//	oooooo|oooxxx|xxxxxx|   <--- MinAge = 1/2 GC Period
const sandboxMinGCAge = 30 * time.Second
// containerGC is the manager of garbage collection.
type containerGC struct {
client internalApi.RuntimeService
@ -141,7 +155,7 @@ func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByE
continue
}
createdAt := time.Unix(container.GetCreatedAt(), 0)
createdAt := time.Unix(0, container.GetCreatedAt())
if newestGCTime.Before(createdAt) {
continue
}
@ -182,6 +196,7 @@ func (cgc *containerGC) evictableSandboxes() ([]string, error) {
}
evictSandboxes := make([]string, 0)
newestGCTime := time.Now().Add(-sandboxMinGCAge)
for _, sandbox := range sandboxes {
// Prune out ready sandboxes.
if sandbox.GetState() == runtimeApi.PodSandBoxState_READY {
@ -201,6 +216,12 @@ func (cgc *containerGC) evictableSandboxes() ([]string, error) {
continue
}
// Only garbage collect sandboxes older than sandboxMinGCAge.
createdAt := time.Unix(0, sandbox.GetCreatedAt())
if createdAt.After(newestGCTime) {
continue
}
evictSandboxes = append(evictSandboxes, sandboxID)
}

View File

@ -876,15 +876,14 @@ func (m *kubeGenericRuntimeManager) GetPodStatus(uid kubetypes.UID, name, namesp
UID: uid,
},
})
glog.V(4).Infof("getSandboxIDByPodUID got sandbox IDs %q for pod %q(UID:%q)", podSandboxIDs, podFullName, string(uid))
glog.V(4).Infof("getSandboxIDByPodUID got sandbox IDs %q for pod %q", podSandboxIDs, podFullName)
sandboxStatuses := make([]*runtimeApi.PodSandboxStatus, len(podSandboxIDs))
containerStatuses := []*kubecontainer.ContainerStatus{}
podIP := ""
for idx, podSandboxID := range podSandboxIDs {
podSandboxStatus, err := m.runtimeService.PodSandboxStatus(podSandboxID)
if err != nil {
glog.Errorf("PodSandboxStatus for pod (uid:%v, name:%s, namespace:%s) error: %v", uid, name, namespace, err)
glog.Errorf("PodSandboxStatus of sandbox %q for pod %q error: %v", podSandboxID, podFullName, err)
return nil, err
}
sandboxStatuses[idx] = podSandboxStatus
@ -893,13 +892,13 @@ func (m *kubeGenericRuntimeManager) GetPodStatus(uid kubetypes.UID, name, namesp
if idx == 0 && podSandboxStatus.GetState() == runtimeApi.PodSandBoxState_READY {
podIP = m.determinePodSandboxIP(namespace, name, podSandboxStatus)
}
}
statuses, err := m.getKubeletContainerStatuses(podSandboxID)
if err != nil {
glog.Errorf("getKubeletContainerStatuses for sandbox %s failed: %v", podSandboxID, err)
return nil, err
}
containerStatuses = append(containerStatuses, statuses...)
// Get statuses of all containers visible in the pod.
containerStatuses, err := m.getPodContainerStatuses(uid, name, namespace)
if err != nil {
glog.Errorf("getPodContainerStatuses for pod %q failed: %v", podFullName, err)
return nil, err
}
return &kubecontainer.PodStatus{