Gracefully delete pods from the Kubelet

This commit wires together the graceful delete option for pods
on the Kubelet.  When a pod is deleted on the API server, a
grace period is calculated that is based on the
Pod.Spec.TerminationGracePeriodInSeconds, the user's provided grace
period, or a default.  The grace period can only shrink once set.
The value provided by the user (or the default) is set onto metadata
as DeletionGracePeriod.

When the Kubelet sees a pod with DeletionTimestamp set, it uses the
value of ObjectMeta.GracePeriodSeconds as the grace period
sent to Docker.  When updating status, if the pod has DeletionTimestamp
set and all containers are terminated, the Kubelet will update the
status one last time and then invoke Delete(pod, grace: 0) to
clean up the pod immediately.
This commit is contained in:
Clayton Coleman 2015-05-21 18:36:44 -04:00
parent da7c612bb6
commit 72ee028cab
43 changed files with 598 additions and 180 deletions

View File

@ -1002,6 +1002,12 @@ func deepCopy_api_ObjectMeta(in ObjectMeta, out *ObjectMeta, c *conversion.Clone
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {

View File

@ -59,6 +59,8 @@ func BeforeCreate(strategy RESTCreateStrategy, ctx api.Context, obj runtime.Obje
} else {
objectMeta.Namespace = api.NamespaceNone
}
objectMeta.DeletionTimestamp = nil
objectMeta.DeletionGracePeriodSeconds = nil
strategy.PrepareForCreate(obj)
api.FillObjectMetaSystemFields(ctx, objectMeta)
api.GenerateName(strategy, objectMeta)

View File

@ -40,12 +40,37 @@ func BeforeDelete(strategy RESTDeleteStrategy, ctx api.Context, obj runtime.Obje
if strategy == nil {
return false, false, nil
}
_, _, kerr := objectMetaAndKind(strategy, obj)
objectMeta, _, kerr := objectMetaAndKind(strategy, obj)
if kerr != nil {
return false, false, kerr
}
// if the object is already being deleted
if objectMeta.DeletionTimestamp != nil {
// if we are already being deleted, we may only shorten the deletion grace period
// this means the object was gracefully deleted previously but deletionGracePeriodSeconds was not set,
// so we force deletion immediately
if objectMeta.DeletionGracePeriodSeconds == nil {
return false, false, nil
}
// only a shorter grace period may be provided by a user
if options.GracePeriodSeconds != nil {
period := int64(*options.GracePeriodSeconds)
if period > *objectMeta.DeletionGracePeriodSeconds {
return false, true, nil
}
objectMeta.DeletionGracePeriodSeconds = &period
options.GracePeriodSeconds = &period
return true, false, nil
}
// graceful deletion is pending, do nothing
options.GracePeriodSeconds = objectMeta.DeletionGracePeriodSeconds
return false, true, nil
}
if !strategy.CheckGracefulDelete(obj, options) {
return false, false, nil
}
objectMeta.DeletionGracePeriodSeconds = options.GracePeriodSeconds
return true, false, nil
}

View File

@ -329,7 +329,9 @@ func (t *Tester) TestDeleteNonExist(createFn func() runtime.Object) {
func (t *Tester) TestDeleteGraceful(createFn func() runtime.Object, expectedGrace int64, wasGracefulFn func() bool) {
t.TestDeleteGracefulHasDefault(createFn(), expectedGrace, wasGracefulFn)
t.TestDeleteGracefulWithValue(createFn(), expectedGrace, wasGracefulFn)
t.TestDeleteGracefulUsesZeroOnNil(createFn(), 0)
t.TestDeleteGracefulExtend(createFn(), expectedGrace, wasGracefulFn)
}
func (t *Tester) TestDeleteNoGraceful(createFn func() runtime.Object, wasGracefulFn func() bool) {
@ -362,12 +364,99 @@ func (t *Tester) TestDeleteGracefulHasDefault(existing runtime.Object, expectedG
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); err != nil {
if !wasGracefulFn() {
t.Errorf("did not gracefully delete resource")
return
}
object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name)
if err != nil {
t.Errorf("unexpected error, object should exist: %v", err)
return
}
objectMeta, err = api.ObjectMetaFor(object)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object)
}
if objectMeta.DeletionTimestamp == nil {
t.Errorf("did not set deletion timestamp")
}
if objectMeta.DeletionGracePeriodSeconds == nil {
t.Fatalf("did not set deletion grace period seconds")
}
if *objectMeta.DeletionGracePeriodSeconds != expectedGrace {
t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds)
}
}
func (t *Tester) TestDeleteGracefulWithValue(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) {
objectMeta, err := api.ObjectMetaFor(existing)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, existing)
}
ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace)
_, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace+2))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !wasGracefulFn() {
t.Errorf("did not gracefully delete resource")
}
object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name)
if err != nil {
t.Errorf("unexpected error, object should exist: %v", err)
}
objectMeta, err = api.ObjectMetaFor(object)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object)
}
if objectMeta.DeletionTimestamp == nil {
t.Errorf("did not set deletion timestamp")
}
if objectMeta.DeletionGracePeriodSeconds == nil {
t.Fatalf("did not set deletion grace period seconds")
}
if *objectMeta.DeletionGracePeriodSeconds != expectedGrace+2 {
t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds)
}
}
func (t *Tester) TestDeleteGracefulExtend(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) {
objectMeta, err := api.ObjectMetaFor(existing)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, existing)
}
ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace)
_, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !wasGracefulFn() {
t.Errorf("did not gracefully delete resource")
}
// second delete duration is ignored
_, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace+2))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name)
if err != nil {
t.Errorf("unexpected error, object should exist: %v", err)
}
objectMeta, err = api.ObjectMetaFor(object)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object)
}
if objectMeta.DeletionTimestamp == nil {
t.Errorf("did not set deletion timestamp")
}
if objectMeta.DeletionGracePeriodSeconds == nil {
t.Fatalf("did not set deletion grace period seconds")
}
if *objectMeta.DeletionGracePeriodSeconds != expectedGrace {
t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds)
}
}
func (t *Tester) TestDeleteGracefulUsesZeroOnNil(existing runtime.Object, expectedGrace int64) {
@ -382,6 +471,6 @@ func (t *Tester) TestDeleteGracefulUsesZeroOnNil(existing runtime.Object, expect
t.Errorf("unexpected error: %v", err)
}
if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); !errors.IsNotFound(err) {
t.Errorf("unexpected error, object should exist: %v", err)
t.Errorf("unexpected error, object should not exist: %v", err)
}
}

View File

@ -155,6 +155,7 @@ func TestRoundTripTypes(t *testing.T) {
}
func TestEncode_Ptr(t *testing.T) {
grace := int64(30)
pod := &api.Pod{
ObjectMeta: api.ObjectMeta{
Labels: map[string]string{"name": "foo"},
@ -162,6 +163,8 @@ func TestEncode_Ptr(t *testing.T) {
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
}
obj := runtime.Object(pod)

View File

@ -88,6 +88,15 @@ func FuzzerFor(t *testing.T, version string, src rand.Source) *fuzz.Fuzzer {
j.LabelSelector, _ = labels.Parse("a=b")
j.FieldSelector, _ = fields.ParseSelector("a=b")
},
func(j *api.PodSpec, c fuzz.Continue) {
c.FuzzNoCustom(j)
// has a default value
ttl := int64(30)
if c.RandBool() {
ttl = int64(c.Uint32())
}
j.TerminationGracePeriodSeconds = &ttl
},
func(j *api.PodPhase, c fuzz.Continue) {
statuses := []api.PodPhase{api.PodPending, api.PodRunning, api.PodFailed, api.PodUnknown}
*j = statuses[c.Rand.Intn(len(statuses))]

View File

@ -133,6 +133,10 @@ type ObjectMeta struct {
// will send a hard termination signal to the container.
DeletionTimestamp *util.Time `json:"deletionTimestamp,omitempty"`
// DeletionGracePeriodSeconds records the graceful deletion value set when graceful deletion
// was requested. Represents the most recent grace period, and may only be shortened once set.
DeletionGracePeriodSeconds *int64 `json:"deletionGracePeriodSeconds,omitempty"`
// Labels are key value pairs that may be used to scope and select individual resources.
// Label keys are of the form:
// label-key ::= prefixed-name | name

View File

@ -1087,6 +1087,12 @@ func convert_api_ObjectMeta_To_v1_ObjectMeta(in *api.ObjectMeta, out *ObjectMeta
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {
@ -3362,6 +3368,12 @@ func convert_v1_ObjectMeta_To_api_ObjectMeta(in *ObjectMeta, out *api.ObjectMeta
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {

View File

@ -933,6 +933,12 @@ func deepCopy_v1_ObjectMeta(in ObjectMeta, out *ObjectMeta, c *conversion.Cloner
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {

View File

@ -98,6 +98,10 @@ func addDefaultingFuncs() {
if obj.HostNetwork {
defaultHostNetworkPorts(&obj.Containers)
}
if obj.TerminationGracePeriodSeconds == nil {
period := int64(DefaultTerminationGracePeriodSeconds)
obj.TerminationGracePeriodSeconds = &period
}
},
func(obj *Probe) {
if obj.TimeoutSeconds == 0 {

View File

@ -131,6 +131,10 @@ type ObjectMeta struct {
// will send a hard termination signal to the container.
DeletionTimestamp *util.Time `json:"deletionTimestamp,omitempty" description:"RFC 3339 date and time at which the object will be deleted; populated by the system when a graceful deletion is requested, read-only; if not set, graceful deletion of the object has not been requested"`
// DeletionGracePeriodSeconds records the graceful deletion value set when graceful deletion
// was requested. Represents the most recent grace period, and may only be shortened once set.
DeletionGracePeriodSeconds *int64 `json:"deletionGracePeriodSeconds,omitempty" description:"number of seconds allowed for this object to gracefully terminate before it will be removed from the system; only set when deletionTimestamp is also set, read-only; may only be shortened"`
// Labels are key value pairs that may be used to scope and select individual resources.
// TODO: replace map[string]string with labels.LabelSet type
Labels map[string]string `json:"labels,omitempty" description:"map of string keys and values that can be used to organize and categorize objects; may match selectors of replication controllers and services"`
@ -838,6 +842,8 @@ const (
// DNSDefault indicates that the pod should use the default (as
// determined by kubelet) DNS settings.
DNSDefault DNSPolicy = "Default"
DefaultTerminationGracePeriodSeconds = 30
)
// PodSpec is a description of a pod
@ -852,7 +858,7 @@ type PodSpec struct {
// The grace period is the duration in seconds after the processes running in the pod are sent
// a termination signal and the time when the processes are forcibly halted with a kill signal.
// Set this value longer than the expected cleanup time for your process.
TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process"`
TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process; defaults to 30 seconds"`
ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty" description:"optional duration in seconds the pod may be active on the node relative to StartTime before the system will actively try to mark it failed and kill associated containers; value must be a positive integer`
// Optional: Set DNS policy. Defaults to "ClusterFirst"
DNSPolicy DNSPolicy `json:"dnsPolicy,omitempty" description:"DNS policy for containers within the pod; one of 'ClusterFirst' or 'Default'"`

View File

@ -945,6 +945,12 @@ func convert_api_ObjectMeta_To_v1beta3_ObjectMeta(in *api.ObjectMeta, out *Objec
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {
@ -3034,6 +3040,12 @@ func convert_v1beta3_ObjectMeta_To_api_ObjectMeta(in *ObjectMeta, out *api.Objec
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {

View File

@ -937,6 +937,12 @@ func deepCopy_v1beta3_ObjectMeta(in ObjectMeta, out *ObjectMeta, c *conversion.C
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {

View File

@ -102,6 +102,10 @@ func addDefaultingFuncs() {
if obj.HostNetwork {
defaultHostNetworkPorts(&obj.Containers)
}
if obj.TerminationGracePeriodSeconds == nil {
period := int64(DefaultTerminationGracePeriodSeconds)
obj.TerminationGracePeriodSeconds = &period
}
},
func(obj *Probe) {
if obj.TimeoutSeconds == 0 {

View File

@ -131,6 +131,10 @@ type ObjectMeta struct {
// will send a hard termination signal to the container.
DeletionTimestamp *util.Time `json:"deletionTimestamp,omitempty" description:"RFC 3339 date and time at which the object will be deleted; populated by the system when a graceful deletion is requested, read-only; if not set, graceful deletion of the object has not been requested"`
// DeletionGracePeriodSeconds records the graceful deletion value set when graceful deletion
// was requested. Represents the most recent grace period, and may only be shortened once set.
DeletionGracePeriodSeconds *int64 `json:"deletionGracePeriodSeconds,omitempty" description:"number of seconds allowed for this object to gracefully terminate before it will be removed from the system; only set when deletionTimestamp is also set, read-only; may only be shortened"`
// Labels are key value pairs that may be used to scope and select individual resources.
// TODO: replace map[string]string with labels.LabelSet type
Labels map[string]string `json:"labels,omitempty" description:"map of string keys and values that can be used to organize and categorize objects; may match selectors of replication controllers and services"`
@ -842,6 +846,8 @@ const (
// DNSDefault indicates that the pod should use the default (as
// determined by kubelet) DNS settings.
DNSDefault DNSPolicy = "Default"
DefaultTerminationGracePeriodSeconds = 30
)
// PodSpec is a description of a pod
@ -856,7 +862,7 @@ type PodSpec struct {
// The grace period is the duration in seconds after the processes running in the pod are sent
// a termination signal and the time when the processes are forcibly halted with a kill signal.
// Set this value longer than the expected cleanup time for your process.
TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process"`
TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process; defaults to 30 seconds"`
ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty" description:"optional duration in seconds the pod may be active on the node relative to StartTime before the system will actively try to mark it failed and kill associated containers; value must be a positive integer`
// Optional: Set DNS policy. Defaults to "ClusterFirst"
DNSPolicy DNSPolicy `json:"dnsPolicy,omitempty" description:"DNS policy for containers within the pod; one of 'ClusterFirst' or 'Default'"`

View File

@ -252,6 +252,16 @@ func ValidateObjectMetaUpdate(old, meta *api.ObjectMeta) errs.ValidationErrorLis
} else {
meta.CreationTimestamp = old.CreationTimestamp
}
// an object can never remove a deletion timestamp or clear/change grace period seconds
if !old.DeletionTimestamp.IsZero() {
meta.DeletionTimestamp = old.DeletionTimestamp
}
if old.DeletionGracePeriodSeconds != nil && meta.DeletionGracePeriodSeconds == nil {
meta.DeletionGracePeriodSeconds = old.DeletionGracePeriodSeconds
}
if meta.DeletionGracePeriodSeconds != nil && *meta.DeletionGracePeriodSeconds != *old.DeletionGracePeriodSeconds {
allErrs = append(allErrs, errs.NewFieldInvalid("deletionGracePeriodSeconds", meta.DeletionGracePeriodSeconds, "field is immutable; may only be changed via deletion"))
}
// Reject updates that don't specify a resource version
if meta.ResourceVersion == "" {

View File

@ -38,6 +38,7 @@ import (
)
func testData() (*api.PodList, *api.ServiceList, *api.ReplicationControllerList) {
grace := int64(30)
pods := &api.PodList{
ListMeta: api.ListMeta{
ResourceVersion: "15",
@ -46,15 +47,17 @@ func testData() (*api.PodList, *api.ServiceList, *api.ReplicationControllerList)
{
ObjectMeta: api.ObjectMeta{Name: "foo", Namespace: "test", ResourceVersion: "10"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "bar", Namespace: "test", ResourceVersion: "11"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},
@ -503,6 +506,7 @@ func TestGetMultipleTypeObjectsWithDirectReference(t *testing.T) {
}
}
func watchTestData() ([]api.Pod, []watch.Event) {
grace := int64(30)
pods := []api.Pod{
{
ObjectMeta: api.ObjectMeta{
@ -511,8 +515,9 @@ func watchTestData() ([]api.Pod, []watch.Event) {
ResourceVersion: "10",
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
}
@ -526,8 +531,9 @@ func watchTestData() ([]api.Pod, []watch.Event) {
ResourceVersion: "11",
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},
@ -540,8 +546,9 @@ func watchTestData() ([]api.Pod, []watch.Event) {
ResourceVersion: "12",
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},

View File

@ -29,6 +29,7 @@ import (
)
func TestMerge(t *testing.T) {
grace := int64(30)
tests := []struct {
obj runtime.Object
fragment string
@ -49,8 +50,9 @@ func TestMerge(t *testing.T) {
Name: "foo",
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},
@ -117,8 +119,9 @@ func TestMerge(t *testing.T) {
VolumeSource: api.VolumeSource{EmptyDir: &api.EmptyDirVolumeSource{}},
},
},
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},

View File

@ -83,6 +83,7 @@ func fakeClientWith(testName string, t *testing.T, data map[string]string) Clien
}
func testData() (*api.PodList, *api.ServiceList) {
grace := int64(30)
pods := &api.PodList{
ListMeta: api.ListMeta{
ResourceVersion: "15",
@ -91,15 +92,17 @@ func testData() (*api.PodList, *api.ServiceList) {
{
ObjectMeta: api.ObjectMeta{Name: "foo", Namespace: "test", ResourceVersion: "10"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "bar", Namespace: "test", ResourceVersion: "11"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},

View File

@ -128,6 +128,7 @@ func TestHelperCreate(t *testing.T) {
return true
}
grace := int64(30)
tests := []struct {
Resp *http.Response
RespFunc client.HTTPClientFunc
@ -172,8 +173,9 @@ func TestHelperCreate(t *testing.T) {
ExpectObject: &api.Pod{
ObjectMeta: api.ObjectMeta{Name: "foo"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
Resp: &http.Response{StatusCode: http.StatusOK, Body: objBody(&api.Status{Status: api.StatusSuccess})},
@ -381,6 +383,7 @@ func TestHelperUpdate(t *testing.T) {
return true
}
grace := int64(30)
tests := []struct {
Resp *http.Response
RespFunc client.HTTPClientFunc
@ -418,8 +421,9 @@ func TestHelperUpdate(t *testing.T) {
ExpectObject: &api.Pod{
ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "10"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
Overwrite: true,

View File

@ -628,6 +628,7 @@ func TestUpdateExistingReplicationController(t *testing.T) {
func TestUpdateWithRetries(t *testing.T) {
codec := testapi.Codec()
grace := int64(30)
rc := &api.ReplicationController{
ObjectMeta: api.ObjectMeta{Name: "rc",
Labels: map[string]string{
@ -645,8 +646,9 @@ func TestUpdateWithRetries(t *testing.T) {
},
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},

View File

@ -30,6 +30,7 @@ import (
func noDefault(*api.Pod) error { return nil }
func TestDecodeSinglePod(t *testing.T) {
grace := int64(30)
pod := &api.Pod{
TypeMeta: api.TypeMeta{
APIVersion: "",
@ -40,8 +41,9 @@ func TestDecodeSinglePod(t *testing.T) {
Namespace: "mynamespace",
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{{
Name: "image",
Image: "test/image",
@ -93,6 +95,7 @@ func TestDecodeSinglePod(t *testing.T) {
}
func TestDecodePodList(t *testing.T) {
grace := int64(30)
pod := &api.Pod{
TypeMeta: api.TypeMeta{
APIVersion: "",
@ -103,8 +106,9 @@ func TestDecodePodList(t *testing.T) {
Namespace: "mynamespace",
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{{
Name: "image",
Image: "test/image",

View File

@ -209,9 +209,8 @@ func (s *podStorage) merge(source string, change interface{}) (adds, updates, de
for _, ref := range filtered {
name := kubecontainer.GetPodFullName(ref)
if existing, found := pods[name]; found {
if !reflect.DeepEqual(existing.Spec, ref.Spec) {
if checkAndUpdatePod(existing, ref) {
// this is an update
existing.Spec = ref.Spec
updates.Pods = append(updates.Pods, existing)
continue
}
@ -252,9 +251,8 @@ func (s *podStorage) merge(source string, change interface{}) (adds, updates, de
name := kubecontainer.GetPodFullName(ref)
if existing, found := oldPods[name]; found {
pods[name] = existing
if !reflect.DeepEqual(existing.Spec, ref.Spec) {
if checkAndUpdatePod(existing, ref) {
// this is an update
existing.Spec = ref.Spec
updates.Pods = append(updates.Pods, existing)
continue
}
@ -325,6 +323,23 @@ func filterInvalidPods(pods []*api.Pod, source string, recorder record.EventReco
return
}
// checkAndUpdatePod updates existing if ref makes a meaningful change and returns true, or
// returns false if there was no update.
func checkAndUpdatePod(existing, ref *api.Pod) bool {
// TODO: it would be better to update the whole object and only preserve certain things
// like the source annotation or the UID (to ensure safety)
if reflect.DeepEqual(existing.Spec, ref.Spec) &&
reflect.DeepEqual(existing.DeletionTimestamp, ref.DeletionTimestamp) &&
reflect.DeepEqual(existing.DeletionGracePeriodSeconds, ref.DeletionGracePeriodSeconds) {
return false
}
// this is an update
existing.Spec = ref.Spec
existing.DeletionTimestamp = ref.DeletionTimestamp
existing.DeletionGracePeriodSeconds = ref.DeletionGracePeriodSeconds
return true
}
// Sync sends a copy of the current state through the update channel.
func (s *podStorage) Sync() {
s.updateLock.Lock()

View File

@ -163,6 +163,7 @@ func TestReadContainerManifestFromFile(t *testing.T) {
func TestReadPodsFromFile(t *testing.T) {
hostname := "random-test-hostname"
grace := int64(30)
var testCases = []struct {
desc string
pod runtime.Object
@ -192,9 +193,10 @@ func TestReadPodsFromFile(t *testing.T) {
SelfLink: getSelfLink("test-"+hostname, "mynamespace"),
},
Spec: api.PodSpec{
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{{
Name: "image",
Image: "test/image",
@ -227,9 +229,10 @@ func TestReadPodsFromFile(t *testing.T) {
SelfLink: getSelfLink("12345-"+hostname, kubelet.NamespaceDefault),
},
Spec: api.PodSpec{
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{{
Name: "image",
Image: "test/image",

View File

@ -120,6 +120,7 @@ func TestExtractInvalidManifest(t *testing.T) {
func TestExtractPodsFromHTTP(t *testing.T) {
hostname := "different-value"
grace := int64(30)
var testCases = []struct {
desc string
pods runtime.Object
@ -153,9 +154,11 @@ func TestExtractPodsFromHTTP(t *testing.T) {
SelfLink: getSelfLink("foo-"+hostname, "mynamespace"),
},
Spec: api.PodSpec{
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{{
Name: "1",
Image: "foo",
@ -206,9 +209,11 @@ func TestExtractPodsFromHTTP(t *testing.T) {
SelfLink: getSelfLink("foo-"+hostname, kubelet.NamespaceDefault),
},
Spec: api.PodSpec{
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{{
Name: "1",
Image: "foo",
@ -226,9 +231,11 @@ func TestExtractPodsFromHTTP(t *testing.T) {
SelfLink: getSelfLink("bar-"+hostname, kubelet.NamespaceDefault),
},
Spec: api.PodSpec{
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{{
Name: "2",
Image: "bar",

View File

@ -163,13 +163,13 @@ func (f *FakeRuntime) SyncPod(pod *api.Pod, _ Pod, _ api.PodStatus, _ []api.Secr
return f.Err
}
func (f *FakeRuntime) KillPod(pod Pod) error {
func (f *FakeRuntime) KillPod(pod *api.Pod, runningPod Pod) error {
f.Lock()
defer f.Unlock()
f.CalledFunctions = append(f.CalledFunctions, "KillPod")
f.KilledPods = append(f.KilledPods, string(pod.ID))
for _, c := range pod.Containers {
f.KilledPods = append(f.KilledPods, string(runningPod.ID))
for _, c := range runningPod.Containers {
f.KilledContainers = append(f.KilledContainers, c.Name)
}
return f.Err

View File

@ -53,8 +53,8 @@ type Runtime interface {
GetPods(all bool) ([]*Pod, error)
// Syncs the running pod into the desired pod.
SyncPod(pod *api.Pod, runningPod Pod, podStatus api.PodStatus, pullSecrets []api.Secret) error
// KillPod kills all the containers of a pod.
KillPod(pod Pod) error
// KillPod kills all the containers of a pod. Pod may be nil, running pod must not be.
KillPod(pod *api.Pod, runningPod Pod) error
// GetPodStatus retrieves the status of the pod, including the information of
// all containers in the pod.
GetPodStatus(*api.Pod) (*api.PodStatus, error)

View File

@ -54,8 +54,17 @@ const (
maxReasonCacheEntries = 200
kubernetesPodLabel = "io.kubernetes.pod.data"
kubernetesContainerLabel = "io.kubernetes.container.name"
// In order to avoid unnecessary SIGKILLs, give every container a minimum grace
// period after SIGTERM. Docker will guarantee the termination, but SIGTERM is
// potentially dangerous.
// TODO: evaluate whether there are scenarios in which SIGKILL is preferable to
// SIGTERM for certain process types, which may justify setting this to 0.
minimumGracePeriodInSeconds = 2
kubernetesNameLabel = "io.kubernetes.pod.name"
kubernetesPodLabel = "io.kubernetes.pod.data"
kubernetesTerminationGracePeriodLabel = "io.kubernetes.pod.terminationGracePeriod"
kubernetesContainerLabel = "io.kubernetes.container.name"
)
// DockerManager implements the Runtime interface.
@ -558,12 +567,19 @@ func (dm *DockerManager) runContainer(
if len(containerHostname) > hostnameMaxLen {
containerHostname = containerHostname[:hostnameMaxLen]
}
// Pod information is recorded on the container as labels to preserve it in the event the pod is deleted
// while the Kubelet is down and there is no information available to recover the pod. This includes
// termination information like the termination grace period and the pre stop hooks.
// TODO: keep these labels up to date if the pod changes
namespacedName := types.NamespacedName{pod.Namespace, pod.Name}
labels := map[string]string{
"io.kubernetes.pod.name": namespacedName.String(),
kubernetesNameLabel: namespacedName.String(),
}
if pod.Spec.TerminationGracePeriodSeconds != nil {
labels[kubernetesTerminationGracePeriodLabel] = strconv.FormatInt(*pod.Spec.TerminationGracePeriodSeconds, 10)
}
if container.Lifecycle != nil && container.Lifecycle.PreStop != nil {
glog.V(1).Infof("Setting preStop hook")
// TODO: This is kind of hacky, we should really just encode the bits we need.
data, err := latest.Codec.Encode(pod)
if err != nil {
@ -1080,12 +1096,12 @@ func (dm *DockerManager) PortForward(pod *kubecontainer.Pod, port uint16, stream
}
// Kills all containers in the specified pod
func (dm *DockerManager) KillPod(pod kubecontainer.Pod) error {
func (dm *DockerManager) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) error {
// Send the kills in parallel since they may take a long time. Len + 1 since there
// can be Len errors + the networkPlugin teardown error.
errs := make(chan error, len(pod.Containers)+1)
errs := make(chan error, len(runningPod.Containers)+1)
wg := sync.WaitGroup{}
for _, container := range pod.Containers {
for _, container := range runningPod.Containers {
wg.Add(1)
go func(container *kubecontainer.Container) {
defer util.HandleCrash()
@ -1093,15 +1109,24 @@ func (dm *DockerManager) KillPod(pod kubecontainer.Pod) error {
// TODO: Handle this without signaling the pod infra container to
// adapt to the generic container runtime.
if container.Name == PodInfraContainerName {
err := dm.networkPlugin.TearDownPod(pod.Namespace, pod.Name, kubeletTypes.DockerID(container.ID))
err := dm.networkPlugin.TearDownPod(runningPod.Namespace, runningPod.Name, kubeletTypes.DockerID(container.ID))
if err != nil {
glog.Errorf("Failed tearing down the infra container: %v", err)
errs <- err
}
}
err := dm.killContainer(container.ID)
var containerSpec *api.Container
if pod != nil {
for i, c := range pod.Spec.Containers {
if c.Name == container.Name {
containerSpec = &pod.Spec.Containers[i]
break
}
}
}
err := dm.killContainer(container.ID, containerSpec, pod)
if err != nil {
glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, pod.ID)
glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, runningPod.ID)
errs <- err
}
wg.Done()
@ -1119,75 +1144,128 @@ func (dm *DockerManager) KillPod(pod kubecontainer.Pod) error {
return nil
}
// KillContainerInPod kills a container in the pod.
func (dm *DockerManager) KillContainerInPod(container api.Container, pod *api.Pod) error {
// Locate the container.
pods, err := dm.GetPods(false)
if err != nil {
return err
}
targetPod := kubecontainer.Pods(pods).FindPod(kubecontainer.GetPodFullName(pod), pod.UID)
targetContainer := targetPod.FindContainerByName(container.Name)
if targetContainer == nil {
return fmt.Errorf("unable to find container %q in pod %q", container.Name, targetPod.Name)
}
return dm.killContainer(targetContainer.ID)
}
// TODO(vmarmol): Unexport this as it is no longer used externally.
// KillContainer kills a container identified by containerID.
// Internally, it invokes docker's StopContainer API with a timeout of 10s.
// TODO: Deprecate this function in favor of KillContainerInPod.
func (dm *DockerManager) KillContainer(containerID types.UID) error {
return dm.killContainer(containerID)
}
func (dm *DockerManager) killContainer(containerID types.UID) error {
ID := string(containerID)
glog.V(2).Infof("Killing container with id %q", ID)
inspect, err := dm.client.InspectContainer(ID)
if err != nil {
return err
}
var found bool
var preStop string
if inspect != nil && inspect.Config != nil && inspect.Config.Labels != nil {
preStop, found = inspect.Config.Labels[kubernetesPodLabel]
}
if found {
var pod api.Pod
err := latest.Codec.DecodeInto([]byte(preStop), &pod)
// KillContainerInPod kills a container in the pod. It must be passed either a container ID or a container and pod,
// and will attempt to lookup the other information if missing.
func (dm *DockerManager) KillContainerInPod(containerID types.UID, container *api.Container, pod *api.Pod) error {
switch {
case len(containerID) == 0:
// Locate the container.
pods, err := dm.GetPods(false)
if err != nil {
glog.Errorf("Failed to decode prestop: %s, %s", preStop, ID)
} else {
name := inspect.Config.Labels[kubernetesContainerLabel]
var container *api.Container
return err
}
targetPod := kubecontainer.Pods(pods).FindPod(kubecontainer.GetPodFullName(pod), pod.UID)
targetContainer := targetPod.FindContainerByName(container.Name)
if targetContainer == nil {
return fmt.Errorf("unable to find container %q in pod %q", container.Name, targetPod.Name)
}
containerID = targetContainer.ID
case container == nil || pod == nil:
// Read information about the container from labels
inspect, err := dm.client.InspectContainer(string(containerID))
if err != nil {
return err
}
storedPod, storedContainer, cerr := containerAndPodFromLabels(inspect)
if cerr != nil {
glog.Errorf("unable to access pod data from container: %v", err)
}
if container == nil {
container = storedContainer
}
if pod == nil {
pod = storedPod
}
}
return dm.killContainer(containerID, container, pod)
}
// killContainer accepts a containerID and an optional container or pod containing shutdown policies. Invoke
// KillContainerInPod if information must be retrieved first.
func (dm *DockerManager) killContainer(containerID types.UID, container *api.Container, pod *api.Pod) error {
ID := string(containerID)
name := ID
if container != nil {
name = fmt.Sprintf("%s %s", name, container.Name)
}
if pod != nil {
name = fmt.Sprintf("%s %s/%s", name, pod.Namespace, pod.Name)
}
gracePeriod := int64(minimumGracePeriodInSeconds)
if pod != nil && pod.DeletionGracePeriodSeconds != nil {
gracePeriod = *pod.DeletionGracePeriodSeconds
}
glog.V(2).Infof("Killing container %q with %d second grace period", name, gracePeriod)
if pod != nil && container != nil && container.Lifecycle != nil && container.Lifecycle.PreStop != nil {
glog.V(4).Infof("Running preStop hook for container %q", name)
start := util.Now()
// TODO: timebox PreStop execution to at most gracePeriod
if err := dm.runner.Run(ID, pod, container, container.Lifecycle.PreStop); err != nil {
glog.Errorf("preStop hook for container %q failed: %v", name, err)
}
gracePeriod -= int64(util.Now().Sub(start.Time).Seconds())
}
dm.readinessManager.RemoveReadiness(ID)
// always give containers a minimal shutdown window to avoid unnecessary SIGKILLs
if gracePeriod < minimumGracePeriodInSeconds {
gracePeriod = minimumGracePeriodInSeconds
}
err := dm.client.StopContainer(ID, uint(gracePeriod))
ref, ok := dm.containerRefManager.GetRef(ID)
if !ok {
glog.Warningf("No ref for pod '%q'", name)
} else {
// TODO: pass reason down here, and state, or move this call up the stack.
dm.recorder.Eventf(ref, "killing", "Killing %v", ID)
}
return err
}
var errNoPodOnContainer = fmt.Errorf("no pod information labels on Docker container")
// containerAndPodFromLabels tries to load the appropriate container info off of a Docker container's labels
func containerAndPodFromLabels(inspect *docker.Container) (pod *api.Pod, container *api.Container, err error) {
if inspect == nil && inspect.Config == nil && inspect.Config.Labels == nil {
return nil, nil, errNoPodOnContainer
}
labels := inspect.Config.Labels
// the pod data may not be set
if body, found := labels[kubernetesPodLabel]; found {
pod = &api.Pod{}
if err = latest.Codec.DecodeInto([]byte(body), pod); err == nil {
name := labels[kubernetesContainerLabel]
for ix := range pod.Spec.Containers {
if pod.Spec.Containers[ix].Name == name {
container = &pod.Spec.Containers[ix]
break
}
}
if container != nil {
glog.V(1).Infof("Running preStop hook")
if err := dm.runner.Run(ID, &pod, container, container.Lifecycle.PreStop); err != nil {
glog.Errorf("failed to run preStop hook: %v", err)
}
} else {
glog.Errorf("unable to find container %v, %s", pod, name)
if container == nil {
err = fmt.Errorf("unable to find container %s in pod %v", name, pod)
}
} else {
pod = nil
}
}
// attempt to find the default grace period if we didn't commit a pod, but set the generic metadata
// field (the one used by kill)
if pod == nil {
if period, ok := labels[kubernetesTerminationGracePeriodLabel]; ok {
if seconds, err := strconv.ParseInt(period, 10, 64); err == nil {
pod = &api.Pod{}
pod.DeletionGracePeriodSeconds = &seconds
}
}
}
dm.readinessManager.RemoveReadiness(ID)
err = dm.client.StopContainer(ID, 10)
ref, ok := dm.containerRefManager.GetRef(ID)
if !ok {
glog.Warningf("No ref for pod '%v'", ID)
} else {
// TODO: pass reason down here, and state, or move this call up the stack.
dm.recorder.Eventf(ref, "killing", "Killing %v", ID)
}
return err
return
}
// Run a single container from a pod. Returns the docker container ID
@ -1215,7 +1293,7 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe
if container.Lifecycle != nil && container.Lifecycle.PostStart != nil {
handlerErr := dm.runner.Run(id, pod, container, container.Lifecycle.PostStart)
if handlerErr != nil {
dm.killContainer(types.UID(id))
dm.killContainer(types.UID(id), container, pod)
return kubeletTypes.DockerID(""), fmt.Errorf("failed to call event handler: %v", handlerErr)
}
}
@ -1328,6 +1406,11 @@ func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, runningPod kub
containersToKeep := make(map[kubeletTypes.DockerID]int)
createPodInfraContainer := false
if pod.DeletionTimestamp != nil {
glog.V(4).Infof("Pod is terminating %q", podFullName)
return PodContainerChangesSpec{}, nil
}
var err error
var podInfraContainerID kubeletTypes.DockerID
var changed bool
@ -1481,7 +1564,7 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod
}
// Killing phase: if we want to start new infra container, or nothing is running kill everything (including infra container)
err = dm.KillPod(runningPod)
err = dm.KillPod(pod, runningPod)
if err != nil {
return err
}
@ -1491,7 +1574,15 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod
_, keep := containerChanges.ContainersToKeep[kubeletTypes.DockerID(container.ID)]
if !keep {
glog.V(3).Infof("Killing unwanted container %+v", container)
err = dm.KillContainer(container.ID)
// attempt to find the appropriate container policy
var podContainer *api.Container
for i, c := range pod.Spec.Containers {
if c.Name == container.Name {
podContainer = &pod.Spec.Containers[i]
break
}
}
err = dm.KillContainerInPod(container.ID, podContainer, pod)
if err != nil {
glog.Errorf("Error killing container: %v", err)
}

View File

@ -417,7 +417,7 @@ func TestKillContainerInPod(t *testing.T) {
manager.readinessManager.SetReadiness(c.ID, true)
}
if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err != nil {
if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err != nil {
t.Errorf("unexpected error: %v", err)
}
// Assert the container has been stopped.
@ -490,14 +490,14 @@ func TestKillContainerInPodWithPreStop(t *testing.T) {
manager.readinessManager.SetReadiness(c.ID, true)
}
if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err != nil {
if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err != nil {
t.Errorf("unexpected error: %v", err)
}
// Assert the container has been stopped.
if err := fakeDocker.AssertStopped([]string{containerToKill.ID}); err != nil {
t.Errorf("container was not stopped correctly: %v", err)
}
verifyCalls(t, fakeDocker, []string{"list", "inspect_container", "create_exec", "start_exec", "stop"})
verifyCalls(t, fakeDocker, []string{"list", "create_exec", "start_exec", "stop"})
if !reflect.DeepEqual(expectedCmd, fakeDocker.execCmd) {
t.Errorf("expected: %v, got %v", expectedCmd, fakeDocker.execCmd)
}
@ -534,7 +534,7 @@ func TestKillContainerInPodWithError(t *testing.T) {
manager.readinessManager.SetReadiness(c.ID, true)
}
if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err == nil {
if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err == nil {
t.Errorf("expected error, found nil")
}
@ -1030,7 +1030,7 @@ func TestSyncPodDeletesWithNoPodInfraContainer(t *testing.T) {
verifyCalls(t, fakeDocker, []string{
// Kill the container since pod infra container is not running.
"inspect_container", "stop",
"stop",
// Create pod infra container.
"create", "start", "inspect_container",
// Create container.
@ -1105,7 +1105,7 @@ func TestSyncPodDeletesDuplicate(t *testing.T) {
// Check the pod infra container.
"inspect_container",
// Kill the duplicated container.
"inspect_container", "stop",
"stop",
})
// Expect one of the duplicates to be killed.
if len(fakeDocker.Stopped) != 1 || (fakeDocker.Stopped[0] != "1234" && fakeDocker.Stopped[0] != "4567") {
@ -1159,7 +1159,7 @@ func TestSyncPodBadHash(t *testing.T) {
// Check the pod infra container.
"inspect_container",
// Kill and restart the bad hash container.
"inspect_container", "stop", "create", "start",
"stop", "create", "start",
})
if err := fakeDocker.AssertStopped([]string{"1234"}); err != nil {
@ -1217,7 +1217,7 @@ func TestSyncPodsUnhealthy(t *testing.T) {
// Check the pod infra container.
"inspect_container",
// Kill the unhealthy container.
"inspect_container", "stop",
"stop",
// Restart the unhealthy container.
"create", "start",
})
@ -1426,7 +1426,7 @@ func TestSyncPodWithRestartPolicy(t *testing.T) {
// Check the pod infra container.
"inspect_container",
// Stop the last pod infra container.
"inspect_container", "stop",
"stop",
},
[]string{},
[]string{"9876"},

View File

@ -1054,8 +1054,8 @@ func parseResolvConf(reader io.Reader) (nameservers []string, searches []string,
}
// Kill all running containers in a pod (includes the pod infra container).
func (kl *Kubelet) killPod(pod kubecontainer.Pod) error {
return kl.containerRuntime.KillPod(pod)
func (kl *Kubelet) killPod(pod *api.Pod, runningPod kubecontainer.Pod) error {
return kl.containerRuntime.KillPod(pod, runningPod)
}
type empty struct{}
@ -1101,7 +1101,7 @@ func (kl *Kubelet) syncPod(pod *api.Pod, mirrorPod *api.Pod, runningPod kubecont
// Kill pods we can't run.
err := canRunPod(pod)
if err != nil {
kl.killPod(runningPod)
kl.killPod(pod, runningPod)
return err
}
@ -1416,7 +1416,7 @@ func (kl *Kubelet) killUnwantedPods(desiredPods map[types.UID]empty,
}()
glog.V(1).Infof("Killing unwanted pod %q", pod.Name)
// Stop the containers.
err = kl.killPod(*pod)
err = kl.killPod(nil, *pod)
if err != nil {
glog.Errorf("Failed killing the pod %q: %v", pod.Name, err)
return

View File

@ -1153,7 +1153,7 @@ func TestSyncPodEventHandlerFails(t *testing.T) {
// Create the container.
"create", "start",
// Kill the container since event handler fails.
"inspect_container", "stop",
"stop",
// Get pod status.
"list", "inspect_container", "inspect_container",
// Get pods for deleting orphaned volumes.

View File

@ -671,11 +671,11 @@ func (r *runtime) GetPods(all bool) ([]*kubecontainer.Pod, error) {
}
// KillPod invokes 'systemctl kill' to kill the unit that runs the pod.
func (r *runtime) KillPod(pod kubecontainer.Pod) error {
glog.V(4).Infof("Rkt is killing pod: name %q.", pod.Name)
func (r *runtime) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) error {
glog.V(4).Infof("Rkt is killing pod: name %q.", runningPod.Name)
// TODO(yifan): More graceful stop. Replace with StopUnit and wait for a timeout.
r.systemd.KillUnit(makePodServiceFileName(pod.ID), int32(syscall.SIGKILL))
r.systemd.KillUnit(makePodServiceFileName(runningPod.ID), int32(syscall.SIGKILL))
return r.systemd.Reload()
}
@ -880,7 +880,7 @@ func (r *runtime) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, podStatus
if restartPod {
// TODO(yifan): Handle network plugin.
if err := r.KillPod(runningPod); err != nil {
if err := r.KillPod(pod, runningPod); err != nil {
return err
}
if err := r.RunPod(pod); err != nil {

View File

@ -22,6 +22,7 @@ import (
"sync"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
kubecontainer "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/container"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
@ -135,13 +136,24 @@ func (s *statusManager) syncBatch() error {
}
// TODO: make me easier to express from client code
statusPod, err = s.kubeClient.Pods(statusPod.Namespace).Get(statusPod.Name)
if errors.IsNotFound(err) {
glog.V(3).Infof("Pod %q was deleted on the server", pod.Name)
return nil
}
if err == nil {
statusPod.Status = status
_, err = s.kubeClient.Pods(pod.Namespace).UpdateStatus(statusPod)
// TODO: handle conflict as a retry, make that easier too.
statusPod, err = s.kubeClient.Pods(pod.Namespace).UpdateStatus(statusPod)
if err == nil {
glog.V(3).Infof("Status for pod %q updated successfully", pod.Name)
return nil
if statusPod.DeletionTimestamp == nil || !allTerminated(statusPod.Status.ContainerStatuses) {
return nil
}
if err := s.kubeClient.Pods(statusPod.Namespace).Delete(statusPod.Name, api.NewDeleteOptions(0)); err == nil {
glog.V(3).Infof("Pod %q fully terminated and removed from etcd", statusPod.Name)
return nil
}
}
}
@ -151,3 +163,14 @@ func (s *statusManager) syncBatch() error {
s.DeletePodStatus(podFullName)
return fmt.Errorf("error updating status for pod %q: %v", pod.Name, err)
}
// allTerminated returns true if every status is terminated, or the status list
// is empty.
func allTerminated(statuses []api.ContainerStatus) bool {
for _, status := range statuses {
if status.State.Terminated == nil {
return false
}
}
return true
}

View File

@ -722,7 +722,7 @@ func TestDelete(t *testing.T) {
// If the controller is still around after trying to delete either the delete
// failed, or we're deleting it gracefully.
if fakeClient.Data[key].R.Node != nil {
return true
return fakeClient.Data[key].R.Node.TTL != 0
}
return false
}

View File

@ -37,6 +37,7 @@ var testTTL uint64 = 60
func NewTestEventEtcdRegistry(t *testing.T) (*tools.FakeEtcdClient, generic.Registry) {
f := tools.NewFakeEtcdClient(t)
f.HideExpires = true
f.TestIndex = true
h := tools.NewEtcdHelper(f, testapi.Codec(), etcdtest.PathPrefix())

View File

@ -324,7 +324,7 @@ func TestEtcdUpdateStatus(t *testing.T) {
key, _ := storage.KeyFunc(ctx, "foo")
key = etcdtest.AddPrefix(key)
pvStart := validNewPersistentVolume("foo")
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvStart), 1)
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvStart), 0)
pvIn := &api.PersistentVolume{
ObjectMeta: api.ObjectMeta{

View File

@ -325,7 +325,7 @@ func TestEtcdUpdateStatus(t *testing.T) {
key, _ := storage.KeyFunc(ctx, "foo")
key = etcdtest.AddPrefix(key)
pvcStart := validNewPersistentVolumeClaim("foo", api.NamespaceDefault)
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvcStart), 1)
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvcStart), 0)
pvc := &api.PersistentVolumeClaim{
ObjectMeta: api.ObjectMeta{

View File

@ -54,6 +54,7 @@ func newStorage(t *testing.T) (*REST, *BindingREST, *StatusREST, *tools.FakeEtcd
}
func validNewPod() *api.Pod {
grace := int64(30)
return &api.Pod{
ObjectMeta: api.ObjectMeta{
Name: "foo",
@ -62,6 +63,8 @@ func validNewPod() *api.Pod {
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{
{
Name: "foo",
@ -132,9 +135,9 @@ func TestDelete(t *testing.T) {
if fakeEtcdClient.Data[key].R.Node == nil {
return false
}
return fakeEtcdClient.Data[key].R.Node.TTL == 30
return fakeEtcdClient.Data[key].R.Node.TTL != 0
}
test.TestDelete(createFn, gracefulSetFn)
test.TestDeleteGraceful(createFn, 30, gracefulSetFn)
}
func expectPod(t *testing.T, out runtime.Object) (*api.Pod, bool) {
@ -1118,6 +1121,7 @@ func TestEtcdUpdateScheduled(t *testing.T) {
},
}), 1)
grace := int64(30)
podIn := api.Pod{
ObjectMeta: api.ObjectMeta{
Name: "foo",
@ -1139,6 +1143,8 @@ func TestEtcdUpdateScheduled(t *testing.T) {
},
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
}
_, _, err := registry.Update(ctx, &podIn)
@ -1179,7 +1185,7 @@ func TestEtcdUpdateStatus(t *testing.T) {
},
},
}
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, &podStart), 1)
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, &podStart), 0)
podIn := api.Pod{
ObjectMeta: api.ObjectMeta{
@ -1208,6 +1214,8 @@ func TestEtcdUpdateStatus(t *testing.T) {
expected := podStart
expected.ResourceVersion = "2"
grace := int64(30)
expected.Spec.TerminationGracePeriodSeconds = &grace
expected.Spec.RestartPolicy = api.RestartPolicyAlways
expected.Spec.DNSPolicy = api.DNSClusterFirst
expected.Spec.Containers[0].ImagePullPolicy = api.PullIfNotPresent

View File

@ -81,9 +81,26 @@ func (podStrategy) ValidateUpdate(ctx api.Context, obj, old runtime.Object) fiel
return append(errorList, validation.ValidatePodUpdate(obj.(*api.Pod), old.(*api.Pod))...)
}
// CheckGracefulDelete allows a pod to be gracefully deleted.
// CheckGracefulDelete allows a pod to be gracefully deleted. It updates the DeleteOptions to
// reflect the desired grace value.
func (podStrategy) CheckGracefulDelete(obj runtime.Object, options *api.DeleteOptions) bool {
return false
if options == nil {
return false
}
period := int64(0)
// user has specified a value
if options.GracePeriodSeconds != nil {
period = *options.GracePeriodSeconds
} else {
// use the default value if set, or deletes the pod immediately (0)
pod := obj.(*api.Pod)
if pod.Spec.TerminationGracePeriodSeconds != nil {
period = *pod.Spec.TerminationGracePeriodSeconds
}
}
// ensure the options and the pod are in sync
options.GracePeriodSeconds = &period
return true
}
type podStatusStrategy struct {
@ -96,6 +113,7 @@ func (podStatusStrategy) PrepareForUpdate(obj, old runtime.Object) {
newPod := obj.(*api.Pod)
oldPod := old.(*api.Pod)
newPod.Spec = oldPod.Spec
newPod.DeletionTimestamp = nil
}
func (podStatusStrategy) ValidateUpdate(ctx api.Context, obj, old runtime.Object) fielderrors.ValidationErrorList {

View File

@ -477,7 +477,7 @@ func TestEtcdUpdateStatus(t *testing.T) {
key, _ := registry.KeyFunc(ctx, "foo")
key = etcdtest.AddPrefix(key)
resourcequotaStart := validNewResourceQuota()
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, resourcequotaStart), 1)
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, resourcequotaStart), 0)
resourcequotaIn := &api.ResourceQuota{
ObjectMeta: api.ObjectMeta{

View File

@ -117,28 +117,32 @@ func TestExtractToList(t *testing.T) {
},
},
}
grace := int64(30)
expect := api.PodList{
ListMeta: api.ListMeta{ResourceVersion: "10"},
Items: []api.Pod{
{
ObjectMeta: api.ObjectMeta{Name: "bar", ResourceVersion: "2"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "baz", ResourceVersion: "3"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "1"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},
@ -200,6 +204,7 @@ func TestExtractToListAcrossDirectories(t *testing.T) {
},
},
}
grace := int64(30)
expect := api.PodList{
ListMeta: api.ListMeta{ResourceVersion: "10"},
Items: []api.Pod{
@ -207,22 +212,25 @@ func TestExtractToListAcrossDirectories(t *testing.T) {
{
ObjectMeta: api.ObjectMeta{Name: "baz", ResourceVersion: "3"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "1"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "bar", ResourceVersion: "2"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},
@ -272,28 +280,32 @@ func TestExtractToListExcludesDirectories(t *testing.T) {
},
},
}
grace := int64(30)
expect := api.PodList{
ListMeta: api.ListMeta{ResourceVersion: "10"},
Items: []api.Pod{
{
ObjectMeta: api.ObjectMeta{Name: "bar", ResourceVersion: "2"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "baz", ResourceVersion: "3"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "1"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},
@ -313,11 +325,13 @@ func TestExtractObj(t *testing.T) {
fakeClient := NewFakeEtcdClient(t)
helper := NewEtcdHelper(fakeClient, testapi.Codec(), etcdtest.PathPrefix())
key := etcdtest.AddPrefix("/some/key")
grace := int64(30)
expect := api.Pod{
ObjectMeta: api.ObjectMeta{Name: "foo"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
}
fakeClient.Set(key, runtime.EncodeOrDie(testapi.Codec(), &expect), 0)

View File

@ -21,6 +21,7 @@ import (
"fmt"
"sort"
"sync"
"time"
"github.com/coreos/go-etcd/etcd"
)
@ -53,6 +54,8 @@ type FakeEtcdClient struct {
TestIndex bool
ChangeIndex uint64
LastSetTTL uint64
// Will avoid setting the expires header on objects to make comparison easier
HideExpires bool
Machines []string
// Will become valid after Watch is called; tester may write to it. Tester may
@ -184,6 +187,11 @@ func (f *FakeEtcdClient) setLocked(key, value string, ttl uint64) (*etcd.Respons
prevResult := f.Data[key]
createdIndex := prevResult.R.Node.CreatedIndex
f.t.Logf("updating %v, index %v -> %v (ttl: %d)", key, createdIndex, i, ttl)
var expires *time.Time
if !f.HideExpires && ttl > 0 {
now := time.Now()
expires = &now
}
result := EtcdResponseWithError{
R: &etcd.Response{
Node: &etcd.Node{
@ -191,6 +199,7 @@ func (f *FakeEtcdClient) setLocked(key, value string, ttl uint64) (*etcd.Respons
CreatedIndex: createdIndex,
ModifiedIndex: i,
TTL: int64(ttl),
Expiration: expires,
},
},
}

View File

@ -132,11 +132,13 @@ func PriorityTwo(pod *api.Pod, podLister algorithm.PodLister, minionLister algor
}
func TestDefaultErrorFunc(t *testing.T) {
grace := int64(30)
testPod := &api.Pod{
ObjectMeta: api.ObjectMeta{Name: "foo", Namespace: "bar"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
}
handler := util.FakeHandler{