From 72ee028cab61493d2180782114a1e54281b5b4d1 Mon Sep 17 00:00:00 2001 From: Clayton Coleman Date: Thu, 21 May 2015 18:36:44 -0400 Subject: [PATCH] Gracefully delete pods from the Kubelet This commit wires together the graceful delete option for pods on the Kubelet. When a pod is deleted on the API server, a grace period is calculated that is based on the Pod.Spec.TerminationGracePeriodInSeconds, the user's provided grace period, or a default. The grace period can only shrink once set. The value provided by the user (or the default) is set onto metadata as DeletionGracePeriod. When the Kubelet sees a pod with DeletionTimestamp set, it uses the value of ObjectMeta.GracePeriodSeconds as the grace period sent to Docker. When updating status, if the pod has DeletionTimestamp set and all containers are terminated, the Kubelet will update the status one last time and then invoke Delete(pod, grace: 0) to clean up the pod immediately. --- pkg/api/deep_copy_generated.go | 6 + pkg/api/rest/create.go | 2 + pkg/api/rest/delete.go | 27 +- pkg/api/rest/resttest/resttest.go | 93 ++++++- pkg/api/serialization_test.go | 3 + pkg/api/testing/fuzzer.go | 9 + pkg/api/types.go | 4 + pkg/api/v1/conversion_generated.go | 12 + pkg/api/v1/deep_copy_generated.go | 6 + pkg/api/v1/defaults.go | 4 + pkg/api/v1/types.go | 8 +- pkg/api/v1beta3/conversion_generated.go | 12 + pkg/api/v1beta3/deep_copy_generated.go | 6 + pkg/api/v1beta3/defaults.go | 4 + pkg/api/v1beta3/types.go | 8 +- pkg/api/validation/validation.go | 10 + pkg/kubectl/cmd/get_test.go | 27 +- pkg/kubectl/cmd/util/helpers_test.go | 11 +- pkg/kubectl/resource/builder_test.go | 11 +- pkg/kubectl/resource/helper_test.go | 12 +- pkg/kubectl/rolling_updater_test.go | 6 +- pkg/kubelet/config/common_test.go | 12 +- pkg/kubelet/config/config.go | 23 +- pkg/kubelet/config/file_test.go | 15 +- pkg/kubelet/config/http_test.go | 25 +- pkg/kubelet/container/fake_runtime.go | 6 +- pkg/kubelet/container/runtime.go | 4 +- pkg/kubelet/dockertools/manager.go | 235 ++++++++++++------ pkg/kubelet/dockertools/manager_test.go | 18 +- pkg/kubelet/kubelet.go | 8 +- pkg/kubelet/kubelet_test.go | 2 +- pkg/kubelet/rkt/rkt.go | 8 +- pkg/kubelet/status_manager.go | 27 +- pkg/registry/controller/etcd/etcd_test.go | 2 +- pkg/registry/event/registry_test.go | 1 + .../persistentvolume/etcd/etcd_test.go | 2 +- .../persistentvolumeclaim/etcd/etcd_test.go | 2 +- pkg/registry/pod/etcd/etcd_test.go | 14 +- pkg/registry/pod/rest.go | 22 +- pkg/registry/resourcequota/etcd/etcd_test.go | 2 +- pkg/tools/etcd_helper_test.go | 54 ++-- pkg/tools/fake_etcd_client.go | 9 + plugin/pkg/scheduler/factory/factory_test.go | 6 +- 43 files changed, 598 insertions(+), 180 deletions(-) diff --git a/pkg/api/deep_copy_generated.go b/pkg/api/deep_copy_generated.go index cc3202a19d3..0a28c7f9caa 100644 --- a/pkg/api/deep_copy_generated.go +++ b/pkg/api/deep_copy_generated.go @@ -1002,6 +1002,12 @@ func deepCopy_api_ObjectMeta(in ObjectMeta, out *ObjectMeta, c *conversion.Clone } else { out.DeletionTimestamp = nil } + if in.DeletionGracePeriodSeconds != nil { + out.DeletionGracePeriodSeconds = new(int64) + *out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds + } else { + out.DeletionGracePeriodSeconds = nil + } if in.Labels != nil { out.Labels = make(map[string]string) for key, val := range in.Labels { diff --git a/pkg/api/rest/create.go b/pkg/api/rest/create.go index 6b0951f2f40..82fd569ec84 100644 --- a/pkg/api/rest/create.go +++ b/pkg/api/rest/create.go @@ -59,6 +59,8 @@ func BeforeCreate(strategy RESTCreateStrategy, ctx api.Context, obj runtime.Obje } else { objectMeta.Namespace = api.NamespaceNone } + objectMeta.DeletionTimestamp = nil + objectMeta.DeletionGracePeriodSeconds = nil strategy.PrepareForCreate(obj) api.FillObjectMetaSystemFields(ctx, objectMeta) api.GenerateName(strategy, objectMeta) diff --git a/pkg/api/rest/delete.go b/pkg/api/rest/delete.go index 950fada6c74..369e6a0e8a2 100644 --- a/pkg/api/rest/delete.go +++ b/pkg/api/rest/delete.go @@ -40,12 +40,37 @@ func BeforeDelete(strategy RESTDeleteStrategy, ctx api.Context, obj runtime.Obje if strategy == nil { return false, false, nil } - _, _, kerr := objectMetaAndKind(strategy, obj) + objectMeta, _, kerr := objectMetaAndKind(strategy, obj) if kerr != nil { return false, false, kerr } + + // if the object is already being deleted + if objectMeta.DeletionTimestamp != nil { + // if we are already being deleted, we may only shorten the deletion grace period + // this means the object was gracefully deleted previously but deletionGracePeriodSeconds was not set, + // so we force deletion immediately + if objectMeta.DeletionGracePeriodSeconds == nil { + return false, false, nil + } + // only a shorter grace period may be provided by a user + if options.GracePeriodSeconds != nil { + period := int64(*options.GracePeriodSeconds) + if period > *objectMeta.DeletionGracePeriodSeconds { + return false, true, nil + } + objectMeta.DeletionGracePeriodSeconds = &period + options.GracePeriodSeconds = &period + return true, false, nil + } + // graceful deletion is pending, do nothing + options.GracePeriodSeconds = objectMeta.DeletionGracePeriodSeconds + return false, true, nil + } + if !strategy.CheckGracefulDelete(obj, options) { return false, false, nil } + objectMeta.DeletionGracePeriodSeconds = options.GracePeriodSeconds return true, false, nil } diff --git a/pkg/api/rest/resttest/resttest.go b/pkg/api/rest/resttest/resttest.go index 269180d8090..8b6e917b1dc 100644 --- a/pkg/api/rest/resttest/resttest.go +++ b/pkg/api/rest/resttest/resttest.go @@ -329,7 +329,9 @@ func (t *Tester) TestDeleteNonExist(createFn func() runtime.Object) { func (t *Tester) TestDeleteGraceful(createFn func() runtime.Object, expectedGrace int64, wasGracefulFn func() bool) { t.TestDeleteGracefulHasDefault(createFn(), expectedGrace, wasGracefulFn) + t.TestDeleteGracefulWithValue(createFn(), expectedGrace, wasGracefulFn) t.TestDeleteGracefulUsesZeroOnNil(createFn(), 0) + t.TestDeleteGracefulExtend(createFn(), expectedGrace, wasGracefulFn) } func (t *Tester) TestDeleteNoGraceful(createFn func() runtime.Object, wasGracefulFn func() bool) { @@ -362,12 +364,99 @@ func (t *Tester) TestDeleteGracefulHasDefault(existing runtime.Object, expectedG if err != nil { t.Errorf("unexpected error: %v", err) } - if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); err != nil { + if !wasGracefulFn() { + t.Errorf("did not gracefully delete resource") + return + } + object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name) + if err != nil { t.Errorf("unexpected error, object should exist: %v", err) + return + } + objectMeta, err = api.ObjectMetaFor(object) + if err != nil { + t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object) + } + if objectMeta.DeletionTimestamp == nil { + t.Errorf("did not set deletion timestamp") + } + if objectMeta.DeletionGracePeriodSeconds == nil { + t.Fatalf("did not set deletion grace period seconds") + } + if *objectMeta.DeletionGracePeriodSeconds != expectedGrace { + t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds) + } +} + +func (t *Tester) TestDeleteGracefulWithValue(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) { + objectMeta, err := api.ObjectMetaFor(existing) + if err != nil { + t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, existing) + } + + ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace) + _, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace+2)) + if err != nil { + t.Errorf("unexpected error: %v", err) } if !wasGracefulFn() { t.Errorf("did not gracefully delete resource") } + object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name) + if err != nil { + t.Errorf("unexpected error, object should exist: %v", err) + } + objectMeta, err = api.ObjectMetaFor(object) + if err != nil { + t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object) + } + if objectMeta.DeletionTimestamp == nil { + t.Errorf("did not set deletion timestamp") + } + if objectMeta.DeletionGracePeriodSeconds == nil { + t.Fatalf("did not set deletion grace period seconds") + } + if *objectMeta.DeletionGracePeriodSeconds != expectedGrace+2 { + t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds) + } +} + +func (t *Tester) TestDeleteGracefulExtend(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) { + objectMeta, err := api.ObjectMetaFor(existing) + if err != nil { + t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, existing) + } + + ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace) + _, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace)) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if !wasGracefulFn() { + t.Errorf("did not gracefully delete resource") + } + // second delete duration is ignored + _, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace+2)) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name) + if err != nil { + t.Errorf("unexpected error, object should exist: %v", err) + } + objectMeta, err = api.ObjectMetaFor(object) + if err != nil { + t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object) + } + if objectMeta.DeletionTimestamp == nil { + t.Errorf("did not set deletion timestamp") + } + if objectMeta.DeletionGracePeriodSeconds == nil { + t.Fatalf("did not set deletion grace period seconds") + } + if *objectMeta.DeletionGracePeriodSeconds != expectedGrace { + t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds) + } } func (t *Tester) TestDeleteGracefulUsesZeroOnNil(existing runtime.Object, expectedGrace int64) { @@ -382,6 +471,6 @@ func (t *Tester) TestDeleteGracefulUsesZeroOnNil(existing runtime.Object, expect t.Errorf("unexpected error: %v", err) } if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); !errors.IsNotFound(err) { - t.Errorf("unexpected error, object should exist: %v", err) + t.Errorf("unexpected error, object should not exist: %v", err) } } diff --git a/pkg/api/serialization_test.go b/pkg/api/serialization_test.go index 08d1e96065f..74b8d7fe469 100644 --- a/pkg/api/serialization_test.go +++ b/pkg/api/serialization_test.go @@ -155,6 +155,7 @@ func TestRoundTripTypes(t *testing.T) { } func TestEncode_Ptr(t *testing.T) { + grace := int64(30) pod := &api.Pod{ ObjectMeta: api.ObjectMeta{ Labels: map[string]string{"name": "foo"}, @@ -162,6 +163,8 @@ func TestEncode_Ptr(t *testing.T) { Spec: api.PodSpec{ RestartPolicy: api.RestartPolicyAlways, DNSPolicy: api.DNSClusterFirst, + + TerminationGracePeriodSeconds: &grace, }, } obj := runtime.Object(pod) diff --git a/pkg/api/testing/fuzzer.go b/pkg/api/testing/fuzzer.go index f60d68c4cbf..f0480f4a455 100644 --- a/pkg/api/testing/fuzzer.go +++ b/pkg/api/testing/fuzzer.go @@ -88,6 +88,15 @@ func FuzzerFor(t *testing.T, version string, src rand.Source) *fuzz.Fuzzer { j.LabelSelector, _ = labels.Parse("a=b") j.FieldSelector, _ = fields.ParseSelector("a=b") }, + func(j *api.PodSpec, c fuzz.Continue) { + c.FuzzNoCustom(j) + // has a default value + ttl := int64(30) + if c.RandBool() { + ttl = int64(c.Uint32()) + } + j.TerminationGracePeriodSeconds = &ttl + }, func(j *api.PodPhase, c fuzz.Continue) { statuses := []api.PodPhase{api.PodPending, api.PodRunning, api.PodFailed, api.PodUnknown} *j = statuses[c.Rand.Intn(len(statuses))] diff --git a/pkg/api/types.go b/pkg/api/types.go index a016ff25a94..39985b1d289 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -133,6 +133,10 @@ type ObjectMeta struct { // will send a hard termination signal to the container. DeletionTimestamp *util.Time `json:"deletionTimestamp,omitempty"` + // DeletionGracePeriodSeconds records the graceful deletion value set when graceful deletion + // was requested. Represents the most recent grace period, and may only be shortened once set. + DeletionGracePeriodSeconds *int64 `json:"deletionGracePeriodSeconds,omitempty"` + // Labels are key value pairs that may be used to scope and select individual resources. // Label keys are of the form: // label-key ::= prefixed-name | name diff --git a/pkg/api/v1/conversion_generated.go b/pkg/api/v1/conversion_generated.go index 2ada8393a58..f7864faee68 100644 --- a/pkg/api/v1/conversion_generated.go +++ b/pkg/api/v1/conversion_generated.go @@ -1087,6 +1087,12 @@ func convert_api_ObjectMeta_To_v1_ObjectMeta(in *api.ObjectMeta, out *ObjectMeta } else { out.DeletionTimestamp = nil } + if in.DeletionGracePeriodSeconds != nil { + out.DeletionGracePeriodSeconds = new(int64) + *out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds + } else { + out.DeletionGracePeriodSeconds = nil + } if in.Labels != nil { out.Labels = make(map[string]string) for key, val := range in.Labels { @@ -3362,6 +3368,12 @@ func convert_v1_ObjectMeta_To_api_ObjectMeta(in *ObjectMeta, out *api.ObjectMeta } else { out.DeletionTimestamp = nil } + if in.DeletionGracePeriodSeconds != nil { + out.DeletionGracePeriodSeconds = new(int64) + *out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds + } else { + out.DeletionGracePeriodSeconds = nil + } if in.Labels != nil { out.Labels = make(map[string]string) for key, val := range in.Labels { diff --git a/pkg/api/v1/deep_copy_generated.go b/pkg/api/v1/deep_copy_generated.go index d724895bf8c..c45de5195c4 100644 --- a/pkg/api/v1/deep_copy_generated.go +++ b/pkg/api/v1/deep_copy_generated.go @@ -933,6 +933,12 @@ func deepCopy_v1_ObjectMeta(in ObjectMeta, out *ObjectMeta, c *conversion.Cloner } else { out.DeletionTimestamp = nil } + if in.DeletionGracePeriodSeconds != nil { + out.DeletionGracePeriodSeconds = new(int64) + *out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds + } else { + out.DeletionGracePeriodSeconds = nil + } if in.Labels != nil { out.Labels = make(map[string]string) for key, val := range in.Labels { diff --git a/pkg/api/v1/defaults.go b/pkg/api/v1/defaults.go index cfc8a1fdec9..f2ac2ec43e1 100644 --- a/pkg/api/v1/defaults.go +++ b/pkg/api/v1/defaults.go @@ -98,6 +98,10 @@ func addDefaultingFuncs() { if obj.HostNetwork { defaultHostNetworkPorts(&obj.Containers) } + if obj.TerminationGracePeriodSeconds == nil { + period := int64(DefaultTerminationGracePeriodSeconds) + obj.TerminationGracePeriodSeconds = &period + } }, func(obj *Probe) { if obj.TimeoutSeconds == 0 { diff --git a/pkg/api/v1/types.go b/pkg/api/v1/types.go index 63b17d0a8c4..4eb86b2168c 100644 --- a/pkg/api/v1/types.go +++ b/pkg/api/v1/types.go @@ -131,6 +131,10 @@ type ObjectMeta struct { // will send a hard termination signal to the container. DeletionTimestamp *util.Time `json:"deletionTimestamp,omitempty" description:"RFC 3339 date and time at which the object will be deleted; populated by the system when a graceful deletion is requested, read-only; if not set, graceful deletion of the object has not been requested"` + // DeletionGracePeriodSeconds records the graceful deletion value set when graceful deletion + // was requested. Represents the most recent grace period, and may only be shortened once set. + DeletionGracePeriodSeconds *int64 `json:"deletionGracePeriodSeconds,omitempty" description:"number of seconds allowed for this object to gracefully terminate before it will be removed from the system; only set when deletionTimestamp is also set, read-only; may only be shortened"` + // Labels are key value pairs that may be used to scope and select individual resources. // TODO: replace map[string]string with labels.LabelSet type Labels map[string]string `json:"labels,omitempty" description:"map of string keys and values that can be used to organize and categorize objects; may match selectors of replication controllers and services"` @@ -838,6 +842,8 @@ const ( // DNSDefault indicates that the pod should use the default (as // determined by kubelet) DNS settings. DNSDefault DNSPolicy = "Default" + + DefaultTerminationGracePeriodSeconds = 30 ) // PodSpec is a description of a pod @@ -852,7 +858,7 @@ type PodSpec struct { // The grace period is the duration in seconds after the processes running in the pod are sent // a termination signal and the time when the processes are forcibly halted with a kill signal. // Set this value longer than the expected cleanup time for your process. - TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process"` + TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process; defaults to 30 seconds"` ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty" description:"optional duration in seconds the pod may be active on the node relative to StartTime before the system will actively try to mark it failed and kill associated containers; value must be a positive integer` // Optional: Set DNS policy. Defaults to "ClusterFirst" DNSPolicy DNSPolicy `json:"dnsPolicy,omitempty" description:"DNS policy for containers within the pod; one of 'ClusterFirst' or 'Default'"` diff --git a/pkg/api/v1beta3/conversion_generated.go b/pkg/api/v1beta3/conversion_generated.go index 4799a52ac35..10591677945 100644 --- a/pkg/api/v1beta3/conversion_generated.go +++ b/pkg/api/v1beta3/conversion_generated.go @@ -945,6 +945,12 @@ func convert_api_ObjectMeta_To_v1beta3_ObjectMeta(in *api.ObjectMeta, out *Objec } else { out.DeletionTimestamp = nil } + if in.DeletionGracePeriodSeconds != nil { + out.DeletionGracePeriodSeconds = new(int64) + *out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds + } else { + out.DeletionGracePeriodSeconds = nil + } if in.Labels != nil { out.Labels = make(map[string]string) for key, val := range in.Labels { @@ -3034,6 +3040,12 @@ func convert_v1beta3_ObjectMeta_To_api_ObjectMeta(in *ObjectMeta, out *api.Objec } else { out.DeletionTimestamp = nil } + if in.DeletionGracePeriodSeconds != nil { + out.DeletionGracePeriodSeconds = new(int64) + *out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds + } else { + out.DeletionGracePeriodSeconds = nil + } if in.Labels != nil { out.Labels = make(map[string]string) for key, val := range in.Labels { diff --git a/pkg/api/v1beta3/deep_copy_generated.go b/pkg/api/v1beta3/deep_copy_generated.go index 19f5f118c38..194e429370b 100644 --- a/pkg/api/v1beta3/deep_copy_generated.go +++ b/pkg/api/v1beta3/deep_copy_generated.go @@ -937,6 +937,12 @@ func deepCopy_v1beta3_ObjectMeta(in ObjectMeta, out *ObjectMeta, c *conversion.C } else { out.DeletionTimestamp = nil } + if in.DeletionGracePeriodSeconds != nil { + out.DeletionGracePeriodSeconds = new(int64) + *out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds + } else { + out.DeletionGracePeriodSeconds = nil + } if in.Labels != nil { out.Labels = make(map[string]string) for key, val := range in.Labels { diff --git a/pkg/api/v1beta3/defaults.go b/pkg/api/v1beta3/defaults.go index d542e5f3047..978073ab86e 100644 --- a/pkg/api/v1beta3/defaults.go +++ b/pkg/api/v1beta3/defaults.go @@ -102,6 +102,10 @@ func addDefaultingFuncs() { if obj.HostNetwork { defaultHostNetworkPorts(&obj.Containers) } + if obj.TerminationGracePeriodSeconds == nil { + period := int64(DefaultTerminationGracePeriodSeconds) + obj.TerminationGracePeriodSeconds = &period + } }, func(obj *Probe) { if obj.TimeoutSeconds == 0 { diff --git a/pkg/api/v1beta3/types.go b/pkg/api/v1beta3/types.go index 559f5cc858f..a00389ef73f 100644 --- a/pkg/api/v1beta3/types.go +++ b/pkg/api/v1beta3/types.go @@ -131,6 +131,10 @@ type ObjectMeta struct { // will send a hard termination signal to the container. DeletionTimestamp *util.Time `json:"deletionTimestamp,omitempty" description:"RFC 3339 date and time at which the object will be deleted; populated by the system when a graceful deletion is requested, read-only; if not set, graceful deletion of the object has not been requested"` + // DeletionGracePeriodSeconds records the graceful deletion value set when graceful deletion + // was requested. Represents the most recent grace period, and may only be shortened once set. + DeletionGracePeriodSeconds *int64 `json:"deletionGracePeriodSeconds,omitempty" description:"number of seconds allowed for this object to gracefully terminate before it will be removed from the system; only set when deletionTimestamp is also set, read-only; may only be shortened"` + // Labels are key value pairs that may be used to scope and select individual resources. // TODO: replace map[string]string with labels.LabelSet type Labels map[string]string `json:"labels,omitempty" description:"map of string keys and values that can be used to organize and categorize objects; may match selectors of replication controllers and services"` @@ -842,6 +846,8 @@ const ( // DNSDefault indicates that the pod should use the default (as // determined by kubelet) DNS settings. DNSDefault DNSPolicy = "Default" + + DefaultTerminationGracePeriodSeconds = 30 ) // PodSpec is a description of a pod @@ -856,7 +862,7 @@ type PodSpec struct { // The grace period is the duration in seconds after the processes running in the pod are sent // a termination signal and the time when the processes are forcibly halted with a kill signal. // Set this value longer than the expected cleanup time for your process. - TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process"` + TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process; defaults to 30 seconds"` ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty" description:"optional duration in seconds the pod may be active on the node relative to StartTime before the system will actively try to mark it failed and kill associated containers; value must be a positive integer` // Optional: Set DNS policy. Defaults to "ClusterFirst" DNSPolicy DNSPolicy `json:"dnsPolicy,omitempty" description:"DNS policy for containers within the pod; one of 'ClusterFirst' or 'Default'"` diff --git a/pkg/api/validation/validation.go b/pkg/api/validation/validation.go index 10065db79d8..42bc7ad5b1b 100644 --- a/pkg/api/validation/validation.go +++ b/pkg/api/validation/validation.go @@ -252,6 +252,16 @@ func ValidateObjectMetaUpdate(old, meta *api.ObjectMeta) errs.ValidationErrorLis } else { meta.CreationTimestamp = old.CreationTimestamp } + // an object can never remove a deletion timestamp or clear/change grace period seconds + if !old.DeletionTimestamp.IsZero() { + meta.DeletionTimestamp = old.DeletionTimestamp + } + if old.DeletionGracePeriodSeconds != nil && meta.DeletionGracePeriodSeconds == nil { + meta.DeletionGracePeriodSeconds = old.DeletionGracePeriodSeconds + } + if meta.DeletionGracePeriodSeconds != nil && *meta.DeletionGracePeriodSeconds != *old.DeletionGracePeriodSeconds { + allErrs = append(allErrs, errs.NewFieldInvalid("deletionGracePeriodSeconds", meta.DeletionGracePeriodSeconds, "field is immutable; may only be changed via deletion")) + } // Reject updates that don't specify a resource version if meta.ResourceVersion == "" { diff --git a/pkg/kubectl/cmd/get_test.go b/pkg/kubectl/cmd/get_test.go index d68b4d2a095..7f6c0288e77 100644 --- a/pkg/kubectl/cmd/get_test.go +++ b/pkg/kubectl/cmd/get_test.go @@ -38,6 +38,7 @@ import ( ) func testData() (*api.PodList, *api.ServiceList, *api.ReplicationControllerList) { + grace := int64(30) pods := &api.PodList{ ListMeta: api.ListMeta{ ResourceVersion: "15", @@ -46,15 +47,17 @@ func testData() (*api.PodList, *api.ServiceList, *api.ReplicationControllerList) { ObjectMeta: api.ObjectMeta{Name: "foo", Namespace: "test", ResourceVersion: "10"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "bar", Namespace: "test", ResourceVersion: "11"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, @@ -503,6 +506,7 @@ func TestGetMultipleTypeObjectsWithDirectReference(t *testing.T) { } } func watchTestData() ([]api.Pod, []watch.Event) { + grace := int64(30) pods := []api.Pod{ { ObjectMeta: api.ObjectMeta{ @@ -511,8 +515,9 @@ func watchTestData() ([]api.Pod, []watch.Event) { ResourceVersion: "10", }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, } @@ -526,8 +531,9 @@ func watchTestData() ([]api.Pod, []watch.Event) { ResourceVersion: "11", }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, @@ -540,8 +546,9 @@ func watchTestData() ([]api.Pod, []watch.Event) { ResourceVersion: "12", }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, diff --git a/pkg/kubectl/cmd/util/helpers_test.go b/pkg/kubectl/cmd/util/helpers_test.go index 232eefea14a..61c3a09d524 100644 --- a/pkg/kubectl/cmd/util/helpers_test.go +++ b/pkg/kubectl/cmd/util/helpers_test.go @@ -29,6 +29,7 @@ import ( ) func TestMerge(t *testing.T) { + grace := int64(30) tests := []struct { obj runtime.Object fragment string @@ -49,8 +50,9 @@ func TestMerge(t *testing.T) { Name: "foo", }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, @@ -117,8 +119,9 @@ func TestMerge(t *testing.T) { VolumeSource: api.VolumeSource{EmptyDir: &api.EmptyDirVolumeSource{}}, }, }, - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, diff --git a/pkg/kubectl/resource/builder_test.go b/pkg/kubectl/resource/builder_test.go index 602d0cb79ee..b16734be09d 100644 --- a/pkg/kubectl/resource/builder_test.go +++ b/pkg/kubectl/resource/builder_test.go @@ -83,6 +83,7 @@ func fakeClientWith(testName string, t *testing.T, data map[string]string) Clien } func testData() (*api.PodList, *api.ServiceList) { + grace := int64(30) pods := &api.PodList{ ListMeta: api.ListMeta{ ResourceVersion: "15", @@ -91,15 +92,17 @@ func testData() (*api.PodList, *api.ServiceList) { { ObjectMeta: api.ObjectMeta{Name: "foo", Namespace: "test", ResourceVersion: "10"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "bar", Namespace: "test", ResourceVersion: "11"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, diff --git a/pkg/kubectl/resource/helper_test.go b/pkg/kubectl/resource/helper_test.go index 1c9d1aab96b..243d41c9528 100644 --- a/pkg/kubectl/resource/helper_test.go +++ b/pkg/kubectl/resource/helper_test.go @@ -128,6 +128,7 @@ func TestHelperCreate(t *testing.T) { return true } + grace := int64(30) tests := []struct { Resp *http.Response RespFunc client.HTTPClientFunc @@ -172,8 +173,9 @@ func TestHelperCreate(t *testing.T) { ExpectObject: &api.Pod{ ObjectMeta: api.ObjectMeta{Name: "foo"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, Resp: &http.Response{StatusCode: http.StatusOK, Body: objBody(&api.Status{Status: api.StatusSuccess})}, @@ -381,6 +383,7 @@ func TestHelperUpdate(t *testing.T) { return true } + grace := int64(30) tests := []struct { Resp *http.Response RespFunc client.HTTPClientFunc @@ -418,8 +421,9 @@ func TestHelperUpdate(t *testing.T) { ExpectObject: &api.Pod{ ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "10"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, Overwrite: true, diff --git a/pkg/kubectl/rolling_updater_test.go b/pkg/kubectl/rolling_updater_test.go index 8374bd433d2..1a2d25083fa 100644 --- a/pkg/kubectl/rolling_updater_test.go +++ b/pkg/kubectl/rolling_updater_test.go @@ -628,6 +628,7 @@ func TestUpdateExistingReplicationController(t *testing.T) { func TestUpdateWithRetries(t *testing.T) { codec := testapi.Codec() + grace := int64(30) rc := &api.ReplicationController{ ObjectMeta: api.ObjectMeta{Name: "rc", Labels: map[string]string{ @@ -645,8 +646,9 @@ func TestUpdateWithRetries(t *testing.T) { }, }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, diff --git a/pkg/kubelet/config/common_test.go b/pkg/kubelet/config/common_test.go index d311c6b3af5..8d3fd6e564f 100644 --- a/pkg/kubelet/config/common_test.go +++ b/pkg/kubelet/config/common_test.go @@ -30,6 +30,7 @@ import ( func noDefault(*api.Pod) error { return nil } func TestDecodeSinglePod(t *testing.T) { + grace := int64(30) pod := &api.Pod{ TypeMeta: api.TypeMeta{ APIVersion: "", @@ -40,8 +41,9 @@ func TestDecodeSinglePod(t *testing.T) { Namespace: "mynamespace", }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, Containers: []api.Container{{ Name: "image", Image: "test/image", @@ -93,6 +95,7 @@ func TestDecodeSinglePod(t *testing.T) { } func TestDecodePodList(t *testing.T) { + grace := int64(30) pod := &api.Pod{ TypeMeta: api.TypeMeta{ APIVersion: "", @@ -103,8 +106,9 @@ func TestDecodePodList(t *testing.T) { Namespace: "mynamespace", }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, Containers: []api.Container{{ Name: "image", Image: "test/image", diff --git a/pkg/kubelet/config/config.go b/pkg/kubelet/config/config.go index f82ba39fa48..8c09ee59786 100644 --- a/pkg/kubelet/config/config.go +++ b/pkg/kubelet/config/config.go @@ -209,9 +209,8 @@ func (s *podStorage) merge(source string, change interface{}) (adds, updates, de for _, ref := range filtered { name := kubecontainer.GetPodFullName(ref) if existing, found := pods[name]; found { - if !reflect.DeepEqual(existing.Spec, ref.Spec) { + if checkAndUpdatePod(existing, ref) { // this is an update - existing.Spec = ref.Spec updates.Pods = append(updates.Pods, existing) continue } @@ -252,9 +251,8 @@ func (s *podStorage) merge(source string, change interface{}) (adds, updates, de name := kubecontainer.GetPodFullName(ref) if existing, found := oldPods[name]; found { pods[name] = existing - if !reflect.DeepEqual(existing.Spec, ref.Spec) { + if checkAndUpdatePod(existing, ref) { // this is an update - existing.Spec = ref.Spec updates.Pods = append(updates.Pods, existing) continue } @@ -325,6 +323,23 @@ func filterInvalidPods(pods []*api.Pod, source string, recorder record.EventReco return } +// checkAndUpdatePod updates existing if ref makes a meaningful change and returns true, or +// returns false if there was no update. +func checkAndUpdatePod(existing, ref *api.Pod) bool { + // TODO: it would be better to update the whole object and only preserve certain things + // like the source annotation or the UID (to ensure safety) + if reflect.DeepEqual(existing.Spec, ref.Spec) && + reflect.DeepEqual(existing.DeletionTimestamp, ref.DeletionTimestamp) && + reflect.DeepEqual(existing.DeletionGracePeriodSeconds, ref.DeletionGracePeriodSeconds) { + return false + } + // this is an update + existing.Spec = ref.Spec + existing.DeletionTimestamp = ref.DeletionTimestamp + existing.DeletionGracePeriodSeconds = ref.DeletionGracePeriodSeconds + return true +} + // Sync sends a copy of the current state through the update channel. func (s *podStorage) Sync() { s.updateLock.Lock() diff --git a/pkg/kubelet/config/file_test.go b/pkg/kubelet/config/file_test.go index 7d45ae479e2..4faf9485615 100644 --- a/pkg/kubelet/config/file_test.go +++ b/pkg/kubelet/config/file_test.go @@ -163,6 +163,7 @@ func TestReadContainerManifestFromFile(t *testing.T) { func TestReadPodsFromFile(t *testing.T) { hostname := "random-test-hostname" + grace := int64(30) var testCases = []struct { desc string pod runtime.Object @@ -192,9 +193,10 @@ func TestReadPodsFromFile(t *testing.T) { SelfLink: getSelfLink("test-"+hostname, "mynamespace"), }, Spec: api.PodSpec{ - NodeName: hostname, - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + NodeName: hostname, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, Containers: []api.Container{{ Name: "image", Image: "test/image", @@ -227,9 +229,10 @@ func TestReadPodsFromFile(t *testing.T) { SelfLink: getSelfLink("12345-"+hostname, kubelet.NamespaceDefault), }, Spec: api.PodSpec{ - NodeName: hostname, - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + NodeName: hostname, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, Containers: []api.Container{{ Name: "image", Image: "test/image", diff --git a/pkg/kubelet/config/http_test.go b/pkg/kubelet/config/http_test.go index 6843ac864b1..f6429fa2a80 100644 --- a/pkg/kubelet/config/http_test.go +++ b/pkg/kubelet/config/http_test.go @@ -120,6 +120,7 @@ func TestExtractInvalidManifest(t *testing.T) { func TestExtractPodsFromHTTP(t *testing.T) { hostname := "different-value" + grace := int64(30) var testCases = []struct { desc string pods runtime.Object @@ -153,9 +154,11 @@ func TestExtractPodsFromHTTP(t *testing.T) { SelfLink: getSelfLink("foo-"+hostname, "mynamespace"), }, Spec: api.PodSpec{ - NodeName: hostname, - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + NodeName: hostname, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, + Containers: []api.Container{{ Name: "1", Image: "foo", @@ -206,9 +209,11 @@ func TestExtractPodsFromHTTP(t *testing.T) { SelfLink: getSelfLink("foo-"+hostname, kubelet.NamespaceDefault), }, Spec: api.PodSpec{ - NodeName: hostname, - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + NodeName: hostname, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, + Containers: []api.Container{{ Name: "1", Image: "foo", @@ -226,9 +231,11 @@ func TestExtractPodsFromHTTP(t *testing.T) { SelfLink: getSelfLink("bar-"+hostname, kubelet.NamespaceDefault), }, Spec: api.PodSpec{ - NodeName: hostname, - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + NodeName: hostname, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, + Containers: []api.Container{{ Name: "2", Image: "bar", diff --git a/pkg/kubelet/container/fake_runtime.go b/pkg/kubelet/container/fake_runtime.go index 768e2e49a10..215fe5a8ab1 100644 --- a/pkg/kubelet/container/fake_runtime.go +++ b/pkg/kubelet/container/fake_runtime.go @@ -163,13 +163,13 @@ func (f *FakeRuntime) SyncPod(pod *api.Pod, _ Pod, _ api.PodStatus, _ []api.Secr return f.Err } -func (f *FakeRuntime) KillPod(pod Pod) error { +func (f *FakeRuntime) KillPod(pod *api.Pod, runningPod Pod) error { f.Lock() defer f.Unlock() f.CalledFunctions = append(f.CalledFunctions, "KillPod") - f.KilledPods = append(f.KilledPods, string(pod.ID)) - for _, c := range pod.Containers { + f.KilledPods = append(f.KilledPods, string(runningPod.ID)) + for _, c := range runningPod.Containers { f.KilledContainers = append(f.KilledContainers, c.Name) } return f.Err diff --git a/pkg/kubelet/container/runtime.go b/pkg/kubelet/container/runtime.go index 6fa2073da49..ac4477a255d 100644 --- a/pkg/kubelet/container/runtime.go +++ b/pkg/kubelet/container/runtime.go @@ -53,8 +53,8 @@ type Runtime interface { GetPods(all bool) ([]*Pod, error) // Syncs the running pod into the desired pod. SyncPod(pod *api.Pod, runningPod Pod, podStatus api.PodStatus, pullSecrets []api.Secret) error - // KillPod kills all the containers of a pod. - KillPod(pod Pod) error + // KillPod kills all the containers of a pod. Pod may be nil, running pod must not be. + KillPod(pod *api.Pod, runningPod Pod) error // GetPodStatus retrieves the status of the pod, including the information of // all containers in the pod. GetPodStatus(*api.Pod) (*api.PodStatus, error) diff --git a/pkg/kubelet/dockertools/manager.go b/pkg/kubelet/dockertools/manager.go index d1bbf459493..89f918eb9ff 100644 --- a/pkg/kubelet/dockertools/manager.go +++ b/pkg/kubelet/dockertools/manager.go @@ -54,8 +54,17 @@ const ( maxReasonCacheEntries = 200 - kubernetesPodLabel = "io.kubernetes.pod.data" - kubernetesContainerLabel = "io.kubernetes.container.name" + // In order to avoid unnecessary SIGKILLs, give every container a minimum grace + // period after SIGTERM. Docker will guarantee the termination, but SIGTERM is + // potentially dangerous. + // TODO: evaluate whether there are scenarios in which SIGKILL is preferable to + // SIGTERM for certain process types, which may justify setting this to 0. + minimumGracePeriodInSeconds = 2 + + kubernetesNameLabel = "io.kubernetes.pod.name" + kubernetesPodLabel = "io.kubernetes.pod.data" + kubernetesTerminationGracePeriodLabel = "io.kubernetes.pod.terminationGracePeriod" + kubernetesContainerLabel = "io.kubernetes.container.name" ) // DockerManager implements the Runtime interface. @@ -558,12 +567,19 @@ func (dm *DockerManager) runContainer( if len(containerHostname) > hostnameMaxLen { containerHostname = containerHostname[:hostnameMaxLen] } + + // Pod information is recorded on the container as labels to preserve it in the event the pod is deleted + // while the Kubelet is down and there is no information available to recover the pod. This includes + // termination information like the termination grace period and the pre stop hooks. + // TODO: keep these labels up to date if the pod changes namespacedName := types.NamespacedName{pod.Namespace, pod.Name} labels := map[string]string{ - "io.kubernetes.pod.name": namespacedName.String(), + kubernetesNameLabel: namespacedName.String(), + } + if pod.Spec.TerminationGracePeriodSeconds != nil { + labels[kubernetesTerminationGracePeriodLabel] = strconv.FormatInt(*pod.Spec.TerminationGracePeriodSeconds, 10) } if container.Lifecycle != nil && container.Lifecycle.PreStop != nil { - glog.V(1).Infof("Setting preStop hook") // TODO: This is kind of hacky, we should really just encode the bits we need. data, err := latest.Codec.Encode(pod) if err != nil { @@ -1080,12 +1096,12 @@ func (dm *DockerManager) PortForward(pod *kubecontainer.Pod, port uint16, stream } // Kills all containers in the specified pod -func (dm *DockerManager) KillPod(pod kubecontainer.Pod) error { +func (dm *DockerManager) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) error { // Send the kills in parallel since they may take a long time. Len + 1 since there // can be Len errors + the networkPlugin teardown error. - errs := make(chan error, len(pod.Containers)+1) + errs := make(chan error, len(runningPod.Containers)+1) wg := sync.WaitGroup{} - for _, container := range pod.Containers { + for _, container := range runningPod.Containers { wg.Add(1) go func(container *kubecontainer.Container) { defer util.HandleCrash() @@ -1093,15 +1109,24 @@ func (dm *DockerManager) KillPod(pod kubecontainer.Pod) error { // TODO: Handle this without signaling the pod infra container to // adapt to the generic container runtime. if container.Name == PodInfraContainerName { - err := dm.networkPlugin.TearDownPod(pod.Namespace, pod.Name, kubeletTypes.DockerID(container.ID)) + err := dm.networkPlugin.TearDownPod(runningPod.Namespace, runningPod.Name, kubeletTypes.DockerID(container.ID)) if err != nil { glog.Errorf("Failed tearing down the infra container: %v", err) errs <- err } } - err := dm.killContainer(container.ID) + var containerSpec *api.Container + if pod != nil { + for i, c := range pod.Spec.Containers { + if c.Name == container.Name { + containerSpec = &pod.Spec.Containers[i] + break + } + } + } + err := dm.killContainer(container.ID, containerSpec, pod) if err != nil { - glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, pod.ID) + glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, runningPod.ID) errs <- err } wg.Done() @@ -1119,75 +1144,128 @@ func (dm *DockerManager) KillPod(pod kubecontainer.Pod) error { return nil } -// KillContainerInPod kills a container in the pod. -func (dm *DockerManager) KillContainerInPod(container api.Container, pod *api.Pod) error { - // Locate the container. - pods, err := dm.GetPods(false) - if err != nil { - return err - } - targetPod := kubecontainer.Pods(pods).FindPod(kubecontainer.GetPodFullName(pod), pod.UID) - targetContainer := targetPod.FindContainerByName(container.Name) - if targetContainer == nil { - return fmt.Errorf("unable to find container %q in pod %q", container.Name, targetPod.Name) - } - return dm.killContainer(targetContainer.ID) -} - -// TODO(vmarmol): Unexport this as it is no longer used externally. -// KillContainer kills a container identified by containerID. -// Internally, it invokes docker's StopContainer API with a timeout of 10s. -// TODO: Deprecate this function in favor of KillContainerInPod. -func (dm *DockerManager) KillContainer(containerID types.UID) error { - return dm.killContainer(containerID) -} - -func (dm *DockerManager) killContainer(containerID types.UID) error { - ID := string(containerID) - glog.V(2).Infof("Killing container with id %q", ID) - inspect, err := dm.client.InspectContainer(ID) - if err != nil { - return err - } - var found bool - var preStop string - if inspect != nil && inspect.Config != nil && inspect.Config.Labels != nil { - preStop, found = inspect.Config.Labels[kubernetesPodLabel] - } - if found { - var pod api.Pod - err := latest.Codec.DecodeInto([]byte(preStop), &pod) +// KillContainerInPod kills a container in the pod. It must be passed either a container ID or a container and pod, +// and will attempt to lookup the other information if missing. +func (dm *DockerManager) KillContainerInPod(containerID types.UID, container *api.Container, pod *api.Pod) error { + switch { + case len(containerID) == 0: + // Locate the container. + pods, err := dm.GetPods(false) if err != nil { - glog.Errorf("Failed to decode prestop: %s, %s", preStop, ID) - } else { - name := inspect.Config.Labels[kubernetesContainerLabel] - var container *api.Container + return err + } + targetPod := kubecontainer.Pods(pods).FindPod(kubecontainer.GetPodFullName(pod), pod.UID) + targetContainer := targetPod.FindContainerByName(container.Name) + if targetContainer == nil { + return fmt.Errorf("unable to find container %q in pod %q", container.Name, targetPod.Name) + } + containerID = targetContainer.ID + + case container == nil || pod == nil: + // Read information about the container from labels + inspect, err := dm.client.InspectContainer(string(containerID)) + if err != nil { + return err + } + storedPod, storedContainer, cerr := containerAndPodFromLabels(inspect) + if cerr != nil { + glog.Errorf("unable to access pod data from container: %v", err) + } + if container == nil { + container = storedContainer + } + if pod == nil { + pod = storedPod + } + } + return dm.killContainer(containerID, container, pod) +} + +// killContainer accepts a containerID and an optional container or pod containing shutdown policies. Invoke +// KillContainerInPod if information must be retrieved first. +func (dm *DockerManager) killContainer(containerID types.UID, container *api.Container, pod *api.Pod) error { + ID := string(containerID) + name := ID + if container != nil { + name = fmt.Sprintf("%s %s", name, container.Name) + } + if pod != nil { + name = fmt.Sprintf("%s %s/%s", name, pod.Namespace, pod.Name) + } + + gracePeriod := int64(minimumGracePeriodInSeconds) + if pod != nil && pod.DeletionGracePeriodSeconds != nil { + gracePeriod = *pod.DeletionGracePeriodSeconds + } + glog.V(2).Infof("Killing container %q with %d second grace period", name, gracePeriod) + + if pod != nil && container != nil && container.Lifecycle != nil && container.Lifecycle.PreStop != nil { + glog.V(4).Infof("Running preStop hook for container %q", name) + start := util.Now() + // TODO: timebox PreStop execution to at most gracePeriod + if err := dm.runner.Run(ID, pod, container, container.Lifecycle.PreStop); err != nil { + glog.Errorf("preStop hook for container %q failed: %v", name, err) + } + gracePeriod -= int64(util.Now().Sub(start.Time).Seconds()) + } + + dm.readinessManager.RemoveReadiness(ID) + + // always give containers a minimal shutdown window to avoid unnecessary SIGKILLs + if gracePeriod < minimumGracePeriodInSeconds { + gracePeriod = minimumGracePeriodInSeconds + } + err := dm.client.StopContainer(ID, uint(gracePeriod)) + ref, ok := dm.containerRefManager.GetRef(ID) + if !ok { + glog.Warningf("No ref for pod '%q'", name) + } else { + // TODO: pass reason down here, and state, or move this call up the stack. + dm.recorder.Eventf(ref, "killing", "Killing %v", ID) + } + return err +} + +var errNoPodOnContainer = fmt.Errorf("no pod information labels on Docker container") + +// containerAndPodFromLabels tries to load the appropriate container info off of a Docker container's labels +func containerAndPodFromLabels(inspect *docker.Container) (pod *api.Pod, container *api.Container, err error) { + if inspect == nil && inspect.Config == nil && inspect.Config.Labels == nil { + return nil, nil, errNoPodOnContainer + } + labels := inspect.Config.Labels + + // the pod data may not be set + if body, found := labels[kubernetesPodLabel]; found { + pod = &api.Pod{} + if err = latest.Codec.DecodeInto([]byte(body), pod); err == nil { + name := labels[kubernetesContainerLabel] for ix := range pod.Spec.Containers { if pod.Spec.Containers[ix].Name == name { container = &pod.Spec.Containers[ix] break } } - if container != nil { - glog.V(1).Infof("Running preStop hook") - if err := dm.runner.Run(ID, &pod, container, container.Lifecycle.PreStop); err != nil { - glog.Errorf("failed to run preStop hook: %v", err) - } - } else { - glog.Errorf("unable to find container %v, %s", pod, name) + if container == nil { + err = fmt.Errorf("unable to find container %s in pod %v", name, pod) + } + } else { + pod = nil + } + } + + // attempt to find the default grace period if we didn't commit a pod, but set the generic metadata + // field (the one used by kill) + if pod == nil { + if period, ok := labels[kubernetesTerminationGracePeriodLabel]; ok { + if seconds, err := strconv.ParseInt(period, 10, 64); err == nil { + pod = &api.Pod{} + pod.DeletionGracePeriodSeconds = &seconds } } } - dm.readinessManager.RemoveReadiness(ID) - err = dm.client.StopContainer(ID, 10) - ref, ok := dm.containerRefManager.GetRef(ID) - if !ok { - glog.Warningf("No ref for pod '%v'", ID) - } else { - // TODO: pass reason down here, and state, or move this call up the stack. - dm.recorder.Eventf(ref, "killing", "Killing %v", ID) - } - return err + + return } // Run a single container from a pod. Returns the docker container ID @@ -1215,7 +1293,7 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe if container.Lifecycle != nil && container.Lifecycle.PostStart != nil { handlerErr := dm.runner.Run(id, pod, container, container.Lifecycle.PostStart) if handlerErr != nil { - dm.killContainer(types.UID(id)) + dm.killContainer(types.UID(id), container, pod) return kubeletTypes.DockerID(""), fmt.Errorf("failed to call event handler: %v", handlerErr) } } @@ -1328,6 +1406,11 @@ func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, runningPod kub containersToKeep := make(map[kubeletTypes.DockerID]int) createPodInfraContainer := false + if pod.DeletionTimestamp != nil { + glog.V(4).Infof("Pod is terminating %q", podFullName) + return PodContainerChangesSpec{}, nil + } + var err error var podInfraContainerID kubeletTypes.DockerID var changed bool @@ -1481,7 +1564,7 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod } // Killing phase: if we want to start new infra container, or nothing is running kill everything (including infra container) - err = dm.KillPod(runningPod) + err = dm.KillPod(pod, runningPod) if err != nil { return err } @@ -1491,7 +1574,15 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod _, keep := containerChanges.ContainersToKeep[kubeletTypes.DockerID(container.ID)] if !keep { glog.V(3).Infof("Killing unwanted container %+v", container) - err = dm.KillContainer(container.ID) + // attempt to find the appropriate container policy + var podContainer *api.Container + for i, c := range pod.Spec.Containers { + if c.Name == container.Name { + podContainer = &pod.Spec.Containers[i] + break + } + } + err = dm.KillContainerInPod(container.ID, podContainer, pod) if err != nil { glog.Errorf("Error killing container: %v", err) } diff --git a/pkg/kubelet/dockertools/manager_test.go b/pkg/kubelet/dockertools/manager_test.go index 38b89301915..32e6e148872 100644 --- a/pkg/kubelet/dockertools/manager_test.go +++ b/pkg/kubelet/dockertools/manager_test.go @@ -417,7 +417,7 @@ func TestKillContainerInPod(t *testing.T) { manager.readinessManager.SetReadiness(c.ID, true) } - if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err != nil { + if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err != nil { t.Errorf("unexpected error: %v", err) } // Assert the container has been stopped. @@ -490,14 +490,14 @@ func TestKillContainerInPodWithPreStop(t *testing.T) { manager.readinessManager.SetReadiness(c.ID, true) } - if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err != nil { + if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err != nil { t.Errorf("unexpected error: %v", err) } // Assert the container has been stopped. if err := fakeDocker.AssertStopped([]string{containerToKill.ID}); err != nil { t.Errorf("container was not stopped correctly: %v", err) } - verifyCalls(t, fakeDocker, []string{"list", "inspect_container", "create_exec", "start_exec", "stop"}) + verifyCalls(t, fakeDocker, []string{"list", "create_exec", "start_exec", "stop"}) if !reflect.DeepEqual(expectedCmd, fakeDocker.execCmd) { t.Errorf("expected: %v, got %v", expectedCmd, fakeDocker.execCmd) } @@ -534,7 +534,7 @@ func TestKillContainerInPodWithError(t *testing.T) { manager.readinessManager.SetReadiness(c.ID, true) } - if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err == nil { + if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err == nil { t.Errorf("expected error, found nil") } @@ -1030,7 +1030,7 @@ func TestSyncPodDeletesWithNoPodInfraContainer(t *testing.T) { verifyCalls(t, fakeDocker, []string{ // Kill the container since pod infra container is not running. - "inspect_container", "stop", + "stop", // Create pod infra container. "create", "start", "inspect_container", // Create container. @@ -1105,7 +1105,7 @@ func TestSyncPodDeletesDuplicate(t *testing.T) { // Check the pod infra container. "inspect_container", // Kill the duplicated container. - "inspect_container", "stop", + "stop", }) // Expect one of the duplicates to be killed. if len(fakeDocker.Stopped) != 1 || (fakeDocker.Stopped[0] != "1234" && fakeDocker.Stopped[0] != "4567") { @@ -1159,7 +1159,7 @@ func TestSyncPodBadHash(t *testing.T) { // Check the pod infra container. "inspect_container", // Kill and restart the bad hash container. - "inspect_container", "stop", "create", "start", + "stop", "create", "start", }) if err := fakeDocker.AssertStopped([]string{"1234"}); err != nil { @@ -1217,7 +1217,7 @@ func TestSyncPodsUnhealthy(t *testing.T) { // Check the pod infra container. "inspect_container", // Kill the unhealthy container. - "inspect_container", "stop", + "stop", // Restart the unhealthy container. "create", "start", }) @@ -1426,7 +1426,7 @@ func TestSyncPodWithRestartPolicy(t *testing.T) { // Check the pod infra container. "inspect_container", // Stop the last pod infra container. - "inspect_container", "stop", + "stop", }, []string{}, []string{"9876"}, diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 2b218c190da..08ec6d9e11c 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -1054,8 +1054,8 @@ func parseResolvConf(reader io.Reader) (nameservers []string, searches []string, } // Kill all running containers in a pod (includes the pod infra container). -func (kl *Kubelet) killPod(pod kubecontainer.Pod) error { - return kl.containerRuntime.KillPod(pod) +func (kl *Kubelet) killPod(pod *api.Pod, runningPod kubecontainer.Pod) error { + return kl.containerRuntime.KillPod(pod, runningPod) } type empty struct{} @@ -1101,7 +1101,7 @@ func (kl *Kubelet) syncPod(pod *api.Pod, mirrorPod *api.Pod, runningPod kubecont // Kill pods we can't run. err := canRunPod(pod) if err != nil { - kl.killPod(runningPod) + kl.killPod(pod, runningPod) return err } @@ -1416,7 +1416,7 @@ func (kl *Kubelet) killUnwantedPods(desiredPods map[types.UID]empty, }() glog.V(1).Infof("Killing unwanted pod %q", pod.Name) // Stop the containers. - err = kl.killPod(*pod) + err = kl.killPod(nil, *pod) if err != nil { glog.Errorf("Failed killing the pod %q: %v", pod.Name, err) return diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go index 3b8bd06495e..0095e5e3c09 100644 --- a/pkg/kubelet/kubelet_test.go +++ b/pkg/kubelet/kubelet_test.go @@ -1153,7 +1153,7 @@ func TestSyncPodEventHandlerFails(t *testing.T) { // Create the container. "create", "start", // Kill the container since event handler fails. - "inspect_container", "stop", + "stop", // Get pod status. "list", "inspect_container", "inspect_container", // Get pods for deleting orphaned volumes. diff --git a/pkg/kubelet/rkt/rkt.go b/pkg/kubelet/rkt/rkt.go index ba2da9c58fa..3c716bc869d 100644 --- a/pkg/kubelet/rkt/rkt.go +++ b/pkg/kubelet/rkt/rkt.go @@ -671,11 +671,11 @@ func (r *runtime) GetPods(all bool) ([]*kubecontainer.Pod, error) { } // KillPod invokes 'systemctl kill' to kill the unit that runs the pod. -func (r *runtime) KillPod(pod kubecontainer.Pod) error { - glog.V(4).Infof("Rkt is killing pod: name %q.", pod.Name) +func (r *runtime) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) error { + glog.V(4).Infof("Rkt is killing pod: name %q.", runningPod.Name) // TODO(yifan): More graceful stop. Replace with StopUnit and wait for a timeout. - r.systemd.KillUnit(makePodServiceFileName(pod.ID), int32(syscall.SIGKILL)) + r.systemd.KillUnit(makePodServiceFileName(runningPod.ID), int32(syscall.SIGKILL)) return r.systemd.Reload() } @@ -880,7 +880,7 @@ func (r *runtime) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, podStatus if restartPod { // TODO(yifan): Handle network plugin. - if err := r.KillPod(runningPod); err != nil { + if err := r.KillPod(pod, runningPod); err != nil { return err } if err := r.RunPod(pod); err != nil { diff --git a/pkg/kubelet/status_manager.go b/pkg/kubelet/status_manager.go index 4df39d7c086..a0e7af830e9 100644 --- a/pkg/kubelet/status_manager.go +++ b/pkg/kubelet/status_manager.go @@ -22,6 +22,7 @@ import ( "sync" "github.com/GoogleCloudPlatform/kubernetes/pkg/api" + "github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors" "github.com/GoogleCloudPlatform/kubernetes/pkg/client" kubecontainer "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/container" "github.com/GoogleCloudPlatform/kubernetes/pkg/util" @@ -135,13 +136,24 @@ func (s *statusManager) syncBatch() error { } // TODO: make me easier to express from client code statusPod, err = s.kubeClient.Pods(statusPod.Namespace).Get(statusPod.Name) + if errors.IsNotFound(err) { + glog.V(3).Infof("Pod %q was deleted on the server", pod.Name) + return nil + } if err == nil { statusPod.Status = status - _, err = s.kubeClient.Pods(pod.Namespace).UpdateStatus(statusPod) // TODO: handle conflict as a retry, make that easier too. + statusPod, err = s.kubeClient.Pods(pod.Namespace).UpdateStatus(statusPod) if err == nil { glog.V(3).Infof("Status for pod %q updated successfully", pod.Name) - return nil + + if statusPod.DeletionTimestamp == nil || !allTerminated(statusPod.Status.ContainerStatuses) { + return nil + } + if err := s.kubeClient.Pods(statusPod.Namespace).Delete(statusPod.Name, api.NewDeleteOptions(0)); err == nil { + glog.V(3).Infof("Pod %q fully terminated and removed from etcd", statusPod.Name) + return nil + } } } @@ -151,3 +163,14 @@ func (s *statusManager) syncBatch() error { s.DeletePodStatus(podFullName) return fmt.Errorf("error updating status for pod %q: %v", pod.Name, err) } + +// allTerminated returns true if every status is terminated, or the status list +// is empty. +func allTerminated(statuses []api.ContainerStatus) bool { + for _, status := range statuses { + if status.State.Terminated == nil { + return false + } + } + return true +} diff --git a/pkg/registry/controller/etcd/etcd_test.go b/pkg/registry/controller/etcd/etcd_test.go index 6b9fdc15242..fc9bd7f91d8 100644 --- a/pkg/registry/controller/etcd/etcd_test.go +++ b/pkg/registry/controller/etcd/etcd_test.go @@ -722,7 +722,7 @@ func TestDelete(t *testing.T) { // If the controller is still around after trying to delete either the delete // failed, or we're deleting it gracefully. if fakeClient.Data[key].R.Node != nil { - return true + return fakeClient.Data[key].R.Node.TTL != 0 } return false } diff --git a/pkg/registry/event/registry_test.go b/pkg/registry/event/registry_test.go index 75d29614fb4..5da05e536cf 100644 --- a/pkg/registry/event/registry_test.go +++ b/pkg/registry/event/registry_test.go @@ -37,6 +37,7 @@ var testTTL uint64 = 60 func NewTestEventEtcdRegistry(t *testing.T) (*tools.FakeEtcdClient, generic.Registry) { f := tools.NewFakeEtcdClient(t) + f.HideExpires = true f.TestIndex = true h := tools.NewEtcdHelper(f, testapi.Codec(), etcdtest.PathPrefix()) diff --git a/pkg/registry/persistentvolume/etcd/etcd_test.go b/pkg/registry/persistentvolume/etcd/etcd_test.go index 0ce3d2912ad..2b80a6019ec 100644 --- a/pkg/registry/persistentvolume/etcd/etcd_test.go +++ b/pkg/registry/persistentvolume/etcd/etcd_test.go @@ -324,7 +324,7 @@ func TestEtcdUpdateStatus(t *testing.T) { key, _ := storage.KeyFunc(ctx, "foo") key = etcdtest.AddPrefix(key) pvStart := validNewPersistentVolume("foo") - fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvStart), 1) + fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvStart), 0) pvIn := &api.PersistentVolume{ ObjectMeta: api.ObjectMeta{ diff --git a/pkg/registry/persistentvolumeclaim/etcd/etcd_test.go b/pkg/registry/persistentvolumeclaim/etcd/etcd_test.go index e3ba55afed2..067009ad109 100644 --- a/pkg/registry/persistentvolumeclaim/etcd/etcd_test.go +++ b/pkg/registry/persistentvolumeclaim/etcd/etcd_test.go @@ -325,7 +325,7 @@ func TestEtcdUpdateStatus(t *testing.T) { key, _ := storage.KeyFunc(ctx, "foo") key = etcdtest.AddPrefix(key) pvcStart := validNewPersistentVolumeClaim("foo", api.NamespaceDefault) - fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvcStart), 1) + fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvcStart), 0) pvc := &api.PersistentVolumeClaim{ ObjectMeta: api.ObjectMeta{ diff --git a/pkg/registry/pod/etcd/etcd_test.go b/pkg/registry/pod/etcd/etcd_test.go index 9d1e00f9264..e64e3aae27d 100644 --- a/pkg/registry/pod/etcd/etcd_test.go +++ b/pkg/registry/pod/etcd/etcd_test.go @@ -54,6 +54,7 @@ func newStorage(t *testing.T) (*REST, *BindingREST, *StatusREST, *tools.FakeEtcd } func validNewPod() *api.Pod { + grace := int64(30) return &api.Pod{ ObjectMeta: api.ObjectMeta{ Name: "foo", @@ -62,6 +63,8 @@ func validNewPod() *api.Pod { Spec: api.PodSpec{ RestartPolicy: api.RestartPolicyAlways, DNSPolicy: api.DNSClusterFirst, + + TerminationGracePeriodSeconds: &grace, Containers: []api.Container{ { Name: "foo", @@ -132,9 +135,9 @@ func TestDelete(t *testing.T) { if fakeEtcdClient.Data[key].R.Node == nil { return false } - return fakeEtcdClient.Data[key].R.Node.TTL == 30 + return fakeEtcdClient.Data[key].R.Node.TTL != 0 } - test.TestDelete(createFn, gracefulSetFn) + test.TestDeleteGraceful(createFn, 30, gracefulSetFn) } func expectPod(t *testing.T, out runtime.Object) (*api.Pod, bool) { @@ -1118,6 +1121,7 @@ func TestEtcdUpdateScheduled(t *testing.T) { }, }), 1) + grace := int64(30) podIn := api.Pod{ ObjectMeta: api.ObjectMeta{ Name: "foo", @@ -1139,6 +1143,8 @@ func TestEtcdUpdateScheduled(t *testing.T) { }, RestartPolicy: api.RestartPolicyAlways, DNSPolicy: api.DNSClusterFirst, + + TerminationGracePeriodSeconds: &grace, }, } _, _, err := registry.Update(ctx, &podIn) @@ -1179,7 +1185,7 @@ func TestEtcdUpdateStatus(t *testing.T) { }, }, } - fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, &podStart), 1) + fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, &podStart), 0) podIn := api.Pod{ ObjectMeta: api.ObjectMeta{ @@ -1208,6 +1214,8 @@ func TestEtcdUpdateStatus(t *testing.T) { expected := podStart expected.ResourceVersion = "2" + grace := int64(30) + expected.Spec.TerminationGracePeriodSeconds = &grace expected.Spec.RestartPolicy = api.RestartPolicyAlways expected.Spec.DNSPolicy = api.DNSClusterFirst expected.Spec.Containers[0].ImagePullPolicy = api.PullIfNotPresent diff --git a/pkg/registry/pod/rest.go b/pkg/registry/pod/rest.go index 7c1ed33e3bc..12918dcc9b9 100644 --- a/pkg/registry/pod/rest.go +++ b/pkg/registry/pod/rest.go @@ -81,9 +81,26 @@ func (podStrategy) ValidateUpdate(ctx api.Context, obj, old runtime.Object) fiel return append(errorList, validation.ValidatePodUpdate(obj.(*api.Pod), old.(*api.Pod))...) } -// CheckGracefulDelete allows a pod to be gracefully deleted. +// CheckGracefulDelete allows a pod to be gracefully deleted. It updates the DeleteOptions to +// reflect the desired grace value. func (podStrategy) CheckGracefulDelete(obj runtime.Object, options *api.DeleteOptions) bool { - return false + if options == nil { + return false + } + period := int64(0) + // user has specified a value + if options.GracePeriodSeconds != nil { + period = *options.GracePeriodSeconds + } else { + // use the default value if set, or deletes the pod immediately (0) + pod := obj.(*api.Pod) + if pod.Spec.TerminationGracePeriodSeconds != nil { + period = *pod.Spec.TerminationGracePeriodSeconds + } + } + // ensure the options and the pod are in sync + options.GracePeriodSeconds = &period + return true } type podStatusStrategy struct { @@ -96,6 +113,7 @@ func (podStatusStrategy) PrepareForUpdate(obj, old runtime.Object) { newPod := obj.(*api.Pod) oldPod := old.(*api.Pod) newPod.Spec = oldPod.Spec + newPod.DeletionTimestamp = nil } func (podStatusStrategy) ValidateUpdate(ctx api.Context, obj, old runtime.Object) fielderrors.ValidationErrorList { diff --git a/pkg/registry/resourcequota/etcd/etcd_test.go b/pkg/registry/resourcequota/etcd/etcd_test.go index b4498b338eb..b19e939141b 100644 --- a/pkg/registry/resourcequota/etcd/etcd_test.go +++ b/pkg/registry/resourcequota/etcd/etcd_test.go @@ -477,7 +477,7 @@ func TestEtcdUpdateStatus(t *testing.T) { key, _ := registry.KeyFunc(ctx, "foo") key = etcdtest.AddPrefix(key) resourcequotaStart := validNewResourceQuota() - fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, resourcequotaStart), 1) + fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, resourcequotaStart), 0) resourcequotaIn := &api.ResourceQuota{ ObjectMeta: api.ObjectMeta{ diff --git a/pkg/tools/etcd_helper_test.go b/pkg/tools/etcd_helper_test.go index 5c39002d4cc..b948a800865 100644 --- a/pkg/tools/etcd_helper_test.go +++ b/pkg/tools/etcd_helper_test.go @@ -117,28 +117,32 @@ func TestExtractToList(t *testing.T) { }, }, } + grace := int64(30) expect := api.PodList{ ListMeta: api.ListMeta{ResourceVersion: "10"}, Items: []api.Pod{ { ObjectMeta: api.ObjectMeta{Name: "bar", ResourceVersion: "2"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "baz", ResourceVersion: "3"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "1"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, @@ -200,6 +204,7 @@ func TestExtractToListAcrossDirectories(t *testing.T) { }, }, } + grace := int64(30) expect := api.PodList{ ListMeta: api.ListMeta{ResourceVersion: "10"}, Items: []api.Pod{ @@ -207,22 +212,25 @@ func TestExtractToListAcrossDirectories(t *testing.T) { { ObjectMeta: api.ObjectMeta{Name: "baz", ResourceVersion: "3"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "1"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "bar", ResourceVersion: "2"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, @@ -272,28 +280,32 @@ func TestExtractToListExcludesDirectories(t *testing.T) { }, }, } + grace := int64(30) expect := api.PodList{ ListMeta: api.ListMeta{ResourceVersion: "10"}, Items: []api.Pod{ { ObjectMeta: api.ObjectMeta{Name: "bar", ResourceVersion: "2"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "baz", ResourceVersion: "3"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "1"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, @@ -313,11 +325,13 @@ func TestExtractObj(t *testing.T) { fakeClient := NewFakeEtcdClient(t) helper := NewEtcdHelper(fakeClient, testapi.Codec(), etcdtest.PathPrefix()) key := etcdtest.AddPrefix("/some/key") + grace := int64(30) expect := api.Pod{ ObjectMeta: api.ObjectMeta{Name: "foo"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, } fakeClient.Set(key, runtime.EncodeOrDie(testapi.Codec(), &expect), 0) diff --git a/pkg/tools/fake_etcd_client.go b/pkg/tools/fake_etcd_client.go index 11d7d435108..a4cbde7ad96 100644 --- a/pkg/tools/fake_etcd_client.go +++ b/pkg/tools/fake_etcd_client.go @@ -21,6 +21,7 @@ import ( "fmt" "sort" "sync" + "time" "github.com/coreos/go-etcd/etcd" ) @@ -53,6 +54,8 @@ type FakeEtcdClient struct { TestIndex bool ChangeIndex uint64 LastSetTTL uint64 + // Will avoid setting the expires header on objects to make comparison easier + HideExpires bool Machines []string // Will become valid after Watch is called; tester may write to it. Tester may @@ -184,6 +187,11 @@ func (f *FakeEtcdClient) setLocked(key, value string, ttl uint64) (*etcd.Respons prevResult := f.Data[key] createdIndex := prevResult.R.Node.CreatedIndex f.t.Logf("updating %v, index %v -> %v (ttl: %d)", key, createdIndex, i, ttl) + var expires *time.Time + if !f.HideExpires && ttl > 0 { + now := time.Now() + expires = &now + } result := EtcdResponseWithError{ R: &etcd.Response{ Node: &etcd.Node{ @@ -191,6 +199,7 @@ func (f *FakeEtcdClient) setLocked(key, value string, ttl uint64) (*etcd.Respons CreatedIndex: createdIndex, ModifiedIndex: i, TTL: int64(ttl), + Expiration: expires, }, }, } diff --git a/plugin/pkg/scheduler/factory/factory_test.go b/plugin/pkg/scheduler/factory/factory_test.go index c1b37ad8e41..e7af8734b04 100644 --- a/plugin/pkg/scheduler/factory/factory_test.go +++ b/plugin/pkg/scheduler/factory/factory_test.go @@ -132,11 +132,13 @@ func PriorityTwo(pod *api.Pod, podLister algorithm.PodLister, minionLister algor } func TestDefaultErrorFunc(t *testing.T) { + grace := int64(30) testPod := &api.Pod{ ObjectMeta: api.ObjectMeta{Name: "foo", Namespace: "bar"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, } handler := util.FakeHandler{