diff --git a/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml b/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml index 7c16cce73f7..b6ed10323cd 100644 --- a/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml +++ b/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml @@ -17,6 +17,7 @@ spec: mountPath: /varlog - name: containers mountPath: /var/lib/docker/containers + terminationGracePeriodSeconds: 30 volumes: - name: varlog hostPath: diff --git a/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml b/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml index db280b16286..554777aa7b7 100644 --- a/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml +++ b/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml @@ -17,6 +17,7 @@ spec: mountPath: /varlog - name: containers mountPath: /var/lib/docker/containers + terminationGracePeriodSeconds: 30 volumes: - name: varlog hostPath: diff --git a/cmd/integration/integration.go b/cmd/integration/integration.go index 33b9cd64cff..00dbbdd397b 100644 --- a/cmd/integration/integration.go +++ b/cmd/integration/integration.go @@ -878,18 +878,24 @@ func runSchedulerNoPhantomPodsTest(client *client.Client) { // Delete a pod to free up room. glog.Infof("Deleting pod %v", bar.Name) - err = client.Pods(api.NamespaceDefault).Delete(bar.Name, nil) + err = client.Pods(api.NamespaceDefault).Delete(bar.Name, api.NewDeleteOptions(1)) if err != nil { glog.Fatalf("FAILED: couldn't delete pod %q: %v", bar.Name, err) } + time.Sleep(2 * time.Second) + pod.ObjectMeta.Name = "phantom.baz" baz, err := client.Pods(api.NamespaceDefault).Create(pod) if err != nil { glog.Fatalf("Failed to create pod: %v, %v", pod, err) } if err := wait.Poll(time.Second, time.Second*60, podRunning(client, baz.Namespace, baz.Name)); err != nil { - glog.Fatalf("FAILED: (Scheduler probably didn't process deletion of 'phantom.bar') Pod never started running: %v", err) + if pod, perr := client.Pods(api.NamespaceDefault).Get("phantom.bar"); perr == nil { + glog.Fatalf("FAILED: 'phantom.bar' was never deleted: %#v", pod) + } else { + glog.Fatalf("FAILED: (Scheduler probably didn't process deletion of 'phantom.bar') Pod never started running: %v", err) + } } glog.Info("Scheduler doesn't make phantom pods: test passed.") diff --git a/contrib/for-tests/network-tester/slow-pod.json b/contrib/for-tests/network-tester/slow-pod.json new file mode 100644 index 00000000000..eab2837603b --- /dev/null +++ b/contrib/for-tests/network-tester/slow-pod.json @@ -0,0 +1,28 @@ +{ + "kind": "Pod", + "apiVersion": "v1beta3", + "metadata": { + "name": "slow-pod", + "labels": { + "name": "nettest" + } + }, + "spec": { + "containers": [ + { + "name": "webserver", + "image": "gcr.io/google_containers/nettest:1.5", + "args": [ + "-service=nettest", + "-delay-shutdown=10" + ], + "ports": [ + { + "containerPort": 8080, + "protocol": "TCP" + } + ] + } + ] + } +} diff --git a/contrib/for-tests/network-tester/slow-rc.json b/contrib/for-tests/network-tester/slow-rc.json new file mode 100644 index 00000000000..52d251e9db2 --- /dev/null +++ b/contrib/for-tests/network-tester/slow-rc.json @@ -0,0 +1,42 @@ +{ + "kind": "ReplicationController", + "apiVersion": "v1beta3", + "metadata": { + "name": "slow-rc", + "labels": { + "name": "nettest" + } + }, + "spec": { + "replicas": 8, + "selector": { + "name": "nettest" + }, + "template": { + "metadata": { + "labels": { + "name": "nettest" + } + }, + "spec": { + "terminationGracePeriodSeconds": 5, + "containers": [ + { + "name": "webserver", + "image": "gcr.io/google_containers/nettest:1.5", + "args": [ + "-service=nettest", + "-delay-shutdown=10" + ], + "ports": [ + { + "containerPort": 8080, + "protocol": "TCP" + } + ] + } + ] + } + } + } +} diff --git a/contrib/for-tests/network-tester/webserver.go b/contrib/for-tests/network-tester/webserver.go index 937abc8b633..d114fb63245 100644 --- a/contrib/for-tests/network-tester/webserver.go +++ b/contrib/for-tests/network-tester/webserver.go @@ -40,7 +40,9 @@ import ( "net" "net/http" "os" + "os/signal" "sync" + "syscall" "time" "github.com/GoogleCloudPlatform/kubernetes/pkg/client" @@ -48,10 +50,11 @@ import ( ) var ( - port = flag.Int("port", 8080, "Port number to serve at.") - peerCount = flag.Int("peers", 8, "Must find at least this many peers for the test to pass.") - service = flag.String("service", "nettest", "Service to find other network test pods in.") - namespace = flag.String("namespace", "default", "Namespace of this pod. TODO: kubernetes should make this discoverable.") + port = flag.Int("port", 8080, "Port number to serve at.") + peerCount = flag.Int("peers", 8, "Must find at least this many peers for the test to pass.") + service = flag.String("service", "nettest", "Service to find other network test pods in.") + namespace = flag.String("namespace", "default", "Namespace of this pod. TODO: kubernetes should make this discoverable.") + delayShutdown = flag.Int("delay-shutdown", 0, "Number of seconds to delay shutdown when receiving SIGTERM.") ) // State tracks the internal state of our little http server. @@ -179,6 +182,17 @@ func main() { log.Fatalf("Error getting hostname: %v", err) } + if *delayShutdown > 0 { + termCh := make(chan os.Signal) + signal.Notify(termCh, syscall.SIGTERM) + go func() { + <-termCh + log.Printf("Sleeping %d seconds before exit ...", *delayShutdown) + time.Sleep(time.Duration(*delayShutdown) * time.Second) + os.Exit(0) + }() + } + state := State{ Hostname: hostname, StillContactingPeers: true, diff --git a/hack/lib/test.sh b/hack/lib/test.sh index 8568d95f248..2278db8e46c 100644 --- a/hack/lib/test.sh +++ b/hack/lib/test.sh @@ -23,7 +23,7 @@ readonly red=$(tput setaf 1) readonly green=$(tput setaf 2) kube::test::clear_all() { - kubectl delete "${kube_flags[@]}" rc,pods --all + kubectl delete "${kube_flags[@]}" rc,pods --all --grace-period=0 } kube::test::get_object_assert() { diff --git a/hack/test-cmd.sh b/hack/test-cmd.sh index d269eecaaf3..af02cf1b22b 100755 --- a/hack/test-cmd.sh +++ b/hack/test-cmd.sh @@ -179,6 +179,11 @@ for version in "${kube_api_versions[@]}"; do kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:' # Command kubectl delete pod valid-pod "${kube_flags[@]}" + # Post-condition: pod is still there, in terminating + kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:' + [[ "$(kubectl get pods "${kube_flags[@]}" | grep Terminating)" ]] + # Command + kubectl delete pod valid-pod "${kube_flags[@]}" --grace-period=0 # Post-condition: no POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" '' @@ -194,7 +199,7 @@ for version in "${kube_api_versions[@]}"; do # Pre-condition: valid-pod POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:' # Command - kubectl delete -f examples/limitrange/valid-pod.json "${kube_flags[@]}" + kubectl delete -f examples/limitrange/valid-pod.json "${kube_flags[@]}" --grace-period=0 # Post-condition: no POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" '' @@ -210,7 +215,7 @@ for version in "${kube_api_versions[@]}"; do # Pre-condition: valid-pod POD is running kube::test::get_object_assert "pods -l'name in (valid-pod)'" '{{range.items}}{{$id_field}}:{{end}}' 'valid-pod:' # Command - kubectl delete pods -l'name in (valid-pod)' "${kube_flags[@]}" + kubectl delete pods -l'name in (valid-pod)' "${kube_flags[@]}" --grace-period=0 # Post-condition: no POD is running kube::test::get_object_assert "pods -l'name in (valid-pod)'" '{{range.items}}{{$id_field}}:{{end}}' '' @@ -242,7 +247,7 @@ for version in "${kube_api_versions[@]}"; do # Pre-condition: valid-pod POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:' # Command - kubectl delete --all pods "${kube_flags[@]}" # --all remove all the pods + kubectl delete --all pods "${kube_flags[@]}" --grace-period=0 # --all remove all the pods # Post-condition: no POD is running kube::test::get_object_assert "pods -l'name in (valid-pod)'" '{{range.items}}{{$id_field}}:{{end}}' '' @@ -259,7 +264,7 @@ for version in "${kube_api_versions[@]}"; do # Pre-condition: valid-pod and redis-proxy PODs are running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'redis-proxy:valid-pod:' # Command - kubectl delete pods valid-pod redis-proxy "${kube_flags[@]}" # delete multiple pods at once + kubectl delete pods valid-pod redis-proxy "${kube_flags[@]}" --grace-period=0 # delete multiple pods at once # Post-condition: no POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" '' @@ -276,7 +281,7 @@ for version in "${kube_api_versions[@]}"; do # Pre-condition: valid-pod and redis-proxy PODs are running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'redis-proxy:valid-pod:' # Command - kubectl stop pods valid-pod redis-proxy "${kube_flags[@]}" # stop multiple pods at once + kubectl stop pods valid-pod redis-proxy "${kube_flags[@]}" --grace-period=0 # stop multiple pods at once # Post-condition: no POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" '' @@ -300,7 +305,7 @@ for version in "${kube_api_versions[@]}"; do # Pre-condition: valid-pod POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:' # Command - kubectl delete pods -lnew-name=new-valid-pod "${kube_flags[@]}" + kubectl delete pods -lnew-name=new-valid-pod --grace-period=0 "${kube_flags[@]}" # Post-condition: no POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" '' @@ -332,7 +337,7 @@ for version in "${kube_api_versions[@]}"; do # Pre-condition: valid-pod POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:' # Command - kubectl delete pods -l'name in (valid-pod-super-sayan)' "${kube_flags[@]}" + kubectl delete pods -l'name in (valid-pod-super-sayan)' --grace-period=0 "${kube_flags[@]}" # Post-condition: no POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" '' @@ -353,7 +358,7 @@ for version in "${kube_api_versions[@]}"; do # Pre-condition: valid-pod POD is running kube::test::get_object_assert 'pods --namespace=other' "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:' # Command - kubectl delete "${kube_flags[@]}" pod --namespace=other valid-pod + kubectl delete "${kube_flags[@]}" pod --namespace=other valid-pod --grace-period=0 # Post-condition: no POD is running kube::test::get_object_assert 'pods --namespace=other' "{{range.items}}{{$id_field}}:{{end}}" '' diff --git a/pkg/api/deep_copy_generated.go b/pkg/api/deep_copy_generated.go index cc3202a19d3..0a28c7f9caa 100644 --- a/pkg/api/deep_copy_generated.go +++ b/pkg/api/deep_copy_generated.go @@ -1002,6 +1002,12 @@ func deepCopy_api_ObjectMeta(in ObjectMeta, out *ObjectMeta, c *conversion.Clone } else { out.DeletionTimestamp = nil } + if in.DeletionGracePeriodSeconds != nil { + out.DeletionGracePeriodSeconds = new(int64) + *out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds + } else { + out.DeletionGracePeriodSeconds = nil + } if in.Labels != nil { out.Labels = make(map[string]string) for key, val := range in.Labels { diff --git a/pkg/api/rest/create.go b/pkg/api/rest/create.go index 6b0951f2f40..82fd569ec84 100644 --- a/pkg/api/rest/create.go +++ b/pkg/api/rest/create.go @@ -59,6 +59,8 @@ func BeforeCreate(strategy RESTCreateStrategy, ctx api.Context, obj runtime.Obje } else { objectMeta.Namespace = api.NamespaceNone } + objectMeta.DeletionTimestamp = nil + objectMeta.DeletionGracePeriodSeconds = nil strategy.PrepareForCreate(obj) api.FillObjectMetaSystemFields(ctx, objectMeta) api.GenerateName(strategy, objectMeta) diff --git a/pkg/api/rest/delete.go b/pkg/api/rest/delete.go index 950fada6c74..369e6a0e8a2 100644 --- a/pkg/api/rest/delete.go +++ b/pkg/api/rest/delete.go @@ -40,12 +40,37 @@ func BeforeDelete(strategy RESTDeleteStrategy, ctx api.Context, obj runtime.Obje if strategy == nil { return false, false, nil } - _, _, kerr := objectMetaAndKind(strategy, obj) + objectMeta, _, kerr := objectMetaAndKind(strategy, obj) if kerr != nil { return false, false, kerr } + + // if the object is already being deleted + if objectMeta.DeletionTimestamp != nil { + // if we are already being deleted, we may only shorten the deletion grace period + // this means the object was gracefully deleted previously but deletionGracePeriodSeconds was not set, + // so we force deletion immediately + if objectMeta.DeletionGracePeriodSeconds == nil { + return false, false, nil + } + // only a shorter grace period may be provided by a user + if options.GracePeriodSeconds != nil { + period := int64(*options.GracePeriodSeconds) + if period > *objectMeta.DeletionGracePeriodSeconds { + return false, true, nil + } + objectMeta.DeletionGracePeriodSeconds = &period + options.GracePeriodSeconds = &period + return true, false, nil + } + // graceful deletion is pending, do nothing + options.GracePeriodSeconds = objectMeta.DeletionGracePeriodSeconds + return false, true, nil + } + if !strategy.CheckGracefulDelete(obj, options) { return false, false, nil } + objectMeta.DeletionGracePeriodSeconds = options.GracePeriodSeconds return true, false, nil } diff --git a/pkg/api/rest/resttest/resttest.go b/pkg/api/rest/resttest/resttest.go index 269180d8090..8b6e917b1dc 100644 --- a/pkg/api/rest/resttest/resttest.go +++ b/pkg/api/rest/resttest/resttest.go @@ -329,7 +329,9 @@ func (t *Tester) TestDeleteNonExist(createFn func() runtime.Object) { func (t *Tester) TestDeleteGraceful(createFn func() runtime.Object, expectedGrace int64, wasGracefulFn func() bool) { t.TestDeleteGracefulHasDefault(createFn(), expectedGrace, wasGracefulFn) + t.TestDeleteGracefulWithValue(createFn(), expectedGrace, wasGracefulFn) t.TestDeleteGracefulUsesZeroOnNil(createFn(), 0) + t.TestDeleteGracefulExtend(createFn(), expectedGrace, wasGracefulFn) } func (t *Tester) TestDeleteNoGraceful(createFn func() runtime.Object, wasGracefulFn func() bool) { @@ -362,12 +364,99 @@ func (t *Tester) TestDeleteGracefulHasDefault(existing runtime.Object, expectedG if err != nil { t.Errorf("unexpected error: %v", err) } - if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); err != nil { + if !wasGracefulFn() { + t.Errorf("did not gracefully delete resource") + return + } + object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name) + if err != nil { t.Errorf("unexpected error, object should exist: %v", err) + return + } + objectMeta, err = api.ObjectMetaFor(object) + if err != nil { + t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object) + } + if objectMeta.DeletionTimestamp == nil { + t.Errorf("did not set deletion timestamp") + } + if objectMeta.DeletionGracePeriodSeconds == nil { + t.Fatalf("did not set deletion grace period seconds") + } + if *objectMeta.DeletionGracePeriodSeconds != expectedGrace { + t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds) + } +} + +func (t *Tester) TestDeleteGracefulWithValue(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) { + objectMeta, err := api.ObjectMetaFor(existing) + if err != nil { + t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, existing) + } + + ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace) + _, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace+2)) + if err != nil { + t.Errorf("unexpected error: %v", err) } if !wasGracefulFn() { t.Errorf("did not gracefully delete resource") } + object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name) + if err != nil { + t.Errorf("unexpected error, object should exist: %v", err) + } + objectMeta, err = api.ObjectMetaFor(object) + if err != nil { + t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object) + } + if objectMeta.DeletionTimestamp == nil { + t.Errorf("did not set deletion timestamp") + } + if objectMeta.DeletionGracePeriodSeconds == nil { + t.Fatalf("did not set deletion grace period seconds") + } + if *objectMeta.DeletionGracePeriodSeconds != expectedGrace+2 { + t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds) + } +} + +func (t *Tester) TestDeleteGracefulExtend(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) { + objectMeta, err := api.ObjectMetaFor(existing) + if err != nil { + t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, existing) + } + + ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace) + _, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace)) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if !wasGracefulFn() { + t.Errorf("did not gracefully delete resource") + } + // second delete duration is ignored + _, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace+2)) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name) + if err != nil { + t.Errorf("unexpected error, object should exist: %v", err) + } + objectMeta, err = api.ObjectMetaFor(object) + if err != nil { + t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object) + } + if objectMeta.DeletionTimestamp == nil { + t.Errorf("did not set deletion timestamp") + } + if objectMeta.DeletionGracePeriodSeconds == nil { + t.Fatalf("did not set deletion grace period seconds") + } + if *objectMeta.DeletionGracePeriodSeconds != expectedGrace { + t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds) + } } func (t *Tester) TestDeleteGracefulUsesZeroOnNil(existing runtime.Object, expectedGrace int64) { @@ -382,6 +471,6 @@ func (t *Tester) TestDeleteGracefulUsesZeroOnNil(existing runtime.Object, expect t.Errorf("unexpected error: %v", err) } if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); !errors.IsNotFound(err) { - t.Errorf("unexpected error, object should exist: %v", err) + t.Errorf("unexpected error, object should not exist: %v", err) } } diff --git a/pkg/api/serialization_test.go b/pkg/api/serialization_test.go index 08d1e96065f..74b8d7fe469 100644 --- a/pkg/api/serialization_test.go +++ b/pkg/api/serialization_test.go @@ -155,6 +155,7 @@ func TestRoundTripTypes(t *testing.T) { } func TestEncode_Ptr(t *testing.T) { + grace := int64(30) pod := &api.Pod{ ObjectMeta: api.ObjectMeta{ Labels: map[string]string{"name": "foo"}, @@ -162,6 +163,8 @@ func TestEncode_Ptr(t *testing.T) { Spec: api.PodSpec{ RestartPolicy: api.RestartPolicyAlways, DNSPolicy: api.DNSClusterFirst, + + TerminationGracePeriodSeconds: &grace, }, } obj := runtime.Object(pod) diff --git a/pkg/api/testing/fuzzer.go b/pkg/api/testing/fuzzer.go index f60d68c4cbf..f0480f4a455 100644 --- a/pkg/api/testing/fuzzer.go +++ b/pkg/api/testing/fuzzer.go @@ -88,6 +88,15 @@ func FuzzerFor(t *testing.T, version string, src rand.Source) *fuzz.Fuzzer { j.LabelSelector, _ = labels.Parse("a=b") j.FieldSelector, _ = fields.ParseSelector("a=b") }, + func(j *api.PodSpec, c fuzz.Continue) { + c.FuzzNoCustom(j) + // has a default value + ttl := int64(30) + if c.RandBool() { + ttl = int64(c.Uint32()) + } + j.TerminationGracePeriodSeconds = &ttl + }, func(j *api.PodPhase, c fuzz.Continue) { statuses := []api.PodPhase{api.PodPending, api.PodRunning, api.PodFailed, api.PodUnknown} *j = statuses[c.Rand.Intn(len(statuses))] diff --git a/pkg/api/types.go b/pkg/api/types.go index a016ff25a94..39985b1d289 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -133,6 +133,10 @@ type ObjectMeta struct { // will send a hard termination signal to the container. DeletionTimestamp *util.Time `json:"deletionTimestamp,omitempty"` + // DeletionGracePeriodSeconds records the graceful deletion value set when graceful deletion + // was requested. Represents the most recent grace period, and may only be shortened once set. + DeletionGracePeriodSeconds *int64 `json:"deletionGracePeriodSeconds,omitempty"` + // Labels are key value pairs that may be used to scope and select individual resources. // Label keys are of the form: // label-key ::= prefixed-name | name diff --git a/pkg/api/v1/conversion_generated.go b/pkg/api/v1/conversion_generated.go index 2ada8393a58..f7864faee68 100644 --- a/pkg/api/v1/conversion_generated.go +++ b/pkg/api/v1/conversion_generated.go @@ -1087,6 +1087,12 @@ func convert_api_ObjectMeta_To_v1_ObjectMeta(in *api.ObjectMeta, out *ObjectMeta } else { out.DeletionTimestamp = nil } + if in.DeletionGracePeriodSeconds != nil { + out.DeletionGracePeriodSeconds = new(int64) + *out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds + } else { + out.DeletionGracePeriodSeconds = nil + } if in.Labels != nil { out.Labels = make(map[string]string) for key, val := range in.Labels { @@ -3362,6 +3368,12 @@ func convert_v1_ObjectMeta_To_api_ObjectMeta(in *ObjectMeta, out *api.ObjectMeta } else { out.DeletionTimestamp = nil } + if in.DeletionGracePeriodSeconds != nil { + out.DeletionGracePeriodSeconds = new(int64) + *out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds + } else { + out.DeletionGracePeriodSeconds = nil + } if in.Labels != nil { out.Labels = make(map[string]string) for key, val := range in.Labels { diff --git a/pkg/api/v1/deep_copy_generated.go b/pkg/api/v1/deep_copy_generated.go index d724895bf8c..c45de5195c4 100644 --- a/pkg/api/v1/deep_copy_generated.go +++ b/pkg/api/v1/deep_copy_generated.go @@ -933,6 +933,12 @@ func deepCopy_v1_ObjectMeta(in ObjectMeta, out *ObjectMeta, c *conversion.Cloner } else { out.DeletionTimestamp = nil } + if in.DeletionGracePeriodSeconds != nil { + out.DeletionGracePeriodSeconds = new(int64) + *out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds + } else { + out.DeletionGracePeriodSeconds = nil + } if in.Labels != nil { out.Labels = make(map[string]string) for key, val := range in.Labels { diff --git a/pkg/api/v1/defaults.go b/pkg/api/v1/defaults.go index cfc8a1fdec9..f2ac2ec43e1 100644 --- a/pkg/api/v1/defaults.go +++ b/pkg/api/v1/defaults.go @@ -98,6 +98,10 @@ func addDefaultingFuncs() { if obj.HostNetwork { defaultHostNetworkPorts(&obj.Containers) } + if obj.TerminationGracePeriodSeconds == nil { + period := int64(DefaultTerminationGracePeriodSeconds) + obj.TerminationGracePeriodSeconds = &period + } }, func(obj *Probe) { if obj.TimeoutSeconds == 0 { diff --git a/pkg/api/v1/types.go b/pkg/api/v1/types.go index 63b17d0a8c4..4eb86b2168c 100644 --- a/pkg/api/v1/types.go +++ b/pkg/api/v1/types.go @@ -131,6 +131,10 @@ type ObjectMeta struct { // will send a hard termination signal to the container. DeletionTimestamp *util.Time `json:"deletionTimestamp,omitempty" description:"RFC 3339 date and time at which the object will be deleted; populated by the system when a graceful deletion is requested, read-only; if not set, graceful deletion of the object has not been requested"` + // DeletionGracePeriodSeconds records the graceful deletion value set when graceful deletion + // was requested. Represents the most recent grace period, and may only be shortened once set. + DeletionGracePeriodSeconds *int64 `json:"deletionGracePeriodSeconds,omitempty" description:"number of seconds allowed for this object to gracefully terminate before it will be removed from the system; only set when deletionTimestamp is also set, read-only; may only be shortened"` + // Labels are key value pairs that may be used to scope and select individual resources. // TODO: replace map[string]string with labels.LabelSet type Labels map[string]string `json:"labels,omitempty" description:"map of string keys and values that can be used to organize and categorize objects; may match selectors of replication controllers and services"` @@ -838,6 +842,8 @@ const ( // DNSDefault indicates that the pod should use the default (as // determined by kubelet) DNS settings. DNSDefault DNSPolicy = "Default" + + DefaultTerminationGracePeriodSeconds = 30 ) // PodSpec is a description of a pod @@ -852,7 +858,7 @@ type PodSpec struct { // The grace period is the duration in seconds after the processes running in the pod are sent // a termination signal and the time when the processes are forcibly halted with a kill signal. // Set this value longer than the expected cleanup time for your process. - TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process"` + TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process; defaults to 30 seconds"` ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty" description:"optional duration in seconds the pod may be active on the node relative to StartTime before the system will actively try to mark it failed and kill associated containers; value must be a positive integer` // Optional: Set DNS policy. Defaults to "ClusterFirst" DNSPolicy DNSPolicy `json:"dnsPolicy,omitempty" description:"DNS policy for containers within the pod; one of 'ClusterFirst' or 'Default'"` diff --git a/pkg/api/v1beta3/conversion_generated.go b/pkg/api/v1beta3/conversion_generated.go index 4799a52ac35..10591677945 100644 --- a/pkg/api/v1beta3/conversion_generated.go +++ b/pkg/api/v1beta3/conversion_generated.go @@ -945,6 +945,12 @@ func convert_api_ObjectMeta_To_v1beta3_ObjectMeta(in *api.ObjectMeta, out *Objec } else { out.DeletionTimestamp = nil } + if in.DeletionGracePeriodSeconds != nil { + out.DeletionGracePeriodSeconds = new(int64) + *out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds + } else { + out.DeletionGracePeriodSeconds = nil + } if in.Labels != nil { out.Labels = make(map[string]string) for key, val := range in.Labels { @@ -3034,6 +3040,12 @@ func convert_v1beta3_ObjectMeta_To_api_ObjectMeta(in *ObjectMeta, out *api.Objec } else { out.DeletionTimestamp = nil } + if in.DeletionGracePeriodSeconds != nil { + out.DeletionGracePeriodSeconds = new(int64) + *out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds + } else { + out.DeletionGracePeriodSeconds = nil + } if in.Labels != nil { out.Labels = make(map[string]string) for key, val := range in.Labels { diff --git a/pkg/api/v1beta3/deep_copy_generated.go b/pkg/api/v1beta3/deep_copy_generated.go index 19f5f118c38..194e429370b 100644 --- a/pkg/api/v1beta3/deep_copy_generated.go +++ b/pkg/api/v1beta3/deep_copy_generated.go @@ -937,6 +937,12 @@ func deepCopy_v1beta3_ObjectMeta(in ObjectMeta, out *ObjectMeta, c *conversion.C } else { out.DeletionTimestamp = nil } + if in.DeletionGracePeriodSeconds != nil { + out.DeletionGracePeriodSeconds = new(int64) + *out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds + } else { + out.DeletionGracePeriodSeconds = nil + } if in.Labels != nil { out.Labels = make(map[string]string) for key, val := range in.Labels { diff --git a/pkg/api/v1beta3/defaults.go b/pkg/api/v1beta3/defaults.go index d542e5f3047..978073ab86e 100644 --- a/pkg/api/v1beta3/defaults.go +++ b/pkg/api/v1beta3/defaults.go @@ -102,6 +102,10 @@ func addDefaultingFuncs() { if obj.HostNetwork { defaultHostNetworkPorts(&obj.Containers) } + if obj.TerminationGracePeriodSeconds == nil { + period := int64(DefaultTerminationGracePeriodSeconds) + obj.TerminationGracePeriodSeconds = &period + } }, func(obj *Probe) { if obj.TimeoutSeconds == 0 { diff --git a/pkg/api/v1beta3/types.go b/pkg/api/v1beta3/types.go index 559f5cc858f..a00389ef73f 100644 --- a/pkg/api/v1beta3/types.go +++ b/pkg/api/v1beta3/types.go @@ -131,6 +131,10 @@ type ObjectMeta struct { // will send a hard termination signal to the container. DeletionTimestamp *util.Time `json:"deletionTimestamp,omitempty" description:"RFC 3339 date and time at which the object will be deleted; populated by the system when a graceful deletion is requested, read-only; if not set, graceful deletion of the object has not been requested"` + // DeletionGracePeriodSeconds records the graceful deletion value set when graceful deletion + // was requested. Represents the most recent grace period, and may only be shortened once set. + DeletionGracePeriodSeconds *int64 `json:"deletionGracePeriodSeconds,omitempty" description:"number of seconds allowed for this object to gracefully terminate before it will be removed from the system; only set when deletionTimestamp is also set, read-only; may only be shortened"` + // Labels are key value pairs that may be used to scope and select individual resources. // TODO: replace map[string]string with labels.LabelSet type Labels map[string]string `json:"labels,omitempty" description:"map of string keys and values that can be used to organize and categorize objects; may match selectors of replication controllers and services"` @@ -842,6 +846,8 @@ const ( // DNSDefault indicates that the pod should use the default (as // determined by kubelet) DNS settings. DNSDefault DNSPolicy = "Default" + + DefaultTerminationGracePeriodSeconds = 30 ) // PodSpec is a description of a pod @@ -856,7 +862,7 @@ type PodSpec struct { // The grace period is the duration in seconds after the processes running in the pod are sent // a termination signal and the time when the processes are forcibly halted with a kill signal. // Set this value longer than the expected cleanup time for your process. - TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process"` + TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process; defaults to 30 seconds"` ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty" description:"optional duration in seconds the pod may be active on the node relative to StartTime before the system will actively try to mark it failed and kill associated containers; value must be a positive integer` // Optional: Set DNS policy. Defaults to "ClusterFirst" DNSPolicy DNSPolicy `json:"dnsPolicy,omitempty" description:"DNS policy for containers within the pod; one of 'ClusterFirst' or 'Default'"` diff --git a/pkg/api/validation/validation.go b/pkg/api/validation/validation.go index 10065db79d8..42bc7ad5b1b 100644 --- a/pkg/api/validation/validation.go +++ b/pkg/api/validation/validation.go @@ -252,6 +252,16 @@ func ValidateObjectMetaUpdate(old, meta *api.ObjectMeta) errs.ValidationErrorLis } else { meta.CreationTimestamp = old.CreationTimestamp } + // an object can never remove a deletion timestamp or clear/change grace period seconds + if !old.DeletionTimestamp.IsZero() { + meta.DeletionTimestamp = old.DeletionTimestamp + } + if old.DeletionGracePeriodSeconds != nil && meta.DeletionGracePeriodSeconds == nil { + meta.DeletionGracePeriodSeconds = old.DeletionGracePeriodSeconds + } + if meta.DeletionGracePeriodSeconds != nil && *meta.DeletionGracePeriodSeconds != *old.DeletionGracePeriodSeconds { + allErrs = append(allErrs, errs.NewFieldInvalid("deletionGracePeriodSeconds", meta.DeletionGracePeriodSeconds, "field is immutable; may only be changed via deletion")) + } // Reject updates that don't specify a resource version if meta.ResourceVersion == "" { diff --git a/pkg/controller/controller_utils.go b/pkg/controller/controller_utils.go index 53954232dcd..e1ed38db6c8 100644 --- a/pkg/controller/controller_utils.go +++ b/pkg/controller/controller_utils.go @@ -294,7 +294,8 @@ func filterActivePods(pods []api.Pod) []*api.Pod { var result []*api.Pod for i := range pods { if api.PodSucceeded != pods[i].Status.Phase && - api.PodFailed != pods[i].Status.Phase { + api.PodFailed != pods[i].Status.Phase && + pods[i].DeletionTimestamp == nil { result = append(result, &pods[i]) } } diff --git a/pkg/controller/replication_controller.go b/pkg/controller/replication_controller.go index 84f6895cc32..0cec5115be4 100644 --- a/pkg/controller/replication_controller.go +++ b/pkg/controller/replication_controller.go @@ -204,6 +204,12 @@ func (rm *ReplicationManager) getPodControllers(pod *api.Pod) *api.ReplicationCo // When a pod is created, enqueue the controller that manages it and update it's expectations. func (rm *ReplicationManager) addPod(obj interface{}) { pod := obj.(*api.Pod) + if pod.DeletionTimestamp != nil { + // on a restart of the controller manager, it's possible a new pod shows up in a state that + // is already pending deletion. Prevent the pod from being a creation observation. + rm.deletePod(pod) + return + } if rc := rm.getPodControllers(pod); rc != nil { rm.expectations.CreationObserved(rc) rm.enqueueController(rc) @@ -220,6 +226,15 @@ func (rm *ReplicationManager) updatePod(old, cur interface{}) { } // TODO: Write a unittest for this case curPod := cur.(*api.Pod) + if curPod.DeletionTimestamp != nil { + // when a pod is deleted gracefully it's deletion timestamp is first modified to reflect a grace period, + // and after such time has passed, the kubelet actually deletes it from the store. We receive an update + // for modification of the deletion timestamp and expect an rc to create more replicas asap, not wait + // until the kubelet actually deletes the pod. This is different from the Phase of a pod changing, because + // an rc never initiates a phase change, and so is never asleep waiting for the same. + rm.deletePod(curPod) + return + } if rc := rm.getPodControllers(curPod); rc != nil { rm.enqueueController(rc) } diff --git a/pkg/kubectl/cmd/get_test.go b/pkg/kubectl/cmd/get_test.go index d68b4d2a095..7f6c0288e77 100644 --- a/pkg/kubectl/cmd/get_test.go +++ b/pkg/kubectl/cmd/get_test.go @@ -38,6 +38,7 @@ import ( ) func testData() (*api.PodList, *api.ServiceList, *api.ReplicationControllerList) { + grace := int64(30) pods := &api.PodList{ ListMeta: api.ListMeta{ ResourceVersion: "15", @@ -46,15 +47,17 @@ func testData() (*api.PodList, *api.ServiceList, *api.ReplicationControllerList) { ObjectMeta: api.ObjectMeta{Name: "foo", Namespace: "test", ResourceVersion: "10"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "bar", Namespace: "test", ResourceVersion: "11"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, @@ -503,6 +506,7 @@ func TestGetMultipleTypeObjectsWithDirectReference(t *testing.T) { } } func watchTestData() ([]api.Pod, []watch.Event) { + grace := int64(30) pods := []api.Pod{ { ObjectMeta: api.ObjectMeta{ @@ -511,8 +515,9 @@ func watchTestData() ([]api.Pod, []watch.Event) { ResourceVersion: "10", }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, } @@ -526,8 +531,9 @@ func watchTestData() ([]api.Pod, []watch.Event) { ResourceVersion: "11", }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, @@ -540,8 +546,9 @@ func watchTestData() ([]api.Pod, []watch.Event) { ResourceVersion: "12", }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, diff --git a/pkg/kubectl/cmd/util/helpers_test.go b/pkg/kubectl/cmd/util/helpers_test.go index 232eefea14a..61c3a09d524 100644 --- a/pkg/kubectl/cmd/util/helpers_test.go +++ b/pkg/kubectl/cmd/util/helpers_test.go @@ -29,6 +29,7 @@ import ( ) func TestMerge(t *testing.T) { + grace := int64(30) tests := []struct { obj runtime.Object fragment string @@ -49,8 +50,9 @@ func TestMerge(t *testing.T) { Name: "foo", }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, @@ -117,8 +119,9 @@ func TestMerge(t *testing.T) { VolumeSource: api.VolumeSource{EmptyDir: &api.EmptyDirVolumeSource{}}, }, }, - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, diff --git a/pkg/kubectl/describe.go b/pkg/kubectl/describe.go index a3947047e59..d6cc6ce12a5 100644 --- a/pkg/kubectl/describe.go +++ b/pkg/kubectl/describe.go @@ -271,7 +271,12 @@ func describePod(pod *api.Pod, rcs []api.ReplicationController, events *api.Even fmt.Fprintf(out, "Image(s):\t%s\n", makeImageList(&pod.Spec)) fmt.Fprintf(out, "Node:\t%s\n", pod.Spec.NodeName+"/"+pod.Status.HostIP) fmt.Fprintf(out, "Labels:\t%s\n", formatLabels(pod.Labels)) - fmt.Fprintf(out, "Status:\t%s\n", string(pod.Status.Phase)) + if pod.DeletionTimestamp != nil { + fmt.Fprintf(out, "Status:\tTerminating (expires %s)\n", pod.DeletionTimestamp.Time.Format(time.RFC1123Z)) + fmt.Fprintf(out, "Termination Grace Period:\t%ss\n", pod.DeletionGracePeriodSeconds) + } else { + fmt.Fprintf(out, "Status:\t%s\n", string(pod.Status.Phase)) + } fmt.Fprintf(out, "Replication Controllers:\t%s\n", printReplicationControllersByLabels(rcs)) fmt.Fprintf(out, "Containers:\n") describeContainers(pod.Status.ContainerStatuses, out) diff --git a/pkg/kubectl/resource/builder_test.go b/pkg/kubectl/resource/builder_test.go index 602d0cb79ee..b16734be09d 100644 --- a/pkg/kubectl/resource/builder_test.go +++ b/pkg/kubectl/resource/builder_test.go @@ -83,6 +83,7 @@ func fakeClientWith(testName string, t *testing.T, data map[string]string) Clien } func testData() (*api.PodList, *api.ServiceList) { + grace := int64(30) pods := &api.PodList{ ListMeta: api.ListMeta{ ResourceVersion: "15", @@ -91,15 +92,17 @@ func testData() (*api.PodList, *api.ServiceList) { { ObjectMeta: api.ObjectMeta{Name: "foo", Namespace: "test", ResourceVersion: "10"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "bar", Namespace: "test", ResourceVersion: "11"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, diff --git a/pkg/kubectl/resource/helper_test.go b/pkg/kubectl/resource/helper_test.go index 1c9d1aab96b..243d41c9528 100644 --- a/pkg/kubectl/resource/helper_test.go +++ b/pkg/kubectl/resource/helper_test.go @@ -128,6 +128,7 @@ func TestHelperCreate(t *testing.T) { return true } + grace := int64(30) tests := []struct { Resp *http.Response RespFunc client.HTTPClientFunc @@ -172,8 +173,9 @@ func TestHelperCreate(t *testing.T) { ExpectObject: &api.Pod{ ObjectMeta: api.ObjectMeta{Name: "foo"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, Resp: &http.Response{StatusCode: http.StatusOK, Body: objBody(&api.Status{Status: api.StatusSuccess})}, @@ -381,6 +383,7 @@ func TestHelperUpdate(t *testing.T) { return true } + grace := int64(30) tests := []struct { Resp *http.Response RespFunc client.HTTPClientFunc @@ -418,8 +421,9 @@ func TestHelperUpdate(t *testing.T) { ExpectObject: &api.Pod{ ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "10"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, Overwrite: true, diff --git a/pkg/kubectl/resource_printer.go b/pkg/kubectl/resource_printer.go index 90ad20f3ef8..478494e2ac3 100644 --- a/pkg/kubectl/resource_printer.go +++ b/pkg/kubectl/resource_printer.go @@ -404,13 +404,17 @@ func printPod(pod *api.Pod, w io.Writer, withNamespace bool) error { name = pod.Name } + phase := string(pod.Status.Phase) + if pod.DeletionTimestamp != nil { + phase = "Terminating" + } _, err := fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", name, pod.Status.PodIP, "", "", podHostString(pod.Spec.NodeName, pod.Status.HostIP), formatLabels(pod.Labels), - pod.Status.Phase, + phase, translateTimestamp(pod.CreationTimestamp), pod.Status.Message, ) diff --git a/pkg/kubectl/rolling_updater_test.go b/pkg/kubectl/rolling_updater_test.go index 8374bd433d2..1a2d25083fa 100644 --- a/pkg/kubectl/rolling_updater_test.go +++ b/pkg/kubectl/rolling_updater_test.go @@ -628,6 +628,7 @@ func TestUpdateExistingReplicationController(t *testing.T) { func TestUpdateWithRetries(t *testing.T) { codec := testapi.Codec() + grace := int64(30) rc := &api.ReplicationController{ ObjectMeta: api.ObjectMeta{Name: "rc", Labels: map[string]string{ @@ -645,8 +646,9 @@ func TestUpdateWithRetries(t *testing.T) { }, }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, diff --git a/pkg/kubelet/config/common_test.go b/pkg/kubelet/config/common_test.go index d311c6b3af5..8d3fd6e564f 100644 --- a/pkg/kubelet/config/common_test.go +++ b/pkg/kubelet/config/common_test.go @@ -30,6 +30,7 @@ import ( func noDefault(*api.Pod) error { return nil } func TestDecodeSinglePod(t *testing.T) { + grace := int64(30) pod := &api.Pod{ TypeMeta: api.TypeMeta{ APIVersion: "", @@ -40,8 +41,9 @@ func TestDecodeSinglePod(t *testing.T) { Namespace: "mynamespace", }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, Containers: []api.Container{{ Name: "image", Image: "test/image", @@ -93,6 +95,7 @@ func TestDecodeSinglePod(t *testing.T) { } func TestDecodePodList(t *testing.T) { + grace := int64(30) pod := &api.Pod{ TypeMeta: api.TypeMeta{ APIVersion: "", @@ -103,8 +106,9 @@ func TestDecodePodList(t *testing.T) { Namespace: "mynamespace", }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, Containers: []api.Container{{ Name: "image", Image: "test/image", diff --git a/pkg/kubelet/config/config.go b/pkg/kubelet/config/config.go index f82ba39fa48..8c09ee59786 100644 --- a/pkg/kubelet/config/config.go +++ b/pkg/kubelet/config/config.go @@ -209,9 +209,8 @@ func (s *podStorage) merge(source string, change interface{}) (adds, updates, de for _, ref := range filtered { name := kubecontainer.GetPodFullName(ref) if existing, found := pods[name]; found { - if !reflect.DeepEqual(existing.Spec, ref.Spec) { + if checkAndUpdatePod(existing, ref) { // this is an update - existing.Spec = ref.Spec updates.Pods = append(updates.Pods, existing) continue } @@ -252,9 +251,8 @@ func (s *podStorage) merge(source string, change interface{}) (adds, updates, de name := kubecontainer.GetPodFullName(ref) if existing, found := oldPods[name]; found { pods[name] = existing - if !reflect.DeepEqual(existing.Spec, ref.Spec) { + if checkAndUpdatePod(existing, ref) { // this is an update - existing.Spec = ref.Spec updates.Pods = append(updates.Pods, existing) continue } @@ -325,6 +323,23 @@ func filterInvalidPods(pods []*api.Pod, source string, recorder record.EventReco return } +// checkAndUpdatePod updates existing if ref makes a meaningful change and returns true, or +// returns false if there was no update. +func checkAndUpdatePod(existing, ref *api.Pod) bool { + // TODO: it would be better to update the whole object and only preserve certain things + // like the source annotation or the UID (to ensure safety) + if reflect.DeepEqual(existing.Spec, ref.Spec) && + reflect.DeepEqual(existing.DeletionTimestamp, ref.DeletionTimestamp) && + reflect.DeepEqual(existing.DeletionGracePeriodSeconds, ref.DeletionGracePeriodSeconds) { + return false + } + // this is an update + existing.Spec = ref.Spec + existing.DeletionTimestamp = ref.DeletionTimestamp + existing.DeletionGracePeriodSeconds = ref.DeletionGracePeriodSeconds + return true +} + // Sync sends a copy of the current state through the update channel. func (s *podStorage) Sync() { s.updateLock.Lock() diff --git a/pkg/kubelet/config/file_test.go b/pkg/kubelet/config/file_test.go index 7d45ae479e2..4faf9485615 100644 --- a/pkg/kubelet/config/file_test.go +++ b/pkg/kubelet/config/file_test.go @@ -163,6 +163,7 @@ func TestReadContainerManifestFromFile(t *testing.T) { func TestReadPodsFromFile(t *testing.T) { hostname := "random-test-hostname" + grace := int64(30) var testCases = []struct { desc string pod runtime.Object @@ -192,9 +193,10 @@ func TestReadPodsFromFile(t *testing.T) { SelfLink: getSelfLink("test-"+hostname, "mynamespace"), }, Spec: api.PodSpec{ - NodeName: hostname, - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + NodeName: hostname, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, Containers: []api.Container{{ Name: "image", Image: "test/image", @@ -227,9 +229,10 @@ func TestReadPodsFromFile(t *testing.T) { SelfLink: getSelfLink("12345-"+hostname, kubelet.NamespaceDefault), }, Spec: api.PodSpec{ - NodeName: hostname, - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + NodeName: hostname, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, Containers: []api.Container{{ Name: "image", Image: "test/image", diff --git a/pkg/kubelet/config/http_test.go b/pkg/kubelet/config/http_test.go index 6843ac864b1..f6429fa2a80 100644 --- a/pkg/kubelet/config/http_test.go +++ b/pkg/kubelet/config/http_test.go @@ -120,6 +120,7 @@ func TestExtractInvalidManifest(t *testing.T) { func TestExtractPodsFromHTTP(t *testing.T) { hostname := "different-value" + grace := int64(30) var testCases = []struct { desc string pods runtime.Object @@ -153,9 +154,11 @@ func TestExtractPodsFromHTTP(t *testing.T) { SelfLink: getSelfLink("foo-"+hostname, "mynamespace"), }, Spec: api.PodSpec{ - NodeName: hostname, - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + NodeName: hostname, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, + Containers: []api.Container{{ Name: "1", Image: "foo", @@ -206,9 +209,11 @@ func TestExtractPodsFromHTTP(t *testing.T) { SelfLink: getSelfLink("foo-"+hostname, kubelet.NamespaceDefault), }, Spec: api.PodSpec{ - NodeName: hostname, - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + NodeName: hostname, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, + Containers: []api.Container{{ Name: "1", Image: "foo", @@ -226,9 +231,11 @@ func TestExtractPodsFromHTTP(t *testing.T) { SelfLink: getSelfLink("bar-"+hostname, kubelet.NamespaceDefault), }, Spec: api.PodSpec{ - NodeName: hostname, - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + NodeName: hostname, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, + Containers: []api.Container{{ Name: "2", Image: "bar", diff --git a/pkg/kubelet/container/fake_runtime.go b/pkg/kubelet/container/fake_runtime.go index 768e2e49a10..215fe5a8ab1 100644 --- a/pkg/kubelet/container/fake_runtime.go +++ b/pkg/kubelet/container/fake_runtime.go @@ -163,13 +163,13 @@ func (f *FakeRuntime) SyncPod(pod *api.Pod, _ Pod, _ api.PodStatus, _ []api.Secr return f.Err } -func (f *FakeRuntime) KillPod(pod Pod) error { +func (f *FakeRuntime) KillPod(pod *api.Pod, runningPod Pod) error { f.Lock() defer f.Unlock() f.CalledFunctions = append(f.CalledFunctions, "KillPod") - f.KilledPods = append(f.KilledPods, string(pod.ID)) - for _, c := range pod.Containers { + f.KilledPods = append(f.KilledPods, string(runningPod.ID)) + for _, c := range runningPod.Containers { f.KilledContainers = append(f.KilledContainers, c.Name) } return f.Err diff --git a/pkg/kubelet/container/runtime.go b/pkg/kubelet/container/runtime.go index 6fa2073da49..ac4477a255d 100644 --- a/pkg/kubelet/container/runtime.go +++ b/pkg/kubelet/container/runtime.go @@ -53,8 +53,8 @@ type Runtime interface { GetPods(all bool) ([]*Pod, error) // Syncs the running pod into the desired pod. SyncPod(pod *api.Pod, runningPod Pod, podStatus api.PodStatus, pullSecrets []api.Secret) error - // KillPod kills all the containers of a pod. - KillPod(pod Pod) error + // KillPod kills all the containers of a pod. Pod may be nil, running pod must not be. + KillPod(pod *api.Pod, runningPod Pod) error // GetPodStatus retrieves the status of the pod, including the information of // all containers in the pod. GetPodStatus(*api.Pod) (*api.PodStatus, error) diff --git a/pkg/kubelet/dockertools/manager.go b/pkg/kubelet/dockertools/manager.go index d1bbf459493..89f918eb9ff 100644 --- a/pkg/kubelet/dockertools/manager.go +++ b/pkg/kubelet/dockertools/manager.go @@ -54,8 +54,17 @@ const ( maxReasonCacheEntries = 200 - kubernetesPodLabel = "io.kubernetes.pod.data" - kubernetesContainerLabel = "io.kubernetes.container.name" + // In order to avoid unnecessary SIGKILLs, give every container a minimum grace + // period after SIGTERM. Docker will guarantee the termination, but SIGTERM is + // potentially dangerous. + // TODO: evaluate whether there are scenarios in which SIGKILL is preferable to + // SIGTERM for certain process types, which may justify setting this to 0. + minimumGracePeriodInSeconds = 2 + + kubernetesNameLabel = "io.kubernetes.pod.name" + kubernetesPodLabel = "io.kubernetes.pod.data" + kubernetesTerminationGracePeriodLabel = "io.kubernetes.pod.terminationGracePeriod" + kubernetesContainerLabel = "io.kubernetes.container.name" ) // DockerManager implements the Runtime interface. @@ -558,12 +567,19 @@ func (dm *DockerManager) runContainer( if len(containerHostname) > hostnameMaxLen { containerHostname = containerHostname[:hostnameMaxLen] } + + // Pod information is recorded on the container as labels to preserve it in the event the pod is deleted + // while the Kubelet is down and there is no information available to recover the pod. This includes + // termination information like the termination grace period and the pre stop hooks. + // TODO: keep these labels up to date if the pod changes namespacedName := types.NamespacedName{pod.Namespace, pod.Name} labels := map[string]string{ - "io.kubernetes.pod.name": namespacedName.String(), + kubernetesNameLabel: namespacedName.String(), + } + if pod.Spec.TerminationGracePeriodSeconds != nil { + labels[kubernetesTerminationGracePeriodLabel] = strconv.FormatInt(*pod.Spec.TerminationGracePeriodSeconds, 10) } if container.Lifecycle != nil && container.Lifecycle.PreStop != nil { - glog.V(1).Infof("Setting preStop hook") // TODO: This is kind of hacky, we should really just encode the bits we need. data, err := latest.Codec.Encode(pod) if err != nil { @@ -1080,12 +1096,12 @@ func (dm *DockerManager) PortForward(pod *kubecontainer.Pod, port uint16, stream } // Kills all containers in the specified pod -func (dm *DockerManager) KillPod(pod kubecontainer.Pod) error { +func (dm *DockerManager) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) error { // Send the kills in parallel since they may take a long time. Len + 1 since there // can be Len errors + the networkPlugin teardown error. - errs := make(chan error, len(pod.Containers)+1) + errs := make(chan error, len(runningPod.Containers)+1) wg := sync.WaitGroup{} - for _, container := range pod.Containers { + for _, container := range runningPod.Containers { wg.Add(1) go func(container *kubecontainer.Container) { defer util.HandleCrash() @@ -1093,15 +1109,24 @@ func (dm *DockerManager) KillPod(pod kubecontainer.Pod) error { // TODO: Handle this without signaling the pod infra container to // adapt to the generic container runtime. if container.Name == PodInfraContainerName { - err := dm.networkPlugin.TearDownPod(pod.Namespace, pod.Name, kubeletTypes.DockerID(container.ID)) + err := dm.networkPlugin.TearDownPod(runningPod.Namespace, runningPod.Name, kubeletTypes.DockerID(container.ID)) if err != nil { glog.Errorf("Failed tearing down the infra container: %v", err) errs <- err } } - err := dm.killContainer(container.ID) + var containerSpec *api.Container + if pod != nil { + for i, c := range pod.Spec.Containers { + if c.Name == container.Name { + containerSpec = &pod.Spec.Containers[i] + break + } + } + } + err := dm.killContainer(container.ID, containerSpec, pod) if err != nil { - glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, pod.ID) + glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, runningPod.ID) errs <- err } wg.Done() @@ -1119,75 +1144,128 @@ func (dm *DockerManager) KillPod(pod kubecontainer.Pod) error { return nil } -// KillContainerInPod kills a container in the pod. -func (dm *DockerManager) KillContainerInPod(container api.Container, pod *api.Pod) error { - // Locate the container. - pods, err := dm.GetPods(false) - if err != nil { - return err - } - targetPod := kubecontainer.Pods(pods).FindPod(kubecontainer.GetPodFullName(pod), pod.UID) - targetContainer := targetPod.FindContainerByName(container.Name) - if targetContainer == nil { - return fmt.Errorf("unable to find container %q in pod %q", container.Name, targetPod.Name) - } - return dm.killContainer(targetContainer.ID) -} - -// TODO(vmarmol): Unexport this as it is no longer used externally. -// KillContainer kills a container identified by containerID. -// Internally, it invokes docker's StopContainer API with a timeout of 10s. -// TODO: Deprecate this function in favor of KillContainerInPod. -func (dm *DockerManager) KillContainer(containerID types.UID) error { - return dm.killContainer(containerID) -} - -func (dm *DockerManager) killContainer(containerID types.UID) error { - ID := string(containerID) - glog.V(2).Infof("Killing container with id %q", ID) - inspect, err := dm.client.InspectContainer(ID) - if err != nil { - return err - } - var found bool - var preStop string - if inspect != nil && inspect.Config != nil && inspect.Config.Labels != nil { - preStop, found = inspect.Config.Labels[kubernetesPodLabel] - } - if found { - var pod api.Pod - err := latest.Codec.DecodeInto([]byte(preStop), &pod) +// KillContainerInPod kills a container in the pod. It must be passed either a container ID or a container and pod, +// and will attempt to lookup the other information if missing. +func (dm *DockerManager) KillContainerInPod(containerID types.UID, container *api.Container, pod *api.Pod) error { + switch { + case len(containerID) == 0: + // Locate the container. + pods, err := dm.GetPods(false) if err != nil { - glog.Errorf("Failed to decode prestop: %s, %s", preStop, ID) - } else { - name := inspect.Config.Labels[kubernetesContainerLabel] - var container *api.Container + return err + } + targetPod := kubecontainer.Pods(pods).FindPod(kubecontainer.GetPodFullName(pod), pod.UID) + targetContainer := targetPod.FindContainerByName(container.Name) + if targetContainer == nil { + return fmt.Errorf("unable to find container %q in pod %q", container.Name, targetPod.Name) + } + containerID = targetContainer.ID + + case container == nil || pod == nil: + // Read information about the container from labels + inspect, err := dm.client.InspectContainer(string(containerID)) + if err != nil { + return err + } + storedPod, storedContainer, cerr := containerAndPodFromLabels(inspect) + if cerr != nil { + glog.Errorf("unable to access pod data from container: %v", err) + } + if container == nil { + container = storedContainer + } + if pod == nil { + pod = storedPod + } + } + return dm.killContainer(containerID, container, pod) +} + +// killContainer accepts a containerID and an optional container or pod containing shutdown policies. Invoke +// KillContainerInPod if information must be retrieved first. +func (dm *DockerManager) killContainer(containerID types.UID, container *api.Container, pod *api.Pod) error { + ID := string(containerID) + name := ID + if container != nil { + name = fmt.Sprintf("%s %s", name, container.Name) + } + if pod != nil { + name = fmt.Sprintf("%s %s/%s", name, pod.Namespace, pod.Name) + } + + gracePeriod := int64(minimumGracePeriodInSeconds) + if pod != nil && pod.DeletionGracePeriodSeconds != nil { + gracePeriod = *pod.DeletionGracePeriodSeconds + } + glog.V(2).Infof("Killing container %q with %d second grace period", name, gracePeriod) + + if pod != nil && container != nil && container.Lifecycle != nil && container.Lifecycle.PreStop != nil { + glog.V(4).Infof("Running preStop hook for container %q", name) + start := util.Now() + // TODO: timebox PreStop execution to at most gracePeriod + if err := dm.runner.Run(ID, pod, container, container.Lifecycle.PreStop); err != nil { + glog.Errorf("preStop hook for container %q failed: %v", name, err) + } + gracePeriod -= int64(util.Now().Sub(start.Time).Seconds()) + } + + dm.readinessManager.RemoveReadiness(ID) + + // always give containers a minimal shutdown window to avoid unnecessary SIGKILLs + if gracePeriod < minimumGracePeriodInSeconds { + gracePeriod = minimumGracePeriodInSeconds + } + err := dm.client.StopContainer(ID, uint(gracePeriod)) + ref, ok := dm.containerRefManager.GetRef(ID) + if !ok { + glog.Warningf("No ref for pod '%q'", name) + } else { + // TODO: pass reason down here, and state, or move this call up the stack. + dm.recorder.Eventf(ref, "killing", "Killing %v", ID) + } + return err +} + +var errNoPodOnContainer = fmt.Errorf("no pod information labels on Docker container") + +// containerAndPodFromLabels tries to load the appropriate container info off of a Docker container's labels +func containerAndPodFromLabels(inspect *docker.Container) (pod *api.Pod, container *api.Container, err error) { + if inspect == nil && inspect.Config == nil && inspect.Config.Labels == nil { + return nil, nil, errNoPodOnContainer + } + labels := inspect.Config.Labels + + // the pod data may not be set + if body, found := labels[kubernetesPodLabel]; found { + pod = &api.Pod{} + if err = latest.Codec.DecodeInto([]byte(body), pod); err == nil { + name := labels[kubernetesContainerLabel] for ix := range pod.Spec.Containers { if pod.Spec.Containers[ix].Name == name { container = &pod.Spec.Containers[ix] break } } - if container != nil { - glog.V(1).Infof("Running preStop hook") - if err := dm.runner.Run(ID, &pod, container, container.Lifecycle.PreStop); err != nil { - glog.Errorf("failed to run preStop hook: %v", err) - } - } else { - glog.Errorf("unable to find container %v, %s", pod, name) + if container == nil { + err = fmt.Errorf("unable to find container %s in pod %v", name, pod) + } + } else { + pod = nil + } + } + + // attempt to find the default grace period if we didn't commit a pod, but set the generic metadata + // field (the one used by kill) + if pod == nil { + if period, ok := labels[kubernetesTerminationGracePeriodLabel]; ok { + if seconds, err := strconv.ParseInt(period, 10, 64); err == nil { + pod = &api.Pod{} + pod.DeletionGracePeriodSeconds = &seconds } } } - dm.readinessManager.RemoveReadiness(ID) - err = dm.client.StopContainer(ID, 10) - ref, ok := dm.containerRefManager.GetRef(ID) - if !ok { - glog.Warningf("No ref for pod '%v'", ID) - } else { - // TODO: pass reason down here, and state, or move this call up the stack. - dm.recorder.Eventf(ref, "killing", "Killing %v", ID) - } - return err + + return } // Run a single container from a pod. Returns the docker container ID @@ -1215,7 +1293,7 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe if container.Lifecycle != nil && container.Lifecycle.PostStart != nil { handlerErr := dm.runner.Run(id, pod, container, container.Lifecycle.PostStart) if handlerErr != nil { - dm.killContainer(types.UID(id)) + dm.killContainer(types.UID(id), container, pod) return kubeletTypes.DockerID(""), fmt.Errorf("failed to call event handler: %v", handlerErr) } } @@ -1328,6 +1406,11 @@ func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, runningPod kub containersToKeep := make(map[kubeletTypes.DockerID]int) createPodInfraContainer := false + if pod.DeletionTimestamp != nil { + glog.V(4).Infof("Pod is terminating %q", podFullName) + return PodContainerChangesSpec{}, nil + } + var err error var podInfraContainerID kubeletTypes.DockerID var changed bool @@ -1481,7 +1564,7 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod } // Killing phase: if we want to start new infra container, or nothing is running kill everything (including infra container) - err = dm.KillPod(runningPod) + err = dm.KillPod(pod, runningPod) if err != nil { return err } @@ -1491,7 +1574,15 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod _, keep := containerChanges.ContainersToKeep[kubeletTypes.DockerID(container.ID)] if !keep { glog.V(3).Infof("Killing unwanted container %+v", container) - err = dm.KillContainer(container.ID) + // attempt to find the appropriate container policy + var podContainer *api.Container + for i, c := range pod.Spec.Containers { + if c.Name == container.Name { + podContainer = &pod.Spec.Containers[i] + break + } + } + err = dm.KillContainerInPod(container.ID, podContainer, pod) if err != nil { glog.Errorf("Error killing container: %v", err) } diff --git a/pkg/kubelet/dockertools/manager_test.go b/pkg/kubelet/dockertools/manager_test.go index 38b89301915..32e6e148872 100644 --- a/pkg/kubelet/dockertools/manager_test.go +++ b/pkg/kubelet/dockertools/manager_test.go @@ -417,7 +417,7 @@ func TestKillContainerInPod(t *testing.T) { manager.readinessManager.SetReadiness(c.ID, true) } - if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err != nil { + if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err != nil { t.Errorf("unexpected error: %v", err) } // Assert the container has been stopped. @@ -490,14 +490,14 @@ func TestKillContainerInPodWithPreStop(t *testing.T) { manager.readinessManager.SetReadiness(c.ID, true) } - if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err != nil { + if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err != nil { t.Errorf("unexpected error: %v", err) } // Assert the container has been stopped. if err := fakeDocker.AssertStopped([]string{containerToKill.ID}); err != nil { t.Errorf("container was not stopped correctly: %v", err) } - verifyCalls(t, fakeDocker, []string{"list", "inspect_container", "create_exec", "start_exec", "stop"}) + verifyCalls(t, fakeDocker, []string{"list", "create_exec", "start_exec", "stop"}) if !reflect.DeepEqual(expectedCmd, fakeDocker.execCmd) { t.Errorf("expected: %v, got %v", expectedCmd, fakeDocker.execCmd) } @@ -534,7 +534,7 @@ func TestKillContainerInPodWithError(t *testing.T) { manager.readinessManager.SetReadiness(c.ID, true) } - if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err == nil { + if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err == nil { t.Errorf("expected error, found nil") } @@ -1030,7 +1030,7 @@ func TestSyncPodDeletesWithNoPodInfraContainer(t *testing.T) { verifyCalls(t, fakeDocker, []string{ // Kill the container since pod infra container is not running. - "inspect_container", "stop", + "stop", // Create pod infra container. "create", "start", "inspect_container", // Create container. @@ -1105,7 +1105,7 @@ func TestSyncPodDeletesDuplicate(t *testing.T) { // Check the pod infra container. "inspect_container", // Kill the duplicated container. - "inspect_container", "stop", + "stop", }) // Expect one of the duplicates to be killed. if len(fakeDocker.Stopped) != 1 || (fakeDocker.Stopped[0] != "1234" && fakeDocker.Stopped[0] != "4567") { @@ -1159,7 +1159,7 @@ func TestSyncPodBadHash(t *testing.T) { // Check the pod infra container. "inspect_container", // Kill and restart the bad hash container. - "inspect_container", "stop", "create", "start", + "stop", "create", "start", }) if err := fakeDocker.AssertStopped([]string{"1234"}); err != nil { @@ -1217,7 +1217,7 @@ func TestSyncPodsUnhealthy(t *testing.T) { // Check the pod infra container. "inspect_container", // Kill the unhealthy container. - "inspect_container", "stop", + "stop", // Restart the unhealthy container. "create", "start", }) @@ -1426,7 +1426,7 @@ func TestSyncPodWithRestartPolicy(t *testing.T) { // Check the pod infra container. "inspect_container", // Stop the last pod infra container. - "inspect_container", "stop", + "stop", }, []string{}, []string{"9876"}, diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 2b218c190da..08ec6d9e11c 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -1054,8 +1054,8 @@ func parseResolvConf(reader io.Reader) (nameservers []string, searches []string, } // Kill all running containers in a pod (includes the pod infra container). -func (kl *Kubelet) killPod(pod kubecontainer.Pod) error { - return kl.containerRuntime.KillPod(pod) +func (kl *Kubelet) killPod(pod *api.Pod, runningPod kubecontainer.Pod) error { + return kl.containerRuntime.KillPod(pod, runningPod) } type empty struct{} @@ -1101,7 +1101,7 @@ func (kl *Kubelet) syncPod(pod *api.Pod, mirrorPod *api.Pod, runningPod kubecont // Kill pods we can't run. err := canRunPod(pod) if err != nil { - kl.killPod(runningPod) + kl.killPod(pod, runningPod) return err } @@ -1416,7 +1416,7 @@ func (kl *Kubelet) killUnwantedPods(desiredPods map[types.UID]empty, }() glog.V(1).Infof("Killing unwanted pod %q", pod.Name) // Stop the containers. - err = kl.killPod(*pod) + err = kl.killPod(nil, *pod) if err != nil { glog.Errorf("Failed killing the pod %q: %v", pod.Name, err) return diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go index 3b8bd06495e..0095e5e3c09 100644 --- a/pkg/kubelet/kubelet_test.go +++ b/pkg/kubelet/kubelet_test.go @@ -1153,7 +1153,7 @@ func TestSyncPodEventHandlerFails(t *testing.T) { // Create the container. "create", "start", // Kill the container since event handler fails. - "inspect_container", "stop", + "stop", // Get pod status. "list", "inspect_container", "inspect_container", // Get pods for deleting orphaned volumes. diff --git a/pkg/kubelet/rkt/rkt.go b/pkg/kubelet/rkt/rkt.go index ba2da9c58fa..3c716bc869d 100644 --- a/pkg/kubelet/rkt/rkt.go +++ b/pkg/kubelet/rkt/rkt.go @@ -671,11 +671,11 @@ func (r *runtime) GetPods(all bool) ([]*kubecontainer.Pod, error) { } // KillPod invokes 'systemctl kill' to kill the unit that runs the pod. -func (r *runtime) KillPod(pod kubecontainer.Pod) error { - glog.V(4).Infof("Rkt is killing pod: name %q.", pod.Name) +func (r *runtime) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) error { + glog.V(4).Infof("Rkt is killing pod: name %q.", runningPod.Name) // TODO(yifan): More graceful stop. Replace with StopUnit and wait for a timeout. - r.systemd.KillUnit(makePodServiceFileName(pod.ID), int32(syscall.SIGKILL)) + r.systemd.KillUnit(makePodServiceFileName(runningPod.ID), int32(syscall.SIGKILL)) return r.systemd.Reload() } @@ -880,7 +880,7 @@ func (r *runtime) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, podStatus if restartPod { // TODO(yifan): Handle network plugin. - if err := r.KillPod(runningPod); err != nil { + if err := r.KillPod(pod, runningPod); err != nil { return err } if err := r.RunPod(pod); err != nil { diff --git a/pkg/kubelet/status_manager.go b/pkg/kubelet/status_manager.go index 4df39d7c086..a0e7af830e9 100644 --- a/pkg/kubelet/status_manager.go +++ b/pkg/kubelet/status_manager.go @@ -22,6 +22,7 @@ import ( "sync" "github.com/GoogleCloudPlatform/kubernetes/pkg/api" + "github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors" "github.com/GoogleCloudPlatform/kubernetes/pkg/client" kubecontainer "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/container" "github.com/GoogleCloudPlatform/kubernetes/pkg/util" @@ -135,13 +136,24 @@ func (s *statusManager) syncBatch() error { } // TODO: make me easier to express from client code statusPod, err = s.kubeClient.Pods(statusPod.Namespace).Get(statusPod.Name) + if errors.IsNotFound(err) { + glog.V(3).Infof("Pod %q was deleted on the server", pod.Name) + return nil + } if err == nil { statusPod.Status = status - _, err = s.kubeClient.Pods(pod.Namespace).UpdateStatus(statusPod) // TODO: handle conflict as a retry, make that easier too. + statusPod, err = s.kubeClient.Pods(pod.Namespace).UpdateStatus(statusPod) if err == nil { glog.V(3).Infof("Status for pod %q updated successfully", pod.Name) - return nil + + if statusPod.DeletionTimestamp == nil || !allTerminated(statusPod.Status.ContainerStatuses) { + return nil + } + if err := s.kubeClient.Pods(statusPod.Namespace).Delete(statusPod.Name, api.NewDeleteOptions(0)); err == nil { + glog.V(3).Infof("Pod %q fully terminated and removed from etcd", statusPod.Name) + return nil + } } } @@ -151,3 +163,14 @@ func (s *statusManager) syncBatch() error { s.DeletePodStatus(podFullName) return fmt.Errorf("error updating status for pod %q: %v", pod.Name, err) } + +// allTerminated returns true if every status is terminated, or the status list +// is empty. +func allTerminated(statuses []api.ContainerStatus) bool { + for _, status := range statuses { + if status.State.Terminated == nil { + return false + } + } + return true +} diff --git a/pkg/namespace/namespace_controller.go b/pkg/namespace/namespace_controller.go index 1913457d45f..7557c4f9def 100644 --- a/pkg/namespace/namespace_controller.go +++ b/pkg/namespace/namespace_controller.go @@ -17,6 +17,7 @@ limitations under the License. package namespace import ( + "fmt" "time" "github.com/GoogleCloudPlatform/kubernetes/pkg/api" @@ -119,7 +120,7 @@ func deleteAllContent(kubeClient client.Interface, namespace string) (err error) if err != nil { return err } - err = deletePods(kubeClient, namespace) + estimate, err := deletePods(kubeClient, namespace) if err != nil { return err } @@ -143,6 +144,10 @@ func deleteAllContent(kubeClient client.Interface, namespace string) (err error) if err != nil { return err } + + if estimate > 0 { + return fmt.Errorf("some resources are being gracefully deleted, estimate %d seconds", estimate) + } return nil } @@ -263,18 +268,25 @@ func deleteReplicationControllers(kubeClient client.Interface, ns string) error return nil } -func deletePods(kubeClient client.Interface, ns string) error { +func deletePods(kubeClient client.Interface, ns string) (int64, error) { items, err := kubeClient.Pods(ns).List(labels.Everything(), fields.Everything()) if err != nil { - return err + return 0, err } + estimate := int64(0) for i := range items.Items { + if items.Items[i].Spec.TerminationGracePeriodSeconds != nil { + grace := *items.Items[i].Spec.TerminationGracePeriodSeconds + if grace > estimate { + estimate = grace + } + } err := kubeClient.Pods(ns).Delete(items.Items[i].Name, nil) if err != nil { - return err + return 0, err } } - return nil + return estimate, nil } func deleteEvents(kubeClient client.Interface, ns string) error { diff --git a/pkg/registry/controller/etcd/etcd_test.go b/pkg/registry/controller/etcd/etcd_test.go index 6b9fdc15242..fc9bd7f91d8 100644 --- a/pkg/registry/controller/etcd/etcd_test.go +++ b/pkg/registry/controller/etcd/etcd_test.go @@ -722,7 +722,7 @@ func TestDelete(t *testing.T) { // If the controller is still around after trying to delete either the delete // failed, or we're deleting it gracefully. if fakeClient.Data[key].R.Node != nil { - return true + return fakeClient.Data[key].R.Node.TTL != 0 } return false } diff --git a/pkg/registry/event/registry_test.go b/pkg/registry/event/registry_test.go index 75d29614fb4..5da05e536cf 100644 --- a/pkg/registry/event/registry_test.go +++ b/pkg/registry/event/registry_test.go @@ -37,6 +37,7 @@ var testTTL uint64 = 60 func NewTestEventEtcdRegistry(t *testing.T) (*tools.FakeEtcdClient, generic.Registry) { f := tools.NewFakeEtcdClient(t) + f.HideExpires = true f.TestIndex = true h := tools.NewEtcdHelper(f, testapi.Codec(), etcdtest.PathPrefix()) diff --git a/pkg/registry/persistentvolume/etcd/etcd_test.go b/pkg/registry/persistentvolume/etcd/etcd_test.go index 0ce3d2912ad..2b80a6019ec 100644 --- a/pkg/registry/persistentvolume/etcd/etcd_test.go +++ b/pkg/registry/persistentvolume/etcd/etcd_test.go @@ -324,7 +324,7 @@ func TestEtcdUpdateStatus(t *testing.T) { key, _ := storage.KeyFunc(ctx, "foo") key = etcdtest.AddPrefix(key) pvStart := validNewPersistentVolume("foo") - fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvStart), 1) + fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvStart), 0) pvIn := &api.PersistentVolume{ ObjectMeta: api.ObjectMeta{ diff --git a/pkg/registry/persistentvolumeclaim/etcd/etcd_test.go b/pkg/registry/persistentvolumeclaim/etcd/etcd_test.go index e3ba55afed2..067009ad109 100644 --- a/pkg/registry/persistentvolumeclaim/etcd/etcd_test.go +++ b/pkg/registry/persistentvolumeclaim/etcd/etcd_test.go @@ -325,7 +325,7 @@ func TestEtcdUpdateStatus(t *testing.T) { key, _ := storage.KeyFunc(ctx, "foo") key = etcdtest.AddPrefix(key) pvcStart := validNewPersistentVolumeClaim("foo", api.NamespaceDefault) - fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvcStart), 1) + fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvcStart), 0) pvc := &api.PersistentVolumeClaim{ ObjectMeta: api.ObjectMeta{ diff --git a/pkg/registry/pod/etcd/etcd_test.go b/pkg/registry/pod/etcd/etcd_test.go index 9d1e00f9264..e64e3aae27d 100644 --- a/pkg/registry/pod/etcd/etcd_test.go +++ b/pkg/registry/pod/etcd/etcd_test.go @@ -54,6 +54,7 @@ func newStorage(t *testing.T) (*REST, *BindingREST, *StatusREST, *tools.FakeEtcd } func validNewPod() *api.Pod { + grace := int64(30) return &api.Pod{ ObjectMeta: api.ObjectMeta{ Name: "foo", @@ -62,6 +63,8 @@ func validNewPod() *api.Pod { Spec: api.PodSpec{ RestartPolicy: api.RestartPolicyAlways, DNSPolicy: api.DNSClusterFirst, + + TerminationGracePeriodSeconds: &grace, Containers: []api.Container{ { Name: "foo", @@ -132,9 +135,9 @@ func TestDelete(t *testing.T) { if fakeEtcdClient.Data[key].R.Node == nil { return false } - return fakeEtcdClient.Data[key].R.Node.TTL == 30 + return fakeEtcdClient.Data[key].R.Node.TTL != 0 } - test.TestDelete(createFn, gracefulSetFn) + test.TestDeleteGraceful(createFn, 30, gracefulSetFn) } func expectPod(t *testing.T, out runtime.Object) (*api.Pod, bool) { @@ -1118,6 +1121,7 @@ func TestEtcdUpdateScheduled(t *testing.T) { }, }), 1) + grace := int64(30) podIn := api.Pod{ ObjectMeta: api.ObjectMeta{ Name: "foo", @@ -1139,6 +1143,8 @@ func TestEtcdUpdateScheduled(t *testing.T) { }, RestartPolicy: api.RestartPolicyAlways, DNSPolicy: api.DNSClusterFirst, + + TerminationGracePeriodSeconds: &grace, }, } _, _, err := registry.Update(ctx, &podIn) @@ -1179,7 +1185,7 @@ func TestEtcdUpdateStatus(t *testing.T) { }, }, } - fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, &podStart), 1) + fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, &podStart), 0) podIn := api.Pod{ ObjectMeta: api.ObjectMeta{ @@ -1208,6 +1214,8 @@ func TestEtcdUpdateStatus(t *testing.T) { expected := podStart expected.ResourceVersion = "2" + grace := int64(30) + expected.Spec.TerminationGracePeriodSeconds = &grace expected.Spec.RestartPolicy = api.RestartPolicyAlways expected.Spec.DNSPolicy = api.DNSClusterFirst expected.Spec.Containers[0].ImagePullPolicy = api.PullIfNotPresent diff --git a/pkg/registry/pod/rest.go b/pkg/registry/pod/rest.go index 7c1ed33e3bc..12918dcc9b9 100644 --- a/pkg/registry/pod/rest.go +++ b/pkg/registry/pod/rest.go @@ -81,9 +81,26 @@ func (podStrategy) ValidateUpdate(ctx api.Context, obj, old runtime.Object) fiel return append(errorList, validation.ValidatePodUpdate(obj.(*api.Pod), old.(*api.Pod))...) } -// CheckGracefulDelete allows a pod to be gracefully deleted. +// CheckGracefulDelete allows a pod to be gracefully deleted. It updates the DeleteOptions to +// reflect the desired grace value. func (podStrategy) CheckGracefulDelete(obj runtime.Object, options *api.DeleteOptions) bool { - return false + if options == nil { + return false + } + period := int64(0) + // user has specified a value + if options.GracePeriodSeconds != nil { + period = *options.GracePeriodSeconds + } else { + // use the default value if set, or deletes the pod immediately (0) + pod := obj.(*api.Pod) + if pod.Spec.TerminationGracePeriodSeconds != nil { + period = *pod.Spec.TerminationGracePeriodSeconds + } + } + // ensure the options and the pod are in sync + options.GracePeriodSeconds = &period + return true } type podStatusStrategy struct { @@ -96,6 +113,7 @@ func (podStatusStrategy) PrepareForUpdate(obj, old runtime.Object) { newPod := obj.(*api.Pod) oldPod := old.(*api.Pod) newPod.Spec = oldPod.Spec + newPod.DeletionTimestamp = nil } func (podStatusStrategy) ValidateUpdate(ctx api.Context, obj, old runtime.Object) fielderrors.ValidationErrorList { diff --git a/pkg/registry/resourcequota/etcd/etcd_test.go b/pkg/registry/resourcequota/etcd/etcd_test.go index b4498b338eb..b19e939141b 100644 --- a/pkg/registry/resourcequota/etcd/etcd_test.go +++ b/pkg/registry/resourcequota/etcd/etcd_test.go @@ -477,7 +477,7 @@ func TestEtcdUpdateStatus(t *testing.T) { key, _ := registry.KeyFunc(ctx, "foo") key = etcdtest.AddPrefix(key) resourcequotaStart := validNewResourceQuota() - fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, resourcequotaStart), 1) + fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, resourcequotaStart), 0) resourcequotaIn := &api.ResourceQuota{ ObjectMeta: api.ObjectMeta{ diff --git a/pkg/service/endpoints_controller.go b/pkg/service/endpoints_controller.go index e6d8e30a0d7..ba1dd960d8b 100644 --- a/pkg/service/endpoints_controller.go +++ b/pkg/service/endpoints_controller.go @@ -308,7 +308,11 @@ func (e *EndpointController) syncService(key string) { continue } if len(pod.Status.PodIP) == 0 { - glog.V(4).Infof("Failed to find an IP for pod %s/%s", pod.Namespace, pod.Name) + glog.V(5).Infof("Failed to find an IP for pod %s/%s", pod.Namespace, pod.Name) + continue + } + if pod.DeletionTimestamp != nil { + glog.V(5).Infof("Pod is being deleted %s/%s", pod.Namespace, pod.Name) continue } diff --git a/pkg/tools/etcd_helper.go b/pkg/tools/etcd_helper.go index d60cf68b43a..0f603948860 100644 --- a/pkg/tools/etcd_helper.go +++ b/pkg/tools/etcd_helper.go @@ -556,14 +556,21 @@ func (h *EtcdHelper) GuaranteedUpdate(key string, ptrToType runtime.Object, igno ttl := uint64(0) if node != nil { index = node.ModifiedIndex - if node.TTL > 0 { + if node.TTL != 0 { ttl = uint64(node.TTL) } + if node.Expiration != nil && ttl == 0 { + ttl = 1 + } } else if res != nil { index = res.EtcdIndex } if newTTL != nil { + if ttl != 0 && *newTTL == 0 { + // TODO: remove this after we have verified this is no longer an issue + glog.V(4).Infof("GuaranteedUpdate is clearing TTL for %q, may not be intentional", key) + } ttl = *newTTL } diff --git a/pkg/tools/etcd_helper_test.go b/pkg/tools/etcd_helper_test.go index 5c39002d4cc..b948a800865 100644 --- a/pkg/tools/etcd_helper_test.go +++ b/pkg/tools/etcd_helper_test.go @@ -117,28 +117,32 @@ func TestExtractToList(t *testing.T) { }, }, } + grace := int64(30) expect := api.PodList{ ListMeta: api.ListMeta{ResourceVersion: "10"}, Items: []api.Pod{ { ObjectMeta: api.ObjectMeta{Name: "bar", ResourceVersion: "2"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "baz", ResourceVersion: "3"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "1"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, @@ -200,6 +204,7 @@ func TestExtractToListAcrossDirectories(t *testing.T) { }, }, } + grace := int64(30) expect := api.PodList{ ListMeta: api.ListMeta{ResourceVersion: "10"}, Items: []api.Pod{ @@ -207,22 +212,25 @@ func TestExtractToListAcrossDirectories(t *testing.T) { { ObjectMeta: api.ObjectMeta{Name: "baz", ResourceVersion: "3"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "1"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "bar", ResourceVersion: "2"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, @@ -272,28 +280,32 @@ func TestExtractToListExcludesDirectories(t *testing.T) { }, }, } + grace := int64(30) expect := api.PodList{ ListMeta: api.ListMeta{ResourceVersion: "10"}, Items: []api.Pod{ { ObjectMeta: api.ObjectMeta{Name: "bar", ResourceVersion: "2"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "baz", ResourceVersion: "3"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "1"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, @@ -313,11 +325,13 @@ func TestExtractObj(t *testing.T) { fakeClient := NewFakeEtcdClient(t) helper := NewEtcdHelper(fakeClient, testapi.Codec(), etcdtest.PathPrefix()) key := etcdtest.AddPrefix("/some/key") + grace := int64(30) expect := api.Pod{ ObjectMeta: api.ObjectMeta{Name: "foo"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, } fakeClient.Set(key, runtime.EncodeOrDie(testapi.Codec(), &expect), 0) diff --git a/pkg/tools/etcd_helper_watch.go b/pkg/tools/etcd_helper_watch.go index a10128cdd4a..4b3fb1cab21 100644 --- a/pkg/tools/etcd_helper_watch.go +++ b/pkg/tools/etcd_helper_watch.go @@ -39,6 +39,7 @@ const ( EtcdSet = "set" EtcdCAS = "compareAndSwap" EtcdDelete = "delete" + EtcdExpire = "expire" ) // FilterFunc is a predicate which takes an API object and returns true @@ -405,7 +406,7 @@ func (w *etcdWatcher) sendResult(res *etcd.Response) { w.sendAdd(res) case EtcdSet, EtcdCAS: w.sendModify(res) - case EtcdDelete: + case EtcdDelete, EtcdExpire: w.sendDelete(res) default: glog.Errorf("unknown action: %v", res.Action) diff --git a/pkg/tools/fake_etcd_client.go b/pkg/tools/fake_etcd_client.go index 11d7d435108..a4cbde7ad96 100644 --- a/pkg/tools/fake_etcd_client.go +++ b/pkg/tools/fake_etcd_client.go @@ -21,6 +21,7 @@ import ( "fmt" "sort" "sync" + "time" "github.com/coreos/go-etcd/etcd" ) @@ -53,6 +54,8 @@ type FakeEtcdClient struct { TestIndex bool ChangeIndex uint64 LastSetTTL uint64 + // Will avoid setting the expires header on objects to make comparison easier + HideExpires bool Machines []string // Will become valid after Watch is called; tester may write to it. Tester may @@ -184,6 +187,11 @@ func (f *FakeEtcdClient) setLocked(key, value string, ttl uint64) (*etcd.Respons prevResult := f.Data[key] createdIndex := prevResult.R.Node.CreatedIndex f.t.Logf("updating %v, index %v -> %v (ttl: %d)", key, createdIndex, i, ttl) + var expires *time.Time + if !f.HideExpires && ttl > 0 { + now := time.Now() + expires = &now + } result := EtcdResponseWithError{ R: &etcd.Response{ Node: &etcd.Node{ @@ -191,6 +199,7 @@ func (f *FakeEtcdClient) setLocked(key, value string, ttl uint64) (*etcd.Respons CreatedIndex: createdIndex, ModifiedIndex: i, TTL: int64(ttl), + Expiration: expires, }, }, } diff --git a/plugin/pkg/scheduler/factory/factory_test.go b/plugin/pkg/scheduler/factory/factory_test.go index c1b37ad8e41..e7af8734b04 100644 --- a/plugin/pkg/scheduler/factory/factory_test.go +++ b/plugin/pkg/scheduler/factory/factory_test.go @@ -132,11 +132,13 @@ func PriorityTwo(pod *api.Pod, podLister algorithm.PodLister, minionLister algor } func TestDefaultErrorFunc(t *testing.T) { + grace := int64(30) testPod := &api.Pod{ ObjectMeta: api.ObjectMeta{Name: "foo", Namespace: "bar"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, } handler := util.FakeHandler{ diff --git a/test/e2e/kubectl.go b/test/e2e/kubectl.go index e6d069f417b..46863ebacc0 100644 --- a/test/e2e/kubectl.go +++ b/test/e2e/kubectl.go @@ -205,7 +205,7 @@ func getUDData(jpgExpected string, ns string) func(*client.Client, string) error if strings.Contains(data.Image, jpgExpected) { return nil } else { - return errors.New(fmt.Sprintf("data served up in container is innaccurate, %s didn't contain %s", data, jpgExpected)) + return errors.New(fmt.Sprintf("data served up in container is inaccurate, %s didn't contain %s", data, jpgExpected)) } } } diff --git a/test/e2e/pd.go b/test/e2e/pd.go index d396d44665d..dedb5214010 100644 --- a/test/e2e/pd.go +++ b/test/e2e/pd.go @@ -82,8 +82,8 @@ var _ = Describe("PD", func() { By("cleaning up PD-RW test environment") // Teardown pods, PD. Ignore errors. // Teardown should do nothing unless test failed. - podClient.Delete(host0Pod.Name, nil) - podClient.Delete(host1Pod.Name, nil) + podClient.Delete(host0Pod.Name, api.NewDeleteOptions(0)) + podClient.Delete(host1Pod.Name, api.NewDeleteOptions(0)) detachPD(host0Name, diskName) detachPD(host1Name, diskName) deletePD(diskName) @@ -96,7 +96,7 @@ var _ = Describe("PD", func() { expectNoError(waitForPodRunning(c, host0Pod.Name)) By("deleting host0Pod") - expectNoError(podClient.Delete(host0Pod.Name, nil), "Failed to delete host0Pod") + expectNoError(podClient.Delete(host0Pod.Name, api.NewDeleteOptions(0)), "Failed to delete host0Pod") By("submitting host1Pod to kubernetes") _, err = podClient.Create(host1Pod) @@ -105,7 +105,7 @@ var _ = Describe("PD", func() { expectNoError(waitForPodRunning(c, host1Pod.Name)) By("deleting host1Pod") - expectNoError(podClient.Delete(host1Pod.Name, nil), "Failed to delete host1Pod") + expectNoError(podClient.Delete(host1Pod.Name, api.NewDeleteOptions(0)), "Failed to delete host1Pod") By(fmt.Sprintf("deleting PD %q", diskName)) for start := time.Now(); time.Since(start) < 180*time.Second; time.Sleep(5 * time.Second) { @@ -142,9 +142,9 @@ var _ = Describe("PD", func() { By("cleaning up PD-RO test environment") // Teardown pods, PD. Ignore errors. // Teardown should do nothing unless test failed. - podClient.Delete(rwPod.Name, nil) - podClient.Delete(host0ROPod.Name, nil) - podClient.Delete(host1ROPod.Name, nil) + podClient.Delete(rwPod.Name, api.NewDeleteOptions(0)) + podClient.Delete(host0ROPod.Name, api.NewDeleteOptions(0)) + podClient.Delete(host1ROPod.Name, api.NewDeleteOptions(0)) detachPD(host0Name, diskName) detachPD(host1Name, diskName) @@ -155,7 +155,7 @@ var _ = Describe("PD", func() { _, err = podClient.Create(rwPod) expectNoError(err, "Failed to create rwPod") expectNoError(waitForPodRunning(c, rwPod.Name)) - expectNoError(podClient.Delete(rwPod.Name, nil), "Failed to delete host0Pod") + expectNoError(podClient.Delete(rwPod.Name, api.NewDeleteOptions(0)), "Failed to delete host0Pod") By("submitting host0ROPod to kubernetes") _, err = podClient.Create(host0ROPod) @@ -170,10 +170,10 @@ var _ = Describe("PD", func() { expectNoError(waitForPodRunning(c, host1ROPod.Name)) By("deleting host0ROPod") - expectNoError(podClient.Delete(host0ROPod.Name, nil), "Failed to delete host0ROPod") + expectNoError(podClient.Delete(host0ROPod.Name, api.NewDeleteOptions(0)), "Failed to delete host0ROPod") By("deleting host1ROPod") - expectNoError(podClient.Delete(host1ROPod.Name, nil), "Failed to delete host1ROPod") + expectNoError(podClient.Delete(host1ROPod.Name, api.NewDeleteOptions(0)), "Failed to delete host1ROPod") By(fmt.Sprintf("deleting PD %q", diskName)) for start := time.Now(); time.Since(start) < 180*time.Second; time.Sleep(5 * time.Second) { diff --git a/test/e2e/pods.go b/test/e2e/pods.go index 62a7151a711..172c997fee3 100644 --- a/test/e2e/pods.go +++ b/test/e2e/pods.go @@ -55,7 +55,7 @@ func runLivenessTest(c *client.Client, podDescr *api.Pod, expectRestart bool) { // At the end of the test, clean up by removing the pod. defer func() { By("deleting the pod") - c.Pods(ns).Delete(podDescr.Name, nil) + c.Pods(ns).Delete(podDescr.Name, api.NewDeleteOptions(0)) }() // Wait until the pod is not pending. (Here we need to check for something other than @@ -101,7 +101,7 @@ func testHostIP(c *client.Client, pod *api.Pod) { podClient := c.Pods(ns) By("creating pod") - defer podClient.Delete(pod.Name, nil) + defer podClient.Delete(pod.Name, api.NewDeleteOptions(0)) _, err = podClient.Create(pod) if err != nil { Fail(fmt.Sprintf("Failed to create pod: %v", err)) @@ -205,7 +205,7 @@ var _ = Describe("Pods", func() { // We call defer here in case there is a problem with // the test so we can ensure that we clean up after // ourselves - defer podClient.Delete(pod.Name, nil) + defer podClient.Delete(pod.Name, api.NewDeleteOptions(0)) _, err = podClient.Create(pod) if err != nil { Fail(fmt.Sprintf("Failed to create pod: %v", err)) @@ -218,7 +218,7 @@ var _ = Describe("Pods", func() { } Expect(len(pods.Items)).To(Equal(1)) - By("veryfying pod creation was observed") + By("verifying pod creation was observed") select { case event, _ := <-w.ResultChan(): if event.Type != watch.Added { @@ -228,22 +228,21 @@ var _ = Describe("Pods", func() { Fail("Timeout while waiting for pod creation") } - By("deleting the pod") - podClient.Delete(pod.Name, nil) - pods, err = podClient.List(labels.SelectorFromSet(labels.Set(map[string]string{"time": value})), fields.Everything()) - if err != nil { - Fail(fmt.Sprintf("Failed to delete pod: %v", err)) + By("deleting the pod gracefully") + if err := podClient.Delete(pod.Name, nil); err != nil { + Fail(fmt.Sprintf("Failed to observe pod deletion: %v", err)) } - Expect(len(pods.Items)).To(Equal(0)) - By("veryfying pod deletion was observed") + By("verifying pod deletion was observed") deleted := false timeout := false + var lastPod *api.Pod timer := time.After(podStartTimeout) for !deleted && !timeout { select { case event, _ := <-w.ResultChan(): if event.Type == watch.Deleted { + lastPod = event.Object.(*api.Pod) deleted = true } case <-timer: @@ -253,6 +252,14 @@ var _ = Describe("Pods", func() { if !deleted { Fail("Failed to observe pod deletion") } + Expect(lastPod.DeletionTimestamp).ToNot(BeNil()) + Expect(lastPod.Spec.TerminationGracePeriodSeconds).ToNot(BeZero()) + + pods, err = podClient.List(labels.SelectorFromSet(labels.Set(map[string]string{"time": value})), fields.Everything()) + if err != nil { + Fail(fmt.Sprintf("Failed to delete pod: %v", err)) + } + Expect(len(pods.Items)).To(Equal(0)) }) It("should be updated", func() { @@ -292,7 +299,7 @@ var _ = Describe("Pods", func() { By("submitting the pod to kubernetes") defer func() { By("deleting the pod") - podClient.Delete(pod.Name, nil) + podClient.Delete(pod.Name, api.NewDeleteOptions(0)) }() pod, err := podClient.Create(pod) if err != nil { @@ -356,7 +363,7 @@ var _ = Describe("Pods", func() { }, }, } - defer c.Pods(api.NamespaceDefault).Delete(serverPod.Name, nil) + defer c.Pods(api.NamespaceDefault).Delete(serverPod.Name, api.NewDeleteOptions(0)) _, err := c.Pods(api.NamespaceDefault).Create(serverPod) if err != nil { Fail(fmt.Sprintf("Failed to create serverPod: %v", err)) @@ -547,7 +554,7 @@ var _ = Describe("Pods", func() { // We call defer here in case there is a problem with // the test so we can ensure that we clean up after // ourselves - podClient.Delete(pod.Name) + podClient.Delete(pod.Name, api.NewDeleteOptions(0)) }() By("waiting for the pod to start running") @@ -620,7 +627,7 @@ var _ = Describe("Pods", func() { // We call defer here in case there is a problem with // the test so we can ensure that we clean up after // ourselves - podClient.Delete(pod.Name) + podClient.Delete(pod.Name, api.NewDeleteOptions(0)) }() By("waiting for the pod to start running") diff --git a/test/e2e/util.go b/test/e2e/util.go index 38b940f08c5..467374de5d5 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -446,20 +446,24 @@ func expectNoError(err error, explain ...interface{}) { ExpectWithOffset(1, err).NotTo(HaveOccurred(), explain...) } -// Stops everything from filePath from namespace ns and checks if everything maching selectors from the given namespace is correctly stopped. +// Stops everything from filePath from namespace ns and checks if everything matching selectors from the given namespace is correctly stopped. func cleanup(filePath string, ns string, selectors ...string) { - By("using stop to clean up resources") + By("using delete to clean up resources") var nsArg string if ns != "" { nsArg = fmt.Sprintf("--namespace=%s", ns) } - runKubectl("stop", "-f", filePath, nsArg) + runKubectl("stop", "--grace-period=0", "-f", filePath, nsArg) for _, selector := range selectors { - resources := runKubectl("get", "pods,rc,se", "-l", selector, "--no-headers", nsArg) + resources := runKubectl("get", "rc,se", "-l", selector, "--no-headers", nsArg) if resources != "" { Failf("Resources left running after stop:\n%s", resources) } + pods := runKubectl("get", "pods", "-l", selector, nsArg, "-t", "{{ range .items }}{{ if not .metadata.deletionTimestamp }}{{ .metadata.name }}{{ \"\\n\" }}{{ end }}{{ end }}") + if pods != "" { + Failf("Pods left unterminated after stop:\n%s", pods) + } } } diff --git a/test/integration/auth_test.go b/test/integration/auth_test.go index ada2c441df7..06486930156 100644 --- a/test/integration/auth_test.go +++ b/test/integration/auth_test.go @@ -240,7 +240,7 @@ var deleteNow string = ` { "kind": "DeleteOptions", "apiVersion": "v1beta3", - "gracePeriodSeconds": null%s + "gracePeriodSeconds": 0%s } ` diff --git a/test/integration/etcd_tools_test.go b/test/integration/etcd_tools_test.go index 0197968eb56..910ae30651b 100644 --- a/test/integration/etcd_tools_test.go +++ b/test/integration/etcd_tools_test.go @@ -88,6 +88,58 @@ func TestExtractObj(t *testing.T) { }) } +func TestWriteTTL(t *testing.T) { + client := framework.NewEtcdClient() + helper := tools.EtcdHelper{Client: client, Codec: stringCodec{}} + framework.WithEtcdKey(func(key string) { + _, err := client.Set(key, "object", 0) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + s := fakeAPIObject("") + err = helper.GuaranteedUpdate(key, &s, false, func(obj runtime.Object, res tools.ResponseMeta) (runtime.Object, *uint64, error) { + if *(obj.(*fakeAPIObject)) != "object" { + t.Fatalf("unexpected existing object: %v", obj) + } + if res.TTL != 0 { + t.Fatalf("unexpected TTL: %#v", res) + } + ttl := uint64(10) + out := fakeAPIObject("test") + return &out, &ttl, nil + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if s != "test" { + t.Errorf("unexpected response: %#v", s) + } + if res, err := client.Get(key, false, false); err != nil || res == nil || res.Node.TTL != 10 { + t.Fatalf("unexpected get: %v %#v", err, res) + } + + err = helper.GuaranteedUpdate(key, &s, false, func(obj runtime.Object, res tools.ResponseMeta) (runtime.Object, *uint64, error) { + if *(obj.(*fakeAPIObject)) != "test" { + t.Fatalf("unexpected existing object: %v", obj) + } + if res.TTL <= 1 { + t.Fatalf("unexpected TTL: %#v", res) + } + out := fakeAPIObject("test2") + return &out, nil, nil + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if s != "test2" { + t.Errorf("unexpected response: %#v", s) + } + if res, err := client.Get(key, false, false); err != nil || res == nil || res.Node.TTL <= 1 { + t.Fatalf("unexpected get: %v %#v", err, res) + } + }) +} + func TestWatch(t *testing.T) { client := framework.NewEtcdClient() helper := tools.NewEtcdHelper(client, testapi.Codec(), etcdtest.PathPrefix()) diff --git a/test/integration/scheduler_test.go b/test/integration/scheduler_test.go index e1733929c7e..8f6009b5c62 100644 --- a/test/integration/scheduler_test.go +++ b/test/integration/scheduler_test.go @@ -277,7 +277,7 @@ func DoTestUnschedulableNodes(t *testing.T, restClient *client.Client, nodeStore t.Logf("Test %d: Pod got scheduled on a schedulable node", i) } - err = restClient.Pods(api.NamespaceDefault).Delete(myPod.Name, nil) + err = restClient.Pods(api.NamespaceDefault).Delete(myPod.Name, api.NewDeleteOptions(0)) if err != nil { t.Errorf("Failed to delete pod: %v", err) }