From 89f1f3b1b8ec99d7efb0ed800b35828962b76317 Mon Sep 17 00:00:00 2001 From: Clayton Coleman Date: Mon, 27 Jul 2015 16:41:31 -0400 Subject: [PATCH] Alter graceful deletion to not use TTL Avoid TTL by deleting pods immediately when they aren't scheduled, and letting the Kubelet delete them otherwise. Ensure the Kubelet uses pod.Spec.TerminationGracePeriodSeconds when no pod.DeletionGracePeriodSeconds is available. --- cmd/integration/integration.go | 5 +- docs/getting-started-guides/logging.md | 1 + hack/test-cmd.sh | 5 -- pkg/api/rest/delete.go | 7 ++ pkg/api/rest/resttest/resttest.go | 111 +++++++++++++++--------- pkg/api/validation/validation.go | 2 +- pkg/api/validation/validation_test.go | 27 ++++++ pkg/kubectl/describe.go | 2 +- pkg/kubelet/dockertools/manager.go | 23 +++-- pkg/kubelet/dockertools/manager_test.go | 4 +- pkg/kubelet/kubelet.go | 7 +- pkg/kubelet/status_manager.go | 15 ++-- pkg/registry/generic/etcd/etcd.go | 39 ++++++++- pkg/registry/pod/etcd/etcd_test.go | 13 ++- pkg/registry/pod/rest.go | 18 +++- test/e2e/util.go | 2 +- test/integration/etcd_tools_test.go | 33 +++---- 17 files changed, 223 insertions(+), 91 deletions(-) diff --git a/cmd/integration/integration.go b/cmd/integration/integration.go index 3e7d694edb2..d314a149101 100644 --- a/cmd/integration/integration.go +++ b/cmd/integration/integration.go @@ -872,12 +872,13 @@ func runSchedulerNoPhantomPodsTest(client *client.Client) { // Delete a pod to free up room. glog.Infof("Deleting pod %v", bar.Name) - err = client.Pods(api.NamespaceDefault).Delete(bar.Name, api.NewDeleteOptions(1)) + err = client.Pods(api.NamespaceDefault).Delete(bar.Name, api.NewDeleteOptions(0)) if err != nil { glog.Fatalf("FAILED: couldn't delete pod %q: %v", bar.Name, err) } - time.Sleep(2 * time.Second) + //TODO: reenable once fake_docker_client handles deletion cleanly + //time.Sleep(2 * time.Second) pod.ObjectMeta.Name = "phantom.baz" baz, err := client.Pods(api.NamespaceDefault).Create(pod) diff --git a/docs/getting-started-guides/logging.md b/docs/getting-started-guides/logging.md index 7062a0a09e1..2d61fa0cc87 100644 --- a/docs/getting-started-guides/logging.md +++ b/docs/getting-started-guides/logging.md @@ -182,6 +182,7 @@ spec: mountPath: /varlog - name: containers mountPath: /var/lib/docker/containers + terminationGracePeriodSeconds: 30 volumes: - name: varlog hostPath: diff --git a/hack/test-cmd.sh b/hack/test-cmd.sh index c650bf06af6..b872eff37e5 100755 --- a/hack/test-cmd.sh +++ b/hack/test-cmd.sh @@ -246,11 +246,6 @@ runTests() { # Pre-condition: valid-pod POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:' # Command - kubectl delete pod valid-pod "${kube_flags[@]}" - # Post-condition: pod is still there, in terminating - kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:' - [[ "$(kubectl get pods "${kube_flags[@]}" | grep Terminating)" ]] - # Command kubectl delete pod valid-pod "${kube_flags[@]}" --grace-period=0 # Post-condition: no POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" '' diff --git a/pkg/api/rest/delete.go b/pkg/api/rest/delete.go index d1b04e6dded..c9df5970ef5 100644 --- a/pkg/api/rest/delete.go +++ b/pkg/api/rest/delete.go @@ -17,8 +17,11 @@ limitations under the License. package rest import ( + "time" + "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/runtime" + "k8s.io/kubernetes/pkg/util" ) // RESTDeleteStrategy defines deletion behavior on an object that follows Kubernetes @@ -59,6 +62,8 @@ func BeforeDelete(strategy RESTDeleteStrategy, ctx api.Context, obj runtime.Obje if period > *objectMeta.DeletionGracePeriodSeconds { return false, true, nil } + now := util.NewTime(util.Now().Add(time.Second * time.Duration(*options.GracePeriodSeconds))) + objectMeta.DeletionTimestamp = &now objectMeta.DeletionGracePeriodSeconds = &period options.GracePeriodSeconds = &period return true, false, nil @@ -71,6 +76,8 @@ func BeforeDelete(strategy RESTDeleteStrategy, ctx api.Context, obj runtime.Obje if !strategy.CheckGracefulDelete(obj, options) { return false, false, nil } + now := util.NewTime(util.Now().Add(time.Second * time.Duration(*options.GracePeriodSeconds))) + objectMeta.DeletionTimestamp = &now objectMeta.DeletionGracePeriodSeconds = options.GracePeriodSeconds return true, false, nil } diff --git a/pkg/api/rest/resttest/resttest.go b/pkg/api/rest/resttest/resttest.go index 5576692c093..694782c0730 100644 --- a/pkg/api/rest/resttest/resttest.go +++ b/pkg/api/rest/resttest/resttest.go @@ -114,17 +114,20 @@ func (t *Tester) TestUpdate(valid runtime.Object, existing, older runtime.Object // Test deleting an object. func (t *Tester) TestDelete(createFn func() runtime.Object, wasGracefulFn func() bool, invalid ...runtime.Object) { - t.testDeleteNonExist(createFn) - t.testDeleteNoGraceful(createFn, wasGracefulFn) - t.testDeleteInvokesValidation(invalid...) + t.TestDeleteNonExist(createFn) + t.TestDeleteNoGraceful(createFn, wasGracefulFn) + t.TestDeleteInvokesValidation(invalid...) // TODO: Test delete namespace mismatch rejection // once #5684 is fixed. } // Test graceful deletion. func (t *Tester) TestDeleteGraceful(createFn func() runtime.Object, expectedGrace int64, wasGracefulFn func() bool) { - t.testDeleteGracefulHasDefault(createFn(), expectedGrace, wasGracefulFn) - t.testDeleteGracefulUsesZeroOnNil(createFn(), 0) + t.TestDeleteGracefulHasDefault(createFn(), expectedGrace, wasGracefulFn) + t.TestDeleteGracefulWithValue(createFn(), expectedGrace, wasGracefulFn) + t.TestDeleteGracefulUsesZeroOnNil(createFn(), 0) + t.TestDeleteGracefulExtend(createFn(), expectedGrace, wasGracefulFn) + t.TestDeleteGracefulImmediate(createFn(), expectedGrace, wasGracefulFn) } // Test getting object. @@ -298,6 +301,33 @@ func (t *Tester) testUpdateFailsOnVersion(older runtime.Object) { // ============================================================================= // Deletion tests. +func (t *Tester) TestDeleteInvokesValidation(invalid ...runtime.Object) { + for i, obj := range invalid { + objectMeta := t.getObjectMetaOrFail(obj) + ctx := t.TestContext() + _, err := t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, nil) + if !errors.IsInvalid(err) { + t.Errorf("%d: Expected to get an invalid resource error, got %v", i, err) + } + } +} + +func (t *Tester) TestDeleteNonExist(createFn func() runtime.Object) { + existing := createFn() + objectMeta := t.getObjectMetaOrFail(existing) + context := t.TestContext() + + t.withStorageError(&etcd.EtcdError{ErrorCode: tools.EtcdErrorCodeNotFound}, func() { + _, err := t.storage.(rest.GracefulDeleter).Delete(context, objectMeta.Name, nil) + if err == nil || !errors.IsNotFound(err) { + t.Fatalf("Unexpected error: %v", err) + } + }) +} + +// ============================================================================= +// Graceful Deletion tests. + func (t *Tester) TestDeleteNoGraceful(createFn func() runtime.Object, wasGracefulFn func() bool) { existing := createFn() objectMeta := t.getObjectMetaOrFail(existing) @@ -314,41 +344,7 @@ func (t *Tester) TestDeleteNoGraceful(createFn func() runtime.Object, wasGracefu } } -func (t *Tester) testDeleteInvokesValidation(invalid ...runtime.Object) { - for i, obj := range invalid { - objectMeta := t.getObjectMetaOrFail(obj) - ctx := t.TestContext() - _, err := t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, nil) - if !errors.IsInvalid(err) { - t.Errorf("%d: Expected to get an invalid resource error, got %v", i, err) - } - } -} - -func (t *Tester) testDeleteNonExist(createFn func() runtime.Object) { - existing := createFn() - objectMeta := t.getObjectMetaOrFail(existing) - context := t.TestContext() - - t.withStorageError(&etcd.EtcdError{ErrorCode: tools.EtcdErrorCodeNotFound}, func() { - _, err := t.storage.(rest.GracefulDeleter).Delete(context, objectMeta.Name, nil) - if err == nil || !errors.IsNotFound(err) { - t.Fatalf("Unexpected error: %v", err) - } - }) -} - -// ============================================================================= -// Graceful Deletion tests. - -func (t *Tester) TestDeleteGraceful(createFn func() runtime.Object, expectedGrace int64, wasGracefulFn func() bool) { - t.TestDeleteGracefulHasDefault(createFn(), expectedGrace, wasGracefulFn) - t.TestDeleteGracefulWithValue(createFn(), expectedGrace, wasGracefulFn) - t.TestDeleteGracefulUsesZeroOnNil(createFn(), 0) - t.TestDeleteGracefulExtend(createFn(), expectedGrace, wasGracefulFn) -} - -func (t *Tester) testDeleteGracefulHasDefault(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) { +func (t *Tester) TestDeleteGracefulHasDefault(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) { objectMeta := t.getObjectMetaOrFail(existing) ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace) _, err := t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, &api.DeleteOptions{}) @@ -450,7 +446,40 @@ func (t *Tester) TestDeleteGracefulExtend(existing runtime.Object, expectedGrace } } -func (t *Tester) testDeleteGracefulUsesZeroOnNil(existing runtime.Object, expectedGrace int64) { +func (t *Tester) TestDeleteGracefulImmediate(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) { + objectMeta, err := api.ObjectMetaFor(existing) + if err != nil { + t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, existing) + } + + ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace) + _, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace)) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if !wasGracefulFn() { + t.Errorf("did not gracefully delete resource") + } + // second delete is immediate, resource is deleted + out, err := t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(0)) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + _, err = t.storage.(rest.Getter).Get(ctx, objectMeta.Name) + if !errors.IsNotFound(err) { + t.Errorf("unexpected error, object should be deleted immediately: %v", err) + } + objectMeta, err = api.ObjectMetaFor(out) + if err != nil { + t.Errorf("unexpected error: %v", err) + return + } + if objectMeta.DeletionTimestamp == nil || objectMeta.DeletionGracePeriodSeconds == nil || *objectMeta.DeletionGracePeriodSeconds != 0 { + t.Errorf("unexpected deleted meta: %#v", objectMeta) + } +} + +func (t *Tester) TestDeleteGracefulUsesZeroOnNil(existing runtime.Object, expectedGrace int64) { objectMeta := t.getObjectMetaOrFail(existing) ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace) _, err := t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, nil) diff --git a/pkg/api/validation/validation.go b/pkg/api/validation/validation.go index ed9578ebe8b..d8422a55c57 100644 --- a/pkg/api/validation/validation.go +++ b/pkg/api/validation/validation.go @@ -272,7 +272,7 @@ func ValidateObjectMetaUpdate(new, old *api.ObjectMeta) errs.ValidationErrorList if old.DeletionGracePeriodSeconds != nil && new.DeletionGracePeriodSeconds == nil { new.DeletionGracePeriodSeconds = old.DeletionGracePeriodSeconds } - if new.DeletionGracePeriodSeconds != nil && *new.DeletionGracePeriodSeconds != *old.DeletionGracePeriodSeconds { + if new.DeletionGracePeriodSeconds != nil && old.DeletionGracePeriodSeconds != nil && *new.DeletionGracePeriodSeconds != *old.DeletionGracePeriodSeconds { allErrs = append(allErrs, errs.NewFieldInvalid("deletionGracePeriodSeconds", new.DeletionGracePeriodSeconds, "field is immutable; may only be changed via deletion")) } diff --git a/pkg/api/validation/validation_test.go b/pkg/api/validation/validation_test.go index b20de4cd9d2..1c8c1e737c3 100644 --- a/pkg/api/validation/validation_test.go +++ b/pkg/api/validation/validation_test.go @@ -1317,6 +1317,9 @@ func TestValidatePod(t *testing.T) { } func TestValidatePodUpdate(t *testing.T) { + now := util.Now() + grace := int64(30) + grace2 := int64(31) tests := []struct { a api.Pod b api.Pod @@ -1403,6 +1406,30 @@ func TestValidatePodUpdate(t *testing.T) { false, "more containers", }, + { + api.Pod{ + ObjectMeta: api.ObjectMeta{Name: "foo", DeletionTimestamp: &now}, + Spec: api.PodSpec{Containers: []api.Container{{Image: "foo:V1"}}}, + }, + api.Pod{ + ObjectMeta: api.ObjectMeta{Name: "foo"}, + Spec: api.PodSpec{Containers: []api.Container{{Image: "foo:V1"}}}, + }, + true, + "deletion timestamp filled out", + }, + { + api.Pod{ + ObjectMeta: api.ObjectMeta{Name: "foo", DeletionTimestamp: &now, DeletionGracePeriodSeconds: &grace}, + Spec: api.PodSpec{Containers: []api.Container{{Image: "foo:V1"}}}, + }, + api.Pod{ + ObjectMeta: api.ObjectMeta{Name: "foo", DeletionTimestamp: &now, DeletionGracePeriodSeconds: &grace2}, + Spec: api.PodSpec{Containers: []api.Container{{Image: "foo:V1"}}}, + }, + false, + "deletion grace period seconds cleared", + }, { api.Pod{ ObjectMeta: api.ObjectMeta{Name: "foo"}, diff --git a/pkg/kubectl/describe.go b/pkg/kubectl/describe.go index 107abc65f3b..c3de3b8506d 100644 --- a/pkg/kubectl/describe.go +++ b/pkg/kubectl/describe.go @@ -419,7 +419,7 @@ func describePod(pod *api.Pod, rcs []api.ReplicationController, events *api.Even fmt.Fprintf(out, "Labels:\t%s\n", formatLabels(pod.Labels)) if pod.DeletionTimestamp != nil { fmt.Fprintf(out, "Status:\tTerminating (expires %s)\n", pod.DeletionTimestamp.Time.Format(time.RFC1123Z)) - fmt.Fprintf(out, "Termination Grace Period:\t%ss\n", pod.DeletionGracePeriodSeconds) + fmt.Fprintf(out, "Termination Grace Period:\t%ds\n", pod.DeletionGracePeriodSeconds) } else { fmt.Fprintf(out, "Status:\t%s\n", string(pod.Status.Phase)) } diff --git a/pkg/kubelet/dockertools/manager.go b/pkg/kubelet/dockertools/manager.go index 3de60eb1926..e019495caa4 100644 --- a/pkg/kubelet/dockertools/manager.go +++ b/pkg/kubelet/dockertools/manager.go @@ -1234,14 +1234,19 @@ func (dm *DockerManager) killContainer(containerID types.UID, container *api.Con } gracePeriod := int64(minimumGracePeriodInSeconds) - if pod != nil && pod.DeletionGracePeriodSeconds != nil { - gracePeriod = *pod.DeletionGracePeriodSeconds + if pod != nil { + switch { + case pod.DeletionGracePeriodSeconds != nil: + gracePeriod = *pod.DeletionGracePeriodSeconds + case pod.Spec.TerminationGracePeriodSeconds != nil: + gracePeriod = *pod.Spec.TerminationGracePeriodSeconds + } } glog.V(2).Infof("Killing container %q with %d second grace period", name, gracePeriod) + start := util.Now() if pod != nil && container != nil && container.Lifecycle != nil && container.Lifecycle.PreStop != nil { glog.V(4).Infof("Running preStop hook for container %q", name) - start := util.Now() // TODO: timebox PreStop execution to at most gracePeriod if err := dm.runner.Run(ID, pod, container, container.Lifecycle.PreStop); err != nil { glog.Errorf("preStop hook for container %q failed: %v", name, err) @@ -1256,6 +1261,11 @@ func (dm *DockerManager) killContainer(containerID types.UID, container *api.Con gracePeriod = minimumGracePeriodInSeconds } err := dm.client.StopContainer(ID, uint(gracePeriod)) + if err == nil { + glog.V(2).Infof("Container %q exited after %s", name, util.Now().Sub(start.Time)) + } else { + glog.V(2).Infof("Container %q termination failed after %s: %v", name, util.Now().Sub(start.Time), err) + } ref, ok := dm.containerRefManager.GetRef(ID) if !ok { glog.Warningf("No ref for pod '%q'", name) @@ -1498,11 +1508,6 @@ func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, runningPod kub containersToKeep := make(map[kubeletTypes.DockerID]int) createPodInfraContainer := false - if pod.DeletionTimestamp != nil { - glog.V(4).Infof("Pod is terminating %q", podFullName) - return PodContainerChangesSpec{}, nil - } - var err error var podInfraContainerID kubeletTypes.DockerID var changed bool @@ -1624,10 +1629,10 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod podFullName := kubecontainer.GetPodFullName(pod) containerChanges, err := dm.computePodContainerChanges(pod, runningPod, podStatus) - glog.V(3).Infof("Got container changes for pod %q: %+v", podFullName, containerChanges) if err != nil { return err } + glog.V(3).Infof("Got container changes for pod %q: %+v", podFullName, containerChanges) if containerChanges.StartInfraContainer || (len(containerChanges.ContainersToKeep) == 0 && len(containerChanges.ContainersToStart) == 0) { if len(containerChanges.ContainersToKeep) == 0 && len(containerChanges.ContainersToStart) == 0 { diff --git a/pkg/kubelet/dockertools/manager_test.go b/pkg/kubelet/dockertools/manager_test.go index 0da1ca938b1..606404fb6a3 100644 --- a/pkg/kubelet/dockertools/manager_test.go +++ b/pkg/kubelet/dockertools/manager_test.go @@ -568,7 +568,7 @@ func replaceProber(dm *DockerManager, result probe.Result, err error) { // Unknown or error. // // PLEASE READ THE PROBE DOCS BEFORE CHANGING THIS TEST IF YOU ARE UNSURE HOW PROBES ARE SUPPOSED TO WORK: -// (See https://github.com/GoogleCloudPlatform/kubernetes/blob/master/docs/pod-states.md#pod-conditions) +// (See https://k8s.io/kubernetes/blob/master/docs/pod-states.md#pod-conditions) func TestProbeContainer(t *testing.T) { manager, _ := newTestDockerManager() dc := &docker.APIContainers{ @@ -1150,7 +1150,7 @@ func TestSyncPodBadHash(t *testing.T) { // Check the pod infra container. "inspect_container", // Kill and restart the bad hash container. - "stop", "create", "start", + "stop", "create", "start", "inspect_container", }) if err := fakeDocker.AssertStopped([]string{"1234"}); err != nil { diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 5e5a094c249..41850d039c7 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -1177,9 +1177,10 @@ func (kl *Kubelet) syncPod(pod *api.Pod, mirrorPod *api.Pod, runningPod kubecont }() // Kill pods we can't run. - err := canRunPod(pod) - if err != nil { - kl.killPod(pod, runningPod) + if err := canRunPod(pod); err != nil || pod.DeletionTimestamp != nil { + if err := kl.killPod(pod, runningPod); err != nil { + util.HandleError(err) + } return err } diff --git a/pkg/kubelet/status_manager.go b/pkg/kubelet/status_manager.go index 5e9c753f9fa..4d754286309 100644 --- a/pkg/kubelet/status_manager.go +++ b/pkg/kubelet/status_manager.go @@ -123,7 +123,7 @@ func (s *statusManager) SetPodStatus(pod *api.Pod, status api.PodStatus) { // Currently this routine is not called for the same pod from multiple // workers and/or the kubelet but dropping the lock before sending the // status down the channel feels like an easy way to get a bullet in foot. - if !found || !isStatusEqual(&oldStatus, &status) { + if !found || !isStatusEqual(&oldStatus, &status) || pod.DeletionTimestamp != nil { s.podStatuses[podFullName] = status s.podStatusChannel <- podStatusSyncRequest{pod, status} } else { @@ -173,11 +173,16 @@ func (s *statusManager) syncBatch() error { if err == nil { glog.V(3).Infof("Status for pod %q updated successfully", kubeletUtil.FormatPodName(pod)) - if statusPod.DeletionTimestamp == nil || !allTerminated(statusPod.Status.ContainerStatuses) { + if pod.DeletionTimestamp == nil { + return nil + } + if !notRunning(pod.Status.ContainerStatuses) { + glog.V(3).Infof("Pod %q is terminated, but some pods are still running", pod.Name) return nil } if err := s.kubeClient.Pods(statusPod.Namespace).Delete(statusPod.Name, api.NewDeleteOptions(0)); err == nil { glog.V(3).Infof("Pod %q fully terminated and removed from etcd", statusPod.Name) + s.DeletePodStatus(podFullName) return nil } } @@ -194,11 +199,11 @@ func (s *statusManager) syncBatch() error { return fmt.Errorf("error updating status for pod %q: %v", kubeletUtil.FormatPodName(pod), err) } -// allTerminated returns true if every status is terminated, or the status list +// notRunning returns true if every status is terminated or waiting, or the status list // is empty. -func allTerminated(statuses []api.ContainerStatus) bool { +func notRunning(statuses []api.ContainerStatus) bool { for _, status := range statuses { - if status.State.Terminated == nil { + if status.State.Terminated == nil && status.State.Waiting == nil { return false } } diff --git a/pkg/registry/generic/etcd/etcd.go b/pkg/registry/generic/etcd/etcd.go index 0b0d4190ac9..3242d20bc73 100644 --- a/pkg/registry/generic/etcd/etcd.go +++ b/pkg/registry/generic/etcd/etcd.go @@ -341,6 +341,11 @@ func (e *Etcd) Get(ctx api.Context, name string) (runtime.Object, error) { return obj, nil } +var ( + errAlreadyDeleting = fmt.Errorf("abort delete") + errDeleteNow = fmt.Errorf("delete now") +) + // Delete removes the item from etcd. func (e *Etcd) Delete(ctx api.Context, name string, options *api.DeleteOptions) (runtime.Object, error) { key, err := e.KeyFunc(ctx, name) @@ -367,13 +372,41 @@ func (e *Etcd) Delete(ctx api.Context, name string, options *api.DeleteOptions) if pendingGraceful { return e.finalizeDelete(obj, false) } - if graceful && *options.GracePeriodSeconds != 0 { + if graceful { trace.Step("Graceful deletion") out := e.NewFunc() - if err := e.Storage.Set(key, obj, out, uint64(*options.GracePeriodSeconds)); err != nil { + lastGraceful := int64(0) + err := e.Storage.GuaranteedUpdate( + key, out, false, + storage.SimpleUpdate(func(existing runtime.Object) (runtime.Object, error) { + graceful, pendingGraceful, err := rest.BeforeDelete(e.DeleteStrategy, ctx, existing, options) + if err != nil { + return nil, err + } + if pendingGraceful { + return nil, errAlreadyDeleting + } + if !graceful { + return nil, errDeleteNow + } + lastGraceful = *options.GracePeriodSeconds + return existing, nil + }), + ) + switch err { + case nil: + if lastGraceful > 0 { + return out, nil + } + // fall through and delete immediately + case errDeleteNow: + // we've updated the object to have a zero grace period, or it's already at 0, so + // we should fall through and truly delete the object. + case errAlreadyDeleting: + return e.finalizeDelete(obj, true) + default: return nil, etcderr.InterpretUpdateError(err, e.EndpointName, name) } - return e.finalizeDelete(out, true) } // delete immediately, or no graceful deletion supported diff --git a/pkg/registry/pod/etcd/etcd_test.go b/pkg/registry/pod/etcd/etcd_test.go index 1bd879b2011..f565dfcabc2 100644 --- a/pkg/registry/pod/etcd/etcd_test.go +++ b/pkg/registry/pod/etcd/etcd_test.go @@ -121,8 +121,10 @@ func TestDelete(t *testing.T) { key = etcdtest.AddPrefix(key) test := resttest.New(t, storage, fakeEtcdClient.SetError) + expectedNode := "some-node" createFn := func() runtime.Object { pod := validChangedPod() + pod.Spec.NodeName = expectedNode fakeEtcdClient.Data[key] = tools.EtcdResponseWithError{ R: &etcd.Response{ Node: &etcd.Node{ @@ -137,9 +139,18 @@ func TestDelete(t *testing.T) { if fakeEtcdClient.Data[key].R.Node == nil { return false } - return fakeEtcdClient.Data[key].R.Node.TTL != 0 + obj, err := latest.Codec.Decode([]byte(fakeEtcdClient.Data[key].R.Node.Value)) + if err != nil { + return false + } + pod := obj.(*api.Pod) + t.Logf("found object %#v", pod.ObjectMeta) + return pod.DeletionTimestamp != nil && pod.DeletionGracePeriodSeconds != nil && *pod.DeletionGracePeriodSeconds != 0 } test.TestDeleteGraceful(createFn, 30, gracefulSetFn) + + expectedNode = "" + test.TestDelete(createFn, gracefulSetFn) } func expectPod(t *testing.T, out runtime.Object) (*api.Pod, bool) { diff --git a/pkg/registry/pod/rest.go b/pkg/registry/pod/rest.go index 43135278d50..699d9cddecc 100644 --- a/pkg/registry/pod/rest.go +++ b/pkg/registry/pod/rest.go @@ -92,22 +92,38 @@ func (podStrategy) CheckGracefulDelete(obj runtime.Object, options *api.DeleteOp if options == nil { return false } + pod := obj.(*api.Pod) period := int64(0) // user has specified a value if options.GracePeriodSeconds != nil { period = *options.GracePeriodSeconds } else { // use the default value if set, or deletes the pod immediately (0) - pod := obj.(*api.Pod) if pod.Spec.TerminationGracePeriodSeconds != nil { period = *pod.Spec.TerminationGracePeriodSeconds } } + // if the pod is not scheduled, delete immediately + if len(pod.Spec.NodeName) == 0 { + period = 0 + } // ensure the options and the pod are in sync options.GracePeriodSeconds = &period return true } +type podStrategyWithoutGraceful struct { + podStrategy +} + +// CheckGracefulDelete prohibits graceful deletion. +func (podStrategyWithoutGraceful) CheckGracefulDelete(obj runtime.Object, options *api.DeleteOptions) bool { + return false +} + +// StrategyWithoutGraceful implements the legacy instant delele behavior. +var StrategyWithoutGraceful = podStrategyWithoutGraceful{Strategy} + type podStatusStrategy struct { podStrategy } diff --git a/test/e2e/util.go b/test/e2e/util.go index 16156a88aca..f239b9724bc 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -841,7 +841,7 @@ func cleanup(filePath string, ns string, selectors ...string) { runKubectl("stop", "--grace-period=0", "-f", filePath, nsArg) for _, selector := range selectors { - resources := runKubectl("get", "rc,se", "-l", selector, "--no-headers", nsArg) + resources := runKubectl("get", "rc,svc", "-l", selector, "--no-headers", nsArg) if resources != "" { Failf("Resources left running after stop:\n%s", resources) } diff --git a/test/integration/etcd_tools_test.go b/test/integration/etcd_tools_test.go index b3b37bd0bc2..60b9408edb0 100644 --- a/test/integration/etcd_tools_test.go +++ b/test/integration/etcd_tools_test.go @@ -82,49 +82,50 @@ func TestGet(t *testing.T) { func TestWriteTTL(t *testing.T) { client := framework.NewEtcdClient() - helper := tools.EtcdHelper{Client: client, Codec: stringCodec{}} + etcdStorage := etcd.NewEtcdStorage(client, testapi.Codec(), "") framework.WithEtcdKey(func(key string) { - _, err := client.Set(key, "object", 0) - if err != nil { + testObject := api.ServiceAccount{ObjectMeta: api.ObjectMeta{Name: "foo"}} + if err := etcdStorage.Set(key, &testObject, nil, 0); err != nil { t.Fatalf("unexpected error: %v", err) } - s := fakeAPIObject("") - err = helper.GuaranteedUpdate(key, &s, false, func(obj runtime.Object, res tools.ResponseMeta) (runtime.Object, *uint64, error) { - if *(obj.(*fakeAPIObject)) != "object" { + result := &api.ServiceAccount{} + err := etcdStorage.GuaranteedUpdate(key, result, false, func(obj runtime.Object, res storage.ResponseMeta) (runtime.Object, *uint64, error) { + if in, ok := obj.(*api.ServiceAccount); !ok || in.Name != "foo" { t.Fatalf("unexpected existing object: %v", obj) } if res.TTL != 0 { t.Fatalf("unexpected TTL: %#v", res) } ttl := uint64(10) - out := fakeAPIObject("test") - return &out, &ttl, nil + out := &api.ServiceAccount{ObjectMeta: api.ObjectMeta{Name: "out"}} + return out, &ttl, nil }) if err != nil { t.Fatalf("unexpected error: %v", err) } - if s != "test" { - t.Errorf("unexpected response: %#v", s) + if result.Name != "out" { + t.Errorf("unexpected response: %#v", result) } if res, err := client.Get(key, false, false); err != nil || res == nil || res.Node.TTL != 10 { t.Fatalf("unexpected get: %v %#v", err, res) } - err = helper.GuaranteedUpdate(key, &s, false, func(obj runtime.Object, res tools.ResponseMeta) (runtime.Object, *uint64, error) { - if *(obj.(*fakeAPIObject)) != "test" { + result = &api.ServiceAccount{} + err = etcdStorage.GuaranteedUpdate(key, result, false, func(obj runtime.Object, res storage.ResponseMeta) (runtime.Object, *uint64, error) { + if in, ok := obj.(*api.ServiceAccount); !ok || in.Name != "out" { t.Fatalf("unexpected existing object: %v", obj) } if res.TTL <= 1 { t.Fatalf("unexpected TTL: %#v", res) } - out := fakeAPIObject("test2") - return &out, nil, nil + out := &api.ServiceAccount{ObjectMeta: api.ObjectMeta{Name: "out2"}} + return out, nil, nil }) if err != nil { t.Fatalf("unexpected error: %v", err) } - if s != "test2" { - t.Errorf("unexpected response: %#v", s) + if result.Name != "out2" { + t.Errorf("unexpected response: %#v", result) } if res, err := client.Get(key, false, false); err != nil || res == nil || res.Node.TTL <= 1 { t.Fatalf("unexpected get: %v %#v", err, res)