Merge pull request #6992 from yujuhong/kill_pods
Prioritize deleting the non-running pods when reducing replicas
This commit is contained in: commit 8ae4cb384e
@@ -58,3 +58,13 @@ func GetExistingContainerStatus(statuses []ContainerStatus, name string) Contain
     }
     return ContainerStatus{}
 }
+
+// IsPodReady returns true if a pod is ready; false otherwise.
+func IsPodReady(pod *Pod) bool {
+    for _, c := range pod.Status.Conditions {
+        if c.Type == PodReady && c.Status == ConditionTrue {
+            return true
+        }
+    }
+    return false
+}
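For orientation, here is a minimal, self-contained sketch of how a caller might use a readiness check shaped like the helper added in this hunk. All of the types below are local stand-ins that only mirror the fields the diff touches; they are not the real api package definitions.

// Stand-in types mirroring the shapes used in the hunk above; not the real
// api package. Only the fields IsPodReady needs are modeled here.
package main

import "fmt"

type ConditionStatus string
type PodConditionType string

const (
    ConditionTrue ConditionStatus  = "True"
    PodReady      PodConditionType = "Ready"
)

type PodCondition struct {
    Type   PodConditionType
    Status ConditionStatus
}

type PodStatus struct {
    Conditions []PodCondition
}

type Pod struct {
    Name   string
    Status PodStatus
}

// IsPodReady reports whether the pod has a Ready condition set to True,
// matching the logic added in the hunk above.
func IsPodReady(pod *Pod) bool {
    for _, c := range pod.Status.Conditions {
        if c.Type == PodReady && c.Status == ConditionTrue {
            return true
        }
    }
    return false
}

func main() {
    ready := &Pod{Name: "a", Status: PodStatus{Conditions: []PodCondition{{Type: PodReady, Status: ConditionTrue}}}}
    notReady := &Pod{Name: "b"}
    fmt.Println(IsPodReady(ready), IsPodReady(notReady)) // true false
}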
@@ -18,6 +18,7 @@ package controller
 
 import (
     "fmt"
+    "sort"
     "sync"
     "time"
 
@@ -163,7 +164,7 @@ func (rm *ReplicationManager) watchControllers(resourceVersion *string) {
         if !ok {
             if status, ok := event.Object.(*api.Status); ok {
                 if status.Status == api.StatusFailure {
-                    glog.Errorf("failed to watch: %v", status)
+                    glog.Errorf("Failed to watch: %v", status)
                     // Clear resource version here, as above, this won't hurt consistency, but we
                     // should consider introspecting more carefully here. (or make the apiserver smarter)
                     // "why not both?"
@@ -178,7 +179,7 @@ func (rm *ReplicationManager) watchControllers(resourceVersion *string) {
             *resourceVersion = rc.ResourceVersion
             // Sync even if this is a deletion event, to ensure that we leave
             // it in the desired state.
-            glog.V(4).Infof("About to sync from watch: %v", rc.Name)
+            glog.V(4).Infof("About to sync from watch: %q", rc.Name)
             if err := rm.syncHandler(*rc); err != nil {
                 util.HandleError(fmt.Errorf("unexpected sync error: %v", err))
             }
@@ -186,33 +187,54 @@ func (rm *ReplicationManager) watchControllers(resourceVersion *string) {
     }
 }
 
-// Helper function. Also used in pkg/registry/controller, for now.
-func FilterActivePods(pods []api.Pod) []*api.Pod {
+// filterActivePods returns pods that have not terminated.
+func filterActivePods(pods []api.Pod) []*api.Pod {
     var result []*api.Pod
-    for i := range pods {
-        value := &pods[i]
+    for _, value := range pods {
         if api.PodSucceeded != value.Status.Phase &&
             api.PodFailed != value.Status.Phase {
-            result = append(result, value)
+            result = append(result, &value)
         }
     }
     return result
 }
 
+type activePods []*api.Pod
+
+func (s activePods) Len() int { return len(s) }
+func (s activePods) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+
+func (s activePods) Less(i, j int) bool {
+    // Unassigned < assigned
+    if s[i].Spec.Host == "" && s[j].Spec.Host != "" {
+        return true
+    }
+    // PodPending < PodUnknown < PodRunning
+    m := map[api.PodPhase]int{api.PodPending: 0, api.PodUnknown: 1, api.PodRunning: 2}
+    if m[s[i].Status.Phase] != m[s[j].Status.Phase] {
+        return m[s[i].Status.Phase] < m[s[j].Status.Phase]
+    }
+    // Not ready < ready
+    if !api.IsPodReady(s[i]) && api.IsPodReady(s[j]) {
+        return true
+    }
+    return false
+}
+
 func (rm *ReplicationManager) syncReplicationController(controller api.ReplicationController) error {
     s := labels.Set(controller.Spec.Selector).AsSelector()
     podList, err := rm.kubeClient.Pods(controller.Namespace).List(s)
     if err != nil {
         return err
     }
-    filteredList := FilterActivePods(podList.Items)
-    activePods := len(filteredList)
-    diff := activePods - controller.Spec.Replicas
+    filteredList := filterActivePods(podList.Items)
+    numActivePods := len(filteredList)
+    diff := numActivePods - controller.Spec.Replicas
     if diff < 0 {
         diff *= -1
         wait := sync.WaitGroup{}
         wait.Add(diff)
-        glog.V(2).Infof("Too few \"%s\" replicas, creating %d\n", controller.Name, diff)
+        glog.V(2).Infof("Too few %q replicas, creating %d", controller.Name, diff)
         for i := 0; i < diff; i++ {
             go func() {
                 defer wait.Done()
@@ -221,7 +243,12 @@ func (rm *ReplicationManager) syncReplicationController(controller api.Replicati
         }
         wait.Wait()
     } else if diff > 0 {
-        glog.V(2).Infof("Too many \"%s\" replicas, deleting %d\n", controller.Name, diff)
+        glog.V(2).Infof("Too many %q replicas, deleting %d", controller.Name, diff)
+        // Sort the pods in the order such that not-ready < ready, unscheduled
+        // < scheduled, and pending < running. This ensures that we delete pods
+        // in the earlier stages whenever possible.
+        sort.Sort(activePods(filteredList))
+
         wait := sync.WaitGroup{}
         wait.Add(diff)
         for i := 0; i < diff; i++ {
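The Less method added above encodes the delete priority in three tiers: unassigned before assigned, pending before unknown before running, and not-ready before ready. The sketch below is a self-contained illustration of that ordering with stand-in types (a plain pod struct and a comparator that mirrors the diff's logic, not the repository's activePods type); the TestSortingActivePods hunk further down exercises the real type the same way.

// Self-contained sketch of the delete-priority ordering established by
// sort.Sort(activePods(filteredList)). Stand-in types only; not the real
// api package.
package main

import (
    "fmt"
    "sort"
)

type pod struct {
    name  string
    host  string // "" means not scheduled yet
    phase string // "Pending", "Unknown", or "Running"
    ready bool
}

type byDeletePriority []*pod

func (s byDeletePriority) Len() int      { return len(s) }
func (s byDeletePriority) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

func (s byDeletePriority) Less(i, j int) bool {
    // Unassigned < assigned.
    if s[i].host == "" && s[j].host != "" {
        return true
    }
    // Pending < Unknown < Running.
    rank := map[string]int{"Pending": 0, "Unknown": 1, "Running": 2}
    if rank[s[i].phase] != rank[s[j].phase] {
        return rank[s[i].phase] < rank[s[j].phase]
    }
    // Not ready < ready.
    if !s[i].ready && s[j].ready {
        return true
    }
    return false
}

func main() {
    pods := []*pod{
        {name: "ready-running", host: "node-1", phase: "Running", ready: true},
        {name: "unscheduled", host: "", phase: "Pending"},
        {name: "not-ready-running", host: "node-1", phase: "Running"},
        {name: "scheduled-pending", host: "node-2", phase: "Pending"},
        {name: "unknown", host: "node-1", phase: "Unknown"},
    }
    sort.Sort(byDeletePriority(pods))
    for _, p := range pods {
        fmt.Println(p.name)
    }
    // Prints: unscheduled, scheduled-pending, unknown,
    // not-ready-running, ready-running.
}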
@@ -232,8 +259,8 @@ func (rm *ReplicationManager) syncReplicationController(controller api.Replicati
         }
         wait.Wait()
     }
-    if controller.Status.Replicas != activePods {
-        controller.Status.Replicas = activePods
+    if controller.Status.Replicas != numActivePods {
+        controller.Status.Replicas = numActivePods
         _, err = rm.kubeClient.ReplicationControllers(controller.Namespace).Update(&controller)
         if err != nil {
             return err
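The delete loop itself falls between the hunks shown here, but the surrounding context (wait := sync.WaitGroup{}, wait.Add(diff), wait.Wait()) indicates the same fan-out pattern as the create path, now fed from the front of the sorted slice. A minimal sketch of that shape, using a hypothetical deletePod helper rather than the repository's actual call:

// Sketch of the scale-down step around the hunks above: once the active pods
// are sorted by delete priority, the first diff entries are removed.
// deletePod is a hypothetical stand-in for the real API call.
package main

import (
    "fmt"
    "sync"
)

func deletePod(namespace, name string) {
    // Hypothetical placeholder for an API call that deletes the pod.
    fmt.Printf("deleting %s/%s\n", namespace, name)
}

func scaleDown(namespace string, sortedNames []string, diff int) {
    wait := sync.WaitGroup{}
    wait.Add(diff)
    for i := 0; i < diff; i++ {
        go func(name string) {
            defer wait.Done()
            deletePod(namespace, name)
        }(sortedNames[i])
    }
    wait.Wait()
}

func main() {
    // After sorting, the least-disruptive pods come first.
    sorted := []string{"unscheduled", "scheduled-pending", "unknown", "not-ready-running", "ready-running"}
    scaleDown("default", sorted, 2) // deletes "unscheduled" and "scheduled-pending"
}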
@@ -18,8 +18,11 @@ package controller
 
 import (
     "fmt"
+    "math/rand"
     "net/http"
     "net/http/httptest"
+    "reflect"
+    "sort"
     "sync"
     "testing"
     "time"
@@ -375,3 +378,52 @@ func TestWatchControllers(t *testing.T) {
         t.Errorf("Expected 1 call but got 0")
     }
 }
+
+func TestSortingActivePods(t *testing.T) {
+    numPods := 5
+    podList := newPodList(numPods)
+    pods := make([]*api.Pod, len(podList.Items))
+    for i := range podList.Items {
+        pods[i] = &podList.Items[i]
+    }
+    // pods[0] is not scheduled yet.
+    pods[0].Spec.Host = ""
+    pods[0].Status.Phase = api.PodPending
+    // pods[1] is scheduled but pending.
+    pods[1].Spec.Host = "bar"
+    pods[1].Status.Phase = api.PodPending
+    // pods[2] is unknown.
+    pods[2].Spec.Host = "foo"
+    pods[2].Status.Phase = api.PodUnknown
+    // pods[3] is running but not ready.
+    pods[3].Spec.Host = "foo"
+    pods[3].Status.Phase = api.PodRunning
+    // pods[4] is running and ready.
+    pods[4].Spec.Host = "foo"
+    pods[4].Status.Phase = api.PodRunning
+    pods[4].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue}}
+
+    getOrder := func(pods []*api.Pod) []string {
+        names := make([]string, len(pods))
+        for i := range pods {
+            names[i] = pods[i].Name
+        }
+        return names
+    }
+
+    expected := getOrder(pods)
+
+    for i := 0; i < 20; i++ {
+        idx := rand.Perm(numPods)
+        randomizedPods := make([]*api.Pod, numPods)
+        for j := 0; j < numPods; j++ {
+            randomizedPods[j] = pods[idx[j]]
+        }
+        sort.Sort(activePods(randomizedPods))
+        actual := getOrder(randomizedPods)
+
+        if !reflect.DeepEqual(actual, expected) {
+            t.Errorf("expected %v, got %v", expected, actual)
+        }
+    }
+}
@@ -315,14 +315,7 @@ func (e *EndpointController) syncService(key string) {
             continue
         }
 
-        inService := false
-        for _, c := range pod.Status.Conditions {
-            if c.Type == api.PodReady && c.Status == api.ConditionTrue {
-                inService = true
-                break
-            }
-        }
-        if !inService {
+        if !api.IsPodReady(pod) {
             glog.V(5).Infof("Pod is out of service: %v/%v", pod.Namespace, pod.Name)
             continue
         }
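The endpoints hunk above replaces the inline condition scan with a single api.IsPodReady check before skipping a pod. A self-contained sketch of the same filtering pattern, with stand-in types rather than the real api package:

// Stand-in sketch: skip pods that are not ready when collecting endpoint
// candidates. The pod type and isPodReady helper are local placeholders.
package main

import "fmt"

type pod struct {
    namespace, name string
    ready           bool
}

func isPodReady(p *pod) bool { return p.ready }

func readyPods(pods []*pod) []*pod {
    var out []*pod
    for _, p := range pods {
        if !isPodReady(p) {
            fmt.Printf("Pod is out of service: %v/%v\n", p.namespace, p.name)
            continue
        }
        out = append(out, p)
    }
    return out
}

func main() {
    pods := []*pod{
        {namespace: "default", name: "web-1", ready: true},
        {namespace: "default", name: "web-2", ready: false},
    }
    for _, p := range readyPods(pods) {
        fmt.Println("endpoint candidate:", p.name) // only web-1
    }
}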