Node controller supports disabling node probes.

The node controller now supports disabling the sending of node probes and the updating of node statuses; this is controlled by the --sync_node_status flag. Resolves #4565.
This commit is contained in:
Jerzy Szczepkowski
2015-02-24 07:43:58 +01:00
parent 08402d798c
commit e0548c3c03
5 changed files with 161 additions and 11 deletions

View File

@@ -77,7 +77,7 @@ func NewNodeController(
// Run creates the initial node list and starts syncing instances from the cloud provider, if any.
// It also starts syncing cluster node status.
func (s *NodeController) Run(period time.Duration, syncNodeList bool) {
func (s *NodeController) Run(period time.Duration, syncNodeList, syncNodeStatus bool) {
// Register the initial set of nodes with their status set.
var nodes *api.NodeList
var err error
@@ -96,7 +96,6 @@ func (s *NodeController) Run(period time.Duration, syncNodeList bool) {
glog.Errorf("Error loading initial static nodes: %v", err)
}
}
nodes = s.DoChecks(nodes)
nodes, err = s.PopulateIPs(nodes)
if err != nil {
glog.Errorf("Error getting nodes ips: %v", err)
@@ -114,12 +113,21 @@ func (s *NodeController) Run(period time.Duration, syncNodeList bool) {
}, period)
}
// Start syncing node status.
go util.Forever(func() {
if err = s.SyncNodeStatus(); err != nil {
glog.Errorf("Error syncing status: %v", err)
}
}, period)
if syncNodeStatus {
// Start syncing node status.
go util.Forever(func() {
if err = s.SyncNodeStatus(); err != nil {
glog.Errorf("Error syncing status: %v", err)
}
}, period)
} else {
// Start checking node reachability and evicting pods from timed-out nodes.
go util.Forever(func() {
if err = s.EvictTimeoutedPods(); err != nil {
glog.Errorf("Error evicting timeouted pods: %v", err)
}
}, period)
}
}
// RegisterNodes registers the given list of nodes; it keeps retrying for `retryCount` times.
@@ -216,6 +224,33 @@ func (s *NodeController) SyncNodeStatus() error {
return nil
}
// EvictTimeoutedPods lists all nodes through the kube client and, for every
// node whose latest ready time (as computed by latestReadyTime) plus
// podEvictionTimeout is already in the past, calls deletePods with that
// node's name.
//
// Only an error from listing the nodes is returned; otherwise the result is nil.
func (s *NodeController) EvictTimeoutedPods() error {
	nodeList, err := s.kubeClient.Nodes().List()
	if err != nil {
		return err
	}
	for i := range nodeList.Items {
		node := &nodeList.Items[i]
		// The clock is read once per node, so each node is judged against
		// the time at which it is actually examined.
		now := util.Now()
		deadline := latestReadyTime(node).Add(s.podEvictionTimeout)
		if !now.After(deadline) {
			// Still within the eviction timeout: leave this node's pods alone.
			continue
		}
		s.deletePods(node.Name)
	}
	return nil
}
// latestReadyTime returns the most recent LastProbeTime among the node's
// conditions of type NodeReady with status ConditionFull. The node's creation
// timestamp is the starting value, so it is returned when no such condition
// has a later probe time.
func latestReadyTime(node *api.Node) util.Time {
	latest := node.ObjectMeta.CreationTimestamp
	for i := range node.Status.Conditions {
		cond := &node.Status.Conditions[i]
		// Only "ready and full" conditions count as evidence of readiness.
		if cond.Type != api.NodeReady || cond.Status != api.ConditionFull {
			continue
		}
		if cond.LastProbeTime.After(latest.Time) {
			latest = cond.LastProbeTime
		}
	}
	return latest
}
// PopulateIPs queries IPs for given list of nodes.
func (s *NodeController) PopulateIPs(nodes *api.NodeList) (*api.NodeList, error) {
if s.isRunningCloudProvider() {

View File

@@ -709,6 +709,118 @@ func TestSyncNodeStatusTransitionTime(t *testing.T) {
}
}
func TestEvictTimeoutedPods(t *testing.T) {
table := []struct {
fakeNodeHandler *FakeNodeHandler
expectedRequestCount int
expectedActions []client.FakeAction
}{
// Node created long time ago, with no status.
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
},
},
Fake: client.Fake{
PodsList: api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}},
},
},
expectedRequestCount: 1, // List
expectedActions: []client.FakeAction{{Action: "list-pods"}, {Action: "delete-pod", Value: "pod0"}},
},
// Node created recently, with no status.
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: util.Now(),
},
},
},
Fake: client.Fake{
PodsList: api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}},
},
},
expectedRequestCount: 1, // List
expectedActions: nil,
},
// Node created long time ago, with status updated long time ago.
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionFull,
LastProbeTime: util.Date(2013, 1, 1, 0, 0, 0, 0, time.UTC),
},
},
},
},
},
Fake: client.Fake{
PodsList: api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}},
},
},
expectedRequestCount: 1, // List
expectedActions: []client.FakeAction{{Action: "list-pods"}, {Action: "delete-pod", Value: "pod0"}},
},
// Node created long time ago, with status updated recently.
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionFull,
LastProbeTime: util.Now(),
},
},
},
},
},
Fake: client.Fake{
PodsList: api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}},
},
},
expectedRequestCount: 1, // List
expectedActions: nil,
},
}
for _, item := range table {
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, nil, 10, 5*time.Minute)
if err := nodeController.EvictTimeoutedPods(); err != nil {
t.Errorf("unexpected error: %v", err)
}
if item.expectedRequestCount != item.fakeNodeHandler.RequestCount {
t.Errorf("expected %v call, but got %v.", item.expectedRequestCount, item.fakeNodeHandler.RequestCount)
}
if !reflect.DeepEqual(item.expectedActions, item.fakeNodeHandler.Actions) {
t.Errorf("actions differs, expected %+v, got %+v", item.expectedActions, item.fakeNodeHandler.Actions)
}
}
}
func TestSyncNodeStatusDeletePods(t *testing.T) {
table := []struct {
fakeNodeHandler *FakeNodeHandler