Node is NotReady until the Route is created

This commit is contained in:
gmarek
2016-05-25 15:51:43 +02:00
committed by Wojciech Tyczynski
parent 1cb088857b
commit 7bdf480340
7 changed files with 128 additions and 63 deletions

View File

@@ -24,6 +24,7 @@ import (
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/unversioned"
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
"k8s.io/kubernetes/pkg/cloudprovider"
"k8s.io/kubernetes/pkg/util/metrics"
@@ -36,6 +37,7 @@ const (
maxConcurrentRouteCreations int = 200
// Maximum number of retries of route creations.
maxRetries int = 5
updateNodeStatusMaxRetries = 3
)
type RouteController struct {
@@ -84,6 +86,22 @@ func (rc *RouteController) reconcileNodeRoutes() error {
return rc.reconcile(nodeList.Items, routeList)
}
func tryUpdateNodeStatus(node *api.Node, kubeClient clientset.Interface) error {
for i := 0; i < updateNodeStatusMaxRetries; i++ {
if _, err := kubeClient.Core().Nodes().UpdateStatus(node); err == nil {
break
} else {
if i+1 < updateNodeStatusMaxRetries {
glog.Errorf("Error updating node %s - will retry: %v", node.Name, err)
} else {
glog.Errorf("Error updating node %s - wont retry: %v", node.Name, err)
return err
}
}
}
return nil
}
func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.Route) error {
// nodeCIDRs maps nodeName->nodeCIDR
nodeCIDRs := make(map[string]string)
@@ -111,24 +129,6 @@ func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.R
}
nameHint := string(node.UID)
wg.Add(1)
glog.Infof("Creating route for node %s %s with hint %s", node.Name, route.DestinationCIDR, nameHint)
go func(nodeName string, nameHint string, route *cloudprovider.Route) {
defer wg.Done()
for i := 0; i < maxRetries; i++ {
startTime := time.Now()
// Ensure that we don't have more than maxConcurrentRouteCreations
// CreateRoute calls in flight.
rateLimiter <- struct{}{}
err := rc.routes.CreateRoute(rc.clusterName, nameHint, route)
<-rateLimiter
if err != nil {
glog.Errorf("Could not create route %s %s for node %s after %v: %v", nameHint, route.DestinationCIDR, nodeName, time.Now().Sub(startTime), err)
} else {
glog.Infof("Created route for node %s %s with hint %s after %v", nodeName, route.DestinationCIDR, nameHint, time.Now().Sub(startTime))
return
}
}
}(node.Name, nameHint, route)
}
nodeCIDRs[node.Name] = node.Spec.PodCIDR
}
@@ -138,12 +138,12 @@ func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.R
if nodeCIDRs[route.TargetInstance] != route.DestinationCIDR {
wg.Add(1)
// Delete the route.
glog.Infof("Deleting route %s %s", route.Name, route.DestinationCIDR)
glog.V(2).Infof("Deleting route %s %s", route.Name, route.DestinationCIDR)
go func(route *cloudprovider.Route, startTime time.Time) {
if err := rc.routes.DeleteRoute(rc.clusterName, route); err != nil {
glog.Errorf("Could not delete route %s %s after %v: %v", route.Name, route.DestinationCIDR, time.Now().Sub(startTime), err)
} else {
glog.Infof("Deleted route %s %s after %v", route.Name, route.DestinationCIDR, time.Now().Sub(startTime))
glog.V(2).Infof("Deleted route %s %s after %v", route.Name, route.DestinationCIDR, time.Now().Sub(startTime))
}
wg.Done()

View File

@@ -22,6 +22,7 @@ import (
"time"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/fake"
"k8s.io/kubernetes/pkg/cloudprovider"
fakecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/fake"
)
@@ -67,16 +68,22 @@ func TestIsResponsibleForRoute(t *testing.T) {
func TestReconcile(t *testing.T) {
cluster := "my-k8s"
node1 := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}}
node2 := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}}
nodeNoCidr := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: ""}}
testCases := []struct {
nodes []api.Node
initialRoutes []*cloudprovider.Route
expectedRoutes []*cloudprovider.Route
nodes []api.Node
initialRoutes []*cloudprovider.Route
expectedRoutes []*cloudprovider.Route
expectedNetworkReady []bool
clientset *fake.Clientset
}{
// 2 nodes, routes already there
{
nodes: []api.Node{
{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
node1,
node2,
},
initialRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
@@ -86,12 +93,14 @@ func TestReconcile(t *testing.T) {
{cluster + "-01", "node-1", "10.120.0.0/24"},
{cluster + "-02", "node-2", "10.120.1.0/24"},
},
expectedNetworkReady: []bool{true, true},
clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
},
// 2 nodes, one route already there
{
nodes: []api.Node{
{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
node1,
node2,
},
initialRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
@@ -100,24 +109,28 @@ func TestReconcile(t *testing.T) {
{cluster + "-01", "node-1", "10.120.0.0/24"},
{cluster + "-02", "node-2", "10.120.1.0/24"},
},
expectedNetworkReady: []bool{true, true},
clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
},
// 2 nodes, no routes yet
{
nodes: []api.Node{
{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
node1,
node2,
},
initialRoutes: []*cloudprovider.Route{},
expectedRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
{cluster + "-02", "node-2", "10.120.1.0/24"},
},
expectedNetworkReady: []bool{true, true},
clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
},
// 2 nodes, a few too many routes
{
nodes: []api.Node{
{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
node1,
node2,
},
initialRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
@@ -129,12 +142,14 @@ func TestReconcile(t *testing.T) {
{cluster + "-01", "node-1", "10.120.0.0/24"},
{cluster + "-02", "node-2", "10.120.1.0/24"},
},
expectedNetworkReady: []bool{true, true},
clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
},
// 2 nodes, 2 routes, but only 1 is right
{
nodes: []api.Node{
{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
node1,
node2,
},
initialRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
@@ -144,17 +159,21 @@ func TestReconcile(t *testing.T) {
{cluster + "-01", "node-1", "10.120.0.0/24"},
{cluster + "-02", "node-2", "10.120.1.0/24"},
},
expectedNetworkReady: []bool{true, true},
clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
},
// 2 nodes, one node without CIDR assigned.
{
nodes: []api.Node{
{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: ""}},
node1,
nodeNoCidr,
},
initialRoutes: []*cloudprovider.Route{},
expectedRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
},
expectedNetworkReady: []bool{true, false},
clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, nodeNoCidr}}),
},
}
for i, testCase := range testCases {
@@ -170,10 +189,36 @@ func TestReconcile(t *testing.T) {
t.Error("Error in test: fakecloud doesn't support Routes()")
}
_, cidr, _ := net.ParseCIDR("10.120.0.0/16")
rc := New(routes, nil, cluster, cidr)
rc := New(routes, testCase.clientset, cluster, cidr)
if err := rc.reconcile(testCase.nodes, testCase.initialRoutes); err != nil {
t.Errorf("%d. Error from rc.reconcile(): %v", i, err)
}
for _, action := range testCase.clientset.Actions() {
if action.GetVerb() == "update" && action.GetResource().Resource == "nodes" {
node := action.GetObject().(*api.Node)
_, condition := api.GetNodeCondition(&node.Status, api.NodeNetworkingReady)
if condition == nil {
t.Errorf("%d. Missing NodeNetworkingReady condition for Node %v", i, node.Name)
} else {
check := func(index int) bool {
return (condition.Status == api.ConditionTrue) == testCase.expectedNetworkReady[index]
}
var index int
if node.Name == node1.Name {
index = 0
} else if node.Name == node1.Name {
index = 1
} else {
// Something's wrong
continue
}
if !check(index) {
t.Errorf("%d. Invalid NodeNetworkingReady condition for Node %v, expected %v, got %v",
i, node.Name, (condition.Status == api.ConditionTrue), testCase.expectedNetworkReady[index])
}
}
}
}
var finalRoutes []*cloudprovider.Route
var err error
timeoutChan := time.After(200 * time.Millisecond)