From 7bdf48034069ac008ed185ade473f83c2255362c Mon Sep 17 00:00:00 2001 From: gmarek Date: Wed, 25 May 2016 15:51:43 +0200 Subject: [PATCH 1/2] Node is NotReady until the Route is created --- pkg/api/resource_helpers.go | 21 +++++- pkg/api/types.go | 2 + pkg/api/v1/types.go | 2 + pkg/controller/node/nodecontroller.go | 25 ++----- pkg/controller/route/routecontroller.go | 40 +++++----- pkg/controller/route/routecontroller_test.go | 77 ++++++++++++++++---- pkg/kubelet/kubelet.go | 24 ++++-- 7 files changed, 128 insertions(+), 63 deletions(-) diff --git a/pkg/api/resource_helpers.go b/pkg/api/resource_helpers.go index 2a664d8dc8a..4c55b120e14 100644 --- a/pkg/api/resource_helpers.go +++ b/pkg/api/resource_helpers.go @@ -95,8 +95,25 @@ func GetPodReadyCondition(status PodStatus) *PodCondition { // GetPodCondition extracts the provided condition from the given status and returns that. // Returns nil and -1 if the condition is not present, and the the index of the located condition. func GetPodCondition(status *PodStatus, conditionType PodConditionType) (int, *PodCondition) { - for i, c := range status.Conditions { - if c.Type == conditionType { + if status == nil { + return -1, nil + } + for i := range status.Conditions { + if status.Conditions[i].Type == conditionType { + return i, &status.Conditions[i] + } + } + return -1, nil +} + +// GetNodeCondition extracts the provided condition from the given status and returns that. +// Returns nil and -1 if the condition is not present, and the the index of the located condition. +func GetNodeCondition(status *NodeStatus, conditionType NodeConditionType) (int, *NodeCondition) { + if status == nil { + return -1, nil + } + for i := range status.Conditions { + if status.Conditions[i].Type == conditionType { return i, &status.Conditions[i] } } diff --git a/pkg/api/types.go b/pkg/api/types.go index d05702bdf13..3910593019f 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -2009,6 +2009,8 @@ const ( NodeOutOfDisk NodeConditionType = "OutOfDisk" // NodeMemoryPressure means the kubelet is under pressure due to insufficient available memory. NodeMemoryPressure NodeConditionType = "MemoryPressure" + // NodeNetworkingReady means that network for the node is correctly configured. + NodeNetworkingReady NodeConditionType = "NetworkingReady" ) type NodeCondition struct { diff --git a/pkg/api/v1/types.go b/pkg/api/v1/types.go index b45b56b9a69..dd64f6901d0 100644 --- a/pkg/api/v1/types.go +++ b/pkg/api/v1/types.go @@ -2412,6 +2412,8 @@ const ( NodeOutOfDisk NodeConditionType = "OutOfDisk" // NodeMemoryPressure means the kubelet is under pressure due to insufficient available memory. NodeMemoryPressure NodeConditionType = "MemoryPressure" + // NodeNetworkingReady means that network for the node is correctly configured. + NodeNetworkingReady NodeConditionType = "NetworkingReady" ) // NodeCondition contains condition infromation for a node. diff --git a/pkg/controller/node/nodecontroller.go b/pkg/controller/node/nodecontroller.go index 804ce745c1c..a3c0c3c3b91 100644 --- a/pkg/controller/node/nodecontroller.go +++ b/pkg/controller/node/nodecontroller.go @@ -412,20 +412,6 @@ func (nc *NodeController) recycleCIDR(obj interface{}) { } } -// getCondition returns a condition object for the specific condition -// type, nil if the condition is not set. 
-func (nc *NodeController) getCondition(status *api.NodeStatus, conditionType api.NodeConditionType) *api.NodeCondition { - if status == nil { - return nil - } - for i := range status.Conditions { - if status.Conditions[i].Type == conditionType { - return &status.Conditions[i] - } - } - return nil -} - var gracefulDeletionVersion = version.MustParse("v1.1.0") // maybeDeleteTerminatingPod non-gracefully deletes pods that are terminating @@ -711,7 +697,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap var err error var gracePeriod time.Duration var observedReadyCondition api.NodeCondition - currentReadyCondition := nc.getCondition(&node.Status, api.NodeReady) + _, currentReadyCondition := api.GetNodeCondition(&node.Status, api.NodeReady) if currentReadyCondition == nil { // If ready condition is nil, then kubelet (or nodecontroller) never posted node status. // A fake ready condition is created, where LastProbeTime and LastTransitionTime is set @@ -751,9 +737,9 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap // if that's the case, but it does not seem necessary. var savedCondition *api.NodeCondition if found { - savedCondition = nc.getCondition(&savedNodeStatus.status, api.NodeReady) + _, savedCondition = api.GetNodeCondition(&savedNodeStatus.status, api.NodeReady) } - observedCondition := nc.getCondition(&node.Status, api.NodeReady) + _, observedCondition := api.GetNodeCondition(&node.Status, api.NodeReady) if !found { glog.Warningf("Missing timestamp for Node %s. Assuming now as a timestamp.", node.Name) savedNodeStatus = nodeStatusData{ @@ -829,7 +815,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap // Like NodeReady condition, NodeOutOfDisk was last set longer ago than gracePeriod, so update // it to Unknown (regardless of its current value) in the master. // TODO(madhusudancs): Refactor this with readyCondition to remove duplicated code. - oodCondition := nc.getCondition(&node.Status, api.NodeOutOfDisk) + _, oodCondition := api.GetNodeCondition(&node.Status, api.NodeOutOfDisk) if oodCondition == nil { glog.V(2).Infof("Out of disk condition of node %v is never updated by kubelet", node.Name) node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{ @@ -851,7 +837,8 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap } } - if !api.Semantic.DeepEqual(nc.getCondition(&node.Status, api.NodeReady), &observedReadyCondition) { + _, currentCondition := api.GetNodeCondition(&node.Status, api.NodeReady) + if !api.Semantic.DeepEqual(currentCondition, &observedReadyCondition) { if _, err = nc.kubeClient.Core().Nodes().UpdateStatus(node); err != nil { glog.Errorf("Error updating node %s: %v", node.Name, err) return gracePeriod, observedReadyCondition, currentReadyCondition, err diff --git a/pkg/controller/route/routecontroller.go b/pkg/controller/route/routecontroller.go index 68a2ac8fe5a..15019992614 100644 --- a/pkg/controller/route/routecontroller.go +++ b/pkg/controller/route/routecontroller.go @@ -24,6 +24,7 @@ import ( "github.com/golang/glog" "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/api/unversioned" clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset" "k8s.io/kubernetes/pkg/cloudprovider" "k8s.io/kubernetes/pkg/util/metrics" @@ -36,6 +37,7 @@ const ( maxConcurrentRouteCreations int = 200 // Maximum number of retries of route creations. 
maxRetries int = 5 + updateNodeStatusMaxRetries = 3 ) type RouteController struct { @@ -84,6 +86,22 @@ func (rc *RouteController) reconcileNodeRoutes() error { return rc.reconcile(nodeList.Items, routeList) } +func tryUpdateNodeStatus(node *api.Node, kubeClient clientset.Interface) error { + for i := 0; i < updateNodeStatusMaxRetries; i++ { + if _, err := kubeClient.Core().Nodes().UpdateStatus(node); err == nil { + break + } else { + if i+1 < updateNodeStatusMaxRetries { + glog.Errorf("Error updating node %s - will retry: %v", node.Name, err) + } else { + glog.Errorf("Error updating node %s - wont retry: %v", node.Name, err) + return err + } + } + } + return nil +} + func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.Route) error { // nodeCIDRs maps nodeName->nodeCIDR nodeCIDRs := make(map[string]string) @@ -111,24 +129,6 @@ func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.R } nameHint := string(node.UID) wg.Add(1) - glog.Infof("Creating route for node %s %s with hint %s", node.Name, route.DestinationCIDR, nameHint) - go func(nodeName string, nameHint string, route *cloudprovider.Route) { - defer wg.Done() - for i := 0; i < maxRetries; i++ { - startTime := time.Now() - // Ensure that we don't have more than maxConcurrentRouteCreations - // CreateRoute calls in flight. - rateLimiter <- struct{}{} - err := rc.routes.CreateRoute(rc.clusterName, nameHint, route) - <-rateLimiter - if err != nil { - glog.Errorf("Could not create route %s %s for node %s after %v: %v", nameHint, route.DestinationCIDR, nodeName, time.Now().Sub(startTime), err) - } else { - glog.Infof("Created route for node %s %s with hint %s after %v", nodeName, route.DestinationCIDR, nameHint, time.Now().Sub(startTime)) - return - } - } - }(node.Name, nameHint, route) } nodeCIDRs[node.Name] = node.Spec.PodCIDR } @@ -138,12 +138,12 @@ func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.R if nodeCIDRs[route.TargetInstance] != route.DestinationCIDR { wg.Add(1) // Delete the route. 
- glog.Infof("Deleting route %s %s", route.Name, route.DestinationCIDR) + glog.V(2).Infof("Deleting route %s %s", route.Name, route.DestinationCIDR) go func(route *cloudprovider.Route, startTime time.Time) { if err := rc.routes.DeleteRoute(rc.clusterName, route); err != nil { glog.Errorf("Could not delete route %s %s after %v: %v", route.Name, route.DestinationCIDR, time.Now().Sub(startTime), err) } else { - glog.Infof("Deleted route %s %s after %v", route.Name, route.DestinationCIDR, time.Now().Sub(startTime)) + glog.V(2).Infof("Deleted route %s %s after %v", route.Name, route.DestinationCIDR, time.Now().Sub(startTime)) } wg.Done() diff --git a/pkg/controller/route/routecontroller_test.go b/pkg/controller/route/routecontroller_test.go index 3db2079f5a1..67a6fa946c4 100644 --- a/pkg/controller/route/routecontroller_test.go +++ b/pkg/controller/route/routecontroller_test.go @@ -22,6 +22,7 @@ import ( "time" "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/fake" "k8s.io/kubernetes/pkg/cloudprovider" fakecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/fake" ) @@ -67,16 +68,22 @@ func TestIsResponsibleForRoute(t *testing.T) { func TestReconcile(t *testing.T) { cluster := "my-k8s" + node1 := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}} + node2 := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}} + nodeNoCidr := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: ""}} + testCases := []struct { - nodes []api.Node - initialRoutes []*cloudprovider.Route - expectedRoutes []*cloudprovider.Route + nodes []api.Node + initialRoutes []*cloudprovider.Route + expectedRoutes []*cloudprovider.Route + expectedNetworkReady []bool + clientset *fake.Clientset }{ // 2 nodes, routes already there { nodes: []api.Node{ - {ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}}, - {ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}}, + node1, + node2, }, initialRoutes: []*cloudprovider.Route{ {cluster + "-01", "node-1", "10.120.0.0/24"}, @@ -86,12 +93,14 @@ func TestReconcile(t *testing.T) { {cluster + "-01", "node-1", "10.120.0.0/24"}, {cluster + "-02", "node-2", "10.120.1.0/24"}, }, + expectedNetworkReady: []bool{true, true}, + clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), }, // 2 nodes, one route already there { nodes: []api.Node{ - {ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}}, - {ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}}, + node1, + node2, }, initialRoutes: []*cloudprovider.Route{ {cluster + "-01", "node-1", "10.120.0.0/24"}, @@ -100,24 +109,28 @@ func TestReconcile(t *testing.T) { {cluster + "-01", "node-1", "10.120.0.0/24"}, {cluster + "-02", "node-2", "10.120.1.0/24"}, }, + expectedNetworkReady: []bool{true, true}, + clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), }, // 2 nodes, no routes yet { nodes: []api.Node{ - {ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}}, - {ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}}, + node1, + node2, }, initialRoutes: []*cloudprovider.Route{}, expectedRoutes: 
[]*cloudprovider.Route{ {cluster + "-01", "node-1", "10.120.0.0/24"}, {cluster + "-02", "node-2", "10.120.1.0/24"}, }, + expectedNetworkReady: []bool{true, true}, + clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), }, // 2 nodes, a few too many routes { nodes: []api.Node{ - {ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}}, - {ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}}, + node1, + node2, }, initialRoutes: []*cloudprovider.Route{ {cluster + "-01", "node-1", "10.120.0.0/24"}, @@ -129,12 +142,14 @@ func TestReconcile(t *testing.T) { {cluster + "-01", "node-1", "10.120.0.0/24"}, {cluster + "-02", "node-2", "10.120.1.0/24"}, }, + expectedNetworkReady: []bool{true, true}, + clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), }, // 2 nodes, 2 routes, but only 1 is right { nodes: []api.Node{ - {ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}}, - {ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}}, + node1, + node2, }, initialRoutes: []*cloudprovider.Route{ {cluster + "-01", "node-1", "10.120.0.0/24"}, @@ -144,17 +159,21 @@ func TestReconcile(t *testing.T) { {cluster + "-01", "node-1", "10.120.0.0/24"}, {cluster + "-02", "node-2", "10.120.1.0/24"}, }, + expectedNetworkReady: []bool{true, true}, + clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), }, // 2 nodes, one node without CIDR assigned. { nodes: []api.Node{ - {ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}}, - {ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: ""}}, + node1, + nodeNoCidr, }, initialRoutes: []*cloudprovider.Route{}, expectedRoutes: []*cloudprovider.Route{ {cluster + "-01", "node-1", "10.120.0.0/24"}, }, + expectedNetworkReady: []bool{true, false}, + clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, nodeNoCidr}}), }, } for i, testCase := range testCases { @@ -170,10 +189,36 @@ func TestReconcile(t *testing.T) { t.Error("Error in test: fakecloud doesn't support Routes()") } _, cidr, _ := net.ParseCIDR("10.120.0.0/16") - rc := New(routes, nil, cluster, cidr) + rc := New(routes, testCase.clientset, cluster, cidr) if err := rc.reconcile(testCase.nodes, testCase.initialRoutes); err != nil { t.Errorf("%d. Error from rc.reconcile(): %v", i, err) } + for _, action := range testCase.clientset.Actions() { + if action.GetVerb() == "update" && action.GetResource().Resource == "nodes" { + node := action.GetObject().(*api.Node) + _, condition := api.GetNodeCondition(&node.Status, api.NodeNetworkingReady) + if condition == nil { + t.Errorf("%d. Missing NodeNetworkingReady condition for Node %v", i, node.Name) + } else { + check := func(index int) bool { + return (condition.Status == api.ConditionTrue) == testCase.expectedNetworkReady[index] + } + var index int + if node.Name == node1.Name { + index = 0 + } else if node.Name == node1.Name { + index = 1 + } else { + // Something's wrong + continue + } + if !check(index) { + t.Errorf("%d. 
Invalid NodeNetworkingReady condition for Node %v, expected %v, got %v", + i, node.Name, (condition.Status == api.ConditionTrue), testCase.expectedNetworkReady[index]) + } + } + } + } var finalRoutes []*cloudprovider.Route var err error timeoutChan := time.After(200 * time.Millisecond) diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 296221c42d0..37858260d27 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -46,6 +46,7 @@ import ( clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset" "k8s.io/kubernetes/pkg/client/record" "k8s.io/kubernetes/pkg/cloudprovider" + "k8s.io/kubernetes/pkg/cloudprovider/providers/gce" "k8s.io/kubernetes/pkg/fieldpath" "k8s.io/kubernetes/pkg/fields" "k8s.io/kubernetes/pkg/kubelet/cadvisor" @@ -3163,12 +3164,23 @@ func (kl *Kubelet) setNodeReadyCondition(node *api.Node) { currentTime := unversioned.NewTime(kl.clock.Now()) var newNodeReadyCondition api.NodeCondition if rs := kl.runtimeState.errors(); len(rs) == 0 { - newNodeReadyCondition = api.NodeCondition{ - Type: api.NodeReady, - Status: api.ConditionTrue, - Reason: "KubeletReady", - Message: "kubelet is posting ready status", - LastHeartbeatTime: currentTime, + _, networkingCondition := api.GetNodeCondition(&node.Status, api.NodeNetworkingReady) + if (kl.cloud.ProviderName() == gce.ProviderName) && (networkingCondition == nil || networkingCondition.Status != api.ConditionTrue) { + newNodeReadyCondition = api.NodeCondition{ + Type: api.NodeReady, + Status: api.ConditionFalse, + Reason: "KubeletNotReady", + Message: "networking is not ready", + LastHeartbeatTime: currentTime, + } + } else { + newNodeReadyCondition = api.NodeCondition{ + Type: api.NodeReady, + Status: api.ConditionTrue, + Reason: "KubeletReady", + Message: "kubelet is posting ready status", + LastHeartbeatTime: currentTime, + } } } else { newNodeReadyCondition = api.NodeCondition{ From be1b57100d05ef3431e3095aab9b339b8dafa3e5 Mon Sep 17 00:00:00 2001 From: Wojciech Tyczynski Date: Fri, 27 May 2016 11:37:20 +0200 Subject: [PATCH 2/2] Change to NotReadyNetworking and use in scheduler --- pkg/api/types.go | 4 +- pkg/api/v1/types.go | 4 +- pkg/controller/route/routecontroller.go | 104 +++++++++++++++---- pkg/controller/route/routecontroller_test.go | 60 +++++------ pkg/kubelet/kubelet.go | 42 ++++---- plugin/pkg/scheduler/factory/factory.go | 9 +- 6 files changed, 151 insertions(+), 72 deletions(-) diff --git a/pkg/api/types.go b/pkg/api/types.go index 3910593019f..7c96fb43dc2 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -2009,8 +2009,8 @@ const ( NodeOutOfDisk NodeConditionType = "OutOfDisk" // NodeMemoryPressure means the kubelet is under pressure due to insufficient available memory. NodeMemoryPressure NodeConditionType = "MemoryPressure" - // NodeNetworkingReady means that network for the node is correctly configured. - NodeNetworkingReady NodeConditionType = "NetworkingReady" + // NodeNetworkUnavailable means that network for the node is not correctly configured. + NodeNetworkUnavailable NodeConditionType = "NetworkUnavailable" ) type NodeCondition struct { diff --git a/pkg/api/v1/types.go b/pkg/api/v1/types.go index dd64f6901d0..b555868af90 100644 --- a/pkg/api/v1/types.go +++ b/pkg/api/v1/types.go @@ -2412,8 +2412,8 @@ const ( NodeOutOfDisk NodeConditionType = "OutOfDisk" // NodeMemoryPressure means the kubelet is under pressure due to insufficient available memory. 
NodeMemoryPressure NodeConditionType = "MemoryPressure" - // NodeNetworkingReady means that network for the node is correctly configured. - NodeNetworkingReady NodeConditionType = "NetworkingReady" + // NodeNetworkUnavailable means that network for the node is not correctly configured. + NodeNetworkUnavailable NodeConditionType = "NetworkUnavailable" ) // NodeCondition contains condition infromation for a node. diff --git a/pkg/controller/route/routecontroller.go b/pkg/controller/route/routecontroller.go index 15019992614..c9d72eb23b8 100644 --- a/pkg/controller/route/routecontroller.go +++ b/pkg/controller/route/routecontroller.go @@ -37,7 +37,8 @@ const ( maxConcurrentRouteCreations int = 200 // Maximum number of retries of route creations. maxRetries int = 5 - updateNodeStatusMaxRetries = 3 + // Maximum number of retries of node status update. + updateNodeStatusMaxRetries int = 3 ) type RouteController struct { @@ -86,22 +87,6 @@ func (rc *RouteController) reconcileNodeRoutes() error { return rc.reconcile(nodeList.Items, routeList) } -func tryUpdateNodeStatus(node *api.Node, kubeClient clientset.Interface) error { - for i := 0; i < updateNodeStatusMaxRetries; i++ { - if _, err := kubeClient.Core().Nodes().UpdateStatus(node); err == nil { - break - } else { - if i+1 < updateNodeStatusMaxRetries { - glog.Errorf("Error updating node %s - will retry: %v", node.Name, err) - } else { - glog.Errorf("Error updating node %s - wont retry: %v", node.Name, err) - return err - } - } - } - return nil -} - func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.Route) error { // nodeCIDRs maps nodeName->nodeCIDR nodeCIDRs := make(map[string]string) @@ -129,6 +114,28 @@ func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.R } nameHint := string(node.UID) wg.Add(1) + glog.Infof("Creating route for node %s %s with hint %s", node.Name, route.DestinationCIDR, nameHint) + go func(nodeName string, nameHint string, route *cloudprovider.Route) { + defer wg.Done() + for i := 0; i < maxRetries; i++ { + startTime := time.Now() + // Ensure that we don't have more than maxConcurrentRouteCreations + // CreateRoute calls in flight. + rateLimiter <- struct{}{} + err := rc.routes.CreateRoute(rc.clusterName, nameHint, route) + <-rateLimiter + + rc.updateNetworkingCondition(nodeName, err == nil) + if err != nil { + glog.Errorf("Could not create route %s %s for node %s after %v: %v", nameHint, route.DestinationCIDR, nodeName, time.Now().Sub(startTime), err) + } else { + glog.Infof("Created route for node %s %s with hint %s after %v", nodeName, route.DestinationCIDR, nameHint, time.Now().Sub(startTime)) + return + } + } + }(node.Name, nameHint, route) + } else { + rc.updateNetworkingCondition(node.Name, true) } nodeCIDRs[node.Name] = node.Spec.PodCIDR } @@ -138,12 +145,12 @@ func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.R if nodeCIDRs[route.TargetInstance] != route.DestinationCIDR { wg.Add(1) // Delete the route. 
- glog.V(2).Infof("Deleting route %s %s", route.Name, route.DestinationCIDR) + glog.Infof("Deleting route %s %s", route.Name, route.DestinationCIDR) go func(route *cloudprovider.Route, startTime time.Time) { if err := rc.routes.DeleteRoute(rc.clusterName, route); err != nil { glog.Errorf("Could not delete route %s %s after %v: %v", route.Name, route.DestinationCIDR, time.Now().Sub(startTime), err) } else { - glog.V(2).Infof("Deleted route %s %s after %v", route.Name, route.DestinationCIDR, time.Now().Sub(startTime)) + glog.Infof("Deleted route %s %s after %v", route.Name, route.DestinationCIDR, time.Now().Sub(startTime)) } wg.Done() @@ -155,6 +162,65 @@ func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.R return nil } +func updateNetworkingCondition(node *api.Node, routeCreated bool) { + _, networkingCondition := api.GetNodeCondition(&node.Status, api.NodeNetworkUnavailable) + currentTime := unversioned.Now() + if routeCreated { + if networkingCondition != nil && networkingCondition.Status != api.ConditionFalse { + networkingCondition.Status = api.ConditionFalse + networkingCondition.Reason = "RouteCreated" + networkingCondition.Message = "RouteController created a route" + networkingCondition.LastTransitionTime = currentTime + } else if networkingCondition == nil { + node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{ + Type: api.NodeNetworkUnavailable, + Status: api.ConditionFalse, + Reason: "RouteCreated", + Message: "RouteController created a route", + LastTransitionTime: currentTime, + }) + } + } else { + if networkingCondition != nil && networkingCondition.Status != api.ConditionTrue { + networkingCondition.Status = api.ConditionTrue + networkingCondition.Reason = "NoRouteCreated" + networkingCondition.Message = "RouteController failed to create a route" + networkingCondition.LastTransitionTime = currentTime + } else if networkingCondition == nil { + node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{ + Type: api.NodeNetworkUnavailable, + Status: api.ConditionTrue, + Reason: "NoRouteCreated", + Message: "RouteController failed to create a route", + LastTransitionTime: currentTime, + }) + } + } +} + +func (rc *RouteController) updateNetworkingCondition(nodeName string, routeCreated bool) error { + var err error + for i := 0; i < updateNodeStatusMaxRetries; i++ { + node, err := rc.kubeClient.Core().Nodes().Get(nodeName) + if err != nil { + glog.Errorf("Error geting node: %v", err) + continue + } + updateNetworkingCondition(node, routeCreated) + // TODO: Use Patch instead once #26381 is merged. + // See kubernetes/node-problem-detector#9 for details. 
+ if _, err = rc.kubeClient.Core().Nodes().UpdateStatus(node); err == nil { + return nil + } + if i+1 < updateNodeStatusMaxRetries { + glog.Errorf("Error updating node %s, retrying: %v", node.Name, err) + } else { + glog.Errorf("Error updating node %s: %v", node.Name, err) + } + } + return err +} + func (rc *RouteController) isResponsibleForRoute(route *cloudprovider.Route) bool { _, cidr, err := net.ParseCIDR(route.DestinationCIDR) if err != nil { diff --git a/pkg/controller/route/routecontroller_test.go b/pkg/controller/route/routecontroller_test.go index 67a6fa946c4..398a494ee4d 100644 --- a/pkg/controller/route/routecontroller_test.go +++ b/pkg/controller/route/routecontroller_test.go @@ -23,6 +23,7 @@ import ( "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/fake" + "k8s.io/kubernetes/pkg/client/testing/core" "k8s.io/kubernetes/pkg/cloudprovider" fakecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/fake" ) @@ -73,11 +74,11 @@ func TestReconcile(t *testing.T) { nodeNoCidr := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: ""}} testCases := []struct { - nodes []api.Node - initialRoutes []*cloudprovider.Route - expectedRoutes []*cloudprovider.Route - expectedNetworkReady []bool - clientset *fake.Clientset + nodes []api.Node + initialRoutes []*cloudprovider.Route + expectedRoutes []*cloudprovider.Route + expectedNetworkUnavailable []bool + clientset *fake.Clientset }{ // 2 nodes, routes already there { @@ -93,8 +94,8 @@ func TestReconcile(t *testing.T) { {cluster + "-01", "node-1", "10.120.0.0/24"}, {cluster + "-02", "node-2", "10.120.1.0/24"}, }, - expectedNetworkReady: []bool{true, true}, - clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), + expectedNetworkUnavailable: []bool{true, true}, + clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), }, // 2 nodes, one route already there { @@ -109,8 +110,8 @@ func TestReconcile(t *testing.T) { {cluster + "-01", "node-1", "10.120.0.0/24"}, {cluster + "-02", "node-2", "10.120.1.0/24"}, }, - expectedNetworkReady: []bool{true, true}, - clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), + expectedNetworkUnavailable: []bool{true, true}, + clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), }, // 2 nodes, no routes yet { @@ -123,8 +124,8 @@ func TestReconcile(t *testing.T) { {cluster + "-01", "node-1", "10.120.0.0/24"}, {cluster + "-02", "node-2", "10.120.1.0/24"}, }, - expectedNetworkReady: []bool{true, true}, - clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), + expectedNetworkUnavailable: []bool{true, true}, + clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), }, // 2 nodes, a few too many routes { @@ -142,8 +143,8 @@ func TestReconcile(t *testing.T) { {cluster + "-01", "node-1", "10.120.0.0/24"}, {cluster + "-02", "node-2", "10.120.1.0/24"}, }, - expectedNetworkReady: []bool{true, true}, - clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), + expectedNetworkUnavailable: []bool{true, true}, + clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), }, // 2 nodes, 2 routes, but only 1 is right { @@ -159,8 +160,8 @@ func TestReconcile(t *testing.T) { {cluster + "-01", "node-1", "10.120.0.0/24"}, {cluster + "-02", "node-2", "10.120.1.0/24"}, }, - expectedNetworkReady: []bool{true, true}, - clientset: 
fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), + expectedNetworkUnavailable: []bool{true, true}, + clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), }, // 2 nodes, one node without CIDR assigned. { @@ -172,8 +173,8 @@ func TestReconcile(t *testing.T) { expectedRoutes: []*cloudprovider.Route{ {cluster + "-01", "node-1", "10.120.0.0/24"}, }, - expectedNetworkReady: []bool{true, false}, - clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, nodeNoCidr}}), + expectedNetworkUnavailable: []bool{true, false}, + clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, nodeNoCidr}}), }, } for i, testCase := range testCases { @@ -195,26 +196,27 @@ func TestReconcile(t *testing.T) { } for _, action := range testCase.clientset.Actions() { if action.GetVerb() == "update" && action.GetResource().Resource == "nodes" { - node := action.GetObject().(*api.Node) - _, condition := api.GetNodeCondition(&node.Status, api.NodeNetworkingReady) + node := action.(core.UpdateAction).GetObject().(*api.Node) + _, condition := api.GetNodeCondition(&node.Status, api.NodeNetworkUnavailable) if condition == nil { - t.Errorf("%d. Missing NodeNetworkingReady condition for Node %v", i, node.Name) + t.Errorf("%d. Missing NodeNetworkUnavailable condition for Node %v", i, node.Name) } else { check := func(index int) bool { - return (condition.Status == api.ConditionTrue) == testCase.expectedNetworkReady[index] + return (condition.Status == api.ConditionFalse) == testCase.expectedNetworkUnavailable[index] } - var index int - if node.Name == node1.Name { - index = 0 - } else if node.Name == node1.Name { - index = 1 - } else { + index := -1 + for j := range testCase.nodes { + if testCase.nodes[j].Name == node.Name { + index = j + } + } + if index == -1 { // Something's wrong continue } if !check(index) { - t.Errorf("%d. Invalid NodeNetworkingReady condition for Node %v, expected %v, got %v", - i, node.Name, (condition.Status == api.ConditionTrue), testCase.expectedNetworkReady[index]) + t.Errorf("%d. Invalid NodeNetworkUnavailable condition for Node %v, expected %v, got %v", + i, node.Name, testCase.expectedNetworkUnavailable[index], (condition.Status == api.ConditionFalse)) } } } diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 37858260d27..fc3f50afcc9 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -46,7 +46,6 @@ import ( clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset" "k8s.io/kubernetes/pkg/client/record" "k8s.io/kubernetes/pkg/cloudprovider" - "k8s.io/kubernetes/pkg/cloudprovider/providers/gce" "k8s.io/kubernetes/pkg/fieldpath" "k8s.io/kubernetes/pkg/fields" "k8s.io/kubernetes/pkg/kubelet/cadvisor" @@ -1014,6 +1013,16 @@ func (kl *Kubelet) initialNodeStatus() (*api.Node, error) { Unschedulable: !kl.registerSchedulable, }, } + // Initially, set NodeNetworkUnavailable to true. + if kl.providerRequiresNetworkingConfiguration() { + node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{ + Type: api.NodeNetworkUnavailable, + Status: api.ConditionTrue, + Reason: "NoRouteCreated", + Message: "Node created without a route", + LastTransitionTime: unversioned.NewTime(kl.clock.Now()), + }) + } // @question: should this be place after the call to the cloud provider? 
which also applies labels for k, v := range kl.nodeLabels { @@ -1079,6 +1088,14 @@ func (kl *Kubelet) initialNodeStatus() (*api.Node, error) { return node, nil } +func (kl *Kubelet) providerRequiresNetworkingConfiguration() bool { + if kl.cloud == nil || kl.flannelExperimentalOverlay { + return false + } + _, supported := kl.cloud.Routes() + return supported +} + // registerWithApiserver registers the node with the cluster master. It is safe // to call multiple times, but not concurrently (kl.registrationCompleted is // not locked). @@ -3164,23 +3181,12 @@ func (kl *Kubelet) setNodeReadyCondition(node *api.Node) { currentTime := unversioned.NewTime(kl.clock.Now()) var newNodeReadyCondition api.NodeCondition if rs := kl.runtimeState.errors(); len(rs) == 0 { - _, networkingCondition := api.GetNodeCondition(&node.Status, api.NodeNetworkingReady) - if (kl.cloud.ProviderName() == gce.ProviderName) && (networkingCondition == nil || networkingCondition.Status != api.ConditionTrue) { - newNodeReadyCondition = api.NodeCondition{ - Type: api.NodeReady, - Status: api.ConditionFalse, - Reason: "KubeletNotReady", - Message: "networking is not ready", - LastHeartbeatTime: currentTime, - } - } else { - newNodeReadyCondition = api.NodeCondition{ - Type: api.NodeReady, - Status: api.ConditionTrue, - Reason: "KubeletReady", - Message: "kubelet is posting ready status", - LastHeartbeatTime: currentTime, - } + newNodeReadyCondition = api.NodeCondition{ + Type: api.NodeReady, + Status: api.ConditionTrue, + Reason: "KubeletReady", + Message: "kubelet is posting ready status", + LastHeartbeatTime: currentTime, } } else { newNodeReadyCondition = api.NodeCondition{ diff --git a/plugin/pkg/scheduler/factory/factory.go b/plugin/pkg/scheduler/factory/factory.go index d4f03b1bc17..73054f1da26 100644 --- a/plugin/pkg/scheduler/factory/factory.go +++ b/plugin/pkg/scheduler/factory/factory.go @@ -437,14 +437,19 @@ func (f *ConfigFactory) responsibleForPod(pod *api.Pod) bool { func getNodeConditionPredicate() cache.NodeConditionPredicate { return func(node api.Node) bool { for _, cond := range node.Status.Conditions { - // We consider the node for scheduling only when its NodeReady condition status - // is ConditionTrue and its NodeOutOfDisk condition status is ConditionFalse. + // We consider the node for scheduling only when its: + // - NodeReady condition status is ConditionTrue, + // - NodeOutOfDisk condition status is ConditionFalse, + // - NodeNetworkUnavailable condition status is ConditionFalse. if cond.Type == api.NodeReady && cond.Status != api.ConditionTrue { glog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status) return false } else if cond.Type == api.NodeOutOfDisk && cond.Status != api.ConditionFalse { glog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status) return false + } else if cond.Type == api.NodeNetworkUnavailable && cond.Status != api.ConditionFalse { + glog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status) + return false } } return true
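Taken together, the two patches tie node readiness to cloud route creation: the kubelet now registers a node with NodeNetworkUnavailable set to True whenever its cloud provider manages routes, the route controller flips the condition to False once it has created a route for the node's PodCIDR, and the scheduler ignores nodes whose NetworkUnavailable condition is not False. The following is a minimal, self-contained sketch, not part of the patch, of how a consumer could combine the GetNodeCondition helper added in pkg/api/resource_helpers.go with the new condition; nodeIsSchedulable, the sample node, and package main are illustrative assumptions.

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api"
)

// nodeIsSchedulable is an illustrative helper: it treats a node as schedulable
// only when NodeReady is True and NodeNetworkUnavailable, if present, is False.
func nodeIsSchedulable(node *api.Node) bool {
	_, ready := api.GetNodeCondition(&node.Status, api.NodeReady)
	if ready == nil || ready.Status != api.ConditionTrue {
		return false
	}
	_, netUnavailable := api.GetNodeCondition(&node.Status, api.NodeNetworkUnavailable)
	if netUnavailable != nil && netUnavailable.Status != api.ConditionFalse {
		return false
	}
	return true
}

func main() {
	node := &api.Node{
		Status: api.NodeStatus{
			Conditions: []api.NodeCondition{
				{Type: api.NodeReady, Status: api.ConditionTrue},
				// As set by the kubelet in initialNodeStatus, until the route controller clears it.
				{Type: api.NodeNetworkUnavailable, Status: api.ConditionTrue, Reason: "NoRouteCreated"},
			},
		},
	}
	fmt.Println(nodeIsSchedulable(node)) // false until NetworkUnavailable becomes False
}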
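A second sketch, assumed to sit in the route controller package next to routecontroller_test.go, walks the condition through its lifecycle: the node starts with NetworkUnavailable=True as set by the kubelet at registration, and the package-level updateNetworkingCondition helper from the second patch records False with reason RouteCreated once a route exists. The test name is hypothetical.

package route

import (
	"testing"

	"k8s.io/kubernetes/pkg/api"
)

// TestNetworkUnavailableLifecycle is illustrative only.
func TestNetworkUnavailableLifecycle(t *testing.T) {
	node := &api.Node{
		Status: api.NodeStatus{
			Conditions: []api.NodeCondition{
				{Type: api.NodeNetworkUnavailable, Status: api.ConditionTrue, Reason: "NoRouteCreated"},
			},
		},
	}

	// Route creation succeeded; the controller marks the network as available.
	updateNetworkingCondition(node, true)

	_, cond := api.GetNodeCondition(&node.Status, api.NodeNetworkUnavailable)
	if cond == nil || cond.Status != api.ConditionFalse || cond.Reason != "RouteCreated" {
		t.Fatalf("expected NetworkUnavailable=False with reason RouteCreated, got %+v", cond)
	}
}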
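One wrinkle in the second patch's RouteController.updateNetworkingCondition is worth an illustration: node, err := rc.kubeClient.Core().Nodes().Get(nodeName) declares a fresh err inside the loop body, shadowing the outer var err error, so the trailing return err reports success even when every Get or UpdateStatus attempt fails. A hedged variant of the same retry loop without the shadowing could look like this (the method name is hypothetical; the snippet assumes the route package's existing kubeClient field, glog import, and helpers):

func (rc *RouteController) updateNetworkingConditionNoShadow(nodeName string, routeCreated bool) error {
	var err error
	for i := 0; i < updateNodeStatusMaxRetries; i++ {
		// Assign to the outer err so the final return value reflects the last failure.
		var node *api.Node
		node, err = rc.kubeClient.Core().Nodes().Get(nodeName)
		if err != nil {
			glog.Errorf("Error getting node %s: %v", nodeName, err)
			continue
		}
		updateNetworkingCondition(node, routeCreated)
		// TODO: Use Patch instead once #26381 is merged.
		if _, err = rc.kubeClient.Core().Nodes().UpdateStatus(node); err == nil {
			return nil
		}
		glog.Errorf("Error updating node %s (attempt %d of %d): %v", nodeName, i+1, updateNodeStatusMaxRetries, err)
	}
	return err
}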