From be1b57100d05ef3431e3095aab9b339b8dafa3e5 Mon Sep 17 00:00:00 2001
From: Wojciech Tyczynski
Date: Fri, 27 May 2016 11:37:20 +0200
Subject: [PATCH] Change to NotReadyNetworking and use in scheduler

---
 pkg/api/types.go                             |   4 +-
 pkg/api/v1/types.go                          |   4 +-
 pkg/controller/route/routecontroller.go      | 104 +++++++++++++++----
 pkg/controller/route/routecontroller_test.go |  60 +++++------
 pkg/kubelet/kubelet.go                       |  42 ++++----
 plugin/pkg/scheduler/factory/factory.go      |   9 +-
 6 files changed, 151 insertions(+), 72 deletions(-)

diff --git a/pkg/api/types.go b/pkg/api/types.go
index 3910593019f..7c96fb43dc2 100644
--- a/pkg/api/types.go
+++ b/pkg/api/types.go
@@ -2009,8 +2009,8 @@ const (
 	NodeOutOfDisk NodeConditionType = "OutOfDisk"
 	// NodeMemoryPressure means the kubelet is under pressure due to insufficient available memory.
 	NodeMemoryPressure NodeConditionType = "MemoryPressure"
-	// NodeNetworkingReady means that network for the node is correctly configured.
-	NodeNetworkingReady NodeConditionType = "NetworkingReady"
+	// NodeNetworkUnavailable means that network for the node is not correctly configured.
+	NodeNetworkUnavailable NodeConditionType = "NetworkUnavailable"
 )

 type NodeCondition struct {
diff --git a/pkg/api/v1/types.go b/pkg/api/v1/types.go
index dd64f6901d0..b555868af90 100644
--- a/pkg/api/v1/types.go
+++ b/pkg/api/v1/types.go
@@ -2412,8 +2412,8 @@ const (
 	NodeOutOfDisk NodeConditionType = "OutOfDisk"
 	// NodeMemoryPressure means the kubelet is under pressure due to insufficient available memory.
 	NodeMemoryPressure NodeConditionType = "MemoryPressure"
-	// NodeNetworkingReady means that network for the node is correctly configured.
-	NodeNetworkingReady NodeConditionType = "NetworkingReady"
+	// NodeNetworkUnavailable means that network for the node is not correctly configured.
+	NodeNetworkUnavailable NodeConditionType = "NetworkUnavailable"
 )

 // NodeCondition contains condition information for a node.
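
Note the inverted polarity that comes with the rename: for NetworkUnavailable, ConditionFalse is the healthy state, so a node whose provider never sets the condition counts as healthy by default. The standalone Go sketch below illustrates the lookup-and-mutate pattern the rest of the patch relies on; the types and the getNodeCondition helper are local stand-ins that only mirror the shape of api.GetNodeCondition, not the real pkg/api definitions.

package main

import "fmt"

// Stand-in types: just enough shape to mirror pkg/api, not the real definitions.
type ConditionStatus string

const (
	ConditionTrue  ConditionStatus = "True"
	ConditionFalse ConditionStatus = "False"
)

type NodeCondition struct {
	Type   string
	Status ConditionStatus
}

type NodeStatus struct {
	Conditions []NodeCondition
}

// getNodeCondition mirrors the shape of api.GetNodeCondition: it returns an
// index plus a pointer into the slice, which is what lets callers mutate the
// condition in place.
func getNodeCondition(status *NodeStatus, condType string) (int, *NodeCondition) {
	for i := range status.Conditions {
		if status.Conditions[i].Type == condType {
			return i, &status.Conditions[i]
		}
	}
	return -1, nil
}

func main() {
	s := &NodeStatus{Conditions: []NodeCondition{{Type: "NetworkUnavailable", Status: ConditionTrue}}}
	if _, c := getNodeCondition(s, "NetworkUnavailable"); c != nil {
		c.Status = ConditionFalse // in-place flip, as the route controller does on success
	}
	fmt.Println(s.Conditions[0].Status) // False: the healthy state for this condition
}
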
diff --git a/pkg/controller/route/routecontroller.go b/pkg/controller/route/routecontroller.go
index 15019992614..c9d72eb23b8 100644
--- a/pkg/controller/route/routecontroller.go
+++ b/pkg/controller/route/routecontroller.go
@@ -37,7 +37,8 @@ const (
 	maxConcurrentRouteCreations int = 200
 	// Maximum number of retries of route creations.
 	maxRetries int = 5
-	updateNodeStatusMaxRetries = 3
+	// Maximum number of retries of node status update.
+	updateNodeStatusMaxRetries int = 3
 )

 type RouteController struct {
@@ -86,22 +87,6 @@ func (rc *RouteController) reconcileNodeRoutes() error {
 	return rc.reconcile(nodeList.Items, routeList)
 }

-func tryUpdateNodeStatus(node *api.Node, kubeClient clientset.Interface) error {
-	for i := 0; i < updateNodeStatusMaxRetries; i++ {
-		if _, err := kubeClient.Core().Nodes().UpdateStatus(node); err == nil {
-			break
-		} else {
-			if i+1 < updateNodeStatusMaxRetries {
-				glog.Errorf("Error updating node %s - will retry: %v", node.Name, err)
-			} else {
-				glog.Errorf("Error updating node %s - wont retry: %v", node.Name, err)
-				return err
-			}
-		}
-	}
-	return nil
-}
-
 func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.Route) error {
 	// nodeCIDRs maps nodeName->nodeCIDR
 	nodeCIDRs := make(map[string]string)
@@ -129,6 +114,28 @@ func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.Route) error {
 			}
 			nameHint := string(node.UID)
 			wg.Add(1)
+			glog.Infof("Creating route for node %s %s with hint %s", node.Name, route.DestinationCIDR, nameHint)
+			go func(nodeName string, nameHint string, route *cloudprovider.Route) {
+				defer wg.Done()
+				for i := 0; i < maxRetries; i++ {
+					startTime := time.Now()
+					// Ensure that we don't have more than maxConcurrentRouteCreations
+					// CreateRoute calls in flight.
+					rateLimiter <- struct{}{}
+					err := rc.routes.CreateRoute(rc.clusterName, nameHint, route)
+					<-rateLimiter
+
+					rc.updateNetworkingCondition(nodeName, err == nil)
+					if err != nil {
+						glog.Errorf("Could not create route %s %s for node %s after %v: %v", nameHint, route.DestinationCIDR, nodeName, time.Now().Sub(startTime), err)
+					} else {
+						glog.Infof("Created route for node %s %s with hint %s after %v", nodeName, route.DestinationCIDR, nameHint, time.Now().Sub(startTime))
+						return
+					}
+				}
+			}(node.Name, nameHint, route)
+		} else {
+			rc.updateNetworkingCondition(node.Name, true)
 		}
 		nodeCIDRs[node.Name] = node.Spec.PodCIDR
 	}
@@ -138,12 +145,12 @@
 		if nodeCIDRs[route.TargetInstance] != route.DestinationCIDR {
 			wg.Add(1)
 			// Delete the route.
-			glog.V(2).Infof("Deleting route %s %s", route.Name, route.DestinationCIDR)
+			glog.Infof("Deleting route %s %s", route.Name, route.DestinationCIDR)
 			go func(route *cloudprovider.Route, startTime time.Time) {
 				if err := rc.routes.DeleteRoute(rc.clusterName, route); err != nil {
 					glog.Errorf("Could not delete route %s %s after %v: %v", route.Name, route.DestinationCIDR, time.Now().Sub(startTime), err)
 				} else {
-					glog.V(2).Infof("Deleted route %s %s after %v", route.Name, route.DestinationCIDR, time.Now().Sub(startTime))
+					glog.Infof("Deleted route %s %s after %v", route.Name, route.DestinationCIDR, time.Now().Sub(startTime))
 				}
 				wg.Done()
@@ -155,6 +162,66 @@ func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.Route) error {
 	return nil
 }

+func updateNetworkingCondition(node *api.Node, routeCreated bool) {
+	_, networkingCondition := api.GetNodeCondition(&node.Status, api.NodeNetworkUnavailable)
+	currentTime := unversioned.Now()
+	if routeCreated {
+		if networkingCondition != nil && networkingCondition.Status != api.ConditionFalse {
+			networkingCondition.Status = api.ConditionFalse
+			networkingCondition.Reason = "RouteCreated"
+			networkingCondition.Message = "RouteController created a route"
+			networkingCondition.LastTransitionTime = currentTime
+		} else if networkingCondition == nil {
+			node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
+				Type:               api.NodeNetworkUnavailable,
+				Status:             api.ConditionFalse,
+				Reason:             "RouteCreated",
+				Message:            "RouteController created a route",
+				LastTransitionTime: currentTime,
+			})
+		}
+	} else {
+		if networkingCondition != nil && networkingCondition.Status != api.ConditionTrue {
+			networkingCondition.Status = api.ConditionTrue
+			networkingCondition.Reason = "NoRouteCreated"
+			networkingCondition.Message = "RouteController failed to create a route"
+			networkingCondition.LastTransitionTime = currentTime
+		} else if networkingCondition == nil {
+			node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
+				Type:               api.NodeNetworkUnavailable,
+				Status:             api.ConditionTrue,
+				Reason:             "NoRouteCreated",
+				Message:            "RouteController failed to create a route",
+				LastTransitionTime: currentTime,
+			})
+		}
+	}
+}
+
+func (rc *RouteController) updateNetworkingCondition(nodeName string, routeCreated bool) error {
+	var err error
+	for i := 0; i < updateNodeStatusMaxRetries; i++ {
+		var node *api.Node
+		node, err = rc.kubeClient.Core().Nodes().Get(nodeName)
+		if err != nil {
+			glog.Errorf("Error getting node: %v", err)
+			continue
+		}
+		updateNetworkingCondition(node, routeCreated)
+		// TODO: Use Patch instead once #26381 is merged.
+		// See kubernetes/node-problem-detector#9 for details.
+		if _, err = rc.kubeClient.Core().Nodes().UpdateStatus(node); err == nil {
+			return nil
+		}
+		if i+1 < updateNodeStatusMaxRetries {
+			glog.Errorf("Error updating node %s, retrying: %v", node.Name, err)
+		} else {
+			glog.Errorf("Error updating node %s: %v", node.Name, err)
+		}
+	}
+	return err
+}
+
 func (rc *RouteController) isResponsibleForRoute(route *cloudprovider.Route) bool {
 	_, cidr, err := net.ParseCIDR(route.DestinationCIDR)
 	if err != nil {
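
The new updateNetworkingCondition keeps the bounded-retry idea of the deleted tryUpdateNodeStatus, but re-fetches the node on every attempt so a conflicting write is retried against a fresh object (the TODO notes a Patch would make this cheaper). Below is a minimal standalone sketch of that retry shape, with a toy client interface standing in for rc.kubeClient.Core().Nodes(); names and types here are illustrative only.

package main

import (
	"errors"
	"fmt"
)

// node is a stand-in for *api.Node, reduced to the one field we touch.
type node struct {
	Name               string
	NetworkUnavailable bool
}

// nodeClient stands in for the slice of the real client the controller uses.
type nodeClient interface {
	Get(name string) (*node, error)
	UpdateStatus(n *node) error
}

const updateNodeStatusMaxRetries = 3

// updateNetworkingCondition mirrors the controller's retry shape: re-fetch on
// every attempt so UpdateStatus always works on a fresh object, give up after
// a bounded number of tries, and report the last error.
func updateNetworkingCondition(c nodeClient, nodeName string, routeCreated bool) error {
	var err error
	for i := 0; i < updateNodeStatusMaxRetries; i++ {
		var n *node
		if n, err = c.Get(nodeName); err != nil {
			continue // transient read failure: retry with a fresh Get
		}
		n.NetworkUnavailable = !routeCreated
		if err = c.UpdateStatus(n); err == nil {
			return nil
		}
	}
	return err
}

// flakyClient fails the first UpdateStatus call to exercise the retry path.
type flakyClient struct{ calls int }

func (f *flakyClient) Get(name string) (*node, error) { return &node{Name: name}, nil }

func (f *flakyClient) UpdateStatus(n *node) error {
	f.calls++
	if f.calls == 1 {
		return errors.New("conflict")
	}
	return nil
}

func main() {
	fmt.Println(updateNetworkingCondition(&flakyClient{}, "node-1", true)) // <nil>: succeeds on the retry
}
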
diff --git a/pkg/controller/route/routecontroller_test.go b/pkg/controller/route/routecontroller_test.go
index 67a6fa946c4..398a494ee4d 100644
--- a/pkg/controller/route/routecontroller_test.go
+++ b/pkg/controller/route/routecontroller_test.go
@@ -23,6 +23,7 @@ import (

 	"k8s.io/kubernetes/pkg/api"
 	"k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/fake"
+	"k8s.io/kubernetes/pkg/client/testing/core"
 	"k8s.io/kubernetes/pkg/cloudprovider"
 	fakecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/fake"
 )
@@ -73,11 +74,11 @@ func TestReconcile(t *testing.T) {
 	nodeNoCidr := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: ""}}

 	testCases := []struct {
-		nodes                []api.Node
-		initialRoutes        []*cloudprovider.Route
-		expectedRoutes       []*cloudprovider.Route
-		expectedNetworkReady []bool
-		clientset            *fake.Clientset
+		nodes                      []api.Node
+		initialRoutes              []*cloudprovider.Route
+		expectedRoutes             []*cloudprovider.Route
+		expectedNetworkUnavailable []bool
+		clientset                  *fake.Clientset
 	}{
 		// 2 nodes, routes already there
 		{
@@ -93,8 +94,8 @@ func TestReconcile(t *testing.T) {
 				{cluster + "-01", "node-1", "10.120.0.0/24"},
 				{cluster + "-02", "node-2", "10.120.1.0/24"},
 			},
-			expectedNetworkReady: []bool{true, true},
-			clientset:            fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
+			expectedNetworkUnavailable: []bool{true, true},
+			clientset:                  fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
 		},
 		// 2 nodes, one route already there
 		{
@@ -109,8 +110,8 @@ func TestReconcile(t *testing.T) {
 				{cluster + "-01", "node-1", "10.120.0.0/24"},
 				{cluster + "-02", "node-2", "10.120.1.0/24"},
 			},
-			expectedNetworkReady: []bool{true, true},
-			clientset:            fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
+			expectedNetworkUnavailable: []bool{true, true},
+			clientset:                  fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
 		},
 		// 2 nodes, no routes yet
 		{
@@ -123,8 +124,8 @@ func TestReconcile(t *testing.T) {
 				{cluster + "-01", "node-1", "10.120.0.0/24"},
 				{cluster + "-02", "node-2", "10.120.1.0/24"},
 			},
-			expectedNetworkReady: []bool{true, true},
-			clientset:            fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
+			expectedNetworkUnavailable: []bool{true, true},
+			clientset:                  fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
 		},
 		// 2 nodes, a few too many routes
 		{
@@ -142,8 +143,8 @@ func TestReconcile(t *testing.T) {
 				{cluster + "-01", "node-1", "10.120.0.0/24"},
 				{cluster + "-02", "node-2", "10.120.1.0/24"},
 			},
-			expectedNetworkReady: []bool{true, true},
-			clientset:            fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
+			expectedNetworkUnavailable: []bool{true, true},
+			clientset:                  fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
 		},
 		// 2 nodes, 2 routes, but only 1 is right
 		{
@@ -159,8 +160,8 @@ func TestReconcile(t *testing.T) {
 				{cluster + "-01", "node-1", "10.120.0.0/24"},
 				{cluster + "-02", "node-2", "10.120.1.0/24"},
 			},
-			expectedNetworkReady: []bool{true, true},
-			clientset:            fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
+			expectedNetworkUnavailable: []bool{true, true},
+			clientset:                  fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
 		},
 		// 2 nodes, one node without CIDR assigned.
 		{
@@ -172,8 +173,8 @@ func TestReconcile(t *testing.T) {
 			expectedRoutes: []*cloudprovider.Route{
 				{cluster + "-01", "node-1", "10.120.0.0/24"},
 			},
-			expectedNetworkReady: []bool{true, false},
-			clientset:            fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, nodeNoCidr}}),
+			expectedNetworkUnavailable: []bool{true, false},
+			clientset:                  fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, nodeNoCidr}}),
 		},
 	}
 	for i, testCase := range testCases {
@@ -195,26 +196,27 @@ func TestReconcile(t *testing.T) {
 		}
 		for _, action := range testCase.clientset.Actions() {
 			if action.GetVerb() == "update" && action.GetResource().Resource == "nodes" {
-				node := action.GetObject().(*api.Node)
-				_, condition := api.GetNodeCondition(&node.Status, api.NodeNetworkingReady)
+				node := action.(core.UpdateAction).GetObject().(*api.Node)
+				_, condition := api.GetNodeCondition(&node.Status, api.NodeNetworkUnavailable)
 				if condition == nil {
-					t.Errorf("%d. Missing NodeNetworkingReady condition for Node %v", i, node.Name)
+					t.Errorf("%d. Missing NodeNetworkUnavailable condition for Node %v", i, node.Name)
 				} else {
 					check := func(index int) bool {
-						return (condition.Status == api.ConditionTrue) == testCase.expectedNetworkReady[index]
+						return (condition.Status == api.ConditionFalse) == testCase.expectedNetworkUnavailable[index]
 					}
-					var index int
-					if node.Name == node1.Name {
-						index = 0
-					} else if node.Name == node1.Name {
-						index = 1
-					} else {
+					index := -1
+					for j := range testCase.nodes {
+						if testCase.nodes[j].Name == node.Name {
+							index = j
+						}
+					}
+					if index == -1 {
 						// Something's wrong
 						continue
 					}
 					if !check(index) {
-						t.Errorf("%d. Invalid NodeNetworkingReady condition for Node %v, expected %v, got %v",
-							i, node.Name, (condition.Status == api.ConditionTrue), testCase.expectedNetworkReady[index])
+						t.Errorf("%d. Invalid NodeNetworkUnavailable condition for Node %v, expected %v, got %v",
+							i, node.Name, testCase.expectedNetworkUnavailable[index], (condition.Status == api.ConditionFalse))
 					}
 				}
 			}
diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 37858260d27..fc3f50afcc9 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -46,7 +46,6 @@ import (
 	clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
 	"k8s.io/kubernetes/pkg/client/record"
 	"k8s.io/kubernetes/pkg/cloudprovider"
-	"k8s.io/kubernetes/pkg/cloudprovider/providers/gce"
 	"k8s.io/kubernetes/pkg/fieldpath"
 	"k8s.io/kubernetes/pkg/fields"
 	"k8s.io/kubernetes/pkg/kubelet/cadvisor"
@@ -1014,6 +1013,16 @@ func (kl *Kubelet) initialNodeStatus() (*api.Node, error) {
 			Unschedulable: !kl.registerSchedulable,
 		},
 	}
+	// Initially, set NodeNetworkUnavailable to true.
+	if kl.providerRequiresNetworkingConfiguration() {
+		node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
+			Type:               api.NodeNetworkUnavailable,
+			Status:             api.ConditionTrue,
+			Reason:             "NoRouteCreated",
+			Message:            "Node created without a route",
+			LastTransitionTime: unversioned.NewTime(kl.clock.Now()),
+		})
+	}
 	// @question: should this be placed after the call to the cloud provider? which also applies labels
 	for k, v := range kl.nodeLabels {
@@ -1079,6 +1088,14 @@ func (kl *Kubelet) initialNodeStatus() (*api.Node, error) {
 	return node, nil
 }

+func (kl *Kubelet) providerRequiresNetworkingConfiguration() bool {
+	if kl.cloud == nil || kl.flannelExperimentalOverlay {
+		return false
+	}
+	_, supported := kl.cloud.Routes()
+	return supported
+}
+
 // registerWithApiserver registers the node with the cluster master. It is safe
 // to call multiple times, but not concurrently (kl.registrationCompleted is
 // not locked).
@@ -3164,23 +3181,12 @@ func (kl *Kubelet) setNodeReadyCondition(node *api.Node) {
 	currentTime := unversioned.NewTime(kl.clock.Now())
 	var newNodeReadyCondition api.NodeCondition
 	if rs := kl.runtimeState.errors(); len(rs) == 0 {
-		_, networkingCondition := api.GetNodeCondition(&node.Status, api.NodeNetworkingReady)
-		if (kl.cloud.ProviderName() == gce.ProviderName) && (networkingCondition == nil || networkingCondition.Status != api.ConditionTrue) {
-			newNodeReadyCondition = api.NodeCondition{
-				Type:              api.NodeReady,
-				Status:            api.ConditionFalse,
-				Reason:            "KubeletNotReady",
-				Message:           "networking is not ready",
-				LastHeartbeatTime: currentTime,
-			}
-		} else {
-			newNodeReadyCondition = api.NodeCondition{
-				Type:              api.NodeReady,
-				Status:            api.ConditionTrue,
-				Reason:            "KubeletReady",
-				Message:           "kubelet is posting ready status",
-				LastHeartbeatTime: currentTime,
-			}
+		newNodeReadyCondition = api.NodeCondition{
+			Type:              api.NodeReady,
+			Status:            api.ConditionTrue,
+			Reason:            "KubeletReady",
+			Message:           "kubelet is posting ready status",
+			LastHeartbeatTime: currentTime,
 		}
 	} else {
 		newNodeReadyCondition = api.NodeCondition{
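
On the kubelet side, a node now registers with NetworkUnavailable=True whenever its cloud provider manages routes, and only the route controller flips it to False; the experimental flannel overlay opts out because it configures networking itself. Here is a standalone sketch of that gate, with stand-in types for the cloudprovider interfaces (the real call is kl.cloud.Routes(), which returns a (cloudprovider.Routes, bool) pair):

package main

import "fmt"

// routes stands in for cloudprovider.Routes; only its presence matters here.
type routes interface{}

// cloud stands in for the provider handle the kubelet holds (kl.cloud).
type cloud struct {
	routesImpl routes
}

// Routes mimics cloudprovider.Interface.Routes: the bool reports whether the
// provider manages routes at all.
func (c *cloud) Routes() (routes, bool) {
	return c.routesImpl, c.routesImpl != nil
}

// providerRequiresNetworkingConfiguration mirrors the kubelet gate: only a
// provider that actually manages routes (and is not running the experimental
// flannel overlay) makes the node start with NetworkUnavailable=True.
func providerRequiresNetworkingConfiguration(c *cloud, flannelOverlay bool) bool {
	if c == nil || flannelOverlay {
		return false
	}
	_, supported := c.Routes()
	return supported
}

func main() {
	fmt.Println(providerRequiresNetworkingConfiguration(nil, false))                   // false: no cloud provider
	fmt.Println(providerRequiresNetworkingConfiguration(&cloud{}, false))              // false: provider has no route support
	fmt.Println(providerRequiresNetworkingConfiguration(&cloud{routesImpl: 1}, false)) // true: routes are provider-managed
	fmt.Println(providerRequiresNetworkingConfiguration(&cloud{routesImpl: 1}, true))  // false: flannel overlay opts out
}
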
diff --git a/plugin/pkg/scheduler/factory/factory.go b/plugin/pkg/scheduler/factory/factory.go
index d4f03b1bc17..73054f1da26 100644
--- a/plugin/pkg/scheduler/factory/factory.go
+++ b/plugin/pkg/scheduler/factory/factory.go
@@ -437,14 +437,19 @@ func (f *ConfigFactory) responsibleForPod(pod *api.Pod) bool {
 func getNodeConditionPredicate() cache.NodeConditionPredicate {
 	return func(node api.Node) bool {
 		for _, cond := range node.Status.Conditions {
-			// We consider the node for scheduling only when its NodeReady condition status
-			// is ConditionTrue and its NodeOutOfDisk condition status is ConditionFalse.
+			// We consider the node for scheduling only when its:
+			// - NodeReady condition status is ConditionTrue,
+			// - NodeOutOfDisk condition status is ConditionFalse,
+			// - NodeNetworkUnavailable condition status is ConditionFalse.
 			if cond.Type == api.NodeReady && cond.Status != api.ConditionTrue {
 				glog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
 				return false
 			} else if cond.Type == api.NodeOutOfDisk && cond.Status != api.ConditionFalse {
 				glog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
 				return false
+			} else if cond.Type == api.NodeNetworkUnavailable && cond.Status != api.ConditionFalse {
+				glog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
+				return false
 			}
 		}
 		return true
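
Taken together, the scheduler-side effect is that a freshly registered node on a route-managing provider is filtered out until the route controller sets NetworkUnavailable to False. Below is a standalone sketch of the predicate's logic with local stand-in types; the real implementation is the getNodeConditionPredicate hunk above.

package main

import "fmt"

type ConditionStatus string

const (
	ConditionTrue  ConditionStatus = "True"
	ConditionFalse ConditionStatus = "False"
)

type NodeCondition struct {
	Type   string
	Status ConditionStatus
}

type Node struct {
	Name       string
	Conditions []NodeCondition
}

// schedulable mirrors getNodeConditionPredicate: a node is considered only when
// Ready is True, and OutOfDisk and NetworkUnavailable are False (or absent).
func schedulable(n Node) bool {
	for _, cond := range n.Conditions {
		switch {
		case cond.Type == "Ready" && cond.Status != ConditionTrue:
			return false
		case cond.Type == "OutOfDisk" && cond.Status != ConditionFalse:
			return false
		case cond.Type == "NetworkUnavailable" && cond.Status != ConditionFalse:
			return false
		}
	}
	return true
}

func main() {
	n := Node{Name: "node-1", Conditions: []NodeCondition{
		{"Ready", ConditionTrue},
		{"NetworkUnavailable", ConditionTrue}, // no route created yet
	}}
	fmt.Println(schedulable(n)) // false until the route controller flips the condition
}
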