diff --git a/pkg/api/resource_helpers.go b/pkg/api/resource_helpers.go index 2a664d8dc8a..4c55b120e14 100644 --- a/pkg/api/resource_helpers.go +++ b/pkg/api/resource_helpers.go @@ -95,8 +95,25 @@ func GetPodReadyCondition(status PodStatus) *PodCondition { // GetPodCondition extracts the provided condition from the given status and returns that. // Returns nil and -1 if the condition is not present, and the the index of the located condition. func GetPodCondition(status *PodStatus, conditionType PodConditionType) (int, *PodCondition) { - for i, c := range status.Conditions { - if c.Type == conditionType { + if status == nil { + return -1, nil + } + for i := range status.Conditions { + if status.Conditions[i].Type == conditionType { + return i, &status.Conditions[i] + } + } + return -1, nil +} + +// GetNodeCondition extracts the provided condition from the given status and returns that. +// Returns nil and -1 if the condition is not present, and the the index of the located condition. +func GetNodeCondition(status *NodeStatus, conditionType NodeConditionType) (int, *NodeCondition) { + if status == nil { + return -1, nil + } + for i := range status.Conditions { + if status.Conditions[i].Type == conditionType { return i, &status.Conditions[i] } } diff --git a/pkg/api/types.go b/pkg/api/types.go index 15b62af4da5..4847ba434c5 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -2014,6 +2014,8 @@ const ( NodeOutOfDisk NodeConditionType = "OutOfDisk" // NodeMemoryPressure means the kubelet is under pressure due to insufficient available memory. NodeMemoryPressure NodeConditionType = "MemoryPressure" + // NodeNetworkUnavailable means that network for the node is not correctly configured. + NodeNetworkUnavailable NodeConditionType = "NetworkUnavailable" ) type NodeCondition struct { diff --git a/pkg/api/v1/types.go b/pkg/api/v1/types.go index 25eb5ee711a..382c5513fbe 100644 --- a/pkg/api/v1/types.go +++ b/pkg/api/v1/types.go @@ -2418,6 +2418,8 @@ const ( NodeOutOfDisk NodeConditionType = "OutOfDisk" // NodeMemoryPressure means the kubelet is under pressure due to insufficient available memory. NodeMemoryPressure NodeConditionType = "MemoryPressure" + // NodeNetworkUnavailable means that network for the node is not correctly configured. + NodeNetworkUnavailable NodeConditionType = "NetworkUnavailable" ) // NodeCondition contains condition infromation for a node. diff --git a/pkg/controller/node/nodecontroller.go b/pkg/controller/node/nodecontroller.go index ff3bd064fc6..2d3998bdc31 100644 --- a/pkg/controller/node/nodecontroller.go +++ b/pkg/controller/node/nodecontroller.go @@ -417,20 +417,6 @@ func (nc *NodeController) recycleCIDR(obj interface{}) { } } -// getCondition returns a condition object for the specific condition -// type, nil if the condition is not set. 
-func (nc *NodeController) getCondition(status *api.NodeStatus, conditionType api.NodeConditionType) *api.NodeCondition { - if status == nil { - return nil - } - for i := range status.Conditions { - if status.Conditions[i].Type == conditionType { - return &status.Conditions[i] - } - } - return nil -} - var gracefulDeletionVersion = version.MustParse("v1.1.0") // maybeDeleteTerminatingPod non-gracefully deletes pods that are terminating @@ -716,7 +702,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap var err error var gracePeriod time.Duration var observedReadyCondition api.NodeCondition - currentReadyCondition := nc.getCondition(&node.Status, api.NodeReady) + _, currentReadyCondition := api.GetNodeCondition(&node.Status, api.NodeReady) if currentReadyCondition == nil { // If ready condition is nil, then kubelet (or nodecontroller) never posted node status. // A fake ready condition is created, where LastProbeTime and LastTransitionTime is set @@ -756,9 +742,9 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap // if that's the case, but it does not seem necessary. var savedCondition *api.NodeCondition if found { - savedCondition = nc.getCondition(&savedNodeStatus.status, api.NodeReady) + _, savedCondition = api.GetNodeCondition(&savedNodeStatus.status, api.NodeReady) } - observedCondition := nc.getCondition(&node.Status, api.NodeReady) + _, observedCondition := api.GetNodeCondition(&node.Status, api.NodeReady) if !found { glog.Warningf("Missing timestamp for Node %s. Assuming now as a timestamp.", node.Name) savedNodeStatus = nodeStatusData{ @@ -834,7 +820,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap // Like NodeReady condition, NodeOutOfDisk was last set longer ago than gracePeriod, so update // it to Unknown (regardless of its current value) in the master. // TODO(madhusudancs): Refactor this with readyCondition to remove duplicated code. - oodCondition := nc.getCondition(&node.Status, api.NodeOutOfDisk) + _, oodCondition := api.GetNodeCondition(&node.Status, api.NodeOutOfDisk) if oodCondition == nil { glog.V(2).Infof("Out of disk condition of node %v is never updated by kubelet", node.Name) node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{ @@ -856,7 +842,8 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap } } - if !api.Semantic.DeepEqual(nc.getCondition(&node.Status, api.NodeReady), &observedReadyCondition) { + _, currentCondition := api.GetNodeCondition(&node.Status, api.NodeReady) + if !api.Semantic.DeepEqual(currentCondition, &observedReadyCondition) { if _, err = nc.kubeClient.Core().Nodes().UpdateStatus(node); err != nil { glog.Errorf("Error updating node %s: %v", node.Name, err) return gracePeriod, observedReadyCondition, currentReadyCondition, err diff --git a/pkg/controller/route/routecontroller.go b/pkg/controller/route/routecontroller.go index 68a2ac8fe5a..c9d72eb23b8 100644 --- a/pkg/controller/route/routecontroller.go +++ b/pkg/controller/route/routecontroller.go @@ -24,6 +24,7 @@ import ( "github.com/golang/glog" "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/api/unversioned" clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset" "k8s.io/kubernetes/pkg/cloudprovider" "k8s.io/kubernetes/pkg/util/metrics" @@ -36,6 +37,8 @@ const ( maxConcurrentRouteCreations int = 200 // Maximum number of retries of route creations. 
maxRetries int = 5 + // Maximum number of retries of node status update. + updateNodeStatusMaxRetries int = 3 ) type RouteController struct { @@ -121,6 +124,8 @@ func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.R rateLimiter <- struct{}{} err := rc.routes.CreateRoute(rc.clusterName, nameHint, route) <-rateLimiter + + rc.updateNetworkingCondition(nodeName, err == nil) if err != nil { glog.Errorf("Could not create route %s %s for node %s after %v: %v", nameHint, route.DestinationCIDR, nodeName, time.Now().Sub(startTime), err) } else { @@ -129,6 +134,8 @@ func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.R } } }(node.Name, nameHint, route) + } else { + rc.updateNetworkingCondition(node.Name, true) } nodeCIDRs[node.Name] = node.Spec.PodCIDR } @@ -155,6 +162,65 @@ func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.R return nil } +func updateNetworkingCondition(node *api.Node, routeCreated bool) { + _, networkingCondition := api.GetNodeCondition(&node.Status, api.NodeNetworkUnavailable) + currentTime := unversioned.Now() + if routeCreated { + if networkingCondition != nil && networkingCondition.Status != api.ConditionFalse { + networkingCondition.Status = api.ConditionFalse + networkingCondition.Reason = "RouteCreated" + networkingCondition.Message = "RouteController created a route" + networkingCondition.LastTransitionTime = currentTime + } else if networkingCondition == nil { + node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{ + Type: api.NodeNetworkUnavailable, + Status: api.ConditionFalse, + Reason: "RouteCreated", + Message: "RouteController created a route", + LastTransitionTime: currentTime, + }) + } + } else { + if networkingCondition != nil && networkingCondition.Status != api.ConditionTrue { + networkingCondition.Status = api.ConditionTrue + networkingCondition.Reason = "NoRouteCreated" + networkingCondition.Message = "RouteController failed to create a route" + networkingCondition.LastTransitionTime = currentTime + } else if networkingCondition == nil { + node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{ + Type: api.NodeNetworkUnavailable, + Status: api.ConditionTrue, + Reason: "NoRouteCreated", + Message: "RouteController failed to create a route", + LastTransitionTime: currentTime, + }) + } + } +} + +func (rc *RouteController) updateNetworkingCondition(nodeName string, routeCreated bool) error { + var err error + for i := 0; i < updateNodeStatusMaxRetries; i++ { + node, err := rc.kubeClient.Core().Nodes().Get(nodeName) + if err != nil { + glog.Errorf("Error geting node: %v", err) + continue + } + updateNetworkingCondition(node, routeCreated) + // TODO: Use Patch instead once #26381 is merged. + // See kubernetes/node-problem-detector#9 for details. 
+ if _, err = rc.kubeClient.Core().Nodes().UpdateStatus(node); err == nil { + return nil + } + if i+1 < updateNodeStatusMaxRetries { + glog.Errorf("Error updating node %s, retrying: %v", node.Name, err) + } else { + glog.Errorf("Error updating node %s: %v", node.Name, err) + } + } + return err +} + func (rc *RouteController) isResponsibleForRoute(route *cloudprovider.Route) bool { _, cidr, err := net.ParseCIDR(route.DestinationCIDR) if err != nil { diff --git a/pkg/controller/route/routecontroller_test.go b/pkg/controller/route/routecontroller_test.go index 3db2079f5a1..398a494ee4d 100644 --- a/pkg/controller/route/routecontroller_test.go +++ b/pkg/controller/route/routecontroller_test.go @@ -22,6 +22,8 @@ import ( "time" "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/fake" + "k8s.io/kubernetes/pkg/client/testing/core" "k8s.io/kubernetes/pkg/cloudprovider" fakecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/fake" ) @@ -67,16 +69,22 @@ func TestIsResponsibleForRoute(t *testing.T) { func TestReconcile(t *testing.T) { cluster := "my-k8s" + node1 := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}} + node2 := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}} + nodeNoCidr := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: ""}} + testCases := []struct { - nodes []api.Node - initialRoutes []*cloudprovider.Route - expectedRoutes []*cloudprovider.Route + nodes []api.Node + initialRoutes []*cloudprovider.Route + expectedRoutes []*cloudprovider.Route + expectedNetworkUnavailable []bool + clientset *fake.Clientset }{ // 2 nodes, routes already there { nodes: []api.Node{ - {ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}}, - {ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}}, + node1, + node2, }, initialRoutes: []*cloudprovider.Route{ {cluster + "-01", "node-1", "10.120.0.0/24"}, @@ -86,12 +94,14 @@ func TestReconcile(t *testing.T) { {cluster + "-01", "node-1", "10.120.0.0/24"}, {cluster + "-02", "node-2", "10.120.1.0/24"}, }, + expectedNetworkUnavailable: []bool{true, true}, + clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), }, // 2 nodes, one route already there { nodes: []api.Node{ - {ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}}, - {ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}}, + node1, + node2, }, initialRoutes: []*cloudprovider.Route{ {cluster + "-01", "node-1", "10.120.0.0/24"}, @@ -100,24 +110,28 @@ func TestReconcile(t *testing.T) { {cluster + "-01", "node-1", "10.120.0.0/24"}, {cluster + "-02", "node-2", "10.120.1.0/24"}, }, + expectedNetworkUnavailable: []bool{true, true}, + clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), }, // 2 nodes, no routes yet { nodes: []api.Node{ - {ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}}, - {ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}}, + node1, + node2, }, initialRoutes: []*cloudprovider.Route{}, expectedRoutes: []*cloudprovider.Route{ {cluster + "-01", "node-1", "10.120.0.0/24"}, {cluster + "-02", "node-2", "10.120.1.0/24"}, }, + 
expectedNetworkUnavailable: []bool{true, true}, + clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), }, // 2 nodes, a few too many routes { nodes: []api.Node{ - {ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}}, - {ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}}, + node1, + node2, }, initialRoutes: []*cloudprovider.Route{ {cluster + "-01", "node-1", "10.120.0.0/24"}, @@ -129,12 +143,14 @@ func TestReconcile(t *testing.T) { {cluster + "-01", "node-1", "10.120.0.0/24"}, {cluster + "-02", "node-2", "10.120.1.0/24"}, }, + expectedNetworkUnavailable: []bool{true, true}, + clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), }, // 2 nodes, 2 routes, but only 1 is right { nodes: []api.Node{ - {ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}}, - {ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}}, + node1, + node2, }, initialRoutes: []*cloudprovider.Route{ {cluster + "-01", "node-1", "10.120.0.0/24"}, @@ -144,17 +160,21 @@ func TestReconcile(t *testing.T) { {cluster + "-01", "node-1", "10.120.0.0/24"}, {cluster + "-02", "node-2", "10.120.1.0/24"}, }, + expectedNetworkUnavailable: []bool{true, true}, + clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}), }, // 2 nodes, one node without CIDR assigned. { nodes: []api.Node{ - {ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}}, - {ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: ""}}, + node1, + nodeNoCidr, }, initialRoutes: []*cloudprovider.Route{}, expectedRoutes: []*cloudprovider.Route{ {cluster + "-01", "node-1", "10.120.0.0/24"}, }, + expectedNetworkUnavailable: []bool{true, false}, + clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, nodeNoCidr}}), }, } for i, testCase := range testCases { @@ -170,10 +190,37 @@ func TestReconcile(t *testing.T) { t.Error("Error in test: fakecloud doesn't support Routes()") } _, cidr, _ := net.ParseCIDR("10.120.0.0/16") - rc := New(routes, nil, cluster, cidr) + rc := New(routes, testCase.clientset, cluster, cidr) if err := rc.reconcile(testCase.nodes, testCase.initialRoutes); err != nil { t.Errorf("%d. Error from rc.reconcile(): %v", i, err) } + for _, action := range testCase.clientset.Actions() { + if action.GetVerb() == "update" && action.GetResource().Resource == "nodes" { + node := action.(core.UpdateAction).GetObject().(*api.Node) + _, condition := api.GetNodeCondition(&node.Status, api.NodeNetworkUnavailable) + if condition == nil { + t.Errorf("%d. Missing NodeNetworkUnavailable condition for Node %v", i, node.Name) + } else { + check := func(index int) bool { + return (condition.Status == api.ConditionFalse) == testCase.expectedNetworkUnavailable[index] + } + index := -1 + for j := range testCase.nodes { + if testCase.nodes[j].Name == node.Name { + index = j + } + } + if index == -1 { + // Something's wrong + continue + } + if !check(index) { + t.Errorf("%d. 
Invalid NodeNetworkUnavailable condition for Node %v, expected %v, got %v", + i, node.Name, testCase.expectedNetworkUnavailable[index], (condition.Status == api.ConditionFalse)) + } + } + } + } var finalRoutes []*cloudprovider.Route var err error timeoutChan := time.After(200 * time.Millisecond) diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 83a6ae93f27..b15219166e7 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -1018,6 +1018,16 @@ func (kl *Kubelet) initialNodeStatus() (*api.Node, error) { Unschedulable: !kl.registerSchedulable, }, } + // Initially, set NodeNetworkUnavailable to true. + if kl.providerRequiresNetworkingConfiguration() { + node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{ + Type: api.NodeNetworkUnavailable, + Status: api.ConditionTrue, + Reason: "NoRouteCreated", + Message: "Node created without a route", + LastTransitionTime: unversioned.NewTime(kl.clock.Now()), + }) + } // @question: should this be place after the call to the cloud provider? which also applies labels for k, v := range kl.nodeLabels { @@ -1088,6 +1098,14 @@ func (kl *Kubelet) initialNodeStatus() (*api.Node, error) { return node, nil } +func (kl *Kubelet) providerRequiresNetworkingConfiguration() bool { + if kl.cloud == nil || kl.flannelExperimentalOverlay { + return false + } + _, supported := kl.cloud.Routes() + return supported +} + // registerWithApiserver registers the node with the cluster master. It is safe // to call multiple times, but not concurrently (kl.registrationCompleted is // not locked). diff --git a/plugin/pkg/scheduler/factory/factory.go b/plugin/pkg/scheduler/factory/factory.go index d4f03b1bc17..73054f1da26 100644 --- a/plugin/pkg/scheduler/factory/factory.go +++ b/plugin/pkg/scheduler/factory/factory.go @@ -437,14 +437,19 @@ func (f *ConfigFactory) responsibleForPod(pod *api.Pod) bool { func getNodeConditionPredicate() cache.NodeConditionPredicate { return func(node api.Node) bool { for _, cond := range node.Status.Conditions { - // We consider the node for scheduling only when its NodeReady condition status - // is ConditionTrue and its NodeOutOfDisk condition status is ConditionFalse. + // We consider the node for scheduling only when its: + // - NodeReady condition status is ConditionTrue, + // - NodeOutOfDisk condition status is ConditionFalse, + // - NodeNetworkUnavailable condition status is ConditionFalse. if cond.Type == api.NodeReady && cond.Status != api.ConditionTrue { glog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status) return false } else if cond.Type == api.NodeOutOfDisk && cond.Status != api.ConditionFalse { glog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status) return false + } else if cond.Type == api.NodeNetworkUnavailable && cond.Status != api.ConditionFalse { + glog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status) + return false } } return true
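
Note on the condition lifecycle introduced above (illustrative summary, not part of the patch): the kubelet registers a node with NodeNetworkUnavailable set to ConditionTrue (reason "NoRouteCreated") whenever providerRequiresNetworkingConfiguration() holds, i.e. a cloud provider is configured, the flannel experimental overlay is off, and the provider implements Routes(); the route controller then updates the condition during reconcile, setting ConditionFalse (reason "RouteCreated") when a route exists or was created and ConditionTrue (reason "NoRouteCreated") when creation failed; finally, the scheduler's node predicate skips any node whose NodeNetworkUnavailable status is not ConditionFalse. The sketch below shows how the same check could be applied outside the scheduler using only identifiers added in this diff; the helper name nodeNetworkReady is invented for illustration and is not part of the change.

// Hypothetical helper, not part of the patch: mirrors the scheduler predicate
// in plugin/pkg/scheduler/factory/factory.go, using the GetNodeCondition
// helper added to pkg/api/resource_helpers.go.
package example

import "k8s.io/kubernetes/pkg/api"

// nodeNetworkReady reports whether a node's network has been configured, as
// the route controller would record it in the NodeNetworkUnavailable condition.
func nodeNetworkReady(node *api.Node) bool {
	_, cond := api.GetNodeCondition(&node.Status, api.NodeNetworkUnavailable)
	if cond == nil {
		// Kubelets that never set the condition (no cloud provider, or one
		// without Routes()) leave it absent; treat such nodes as ready,
		// matching the scheduler predicate, which only rejects an explicit
		// non-False status.
		return true
	}
	return cond.Status == api.ConditionFalse
}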