Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-23 11:50:44 +00:00)
Node is NotReady until the Route is created
parent 1cb088857b
commit 7bdf480340
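In short: the commit introduces a NodeNetworkingReady node condition, has the route controller post it once a cloud route exists for the node, and makes the kubelet on GCE report NodeReady=False until that condition is true. A condensed sketch of the new kubelet gating follows (a paraphrase of the last hunk below, assuming the pkg/api helpers added by this commit; the surrounding kubelet plumbing and error handling are omitted):

// Sketch only: condenses the setNodeReadyCondition change shown in the last hunk.
// nodeReadyStatus is a hypothetical name; the real code builds a full api.NodeCondition.
func nodeReadyStatus(onGCE bool, status *api.NodeStatus) api.ConditionStatus {
	_, networking := api.GetNodeCondition(status, api.NodeNetworkingReady)
	if onGCE && (networking == nil || networking.Status != api.ConditionTrue) {
		// No NetworkingReady=True yet (e.g. the cloud route has not been created):
		// the kubelet reports NodeReady=False with reason "KubeletNotReady".
		return api.ConditionFalse
	}
	// Networking is ready, or the node is not on GCE: NodeReady=True as before.
	return api.ConditionTrue
}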
@@ -95,8 +95,25 @@ func GetPodReadyCondition(status PodStatus) *PodCondition {
 // GetPodCondition extracts the provided condition from the given status and returns that.
 // Returns nil and -1 if the condition is not present, and the index of the located condition.
 func GetPodCondition(status *PodStatus, conditionType PodConditionType) (int, *PodCondition) {
-	for i, c := range status.Conditions {
-		if c.Type == conditionType {
+	if status == nil {
+		return -1, nil
+	}
+	for i := range status.Conditions {
+		if status.Conditions[i].Type == conditionType {
 			return i, &status.Conditions[i]
 		}
 	}
 	return -1, nil
 }
+
+// GetNodeCondition extracts the provided condition from the given status and returns that.
+// Returns nil and -1 if the condition is not present, and the index of the located condition.
+func GetNodeCondition(status *NodeStatus, conditionType NodeConditionType) (int, *NodeCondition) {
+	if status == nil {
+		return -1, nil
+	}
+	for i := range status.Conditions {
+		if status.Conditions[i].Type == conditionType {
+			return i, &status.Conditions[i]
+		}
+	}
+	return -1, nil
+}
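GetNodeCondition mirrors GetPodCondition and is what the node controller, route controller test, and kubelet hunks below switch to. A minimal usage sketch (a fragment that assumes the k8s.io/kubernetes/pkg/api package from this diff and the fmt package; the status literal is invented for illustration):

// Sketch only: exercising the new helper from outside the api package.
status := api.NodeStatus{
	Conditions: []api.NodeCondition{
		{Type: api.NodeNetworkingReady, Status: api.ConditionTrue},
	},
}
if idx, cond := api.GetNodeCondition(&status, api.NodeNetworkingReady); cond != nil {
	fmt.Printf("conditions[%d]: %v=%v\n", idx, cond.Type, cond.Status)
}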
@@ -2009,6 +2009,8 @@ const (
 	NodeOutOfDisk NodeConditionType = "OutOfDisk"
 	// NodeMemoryPressure means the kubelet is under pressure due to insufficient available memory.
 	NodeMemoryPressure NodeConditionType = "MemoryPressure"
+	// NodeNetworkingReady means that network for the node is correctly configured.
+	NodeNetworkingReady NodeConditionType = "NetworkingReady"
 )
 
 type NodeCondition struct {
@@ -2412,6 +2412,8 @@ const (
 	NodeOutOfDisk NodeConditionType = "OutOfDisk"
 	// NodeMemoryPressure means the kubelet is under pressure due to insufficient available memory.
 	NodeMemoryPressure NodeConditionType = "MemoryPressure"
+	// NodeNetworkingReady means that network for the node is correctly configured.
+	NodeNetworkingReady NodeConditionType = "NetworkingReady"
 )
 
 // NodeCondition contains condition information for a node.
@@ -412,20 +412,6 @@ func (nc *NodeController) recycleCIDR(obj interface{}) {
 	}
 }
 
-// getCondition returns a condition object for the specific condition
-// type, nil if the condition is not set.
-func (nc *NodeController) getCondition(status *api.NodeStatus, conditionType api.NodeConditionType) *api.NodeCondition {
-	if status == nil {
-		return nil
-	}
-	for i := range status.Conditions {
-		if status.Conditions[i].Type == conditionType {
-			return &status.Conditions[i]
-		}
-	}
-	return nil
-}
-
 var gracefulDeletionVersion = version.MustParse("v1.1.0")
 
 // maybeDeleteTerminatingPod non-gracefully deletes pods that are terminating
@@ -711,7 +697,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
 	var err error
 	var gracePeriod time.Duration
 	var observedReadyCondition api.NodeCondition
-	currentReadyCondition := nc.getCondition(&node.Status, api.NodeReady)
+	_, currentReadyCondition := api.GetNodeCondition(&node.Status, api.NodeReady)
 	if currentReadyCondition == nil {
 		// If ready condition is nil, then kubelet (or nodecontroller) never posted node status.
 		// A fake ready condition is created, where LastProbeTime and LastTransitionTime is set
@@ -751,9 +737,9 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
 	// if that's the case, but it does not seem necessary.
 	var savedCondition *api.NodeCondition
 	if found {
-		savedCondition = nc.getCondition(&savedNodeStatus.status, api.NodeReady)
+		_, savedCondition = api.GetNodeCondition(&savedNodeStatus.status, api.NodeReady)
 	}
-	observedCondition := nc.getCondition(&node.Status, api.NodeReady)
+	_, observedCondition := api.GetNodeCondition(&node.Status, api.NodeReady)
 	if !found {
 		glog.Warningf("Missing timestamp for Node %s. Assuming now as a timestamp.", node.Name)
 		savedNodeStatus = nodeStatusData{
@@ -829,7 +815,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
 	// Like NodeReady condition, NodeOutOfDisk was last set longer ago than gracePeriod, so update
 	// it to Unknown (regardless of its current value) in the master.
 	// TODO(madhusudancs): Refactor this with readyCondition to remove duplicated code.
-	oodCondition := nc.getCondition(&node.Status, api.NodeOutOfDisk)
+	_, oodCondition := api.GetNodeCondition(&node.Status, api.NodeOutOfDisk)
 	if oodCondition == nil {
 		glog.V(2).Infof("Out of disk condition of node %v is never updated by kubelet", node.Name)
 		node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
@@ -851,7 +837,8 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
 		}
 	}
 
-	if !api.Semantic.DeepEqual(nc.getCondition(&node.Status, api.NodeReady), &observedReadyCondition) {
+	_, currentCondition := api.GetNodeCondition(&node.Status, api.NodeReady)
+	if !api.Semantic.DeepEqual(currentCondition, &observedReadyCondition) {
 		if _, err = nc.kubeClient.Core().Nodes().UpdateStatus(node); err != nil {
 			glog.Errorf("Error updating node %s: %v", node.Name, err)
 			return gracePeriod, observedReadyCondition, currentReadyCondition, err
@@ -24,6 +24,7 @@ import (
 
 	"github.com/golang/glog"
 	"k8s.io/kubernetes/pkg/api"
+	"k8s.io/kubernetes/pkg/api/unversioned"
 	clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
 	"k8s.io/kubernetes/pkg/cloudprovider"
 	"k8s.io/kubernetes/pkg/util/metrics"
@@ -36,6 +37,7 @@ const (
 	maxConcurrentRouteCreations int = 200
 	// Maximum number of retries of route creations.
 	maxRetries int = 5
+	updateNodeStatusMaxRetries = 3
 )
 
 type RouteController struct {
@@ -84,6 +86,22 @@ func (rc *RouteController) reconcileNodeRoutes() error {
 	return rc.reconcile(nodeList.Items, routeList)
 }
 
+func tryUpdateNodeStatus(node *api.Node, kubeClient clientset.Interface) error {
+	for i := 0; i < updateNodeStatusMaxRetries; i++ {
+		if _, err := kubeClient.Core().Nodes().UpdateStatus(node); err == nil {
+			break
+		} else {
+			if i+1 < updateNodeStatusMaxRetries {
+				glog.Errorf("Error updating node %s - will retry: %v", node.Name, err)
+			} else {
+				glog.Errorf("Error updating node %s - won't retry: %v", node.Name, err)
+				return err
+			}
+		}
+	}
+	return nil
+}
+
 func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.Route) error {
 	// nodeCIDRs maps nodeName->nodeCIDR
 	nodeCIDRs := make(map[string]string)
@@ -111,24 +129,6 @@ func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.R
 			}
 			nameHint := string(node.UID)
 			wg.Add(1)
-			glog.Infof("Creating route for node %s %s with hint %s", node.Name, route.DestinationCIDR, nameHint)
-			go func(nodeName string, nameHint string, route *cloudprovider.Route) {
-				defer wg.Done()
-				for i := 0; i < maxRetries; i++ {
-					startTime := time.Now()
-					// Ensure that we don't have more than maxConcurrentRouteCreations
-					// CreateRoute calls in flight.
-					rateLimiter <- struct{}{}
-					err := rc.routes.CreateRoute(rc.clusterName, nameHint, route)
-					<-rateLimiter
-					if err != nil {
-						glog.Errorf("Could not create route %s %s for node %s after %v: %v", nameHint, route.DestinationCIDR, nodeName, time.Now().Sub(startTime), err)
-					} else {
-						glog.Infof("Created route for node %s %s with hint %s after %v", nodeName, route.DestinationCIDR, nameHint, time.Now().Sub(startTime))
-						return
-					}
-				}
-			}(node.Name, nameHint, route)
 		}
 		nodeCIDRs[node.Name] = node.Spec.PodCIDR
 	}
@@ -138,12 +138,12 @@ func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.R
 		if nodeCIDRs[route.TargetInstance] != route.DestinationCIDR {
 			wg.Add(1)
 			// Delete the route.
-			glog.Infof("Deleting route %s %s", route.Name, route.DestinationCIDR)
+			glog.V(2).Infof("Deleting route %s %s", route.Name, route.DestinationCIDR)
 			go func(route *cloudprovider.Route, startTime time.Time) {
 				if err := rc.routes.DeleteRoute(rc.clusterName, route); err != nil {
 					glog.Errorf("Could not delete route %s %s after %v: %v", route.Name, route.DestinationCIDR, time.Now().Sub(startTime), err)
 				} else {
-					glog.Infof("Deleted route %s %s after %v", route.Name, route.DestinationCIDR, time.Now().Sub(startTime))
+					glog.V(2).Infof("Deleted route %s %s after %v", route.Name, route.DestinationCIDR, time.Now().Sub(startTime))
 				}
 				wg.Done()
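The hunks above add the retry helper and drop the inline route-creation goroutine, but the code that actually posts the NodeNetworkingReady condition after CreateRoute is not visible in this excerpt. Purely as an illustration of how the pieces are meant to fit together (markNetworkingReady is a hypothetical name; only tryUpdateNodeStatus, api.GetNodeCondition, api.NodeNetworkingReady, and the unversioned import come from the diff), the per-node update presumably looks something like:

// Illustration only - not taken from the diff. Shows how a route-creation result
// could be turned into a NodeNetworkingReady condition and pushed with the
// tryUpdateNodeStatus retry helper added above.
func markNetworkingReady(node *api.Node, kubeClient clientset.Interface, ready api.ConditionStatus) error {
	_, cond := api.GetNodeCondition(&node.Status, api.NodeNetworkingReady)
	if cond == nil {
		node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
			Type:               api.NodeNetworkingReady,
			Status:             ready,
			LastTransitionTime: unversioned.Now(),
		})
	} else {
		// GetNodeCondition returns a pointer into the slice, so this mutates node.Status.
		cond.Status = ready
		cond.LastTransitionTime = unversioned.Now()
	}
	return tryUpdateNodeStatus(node, kubeClient)
}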
@@ -22,6 +22,7 @@ import (
 	"time"
 
 	"k8s.io/kubernetes/pkg/api"
+	"k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/fake"
 	"k8s.io/kubernetes/pkg/cloudprovider"
 	fakecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/fake"
 )
@@ -67,16 +68,22 @@ func TestIsResponsibleForRoute(t *testing.T) {
 
 func TestReconcile(t *testing.T) {
 	cluster := "my-k8s"
+	node1 := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}}
+	node2 := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}}
+	nodeNoCidr := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: ""}}
+
 	testCases := []struct {
-		nodes          []api.Node
-		initialRoutes  []*cloudprovider.Route
-		expectedRoutes []*cloudprovider.Route
+		nodes                []api.Node
+		initialRoutes        []*cloudprovider.Route
+		expectedRoutes       []*cloudprovider.Route
+		expectedNetworkReady []bool
+		clientset            *fake.Clientset
 	}{
 		// 2 nodes, routes already there
 		{
 			nodes: []api.Node{
-				{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
-				{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
+				node1,
+				node2,
 			},
 			initialRoutes: []*cloudprovider.Route{
 				{cluster + "-01", "node-1", "10.120.0.0/24"},
@@ -86,12 +93,14 @@ func TestReconcile(t *testing.T) {
 				{cluster + "-01", "node-1", "10.120.0.0/24"},
 				{cluster + "-02", "node-2", "10.120.1.0/24"},
 			},
+			expectedNetworkReady: []bool{true, true},
+			clientset:            fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
 		},
 		// 2 nodes, one route already there
 		{
 			nodes: []api.Node{
-				{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
-				{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
+				node1,
+				node2,
 			},
 			initialRoutes: []*cloudprovider.Route{
 				{cluster + "-01", "node-1", "10.120.0.0/24"},
@@ -100,24 +109,28 @@ func TestReconcile(t *testing.T) {
 				{cluster + "-01", "node-1", "10.120.0.0/24"},
 				{cluster + "-02", "node-2", "10.120.1.0/24"},
 			},
+			expectedNetworkReady: []bool{true, true},
+			clientset:            fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
 		},
 		// 2 nodes, no routes yet
 		{
 			nodes: []api.Node{
-				{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
-				{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
+				node1,
+				node2,
 			},
 			initialRoutes: []*cloudprovider.Route{},
 			expectedRoutes: []*cloudprovider.Route{
 				{cluster + "-01", "node-1", "10.120.0.0/24"},
 				{cluster + "-02", "node-2", "10.120.1.0/24"},
 			},
+			expectedNetworkReady: []bool{true, true},
+			clientset:            fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
 		},
 		// 2 nodes, a few too many routes
 		{
 			nodes: []api.Node{
-				{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
-				{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
+				node1,
+				node2,
 			},
 			initialRoutes: []*cloudprovider.Route{
 				{cluster + "-01", "node-1", "10.120.0.0/24"},
@@ -129,12 +142,14 @@ func TestReconcile(t *testing.T) {
 				{cluster + "-01", "node-1", "10.120.0.0/24"},
 				{cluster + "-02", "node-2", "10.120.1.0/24"},
 			},
+			expectedNetworkReady: []bool{true, true},
+			clientset:            fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
 		},
 		// 2 nodes, 2 routes, but only 1 is right
 		{
 			nodes: []api.Node{
-				{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
-				{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
+				node1,
+				node2,
 			},
 			initialRoutes: []*cloudprovider.Route{
 				{cluster + "-01", "node-1", "10.120.0.0/24"},
@@ -144,17 +159,21 @@ func TestReconcile(t *testing.T) {
 				{cluster + "-01", "node-1", "10.120.0.0/24"},
 				{cluster + "-02", "node-2", "10.120.1.0/24"},
 			},
+			expectedNetworkReady: []bool{true, true},
+			clientset:            fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
 		},
 		// 2 nodes, one node without CIDR assigned.
 		{
 			nodes: []api.Node{
-				{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
-				{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: ""}},
+				node1,
+				nodeNoCidr,
			},
 			initialRoutes: []*cloudprovider.Route{},
 			expectedRoutes: []*cloudprovider.Route{
 				{cluster + "-01", "node-1", "10.120.0.0/24"},
 			},
+			expectedNetworkReady: []bool{true, false},
+			clientset:            fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, nodeNoCidr}}),
 		},
 	}
 	for i, testCase := range testCases {
@@ -170,10 +189,36 @@ func TestReconcile(t *testing.T) {
 			t.Error("Error in test: fakecloud doesn't support Routes()")
 		}
 		_, cidr, _ := net.ParseCIDR("10.120.0.0/16")
-		rc := New(routes, nil, cluster, cidr)
+		rc := New(routes, testCase.clientset, cluster, cidr)
 		if err := rc.reconcile(testCase.nodes, testCase.initialRoutes); err != nil {
 			t.Errorf("%d. Error from rc.reconcile(): %v", i, err)
 		}
+		for _, action := range testCase.clientset.Actions() {
+			if action.GetVerb() == "update" && action.GetResource().Resource == "nodes" {
+				node := action.GetObject().(*api.Node)
+				_, condition := api.GetNodeCondition(&node.Status, api.NodeNetworkingReady)
+				if condition == nil {
+					t.Errorf("%d. Missing NodeNetworkingReady condition for Node %v", i, node.Name)
+				} else {
+					check := func(index int) bool {
+						return (condition.Status == api.ConditionTrue) == testCase.expectedNetworkReady[index]
+					}
+					var index int
+					if node.Name == node1.Name {
+						index = 0
+					} else if node.Name == node2.Name {
+						index = 1
+					} else {
+						// Something's wrong
+						continue
+					}
+					if !check(index) {
+						t.Errorf("%d. Invalid NodeNetworkingReady condition for Node %v, expected %v, got %v",
+							i, node.Name, (condition.Status == api.ConditionTrue), testCase.expectedNetworkReady[index])
+					}
+				}
+			}
+		}
 		var finalRoutes []*cloudprovider.Route
 		var err error
 		timeoutChan := time.After(200 * time.Millisecond)
@@ -46,6 +46,7 @@ import (
 	clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
 	"k8s.io/kubernetes/pkg/client/record"
 	"k8s.io/kubernetes/pkg/cloudprovider"
+	"k8s.io/kubernetes/pkg/cloudprovider/providers/gce"
 	"k8s.io/kubernetes/pkg/fieldpath"
 	"k8s.io/kubernetes/pkg/fields"
 	"k8s.io/kubernetes/pkg/kubelet/cadvisor"
@@ -3163,12 +3164,23 @@ func (kl *Kubelet) setNodeReadyCondition(node *api.Node) {
 	currentTime := unversioned.NewTime(kl.clock.Now())
 	var newNodeReadyCondition api.NodeCondition
 	if rs := kl.runtimeState.errors(); len(rs) == 0 {
-		newNodeReadyCondition = api.NodeCondition{
-			Type:              api.NodeReady,
-			Status:            api.ConditionTrue,
-			Reason:            "KubeletReady",
-			Message:           "kubelet is posting ready status",
-			LastHeartbeatTime: currentTime,
-		}
+		_, networkingCondition := api.GetNodeCondition(&node.Status, api.NodeNetworkingReady)
+		if (kl.cloud.ProviderName() == gce.ProviderName) && (networkingCondition == nil || networkingCondition.Status != api.ConditionTrue) {
+			newNodeReadyCondition = api.NodeCondition{
+				Type:              api.NodeReady,
+				Status:            api.ConditionFalse,
+				Reason:            "KubeletNotReady",
+				Message:           "networking is not ready",
+				LastHeartbeatTime: currentTime,
+			}
+		} else {
+			newNodeReadyCondition = api.NodeCondition{
+				Type:              api.NodeReady,
+				Status:            api.ConditionTrue,
+				Reason:            "KubeletReady",
+				Message:           "kubelet is posting ready status",
+				LastHeartbeatTime: currentTime,
+			}
+		}
 	} else {
 		newNodeReadyCondition = api.NodeCondition{