diff --git a/cmd/integration/integration.go b/cmd/integration/integration.go
index 11feb2511b6..6c837994ca7 100644
--- a/cmd/integration/integration.go
+++ b/cmd/integration/integration.go
@@ -224,7 +224,7 @@ func startComponents(firstManifestURL, secondManifestURL, apiVersion string) (st
 	}}
 	nodeController := nodeControllerPkg.NewNodeController(nil, "", machineList, nodeResources, cl, fakeKubeletClient{}, 10, 5*time.Minute,
 		util.NewFakeRateLimiter(), 40*time.Second, 60*time.Second, 5*time.Second)
-	nodeController.Run(5*time.Second, true, false)
+	nodeController.Run(5*time.Second, true)
 	cadvisorInterface := new(cadvisor.Fake)
 
 	// Kubelet (localhost)
diff --git a/cmd/kube-controller-manager/app/controllermanager.go b/cmd/kube-controller-manager/app/controllermanager.go
index 0943e676897..b81afcd9ed0 100644
--- a/cmd/kube-controller-manager/app/controllermanager.go
+++ b/cmd/kube-controller-manager/app/controllermanager.go
@@ -86,7 +86,6 @@ func NewCMServer() *CMServer {
 		NodeMilliCPU:   1000,
 		NodeMemory:     resource.MustParse("3Gi"),
 		SyncNodeList:   true,
-		SyncNodeStatus: false,
 		KubeletConfig: client.KubeletConfig{
 			Port:        ports.KubeletPort,
 			EnableHttps: true,
@@ -116,13 +115,16 @@ func (s *CMServer) AddFlags(fs *pflag.FlagSet) {
 		"The number of retries for initial node registration. Retry interval equals node_sync_period.")
 	fs.Var(&s.MachineList, "machines", "List of machines to schedule onto, comma separated.")
 	fs.BoolVar(&s.SyncNodeList, "sync_nodes", s.SyncNodeList, "If true, and --cloud_provider is specified, sync nodes from the cloud provider. Default true.")
-	fs.BoolVar(&s.SyncNodeStatus, "sync_node_status", s.SyncNodeStatus, ""+
-		"If true, node controller sends probes to kubelet and updates NodeStatus."+
-		"If false, Kubelet posts NodeStatus to API server.")
-	fs.DurationVar(&s.NodeMonitorGracePeriod, "node_monitor_grace_period", 40*time.Second, "Amount of time which we allow running Node to be unresponsive before marking it unhealty."+
-		"Must be N times more than kubelet's nodeStatusUpdateFrequency, where N means number of retries allowed for kubelet to post node status.")
-	fs.DurationVar(&s.NodeStartupGracePeriod, "node_startup_grace_period", 60*time.Second, "Amount of time which we allow starting Node to be unresponsive before marking it unhealty.")
-	fs.DurationVar(&s.NodeMonitorPeriod, "node_monitor_period", 5*time.Second, "The period for syncing NodeStatus in NodeController.")
+	fs.BoolVar(&s.SyncNodeStatus, "sync_node_status", s.SyncNodeStatus,
+		"DEPRECATED. Does not have any effect now and will be removed in a later release.")
+	fs.DurationVar(&s.NodeMonitorGracePeriod, "node_monitor_grace_period", 40*time.Second,
+		"Amount of time which we allow running Node to be unresponsive before marking it unhealthy. "+
+		"Must be N times more than kubelet's nodeStatusUpdateFrequency, "+
+		"where N means number of retries allowed for kubelet to post node status.")
+	fs.DurationVar(&s.NodeStartupGracePeriod, "node_startup_grace_period", 60*time.Second,
+		"Amount of time which we allow starting Node to be unresponsive before marking it unhealthy.")
+	fs.DurationVar(&s.NodeMonitorPeriod, "node_monitor_period", 5*time.Second,
+		"The period for syncing NodeStatus in NodeController.")
 	// TODO: Discover these by pinging the host machines, and rip out these flags.
 	// TODO: in the meantime, use resource.QuantityFlag() instead of these
 	fs.Int64Var(&s.NodeMilliCPU, "node_milli_cpu", s.NodeMilliCPU, "The amount of MilliCPU provisioned on each node")
@@ -188,10 +190,14 @@ func (s *CMServer) Run(_ []string) error {
 		},
 	}
 
+	if s.SyncNodeStatus {
+		glog.Warning("DEPRECATION NOTICE: sync_node_status flag is being deprecated. It has no effect now and will be removed in a future version.")
+	}
+
 	nodeController := nodeControllerPkg.NewNodeController(cloud, s.MinionRegexp, s.MachineList, nodeResources,
 		kubeClient, kubeletClient, s.RegisterRetryCount, s.PodEvictionTimeout, util.NewTokenBucketRateLimiter(s.DeletingPodsQps, s.DeletingPodsBurst),
 		s.NodeMonitorGracePeriod, s.NodeStartupGracePeriod, s.NodeMonitorPeriod)
-	nodeController.Run(s.NodeSyncPeriod, s.SyncNodeList, s.SyncNodeStatus)
+	nodeController.Run(s.NodeSyncPeriod, s.SyncNodeList)
 
 	resourceQuotaManager := resourcequota.NewResourceQuotaManager(kubeClient)
 	resourceQuotaManager.Run(s.ResourceQuotaSyncPeriod)
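The deprecation pattern above keeps the flag registered, so existing command lines still parse, and warns at startup when it is set. A minimal, self-contained sketch of that pattern (using spf13/pflag, as the controller manager does; the flag-set name and Parse arguments are illustrative):

```go
package main

import (
	"fmt"

	"github.com/spf13/pflag"
)

func main() {
	fs := pflag.NewFlagSet("controller-manager", pflag.ContinueOnError)

	// The flag stays registered so existing invocations keep parsing,
	// but its value no longer drives any behavior.
	syncNodeStatus := fs.Bool("sync_node_status", false,
		"DEPRECATED. Does not have any effect now and will be removed in a later release.")

	fs.Parse([]string{"--sync_node_status=true"})

	// Warn once at startup if the deprecated flag was set, mirroring the
	// glog.Warning call added in Run above.
	if *syncNodeStatus {
		fmt.Println("DEPRECATION NOTICE: sync_node_status has no effect and will be removed")
	}
}
```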
diff --git a/cmd/kubernetes/kubernetes.go b/cmd/kubernetes/kubernetes.go
index 1ede9758d4d..f32e253db93 100644
--- a/cmd/kubernetes/kubernetes.go
+++ b/cmd/kubernetes/kubernetes.go
@@ -132,7 +132,7 @@ func runControllerManager(machineList []string, cl *client.Client, nodeMilliCPU,
 	nodeController := nodeControllerPkg.NewNodeController(
 		nil, "", machineList, nodeResources, cl, kubeClient, 10, 5*time.Minute, util.NewTokenBucketRateLimiter(*deletingPodsQps, *deletingPodsBurst),
 		40*time.Second, 60*time.Second, 5*time.Second)
-	nodeController.Run(10*time.Second, true, true)
+	nodeController.Run(10*time.Second, true)
 
 	endpoints := service.NewEndpointController(cl)
 	go util.Forever(func() { endpoints.SyncServiceEndpoints() }, time.Second*10)
diff --git a/pkg/api/types.go b/pkg/api/types.go
index 29924f7c373..cdc2ca5bed6 100644
--- a/pkg/api/types.go
+++ b/pkg/api/types.go
@@ -1113,7 +1113,7 @@ const (
 type NodeCondition struct {
 	Type               NodeConditionType `json:"type"`
 	Status             ConditionStatus   `json:"status"`
-	LastProbeTime      util.Time         `json:"lastProbeTime,omitempty"`
+	LastHeartbeatTime  util.Time         `json:"lastHeartbeatTime,omitempty"`
 	LastTransitionTime util.Time         `json:"lastTransitionTime,omitempty"`
 	Reason             string            `json:"reason,omitempty"`
 	Message            string            `json:"message,omitempty"`
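The rename is wire-visible for the internal type (and, below, for v1beta3). A stripped-down sketch of what the new serialization looks like, using hypothetical stand-in structs rather than the real pkg/api types (which use util.Time, not time.Time):

```go
package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// Trimmed-down stand-in for the NodeCondition type above.
type NodeCondition struct {
	Type               string    `json:"type"`
	Status             string    `json:"status"`
	LastHeartbeatTime  time.Time `json:"lastHeartbeatTime,omitempty"`
	LastTransitionTime time.Time `json:"lastTransitionTime,omitempty"`
}

func main() {
	c := NodeCondition{
		Type:              "Ready",
		Status:            "True",
		LastHeartbeatTime: time.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
	}
	out, _ := json.Marshal(c)
	// The condition now serializes with a lastHeartbeatTime key.
	fmt.Println(string(out))
}
```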
diff --git a/pkg/api/v1beta1/conversion.go b/pkg/api/v1beta1/conversion.go
index f7f20323375..f3860947094 100644
--- a/pkg/api/v1beta1/conversion.go
+++ b/pkg/api/v1beta1/conversion.go
@@ -1369,7 +1369,7 @@ func init() {
 			if err := s.Convert(&in.Status, &out.Status, 0); err != nil {
 				return err
 			}
-			if err := s.Convert(&in.LastProbeTime, &out.LastProbeTime, 0); err != nil {
+			if err := s.Convert(&in.LastHeartbeatTime, &out.LastProbeTime, 0); err != nil {
 				return err
 			}
 			if err := s.Convert(&in.LastTransitionTime, &out.LastTransitionTime, 0); err != nil {
@@ -1390,7 +1390,7 @@ func init() {
 			if err := s.Convert(&in.Status, &out.Status, 0); err != nil {
 				return err
 			}
-			if err := s.Convert(&in.LastProbeTime, &out.LastProbeTime, 0); err != nil {
+			if err := s.Convert(&in.LastProbeTime, &out.LastHeartbeatTime, 0); err != nil {
 				return err
 			}
 			if err := s.Convert(&in.LastTransitionTime, &out.LastTransitionTime, 0); err != nil {
diff --git a/pkg/api/v1beta2/conversion.go b/pkg/api/v1beta2/conversion.go
index 0faad5d5a9b..92d74ebfc20 100644
--- a/pkg/api/v1beta2/conversion.go
+++ b/pkg/api/v1beta2/conversion.go
@@ -1296,7 +1296,7 @@ func init() {
 			if err := s.Convert(&in.Status, &out.Status, 0); err != nil {
 				return err
 			}
-			if err := s.Convert(&in.LastProbeTime, &out.LastProbeTime, 0); err != nil {
+			if err := s.Convert(&in.LastHeartbeatTime, &out.LastProbeTime, 0); err != nil {
 				return err
 			}
 			if err := s.Convert(&in.LastTransitionTime, &out.LastTransitionTime, 0); err != nil {
@@ -1317,7 +1317,7 @@ func init() {
 			if err := s.Convert(&in.Status, &out.Status, 0); err != nil {
 				return err
 			}
-			if err := s.Convert(&in.LastProbeTime, &out.LastProbeTime, 0); err != nil {
+			if err := s.Convert(&in.LastProbeTime, &out.LastHeartbeatTime, 0); err != nil {
 				return err
 			}
 			if err := s.Convert(&in.LastTransitionTime, &out.LastTransitionTime, 0); err != nil {
diff --git a/pkg/api/v1beta3/types.go b/pkg/api/v1beta3/types.go
index 3ffc693b81c..1eaba565c0f 100644
--- a/pkg/api/v1beta3/types.go
+++ b/pkg/api/v1beta3/types.go
@@ -1110,7 +1110,7 @@ const (
 type NodeCondition struct {
 	Type               NodeConditionType `json:"type" description:"type of node condition, one of Ready, Schedulable"`
 	Status             ConditionStatus   `json:"status" description:"status of the condition, one of Full, None, Unknown"`
-	LastProbeTime      util.Time         `json:"lastProbeTime,omitempty" description:"last time the condition was probed"`
+	LastHeartbeatTime  util.Time         `json:"lastHeartbeatTime,omitempty" description:"last time we got an update on a given condition"`
 	LastTransitionTime util.Time         `json:"lastTransitionTime,omitempty" description:"last time the condition transit from one status to another"`
 	Reason             string            `json:"reason,omitempty" description:"(brief) reason for the condition's last transition"`
 	Message            string            `json:"message,omitempty" description:"human readable message indicating details about last transition"`
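These conversions keep the older API versions wire-compatible: v1beta1/v1beta2 clients still read and write lastProbeTime, and its value is mapped onto the internal LastHeartbeatTime. A sketch of that idea with plain functions and hypothetical minimal structs (the real code goes through the conversion.Scope machinery shown above):

```go
package main

import "fmt"

// Hypothetical minimal stand-ins for the internal and v1beta1 condition types.
type internalCondition struct{ LastHeartbeatTime string }
type v1beta1Condition struct{ LastProbeTime string }

// Internal -> versioned: the heartbeat timestamp is exposed under the old
// lastProbeTime name so existing clients keep working.
func toV1beta1(in internalCondition) v1beta1Condition {
	return v1beta1Condition{LastProbeTime: in.LastHeartbeatTime}
}

// Versioned -> internal: the old field name feeds the renamed internal field.
func fromV1beta1(in v1beta1Condition) internalCondition {
	return internalCondition{LastHeartbeatTime: in.LastProbeTime}
}

func main() {
	orig := internalCondition{LastHeartbeatTime: "2015-01-01T12:00:00Z"}
	// A round trip through the versioned representation is lossless.
	fmt.Println(fromV1beta1(toV1beta1(orig)) == orig)
}
```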
diff --git a/pkg/cloudprovider/controller/nodecontroller.go b/pkg/cloudprovider/controller/nodecontroller.go
index 321dfad3d5b..9213c131094 100644
--- a/pkg/cloudprovider/controller/nodecontroller.go
+++ b/pkg/cloudprovider/controller/nodecontroller.go
@@ -21,7 +21,6 @@ import (
 	"fmt"
 	"net"
 	"strings"
-	"sync"
 	"time"
 
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
@@ -29,7 +28,6 @@ import (
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
-	"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
 	"github.com/golang/glog"
 )
@@ -132,12 +130,9 @@ func NewNodeController(
 //    node addresses.
 // 2. SyncCloudNodes() is called periodically (if enabled) to sync instances from cloudprovider.
 //    Node created here will only have specs.
-// 3. Depending on how k8s is configured, there are two ways of syncing the node status:
-//    3.1 SyncProbedNodeStatus() is called periodically to trigger master to probe kubelet,
-//        and incorporate the resulting node status.
-//    3.2 MonitorNodeStatus() is called periodically to incorporate the results of node status
-//        pushed from kubelet to master.
-func (nc *NodeController) Run(period time.Duration, syncNodeList, syncNodeStatus bool) {
+// 3. MonitorNodeStatus() is called periodically to incorporate the results of node status
+//    pushed from kubelet to master.
+func (nc *NodeController) Run(period time.Duration, syncNodeList bool) {
 	// Register intial set of nodes with their status set.
 	var nodes *api.NodeList
 	var err error
@@ -170,20 +165,12 @@ func (nc *NodeController) Run(period time.Duration, syncNodeList, syncNodeStatus
 		}, period)
 	}
 
-	// Start syncing or monitoring node status.
-	if syncNodeStatus {
-		go util.Forever(func() {
-			if err := nc.SyncProbedNodeStatus(); err != nil {
-				glog.Errorf("Error syncing status: %v", err)
-			}
-		}, period)
-	} else {
-		go util.Forever(func() {
-			if err := nc.MonitorNodeStatus(); err != nil {
-				glog.Errorf("Error monitoring node status: %v", err)
-			}
-		}, nc.nodeMonitorPeriod)
-	}
+	// Start monitoring node status.
+	go util.Forever(func() {
+		if err = nc.MonitorNodeStatus(); err != nil {
+			glog.Errorf("Error monitoring node status: %v", err)
+		}
+	}, nc.nodeMonitorPeriod)
 }
 
 // RegisterNodes registers the given list of nodes, it keeps retrying for `retryCount` times.
@@ -272,121 +259,6 @@ func (nc *NodeController) SyncCloudNodes() error {
 	return nil
 }
 
-// SyncProbedNodeStatus synchronizes cluster nodes status to master server.
-func (nc *NodeController) SyncProbedNodeStatus() error {
-	nodes, err := nc.kubeClient.Nodes().List()
-	if err != nil {
-		return err
-	}
-	nodes, err = nc.PopulateNodesStatus(nodes)
-	if err != nil {
-		return err
-	}
-	for _, node := range nodes.Items {
-		// We used to skip updating node when node status doesn't change, this is no longer
-		// useful after we introduce per-probe status field, e.g. 'LastProbeTime', which will
-		// differ in every call of the sync loop.
-		glog.V(2).Infof("updating node %v", node.Name)
-		_, err = nc.kubeClient.Nodes().Update(&node)
-		if err != nil {
-			glog.Errorf("error updating node %s: %v", node.Name, err)
-		}
-	}
-	return nil
-}
-
-// PopulateNodesStatus populates node status for given list of nodes.
-func (nc *NodeController) PopulateNodesStatus(nodes *api.NodeList) (*api.NodeList, error) {
-	var wg sync.WaitGroup
-	wg.Add(len(nodes.Items))
-	for i := range nodes.Items {
-		go func(node *api.Node) {
-			node.Status.Conditions = nc.DoCheck(node)
-			if err := nc.populateNodeInfo(node); err != nil {
-				glog.Errorf("Can't collect information for node %s: %v", node.Name, err)
-			}
-			wg.Done()
-		}(&nodes.Items[i])
-	}
-	wg.Wait()
-	return nc.PopulateAddresses(nodes)
-}
-
-// populateNodeInfo gets node info from kubelet and update the node.
-func (nc *NodeController) populateNodeInfo(node *api.Node) error {
-	nodeInfo, err := nc.kubeletClient.GetNodeInfo(node.Name)
-	if err != nil {
-		return err
-	}
-	for key, value := range nodeInfo.Capacity {
-		node.Status.Capacity[key] = value
-	}
-	node.Status.NodeInfo = nodeInfo.NodeSystemInfo
-	return nil
-}
-
-// DoCheck performs various condition checks for given node.
-func (nc *NodeController) DoCheck(node *api.Node) []api.NodeCondition {
-	var conditions []api.NodeCondition
-
-	// Check Condition: NodeReady. TODO: More node conditions.
-	oldReadyCondition := nc.getCondition(&node.Status, api.NodeReady)
-	newReadyCondition := nc.checkNodeReady(node)
-	nc.updateLastTransitionTime(oldReadyCondition, newReadyCondition)
-	if newReadyCondition.Status != api.ConditionTrue {
-		// Node is not ready for this probe, we need to check if pods need to be deleted.
-		if newReadyCondition.LastProbeTime.After(newReadyCondition.LastTransitionTime.Add(nc.podEvictionTimeout)) {
-			// As long as the node fails, we call delete pods to delete all pods. Node controller sync
-			// is not a closed loop process, there is no feedback from other components regarding pod
-			// status. Keep listing pods to sanity check if pods are all deleted makes more sense.
-			nc.deletePods(node.Name)
-		}
-	}
-	conditions = append(conditions, *newReadyCondition)
-
-	return conditions
-}
-
-// updateLastTransitionTime updates LastTransitionTime for the newCondition based on oldCondition.
-func (nc *NodeController) updateLastTransitionTime(oldCondition, newCondition *api.NodeCondition) {
-	if oldCondition != nil && oldCondition.Status == newCondition.Status {
-		// If node status doesn't change, transition time is same as last time.
-		newCondition.LastTransitionTime = oldCondition.LastTransitionTime
-	} else {
-		// Set transition time to Now() if node status changes or `oldCondition` is nil, which
-		// happens only when the node is checked for the first time.
-		newCondition.LastTransitionTime = nc.now()
-	}
-}
-
-// checkNodeReady checks raw node ready condition, without transition timestamp set.
-func (nc *NodeController) checkNodeReady(node *api.Node) *api.NodeCondition {
-	switch status, err := nc.kubeletClient.HealthCheck(node.Name); {
-	case err != nil:
-		glog.V(2).Infof("NodeController: node %s health check error: %v", node.Name, err)
-		return &api.NodeCondition{
-			Type:          api.NodeReady,
-			Status:        api.ConditionUnknown,
-			Reason:        fmt.Sprintf("Node health check error: %v", err),
-			LastProbeTime: nc.now(),
-		}
-	case status == probe.Failure:
-		return &api.NodeCondition{
-			Type:          api.NodeReady,
-			Status:        api.ConditionFalse,
-			Reason:        fmt.Sprintf("Node health check failed: kubelet /healthz endpoint returns not ok"),
-			LastProbeTime: nc.now(),
-		}
-	default:
-		return &api.NodeCondition{
-			Type:          api.NodeReady,
-			Status:        api.ConditionTrue,
-			Reason:        fmt.Sprintf("Node health check succeeded: kubelet /healthz endpoint returns ok"),
-			LastProbeTime: nc.now(),
-		}
-	}
-}
-
 // PopulateAddresses queries Address for given list of nodes.
 func (nc *NodeController) PopulateAddresses(nodes *api.NodeList) (*api.NodeList, error) {
 	if nc.isRunningCloudProvider() {
@@ -440,7 +312,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
 		lastReadyCondition = api.NodeCondition{
 			Type:               api.NodeReady,
 			Status:             api.ConditionUnknown,
-			LastProbeTime:      node.CreationTimestamp,
+			LastHeartbeatTime:  node.CreationTimestamp,
 			LastTransitionTime: node.CreationTimestamp,
 		}
 		gracePeriod = nc.nodeStartupGracePeriod
@@ -497,7 +369,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
 			readyTransitionTimestamp: nc.now(),
 		}
 		nc.nodeStatusMap[node.Name] = savedNodeStatus
-	} else if savedCondition != nil && observedCondition != nil && savedCondition.LastProbeTime != observedCondition.LastProbeTime {
+	} else if savedCondition != nil && observedCondition != nil && savedCondition.LastHeartbeatTime != observedCondition.LastHeartbeatTime {
 		var transitionTime util.Time
 		// If ReadyCondition changed since the last time we checked, we update the transition timestamp to "now",
 		// otherwise we leave it as it is.
@@ -526,7 +398,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
 			Type:               api.NodeReady,
 			Status:             api.ConditionUnknown,
 			Reason:             fmt.Sprintf("Kubelet never posted node status."),
-			LastProbeTime:      node.CreationTimestamp,
+			LastHeartbeatTime:  node.CreationTimestamp,
 			LastTransitionTime: nc.now(),
 		})
 	} else {
@@ -536,7 +408,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
 		readyCondition.Status = api.ConditionUnknown
 		readyCondition.Reason = fmt.Sprintf("Kubelet stopped posting node status.")
-		// LastProbeTime is the last time we heard from kubelet.
+		// LastHeartbeatTime is the last time we heard from kubelet.
-		readyCondition.LastProbeTime = lastReadyCondition.LastProbeTime
+		readyCondition.LastHeartbeatTime = lastReadyCondition.LastHeartbeatTime
 		readyCondition.LastTransitionTime = nc.now()
 	}
 }
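This is the heart of the new model: the kubelet pushes NodeStatus, and MonitorNodeStatus only checks how stale the last heartbeat is instead of probing /healthz. A simplified sketch of the staleness decision in tryUpdateNodeStatus (hypothetical helper, not the actual code; the grace periods mirror the flag defaults above):

```go
package main

import (
	"fmt"
	"time"
)

// staleStatus reports whether a node's last heartbeat is older than the
// applicable grace period. Nodes that have never posted status get the
// longer startup grace period, measured from creation.
func staleStatus(lastHeartbeat, created, now time.Time, everPosted bool) bool {
	const (
		nodeMonitorGracePeriod = 40 * time.Second // node_monitor_grace_period default
		nodeStartupGracePeriod = 60 * time.Second // node_startup_grace_period default
	)
	if !everPosted {
		return now.Sub(created) > nodeStartupGracePeriod
	}
	return now.Sub(lastHeartbeat) > nodeMonitorGracePeriod
}

func main() {
	now := time.Date(2015, 1, 1, 12, 1, 0, 0, time.UTC)
	hb := now.Add(-50 * time.Second)
	// 50s since the last heartbeat exceeds the 40s monitor grace period,
	// so the Ready condition would be flipped to Unknown.
	fmt.Println(staleStatus(hb, time.Time{}, now, true))
}
```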
diff --git a/pkg/cloudprovider/controller/nodecontroller_test.go b/pkg/cloudprovider/controller/nodecontroller_test.go
index 06dfc5a7dfe..36e460a72be 100644
--- a/pkg/cloudprovider/controller/nodecontroller_test.go
+++ b/pkg/cloudprovider/controller/nodecontroller_test.go
@@ -19,7 +19,6 @@ package controller
 import (
 	"errors"
 	"fmt"
-	"net"
 	"net/http"
 	"reflect"
 	"sort"
@@ -614,79 +613,6 @@ func TestSyncCloudNodesEvictPods(t *testing.T) {
 	}
 }
 
-func TestNodeConditionsCheck(t *testing.T) {
-	fakeNow := util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
-	table := []struct {
-		node               *api.Node
-		fakeKubeletClient  *FakeKubeletClient
-		expectedConditions []api.NodeCondition
-	}{
-		{
-			// Node with default spec and kubelet /healthz probe returns success.
-			// Expected node condition to be ready and marked schedulable.
-			node: newNode("node0"),
-			fakeKubeletClient: &FakeKubeletClient{
-				Status: probe.Success,
-				Err:    nil,
-			},
-			expectedConditions: []api.NodeCondition{
-				{
-					Type:               api.NodeReady,
-					Status:             api.ConditionTrue,
-					Reason:             "Node health check succeeded: kubelet /healthz endpoint returns ok",
-					LastProbeTime:      fakeNow,
-					LastTransitionTime: fakeNow,
-				},
-			},
-		},
-		{
-			// User specified node as schedulable and kubelet /healthz probe returns failure with no error.
-			// Expected node condition to be not ready and marked schedulable.
-			node: &api.Node{ObjectMeta: api.ObjectMeta{Name: "node0"}, Spec: api.NodeSpec{Unschedulable: false}},
-			fakeKubeletClient: &FakeKubeletClient{
-				Status: probe.Failure,
-				Err:    nil,
-			},
-			expectedConditions: []api.NodeCondition{
-				{
-					Type:               api.NodeReady,
-					Status:             api.ConditionFalse,
-					Reason:             "Node health check failed: kubelet /healthz endpoint returns not ok",
-					LastProbeTime:      fakeNow,
-					LastTransitionTime: fakeNow,
-				},
-			},
-		},
-		{
-			// Expected node condition to be not ready as marking Node Unschedulable does not impact Readiness.
-			node: &api.Node{ObjectMeta: api.ObjectMeta{Name: "node0"}, Spec: api.NodeSpec{Unschedulable: true}},
-			fakeKubeletClient: &FakeKubeletClient{
-				Status: probe.Failure,
-				Err:    errors.New("Error"),
-			},
-			expectedConditions: []api.NodeCondition{
-				{
-					Type:               api.NodeReady,
-					Status:             api.ConditionUnknown,
-					Reason:             "Node health check error: Error",
-					LastProbeTime:      fakeNow,
-					LastTransitionTime: fakeNow,
-				},
-			},
-		},
-	}
-
-	for _, item := range table {
-		nodeController := NewNodeController(nil, "", nil, nil, nil, item.fakeKubeletClient, 10, time.Minute,
-			util.NewFakeRateLimiter(), testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod)
-		nodeController.now = func() util.Time { return fakeNow }
-		conditions := nodeController.DoCheck(item.node)
-		if !reflect.DeepEqual(item.expectedConditions, conditions) {
-			t.Errorf("expected conditions %+v, got %+v", item.expectedConditions, conditions)
-		}
-	}
-}
-
 func TestPopulateNodeAddresses(t *testing.T) {
 	table := []struct {
 		nodes *api.NodeList
@@ -724,334 +650,6 @@ func TestPopulateNodeAddresses(t *testing.T) {
 	}
 }
 
-func TestSyncProbedNodeStatus(t *testing.T) {
-	fakeNow := util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
-	table := []struct {
-		fakeNodeHandler      *FakeNodeHandler
-		fakeKubeletClient    *FakeKubeletClient
-		fakeCloud            *fake_cloud.FakeCloud
-		expectedNodes        []*api.Node
-		expectedRequestCount int
-	}{
-		{
-			fakeNodeHandler: &FakeNodeHandler{
-				Existing: []*api.Node{newNode("node0"), newNode("node1")},
-			},
-			fakeKubeletClient: &FakeKubeletClient{
-				Status: probe.Success,
-				Err:    nil,
-			},
-			fakeCloud: &fake_cloud.FakeCloud{
-				Addresses: []api.NodeAddress{{Type: api.NodeLegacyHostIP, Address: "1.2.3.4"}},
-			},
-			expectedNodes: []*api.Node{
-				{
-					ObjectMeta: api.ObjectMeta{Name: "node0"},
-					Status: api.NodeStatus{
-						Conditions: []api.NodeCondition{
-							{
-								Type:               api.NodeReady,
-								Status:             api.ConditionTrue,
-								Reason:             "Node health check succeeded: kubelet /healthz endpoint returns ok",
-								LastProbeTime:      fakeNow,
-								LastTransitionTime: fakeNow,
-							},
-						},
-						Addresses: []api.NodeAddress{
-							{Type: api.NodeLegacyHostIP, Address: "1.2.3.4"},
-						},
-						Capacity: api.ResourceList{
-							api.ResourceName(api.ResourceCPU):    resource.MustParse("10"),
-							api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
-						},
-					},
-					Spec: api.NodeSpec{
-						ExternalID: "node0",
-					},
-				},
-				{
-					ObjectMeta: api.ObjectMeta{Name: "node1"},
-					Status: api.NodeStatus{
-						Conditions: []api.NodeCondition{
-							{
-								Type:               api.NodeReady,
-								Status:             api.ConditionTrue,
-								Reason:             "Node health check succeeded: kubelet /healthz endpoint returns ok",
-								LastProbeTime:      fakeNow,
-								LastTransitionTime: fakeNow,
-							},
-						},
-						Addresses: []api.NodeAddress{
-							{Type: api.NodeLegacyHostIP, Address: "1.2.3.4"},
-						},
-						Capacity: api.ResourceList{
-							api.ResourceName(api.ResourceCPU):    resource.MustParse("10"),
-							api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
-						},
-					},
-					Spec: api.NodeSpec{
-						ExternalID: "node1",
-					},
-				},
-			},
-			expectedRequestCount: 3, // List + 2xUpdate
-		},
-	}
-
-	for _, item := range table {
-		nodeController := NewNodeController(item.fakeCloud, ".*", nil, nil, item.fakeNodeHandler, item.fakeKubeletClient, 10, time.Minute,
-			util.NewFakeRateLimiter(), testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod)
-		nodeController.now = func() util.Time { return fakeNow }
-		if err := nodeController.SyncProbedNodeStatus(); err != nil {
-			t.Errorf("unexpected error: %v", err)
-		}
-		if item.fakeNodeHandler.RequestCount != item.expectedRequestCount {
-			t.Errorf("expected %v call, but got %v.", item.expectedRequestCount, item.fakeNodeHandler.RequestCount)
-		}
-		if !reflect.DeepEqual(item.expectedNodes, item.fakeNodeHandler.UpdatedNodes) {
-			t.Errorf("expected nodes %+v, got %+v", item.expectedNodes[0], item.fakeNodeHandler.UpdatedNodes[0])
-		}
-		// Second sync will also update the node.
-		item.fakeNodeHandler.RequestCount = 0
-		if err := nodeController.SyncProbedNodeStatus(); err != nil {
-			t.Errorf("unexpected error: %v", err)
-		}
-		if item.fakeNodeHandler.RequestCount != item.expectedRequestCount {
-			t.Errorf("expected %v call, but got %v.", item.expectedRequestCount, item.fakeNodeHandler.RequestCount)
-		}
-	}
-}
-
-func TestSyncProbedNodeStatusTransitionTime(t *testing.T) {
-	fakeNow := util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
-	table := []struct {
-		fakeNodeHandler        *FakeNodeHandler
-		fakeKubeletClient      *FakeKubeletClient
-		expectedRequestCount   int
-		expectedTransitionTime util.Time
-	}{
-		{
-			// Existing node is healthy, current probe is healthy too.
-			// Existing node is schedulable, again explicitly mark node as schedulable.
-			// Expect transition time to stay the same as before.
-			fakeNodeHandler: &FakeNodeHandler{
-				Existing: []*api.Node{
-					{
-						ObjectMeta: api.ObjectMeta{Name: "node0"},
-						Spec:       api.NodeSpec{Unschedulable: false},
-						Status: api.NodeStatus{
-							Conditions: []api.NodeCondition{
-								{
-									Type:               api.NodeReady,
-									Status:             api.ConditionTrue,
-									Reason:             "Node health check succeeded: kubelet /healthz endpoint returns ok",
-									LastTransitionTime: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
-								},
-							},
-						},
-					},
-				},
-			},
-			fakeKubeletClient: &FakeKubeletClient{
-				Status: probe.Success,
-				Err:    nil,
-			},
-			expectedRequestCount:   2, // List+Update
-			expectedTransitionTime: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
-		},
-		{
-			// Existing node is healthy, current probe is unhealthy.
-			// Existing node is schedulable, mark node as unschedulable.
-			// Expect transition time to be now.
-			fakeNodeHandler: &FakeNodeHandler{
-				Existing: []*api.Node{
-					{
-						ObjectMeta: api.ObjectMeta{Name: "node0"},
-						Spec:       api.NodeSpec{Unschedulable: true},
-						Status: api.NodeStatus{
-							Conditions: []api.NodeCondition{
-								{
-									Type:               api.NodeReady,
-									Status:             api.ConditionTrue,
-									Reason:             "Node health check succeeded: kubelet /healthz endpoint returns ok",
-									LastTransitionTime: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
-								},
-							},
-						},
-					},
-				},
-			},
-			fakeKubeletClient: &FakeKubeletClient{
-				Status: probe.Failure,
-				Err:    nil,
-			},
-			expectedRequestCount:   2, // List+Update
-			expectedTransitionTime: fakeNow,
-		},
-	}
-
-	for _, item := range table {
-		nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, 10, time.Minute,
-			util.NewFakeRateLimiter(), testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod)
-		nodeController.lookupIP = func(host string) ([]net.IP, error) { return nil, fmt.Errorf("lookup %v: no such host", host) }
-		nodeController.now = func() util.Time { return fakeNow }
-		if err := nodeController.SyncProbedNodeStatus(); err != nil {
-			t.Errorf("unexpected error: %v", err)
-		}
-		if item.expectedRequestCount != item.fakeNodeHandler.RequestCount {
-			t.Errorf("expected %v call, but got %v.", item.expectedRequestCount, item.fakeNodeHandler.RequestCount)
-		}
-		for _, node := range item.fakeNodeHandler.UpdatedNodes {
-			for _, condition := range node.Status.Conditions {
-				if !condition.LastTransitionTime.Time.Equal(item.expectedTransitionTime.Time) {
-					t.Errorf("expected last transition time %v, but got %v", item.expectedTransitionTime, condition.LastTransitionTime)
-				}
-			}
-		}
-	}
-}
-
-func TestSyncProbedNodeStatusEvictPods(t *testing.T) {
-	table := []struct {
-		fakeNodeHandler      *FakeNodeHandler
-		fakeKubeletClient    *FakeKubeletClient
-		expectedRequestCount int
-		expectedActions      []testclient.FakeAction
-	}{
-		{
-			// Existing node is healthy, current probe is healthy too.
-			fakeNodeHandler: &FakeNodeHandler{
-				Existing: []*api.Node{
-					{
-						ObjectMeta: api.ObjectMeta{Name: "node0"},
-						Status: api.NodeStatus{
-							Conditions: []api.NodeCondition{
-								{
-									Type:               api.NodeReady,
-									Status:             api.ConditionTrue,
-									Reason:             "Node health check succeeded: kubelet /healthz endpoint returns ok",
-									LastTransitionTime: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
-								},
-							},
-						},
-					},
-				},
-				Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node1")}}),
-			},
-			fakeKubeletClient: &FakeKubeletClient{
-				Status: probe.Success,
-				Err:    nil,
-			},
-			expectedRequestCount: 2, // List+Update
-			expectedActions:      nil,
-		},
-		{
-			// Existing node is healthy, current probe is unhealthy, i.e. node just becomes unhealthy.
-			// Do not delete pods.
-			fakeNodeHandler: &FakeNodeHandler{
-				Existing: []*api.Node{
-					{
-						ObjectMeta: api.ObjectMeta{Name: "node0"},
-						Status: api.NodeStatus{
-							Conditions: []api.NodeCondition{
-								{
-									Type:               api.NodeReady,
-									Status:             api.ConditionTrue,
-									Reason:             "Node health check succeeded: kubelet /healthz endpoint returns ok",
-									LastTransitionTime: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
-								},
-							},
-						},
-					},
-				},
-				Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
-			},
-			fakeKubeletClient: &FakeKubeletClient{
-				Status: probe.Failure,
-				Err:    nil,
-			},
-			expectedRequestCount: 2, // List+Update
-			expectedActions:      nil,
-		},
-		{
-			// Existing node unhealthy, current probe is unhealthy. Node is still within grace peroid.
-			fakeNodeHandler: &FakeNodeHandler{
-				Existing: []*api.Node{
-					{
-						ObjectMeta: api.ObjectMeta{Name: "node0"},
-						Status: api.NodeStatus{
-							Conditions: []api.NodeCondition{
-								{
-									Type:   api.NodeReady,
-									Status: api.ConditionFalse,
-									Reason: "Node health check failed: kubelet /healthz endpoint returns not ok",
-									// Here, last transition time is Now(). In node controller, the new condition's probe time is
-									// also Now(). The two calls to Now() yields differnt time due to test execution, but the
-									// time difference is within 5 minutes, which is the grace peroid.
-									LastTransitionTime: util.Now(),
-								},
-							},
-						},
-					},
-				},
-				Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
-			},
-			fakeKubeletClient: &FakeKubeletClient{
-				Status: probe.Failure,
-				Err:    nil,
-			},
-			expectedRequestCount: 2, // List+Update
-			expectedActions:      nil,
-		},
-		{
-			// Existing node unhealthy, current probe is unhealthy. Node exceeds grace peroid.
-			fakeNodeHandler: &FakeNodeHandler{
-				Existing: []*api.Node{
-					{
-						ObjectMeta: api.ObjectMeta{Name: "node0"},
-						Status: api.NodeStatus{
-							Conditions: []api.NodeCondition{
-								{
-									Type:   api.NodeReady,
-									Status: api.ConditionFalse,
-									Reason: "Node health check failed: kubelet /healthz endpoint returns not ok",
-									// Here, last transition time is in the past, and in node controller, the
-									// new condition's probe time is Now(). The time difference is larger than
-									// 5*min. The test will fail if system clock is wrong, but we don't yet have
-									// ways to mock time in our tests.
-									LastTransitionTime: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
-								},
-							},
-						},
-					},
-				},
-				Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
-			},
-			fakeKubeletClient: &FakeKubeletClient{
-				Status: probe.Failure,
-				Err:    nil,
-			},
-			expectedRequestCount: 2, // List+Update
-			expectedActions:      []testclient.FakeAction{{Action: "list-pods"}, {Action: "delete-pod", Value: "pod0"}},
-		},
-	}
-
-	for _, item := range table {
-		nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, 10, 5*time.Minute,
-			util.NewFakeRateLimiter(), testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod)
-		nodeController.lookupIP = func(host string) ([]net.IP, error) { return nil, fmt.Errorf("lookup %v: no such host", host) }
-		if err := nodeController.SyncProbedNodeStatus(); err != nil {
-			t.Errorf("unexpected error: %v", err)
-		}
-		if item.expectedRequestCount != item.fakeNodeHandler.RequestCount {
-			t.Errorf("expected %v call, but got %v.", item.expectedRequestCount, item.fakeNodeHandler.RequestCount)
-		}
-		if !reflect.DeepEqual(item.expectedActions, item.fakeNodeHandler.Actions) {
-			t.Errorf("time out waiting for deleting pods, expected %+v, got %+v", item.expectedActions, item.fakeNodeHandler.Actions)
-		}
-	}
-}
-
 func TestMonitorNodeStatusEvictPods(t *testing.T) {
 	fakeNow := util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
 	evictionTimeout := 10 * time.Minute
@@ -1095,7 +693,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
 					{
 						Type:               api.NodeReady,
 						Status:             api.ConditionFalse,
-						LastProbeTime:      util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+						LastHeartbeatTime:  util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 						LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 					},
 				},
@@ -1111,7 +709,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
 						Type:   api.NodeReady,
 						Status: api.ConditionFalse,
 						// Node status has just been updated, and is NotReady for 10min.
-						LastProbeTime:      util.Date(2015, 1, 1, 12, 9, 0, 0, time.UTC),
+						LastHeartbeatTime:  util.Date(2015, 1, 1, 12, 9, 0, 0, time.UTC),
 						LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 					},
 				},
@@ -1133,7 +731,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
 					{
 						Type:               api.NodeReady,
 						Status:             api.ConditionFalse,
-						LastProbeTime:      util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+						LastHeartbeatTime:  util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 						LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 					},
 				},
@@ -1149,7 +747,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
 						Type:   api.NodeReady,
 						Status: api.ConditionFalse,
 						// Node status has just been updated, and is NotReady for 1hr.
-						LastProbeTime:      util.Date(2015, 1, 1, 12, 59, 0, 0, time.UTC),
+						LastHeartbeatTime:  util.Date(2015, 1, 1, 12, 59, 0, 0, time.UTC),
 						LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 					},
 				},
@@ -1171,7 +769,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
 					{
 						Type:               api.NodeReady,
 						Status:             api.ConditionUnknown,
-						LastProbeTime:      util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+						LastHeartbeatTime:  util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 						LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 					},
 				},
@@ -1187,7 +785,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
 						Type:   api.NodeReady,
 						Status: api.ConditionUnknown,
 						// Node status was updated by nodecontroller 10min ago
-						LastProbeTime:      util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+						LastHeartbeatTime:  util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 						LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 					},
 				},
@@ -1209,7 +807,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
 					{
 						Type:               api.NodeReady,
 						Status:             api.ConditionUnknown,
-						LastProbeTime:      util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+						LastHeartbeatTime:  util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 						LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 					},
 				},
@@ -1225,7 +823,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
 						Type:   api.NodeReady,
 						Status: api.ConditionUnknown,
 						// Node status was updated by nodecontroller 1hr ago
-						LastProbeTime:      util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+						LastHeartbeatTime:  util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 						LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 					},
 				},
@@ -1300,7 +898,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
 						Type:               api.NodeReady,
 						Status:             api.ConditionUnknown,
 						Reason:             fmt.Sprintf("Kubelet never posted node status."),
-						LastProbeTime:      util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
+						LastHeartbeatTime:  util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
 						LastTransitionTime: fakeNow,
 					},
 				},
@@ -1341,7 +939,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
 						Type:   api.NodeReady,
 						Status: api.ConditionTrue,
 						// Node status hasn't been updated for 1hr.
-						LastProbeTime:      util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+						LastHeartbeatTime:  util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 						LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 					},
 				},
@@ -1365,7 +963,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
 						Type:   api.NodeReady,
 						Status: api.ConditionTrue,
 						// Node status hasn't been updated for 1hr.
-						LastProbeTime:      util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+						LastHeartbeatTime:  util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 						LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 					},
 				},
@@ -1386,7 +984,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
 						Type:               api.NodeReady,
 						Status:             api.ConditionUnknown,
 						Reason:             fmt.Sprintf("Kubelet stopped posting node status."),
-						LastProbeTime:      util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+						LastHeartbeatTime:  util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 						LastTransitionTime: util.Time{util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC).Add(time.Hour)},
 					},
 				},
@@ -1417,7 +1015,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
 						Type:   api.NodeReady,
 						Status: api.ConditionTrue,
 						// Node status has just been updated.
-						LastProbeTime:      fakeNow,
+						LastHeartbeatTime:  fakeNow,
 						LastTransitionTime: fakeNow,
 					},
 				},
diff --git a/pkg/kubectl/describe.go b/pkg/kubectl/describe.go
index 8b5173f3c3f..b613b7776ab 100644
--- a/pkg/kubectl/describe.go
+++ b/pkg/kubectl/describe.go
@@ -496,12 +496,12 @@ func describeNode(node *api.Node, pods []api.Pod, events *api.EventList) (string
 		fmt.Fprintf(out, "Labels:\t%s\n", formatLabels(node.Labels))
 		fmt.Fprintf(out, "CreationTimestamp:\t%s\n", node.CreationTimestamp.Time.Format(time.RFC1123Z))
 		if len(node.Status.Conditions) > 0 {
-			fmt.Fprint(out, "Conditions:\n  Type\tStatus\tLastProbeTime\tLastTransitionTime\tReason\tMessage\n")
+			fmt.Fprint(out, "Conditions:\n  Type\tStatus\tLastHeartbeatTime\tLastTransitionTime\tReason\tMessage\n")
 			for _, c := range node.Status.Conditions {
 				fmt.Fprintf(out, "  %v \t%v \t%s \t%s \t%v \t%v\n",
 					c.Type,
 					c.Status,
-					c.LastProbeTime.Time.Format(time.RFC1123Z),
+					c.LastHeartbeatTime.Time.Format(time.RFC1123Z),
 					c.LastTransitionTime.Time.Format(time.RFC1123Z),
 					c.Reason,
 					c.Message)
diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 53d3e4f941f..c7ebd9cc86f 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -1807,10 +1807,10 @@ func (kl *Kubelet) tryUpdateNodeStatus() error {
 
 	currentTime := util.Now()
 	newCondition := api.NodeCondition{
-		Type:          api.NodeReady,
-		Status:        api.ConditionTrue,
-		Reason:        fmt.Sprintf("kubelet is posting ready status"),
-		LastProbeTime: currentTime,
+		Type:              api.NodeReady,
+		Status:            api.ConditionTrue,
+		Reason:            fmt.Sprintf("kubelet is posting ready status"),
+		LastHeartbeatTime: currentTime,
 	}
 	updated := false
 	for i := range node.Status.Conditions {
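On the kubelet side, tryUpdateNodeStatus now stamps LastHeartbeatTime on the Ready condition it posts, updating an existing condition in place or appending a new one. A condensed sketch of that update-or-append pattern with simplified stand-in types (the real loop also preserves LastTransitionTime across unchanged statuses):

```go
package main

import (
	"fmt"
	"time"
)

type condition struct {
	Type              string
	Status            string
	Reason            string
	LastHeartbeatTime time.Time
}

// setReadyCondition mirrors the loop in tryUpdateNodeStatus: refresh the
// heartbeat on an existing Ready condition, or append a new one.
func setReadyCondition(conds []condition, now time.Time) []condition {
	newCond := condition{
		Type:              "Ready",
		Status:            "True",
		Reason:            "kubelet is posting ready status",
		LastHeartbeatTime: now,
	}
	for i := range conds {
		if conds[i].Type == newCond.Type {
			conds[i] = newCond
			return conds
		}
	}
	return append(conds, newCond)
}

func main() {
	conds := setReadyCondition(nil, time.Now())
	conds = setReadyCondition(conds, time.Now())
	fmt.Println(len(conds)) // still 1: the condition was updated in place
}
```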
diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go
index f0fbe1f354a..d1a7568c91e 100644
--- a/pkg/kubelet/kubelet_test.go
+++ b/pkg/kubelet/kubelet_test.go
@@ -3080,7 +3080,7 @@ func TestUpdateNewNodeStatus(t *testing.T) {
 					Type:               api.NodeReady,
 					Status:             api.ConditionTrue,
 					Reason:             fmt.Sprintf("kubelet is posting ready status"),
-					LastProbeTime:      util.Time{},
+					LastHeartbeatTime:  util.Time{},
 					LastTransitionTime: util.Time{},
 				},
 			},
@@ -3111,13 +3111,13 @@ func TestUpdateNewNodeStatus(t *testing.T) {
 	if !ok {
 		t.Errorf("unexpected object type")
 	}
-	if updatedNode.Status.Conditions[0].LastProbeTime.IsZero() {
+	if updatedNode.Status.Conditions[0].LastHeartbeatTime.IsZero() {
 		t.Errorf("unexpected zero last probe timestamp")
 	}
 	if updatedNode.Status.Conditions[0].LastTransitionTime.IsZero() {
 		t.Errorf("unexpected zero last transition timestamp")
 	}
-	updatedNode.Status.Conditions[0].LastProbeTime = util.Time{}
+	updatedNode.Status.Conditions[0].LastHeartbeatTime = util.Time{}
 	updatedNode.Status.Conditions[0].LastTransitionTime = util.Time{}
 	if !reflect.DeepEqual(expectedNode, updatedNode) {
 		t.Errorf("unexpected objects: %s", util.ObjectDiff(expectedNode, updatedNode))
@@ -3138,7 +3138,7 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
 					Type:               api.NodeReady,
 					Status:             api.ConditionTrue,
 					Reason:             fmt.Sprintf("kubelet is posting ready status"),
-					LastProbeTime:      util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
+					LastHeartbeatTime:  util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
 					LastTransitionTime: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
 				},
 			},
@@ -3173,7 +3173,7 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
 					Type:               api.NodeReady,
 					Status:             api.ConditionTrue,
 					Reason:             fmt.Sprintf("kubelet is posting ready status"),
-					LastProbeTime:      util.Time{}, // placeholder
+					LastHeartbeatTime:  util.Time{}, // placeholder
 					LastTransitionTime: util.Time{}, // placeholder
 				},
 			},
@@ -3205,14 +3205,14 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
 		t.Errorf("unexpected object type")
 	}
 
-	// Expect LastProbeTime to be updated to Now, while LastTransitionTime to be the same.
-	if reflect.DeepEqual(updatedNode.Status.Conditions[0].LastProbeTime.Rfc3339Copy().UTC(), util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC).Time) {
+	// Expect LastHeartbeatTime to be updated to Now, while LastTransitionTime to be the same.
+	if reflect.DeepEqual(updatedNode.Status.Conditions[0].LastHeartbeatTime.Rfc3339Copy().UTC(), util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC).Time) {
 		t.Errorf("expected \n%v\n, got \n%v", util.Now(), util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC))
 	}
 	if !reflect.DeepEqual(updatedNode.Status.Conditions[0].LastTransitionTime.Rfc3339Copy().UTC(), util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC).Time) {
 		t.Errorf("expected \n%#v\n, got \n%#v", updatedNode.Status.Conditions[0].LastTransitionTime.Rfc3339Copy(), util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC))
 	}
-	updatedNode.Status.Conditions[0].LastProbeTime = util.Time{}
+	updatedNode.Status.Conditions[0].LastHeartbeatTime = util.Time{}
 	updatedNode.Status.Conditions[0].LastTransitionTime = util.Time{}
 	if !reflect.DeepEqual(expectedNode, updatedNode) {
 		t.Errorf("expected \n%v\n, got \n%v", expectedNode, updatedNode)
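Finally, the describe.go change above renames the column users see in `kubectl describe node`. A rough, self-contained sketch of how that tab-separated Conditions table renders with the new header (made-up values; the real code writes into describeNode's tabwriter-backed output):

```go
package main

import (
	"fmt"
	"os"
	"text/tabwriter"
	"time"
)

func main() {
	w := tabwriter.NewWriter(os.Stdout, 0, 8, 2, ' ', 0)
	fmt.Fprint(w, "Type\tStatus\tLastHeartbeatTime\tLastTransitionTime\tReason\tMessage\n")
	heartbeat := time.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
	transition := heartbeat.Add(-time.Hour)
	fmt.Fprintf(w, "%v\t%v\t%s\t%s\t%v\t%v\n",
		"Ready", "True",
		heartbeat.Format(time.RFC1123Z),
		transition.Format(time.RFC1123Z),
		"kubelet is posting ready status", "")
	w.Flush()
}
```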