Remove Node Controller's ability to pull status from Kubelet

This commit is contained in:
gmarek 2015-03-27 15:09:51 +01:00
parent 2719194154
commit 72182735b9
12 changed files with 62 additions and 586 deletions

View File

@ -224,7 +224,7 @@ func startComponents(firstManifestURL, secondManifestURL, apiVersion string) (st
}}
nodeController := nodeControllerPkg.NewNodeController(nil, "", machineList, nodeResources, cl, fakeKubeletClient{}, 10, 5*time.Minute, util.NewFakeRateLimiter(), 40*time.Second, 60*time.Second, 5*time.Second)
nodeController.Run(5*time.Second, true, false)
nodeController.Run(5*time.Second, true)
cadvisorInterface := new(cadvisor.Fake)
// Kubelet (localhost)

View File

@ -86,7 +86,6 @@ func NewCMServer() *CMServer {
NodeMilliCPU: 1000,
NodeMemory: resource.MustParse("3Gi"),
SyncNodeList: true,
SyncNodeStatus: false,
KubeletConfig: client.KubeletConfig{
Port: ports.KubeletPort,
EnableHttps: true,
@ -116,13 +115,16 @@ func (s *CMServer) AddFlags(fs *pflag.FlagSet) {
"The number of retries for initial node registration. Retry interval equals node_sync_period.")
fs.Var(&s.MachineList, "machines", "List of machines to schedule onto, comma separated.")
fs.BoolVar(&s.SyncNodeList, "sync_nodes", s.SyncNodeList, "If true, and --cloud_provider is specified, sync nodes from the cloud provider. Default true.")
fs.BoolVar(&s.SyncNodeStatus, "sync_node_status", s.SyncNodeStatus, ""+
"If true, node controller sends probes to kubelet and updates NodeStatus."+
"If false, Kubelet posts NodeStatus to API server.")
fs.DurationVar(&s.NodeMonitorGracePeriod, "node_monitor_grace_period", 40*time.Second, "Amount of time which we allow running Node to be unresponsive before marking it unhealthy."+
"Must be N times more than kubelet's nodeStatusUpdateFrequency, where N means number of retries allowed for kubelet to post node status.")
fs.DurationVar(&s.NodeStartupGracePeriod, "node_startup_grace_period", 60*time.Second, "Amount of time which we allow starting Node to be unresponsive before marking it unhealthy.")
fs.DurationVar(&s.NodeMonitorPeriod, "node_monitor_period", 5*time.Second, "The period for syncing NodeStatus in NodeController.")
fs.BoolVar(&s.SyncNodeStatus, "sync_node_status", s.SyncNodeStatus,
"DEPRECATED. Does not have any effect now and it will be removed in a later release.")
fs.DurationVar(&s.NodeMonitorGracePeriod, "node_monitor_grace_period", 40*time.Second,
"Amount of time which we allow running Node to be unresponsive before marking it unhealty. "+
"Must be N times more than kubelet's nodeStatusUpdateFrequency, "+
"where N means number of retries allowed for kubelet to post node status.")
fs.DurationVar(&s.NodeStartupGracePeriod, "node_startup_grace_period", 60*time.Second,
"Amount of time which we allow starting Node to be unresponsive before marking it unhealty.")
fs.DurationVar(&s.NodeMonitorPeriod, "node_monitor_period", 5*time.Second,
"The period for syncing NodeStatus in NodeController.")
// TODO: Discover these by pinging the host machines, and rip out these flags.
// TODO: in the meantime, use resource.QuantityFlag() instead of these
fs.Int64Var(&s.NodeMilliCPU, "node_milli_cpu", s.NodeMilliCPU, "The amount of MilliCPU provisioned on each node")
@ -188,10 +190,14 @@ func (s *CMServer) Run(_ []string) error {
},
}
if s.SyncNodeStatus {
glog.Warning("DEPRECATION NOTICE: sync_node_status flag is being deprecated. It has no effect now and it will be removed in a future version.")
}
nodeController := nodeControllerPkg.NewNodeController(cloud, s.MinionRegexp, s.MachineList, nodeResources,
kubeClient, kubeletClient, s.RegisterRetryCount, s.PodEvictionTimeout, util.NewTokenBucketRateLimiter(s.DeletingPodsQps, s.DeletingPodsBurst),
s.NodeMonitorGracePeriod, s.NodeStartupGracePeriod, s.NodeMonitorPeriod)
nodeController.Run(s.NodeSyncPeriod, s.SyncNodeList, s.SyncNodeStatus)
nodeController.Run(s.NodeSyncPeriod, s.SyncNodeList)
resourceQuotaManager := resourcequota.NewResourceQuotaManager(kubeClient)
resourceQuotaManager.Run(s.ResourceQuotaSyncPeriod)
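
The hunks above show the deprecation pattern this commit adopts: the sync_node_status flag stays registered so existing command lines keep parsing, its value is ignored, and setting it only triggers a warning. A minimal, self-contained sketch of that pattern, using the standard flag package as a stand-in for the real pflag/CMServer wiring:

package main

import (
	"flag"
	"log"
)

func main() {
	syncNodeStatus := flag.Bool("sync_node_status", false,
		"DEPRECATED. Does not have any effect now and will be removed in a later release.")
	flag.Parse()

	if *syncNodeStatus {
		log.Print("DEPRECATION NOTICE: sync_node_status flag is being deprecated. " +
			"It has no effect now and will be removed in a future version.")
	}
	// The node controller is then started without the flag's value:
	// nodeController.Run(nodeSyncPeriod, syncNodeList)
}
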

View File

@ -132,7 +132,7 @@ func runControllerManager(machineList []string, cl *client.Client, nodeMilliCPU,
nodeController := nodeControllerPkg.NewNodeController(
nil, "", machineList, nodeResources, cl, kubeClient, 10, 5*time.Minute, util.NewTokenBucketRateLimiter(*deletingPodsQps, *deletingPodsBurst), 40*time.Second, 60*time.Second, 5*time.Second)
nodeController.Run(10*time.Second, true, true)
nodeController.Run(10*time.Second, true)
endpoints := service.NewEndpointController(cl)
go util.Forever(func() { endpoints.SyncServiceEndpoints() }, time.Second*10)

View File

@ -1113,7 +1113,7 @@ const (
type NodeCondition struct {
Type NodeConditionType `json:"type"`
Status ConditionStatus `json:"status"`
LastProbeTime util.Time `json:"lastProbeTime,omitempty"`
LastHeartbeatTime util.Time `json:"lastHeartbeatTime,omitempty"`
LastTransitionTime util.Time `json:"lastTransitionTime,omitempty"`
Reason string `json:"reason,omitempty"`
Message string `json:"message,omitempty"`

View File

@ -1369,7 +1369,7 @@ func init() {
if err := s.Convert(&in.Status, &out.Status, 0); err != nil {
return err
}
if err := s.Convert(&in.LastProbeTime, &out.LastProbeTime, 0); err != nil {
if err := s.Convert(&in.LastHeartbeatTime, &out.LastProbeTime, 0); err != nil {
return err
}
if err := s.Convert(&in.LastTransitionTime, &out.LastTransitionTime, 0); err != nil {
@ -1390,7 +1390,7 @@ func init() {
if err := s.Convert(&in.Status, &out.Status, 0); err != nil {
return err
}
if err := s.Convert(&in.LastProbeTime, &out.LastProbeTime, 0); err != nil {
if err := s.Convert(&in.LastProbeTime, &out.LastHeartbeatTime, 0); err != nil {
return err
}
if err := s.Convert(&in.LastTransitionTime, &out.LastTransitionTime, 0); err != nil {
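
These conversions exist because the internal field was renamed while the versioned API kept the old name: each direction copies LastHeartbeatTime into LastProbeTime (and back), so the wire format is unchanged. A hand-written sketch with simplified stand-in types (not the real api/v1beta1 packages):

package main

import (
	"fmt"
	"time"
)

// InternalNodeCondition mirrors the renamed internal field.
type InternalNodeCondition struct {
	LastHeartbeatTime  time.Time
	LastTransitionTime time.Time
}

// VersionedNodeCondition keeps the old field name for API compatibility.
type VersionedNodeCondition struct {
	LastProbeTime      time.Time `json:"lastProbeTime,omitempty"`
	LastTransitionTime time.Time `json:"lastTransitionTime,omitempty"`
}

func toVersioned(in InternalNodeCondition) VersionedNodeCondition {
	return VersionedNodeCondition{
		LastProbeTime:      in.LastHeartbeatTime, // renamed internally, same field on the wire
		LastTransitionTime: in.LastTransitionTime,
	}
}

func fromVersioned(in VersionedNodeCondition) InternalNodeCondition {
	return InternalNodeCondition{
		LastHeartbeatTime:  in.LastProbeTime,
		LastTransitionTime: in.LastTransitionTime,
	}
}

func main() {
	now := time.Now()
	v := toVersioned(InternalNodeCondition{LastHeartbeatTime: now})
	fmt.Println(v.LastProbeTime.Equal(fromVersioned(v).LastHeartbeatTime)) // true
}
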

View File

@ -1296,7 +1296,7 @@ func init() {
if err := s.Convert(&in.Status, &out.Status, 0); err != nil {
return err
}
if err := s.Convert(&in.LastProbeTime, &out.LastProbeTime, 0); err != nil {
if err := s.Convert(&in.LastHeartbeatTime, &out.LastProbeTime, 0); err != nil {
return err
}
if err := s.Convert(&in.LastTransitionTime, &out.LastTransitionTime, 0); err != nil {
@ -1317,7 +1317,7 @@ func init() {
if err := s.Convert(&in.Status, &out.Status, 0); err != nil {
return err
}
if err := s.Convert(&in.LastProbeTime, &out.LastProbeTime, 0); err != nil {
if err := s.Convert(&in.LastProbeTime, &out.LastHeartbeatTime, 0); err != nil {
return err
}
if err := s.Convert(&in.LastTransitionTime, &out.LastTransitionTime, 0); err != nil {

View File

@ -1110,7 +1110,7 @@ const (
type NodeCondition struct {
Type NodeConditionType `json:"type" description:"type of node condition, one of Ready, Schedulable"`
Status ConditionStatus `json:"status" description:"status of the condition, one of Full, None, Unknown"`
LastProbeTime util.Time `json:"lastProbeTime,omitempty" description:"last time the condition was probed"`
LastHeartbeatTime util.Time `json:"lastHeartbeatTime,omitempty" description:"last time we got an update on a given condition"`
LastTransitionTime util.Time `json:"lastTransitionTime,omitempty" description:"last time the condition transitioned from one status to another"`
Reason string `json:"reason,omitempty" description:"(brief) reason for the condition's last transition"`
Message string `json:"message,omitempty" description:"human readable message indicating details about last transition"`

View File

@ -21,7 +21,6 @@ import (
"fmt"
"net"
"strings"
"sync"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
@ -29,7 +28,6 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/golang/glog"
)
@ -132,12 +130,9 @@ func NewNodeController(
// node addresses.
// 2. SyncCloudNodes() is called periodically (if enabled) to sync instances from cloudprovider.
// Nodes created here will only have specs.
// 3. Depending on how k8s is configured, there are two ways of syncing the node status:
// 3.1 SyncProbedNodeStatus() is called periodically to trigger master to probe kubelet,
// and incorporate the resulting node status.
// 3.2 MonitorNodeStatus() is called periodically to incorporate the results of node status
// 3. MonitorNodeStatus() is called periodically to incorporate the results of node status
// pushed from kubelet to master.
func (nc *NodeController) Run(period time.Duration, syncNodeList, syncNodeStatus bool) {
func (nc *NodeController) Run(period time.Duration, syncNodeList bool) {
// Register the initial set of nodes with their status set.
var nodes *api.NodeList
var err error
@ -170,21 +165,13 @@ func (nc *NodeController) Run(period time.Duration, syncNodeList, syncNodeStatus
}, period)
}
// Start syncing or monitoring node status.
if syncNodeStatus {
// Start monitoring node status.
go util.Forever(func() {
if err := nc.SyncProbedNodeStatus(); err != nil {
glog.Errorf("Error syncing status: %v", err)
}
}, period)
} else {
go util.Forever(func() {
if err := nc.MonitorNodeStatus(); err != nil {
if err = nc.MonitorNodeStatus(); err != nil {
glog.Errorf("Error monitoring node status: %v", err)
}
}, nc.nodeMonitorPeriod)
}
}
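
After this change there is no probing branch left in Run: the controller only registers/syncs nodes and lets MonitorNodeStatus watch the heartbeats the kubelet pushes. A compact sketch of the resulting control flow, with hand-rolled stand-ins for util.Forever and the controller's methods:

package main

import (
	"log"
	"time"
)

// forever is a stand-in for util.Forever: run f, sleep period, repeat.
func forever(f func(), period time.Duration) {
	for {
		f()
		time.Sleep(period)
	}
}

func run(syncNodeList bool, period, monitorPeriod time.Duration,
	syncCloudNodes, monitorNodeStatus func() error) {
	if syncNodeList {
		go forever(func() {
			if err := syncCloudNodes(); err != nil {
				log.Printf("Error syncing cloud: %v", err)
			}
		}, period)
	}
	// The only status path: monitor what the kubelet posted.
	go forever(func() {
		if err := monitorNodeStatus(); err != nil {
			log.Printf("Error monitoring node status: %v", err)
		}
	}, monitorPeriod)
}

func main() {
	run(false, 10*time.Second, time.Second,
		func() error { return nil },
		func() error { log.Print("monitor tick"); return nil })
	time.Sleep(3 * time.Second) // let a few monitor ticks fire, then exit
}
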
// RegisterNodes registers the given list of nodes, it keeps retrying for `retryCount` times.
func (nc *NodeController) RegisterNodes(nodes *api.NodeList, retryCount int, retryInterval time.Duration) error {
@ -272,121 +259,6 @@ func (nc *NodeController) SyncCloudNodes() error {
return nil
}
// SyncProbedNodeStatus synchronizes cluster nodes status to master server.
func (nc *NodeController) SyncProbedNodeStatus() error {
nodes, err := nc.kubeClient.Nodes().List()
if err != nil {
return err
}
nodes, err = nc.PopulateNodesStatus(nodes)
if err != nil {
return err
}
for _, node := range nodes.Items {
// We used to skip updating the node when its status didn't change, but that is no
// longer useful now that we have per-probe status fields, e.g. 'LastProbeTime', which
// differ on every iteration of the sync loop.
glog.V(2).Infof("updating node %v", node.Name)
_, err = nc.kubeClient.Nodes().Update(&node)
if err != nil {
glog.Errorf("error updating node %s: %v", node.Name, err)
}
}
return nil
}
// PopulateNodesStatus populates node status for given list of nodes.
func (nc *NodeController) PopulateNodesStatus(nodes *api.NodeList) (*api.NodeList, error) {
var wg sync.WaitGroup
wg.Add(len(nodes.Items))
for i := range nodes.Items {
go func(node *api.Node) {
node.Status.Conditions = nc.DoCheck(node)
if err := nc.populateNodeInfo(node); err != nil {
glog.Errorf("Can't collect information for node %s: %v", node.Name, err)
}
wg.Done()
}(&nodes.Items[i])
}
wg.Wait()
return nc.PopulateAddresses(nodes)
}
// populateNodeInfo gets node info from kubelet and update the node.
func (nc *NodeController) populateNodeInfo(node *api.Node) error {
nodeInfo, err := nc.kubeletClient.GetNodeInfo(node.Name)
if err != nil {
return err
}
for key, value := range nodeInfo.Capacity {
node.Status.Capacity[key] = value
}
node.Status.NodeInfo = nodeInfo.NodeSystemInfo
return nil
}
// DoCheck performs various condition checks for given node.
func (nc *NodeController) DoCheck(node *api.Node) []api.NodeCondition {
var conditions []api.NodeCondition
// Check Condition: NodeReady. TODO: More node conditions.
oldReadyCondition := nc.getCondition(&node.Status, api.NodeReady)
newReadyCondition := nc.checkNodeReady(node)
nc.updateLastTransitionTime(oldReadyCondition, newReadyCondition)
if newReadyCondition.Status != api.ConditionTrue {
// Node is not ready for this probe, we need to check if pods need to be deleted.
if newReadyCondition.LastProbeTime.After(newReadyCondition.LastTransitionTime.Add(nc.podEvictionTimeout)) {
// As long as the node fails, we keep calling deletePods to delete all pods on it. Node
// controller sync is not a closed-loop process; there is no feedback from other components
// regarding pod status, so repeatedly listing pods to sanity-check that they are all deleted makes more sense.
nc.deletePods(node.Name)
}
}
conditions = append(conditions, *newReadyCondition)
return conditions
}
// updateLastTransitionTime updates LastTransitionTime for the newCondition based on oldCondition.
func (nc *NodeController) updateLastTransitionTime(oldCondition, newCondition *api.NodeCondition) {
if oldCondition != nil && oldCondition.Status == newCondition.Status {
// If node status doesn't change, transition time is same as last time.
newCondition.LastTransitionTime = oldCondition.LastTransitionTime
} else {
// Set transition time to Now() if node status changes or `oldCondition` is nil, which
// happens only when the node is checked for the first time.
newCondition.LastTransitionTime = nc.now()
}
}
// checkNodeReady checks raw node ready condition, without transition timestamp set.
func (nc *NodeController) checkNodeReady(node *api.Node) *api.NodeCondition {
switch status, err := nc.kubeletClient.HealthCheck(node.Name); {
case err != nil:
glog.V(2).Infof("NodeController: node %s health check error: %v", node.Name, err)
return &api.NodeCondition{
Type: api.NodeReady,
Status: api.ConditionUnknown,
Reason: fmt.Sprintf("Node health check error: %v", err),
LastProbeTime: nc.now(),
}
case status == probe.Failure:
return &api.NodeCondition{
Type: api.NodeReady,
Status: api.ConditionFalse,
Reason: fmt.Sprintf("Node health check failed: kubelet /healthz endpoint returns not ok"),
LastProbeTime: nc.now(),
}
default:
return &api.NodeCondition{
Type: api.NodeReady,
Status: api.ConditionTrue,
Reason: fmt.Sprintf("Node health check succeeded: kubelet /healthz endpoint returns ok"),
LastProbeTime: nc.now(),
}
}
}
// PopulateAddresses queries Address for given list of nodes.
func (nc *NodeController) PopulateAddresses(nodes *api.NodeList) (*api.NodeList, error) {
if nc.isRunningCloudProvider() {
@ -440,7 +312,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
lastReadyCondition = api.NodeCondition{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastProbeTime: node.CreationTimestamp,
LastHeartbeatTime: node.CreationTimestamp,
LastTransitionTime: node.CreationTimestamp,
}
gracePeriod = nc.nodeStartupGracePeriod
@ -497,7 +369,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
readyTransitionTimestamp: nc.now(),
}
nc.nodeStatusMap[node.Name] = savedNodeStatus
} else if savedCondition != nil && observedCondition != nil && savedCondition.LastProbeTime != observedCondition.LastProbeTime {
} else if savedCondition != nil && observedCondition != nil && savedCondition.LastHeartbeatTime != observedCondition.LastHeartbeatTime {
var transitionTime util.Time
// If ReadyCondition changed since the last time we checked, we update the transition timestamp to "now",
// otherwise we leave it as it is.
@ -526,7 +398,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
Type: api.NodeReady,
Status: api.ConditionUnknown,
Reason: fmt.Sprintf("Kubelet never posted node status."),
LastProbeTime: node.CreationTimestamp,
LastHeartbeatTime: node.CreationTimestamp,
LastTransitionTime: nc.now(),
})
} else {
@ -536,7 +408,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
readyCondition.Status = api.ConditionUnknown
readyCondition.Reason = fmt.Sprintf("Kubelet stopped posting node status.")
// LastHeartbeatTime is the last time we heard from kubelet.
readyCondition.LastProbeTime = lastReadyCondition.LastProbeTime
readyCondition.LastHeartbeatTime = lastReadyCondition.LastHeartbeatTime
readyCondition.LastTransitionTime = nc.now()
}
}
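
The two hunks above are the heart of the push model: tryUpdateNodeStatus no longer probes anything, it just compares the Ready condition's last heartbeat against a grace period. A simplified, self-contained sketch of that check (names are hypothetical stand-ins for the real NodeController fields):

package main

import (
	"fmt"
	"time"
)

type nodeCondition struct {
	Status             string // "True", "False", or "Unknown"
	Reason             string
	LastHeartbeatTime  time.Time
	LastTransitionTime time.Time
}

// checkHeartbeat flips the Ready condition to Unknown once the kubelet's
// heartbeat is older than the grace period, instead of probing /healthz.
func checkHeartbeat(ready *nodeCondition, gracePeriod time.Duration, now time.Time) {
	if now.Sub(ready.LastHeartbeatTime) <= gracePeriod {
		return // the kubelet is posting status; nothing to do
	}
	if ready.Status != "Unknown" {
		ready.Status = "Unknown"
		ready.Reason = "Kubelet stopped posting node status."
		// LastHeartbeatTime is left as the last time we heard from the
		// kubelet; only the transition timestamp moves to now.
		ready.LastTransitionTime = now
	}
}

func main() {
	c := nodeCondition{Status: "True", LastHeartbeatTime: time.Now().Add(-time.Hour)}
	checkHeartbeat(&c, 40*time.Second, time.Now())
	fmt.Printf("%+v\n", c) // Status:Unknown Reason:Kubelet stopped posting node status.
}
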

View File

@ -19,7 +19,6 @@ package controller
import (
"errors"
"fmt"
"net"
"net/http"
"reflect"
"sort"
@ -614,79 +613,6 @@ func TestSyncCloudNodesEvictPods(t *testing.T) {
}
}
func TestNodeConditionsCheck(t *testing.T) {
fakeNow := util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
table := []struct {
node *api.Node
fakeKubeletClient *FakeKubeletClient
expectedConditions []api.NodeCondition
}{
{
// Node with default spec and kubelet /healthz probe returns success.
// Expected node condition to be ready and marked schedulable.
node: newNode("node0"),
fakeKubeletClient: &FakeKubeletClient{
Status: probe.Success,
Err: nil,
},
expectedConditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
Reason: "Node health check succeeded: kubelet /healthz endpoint returns ok",
LastProbeTime: fakeNow,
LastTransitionTime: fakeNow,
},
},
},
{
// User specified node as schedulable and kubelet /healthz probe returns failure with no error.
// Expected node condition to be not ready and marked schedulable.
node: &api.Node{ObjectMeta: api.ObjectMeta{Name: "node0"}, Spec: api.NodeSpec{Unschedulable: false}},
fakeKubeletClient: &FakeKubeletClient{
Status: probe.Failure,
Err: nil,
},
expectedConditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionFalse,
Reason: "Node health check failed: kubelet /healthz endpoint returns not ok",
LastProbeTime: fakeNow,
LastTransitionTime: fakeNow,
},
},
},
{
// Expected node condition to be not ready as marking Node Unschedulable does not impact Readiness.
node: &api.Node{ObjectMeta: api.ObjectMeta{Name: "node0"}, Spec: api.NodeSpec{Unschedulable: true}},
fakeKubeletClient: &FakeKubeletClient{
Status: probe.Failure,
Err: errors.New("Error"),
},
expectedConditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
Reason: "Node health check error: Error",
LastProbeTime: fakeNow,
LastTransitionTime: fakeNow,
},
},
},
}
for _, item := range table {
nodeController := NewNodeController(nil, "", nil, nil, nil, item.fakeKubeletClient, 10, time.Minute,
util.NewFakeRateLimiter(), testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod)
nodeController.now = func() util.Time { return fakeNow }
conditions := nodeController.DoCheck(item.node)
if !reflect.DeepEqual(item.expectedConditions, conditions) {
t.Errorf("expected conditions %+v, got %+v", item.expectedConditions, conditions)
}
}
}
func TestPopulateNodeAddresses(t *testing.T) {
table := []struct {
nodes *api.NodeList
@ -724,334 +650,6 @@ func TestPopulateNodeAddresses(t *testing.T) {
}
}
func TestSyncProbedNodeStatus(t *testing.T) {
fakeNow := util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
table := []struct {
fakeNodeHandler *FakeNodeHandler
fakeKubeletClient *FakeKubeletClient
fakeCloud *fake_cloud.FakeCloud
expectedNodes []*api.Node
expectedRequestCount int
}{
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{newNode("node0"), newNode("node1")},
},
fakeKubeletClient: &FakeKubeletClient{
Status: probe.Success,
Err: nil,
},
fakeCloud: &fake_cloud.FakeCloud{
Addresses: []api.NodeAddress{{Type: api.NodeLegacyHostIP, Address: "1.2.3.4"}},
},
expectedNodes: []*api.Node{
{
ObjectMeta: api.ObjectMeta{Name: "node0"},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
Reason: "Node health check succeeded: kubelet /healthz endpoint returns ok",
LastProbeTime: fakeNow,
LastTransitionTime: fakeNow,
},
},
Addresses: []api.NodeAddress{
{Type: api.NodeLegacyHostIP, Address: "1.2.3.4"},
},
Capacity: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
},
Spec: api.NodeSpec{
ExternalID: "node0",
},
},
{
ObjectMeta: api.ObjectMeta{Name: "node1"},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
Reason: "Node health check succeeded: kubelet /healthz endpoint returns ok",
LastProbeTime: fakeNow,
LastTransitionTime: fakeNow,
},
},
Addresses: []api.NodeAddress{
{Type: api.NodeLegacyHostIP, Address: "1.2.3.4"},
},
Capacity: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
},
Spec: api.NodeSpec{
ExternalID: "node1",
},
},
},
expectedRequestCount: 3, // List + 2xUpdate
},
}
for _, item := range table {
nodeController := NewNodeController(item.fakeCloud, ".*", nil, nil, item.fakeNodeHandler, item.fakeKubeletClient, 10, time.Minute,
util.NewFakeRateLimiter(), testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod)
nodeController.now = func() util.Time { return fakeNow }
if err := nodeController.SyncProbedNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)
}
if item.fakeNodeHandler.RequestCount != item.expectedRequestCount {
t.Errorf("expected %v call, but got %v.", item.expectedRequestCount, item.fakeNodeHandler.RequestCount)
}
if !reflect.DeepEqual(item.expectedNodes, item.fakeNodeHandler.UpdatedNodes) {
t.Errorf("expected nodes %+v, got %+v", item.expectedNodes[0], item.fakeNodeHandler.UpdatedNodes[0])
}
// Second sync will also update the node.
item.fakeNodeHandler.RequestCount = 0
if err := nodeController.SyncProbedNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)
}
if item.fakeNodeHandler.RequestCount != item.expectedRequestCount {
t.Errorf("expected %v call, but got %v.", item.expectedRequestCount, item.fakeNodeHandler.RequestCount)
}
}
}
func TestSyncProbedNodeStatusTransitionTime(t *testing.T) {
fakeNow := util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
table := []struct {
fakeNodeHandler *FakeNodeHandler
fakeKubeletClient *FakeKubeletClient
expectedRequestCount int
expectedTransitionTime util.Time
}{
{
// Existing node is healthy, current probe is healthy too.
// Existing node is schedulable, again explicitly mark node as schedulable.
// Expect transition time to stay the same as before.
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{Name: "node0"},
Spec: api.NodeSpec{Unschedulable: false},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
Reason: "Node health check succeeded: kubelet /healthz endpoint returns ok",
LastTransitionTime: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
},
},
},
},
},
fakeKubeletClient: &FakeKubeletClient{
Status: probe.Success,
Err: nil,
},
expectedRequestCount: 2, // List+Update
expectedTransitionTime: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
{
// Existing node is healthy, current probe is unhealthy.
// Existing node is schedulable, mark node as unschedulable.
// Expect transition time to be now.
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{Name: "node0"},
Spec: api.NodeSpec{Unschedulable: true},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
Reason: "Node health check succeeded: kubelet /healthz endpoint returns ok",
LastTransitionTime: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
},
},
},
},
},
fakeKubeletClient: &FakeKubeletClient{
Status: probe.Failure,
Err: nil,
},
expectedRequestCount: 2, // List+Update
expectedTransitionTime: fakeNow,
},
}
for _, item := range table {
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, 10, time.Minute,
util.NewFakeRateLimiter(), testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod)
nodeController.lookupIP = func(host string) ([]net.IP, error) { return nil, fmt.Errorf("lookup %v: no such host", host) }
nodeController.now = func() util.Time { return fakeNow }
if err := nodeController.SyncProbedNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)
}
if item.expectedRequestCount != item.fakeNodeHandler.RequestCount {
t.Errorf("expected %v call, but got %v.", item.expectedRequestCount, item.fakeNodeHandler.RequestCount)
}
for _, node := range item.fakeNodeHandler.UpdatedNodes {
for _, condition := range node.Status.Conditions {
if !condition.LastTransitionTime.Time.Equal(item.expectedTransitionTime.Time) {
t.Errorf("expected last transition time %v, but got %v", item.expectedTransitionTime, condition.LastTransitionTime)
}
}
}
}
}
func TestSyncProbedNodeStatusEvictPods(t *testing.T) {
table := []struct {
fakeNodeHandler *FakeNodeHandler
fakeKubeletClient *FakeKubeletClient
expectedRequestCount int
expectedActions []testclient.FakeAction
}{
{
// Existing node is healthy, current probe is healthy too.
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{Name: "node0"},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
Reason: "Node health check succeeded: kubelet /healthz endpoint returns ok",
LastTransitionTime: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
},
},
},
},
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node1")}}),
},
fakeKubeletClient: &FakeKubeletClient{
Status: probe.Success,
Err: nil,
},
expectedRequestCount: 2, // List+Update
expectedActions: nil,
},
{
// Existing node is healthy, current probe is unhealthy, i.e. node just becomes unhealthy.
// Do not delete pods.
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{Name: "node0"},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
Reason: "Node health check succeeded: kubelet /healthz endpoint returns ok",
LastTransitionTime: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
},
},
},
},
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
fakeKubeletClient: &FakeKubeletClient{
Status: probe.Failure,
Err: nil,
},
expectedRequestCount: 2, // List+Update
expectedActions: nil,
},
{
// Existing node unhealthy, current probe is unhealthy. Node is still within grace period.
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{Name: "node0"},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionFalse,
Reason: "Node health check failed: kubelet /healthz endpoint returns not ok",
// Here, last transition time is Now(). In node controller, the new condition's probe time is
// also Now(). The two calls to Now() yield different times due to test execution, but the
// time difference is within 5 minutes, which is the grace period.
LastTransitionTime: util.Now(),
},
},
},
},
},
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
fakeKubeletClient: &FakeKubeletClient{
Status: probe.Failure,
Err: nil,
},
expectedRequestCount: 2, // List+Update
expectedActions: nil,
},
{
// Existing node unhealthy, current probe is unhealthy. Node exceeds grace period.
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{Name: "node0"},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionFalse,
Reason: "Node health check failed: kubelet /healthz endpoint returns not ok",
// Here, last transition time is in the past, and in node controller, the
// new condition's probe time is Now(). The time difference is larger than
// 5 minutes. The test will fail if the system clock is wrong, but we don't yet have
// ways to mock time in our tests.
LastTransitionTime: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
},
},
},
},
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
fakeKubeletClient: &FakeKubeletClient{
Status: probe.Failure,
Err: nil,
},
expectedRequestCount: 2, // List+Update
expectedActions: []testclient.FakeAction{{Action: "list-pods"}, {Action: "delete-pod", Value: "pod0"}},
},
}
for _, item := range table {
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, 10, 5*time.Minute,
util.NewFakeRateLimiter(), testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod)
nodeController.lookupIP = func(host string) ([]net.IP, error) { return nil, fmt.Errorf("lookup %v: no such host", host) }
if err := nodeController.SyncProbedNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)
}
if item.expectedRequestCount != item.fakeNodeHandler.RequestCount {
t.Errorf("expected %v call, but got %v.", item.expectedRequestCount, item.fakeNodeHandler.RequestCount)
}
if !reflect.DeepEqual(item.expectedActions, item.fakeNodeHandler.Actions) {
t.Errorf("time out waiting for deleting pods, expected %+v, got %+v", item.expectedActions, item.fakeNodeHandler.Actions)
}
}
}
func TestMonitorNodeStatusEvictPods(t *testing.T) {
fakeNow := util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
evictionTimeout := 10 * time.Minute
@ -1095,7 +693,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
{
Type: api.NodeReady,
Status: api.ConditionFalse,
LastProbeTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
@ -1111,7 +709,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
Type: api.NodeReady,
Status: api.ConditionFalse,
// Node status has just been updated, and is NotReady for 10min.
LastProbeTime: util.Date(2015, 1, 1, 12, 9, 0, 0, time.UTC),
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 9, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
@ -1133,7 +731,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
{
Type: api.NodeReady,
Status: api.ConditionFalse,
LastProbeTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
@ -1149,7 +747,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
Type: api.NodeReady,
Status: api.ConditionFalse,
// Node status has just been updated, and is NotReady for 1hr.
LastProbeTime: util.Date(2015, 1, 1, 12, 59, 0, 0, time.UTC),
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 59, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
@ -1171,7 +769,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastProbeTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
@ -1187,7 +785,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
Type: api.NodeReady,
Status: api.ConditionUnknown,
// Node status was updated by nodecontroller 10min ago
LastProbeTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
@ -1209,7 +807,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastProbeTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
@ -1225,7 +823,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
Type: api.NodeReady,
Status: api.ConditionUnknown,
// Node status was updated by nodecontroller 1hr ago
LastProbeTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
@ -1300,7 +898,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
Type: api.NodeReady,
Status: api.ConditionUnknown,
Reason: fmt.Sprintf("Kubelet never posted node status."),
LastProbeTime: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
LastHeartbeatTime: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
LastTransitionTime: fakeNow,
},
},
@ -1341,7 +939,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
Type: api.NodeReady,
Status: api.ConditionTrue,
// Node status hasn't been updated for 1hr.
LastProbeTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
@ -1365,7 +963,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
Type: api.NodeReady,
Status: api.ConditionTrue,
// Node status hasn't been updated for 1hr.
LastProbeTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
@ -1386,7 +984,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
Type: api.NodeReady,
Status: api.ConditionUnknown,
Reason: fmt.Sprintf("Kubelet stopped posting node status."),
LastProbeTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: util.Time{util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC).Add(time.Hour)},
},
},
@ -1417,7 +1015,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
Type: api.NodeReady,
Status: api.ConditionTrue,
// Node status has just been updated.
LastProbeTime: fakeNow,
LastHeartbeatTime: fakeNow,
LastTransitionTime: fakeNow,
},
},

View File

@ -496,12 +496,12 @@ func describeNode(node *api.Node, pods []api.Pod, events *api.EventList) (string
fmt.Fprintf(out, "Labels:\t%s\n", formatLabels(node.Labels))
fmt.Fprintf(out, "CreationTimestamp:\t%s\n", node.CreationTimestamp.Time.Format(time.RFC1123Z))
if len(node.Status.Conditions) > 0 {
fmt.Fprint(out, "Conditions:\n Type\tStatus\tLastProbeTime\tLastTransitionTime\tReason\tMessage\n")
fmt.Fprint(out, "Conditions:\n Type\tStatus\tLastHeartbeatTime\tLastTransitionTime\tReason\tMessage\n")
for _, c := range node.Status.Conditions {
fmt.Fprintf(out, " %v \t%v \t%s \t%s \t%v \t%v\n",
c.Type,
c.Status,
c.LastProbeTime.Time.Format(time.RFC1123Z),
c.LastHeartbeatTime.Time.Format(time.RFC1123Z),
c.LastTransitionTime.Time.Format(time.RFC1123Z),
c.Reason,
c.Message)

View File

@ -1810,7 +1810,7 @@ func (kl *Kubelet) tryUpdateNodeStatus() error {
Type: api.NodeReady,
Status: api.ConditionTrue,
Reason: fmt.Sprintf("kubelet is posting ready status"),
LastProbeTime: currentTime,
LastHeartbeatTime: currentTime,
}
updated := false
for i := range node.Status.Conditions {
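
This is the kubelet side of the push model the commit standardizes on: build a Ready condition, stamp LastHeartbeatTime, and post it on a fixed frequency. A sketch under those assumptions, where postStatus is a hypothetical stand-in for the real node update call (kubeClient.Nodes().Update in this tree):

package main

import (
	"fmt"
	"time"
)

type readyCondition struct {
	Type              string
	Status            string
	Reason            string
	LastHeartbeatTime time.Time
}

// heartbeatLoop posts a freshly stamped Ready condition every freq.
// ticks bounds the demo; the real loop runs forever.
func heartbeatLoop(nodeName string, freq time.Duration, postStatus func(readyCondition) error, ticks int) {
	for i := 0; i < ticks; i++ {
		cond := readyCondition{
			Type:              "Ready",
			Status:            "True",
			Reason:            "kubelet is posting ready status",
			LastHeartbeatTime: time.Now(),
		}
		if err := postStatus(cond); err != nil {
			fmt.Printf("error updating node %s: %v\n", nodeName, err)
		}
		time.Sleep(freq)
	}
}

func main() {
	heartbeatLoop("node0", 10*time.Millisecond, func(c readyCondition) error {
		fmt.Printf("posted heartbeat at %s\n", c.LastHeartbeatTime.Format(time.RFC3339Nano))
		return nil
	}, 3)
}
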

View File

@ -3080,7 +3080,7 @@ func TestUpdateNewNodeStatus(t *testing.T) {
Type: api.NodeReady,
Status: api.ConditionTrue,
Reason: fmt.Sprintf("kubelet is posting ready status"),
LastProbeTime: util.Time{},
LastHeartbeatTime: util.Time{},
LastTransitionTime: util.Time{},
},
},
@ -3111,13 +3111,13 @@ func TestUpdateNewNodeStatus(t *testing.T) {
if !ok {
t.Errorf("unexpected object type")
}
if updatedNode.Status.Conditions[0].LastProbeTime.IsZero() {
if updatedNode.Status.Conditions[0].LastHeartbeatTime.IsZero() {
t.Errorf("unexpected zero last probe timestamp")
}
if updatedNode.Status.Conditions[0].LastTransitionTime.IsZero() {
t.Errorf("unexpected zero last transition timestamp")
}
updatedNode.Status.Conditions[0].LastProbeTime = util.Time{}
updatedNode.Status.Conditions[0].LastHeartbeatTime = util.Time{}
updatedNode.Status.Conditions[0].LastTransitionTime = util.Time{}
if !reflect.DeepEqual(expectedNode, updatedNode) {
t.Errorf("unexpected objects: %s", util.ObjectDiff(expectedNode, updatedNode))
@ -3138,7 +3138,7 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
Type: api.NodeReady,
Status: api.ConditionTrue,
Reason: fmt.Sprintf("kubelet is posting ready status"),
LastProbeTime: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
LastHeartbeatTime: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
LastTransitionTime: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
},
@ -3173,7 +3173,7 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
Type: api.NodeReady,
Status: api.ConditionTrue,
Reason: fmt.Sprintf("kubelet is posting ready status"),
LastProbeTime: util.Time{}, // placeholder
LastHeartbeatTime: util.Time{}, // placeholder
LastTransitionTime: util.Time{}, // placeholder
},
},
@ -3205,14 +3205,14 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
t.Errorf("unexpected object type")
}
// Expect LastHeartbeatTime to be updated to Now, while LastTransitionTime to remain the same.
if reflect.DeepEqual(updatedNode.Status.Conditions[0].LastProbeTime.Rfc3339Copy().UTC(), util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC).Time) {
if reflect.DeepEqual(updatedNode.Status.Conditions[0].LastHeartbeatTime.Rfc3339Copy().UTC(), util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC).Time) {
t.Errorf("expected \n%v\n, got \n%v", util.Now(), util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC))
}
if !reflect.DeepEqual(updatedNode.Status.Conditions[0].LastTransitionTime.Rfc3339Copy().UTC(), util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC).Time) {
t.Errorf("expected \n%#v\n, got \n%#v", updatedNode.Status.Conditions[0].LastTransitionTime.Rfc3339Copy(),
util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC))
}
updatedNode.Status.Conditions[0].LastProbeTime = util.Time{}
updatedNode.Status.Conditions[0].LastHeartbeatTime = util.Time{}
updatedNode.Status.Conditions[0].LastTransitionTime = util.Time{}
if !reflect.DeepEqual(expectedNode, updatedNode) {
t.Errorf("expected \n%v\n, got \n%v", expectedNode, updatedNode)