Merge pull request #30828 from bprashanth/nc_podready
Automatic merge from submit-queue

Nodecontroller doesn't flip readiness on pods if kubeletVersion < 1.2.0

Older versions of the kubelet didn't know how to reconcile pod.Status, so the node controller would mark pods NotReady on a network partition; if the partition recovered in under 5 minutes, the pods would never be marked Ready again, leaving NotReady endpoints indefinitely (until a kubelet restart, pod recreation, etc.).
Commit 7ceb23c719
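The fix gates the readiness flip on a plain semver comparison against the node's reported kubelet version. A minimal standalone sketch of that gate, assuming github.com/blang/semver (whose Parse/MustParse/GT calls match those in the diff; the outdated helper is illustrative, not part of the commit):

package main

import (
	"fmt"
	"strings"

	"github.com/blang/semver"
)

// Minimum kubelet version that knows how to reconcile a pod.Status
// written by someone else (here: the node controller).
var podStatusReconciliationVersion = semver.MustParse("1.2.0")

// outdated reports whether a reported kubelet version is below that
// minimum. Unparseable versions count as outdated, failing safe:
// when in doubt, leave pod readiness alone.
func outdated(kubeletVersion string) bool {
	v, err := semver.Parse(strings.TrimPrefix(kubeletVersion, "v"))
	if err != nil {
		return true
	}
	return podStatusReconciliationVersion.GT(v)
}

func main() {
	for _, s := range []string{"v1.1.0", "v1.2.0", "v1.3.3", "junk"} {
		fmt.Printf("%-8s outdated=%v\n", s, outdated(s))
	}
}

Note the fail-safe direction: a version the controller cannot parse is treated as outdated, so the controller declines to touch pod.Status rather than risk writing state an old kubelet would never correct.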
@@ -209,7 +209,15 @@ func (nc *NodeController) maybeDeleteTerminatingPod(obj interface{}) {
 
 // update ready status of all pods running on given node from master
 // returns an error if the update fails
-func markAllPodsNotReady(kubeClient clientset.Interface, nodeName string) error {
+func markAllPodsNotReady(kubeClient clientset.Interface, node *api.Node) error {
+	// Don't set pods to NotReady if the kubelet is running a version that
+	// doesn't understand how to correct readiness.
+	// TODO: Remove this check when we no longer guarantee backward compatibility
+	// with node versions < 1.2.0.
+	if nodeRunningOutdatedKubelet(node) {
+		return nil
+	}
+	nodeName := node.Name
 	glog.V(2).Infof("Update ready status of pods on node [%v]", nodeName)
 	opts := api.ListOptions{FieldSelector: fields.OneTermEqualSelector(api.PodHostField, nodeName)}
 	pods, err := kubeClient.Core().Pods(api.NamespaceAll).List(opts)
@@ -243,6 +251,23 @@ func markAllPodsNotReady(kubeClient clientset.Interface, nodeName string) error
 	return fmt.Errorf("%v", strings.Join(errMsg, "; "))
 }
 
+// nodeRunningOutdatedKubelet returns true if the kubeletVersion reported
+// in the nodeInfo of the given node is "outdated", meaning < 1.2.0.
+// Older versions were inflexible, and modifying pod.Status directly through
+// the apiserver would result in unexpected outcomes.
+func nodeRunningOutdatedKubelet(node *api.Node) bool {
+	v, err := version.Parse(node.Status.NodeInfo.KubeletVersion)
+	if err != nil {
+		glog.Errorf("couldn't parse kubelet version %q of node %v: %v", node.Status.NodeInfo.KubeletVersion, node.Name, err)
+		return true
+	}
+	if podStatusReconciliationVersion.GT(v) {
+		glog.Infof("Node %v is running kubelet %v, which is below the minimum version (%v) that allows the nodecontroller to mark pods NotReady.", node.Name, v, podStatusReconciliationVersion)
+		return true
+	}
+	return false
+}
+
 func nodeExistsInCloudProvider(cloud cloudprovider.Interface, nodeName string) (bool, error) {
 	instances, ok := cloud.Instances()
 	if !ok {
@@ -51,6 +51,10 @@ import (
 var (
 	ErrCloudInstance        = errors.New("cloud provider doesn't support instances.")
 	gracefulDeletionVersion = version.MustParse("v1.1.0")
+
+	// The minimum kubelet version for which the nodecontroller
+	// can safely flip pod.Status to NotReady.
+	podStatusReconciliationVersion = version.MustParse("v1.2.0")
 )
 
 const (
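version.MustParse here follows the usual Go Must* convention: it panics on malformed input, which is acceptable only for package-level literals fixed at compile time, like the two version constants in this var block. A hedged sketch of such a wrapper (hypothetical, reusing the imports from the sketch above; the commit's actual version package is not shown on this page):

// mustParse panics on a malformed version literal, so a bad constant
// fails loudly at process start rather than at decision time.
func mustParse(s string) semver.Version {
	v, err := semver.Parse(strings.TrimPrefix(s, "v"))
	if err != nil {
		panic(fmt.Sprintf("bad version literal %q: %v", s, err))
	}
	return v
}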
@@ -527,7 +531,7 @@ func (nc *NodeController) monitorNodeStatus() error {
 			// Report node event.
 			if currentReadyCondition.Status != api.ConditionTrue && observedReadyCondition.Status == api.ConditionTrue {
 				recordNodeStatusChange(nc.recorder, node, "NodeNotReady")
-				if err = markAllPodsNotReady(nc.kubeClient, node.Name); err != nil {
+				if err = markAllPodsNotReady(nc.kubeClient, node); err != nil {
 					utilruntime.HandleError(fmt.Errorf("Unable to mark all pods NotReady on node %v: %v", node.Name, err))
 				}
 			}
@@ -1398,6 +1398,9 @@ func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
 							CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
 						},
 						Status: api.NodeStatus{
+							NodeInfo: api.NodeSystemInfo{
+								KubeletVersion: "v1.2.0",
+							},
 							Conditions: []api.NodeCondition{
 								{
 									Type: api.NodeReady,
@@ -1428,6 +1431,9 @@ func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
 			},
 			timeToPass: 1 * time.Minute,
 			newNodeStatus: api.NodeStatus{
+				NodeInfo: api.NodeSystemInfo{
+					KubeletVersion: "v1.2.0",
+				},
 				Conditions: []api.NodeCondition{
 					{
 						Type: api.NodeReady,
@@ -1451,6 +1457,76 @@ func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
 			},
 			expectedPodStatusUpdate: true,
 		},
+		// Node created long time ago, with outdated kubelet version 1.1.0, and status
+		// last updated by the kubelet longer ago than the grace period. Expect no action from the node controller.
+		{
+			fakeNodeHandler: &FakeNodeHandler{
+				Existing: []*api.Node{
+					{
+						ObjectMeta: api.ObjectMeta{
+							Name:              "node0",
+							CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
+						},
+						Status: api.NodeStatus{
+							NodeInfo: api.NodeSystemInfo{
+								KubeletVersion: "v1.1.0",
+							},
+							Conditions: []api.NodeCondition{
+								{
+									Type:   api.NodeReady,
+									Status: api.ConditionTrue,
+									// Node status hasn't been updated for 1hr.
+									LastHeartbeatTime:  unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+									LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+								},
+								{
+									Type:   api.NodeOutOfDisk,
+									Status: api.ConditionFalse,
+									// Node status hasn't been updated for 1hr.
+									LastHeartbeatTime:  unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+									LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+								},
+							},
+							Capacity: api.ResourceList{
+								api.ResourceName(api.ResourceCPU):    resource.MustParse("10"),
+								api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
+							},
+						},
+						Spec: api.NodeSpec{
+							ExternalID: "node0",
+						},
+					},
+				},
+				Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
+			},
+			timeToPass: 1 * time.Minute,
+			newNodeStatus: api.NodeStatus{
+				NodeInfo: api.NodeSystemInfo{
+					KubeletVersion: "v1.1.0",
+				},
+				Conditions: []api.NodeCondition{
+					{
+						Type:   api.NodeReady,
+						Status: api.ConditionTrue,
+						// Node status hasn't been updated for 1hr.
+						LastHeartbeatTime:  unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+						LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+					},
+					{
+						Type:   api.NodeOutOfDisk,
+						Status: api.ConditionFalse,
+						// Node status hasn't been updated for 1hr.
+						LastHeartbeatTime:  unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+						LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+					},
+				},
+				Capacity: api.ResourceList{
+					api.ResourceName(api.ResourceCPU):    resource.MustParse("10"),
+					api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
+				},
+			},
+			expectedPodStatusUpdate: false,
+		},
 	}
 
 	for i, item := range table {
@@ -1799,3 +1875,63 @@ func TestCleanupOrphanedPods(t *testing.T) {
 		t.Fatalf("expected deleted pod name to be 'c', but got: %q", deletedPodName)
 	}
 }
+
+func TestCheckNodeKubeletVersionParsing(t *testing.T) {
+	tests := []struct {
+		version  string
+		outdated bool
+	}{
+		{
+			version:  "",
+			outdated: true,
+		},
+		{
+			version:  "v0.21.4",
+			outdated: true,
+		},
+		{
+			version:  "v1.0.0",
+			outdated: true,
+		},
+		{
+			version:  "v1.1.0",
+			outdated: true,
+		},
+		{
+			version:  "v1.1.0-alpha.2.961+9d4c6846fc03b9-dirty",
+			outdated: true,
+		},
+		{
+			version:  "v1.2.0",
+			outdated: false,
+		},
+		{
+			version:  "v1.3.3",
+			outdated: false,
+		},
+		{
+			version:  "v1.4.0-alpha.2.961+9d4c6846fc03b9-dirty",
+			outdated: false,
+		},
+		{
+			version:  "v2.0.0",
+			outdated: false,
+		},
+	}
+
+	for _, ov := range tests {
+		n := &api.Node{
+			Status: api.NodeStatus{
+				NodeInfo: api.NodeSystemInfo{
+					KubeletVersion: ov.version,
+				},
+			},
+		}
+		isOutdated := nodeRunningOutdatedKubelet(n)
+		if ov.outdated != isOutdated {
+			t.Errorf("version %v doesn't match test expectation: expected outdated=%v, got %v", n.Status.NodeInfo.KubeletVersion, ov.outdated, isOutdated)
+		} else {
+			t.Logf("version %v outdated=%v", ov.version, isOutdated)
+		}
+	}
+}
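With the version gate and its table-driven test in place, the new coverage can be exercised on its own. Assuming the 1.4-era package layout (pkg/controller/node is inferred from the function names; it is not stated on this page), something like:

go test ./pkg/controller/node -run 'TestCheckNodeKubeletVersionParsing|TestMonitorNodeStatusMarkPodsNotReady' -v

The -v flag matters because the test reports successful parses via t.Logf, which is only printed for passing tests in verbose mode.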