Merge pull request #67734 from Huang-Wei/fix-nodelost-issue

Automatic merge from submit-queue (batch tested with PRs 64597, 67854, 67734, 67917, 67688). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

fix an issue that scheduling doesn't respect NodeLost status of a node

**What this PR does / why we need it**:

- if a Node's Ready condition is in Unknown status, apply the unreachable taint with NoSchedule effect
- some internal data structure refactoring
- update unit test

**Which issue(s) this PR fixes**:
Fixes #67733, and very likely #67536

**Special notes for your reviewer**:

See detailed reproducing steps in #67733.

**Release note**:
```release-note
Apply the unreachable taint to a node when it loses its network connection.
```
This commit is contained in:
Kubernetes Submit Queue 2018-08-27 22:18:12 -07:00 committed by GitHub
commit 0148f25fe7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 53 additions and 26 deletions

View File

@ -77,40 +77,35 @@ var (
Effect: v1.TaintEffectNoExecute, Effect: v1.TaintEffectNoExecute,
} }
nodeConditionToTaintKeyStatusMap = map[v1.NodeConditionType]struct { // map {NodeConditionType: {ConditionStatus: TaintKey}}
taintKey string // represents which NodeConditionType under which ConditionStatus should be
// noScheduleStatus is the condition under which the node should be tainted as not schedulable for this // tainted with which TaintKey
// NodeConditionType // for certain NodeConditionType, there are multiple {ConditionStatus,TaintKey} pairs
noScheduleStatus v1.ConditionStatus nodeConditionToTaintKeyStatusMap = map[v1.NodeConditionType]map[v1.ConditionStatus]string{
}{
v1.NodeReady: { v1.NodeReady: {
taintKey: algorithm.TaintNodeNotReady, v1.ConditionFalse: algorithm.TaintNodeNotReady,
noScheduleStatus: v1.ConditionFalse, v1.ConditionUnknown: algorithm.TaintNodeUnreachable,
}, },
v1.NodeMemoryPressure: { v1.NodeMemoryPressure: {
taintKey: algorithm.TaintNodeMemoryPressure, v1.ConditionTrue: algorithm.TaintNodeMemoryPressure,
noScheduleStatus: v1.ConditionTrue,
}, },
v1.NodeOutOfDisk: { v1.NodeOutOfDisk: {
taintKey: algorithm.TaintNodeOutOfDisk, v1.ConditionTrue: algorithm.TaintNodeOutOfDisk,
noScheduleStatus: v1.ConditionTrue,
}, },
v1.NodeDiskPressure: { v1.NodeDiskPressure: {
taintKey: algorithm.TaintNodeDiskPressure, v1.ConditionTrue: algorithm.TaintNodeDiskPressure,
noScheduleStatus: v1.ConditionTrue,
}, },
v1.NodeNetworkUnavailable: { v1.NodeNetworkUnavailable: {
taintKey: algorithm.TaintNodeNetworkUnavailable, v1.ConditionTrue: algorithm.TaintNodeNetworkUnavailable,
noScheduleStatus: v1.ConditionTrue,
}, },
v1.NodePIDPressure: { v1.NodePIDPressure: {
taintKey: algorithm.TaintNodePIDPressure, v1.ConditionTrue: algorithm.TaintNodePIDPressure,
noScheduleStatus: v1.ConditionTrue,
}, },
} }
taintKeyToNodeConditionMap = map[string]v1.NodeConditionType{ taintKeyToNodeConditionMap = map[string]v1.NodeConditionType{
algorithm.TaintNodeNotReady: v1.NodeReady, algorithm.TaintNodeNotReady: v1.NodeReady,
algorithm.TaintNodeUnreachable: v1.NodeReady,
algorithm.TaintNodeNetworkUnavailable: v1.NodeNetworkUnavailable, algorithm.TaintNodeNetworkUnavailable: v1.NodeNetworkUnavailable,
algorithm.TaintNodeMemoryPressure: v1.NodeMemoryPressure, algorithm.TaintNodeMemoryPressure: v1.NodeMemoryPressure,
algorithm.TaintNodeOutOfDisk: v1.NodeOutOfDisk, algorithm.TaintNodeOutOfDisk: v1.NodeOutOfDisk,
@ -454,10 +449,10 @@ func (nc *Controller) doNoScheduleTaintingPass(node *v1.Node) error {
// Map node's condition to Taints. // Map node's condition to Taints.
var taints []v1.Taint var taints []v1.Taint
for _, condition := range node.Status.Conditions { for _, condition := range node.Status.Conditions {
if taint, found := nodeConditionToTaintKeyStatusMap[condition.Type]; found { if taintMap, found := nodeConditionToTaintKeyStatusMap[condition.Type]; found {
if condition.Status == taint.noScheduleStatus { if taintKey, found := taintMap[condition.Status]; found {
taints = append(taints, v1.Taint{ taints = append(taints, v1.Taint{
Key: taint.taintKey, Key: taintKey,
Effect: v1.TaintEffectNoSchedule, Effect: v1.TaintEffectNoSchedule,
}) })
} }
@ -473,6 +468,10 @@ func (nc *Controller) doNoScheduleTaintingPass(node *v1.Node) error {
// Get exist taints of node. // Get exist taints of node.
nodeTaints := taintutils.TaintSetFilter(node.Spec.Taints, func(t *v1.Taint) bool { nodeTaints := taintutils.TaintSetFilter(node.Spec.Taints, func(t *v1.Taint) bool {
// only NoSchedule taints are candidates to be compared with "taints" later
if t.Effect != v1.TaintEffectNoSchedule {
return false
}
// Find unschedulable taint of node. // Find unschedulable taint of node.
if t.Key == algorithm.TaintNodeUnschedulable { if t.Key == algorithm.TaintNodeUnschedulable {
return true return true

View File

@ -2167,6 +2167,10 @@ func TestTaintsNodeByCondition(t *testing.T) {
Key: algorithm.TaintNodeNotReady, Key: algorithm.TaintNodeNotReady,
Effect: v1.TaintEffectNoSchedule, Effect: v1.TaintEffectNoSchedule,
} }
unreachableTaint := &v1.Taint{
Key: algorithm.TaintNodeUnreachable,
Effect: v1.TaintEffectNoSchedule,
}
tests := []struct { tests := []struct {
Name string Name string
@ -2299,6 +2303,30 @@ func TestTaintsNodeByCondition(t *testing.T) {
}, },
ExpectedTaints: []*v1.Taint{notReadyTaint}, ExpectedTaints: []*v1.Taint{notReadyTaint},
}, },
{
Name: "Ready is unknown",
Node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region1",
kubeletapis.LabelZoneFailureDomain: "zone1",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
ExpectedTaints: []*v1.Taint{unreachableTaint},
},
} }
for _, test := range tests { for _, test := range tests {

View File

@ -72,15 +72,15 @@ __EOF__
__EOF__ __EOF__
# taint/untaint # taint/untaint
# Pre-condition: node has no taints # Pre-condition: node doesn't have dedicated=foo:PreferNoSchedule taint
kube::test::get_object_assert "nodes 127.0.0.1" "{{.spec.taints}}" '<no value>' kube::test::get_object_assert "nodes 127.0.0.1" '{{range .spec.taints}}{{if eq .key \"dedicated\"}}{{.key}}={{.value}}:{{.effect}}{{end}}{{end}}' "" # expect no output
# taint can add a taint # taint can add a taint
kubectl taint node 127.0.0.1 dedicated=foo:PreferNoSchedule kubectl taint node 127.0.0.1 dedicated=foo:PreferNoSchedule
kube::test::get_object_assert "nodes 127.0.0.1" '{{range .spec.taints}}{{.effect}}{{end}}' 'PreferNoSchedule' kube::test::get_object_assert "nodes 127.0.0.1" '{{range .spec.taints}}{{if eq .key \"dedicated\"}}{{.key}}={{.value}}:{{.effect}}{{end}}{{end}}' "dedicated=foo:PreferNoSchedule"
# taint can remove a taint # taint can remove a taint
kubectl taint node 127.0.0.1 dedicated- kubectl taint node 127.0.0.1 dedicated-
# Post-condition: node has no taints # Post-condition: node doesn't have dedicated=foo:PreferNoSchedule taint
kube::test::get_object_assert "nodes 127.0.0.1" "{{.spec.taints}}" '<no value>' kube::test::get_object_assert "nodes 127.0.0.1" '{{range .spec.taints}}{{if eq .key \"dedicated\"}}{{.key}}={{.value}}:{{.effect}}{{end}}{{end}}' "" # expect no output
### kubectl cordon update with --dry-run does not mark node unschedulable ### kubectl cordon update with --dry-run does not mark node unschedulable
# Pre-condition: node is schedulable # Pre-condition: node is schedulable