Merge pull request #67734 from Huang-Wei/fix-nodelost-issue

Automatic merge from submit-queue (batch tested with PRs 64597, 67854, 67734, 67917, 67688). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Fix an issue where scheduling doesn't respect the NodeLost status of a node

**What this PR does / why we need it**:

- if a Node is in Unknown status (NodeLost), apply the unreachable taint with the NoSchedule effect (see the sketch after this list)
- refactor the internal condition-to-taint mapping data structure
- update unit tests
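
For illustration only (not code from this PR), a minimal sketch of the NoSchedule taint the controller would add when a node's `Ready` condition is `Unknown`; the literal key string is an assumption matching `algorithm.TaintNodeUnreachable`:

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

func main() {
	// Sketch: the taint applied for Ready=Unknown nodes. The key is assumed to
	// equal algorithm.TaintNodeUnreachable ("node.kubernetes.io/unreachable").
	unreachable := v1.Taint{
		Key:    "node.kubernetes.io/unreachable",
		Effect: v1.TaintEffectNoSchedule,
	}
	fmt.Printf("taint for Ready=Unknown: %s:%s\n", unreachable.Key, unreachable.Effect)
}
```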

**Which issue(s) this PR fixes**:
Fixes #67733, and very likely #67536

**Special notes for your reviewer**:

See the detailed reproduction steps in #67733.

**Release note**:
```release-note
Apply the unreachable taint to a node when it loses its network connection.
```
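
As a usage note (an illustration, not part of this PR), a pod that must still be scheduled onto such a node can tolerate the taint; a minimal sketch using the core `v1` types:

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

func main() {
	// Sketch: a toleration that lets a pod be scheduled despite the
	// node.kubernetes.io/unreachable:NoSchedule taint described above.
	tol := v1.Toleration{
		Key:      "node.kubernetes.io/unreachable",
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	}
	fmt.Printf("toleration: key=%s op=%s effect=%s\n", tol.Key, tol.Operator, tol.Effect)
}
```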
Kubernetes Submit Queue 2018-08-27 22:18:12 -07:00 committed by GitHub
commit 0148f25fe7
3 changed files with 53 additions and 26 deletions


@@ -77,40 +77,35 @@ var (
 		Effect: v1.TaintEffectNoExecute,
 	}
-	nodeConditionToTaintKeyStatusMap = map[v1.NodeConditionType]struct {
-		taintKey string
-		// noScheduleStatus is the condition under which the node should be tainted as not schedulable for this
-		// NodeConditionType
-		noScheduleStatus v1.ConditionStatus
-	}{
+	// map {NodeConditionType: {ConditionStatus: TaintKey}}
+	// represents which NodeConditionType under which ConditionStatus should be
+	// tainted with which TaintKey
+	// for certain NodeConditionType, there are multiple {ConditionStatus,TaintKey} pairs
+	nodeConditionToTaintKeyStatusMap = map[v1.NodeConditionType]map[v1.ConditionStatus]string{
 		v1.NodeReady: {
-			taintKey:         algorithm.TaintNodeNotReady,
-			noScheduleStatus: v1.ConditionFalse,
+			v1.ConditionFalse:   algorithm.TaintNodeNotReady,
+			v1.ConditionUnknown: algorithm.TaintNodeUnreachable,
 		},
 		v1.NodeMemoryPressure: {
-			taintKey:         algorithm.TaintNodeMemoryPressure,
-			noScheduleStatus: v1.ConditionTrue,
+			v1.ConditionTrue: algorithm.TaintNodeMemoryPressure,
 		},
 		v1.NodeOutOfDisk: {
-			taintKey:         algorithm.TaintNodeOutOfDisk,
-			noScheduleStatus: v1.ConditionTrue,
+			v1.ConditionTrue: algorithm.TaintNodeOutOfDisk,
 		},
 		v1.NodeDiskPressure: {
-			taintKey:         algorithm.TaintNodeDiskPressure,
-			noScheduleStatus: v1.ConditionTrue,
+			v1.ConditionTrue: algorithm.TaintNodeDiskPressure,
 		},
 		v1.NodeNetworkUnavailable: {
-			taintKey:         algorithm.TaintNodeNetworkUnavailable,
-			noScheduleStatus: v1.ConditionTrue,
+			v1.ConditionTrue: algorithm.TaintNodeNetworkUnavailable,
 		},
 		v1.NodePIDPressure: {
-			taintKey:         algorithm.TaintNodePIDPressure,
-			noScheduleStatus: v1.ConditionTrue,
+			v1.ConditionTrue: algorithm.TaintNodePIDPressure,
 		},
 	}
 	taintKeyToNodeConditionMap = map[string]v1.NodeConditionType{
 		algorithm.TaintNodeNotReady:           v1.NodeReady,
+		algorithm.TaintNodeUnreachable:        v1.NodeReady,
 		algorithm.TaintNodeNetworkUnavailable: v1.NodeNetworkUnavailable,
 		algorithm.TaintNodeMemoryPressure:     v1.NodeMemoryPressure,
 		algorithm.TaintNodeOutOfDisk:          v1.NodeOutOfDisk,
@@ -454,10 +449,10 @@ func (nc *Controller) doNoScheduleTaintingPass(node *v1.Node) error {
 	// Map node's condition to Taints.
 	var taints []v1.Taint
 	for _, condition := range node.Status.Conditions {
-		if taint, found := nodeConditionToTaintKeyStatusMap[condition.Type]; found {
-			if condition.Status == taint.noScheduleStatus {
+		if taintMap, found := nodeConditionToTaintKeyStatusMap[condition.Type]; found {
+			if taintKey, found := taintMap[condition.Status]; found {
 				taints = append(taints, v1.Taint{
-					Key:    taint.taintKey,
+					Key:    taintKey,
 					Effect: v1.TaintEffectNoSchedule,
 				})
 			}
@@ -473,6 +468,10 @@ func (nc *Controller) doNoScheduleTaintingPass(node *v1.Node) error {
 	// Get exist taints of node.
 	nodeTaints := taintutils.TaintSetFilter(node.Spec.Taints, func(t *v1.Taint) bool {
+		// only NoSchedule taints are candidates to be compared with "taints" later
+		if t.Effect != v1.TaintEffectNoSchedule {
+			return false
+		}
 		// Find unschedulable taint of node.
 		if t.Key == algorithm.TaintNodeUnschedulable {
 			return true
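
To read the refactor above outside the diff, here is a minimal, self-contained sketch of the new `{NodeConditionType: {ConditionStatus: TaintKey}}` lookup; the stand-in types and the taint key strings are simplifications of the real `v1` and `algorithm` constants, for illustration only:

```go
package main

import "fmt"

// Simplified stand-ins for the real v1 types; illustration only.
type NodeConditionType string
type ConditionStatus string

const (
	NodeReady NodeConditionType = "Ready"

	ConditionTrue    ConditionStatus = "True"
	ConditionFalse   ConditionStatus = "False"
	ConditionUnknown ConditionStatus = "Unknown"
)

// Mirrors the refactored nodeConditionToTaintKeyStatusMap: one condition type
// can now map several statuses to different taint keys.
var conditionToTaintKey = map[NodeConditionType]map[ConditionStatus]string{
	NodeReady: {
		ConditionFalse:   "node.kubernetes.io/not-ready",   // assumed value of algorithm.TaintNodeNotReady
		ConditionUnknown: "node.kubernetes.io/unreachable", // assumed value of algorithm.TaintNodeUnreachable
	},
}

// taintKeyFor looks up the NoSchedule taint key for a node condition, if any.
func taintKeyFor(t NodeConditionType, s ConditionStatus) (string, bool) {
	statuses, found := conditionToTaintKey[t]
	if !found {
		return "", false
	}
	key, found := statuses[s]
	return key, found
}

func main() {
	// The NodeLost case: Ready=Unknown now resolves to the unreachable taint key.
	if key, ok := taintKeyFor(NodeReady, ConditionUnknown); ok {
		fmt.Println("taint key for Ready=Unknown:", key)
	}
}
```

The point of the map-of-maps shape is that a single condition type (Ready) can yield different taints for different statuses, which is what lets Ready=Unknown produce the unreachable taint while Ready=False keeps producing the not-ready taint.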


@@ -2167,6 +2167,10 @@ func TestTaintsNodeByCondition(t *testing.T) {
 		Key:    algorithm.TaintNodeNotReady,
 		Effect: v1.TaintEffectNoSchedule,
 	}
+	unreachableTaint := &v1.Taint{
+		Key:    algorithm.TaintNodeUnreachable,
+		Effect: v1.TaintEffectNoSchedule,
+	}
 	tests := []struct {
 		Name string
@@ -2299,6 +2303,30 @@
 			},
 			ExpectedTaints: []*v1.Taint{notReadyTaint},
 		},
+		{
+			Name: "Ready is unknown",
+			Node: &v1.Node{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:              "node0",
+					CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
+					Labels: map[string]string{
+						kubeletapis.LabelZoneRegion:        "region1",
+						kubeletapis.LabelZoneFailureDomain: "zone1",
+					},
+				},
+				Status: v1.NodeStatus{
+					Conditions: []v1.NodeCondition{
+						{
+							Type:               v1.NodeReady,
+							Status:             v1.ConditionUnknown,
+							LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+							LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+						},
+					},
+				},
+			},
+			ExpectedTaints: []*v1.Taint{unreachableTaint},
+		},
 	}
 	for _, test := range tests {


@@ -72,15 +72,15 @@ __EOF__
 __EOF__
 # taint/untaint
-# Pre-condition: node has no taints
-kube::test::get_object_assert "nodes 127.0.0.1" "{{.spec.taints}}" '<no value>'
+# Pre-condition: node doesn't have dedicated=foo:PreferNoSchedule taint
+kube::test::get_object_assert "nodes 127.0.0.1" '{{range .spec.taints}}{{if eq .key \"dedicated\"}}{{.key}}={{.value}}:{{.effect}}{{end}}{{end}}' "" # expect no output
 # taint can add a taint
 kubectl taint node 127.0.0.1 dedicated=foo:PreferNoSchedule
-kube::test::get_object_assert "nodes 127.0.0.1" '{{range .spec.taints}}{{.effect}}{{end}}' 'PreferNoSchedule'
+kube::test::get_object_assert "nodes 127.0.0.1" '{{range .spec.taints}}{{if eq .key \"dedicated\"}}{{.key}}={{.value}}:{{.effect}}{{end}}{{end}}' "dedicated=foo:PreferNoSchedule"
 # taint can remove a taint
 kubectl taint node 127.0.0.1 dedicated-
-# Post-condition: node has no taints
-kube::test::get_object_assert "nodes 127.0.0.1" "{{.spec.taints}}" '<no value>'
+# Post-condition: node doesn't have dedicated=foo:PreferNoSchedule taint
+kube::test::get_object_assert "nodes 127.0.0.1" '{{range .spec.taints}}{{if eq .key \"dedicated\"}}{{.key}}={{.value}}:{{.effect}}{{end}}{{end}}' "" # expect no output
 ### kubectl cordon update with --dry-run does not mark node unschedulable
 # Pre-condition: node is schedulable