Merge pull request #44520 from dashpole/test_eviction_fix

Automatic merge from submit-queue (batch tested with PRs 44520, 45253, 45838, 44685, 45901)

Ensure correct ordering between applying the dynamic kubelet config and setting up tests.

This PR simply places the body of the eviction test within its own nested Ginkgo Context. This ensures that the kubelet config is set before the pods are created, and that it is reverted only after the pods are deleted.
This commit is contained in:
Kubernetes Submit Queue 2017-05-16 21:27:54 -07:00 committed by GitHub
commit 85775105f1
2 changed files with 157 additions and 151 deletions

View File

@ -65,16 +65,22 @@ var _ = framework.KubeDescribe("AllocatableEviction [Slow] [Serial] [Disruptive]
} }
evictionTestTimeout := 40 * time.Minute evictionTestTimeout := 40 * time.Minute
testCondition := "Memory Pressure" testCondition := "Memory Pressure"
kubeletConfigUpdate := func(initialConfig *componentconfig.KubeletConfiguration) {
initialConfig.EvictionHard = "memory.available<10%" Context(fmt.Sprintf("when we run containers that should cause %s", testCondition), func() {
// Set large system and kube reserved values to trigger allocatable thresholds far before hard eviction thresholds. tempSetCurrentKubeletConfig(f, func(initialConfig *componentconfig.KubeletConfiguration) {
initialConfig.SystemReserved = componentconfig.ConfigurationMap(map[string]string{"memory": "1Gi"}) initialConfig.EvictionHard = "memory.available<10%"
initialConfig.KubeReserved = componentconfig.ConfigurationMap(map[string]string{"memory": "1Gi"}) // Set large system and kube reserved values to trigger allocatable thresholds far before hard eviction thresholds.
initialConfig.EnforceNodeAllocatable = []string{cm.NodeAllocatableEnforcementKey} initialConfig.SystemReserved = componentconfig.ConfigurationMap(map[string]string{"memory": "1Gi"})
initialConfig.ExperimentalNodeAllocatableIgnoreEvictionThreshold = false initialConfig.KubeReserved = componentconfig.ConfigurationMap(map[string]string{"memory": "1Gi"})
initialConfig.CgroupsPerQOS = true initialConfig.EnforceNodeAllocatable = []string{cm.NodeAllocatableEnforcementKey}
} initialConfig.ExperimentalNodeAllocatableIgnoreEvictionThreshold = false
runEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasMemoryPressure, kubeletConfigUpdate) initialConfig.CgroupsPerQOS = true
})
// Place the remainder of the test within a context so that the kubelet config is set before and after the test.
Context("With kubeconfig updated", func() {
runEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasMemoryPressure)
})
})
}) })
// Returns TRUE if the node has Memory Pressure, FALSE otherwise // Returns TRUE if the node has Memory Pressure, FALSE otherwise

View File

@ -114,11 +114,16 @@ var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive] [Flak
} }
evictionTestTimeout := 30 * time.Minute evictionTestTimeout := 30 * time.Minute
testCondition := "Disk Pressure due to Inodes" testCondition := "Disk Pressure due to Inodes"
kubeletConfigUpdate := func(initialConfig *componentconfig.KubeletConfiguration) {
initialConfig.EvictionHard = "nodefs.inodesFree<50%"
}
runEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasInodePressure, kubeletConfigUpdate) Context(fmt.Sprintf("when we run containers that should cause %s", testCondition), func() {
tempSetCurrentKubeletConfig(f, func(initialConfig *componentconfig.KubeletConfiguration) {
initialConfig.EvictionHard = "nodefs.inodesFree<50%"
})
// Place the remainder of the test within a context so that the kubelet config is set before and after the test.
Context("With kubeconfig updated", func() {
runEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasInodePressure)
})
})
}) })
// Struct used by runEvictionTest that specifies the pod, and when that pod should be evicted, relative to other pods // Struct used by runEvictionTest that specifies the pod, and when that pod should be evicted, relative to other pods
@ -137,171 +142,166 @@ type podTestSpec struct {
// It ensures that all lower evictionPriority pods are eventually evicted. // It ensures that all lower evictionPriority pods are eventually evicted.
// runEvictionTest then cleans up the testing environment by deleting provided nodes, and ensures that testCondition no longer exists // runEvictionTest then cleans up the testing environment by deleting provided nodes, and ensures that testCondition no longer exists
func runEvictionTest(f *framework.Framework, testCondition string, podTestSpecs []podTestSpec, evictionTestTimeout time.Duration, func runEvictionTest(f *framework.Framework, testCondition string, podTestSpecs []podTestSpec, evictionTestTimeout time.Duration,
hasPressureCondition func(*framework.Framework, string) (bool, error), updateFunction func(initialConfig *componentconfig.KubeletConfiguration)) { hasPressureCondition func(*framework.Framework, string) (bool, error)) {
BeforeEach(func() {
By("seting up pods to be used by tests")
for _, spec := range podTestSpecs {
By(fmt.Sprintf("creating pod with container: %s", spec.pod.Name))
f.PodClient().CreateSync(&spec.pod)
}
})
Context(fmt.Sprintf("when we run containers that should cause %s", testCondition), func() { It(fmt.Sprintf("should eventually see %s, and then evict all of the correct pods", testCondition), func() {
configEnabled, err := isKubeletConfigEnabled(f)
tempSetCurrentKubeletConfig(f, updateFunction) framework.ExpectNoError(err)
BeforeEach(func() { if !configEnabled {
By("seting up pods to be used by tests") framework.Skipf("Dynamic kubelet config must be enabled for this test to run.")
for _, spec := range podTestSpecs { }
By(fmt.Sprintf("creating pod with container: %s", spec.pod.Name)) Eventually(func() error {
f.PodClient().CreateSync(&spec.pod) hasPressure, err := hasPressureCondition(f, testCondition)
if err != nil {
return err
} }
}) if hasPressure {
return nil
It(fmt.Sprintf("should eventually see %s, and then evict all of the correct pods", testCondition), func() {
configEnabled, err := isKubeletConfigEnabled(f)
framework.ExpectNoError(err)
if !configEnabled {
framework.Skipf("Dynamic kubelet config must be enabled for this test to run.")
} }
Eventually(func() error { return fmt.Errorf("Condition: %s not encountered", testCondition)
hasPressure, err := hasPressureCondition(f, testCondition) }, evictionTestTimeout, evictionPollInterval).Should(BeNil())
if err != nil {
return err
}
if hasPressure {
return nil
}
return fmt.Errorf("Condition: %s not encountered", testCondition)
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
Eventually(func() error { Eventually(func() error {
// Gather current information // Gather current information
updatedPodList, err := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{}) updatedPodList, err := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{})
updatedPods := updatedPodList.Items updatedPods := updatedPodList.Items
for _, p := range updatedPods {
framework.Logf("fetching pod %s; phase= %v", p.Name, p.Status.Phase)
}
_, err = hasPressureCondition(f, testCondition)
if err != nil {
return err
}
By("checking eviction ordering and ensuring important pods dont fail")
done := true
for _, priorityPodSpec := range podTestSpecs {
var priorityPod v1.Pod
for _, p := range updatedPods { for _, p := range updatedPods {
framework.Logf("fetching pod %s; phase= %v", p.Name, p.Status.Phase) if p.Name == priorityPodSpec.pod.Name {
} priorityPod = p
_, err = hasPressureCondition(f, testCondition) }
if err != nil {
return err
} }
Expect(priorityPod).NotTo(BeNil())
By("checking eviction ordering and ensuring important pods dont fail") // Check eviction ordering.
done := true // Note: it is alright for a priority 1 and priority 2 pod (for example) to fail in the same round
for _, priorityPodSpec := range podTestSpecs { for _, lowPriorityPodSpec := range podTestSpecs {
var priorityPod v1.Pod var lowPriorityPod v1.Pod
for _, p := range updatedPods { for _, p := range updatedPods {
if p.Name == priorityPodSpec.pod.Name { if p.Name == lowPriorityPodSpec.pod.Name {
priorityPod = p lowPriorityPod = p
} }
} }
Expect(priorityPod).NotTo(BeNil()) Expect(lowPriorityPod).NotTo(BeNil())
if priorityPodSpec.evictionPriority < lowPriorityPodSpec.evictionPriority && lowPriorityPod.Status.Phase == v1.PodRunning {
// Check eviction ordering.
// Note: it is alright for a priority 1 and priority 2 pod (for example) to fail in the same round
for _, lowPriorityPodSpec := range podTestSpecs {
var lowPriorityPod v1.Pod
for _, p := range updatedPods {
if p.Name == lowPriorityPodSpec.pod.Name {
lowPriorityPod = p
}
}
Expect(lowPriorityPod).NotTo(BeNil())
if priorityPodSpec.evictionPriority < lowPriorityPodSpec.evictionPriority && lowPriorityPod.Status.Phase == v1.PodRunning {
Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
fmt.Sprintf("%s pod failed before %s pod", priorityPodSpec.pod.Name, lowPriorityPodSpec.pod.Name))
}
}
// EvictionPriority 0 pods should not fail
if priorityPodSpec.evictionPriority == 0 {
Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed), Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
fmt.Sprintf("%s pod failed (and shouldn't have failed)", priorityPod.Name)) fmt.Sprintf("%s pod failed before %s pod", priorityPodSpec.pod.Name, lowPriorityPodSpec.pod.Name))
}
// If a pod that is not evictionPriority 0 has not been evicted, we are not done
if priorityPodSpec.evictionPriority != 0 && priorityPod.Status.Phase != v1.PodFailed {
done = false
} }
} }
if done {
return nil
}
return fmt.Errorf("pods that caused %s have not been evicted.", testCondition)
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
// We observe pressure from the API server. The eviction manager observes pressure from the kubelet internal stats. // EvictionPriority 0 pods should not fail
// This means the eviction manager will observe pressure before we will, creating a delay between when the eviction manager if priorityPodSpec.evictionPriority == 0 {
// evicts a pod, and when we observe the pressure by querrying the API server. Add a delay here to account for this delay Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
By("making sure pressure from test has surfaced before continuing") fmt.Sprintf("%s pod failed (and shouldn't have failed)", priorityPod.Name))
time.Sleep(pressureDelay) }
By("making sure conditions eventually return to normal") // If a pod that is not evictionPriority 0 has not been evicted, we are not done
Eventually(func() error { if priorityPodSpec.evictionPriority != 0 && priorityPod.Status.Phase != v1.PodFailed {
hasPressure, err := hasPressureCondition(f, testCondition) done = false
if err != nil {
return err
}
if hasPressure {
return fmt.Errorf("Conditions havent returned to normal, we still have %s", testCondition)
} }
}
if done {
return nil return nil
}, evictionTestTimeout, evictionPollInterval).Should(BeNil()) }
return fmt.Errorf("pods that caused %s have not been evicted.", testCondition)
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
By("making sure conditions do not return, and that pods that shouldnt fail dont fail") // We observe pressure from the API server. The eviction manager observes pressure from the kubelet internal stats.
Consistently(func() error { // This means the eviction manager will observe pressure before we will, creating a delay between when the eviction manager
hasPressure, err := hasPressureCondition(f, testCondition) // evicts a pod, and when we observe the pressure by querrying the API server. Add a delay here to account for this delay
if err != nil { By("making sure pressure from test has surfaced before continuing")
// Race conditions sometimes occur when checking pressure condition due to #38710 (Docker bug) time.Sleep(pressureDelay)
// Do not fail the test when this occurs, since this is expected to happen occasionally.
framework.Logf("Failed to check pressure condition. Error: %v", err) By("making sure conditions eventually return to normal")
return nil Eventually(func() error {
} hasPressure, err := hasPressureCondition(f, testCondition)
if hasPressure { if err != nil {
return fmt.Errorf("%s dissappeared and then reappeared", testCondition) return err
} }
// Gather current information if hasPressure {
updatedPodList, _ := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{}) return fmt.Errorf("Conditions havent returned to normal, we still have %s", testCondition)
for _, priorityPodSpec := range podTestSpecs { }
// EvictionPriority 0 pods should not fail return nil
if priorityPodSpec.evictionPriority == 0 { }, evictionTestTimeout, evictionPollInterval).Should(BeNil())
for _, p := range updatedPodList.Items {
if p.Name == priorityPodSpec.pod.Name && p.Status.Phase == v1.PodFailed { By("making sure conditions do not return, and that pods that shouldnt fail dont fail")
return fmt.Errorf("%s pod failed (delayed) and shouldn't have failed", p.Name) Consistently(func() error {
} hasPressure, err := hasPressureCondition(f, testCondition)
if err != nil {
// Race conditions sometimes occur when checking pressure condition due to #38710 (Docker bug)
// Do not fail the test when this occurs, since this is expected to happen occasionally.
framework.Logf("Failed to check pressure condition. Error: %v", err)
return nil
}
if hasPressure {
return fmt.Errorf("%s dissappeared and then reappeared", testCondition)
}
// Gather current information
updatedPodList, _ := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{})
for _, priorityPodSpec := range podTestSpecs {
// EvictionPriority 0 pods should not fail
if priorityPodSpec.evictionPriority == 0 {
for _, p := range updatedPodList.Items {
if p.Name == priorityPodSpec.pod.Name && p.Status.Phase == v1.PodFailed {
return fmt.Errorf("%s pod failed (delayed) and shouldn't have failed", p.Name)
} }
} }
} }
return nil }
}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(BeNil()) return nil
}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(BeNil())
By("making sure we can start a new pod after the test") By("making sure we can start a new pod after the test")
podName := "test-admit-pod" podName := "test-admit-pod"
f.PodClient().CreateSync(&v1.Pod{ f.PodClient().CreateSync(&v1.Pod{
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Name: podName, Name: podName,
}, },
Spec: v1.PodSpec{ Spec: v1.PodSpec{
RestartPolicy: v1.RestartPolicyNever, RestartPolicy: v1.RestartPolicyNever,
Containers: []v1.Container{ Containers: []v1.Container{
{ {
Image: framework.GetPauseImageNameForHostArch(), Image: framework.GetPauseImageNameForHostArch(),
Name: podName, Name: podName,
},
}, },
}, },
}) },
}) })
})
AfterEach(func() { AfterEach(func() {
By("deleting pods") By("deleting pods")
for _, spec := range podTestSpecs { for _, spec := range podTestSpecs {
By(fmt.Sprintf("deleting pod: %s", spec.pod.Name)) By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
f.PodClient().DeleteSync(spec.pod.Name, &metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout) f.PodClient().DeleteSync(spec.pod.Name, &metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)
} }
if CurrentGinkgoTestDescription().Failed { if CurrentGinkgoTestDescription().Failed {
if framework.TestContext.DumpLogsOnFailure { if framework.TestContext.DumpLogsOnFailure {
logPodEvents(f) logPodEvents(f)
logNodeEvents(f) logNodeEvents(f)
}
By("sleeping to allow for cleanup of test")
time.Sleep(postTestConditionMonitoringPeriod)
} }
}) By("sleeping to allow for cleanup of test")
time.Sleep(postTestConditionMonitoringPeriod)
}
}) })
} }