e2e: move funs of framework/gpu to e2e_node

Haosdent Huang 2019-12-09 16:42:05 +08:00
parent ded2ff39c3
commit 973fddd155
3 changed files with 34 additions and 46 deletions
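
For orientation (not part of the commit): the helpers being moved are thin wrappers around the node's extended-resource capacity for the GPU resource behind gpu.NVIDIAGPUResourceName ("nvidia.com/gpu"). A minimal, self-contained Go sketch of that capacity lookup, with the hypothetical name nodeGPUCount standing in for the test helper, looks like this:

	package main

	import (
		"fmt"

		v1 "k8s.io/api/core/v1"
		"k8s.io/apimachinery/pkg/api/resource"
	)

	// nodeGPUCount mirrors what the moved helper does: read the extended resource
	// "nvidia.com/gpu" from the node's capacity and return 0 when the device
	// plugin has not advertised any GPUs yet.
	func nodeGPUCount(node *v1.Node) int64 {
		val, ok := node.Status.Capacity["nvidia.com/gpu"]
		if !ok {
			return 0
		}
		return val.Value()
	}

	func main() {
		node := &v1.Node{}
		node.Status.Capacity = v1.ResourceList{"nvidia.com/gpu": resource.MustParse("2")}
		fmt.Println(nodeGPUCount(node)) // prints 2
	}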

View File

@@ -5,12 +5,6 @@ go_library(
     srcs = ["gpu_util.go"],
     importpath = "k8s.io/kubernetes/test/e2e/framework/gpu",
     visibility = ["//visibility:public"],
-    deps = [
-        "//staging/src/k8s.io/api/core/v1:go_default_library",
-        "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
-        "//staging/src/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
-        "//test/e2e/framework:go_default_library",
-    ],
 )
 
 filegroup(

View File

@@ -16,13 +16,6 @@ limitations under the License.
 
 package gpu
 
-import (
-	v1 "k8s.io/api/core/v1"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/util/uuid"
-	"k8s.io/kubernetes/test/e2e/framework"
-)
-
 const (
 	// NVIDIAGPUResourceName is the extended name of the GPU resource since v1.8
 	// this uses the device plugin mechanism
@@ -33,31 +26,3 @@ const (
 	// so we can override the daemonset in other setups (non COS).
 	GPUDevicePluginDSYAML = "https://raw.githubusercontent.com/kubernetes/kubernetes/master/cluster/addons/device-plugins/nvidia-gpu/daemonset.yaml"
 )
-
-// NumberOfNVIDIAGPUs returns the number of GPUs advertised by a node
-// This is based on the Device Plugin system and expected to run on a COS based node
-// After the NVIDIA drivers were installed
-// TODO make this generic and not linked to COS only
-func NumberOfNVIDIAGPUs(node *v1.Node) int64 {
-	val, ok := node.Status.Capacity[NVIDIAGPUResourceName]
-	if !ok {
-		return 0
-	}
-	return val.Value()
-}
-
-// NVIDIADevicePlugin returns the official Google Device Plugin pod for NVIDIA GPU in GKE
-func NVIDIADevicePlugin() *v1.Pod {
-	ds, err := framework.DsFromManifest(GPUDevicePluginDSYAML)
-	framework.ExpectNoError(err)
-	p := &v1.Pod{
-		ObjectMeta: metav1.ObjectMeta{
-			Name:      "device-plugin-nvidia-gpu-" + string(uuid.NewUUID()),
-			Namespace: metav1.NamespaceSystem,
-		},
-		Spec: ds.Spec.Template.Spec,
-	}
-	// Remove node affinity
-	p.Spec.Affinity = nil
-	return p
-}
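
After this hunk the gpu package exports only constants, which is why the BUILD deps above could be dropped; suites that still import it reference only those names. A minimal consumer sketch (package main and the print statements are illustrative, not part of the commit):

	package main

	import (
		"fmt"

		"k8s.io/kubernetes/test/e2e/framework/gpu"
	)

	func main() {
		// Only the constants remain in the package; the helper funcs now live in e2e_node.
		fmt.Println(gpu.NVIDIAGPUResourceName) // the extended GPU resource name
		fmt.Println(gpu.GPUDevicePluginDSYAML) // URL of the device-plugin DaemonSet manifest
	}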

View File

@@ -23,6 +23,7 @@ import (
 
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/uuid"
 	kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
 	"k8s.io/kubernetes/test/e2e/framework"
 	"k8s.io/kubernetes/test/e2e/framework/gpu"
@@ -33,6 +34,34 @@ import (
 	"github.com/prometheus/common/model"
 )
 
+// numberOfNVIDIAGPUs returns the number of GPUs advertised by a node
+// This is based on the Device Plugin system and expected to run on a COS based node
+// After the NVIDIA drivers were installed
+// TODO make this generic and not linked to COS only
+func numberOfNVIDIAGPUs(node *v1.Node) int64 {
+	val, ok := node.Status.Capacity[gpu.NVIDIAGPUResourceName]
+	if !ok {
+		return 0
+	}
+	return val.Value()
+}
+
+// NVIDIADevicePlugin returns the official Google Device Plugin pod for NVIDIA GPU in GKE
+func NVIDIADevicePlugin() *v1.Pod {
+	ds, err := framework.DsFromManifest(gpu.GPUDevicePluginDSYAML)
+	framework.ExpectNoError(err)
+	p := &v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "device-plugin-nvidia-gpu-" + string(uuid.NewUUID()),
+			Namespace: metav1.NamespaceSystem,
+		},
+		Spec: ds.Spec.Template.Spec,
+	}
+	// Remove node affinity
+	p.Spec.Affinity = nil
+	return p
+}
+
 // Serial because the test restarts Kubelet
 var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeature:GPUDevicePlugin][Serial] [Disruptive]", func() {
 	f := framework.NewDefaultFramework("device-plugin-gpus-errors")
@@ -47,15 +76,15 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi
 		}
 
 		ginkgo.By("Creating the Google Device Plugin pod for NVIDIA GPU in GKE")
-		devicePluginPod, err = f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Create(gpu.NVIDIADevicePlugin())
+		devicePluginPod, err = f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Create(NVIDIADevicePlugin())
 		framework.ExpectNoError(err)
 
 		ginkgo.By("Waiting for GPUs to become available on the local node")
 		gomega.Eventually(func() bool {
-			return gpu.NumberOfNVIDIAGPUs(getLocalNode(f)) > 0
+			return numberOfNVIDIAGPUs(getLocalNode(f)) > 0
 		}, 5*time.Minute, framework.Poll).Should(gomega.BeTrue())
 
-		if gpu.NumberOfNVIDIAGPUs(getLocalNode(f)) < 2 {
+		if numberOfNVIDIAGPUs(getLocalNode(f)) < 2 {
 			ginkgo.Skip("Not enough GPUs to execute this test (at least two needed)")
 		}
 	})
@@ -95,7 +124,7 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi
 		restartKubelet()
 		framework.WaitForAllNodesSchedulable(f.ClientSet, framework.TestContext.NodeSchedulableTimeout)
 		gomega.Eventually(func() bool {
-			return gpu.NumberOfNVIDIAGPUs(getLocalNode(f)) > 0
+			return numberOfNVIDIAGPUs(getLocalNode(f)) > 0
 		}, 5*time.Minute, framework.Poll).Should(gomega.BeTrue())
 
 		p2 := f.PodClient().CreateSync(makeBusyboxPod(gpu.NVIDIAGPUResourceName, podRECMD))
@@ -110,7 +139,7 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi
 		gomega.Eventually(func() bool {
 			node, err := f.ClientSet.CoreV1().Nodes().Get(framework.TestContext.NodeName, metav1.GetOptions{})
 			framework.ExpectNoError(err)
-			return gpu.NumberOfNVIDIAGPUs(node) <= 0
+			return numberOfNVIDIAGPUs(node) <= 0
 		}, 10*time.Minute, framework.Poll).Should(gomega.BeTrue())
 		ginkgo.By("Checking that scheduled pods can continue to run even after we delete device plugin.")
 		ensurePodContainerRestart(f, p1.Name, p1.Name)