Merge pull request #76400 from jiatongw/gpu

[e2e] Move gpu_util.go to e2e/framework/gpu
This commit is contained in:
Kubernetes Prow Robot 2019-04-12 00:41:28 -07:00 committed by GitHub
commit daf75a0ad5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 58 additions and 19 deletions

View File

@ -14,7 +14,6 @@ go_library(
"framework.go", "framework.go",
"get-kubemark-resource-usage.go", "get-kubemark-resource-usage.go",
"google_compute.go", "google_compute.go",
"gpu_util.go",
"jobs_util.go", "jobs_util.go",
"kubelet_stats.go", "kubelet_stats.go",
"log_size_monitoring.go", "log_size_monitoring.go",
@ -156,6 +155,7 @@ filegroup(
":package-srcs", ":package-srcs",
"//test/e2e/framework/config:all-srcs", "//test/e2e/framework/config:all-srcs",
"//test/e2e/framework/ginkgowrapper:all-srcs", "//test/e2e/framework/ginkgowrapper:all-srcs",
"//test/e2e/framework/gpu:all-srcs",
"//test/e2e/framework/ingress:all-srcs", "//test/e2e/framework/ingress:all-srcs",
"//test/e2e/framework/metrics:all-srcs", "//test/e2e/framework/metrics:all-srcs",
"//test/e2e/framework/podlogs:all-srcs", "//test/e2e/framework/podlogs:all-srcs",

View File

@ -0,0 +1,29 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
# Go library target for the gpu e2e framework package; its only source file
# is gpu_util.go.
go_library(
name = "go_default_library",
srcs = ["gpu_util.go"],
# Go import path under which callers import this package.
importpath = "k8s.io/kubernetes/test/e2e/framework/gpu",
visibility = ["//visibility:public"],
# Build dependencies: core/v1 and meta/v1 API types, the uuid utility,
# the parent e2e framework package, and klog.
deps = [
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
"//test/e2e/framework:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
],
)
# Private file group collecting every file matched by the "**" glob in this
# package.
# NOTE(review): the "automanaged" tag suggests this rule is maintained by
# tooling rather than by hand -- confirm before editing manually.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Publicly visible file group that re-exports :package-srcs, so other
# packages can reference this package's sources via the ":all-srcs" label.
# NOTE(review): the "automanaged" tag suggests this rule is maintained by
# tooling rather than by hand -- confirm before editing manually.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@ -14,13 +14,14 @@ See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
*/ */
package framework package gpu
import ( import (
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/uuid" "k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/klog" "k8s.io/klog"
"k8s.io/kubernetes/test/e2e/framework"
) )
const ( const (
@ -48,8 +49,8 @@ func NumberOfNVIDIAGPUs(node *v1.Node) int64 {
// NVIDIADevicePlugin returns the official Google Device Plugin pod for NVIDIA GPU in GKE // NVIDIADevicePlugin returns the official Google Device Plugin pod for NVIDIA GPU in GKE
func NVIDIADevicePlugin() *v1.Pod { func NVIDIADevicePlugin() *v1.Pod {
ds, err := DsFromManifest(GPUDevicePluginDSYAML) ds, err := framework.DsFromManifest(GPUDevicePluginDSYAML)
ExpectNoError(err) framework.ExpectNoError(err)
p := &v1.Pod{ p := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Name: "device-plugin-nvidia-gpu-" + string(uuid.NewUUID()), Name: "device-plugin-nvidia-gpu-" + string(uuid.NewUUID()),
@ -64,7 +65,7 @@ func NVIDIADevicePlugin() *v1.Pod {
// GetGPUDevicePluginImage returns the image of GPU device plugin. // GetGPUDevicePluginImage returns the image of GPU device plugin.
func GetGPUDevicePluginImage() string { func GetGPUDevicePluginImage() string {
ds, err := DsFromManifest(GPUDevicePluginDSYAML) ds, err := framework.DsFromManifest(GPUDevicePluginDSYAML)
if err != nil { if err != nil {
klog.Errorf("Failed to parse the device plugin image: %v", err) klog.Errorf("Failed to parse the device plugin image: %v", err)
return "" return ""

View File

@ -37,6 +37,7 @@ go_library(
"//test/e2e/common:go_default_library", "//test/e2e/common:go_default_library",
"//test/e2e/framework:go_default_library", "//test/e2e/framework:go_default_library",
"//test/e2e/framework/config:go_default_library", "//test/e2e/framework/config:go_default_library",
"//test/e2e/framework/gpu:go_default_library",
"//test/e2e/framework/metrics:go_default_library", "//test/e2e/framework/metrics:go_default_library",
"//test/e2e/instrumentation/common:go_default_library", "//test/e2e/instrumentation/common:go_default_library",
"//test/e2e/scheduling:go_default_library", "//test/e2e/scheduling:go_default_library",

View File

@ -24,11 +24,12 @@ import (
"github.com/onsi/ginkgo" "github.com/onsi/ginkgo"
"golang.org/x/oauth2/google" "golang.org/x/oauth2/google"
gcm "google.golang.org/api/monitoring/v3" gcm "google.golang.org/api/monitoring/v3"
"k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/test/e2e/framework" "k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/framework/gpu"
instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common" instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
"k8s.io/kubernetes/test/e2e/scheduling" "k8s.io/kubernetes/test/e2e/scheduling"
"k8s.io/kubernetes/test/utils/image" "k8s.io/kubernetes/test/utils/image"
@ -88,7 +89,7 @@ func testStackdriverAcceleratorMonitoring(f *framework.Framework) {
Args: []string{"nvidia-smi && sleep infinity"}, Args: []string{"nvidia-smi && sleep infinity"},
Resources: v1.ResourceRequirements{ Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{ Limits: v1.ResourceList{
framework.NVIDIAGPUResourceName: *resource.NewQuantity(1, resource.DecimalSI), gpu.NVIDIAGPUResourceName: *resource.NewQuantity(1, resource.DecimalSI),
}, },
}, },
}, },

View File

@ -44,6 +44,7 @@ go_library(
"//staging/src/k8s.io/client-go/tools/cache:go_default_library", "//staging/src/k8s.io/client-go/tools/cache:go_default_library",
"//test/e2e/common:go_default_library", "//test/e2e/common:go_default_library",
"//test/e2e/framework:go_default_library", "//test/e2e/framework:go_default_library",
"//test/e2e/framework/gpu:go_default_library",
"//test/e2e/framework/providers/gce:go_default_library", "//test/e2e/framework/providers/gce:go_default_library",
"//test/utils:go_default_library", "//test/utils:go_default_library",
"//test/utils/image:go_default_library", "//test/utils/image:go_default_library",

View File

@ -20,12 +20,13 @@ import (
"os" "os"
"time" "time"
"k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/uuid" "k8s.io/apimachinery/pkg/util/uuid"
extensionsinternal "k8s.io/kubernetes/pkg/apis/extensions" extensionsinternal "k8s.io/kubernetes/pkg/apis/extensions"
"k8s.io/kubernetes/test/e2e/framework" "k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/framework/gpu"
imageutils "k8s.io/kubernetes/test/utils/image" imageutils "k8s.io/kubernetes/test/utils/image"
. "github.com/onsi/ginkgo" . "github.com/onsi/ginkgo"
@ -114,7 +115,7 @@ func SetupNVIDIAGPUNode(f *framework.Framework, setupResourceGatherer bool) *fra
} else { } else {
dsYamlUrl = "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/daemonset.yaml" dsYamlUrl = "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/daemonset.yaml"
} }
gpuResourceName = framework.NVIDIAGPUResourceName gpuResourceName = gpu.NVIDIAGPUResourceName
framework.Logf("Using %v", dsYamlUrl) framework.Logf("Using %v", dsYamlUrl)
// Creates the DaemonSet that installs Nvidia Drivers. // Creates the DaemonSet that installs Nvidia Drivers.

View File

@ -37,6 +37,7 @@ go_library(
"//staging/src/k8s.io/client-go/kubernetes:go_default_library", "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
"//test/e2e/common:go_default_library", "//test/e2e/common:go_default_library",
"//test/e2e/framework:go_default_library", "//test/e2e/framework:go_default_library",
"//test/e2e/framework/gpu:go_default_library",
"//test/e2e/framework/testfiles:go_default_library", "//test/e2e/framework/testfiles:go_default_library",
"//test/e2e/scheduling:go_default_library", "//test/e2e/scheduling:go_default_library",
"//test/utils/image:go_default_library", "//test/utils/image:go_default_library",

View File

@ -23,6 +23,7 @@ import (
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
"k8s.io/kubernetes/test/e2e/framework" "k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/framework/gpu"
"k8s.io/kubernetes/test/e2e/scheduling" "k8s.io/kubernetes/test/e2e/scheduling"
imageutils "k8s.io/kubernetes/test/utils/image" imageutils "k8s.io/kubernetes/test/utils/image"
@ -78,7 +79,7 @@ func (t *NvidiaGPUUpgradeTest) startJob(f *framework.Framework) {
Command: []string{"/bin/sh", "-c", "./vectorAdd && sleep 60"}, Command: []string{"/bin/sh", "-c", "./vectorAdd && sleep 60"},
Resources: v1.ResourceRequirements{ Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{ Limits: v1.ResourceList{
framework.NVIDIAGPUResourceName: *resource.NewQuantity(1, resource.DecimalSI), gpu.NVIDIAGPUResourceName: *resource.NewQuantity(1, resource.DecimalSI),
}, },
}, },
}, },

View File

@ -47,6 +47,7 @@ go_library(
"//staging/src/k8s.io/kubelet/config/v1beta1:go_default_library", "//staging/src/k8s.io/kubelet/config/v1beta1:go_default_library",
"//test/e2e/common:go_default_library", "//test/e2e/common:go_default_library",
"//test/e2e/framework:go_default_library", "//test/e2e/framework:go_default_library",
"//test/e2e/framework/gpu:go_default_library",
"//test/e2e/framework/metrics:go_default_library", "//test/e2e/framework/metrics:go_default_library",
"//test/utils/image:go_default_library", "//test/utils/image:go_default_library",
"//vendor/github.com/blang/semver:go_default_library", "//vendor/github.com/blang/semver:go_default_library",

View File

@ -21,10 +21,11 @@ import (
"strconv" "strconv"
"time" "time"
"k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics" kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/kubernetes/test/e2e/framework" "k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/framework/gpu"
"k8s.io/kubernetes/test/e2e/framework/metrics" "k8s.io/kubernetes/test/e2e/framework/metrics"
. "github.com/onsi/ginkgo" . "github.com/onsi/ginkgo"
@ -46,15 +47,15 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi
} }
By("Creating the Google Device Plugin pod for NVIDIA GPU in GKE") By("Creating the Google Device Plugin pod for NVIDIA GPU in GKE")
devicePluginPod, err = f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Create(framework.NVIDIADevicePlugin()) devicePluginPod, err = f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Create(gpu.NVIDIADevicePlugin())
framework.ExpectNoError(err) framework.ExpectNoError(err)
By("Waiting for GPUs to become available on the local node") By("Waiting for GPUs to become available on the local node")
Eventually(func() bool { Eventually(func() bool {
return framework.NumberOfNVIDIAGPUs(getLocalNode(f)) > 0 return gpu.NumberOfNVIDIAGPUs(getLocalNode(f)) > 0
}, 5*time.Minute, framework.Poll).Should(BeTrue()) }, 5*time.Minute, framework.Poll).Should(BeTrue())
if framework.NumberOfNVIDIAGPUs(getLocalNode(f)) < 2 { if gpu.NumberOfNVIDIAGPUs(getLocalNode(f)) < 2 {
Skip("Not enough GPUs to execute this test (at least two needed)") Skip("Not enough GPUs to execute this test (at least two needed)")
} }
}) })
@ -75,7 +76,7 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi
It("checks that when Kubelet restarts exclusive GPU assignation to pods is kept.", func() { It("checks that when Kubelet restarts exclusive GPU assignation to pods is kept.", func() {
By("Creating one GPU pod on a node with at least two GPUs") By("Creating one GPU pod on a node with at least two GPUs")
podRECMD := "devs=$(ls /dev/ | egrep '^nvidia[0-9]+$') && echo gpu devices: $devs" podRECMD := "devs=$(ls /dev/ | egrep '^nvidia[0-9]+$') && echo gpu devices: $devs"
p1 := f.PodClient().CreateSync(makeBusyboxPod(framework.NVIDIAGPUResourceName, podRECMD)) p1 := f.PodClient().CreateSync(makeBusyboxPod(gpu.NVIDIAGPUResourceName, podRECMD))
deviceIDRE := "gpu devices: (nvidia[0-9]+)" deviceIDRE := "gpu devices: (nvidia[0-9]+)"
devId1 := parseLog(f, p1.Name, p1.Name, deviceIDRE) devId1 := parseLog(f, p1.Name, p1.Name, deviceIDRE)
@ -94,9 +95,9 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi
restartKubelet() restartKubelet()
framework.WaitForAllNodesSchedulable(f.ClientSet, framework.TestContext.NodeSchedulableTimeout) framework.WaitForAllNodesSchedulable(f.ClientSet, framework.TestContext.NodeSchedulableTimeout)
Eventually(func() bool { Eventually(func() bool {
return framework.NumberOfNVIDIAGPUs(getLocalNode(f)) > 0 return gpu.NumberOfNVIDIAGPUs(getLocalNode(f)) > 0
}, 5*time.Minute, framework.Poll).Should(BeTrue()) }, 5*time.Minute, framework.Poll).Should(BeTrue())
p2 := f.PodClient().CreateSync(makeBusyboxPod(framework.NVIDIAGPUResourceName, podRECMD)) p2 := f.PodClient().CreateSync(makeBusyboxPod(gpu.NVIDIAGPUResourceName, podRECMD))
By("Checking that pods got a different GPU") By("Checking that pods got a different GPU")
devId2 := parseLog(f, p2.Name, p2.Name, deviceIDRE) devId2 := parseLog(f, p2.Name, p2.Name, deviceIDRE)
@ -109,7 +110,7 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi
Eventually(func() bool { Eventually(func() bool {
node, err := f.ClientSet.CoreV1().Nodes().Get(framework.TestContext.NodeName, metav1.GetOptions{}) node, err := f.ClientSet.CoreV1().Nodes().Get(framework.TestContext.NodeName, metav1.GetOptions{})
framework.ExpectNoError(err) framework.ExpectNoError(err)
return framework.NumberOfNVIDIAGPUs(node) <= 0 return gpu.NumberOfNVIDIAGPUs(node) <= 0
}, 10*time.Minute, framework.Poll).Should(BeTrue()) }, 10*time.Minute, framework.Poll).Should(BeTrue())
By("Checking that scheduled pods can continue to run even after we delete device plugin.") By("Checking that scheduled pods can continue to run even after we delete device plugin.")
ensurePodContainerRestart(f, p1.Name, p1.Name) ensurePodContainerRestart(f, p1.Name, p1.Name)

View File

@ -30,6 +30,7 @@ import (
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
commontest "k8s.io/kubernetes/test/e2e/common" commontest "k8s.io/kubernetes/test/e2e/common"
"k8s.io/kubernetes/test/e2e/framework" "k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/framework/gpu"
imageutils "k8s.io/kubernetes/test/utils/image" imageutils "k8s.io/kubernetes/test/utils/image"
) )
@ -52,7 +53,7 @@ var NodeImageWhiteList = sets.NewString(
imageutils.GetE2EImage(imageutils.Netexec), imageutils.GetE2EImage(imageutils.Netexec),
imageutils.GetE2EImage(imageutils.Nonewprivs), imageutils.GetE2EImage(imageutils.Nonewprivs),
imageutils.GetPauseImageName(), imageutils.GetPauseImageName(),
framework.GetGPUDevicePluginImage(), gpu.GetGPUDevicePluginImage(),
"gcr.io/kubernetes-e2e-test-images/node-perf/npb-is:1.0", "gcr.io/kubernetes-e2e-test-images/node-perf/npb-is:1.0",
"gcr.io/kubernetes-e2e-test-images/node-perf/npb-ep:1.0", "gcr.io/kubernetes-e2e-test-images/node-perf/npb-ep:1.0",
"gcr.io/kubernetes-e2e-test-images/node-perf/tf-wide-deep-amd64:1.0", "gcr.io/kubernetes-e2e-test-images/node-perf/tf-wide-deep-amd64:1.0",