mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-07 11:13:48 +00:00
Merge pull request #47467 from mindprince/issue-47388-e2e-gke-gpu
Automatic merge from submit-queue Update GPU e2e tests. * Use nvidia driver installer from external repo. That installer decouples itself from COS image version (as long as the image version is newer than cos-stable-59-9460-60-0). A separate commit in the test-infra repo will update the cos version used for this test to cos-stable-59-9460-60-0. * Use cos-stable-59-9460-60-0 and newer installer for GPU node e2e tests. This is to enable #47388. This supersedes #47091. **Release note**: ```release-note NONE ``` /sig node
This commit is contained in:
commit
9fff13b72a
@ -17,6 +17,8 @@ limitations under the License.
|
|||||||
package e2e
|
package e2e
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"io/ioutil"
|
||||||
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -29,7 +31,6 @@ import (
|
|||||||
"k8s.io/kubernetes/pkg/api/v1"
|
"k8s.io/kubernetes/pkg/api/v1"
|
||||||
extensions "k8s.io/kubernetes/pkg/apis/extensions/v1beta1"
|
extensions "k8s.io/kubernetes/pkg/apis/extensions/v1beta1"
|
||||||
"k8s.io/kubernetes/test/e2e/framework"
|
"k8s.io/kubernetes/test/e2e/framework"
|
||||||
"k8s.io/kubernetes/test/e2e/generated"
|
|
||||||
|
|
||||||
. "github.com/onsi/ginkgo"
|
. "github.com/onsi/ginkgo"
|
||||||
. "github.com/onsi/gomega"
|
. "github.com/onsi/gomega"
|
||||||
@ -42,7 +43,7 @@ const (
|
|||||||
// Nvidia driver installation can take upwards of 5 minutes.
|
// Nvidia driver installation can take upwards of 5 minutes.
|
||||||
driverInstallTimeout = 10 * time.Minute
|
driverInstallTimeout = 10 * time.Minute
|
||||||
// Nvidia COS driver installer daemonset.
|
// Nvidia COS driver installer daemonset.
|
||||||
cosNvidiaDriverInstallerPath = "cluster/gce/gci/nvidia-gpus/cos-installer-daemonset.yaml"
|
cosNvidiaDriverInstallerUrl = "https://raw.githubusercontent.com/ContainerEngine/accelerators/stable/cos-nvidia-gpu-installer/daemonset.yaml"
|
||||||
)
|
)
|
||||||
|
|
||||||
func makeCudaAdditionTestPod() *v1.Pod {
|
func makeCudaAdditionTestPod() *v1.Pod {
|
||||||
@ -135,7 +136,7 @@ func testNvidiaGPUsOnCOS(f *framework.Framework) {
|
|||||||
// GPU drivers might have already been installed.
|
// GPU drivers might have already been installed.
|
||||||
if !areGPUsAvailableOnAllSchedulableNodes(f) {
|
if !areGPUsAvailableOnAllSchedulableNodes(f) {
|
||||||
// Install Nvidia Drivers.
|
// Install Nvidia Drivers.
|
||||||
ds := dsFromManifest(cosNvidiaDriverInstallerPath)
|
ds := dsFromManifest(cosNvidiaDriverInstallerUrl)
|
||||||
ds.Namespace = f.Namespace.Name
|
ds.Namespace = f.Namespace.Name
|
||||||
_, err := f.ClientSet.Extensions().DaemonSets(f.Namespace.Name).Create(ds)
|
_, err := f.ClientSet.Extensions().DaemonSets(f.Namespace.Name).Create(ds)
|
||||||
framework.ExpectNoError(err, "failed to create daemonset")
|
framework.ExpectNoError(err, "failed to create daemonset")
|
||||||
@ -158,10 +159,25 @@ func testNvidiaGPUsOnCOS(f *framework.Framework) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// dsFromManifest reads a .json/yaml file and returns the daemonset in it.
|
// dsFromManifest downloads a .json/yaml manifest from the given URL and returns the daemonset in it.
|
||||||
func dsFromManifest(fileName string) *extensions.DaemonSet {
|
func dsFromManifest(url string) *extensions.DaemonSet {
|
||||||
var controller extensions.DaemonSet
|
var controller extensions.DaemonSet
|
||||||
framework.Logf("Parsing ds from %v", fileName)
|
framework.Logf("Parsing ds from %v", url)
|
||||||
data := generated.ReadOrDie(fileName)
|
|
||||||
|
var response *http.Response
|
||||||
|
var err error
|
||||||
|
for i := 1; i <= 5; i++ {
|
||||||
|
response, err = http.Get(url)
|
||||||
|
if err == nil && response.StatusCode == 200 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
time.Sleep(time.Duration(i) * time.Second)
|
||||||
|
}
|
||||||
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
Expect(response.StatusCode).To(Equal(200))
|
||||||
|
defer response.Body.Close()
|
||||||
|
|
||||||
|
data, err := ioutil.ReadAll(response.Body)
|
||||||
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
|
||||||
json, err := utilyaml.ToJSON(data)
|
json, err := utilyaml.ToJSON(data)
|
||||||
Expect(err).NotTo(HaveOccurred())
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
runcmd:
|
runcmd:
|
||||||
- modprobe configs
|
- modprobe configs
|
||||||
- docker run -v /dev:/dev -v /home/kubernetes/bin/nvidia:/rootfs/nvidia -v /etc/os-release:/rootfs/etc/os-release -v /proc/sysrq-trigger:/sysrq -e LAKITU_KERNEL_SHA1=26481563cb3788ad254c2bf2126b843c161c7e48 -e BASE_DIR=/rootfs/nvidia --privileged gcr.io/google_containers/cos-nvidia-driver-install@sha256:ad83ede6e0c6d768bf7cf69a7dec972aa5e8f88778142ca46afd3286ad58cfc8
|
- docker run -v /dev:/dev -v /home/kubernetes/bin/nvidia:/rootfs/nvidia -v /etc/os-release:/rootfs/etc/os-release -v /proc/sysrq-trigger:/sysrq -e BASE_DIR=/rootfs/nvidia --privileged gcr.io/google_containers/cos-nvidia-driver-install@sha256:cb55c7971c337fece62f2bfe858662522a01e43ac9984a2dd1dd5c71487d225c
|
||||||
- mount /tmp /tmp -o remount,exec,suid
|
- mount /tmp /tmp -o remount,exec,suid
|
||||||
- usermod -a -G docker jenkins
|
- usermod -a -G docker jenkins
|
||||||
- mkdir -p /var/lib/kubelet
|
- mkdir -p /var/lib/kubelet
|
||||||
|
@ -16,11 +16,10 @@ images:
|
|||||||
image: e2e-node-containervm-v20161208-image # docker 1.11.2
|
image: e2e-node-containervm-v20161208-image # docker 1.11.2
|
||||||
project: kubernetes-node-e2e-images
|
project: kubernetes-node-e2e-images
|
||||||
gci:
|
gci:
|
||||||
image_regex: cos-beta-59-9460-20-0 # docker 1.11.2
|
image_regex: cos-stable-59-9460-60-0 # docker 1.11.2
|
||||||
project: cos-cloud
|
project: cos-cloud
|
||||||
metadata: "user-data<test/e2e_node/jenkins/gci-init-gpu.yaml,gci-update-strategy=update_disabled"
|
metadata: "user-data<test/e2e_node/jenkins/gci-init-gpu.yaml,gci-update-strategy=update_disabled"
|
||||||
resources:
|
resources:
|
||||||
accelerators:
|
accelerators:
|
||||||
- type: nvidia-tesla-k80
|
- type: nvidia-tesla-k80
|
||||||
count: 2
|
count: 2
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user