e2e/scheduling: use a local nvidia-driver-installer DaemonSet manifest by default.
Setting NVIDIA_DRIVER_INSTALLER_DAEMONSET still selects a remote manifest URL instead.

diff --git a/test/e2e/scheduling/BUILD b/test/e2e/scheduling/BUILD
index 1269cb883dd..5a355c35949 100644
--- a/test/e2e/scheduling/BUILD
+++ b/test/e2e/scheduling/BUILD
@@ -55,6 +55,7 @@ go_library(
         "//test/e2e/framework/resource:go_default_library",
         "//test/e2e/framework/service:go_default_library",
         "//test/e2e/framework/skipper:go_default_library",
+        "//test/e2e/framework/testfiles:go_default_library",
         "//test/utils:go_default_library",
         "//test/utils/image:go_default_library",
         "//vendor/github.com/onsi/ginkgo:go_default_library",
diff --git a/test/e2e/scheduling/nvidia-gpus.go b/test/e2e/scheduling/nvidia-gpus.go
index 334a6f5b9f6..313e773b8e2 100644
--- a/test/e2e/scheduling/nvidia-gpus.go
+++ b/test/e2e/scheduling/nvidia-gpus.go
@@ -22,6 +22,7 @@ import (
 	"regexp"
 	"time"
 
+	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -36,6 +37,7 @@ import (
 	"k8s.io/kubernetes/test/e2e/framework/providers/gce"
 	e2eresource "k8s.io/kubernetes/test/e2e/framework/resource"
 	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
+	e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles"
 	imageutils "k8s.io/kubernetes/test/utils/image"
 
 	"github.com/onsi/ginkgo"
@@ -50,7 +52,6 @@ const (
 
 var (
 	gpuResourceName v1.ResourceName
-	dsYamlURL       string
 )
 
 func makeCudaAdditionDevicePluginTestPod() *v1.Pod {
@@ -128,18 +129,23 @@ func getGPUsAvailable(f *framework.Framework) int64 {
 func SetupNVIDIAGPUNode(f *framework.Framework, setupResourceGatherer bool) *framework.ContainerResourceGatherer {
 	logOSImages(f)
 
+	var err error
+	var ds *appsv1.DaemonSet
 	dsYamlURLFromEnv := os.Getenv("NVIDIA_DRIVER_INSTALLER_DAEMONSET")
 	if dsYamlURLFromEnv != "" {
-		dsYamlURL = dsYamlURLFromEnv
+		// Using DaemonSet from remote URL
+		framework.Logf("Using remote nvidia-driver-installer daemonset manifest from %v", dsYamlURLFromEnv)
+		ds, err = e2emanifest.DaemonSetFromURL(dsYamlURLFromEnv)
+		framework.ExpectNoError(err, "failed to get remote manifest for nvidia-driver-installer daemonset")
 	} else {
-		dsYamlURL = "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/daemonset.yaml"
+		// Using default local DaemonSet
+		framework.Logf("Using default local nvidia-driver-installer daemonset manifest.")
+		data, err := e2etestfiles.Read("test/e2e/testing-manifests/scheduling/nvidia-driver-installer.yaml")
+		framework.ExpectNoError(err, "failed to read local manifest for nvidia-driver-installer daemonset")
+		ds, err = e2emanifest.DaemonSetFromData(data)
+		framework.ExpectNoError(err, "failed to parse local manifest for nvidia-driver-installer daemonset")
 	}
 	gpuResourceName = e2egpu.NVIDIAGPUResourceName
-
-	framework.Logf("Using %v", dsYamlURL)
-	// Creates the DaemonSet that installs Nvidia Drivers.
-	ds, err := e2emanifest.DaemonSetFromURL(dsYamlURL)
-	framework.ExpectNoError(err)
 	ds.Namespace = f.Namespace.Name
 	_, err = f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Create(context.TODO(), ds, metav1.CreateOptions{})
 	framework.ExpectNoError(err, "failed to create nvidia-driver-installer daemonset")
diff --git a/test/e2e/testing-manifests/scheduling/nvidia-driver-installer.yaml b/test/e2e/testing-manifests/scheduling/nvidia-driver-installer.yaml
new file mode 100644
index 00000000000..79eafba0457
--- /dev/null
+++ b/test/e2e/testing-manifests/scheduling/nvidia-driver-installer.yaml
@@ -0,0 +1,80 @@
+# This DaemonSet was originally referenced from
+# https://github.com/GoogleCloudPlatform/container-engine-accelerators/blob/master/daemonset.yaml
+
+# The Dockerfile and other source for this daemonset are in
+# https://github.com/GoogleCloudPlatform/cos-gpu-installer
+
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: nvidia-driver-installer
+  namespace: kube-system
+  labels:
+    k8s-app: nvidia-driver-installer
+spec:
+  selector:
+    matchLabels:
+      k8s-app: nvidia-driver-installer
+  updateStrategy:
+    type: RollingUpdate
+  template:
+    metadata:
+      labels:
+        name: nvidia-driver-installer
+        k8s-app: nvidia-driver-installer
+    spec:
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+            - matchExpressions:
+              - key: cloud.google.com/gke-accelerator
+                operator: Exists
+      tolerations:
+      - operator: "Exists"
+      hostNetwork: true
+      hostPID: true
+      volumes:
+      - name: dev
+        hostPath:
+          path: /dev
+      - name: vulkan-icd-mount
+        hostPath:
+          path: /home/kubernetes/bin/nvidia/vulkan/icd.d
+      - name: nvidia-install-dir-host
+        hostPath:
+          path: /home/kubernetes/bin/nvidia
+      - name: root-mount
+        hostPath:
+          path: /
+      initContainers:
+      - image: gcr.io/cos-cloud/cos-gpu-installer:v20200701
+        name: nvidia-driver-installer
+        resources:
+          requests:
+            cpu: 0.15
+        securityContext:
+          privileged: true
+        env:
+          - name: NVIDIA_INSTALL_DIR_HOST
+            value: /home/kubernetes/bin/nvidia
+          - name: NVIDIA_INSTALL_DIR_CONTAINER
+            value: /usr/local/nvidia
+          - name: VULKAN_ICD_DIR_HOST
+            value: /home/kubernetes/bin/nvidia/vulkan/icd.d
+          - name: VULKAN_ICD_DIR_CONTAINER
+            value: /etc/vulkan/icd.d
+          - name: ROOT_MOUNT_DIR
+            value: /root
+        volumeMounts:
+        - name: nvidia-install-dir-host
+          mountPath: /usr/local/nvidia
+        - name: vulkan-icd-mount
+          mountPath: /etc/vulkan/icd.d
+        - name: dev
+          mountPath: /dev
+        - name: root-mount
+          mountPath: /root
+      containers:
+      - image: "gcr.io/google-containers/pause:3.2"
+        name: pause
\ No newline at end of file