mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-02 00:07:50 +00:00
Added device plugin e2e kubelet failure test
Signed-off-by: Renaud Gaubert <renaud.gaubert@gmail.com>
This commit is contained in:
parent
46ff2c44c7
commit
6993612cec
@ -17,6 +17,7 @@ go_library(
|
|||||||
"framework.go",
|
"framework.go",
|
||||||
"get-kubemark-resource-usage.go",
|
"get-kubemark-resource-usage.go",
|
||||||
"google_compute.go",
|
"google_compute.go",
|
||||||
|
"gpu_util.go",
|
||||||
"ingress_utils.go",
|
"ingress_utils.go",
|
||||||
"jobs_util.go",
|
"jobs_util.go",
|
||||||
"kubelet_stats.go",
|
"kubelet_stats.go",
|
||||||
@ -121,6 +122,7 @@ go_library(
|
|||||||
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
|
||||||
|
"//vendor/k8s.io/apimachinery/pkg/util/yaml:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/version:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/version:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/watch:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/watch:go_default_library",
|
||||||
"//vendor/k8s.io/client-go/discovery:go_default_library",
|
"//vendor/k8s.io/client-go/discovery:go_default_library",
|
||||||
|
66
test/e2e/framework/gpu_util.go
Normal file
66
test/e2e/framework/gpu_util.go
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2017 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package framework
|
||||||
|
|
||||||
|
import (
|
||||||
|
"k8s.io/api/core/v1"
|
||||||
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
"k8s.io/apimachinery/pkg/util/uuid"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// NVIDIAGPUResourceName is the extended name of the GPU resource since v1.8
|
||||||
|
// this uses the device plugin mechanism
|
||||||
|
NVIDIAGPUResourceName = "nvidia.com/gpu"
|
||||||
|
|
||||||
|
// TODO: Parametrize it by making it a feature in TestFramework.
|
||||||
|
// so we can override the daemonset in other setups (non COS).
|
||||||
|
// GPUDevicePluginDSYAML is the official Google Device Plugin Daemonset NVIDIA GPU manifest for GKE
|
||||||
|
GPUDevicePluginDSYAML = "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/device-plugin-daemonset.yaml"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TODO make this generic and not linked to COS only
|
||||||
|
// NumberOfGPUs returs the number of GPUs advertised by a node
|
||||||
|
// This is based on the Device Plugin system and expected to run on a COS based node
|
||||||
|
// After the NVIDIA drivers were installed
|
||||||
|
func NumberOfNVIDIAGPUs(node *v1.Node) int64 {
|
||||||
|
val, ok := node.Status.Capacity[NVIDIAGPUResourceName]
|
||||||
|
|
||||||
|
if !ok {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
return val.Value()
|
||||||
|
}
|
||||||
|
|
||||||
|
// NVIDIADevicePlugin returns the official Google Device Plugin pod for NVIDIA GPU in GKE
|
||||||
|
func NVIDIADevicePlugin(ns string) *v1.Pod {
|
||||||
|
ds := DsFromManifest(GPUDevicePluginDSYAML)
|
||||||
|
p := &v1.Pod{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: "device-plugin-nvidia-gpu-" + string(uuid.NewUUID()),
|
||||||
|
Namespace: ns,
|
||||||
|
},
|
||||||
|
|
||||||
|
Spec: ds.Spec.Template.Spec,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove NVIDIA drivers installation
|
||||||
|
p.Spec.InitContainers = []v1.Container{}
|
||||||
|
|
||||||
|
return p
|
||||||
|
}
|
@ -65,6 +65,7 @@ import (
|
|||||||
"k8s.io/apimachinery/pkg/util/sets"
|
"k8s.io/apimachinery/pkg/util/sets"
|
||||||
"k8s.io/apimachinery/pkg/util/uuid"
|
"k8s.io/apimachinery/pkg/util/uuid"
|
||||||
"k8s.io/apimachinery/pkg/util/wait"
|
"k8s.io/apimachinery/pkg/util/wait"
|
||||||
|
utilyaml "k8s.io/apimachinery/pkg/util/yaml"
|
||||||
"k8s.io/apimachinery/pkg/watch"
|
"k8s.io/apimachinery/pkg/watch"
|
||||||
"k8s.io/client-go/discovery"
|
"k8s.io/client-go/discovery"
|
||||||
"k8s.io/client-go/dynamic"
|
"k8s.io/client-go/dynamic"
|
||||||
@ -5015,3 +5016,33 @@ func DumpDebugInfo(c clientset.Interface, ns string) {
|
|||||||
func IsRetryableAPIError(err error) bool {
|
func IsRetryableAPIError(err error) bool {
|
||||||
return apierrs.IsTimeout(err) || apierrs.IsServerTimeout(err) || apierrs.IsTooManyRequests(err) || apierrs.IsInternalError(err)
|
return apierrs.IsTimeout(err) || apierrs.IsServerTimeout(err) || apierrs.IsTooManyRequests(err) || apierrs.IsInternalError(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DsFromManifest reads a .json/yaml file and returns the daemonset in it.
|
||||||
|
func DsFromManifest(url string) *extensions.DaemonSet {
|
||||||
|
var controller extensions.DaemonSet
|
||||||
|
Logf("Parsing ds from %v", url)
|
||||||
|
|
||||||
|
var response *http.Response
|
||||||
|
var err error
|
||||||
|
|
||||||
|
for i := 1; i <= 5; i++ {
|
||||||
|
response, err = http.Get(url)
|
||||||
|
if err == nil && response.StatusCode == 200 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
time.Sleep(time.Duration(i) * time.Second)
|
||||||
|
}
|
||||||
|
|
||||||
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
Expect(response.StatusCode).To(Equal(200))
|
||||||
|
defer response.Body.Close()
|
||||||
|
|
||||||
|
data, err := ioutil.ReadAll(response.Body)
|
||||||
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
|
||||||
|
json, err := utilyaml.ToJSON(data)
|
||||||
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
|
||||||
|
Expect(runtime.DecodeInto(api.Codecs.UniversalDecoder(), json, &controller)).NotTo(HaveOccurred())
|
||||||
|
return &controller
|
||||||
|
}
|
||||||
|
@ -34,17 +34,14 @@ go_library(
|
|||||||
"//vendor/github.com/onsi/gomega:go_default_library",
|
"//vendor/github.com/onsi/gomega:go_default_library",
|
||||||
"//vendor/github.com/stretchr/testify/assert:go_default_library",
|
"//vendor/github.com/stretchr/testify/assert:go_default_library",
|
||||||
"//vendor/k8s.io/api/core/v1:go_default_library",
|
"//vendor/k8s.io/api/core/v1:go_default_library",
|
||||||
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
|
|
||||||
"//vendor/k8s.io/api/scheduling/v1alpha1:go_default_library",
|
"//vendor/k8s.io/api/scheduling/v1alpha1:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
|
|
||||||
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/util/yaml:go_default_library",
|
|
||||||
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
|
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
@ -17,19 +17,13 @@ limitations under the License.
|
|||||||
package scheduling
|
package scheduling
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"io/ioutil"
|
|
||||||
"net/http"
|
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"k8s.io/api/core/v1"
|
"k8s.io/api/core/v1"
|
||||||
extensions "k8s.io/api/extensions/v1beta1"
|
|
||||||
"k8s.io/apimachinery/pkg/api/resource"
|
"k8s.io/apimachinery/pkg/api/resource"
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
"k8s.io/apimachinery/pkg/runtime"
|
|
||||||
"k8s.io/apimachinery/pkg/util/uuid"
|
"k8s.io/apimachinery/pkg/util/uuid"
|
||||||
utilyaml "k8s.io/apimachinery/pkg/util/yaml"
|
|
||||||
"k8s.io/kubernetes/pkg/api"
|
|
||||||
"k8s.io/kubernetes/test/e2e/framework"
|
"k8s.io/kubernetes/test/e2e/framework"
|
||||||
imageutils "k8s.io/kubernetes/test/utils/image"
|
imageutils "k8s.io/kubernetes/test/utils/image"
|
||||||
|
|
||||||
@ -168,8 +162,8 @@ func testNvidiaGPUsOnCOS(f *framework.Framework) {
|
|||||||
framework.Logf("Cluster is running on COS. Proceeding with test")
|
framework.Logf("Cluster is running on COS. Proceeding with test")
|
||||||
|
|
||||||
if f.BaseName == "device-plugin-gpus" {
|
if f.BaseName == "device-plugin-gpus" {
|
||||||
dsYamlUrl = "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/device-plugin-daemonset.yaml"
|
dsYamlUrl = framework.GPUDevicePluginDSYAML
|
||||||
gpuResourceName = "nvidia.com/gpu"
|
gpuResourceName = framework.NVIDIAGPUResourceName
|
||||||
podCreationFunc = makeCudaAdditionDevicePluginTestPod
|
podCreationFunc = makeCudaAdditionDevicePluginTestPod
|
||||||
} else {
|
} else {
|
||||||
dsYamlUrl = "https://raw.githubusercontent.com/ContainerEngine/accelerators/master/cos-nvidia-gpu-installer/daemonset.yaml"
|
dsYamlUrl = "https://raw.githubusercontent.com/ContainerEngine/accelerators/master/cos-nvidia-gpu-installer/daemonset.yaml"
|
||||||
@ -180,7 +174,7 @@ func testNvidiaGPUsOnCOS(f *framework.Framework) {
|
|||||||
// GPU drivers might have already been installed.
|
// GPU drivers might have already been installed.
|
||||||
if !areGPUsAvailableOnAllSchedulableNodes(f) {
|
if !areGPUsAvailableOnAllSchedulableNodes(f) {
|
||||||
// Install Nvidia Drivers.
|
// Install Nvidia Drivers.
|
||||||
ds := dsFromManifest(dsYamlUrl)
|
ds := framework.DsFromManifest(dsYamlUrl)
|
||||||
ds.Namespace = f.Namespace.Name
|
ds.Namespace = f.Namespace.Name
|
||||||
_, err := f.ClientSet.Extensions().DaemonSets(f.Namespace.Name).Create(ds)
|
_, err := f.ClientSet.Extensions().DaemonSets(f.Namespace.Name).Create(ds)
|
||||||
framework.ExpectNoError(err, "failed to create daemonset")
|
framework.ExpectNoError(err, "failed to create daemonset")
|
||||||
@ -202,34 +196,6 @@ func testNvidiaGPUsOnCOS(f *framework.Framework) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// dsFromManifest reads a .json/yaml file and returns the daemonset in it.
|
|
||||||
func dsFromManifest(url string) *extensions.DaemonSet {
|
|
||||||
var controller extensions.DaemonSet
|
|
||||||
framework.Logf("Parsing ds from %v", url)
|
|
||||||
|
|
||||||
var response *http.Response
|
|
||||||
var err error
|
|
||||||
for i := 1; i <= 5; i++ {
|
|
||||||
response, err = http.Get(url)
|
|
||||||
if err == nil && response.StatusCode == 200 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
time.Sleep(time.Duration(i) * time.Second)
|
|
||||||
}
|
|
||||||
Expect(err).NotTo(HaveOccurred())
|
|
||||||
Expect(response.StatusCode).To(Equal(200))
|
|
||||||
defer response.Body.Close()
|
|
||||||
|
|
||||||
data, err := ioutil.ReadAll(response.Body)
|
|
||||||
Expect(err).NotTo(HaveOccurred())
|
|
||||||
|
|
||||||
json, err := utilyaml.ToJSON(data)
|
|
||||||
Expect(err).NotTo(HaveOccurred())
|
|
||||||
|
|
||||||
Expect(runtime.DecodeInto(api.Codecs.UniversalDecoder(), json, &controller)).NotTo(HaveOccurred())
|
|
||||||
return &controller
|
|
||||||
}
|
|
||||||
|
|
||||||
var _ = SIGDescribe("[Feature:GPU]", func() {
|
var _ = SIGDescribe("[Feature:GPU]", func() {
|
||||||
f := framework.NewDefaultFramework("gpus")
|
f := framework.NewDefaultFramework("gpus")
|
||||||
It("run Nvidia GPU tests on Container Optimized OS only", func() {
|
It("run Nvidia GPU tests on Container Optimized OS only", func() {
|
||||||
@ -247,7 +213,7 @@ var _ = SIGDescribe("[Feature:GPUDevicePlugin]", func() {
|
|||||||
|
|
||||||
// 2. Verifies that when the device plugin DaemonSet is removed, resource capacity drops to zero.
|
// 2. Verifies that when the device plugin DaemonSet is removed, resource capacity drops to zero.
|
||||||
By("Deleting device plugin daemonset")
|
By("Deleting device plugin daemonset")
|
||||||
ds := dsFromManifest(dsYamlUrl)
|
ds := framework.DsFromManifest(dsYamlUrl)
|
||||||
falseVar := false
|
falseVar := false
|
||||||
err := f.ClientSet.Extensions().DaemonSets(f.Namespace.Name).Delete(ds.Name, &metav1.DeleteOptions{OrphanDependents: &falseVar})
|
err := f.ClientSet.Extensions().DaemonSets(f.Namespace.Name).Delete(ds.Name, &metav1.DeleteOptions{OrphanDependents: &falseVar})
|
||||||
framework.ExpectNoError(err, "failed to delete daemonset")
|
framework.ExpectNoError(err, "failed to delete daemonset")
|
||||||
|
@ -12,6 +12,7 @@ go_library(
|
|||||||
"container.go",
|
"container.go",
|
||||||
"doc.go",
|
"doc.go",
|
||||||
"docker_util.go",
|
"docker_util.go",
|
||||||
|
"gpu_device_plugin.go",
|
||||||
"gpus.go",
|
"gpus.go",
|
||||||
"image_list.go",
|
"image_list.go",
|
||||||
"simple_mount.go",
|
"simple_mount.go",
|
||||||
|
164
test/e2e_node/gpu_device_plugin.go
Normal file
164
test/e2e_node/gpu_device_plugin.go
Normal file
@ -0,0 +1,164 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2017 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package e2e_node
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os/exec"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"k8s.io/api/core/v1"
|
||||||
|
"k8s.io/apimachinery/pkg/api/resource"
|
||||||
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
"k8s.io/apimachinery/pkg/util/uuid"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
|
||||||
|
"k8s.io/kubernetes/test/e2e/framework"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
devicePluginFeatureGate = "DevicePlugins=true"
|
||||||
|
testPodNamePrefix = "nvidia-gpu-"
|
||||||
|
sleepTimeout = 30
|
||||||
|
)
|
||||||
|
|
||||||
|
// Serial because the test restarts Kubelet
|
||||||
|
var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin] [Serial] [Disruptive]", func() {
|
||||||
|
f := framework.NewDefaultFramework("device-plugin-gpus-errors")
|
||||||
|
|
||||||
|
Context("", func() {
|
||||||
|
BeforeEach(func() {
|
||||||
|
By("Ensuring that Nvidia GPUs exists on the node")
|
||||||
|
if !checkIfNvidiaGPUsExistOnNode() {
|
||||||
|
Skip("Nvidia GPUs do not exist on the node. Skipping test.")
|
||||||
|
}
|
||||||
|
|
||||||
|
By("Enabling support for Device Plugin")
|
||||||
|
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
|
||||||
|
initialConfig.FeatureGates += "," + devicePluginFeatureGate
|
||||||
|
})
|
||||||
|
|
||||||
|
By("Creating the Google Device Plugin pod for NVIDIA GPU in GKE")
|
||||||
|
f.PodClient().CreateSync(framework.NVIDIADevicePlugin(f.Namespace.Name))
|
||||||
|
|
||||||
|
By("Waiting for GPUs to become available on the local node")
|
||||||
|
Eventually(framework.NumberOfNVIDIAGPUs(getLocalNode(f)) != 0, time.Minute, time.Second).Should(BeTrue())
|
||||||
|
|
||||||
|
if framework.NumberOfNVIDIAGPUs(getLocalNode(f)) < 2 {
|
||||||
|
Skip("Not enough GPUs to execute this test (at least two needed)")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
AfterEach(func() {
|
||||||
|
l, err := f.PodClient().List(metav1.ListOptions{})
|
||||||
|
framework.ExpectNoError(err)
|
||||||
|
|
||||||
|
for _, p := range l.Items {
|
||||||
|
if p.Namespace != f.Namespace.Name {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
f.PodClient().Delete(p.Name, &metav1.DeleteOptions{})
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
It("checks that when Kubelet restarts exclusive GPU assignation to pods is kept.", func() {
|
||||||
|
n := getLocalNode(f)
|
||||||
|
|
||||||
|
By("Creating one GPU pod on a node with at least two GPUs")
|
||||||
|
p1 := f.PodClient().CreateSync(makeCudaPauseImage())
|
||||||
|
cmd := fmt.Sprintf("exec %s %s nvidia-smi -L", n.Name, p1.Spec.Containers[0].Name)
|
||||||
|
uuid1, _ := framework.RunKubectl(cmd)
|
||||||
|
|
||||||
|
By("Restarting Kubelet and waiting for the current running pod to restart")
|
||||||
|
restartKubelet(f)
|
||||||
|
Eventually(func() bool {
|
||||||
|
p, err := f.PodClient().Get(p1.Name, metav1.GetOptions{})
|
||||||
|
framework.ExpectNoError(err)
|
||||||
|
|
||||||
|
return p.Status.ContainerStatuses[0].RestartCount != p1.Status.ContainerStatuses[0].RestartCount
|
||||||
|
}, 2*sleepTimeout)
|
||||||
|
|
||||||
|
By("Confirming that after a kubelet and pod restart, GPU assignement is kept")
|
||||||
|
uuid1Restart, _ := framework.RunKubectl(cmd)
|
||||||
|
Expect(uuid1Restart).To(Equal(uuid1))
|
||||||
|
|
||||||
|
By("Restarting Kubelet and creating another pod")
|
||||||
|
restartKubelet(f)
|
||||||
|
p2 := f.PodClient().CreateSync(makeCudaPauseImage())
|
||||||
|
|
||||||
|
By("Checking that pods got a different GPU")
|
||||||
|
cmd = fmt.Sprintf("exec %s %s nvidia-smi -L", n.Name, p2.Spec.Containers[0].Name)
|
||||||
|
uuid2, _ := framework.RunKubectl(cmd)
|
||||||
|
Expect(uuid1).To(Not(Equal(uuid2)))
|
||||||
|
|
||||||
|
// Cleanup
|
||||||
|
f.PodClient().DeleteSync(p1.Name, &metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)
|
||||||
|
f.PodClient().DeleteSync(p2.Name, &metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
func makeCudaPauseImage() *v1.Pod {
|
||||||
|
podName := testPodNamePrefix + string(uuid.NewUUID())
|
||||||
|
|
||||||
|
return &v1.Pod{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{Name: podName},
|
||||||
|
Spec: v1.PodSpec{
|
||||||
|
RestartPolicy: v1.RestartPolicyAlways,
|
||||||
|
Containers: []v1.Container{{
|
||||||
|
Name: "cuda-pause",
|
||||||
|
Image: "nvidia/cuda",
|
||||||
|
Command: []string{"sleep", string(sleepTimeout)},
|
||||||
|
|
||||||
|
Resources: v1.ResourceRequirements{
|
||||||
|
Limits: newDecimalResourceList(framework.NVIDIAGPUResourceName, 1),
|
||||||
|
Requests: newDecimalResourceList(framework.NVIDIAGPUResourceName, 1),
|
||||||
|
},
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func newDecimalResourceList(name v1.ResourceName, quantity int64) v1.ResourceList {
|
||||||
|
return v1.ResourceList{name: *resource.NewQuantity(quantity, resource.DecimalSI)}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Find a uniform way to deal with systemctl/initctl/service operations. #34494
|
||||||
|
func restartKubelet(f *framework.Framework) {
|
||||||
|
stdout1, err1 := exec.Command("sudo", "systemctl", "restart", "kubelet").CombinedOutput()
|
||||||
|
if err1 == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
stdout2, err2 := exec.Command("sudo", "/etc/init.d/kubelet", "restart").CombinedOutput()
|
||||||
|
if err2 == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
stdout3, err3 := exec.Command("sudo", "service", "kubelet", "restart").CombinedOutput()
|
||||||
|
if err3 == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
framework.Failf("Failed to trigger kubelet restart with systemctl/initctl/service operations:"+
|
||||||
|
"\nsystemclt: %v, %v"+
|
||||||
|
"\ninitctl: %v, %v"+
|
||||||
|
"\nservice: %v, %v", err1, stdout1, err2, stdout2, err3, stdout3)
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user