From 87dda3375b6155dca7679440321836e8ea0a6871 Mon Sep 17 00:00:00 2001 From: Rohit Agarwal Date: Wed, 21 Mar 2018 13:56:51 -0700 Subject: [PATCH] Delete in-tree support for NVIDIA GPUs. This removes the alpha Accelerators feature gate which was deprecated in 1.10. The alternative feature DevicePlugins went beta in 1.10. --- hack/.golint_failures | 1 - pkg/apis/core/helper/helpers.go | 7 +- pkg/apis/core/helper/helpers_test.go | 4 - pkg/apis/core/resource.go | 7 - pkg/apis/core/types.go | 2 - pkg/apis/core/v1/helper/BUILD | 1 - pkg/apis/core/v1/helper/helpers.go | 8 +- pkg/apis/core/v1/helper/helpers_test.go | 4 - pkg/apis/core/v1/helper/qos/qos_test.go | 32 +- pkg/apis/core/v1/validation/validation.go | 2 - .../core/v1/validation/validation_test.go | 61 +--- pkg/apis/core/validation/validation_test.go | 60 +--- pkg/features/kube_features.go | 11 - pkg/kubelet/BUILD | 4 - pkg/kubelet/gpu/BUILD | 32 -- pkg/kubelet/gpu/OWNERS | 12 - pkg/kubelet/gpu/gpu_manager_stub.go | 41 --- pkg/kubelet/gpu/nvidia/BUILD | 54 ---- pkg/kubelet/gpu/nvidia/helpers.go | 77 ----- pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go | 280 ------------------ .../gpu/nvidia/nvidia_gpu_manager_test.go | 213 ------------- pkg/kubelet/gpu/types.go | 32 -- pkg/kubelet/kubelet.go | 24 -- pkg/kubelet/kubelet_node_status.go | 8 - pkg/kubelet/kubelet_pods.go | 26 -- pkg/kubelet/kubelet_test.go | 2 - pkg/kubelet/preemption/preemption.go | 1 - .../algorithm/predicates/predicates.go | 9 - .../algorithm/predicates/predicates_test.go | 50 +--- .../algorithm/priorities/resource_limits.go | 4 - pkg/scheduler/schedulercache/node_info.go | 7 - .../schedulercache/node_info_test.go | 17 -- staging/src/k8s.io/api/core/v1/resource.go | 7 - staging/src/k8s.io/api/core/v1/types.go | 2 - test/e2e/scheduling/nvidia-gpus.go | 69 +---- test/e2e_node/BUILD | 1 - test/e2e_node/gpu_device_plugin.go | 11 + test/e2e_node/gpus.go | 174 ----------- 38 files changed, 42 insertions(+), 1315 deletions(-) delete mode 100644 pkg/kubelet/gpu/BUILD delete mode 100644 pkg/kubelet/gpu/OWNERS delete mode 100644 pkg/kubelet/gpu/gpu_manager_stub.go delete mode 100644 pkg/kubelet/gpu/nvidia/BUILD delete mode 100644 pkg/kubelet/gpu/nvidia/helpers.go delete mode 100644 pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go delete mode 100644 pkg/kubelet/gpu/nvidia/nvidia_gpu_manager_test.go delete mode 100644 pkg/kubelet/gpu/types.go delete mode 100644 test/e2e_node/gpus.go diff --git a/hack/.golint_failures b/hack/.golint_failures index 70ddf57a792..66822b09591 100644 --- a/hack/.golint_failures +++ b/hack/.golint_failures @@ -173,7 +173,6 @@ pkg/kubelet/dockershim/cm pkg/kubelet/dockershim/libdocker pkg/kubelet/dockershim/testing pkg/kubelet/events -pkg/kubelet/gpu pkg/kubelet/images pkg/kubelet/kuberuntime pkg/kubelet/leaky diff --git a/pkg/apis/core/helper/helpers.go b/pkg/apis/core/helper/helpers.go index 038978fa9f4..bb3bcd08044 100644 --- a/pkg/apis/core/helper/helpers.go +++ b/pkg/apis/core/helper/helpers.go @@ -172,14 +172,11 @@ func IsNativeResource(name core.ResourceName) bool { strings.Contains(string(name), core.ResourceDefaultNamespacePrefix) } -var overcommitBlacklist = sets.NewString(string(core.ResourceNvidiaGPU)) - // IsOvercommitAllowed returns true if the resource is in the default -// namespace and not blacklisted. +// namespace and is not hugepages. 
func IsOvercommitAllowed(name core.ResourceName) bool { return IsNativeResource(name) && - !IsHugePageResourceName(name) && - !overcommitBlacklist.Has(string(name)) + !IsHugePageResourceName(name) } var standardLimitRangeTypes = sets.NewString( diff --git a/pkg/apis/core/helper/helpers_test.go b/pkg/apis/core/helper/helpers_test.go index 81bb96b786f..c63ba46c295 100644 --- a/pkg/apis/core/helper/helpers_test.go +++ b/pkg/apis/core/helper/helpers_test.go @@ -387,10 +387,6 @@ func TestIsOvercommitAllowed(t *testing.T) { name: core.ResourceMemory, allowed: true, }, - { - name: core.ResourceNvidiaGPU, - allowed: false, - }, { name: HugePageResourceName(resource.MustParse("2Mi")), allowed: false, diff --git a/pkg/apis/core/resource.go b/pkg/apis/core/resource.go index 1910cd921d9..1367e00e56e 100644 --- a/pkg/apis/core/resource.go +++ b/pkg/apis/core/resource.go @@ -47,13 +47,6 @@ func (self *ResourceList) Pods() *resource.Quantity { return &resource.Quantity{} } -func (self *ResourceList) NvidiaGPU() *resource.Quantity { - if val, ok := (*self)[ResourceNvidiaGPU]; ok { - return &val - } - return &resource.Quantity{} -} - func (self *ResourceList) StorageEphemeral() *resource.Quantity { if val, ok := (*self)[ResourceEphemeralStorage]; ok { return &val diff --git a/pkg/apis/core/types.go b/pkg/apis/core/types.go index 56018ca632d..3a20514dee8 100644 --- a/pkg/apis/core/types.go +++ b/pkg/apis/core/types.go @@ -3641,8 +3641,6 @@ const ( // Local ephemeral storage, in bytes. (500Gi = 500GiB = 500 * 1024 * 1024 * 1024) // The resource name for ResourceEphemeralStorage is alpha and it can change across releases. ResourceEphemeralStorage ResourceName = "ephemeral-storage" - // NVIDIA GPU, in devices. Alpha, might change: although fractional and allowing values >1, only one whole device per node is assigned. - ResourceNvidiaGPU ResourceName = "alpha.kubernetes.io/nvidia-gpu" ) const ( diff --git a/pkg/apis/core/v1/helper/BUILD b/pkg/apis/core/v1/helper/BUILD index 6c74f416ac0..c5dfb0249bc 100644 --- a/pkg/apis/core/v1/helper/BUILD +++ b/pkg/apis/core/v1/helper/BUILD @@ -29,7 +29,6 @@ go_library( "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//vendor/k8s.io/apimachinery/pkg/labels:go_default_library", "//vendor/k8s.io/apimachinery/pkg/selection:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/validation:go_default_library", ], ) diff --git a/pkg/apis/core/v1/helper/helpers.go b/pkg/apis/core/v1/helper/helpers.go index a86c0c17abe..c469a956612 100644 --- a/pkg/apis/core/v1/helper/helpers.go +++ b/pkg/apis/core/v1/helper/helpers.go @@ -25,7 +25,6 @@ import ( "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/selection" - "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/validation" "k8s.io/kubernetes/pkg/apis/core/helper" ) @@ -85,14 +84,11 @@ func HugePageSizeFromResourceName(name v1.ResourceName) (resource.Quantity, erro return resource.ParseQuantity(pageSize) } -var overcommitBlacklist = sets.NewString(string(v1.ResourceNvidiaGPU)) - // IsOvercommitAllowed returns true if the resource is in the default -// namespace and not blacklisted and is not hugepages. +// namespace and is not hugepages. 
func IsOvercommitAllowed(name v1.ResourceName) bool { return IsNativeResource(name) && - !IsHugePageResourceName(name) && - !overcommitBlacklist.Has(string(name)) + !IsHugePageResourceName(name) } // Extended and Hugepages resources diff --git a/pkg/apis/core/v1/helper/helpers_test.go b/pkg/apis/core/v1/helper/helpers_test.go index beb81a6e87a..44a1c504208 100644 --- a/pkg/apis/core/v1/helper/helpers_test.go +++ b/pkg/apis/core/v1/helper/helpers_test.go @@ -125,10 +125,6 @@ func TestIsOvercommitAllowed(t *testing.T) { resourceName: "kubernetes.io/resource-foo", expectVal: true, }, - { - resourceName: "alpha.kubernetes.io/nvidia-gpu", - expectVal: false, - }, { resourceName: "hugepages-100m", expectVal: false, diff --git a/pkg/apis/core/v1/helper/qos/qos_test.go b/pkg/apis/core/v1/helper/qos/qos_test.go index f6440db10a3..7e9b71fef90 100644 --- a/pkg/apis/core/v1/helper/qos/qos_test.go +++ b/pkg/apis/core/v1/helper/qos/qos_test.go @@ -38,12 +38,6 @@ func TestGetPodQOS(t *testing.T) { }), expected: v1.PodQOSGuaranteed, }, - { - pod: newPod("guaranteed-with-gpu", []v1.Container{ - newContainer("guaranteed", getResourceList("100m", "100Mi"), addResource("nvidia-gpu", "2", getResourceList("100m", "100Mi"))), - }), - expected: v1.PodQOSGuaranteed, - }, { pod: newPod("guaranteed-guaranteed", []v1.Container{ newContainer("guaranteed", getResourceList("100m", "100Mi"), getResourceList("100m", "100Mi")), @@ -51,13 +45,6 @@ func TestGetPodQOS(t *testing.T) { }), expected: v1.PodQOSGuaranteed, }, - { - pod: newPod("guaranteed-guaranteed-with-gpu", []v1.Container{ - newContainer("guaranteed", getResourceList("100m", "100Mi"), addResource("nvidia-gpu", "2", getResourceList("100m", "100Mi"))), - newContainer("guaranteed", getResourceList("100m", "100Mi"), getResourceList("100m", "100Mi")), - }), - expected: v1.PodQOSGuaranteed, - }, { pod: newPod("best-effort-best-effort", []v1.Container{ newContainer("best-effort", getResourceList("", ""), getResourceList("", "")), @@ -71,29 +58,16 @@ func TestGetPodQOS(t *testing.T) { }), expected: v1.PodQOSBestEffort, }, - { - pod: newPod("best-effort-best-effort-with-gpu", []v1.Container{ - newContainer("best-effort", getResourceList("", ""), addResource("nvidia-gpu", "2", getResourceList("", ""))), - newContainer("best-effort", getResourceList("", ""), getResourceList("", "")), - }), - expected: v1.PodQOSBestEffort, - }, - { - pod: newPod("best-effort-with-gpu", []v1.Container{ - newContainer("best-effort", getResourceList("", ""), addResource("nvidia-gpu", "2", getResourceList("", ""))), - }), - expected: v1.PodQOSBestEffort, - }, { pod: newPod("best-effort-burstable", []v1.Container{ - newContainer("best-effort", getResourceList("", ""), addResource("nvidia-gpu", "2", getResourceList("", ""))), + newContainer("best-effort", getResourceList("", ""), getResourceList("", "")), newContainer("burstable", getResourceList("1", ""), getResourceList("2", "")), }), expected: v1.PodQOSBurstable, }, { pod: newPod("best-effort-guaranteed", []v1.Container{ - newContainer("best-effort", getResourceList("", ""), addResource("nvidia-gpu", "2", getResourceList("", ""))), + newContainer("best-effort", getResourceList("", ""), getResourceList("", "")), newContainer("guaranteed", getResourceList("10m", "100Mi"), getResourceList("10m", "100Mi")), }), expected: v1.PodQOSBurstable, @@ -132,7 +106,7 @@ func TestGetPodQOS(t *testing.T) { }, { pod: newPod("burstable-2", []v1.Container{ - newContainer("burstable", getResourceList("0", "0"), addResource("nvidia-gpu", "2", 
getResourceList("100m", "200Mi"))), + newContainer("burstable", getResourceList("0", "0"), getResourceList("100m", "200Mi")), }), expected: v1.PodQOSBurstable, }, diff --git a/pkg/apis/core/v1/validation/validation.go b/pkg/apis/core/v1/validation/validation.go index c9d67fc1aa8..02362233281 100644 --- a/pkg/apis/core/v1/validation/validation.go +++ b/pkg/apis/core/v1/validation/validation.go @@ -61,8 +61,6 @@ func ValidateResourceRequirements(requirements *v1.ResourceRequirements, fldPath } else if quantity.Cmp(limitQuantity) > 0 { allErrs = append(allErrs, field.Invalid(reqPath, quantity.String(), fmt.Sprintf("must be less than or equal to %s limit", resourceName))) } - } else if resourceName == v1.ResourceNvidiaGPU { - allErrs = append(allErrs, field.Invalid(reqPath, quantity.String(), fmt.Sprintf("must be equal to %s request", v1.ResourceNvidiaGPU))) } } diff --git a/pkg/apis/core/v1/validation/validation_test.go b/pkg/apis/core/v1/validation/validation_test.go index db725dd2fc5..5ff038f3c69 100644 --- a/pkg/apis/core/v1/validation/validation_test.go +++ b/pkg/apis/core/v1/validation/validation_test.go @@ -32,36 +32,15 @@ func TestValidateResourceRequirements(t *testing.T) { requirements v1.ResourceRequirements }{ { - Name: "GPU only setting Limits", - requirements: v1.ResourceRequirements{ - Limits: v1.ResourceList{ - v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("10"), - }, - }, - }, - { - Name: "GPU setting Limits equals Requests", - requirements: v1.ResourceRequirements{ - Limits: v1.ResourceList{ - v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("10"), - }, - Requests: v1.ResourceList{ - v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("10"), - }, - }, - }, - { - Name: "Resources with GPU with Requests", + Name: "Resources with Requests equal to Limits", requirements: v1.ResourceRequirements{ Requests: v1.ResourceList{ - v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"), - v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"), - v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("1"), + v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"), + v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"), }, Limits: v1.ResourceList{ - v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"), - v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"), - v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("1"), + v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"), + v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"), }, }, }, @@ -111,36 +90,6 @@ func TestValidateResourceRequirements(t *testing.T) { Name string requirements v1.ResourceRequirements }{ - { - Name: "GPU only setting Requests", - requirements: v1.ResourceRequirements{ - Requests: v1.ResourceList{ - v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("10"), - }, - }, - }, - { - Name: "GPU setting Limits less than Requests", - requirements: v1.ResourceRequirements{ - Limits: v1.ResourceList{ - v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("10"), - }, - Requests: v1.ResourceList{ - v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("11"), - }, - }, - }, - { - Name: "GPU setting Limits larger than Requests", - requirements: v1.ResourceRequirements{ - Limits: v1.ResourceList{ - v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("10"), - }, - Requests: v1.ResourceList{ - v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("9"), - }, - }, - }, { Name: "Resources with Requests Larger Than Limits", 
requirements: v1.ResourceRequirements{ diff --git a/pkg/apis/core/validation/validation_test.go b/pkg/apis/core/validation/validation_test.go index cae3fa81259..2cbc8a58750 100644 --- a/pkg/apis/core/validation/validation_test.go +++ b/pkg/apis/core/validation/validation_test.go @@ -5042,25 +5042,7 @@ func TestValidateContainers(t *testing.T) { TerminationMessagePolicy: "File", }, { - Name: "resources-test-with-gpu-with-request", - Image: "image", - Resources: core.ResourceRequirements{ - Requests: core.ResourceList{ - core.ResourceName(core.ResourceCPU): resource.MustParse("10"), - core.ResourceName(core.ResourceMemory): resource.MustParse("10G"), - core.ResourceName(core.ResourceNvidiaGPU): resource.MustParse("1"), - }, - Limits: core.ResourceList{ - core.ResourceName(core.ResourceCPU): resource.MustParse("10"), - core.ResourceName(core.ResourceMemory): resource.MustParse("10G"), - core.ResourceName(core.ResourceNvidiaGPU): resource.MustParse("1"), - }, - }, - ImagePullPolicy: "IfNotPresent", - TerminationMessagePolicy: "File", - }, - { - Name: "resources-test-with-gpu-without-request", + Name: "resources-test-with-request-and-limit", Image: "image", Resources: core.ResourceRequirements{ Requests: core.ResourceList{ @@ -5068,9 +5050,8 @@ func TestValidateContainers(t *testing.T) { core.ResourceName(core.ResourceMemory): resource.MustParse("10G"), }, Limits: core.ResourceList{ - core.ResourceName(core.ResourceCPU): resource.MustParse("10"), - core.ResourceName(core.ResourceMemory): resource.MustParse("10G"), - core.ResourceName(core.ResourceNvidiaGPU): resource.MustParse("1"), + core.ResourceName(core.ResourceCPU): resource.MustParse("10"), + core.ResourceName(core.ResourceMemory): resource.MustParse("10G"), }, }, ImagePullPolicy: "IfNotPresent", @@ -5359,41 +5340,6 @@ func TestValidateContainers(t *testing.T) { TerminationMessagePolicy: "File", }, }, - "Resource GPU limit must match request": { - { - Name: "gpu-resource-request-limit", - Image: "image", - Resources: core.ResourceRequirements{ - Requests: core.ResourceList{ - core.ResourceName(core.ResourceCPU): resource.MustParse("10"), - core.ResourceName(core.ResourceMemory): resource.MustParse("10G"), - core.ResourceName(core.ResourceNvidiaGPU): resource.MustParse("0"), - }, - Limits: core.ResourceList{ - core.ResourceName(core.ResourceCPU): resource.MustParse("10"), - core.ResourceName(core.ResourceMemory): resource.MustParse("10G"), - core.ResourceName(core.ResourceNvidiaGPU): resource.MustParse("1"), - }, - }, - TerminationMessagePolicy: "File", - ImagePullPolicy: "IfNotPresent", - }, - }, - "Resource GPU invalid setting only request": { - { - Name: "gpu-resource-request-limit", - Image: "image", - Resources: core.ResourceRequirements{ - Requests: core.ResourceList{ - core.ResourceName(core.ResourceCPU): resource.MustParse("10"), - core.ResourceName(core.ResourceMemory): resource.MustParse("10G"), - core.ResourceName(core.ResourceNvidiaGPU): resource.MustParse("1"), - }, - }, - TerminationMessagePolicy: "File", - ImagePullPolicy: "IfNotPresent", - }, - }, "Request limit simple invalid": { { Name: "abc-123", diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index a04eb70d15b..a551ae792b0 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -53,16 +53,6 @@ const ( // Note: This feature is not supported for `BestEffort` pods. 
ExperimentalCriticalPodAnnotation utilfeature.Feature = "ExperimentalCriticalPodAnnotation" - // owner: @vishh - // alpha: v1.6 - // - // This is deprecated and will be removed in v1.11. Use DevicePlugins instead. - // - // Enables support for GPUs as a schedulable resource. - // Only Nvidia GPUs are supported as of v1.6. - // Works only with Docker Container Runtime. - Accelerators utilfeature.Feature = "Accelerators" - // owner: @jiayingz // beta: v1.10 // @@ -296,7 +286,6 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS DynamicKubeletConfig: {Default: false, PreRelease: utilfeature.Alpha}, ExperimentalHostUserNamespaceDefaultingGate: {Default: false, PreRelease: utilfeature.Beta}, ExperimentalCriticalPodAnnotation: {Default: false, PreRelease: utilfeature.Alpha}, - Accelerators: {Default: false, PreRelease: utilfeature.Alpha}, DevicePlugins: {Default: true, PreRelease: utilfeature.Beta}, TaintBasedEvictions: {Default: false, PreRelease: utilfeature.Alpha}, RotateKubeletServerCertificate: {Default: false, PreRelease: utilfeature.Alpha}, diff --git a/pkg/kubelet/BUILD b/pkg/kubelet/BUILD index 6662eafe72e..aebbbbd5343 100644 --- a/pkg/kubelet/BUILD +++ b/pkg/kubelet/BUILD @@ -55,8 +55,6 @@ go_library( "//pkg/kubelet/envvars:go_default_library", "//pkg/kubelet/events:go_default_library", "//pkg/kubelet/eviction:go_default_library", - "//pkg/kubelet/gpu:go_default_library", - "//pkg/kubelet/gpu/nvidia:go_default_library", "//pkg/kubelet/images:go_default_library", "//pkg/kubelet/kubeletconfig:go_default_library", "//pkg/kubelet/kuberuntime:go_default_library", @@ -179,7 +177,6 @@ go_test( "//pkg/kubelet/container:go_default_library", "//pkg/kubelet/container/testing:go_default_library", "//pkg/kubelet/eviction:go_default_library", - "//pkg/kubelet/gpu:go_default_library", "//pkg/kubelet/images:go_default_library", "//pkg/kubelet/lifecycle:go_default_library", "//pkg/kubelet/logs:go_default_library", @@ -264,7 +261,6 @@ filegroup( "//pkg/kubelet/envvars:all-srcs", "//pkg/kubelet/events:all-srcs", "//pkg/kubelet/eviction:all-srcs", - "//pkg/kubelet/gpu:all-srcs", "//pkg/kubelet/images:all-srcs", "//pkg/kubelet/kubeletconfig:all-srcs", "//pkg/kubelet/kuberuntime:all-srcs", diff --git a/pkg/kubelet/gpu/BUILD b/pkg/kubelet/gpu/BUILD deleted file mode 100644 index 1747aa67a24..00000000000 --- a/pkg/kubelet/gpu/BUILD +++ /dev/null @@ -1,32 +0,0 @@ -package(default_visibility = ["//visibility:public"]) - -load( - "@io_bazel_rules_go//go:def.bzl", - "go_library", -) - -go_library( - name = "go_default_library", - srcs = [ - "gpu_manager_stub.go", - "types.go", - ], - importpath = "k8s.io/kubernetes/pkg/kubelet/gpu", - deps = ["//vendor/k8s.io/api/core/v1:go_default_library"], -) - -filegroup( - name = "package-srcs", - srcs = glob(["**"]), - tags = ["automanaged"], - visibility = ["//visibility:private"], -) - -filegroup( - name = "all-srcs", - srcs = [ - ":package-srcs", - "//pkg/kubelet/gpu/nvidia:all-srcs", - ], - tags = ["automanaged"], -) diff --git a/pkg/kubelet/gpu/OWNERS b/pkg/kubelet/gpu/OWNERS deleted file mode 100644 index 7635d5b58d9..00000000000 --- a/pkg/kubelet/gpu/OWNERS +++ /dev/null @@ -1,12 +0,0 @@ -approvers: -- dchen1107 -- derekwaynecarr -- vishh -- yujuhong -reviewers: -- cmluciano -- jiayingz -- mindprince -- RenaudWasTaken -- vishh -- sig-node-reviewers diff --git a/pkg/kubelet/gpu/gpu_manager_stub.go b/pkg/kubelet/gpu/gpu_manager_stub.go deleted file mode 100644 index c0dbcf46c81..00000000000 --- a/pkg/kubelet/gpu/gpu_manager_stub.go +++ 
/dev/null @@ -1,41 +0,0 @@ -/* -Copyright 2017 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package gpu - -import ( - "fmt" - - "k8s.io/api/core/v1" -) - -type gpuManagerStub struct{} - -func (gms *gpuManagerStub) Start() error { - return nil -} - -func (gms *gpuManagerStub) Capacity() v1.ResourceList { - return nil -} - -func (gms *gpuManagerStub) AllocateGPU(_ *v1.Pod, _ *v1.Container) ([]string, error) { - return nil, fmt.Errorf("GPUs are not supported") -} - -func NewGPUManagerStub() GPUManager { - return &gpuManagerStub{} -} diff --git a/pkg/kubelet/gpu/nvidia/BUILD b/pkg/kubelet/gpu/nvidia/BUILD deleted file mode 100644 index 28feb876ff7..00000000000 --- a/pkg/kubelet/gpu/nvidia/BUILD +++ /dev/null @@ -1,54 +0,0 @@ -package(default_visibility = ["//visibility:public"]) - -load( - "@io_bazel_rules_go//go:def.bzl", - "go_library", - "go_test", -) - -go_library( - name = "go_default_library", - srcs = [ - "helpers.go", - "nvidia_gpu_manager.go", - ], - importpath = "k8s.io/kubernetes/pkg/kubelet/gpu/nvidia", - deps = [ - "//pkg/kubelet/dockershim:go_default_library", - "//pkg/kubelet/dockershim/libdocker:go_default_library", - "//pkg/kubelet/gpu:go_default_library", - "//vendor/github.com/golang/glog:go_default_library", - "//vendor/k8s.io/api/core/v1:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library", - ], -) - -filegroup( - name = "package-srcs", - srcs = glob(["**"]), - tags = ["automanaged"], - visibility = ["//visibility:private"], -) - -filegroup( - name = "all-srcs", - srcs = [":package-srcs"], - tags = ["automanaged"], -) - -go_test( - name = "go_default_test", - srcs = ["nvidia_gpu_manager_test.go"], - embed = [":go_default_library"], - deps = [ - "//pkg/kubelet/dockershim:go_default_library", - "//pkg/kubelet/dockershim/libdocker:go_default_library", - "//vendor/github.com/stretchr/testify/assert:go_default_library", - "//vendor/k8s.io/api/core/v1:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/util/uuid:go_default_library", - ], -) diff --git a/pkg/kubelet/gpu/nvidia/helpers.go b/pkg/kubelet/gpu/nvidia/helpers.go deleted file mode 100644 index a6ed7c4f935..00000000000 --- a/pkg/kubelet/gpu/nvidia/helpers.go +++ /dev/null @@ -1,77 +0,0 @@ -/* -Copyright 2017 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -*/ - -package nvidia - -import "k8s.io/apimachinery/pkg/util/sets" - -type containerToGPU map[string]sets.String - -// podGPUs represents a list of pod to GPU mappings. -type podGPUs struct { - podGPUMapping map[string]containerToGPU -} - -func newPodGPUs() *podGPUs { - return &podGPUs{ - podGPUMapping: make(map[string]containerToGPU), - } -} -func (pgpu *podGPUs) pods() sets.String { - ret := sets.NewString() - for k := range pgpu.podGPUMapping { - ret.Insert(k) - } - return ret -} - -func (pgpu *podGPUs) insert(podUID, contName string, device string) { - if _, exists := pgpu.podGPUMapping[podUID]; !exists { - pgpu.podGPUMapping[podUID] = make(containerToGPU) - } - if _, exists := pgpu.podGPUMapping[podUID][contName]; !exists { - pgpu.podGPUMapping[podUID][contName] = sets.NewString() - } - pgpu.podGPUMapping[podUID][contName].Insert(device) -} - -func (pgpu *podGPUs) getGPUs(podUID, contName string) sets.String { - containers, exists := pgpu.podGPUMapping[podUID] - if !exists { - return nil - } - devices, exists := containers[contName] - if !exists { - return nil - } - return devices -} - -func (pgpu *podGPUs) delete(pods []string) { - for _, uid := range pods { - delete(pgpu.podGPUMapping, uid) - } -} - -func (pgpu *podGPUs) devices() sets.String { - ret := sets.NewString() - for _, containerToGPU := range pgpu.podGPUMapping { - for _, deviceSet := range containerToGPU { - ret = ret.Union(deviceSet) - } - } - return ret -} diff --git a/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go b/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go deleted file mode 100644 index 37c30d14b55..00000000000 --- a/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go +++ /dev/null @@ -1,280 +0,0 @@ -/* -Copyright 2017 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package nvidia - -import ( - "fmt" - "io/ioutil" - "os" - "path" - "regexp" - "strings" - "sync" - - "github.com/golang/glog" - - "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" - "k8s.io/apimachinery/pkg/util/sets" - "k8s.io/kubernetes/pkg/kubelet/dockershim" - "k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker" - "k8s.io/kubernetes/pkg/kubelet/gpu" -) - -// TODO: rework to use Nvidia's NVML, which is more complex, but also provides more fine-grained information and stats. -const ( - // All NVIDIA GPUs cards should be mounted with nvidiactl and nvidia-uvm - // If the driver installed correctly, the 2 devices will be there. - nvidiaCtlDevice string = "/dev/nvidiactl" - nvidiaUVMDevice string = "/dev/nvidia-uvm" - // Optional device. - nvidiaUVMToolsDevice string = "/dev/nvidia-uvm-tools" - devDirectory = "/dev" -) - -var ( - nvidiaDeviceRE = regexp.MustCompile(`^nvidia[0-9]*$`) - nvidiaFullpathRE = regexp.MustCompile(`^/dev/nvidia[0-9]*$`) -) - -type activePodsLister interface { - // Returns a list of active pods on the node. - GetActivePods() []*v1.Pod -} - -// nvidiaGPUManager manages nvidia gpu devices. 
-type nvidiaGPUManager struct { - sync.Mutex - // All gpus available on the Node - allGPUs sets.String - allocated *podGPUs - defaultDevices []string - // The interface which could get GPU mapping from all the containers. - // TODO: Should make this independent of Docker in the future. - dockerClient libdocker.Interface - activePodsLister activePodsLister -} - -// NewNvidiaGPUManager returns a GPUManager that manages local Nvidia GPUs. -// TODO: Migrate to use pod level cgroups and make it generic to all runtimes. -func NewNvidiaGPUManager(activePodsLister activePodsLister, config *dockershim.ClientConfig) (gpu.GPUManager, error) { - dockerClient := dockershim.NewDockerClientFromConfig(config) - if dockerClient == nil { - return nil, fmt.Errorf("invalid docker client configure specified") - } - - return &nvidiaGPUManager{ - allGPUs: sets.NewString(), - dockerClient: dockerClient, - activePodsLister: activePodsLister, - }, nil -} - -// Initialize the GPU devices, so far only needed to discover the GPU paths. -func (ngm *nvidiaGPUManager) Start() error { - if ngm.dockerClient == nil { - return fmt.Errorf("Invalid docker client specified in GPU Manager") - } - ngm.Lock() - defer ngm.Unlock() - - if _, err := os.Stat(nvidiaCtlDevice); err != nil { - return err - } - - if _, err := os.Stat(nvidiaUVMDevice); err != nil { - return err - } - ngm.defaultDevices = []string{nvidiaCtlDevice, nvidiaUVMDevice} - _, err := os.Stat(nvidiaUVMToolsDevice) - if !os.IsNotExist(err) { - ngm.defaultDevices = append(ngm.defaultDevices, nvidiaUVMToolsDevice) - } - - if err := ngm.discoverGPUs(); err != nil { - return err - } - - // We ignore errors when identifying allocated GPUs because it is possible that the runtime interfaces may be not be logically up. - return nil -} - -// Get how many GPU cards we have. -func (ngm *nvidiaGPUManager) Capacity() v1.ResourceList { - gpus := resource.NewQuantity(int64(len(ngm.allGPUs)), resource.DecimalSI) - return v1.ResourceList{ - v1.ResourceNvidiaGPU: *gpus, - } -} - -// AllocateGPUs returns `num` GPUs if available, error otherwise. -// Allocation is made thread safe using the following logic. -// A list of all GPUs allocated is maintained along with their respective Pod UIDs. -// It is expected that the list of active pods will not return any false positives. -// As part of initialization or allocation, the list of GPUs in use will be computed once. -// Whenever an allocation happens, the list of GPUs allocated is updated based on the list of currently active pods. -// GPUs allocated to terminated pods are freed up lazily as part of allocation. -// GPUs are allocated based on the internal list of allocatedGPUs. -// It is not safe to generate a list of GPUs in use by inspecting active containers because of the delay between GPU allocation and container creation. -// A GPU allocated to a container might be re-allocated to a subsequent container because the original container wasn't started quick enough. -// The current algorithm scans containers only once and then uses a list of active pods to track GPU usage. -// This is a sub-optimal solution and a better alternative would be that of using pod level cgroups instead. -// GPUs allocated to containers should be reflected in pod level device cgroups before completing allocations. -// The pod level cgroups will then serve as a checkpoint of GPUs in use. 
-func (ngm *nvidiaGPUManager) AllocateGPU(pod *v1.Pod, container *v1.Container) ([]string, error) { - gpusNeeded := container.Resources.Limits.NvidiaGPU().Value() - if gpusNeeded == 0 { - return []string{}, nil - } - ngm.Lock() - defer ngm.Unlock() - if ngm.allocated == nil { - // Initialization is not complete. Try now. Failures can no longer be tolerated. - ngm.allocated = ngm.gpusInUse() - } else { - // update internal list of GPUs in use prior to allocating new GPUs. - ngm.updateAllocatedGPUs() - } - // Check if GPUs have already been allocated. If so return them right away. - // This can happen if a container restarts for example. - if devices := ngm.allocated.getGPUs(string(pod.UID), container.Name); devices != nil { - glog.V(2).Infof("Found pre-allocated GPUs for container %q in Pod %q: %v", container.Name, pod.UID, devices.List()) - return append(devices.List(), ngm.defaultDevices...), nil - } - // Get GPU devices in use. - devicesInUse := ngm.allocated.devices() - glog.V(5).Infof("gpus in use: %v", devicesInUse.List()) - // Get a list of available GPUs. - available := ngm.allGPUs.Difference(devicesInUse) - glog.V(5).Infof("gpus available: %v", available.List()) - if int64(available.Len()) < gpusNeeded { - return nil, fmt.Errorf("requested number of GPUs unavailable. Requested: %d, Available: %d", gpusNeeded, available.Len()) - } - ret := available.UnsortedList()[:gpusNeeded] - for _, device := range ret { - // Update internal allocated GPU cache. - ngm.allocated.insert(string(pod.UID), container.Name, device) - } - // Add standard devices files that needs to be exposed. - ret = append(ret, ngm.defaultDevices...) - - return ret, nil -} - -// updateAllocatedGPUs updates the list of GPUs in use. -// It gets a list of active pods and then frees any GPUs that are bound to terminated pods. -// Returns error on failure. -func (ngm *nvidiaGPUManager) updateAllocatedGPUs() { - activePods := ngm.activePodsLister.GetActivePods() - activePodUids := sets.NewString() - for _, pod := range activePods { - activePodUids.Insert(string(pod.UID)) - } - allocatedPodUids := ngm.allocated.pods() - podsToBeRemoved := allocatedPodUids.Difference(activePodUids) - glog.V(5).Infof("pods to be removed: %v", podsToBeRemoved.List()) - ngm.allocated.delete(podsToBeRemoved.List()) -} - -// discoverGPUs identifies allGPUs NVIDIA GPU devices available on the local node by walking `/dev` directory. -// TODO: Without NVML support we only can check whether there has GPU devices, but -// could not give a health check or get more information like GPU cores, memory, or -// family name. Need to support NVML in the future. But we do not need NVML until -// we want more features, features like schedule containers according to GPU family -// name. -func (ngm *nvidiaGPUManager) discoverGPUs() error { - files, err := ioutil.ReadDir(devDirectory) - if err != nil { - return err - } - for _, f := range files { - if f.IsDir() { - continue - } - if nvidiaDeviceRE.MatchString(f.Name()) { - glog.V(2).Infof("Found Nvidia GPU %q", f.Name()) - ngm.allGPUs.Insert(path.Join(devDirectory, f.Name())) - } - } - - return nil -} - -// gpusInUse returns a list of GPUs in use along with the respective pods that are using it. -func (ngm *nvidiaGPUManager) gpusInUse() *podGPUs { - pods := ngm.activePodsLister.GetActivePods() - type containerIdentifier struct { - id string - name string - } - type podContainers struct { - uid string - containers []containerIdentifier - } - // List of containers to inspect. 
- podContainersToInspect := []podContainers{} - for _, pod := range pods { - containers := sets.NewString() - for _, container := range pod.Spec.Containers { - // GPUs are expected to be specified only in limits. - if !container.Resources.Limits.NvidiaGPU().IsZero() { - containers.Insert(container.Name) - } - } - // If no GPUs were requested skip this pod. - if containers.Len() == 0 { - continue - } - // TODO: If kubelet restarts right after allocating a GPU to a pod, the container might not have started yet and so container status might not be available yet. - // Use an internal checkpoint instead or try using the CRI if its checkpoint is reliable. - var containersToInspect []containerIdentifier - for _, container := range pod.Status.ContainerStatuses { - if containers.Has(container.Name) { - containersToInspect = append(containersToInspect, containerIdentifier{strings.Replace(container.ContainerID, "docker://", "", 1), container.Name}) - } - } - // add the pod and its containers that need to be inspected. - podContainersToInspect = append(podContainersToInspect, podContainers{string(pod.UID), containersToInspect}) - } - ret := newPodGPUs() - for _, podContainer := range podContainersToInspect { - for _, containerIdentifier := range podContainer.containers { - containerJSON, err := ngm.dockerClient.InspectContainer(containerIdentifier.id) - if err != nil { - glog.V(3).Infof("Failed to inspect container %q in pod %q while attempting to reconcile nvidia gpus in use", containerIdentifier.id, podContainer.uid) - continue - } - - devices := containerJSON.HostConfig.Devices - if devices == nil { - continue - } - - for _, device := range devices { - if isValidPath(device.PathOnHost) { - glog.V(4).Infof("Nvidia GPU %q is in use by Docker Container: %q", device.PathOnHost, containerJSON.ID) - ret.insert(podContainer.uid, containerIdentifier.name, device.PathOnHost) - } - } - } - } - return ret -} - -func isValidPath(path string) bool { - return nvidiaFullpathRE.MatchString(path) -} diff --git a/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager_test.go b/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager_test.go deleted file mode 100644 index 8dc2cd6e672..00000000000 --- a/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager_test.go +++ /dev/null @@ -1,213 +0,0 @@ -/* -Copyright 2017 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package nvidia - -import ( - "os" - "reflect" - "testing" - - "github.com/stretchr/testify/assert" - - "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/sets" - "k8s.io/apimachinery/pkg/util/uuid" - "k8s.io/kubernetes/pkg/kubelet/dockershim" - "k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker" -) - -type testActivePodsLister struct { - activePods []*v1.Pod -} - -func (tapl *testActivePodsLister) GetActivePods() []*v1.Pod { - return tapl.activePods -} - -func makeTestPod(numContainers, gpusPerContainer int) *v1.Pod { - quantity := resource.NewQuantity(int64(gpusPerContainer), resource.DecimalSI) - resources := v1.ResourceRequirements{ - Limits: v1.ResourceList{ - v1.ResourceNvidiaGPU: *quantity, - }, - } - pod := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: uuid.NewUUID(), - }, - Spec: v1.PodSpec{ - Containers: []v1.Container{}, - }, - } - for ; numContainers > 0; numContainers-- { - pod.Spec.Containers = append(pod.Spec.Containers, v1.Container{ - Name: string(uuid.NewUUID()), - Resources: resources, - }) - } - return pod -} - -func TestNewNvidiaGPUManager(t *testing.T) { - podLister := &testActivePodsLister{} - - // Expects nil GPUManager and an error with nil dockerClient. - testGpuManager1, err := NewNvidiaGPUManager(podLister, nil) - as := assert.New(t) - as.Nil(testGpuManager1) - as.NotNil(err) - - // Expects a GPUManager to be created with non-nil dockerClient. - testGpuManager2, err := NewNvidiaGPUManager(podLister, &dockershim.ClientConfig{ - DockerEndpoint: libdocker.FakeDockerEndpoint, - }) - as.NotNil(testGpuManager2) - as.Nil(err) - - // Expects zero capacity without any GPUs. - gpuCapacity := testGpuManager2.Capacity() - as.Equal(len(gpuCapacity), 1) - rgpu := gpuCapacity[v1.ResourceNvidiaGPU] - as.Equal(rgpu.Value(), int64(0)) - - err2 := testGpuManager2.Start() - if !os.IsNotExist(err2) { - gpus := reflect.ValueOf(testGpuManager2).Elem().FieldByName("allGPUs").Len() - as.NotZero(gpus) - } -} - -func TestMultiContainerPodGPUAllocation(t *testing.T) { - podLister := &testActivePodsLister{} - - testGpuManager := &nvidiaGPUManager{ - activePodsLister: podLister, - allGPUs: sets.NewString("/dev/nvidia0", "/dev/nvidia1"), - allocated: newPodGPUs(), - } - - // Expect that no devices are in use. - gpusInUse := testGpuManager.gpusInUse() - as := assert.New(t) - as.Equal(len(gpusInUse.devices()), 0) - - // Allocated GPUs for a pod with two containers. - pod := makeTestPod(2, 1) - // Allocate for the first container. - devices1, err := testGpuManager.AllocateGPU(pod, &pod.Spec.Containers[0]) - as.Nil(err) - as.Equal(len(devices1), 1) - - podLister.activePods = append(podLister.activePods, pod) - // Allocate for the second container. - devices2, err := testGpuManager.AllocateGPU(pod, &pod.Spec.Containers[1]) - as.Nil(err) - as.Equal(len(devices2), 1) - - as.NotEqual(devices1, devices2, "expected containers to get different devices") - - // further allocations should fail. - newPod := makeTestPod(2, 1) - devices1, err = testGpuManager.AllocateGPU(newPod, &newPod.Spec.Containers[0]) - as.NotNil(err, "expected gpu allocation to fail. got: %v", devices1) - - // Now terminate the original pod and observe that GPU allocation for new pod succeeds. 
- podLister.activePods = podLister.activePods[:0] - - devices1, err = testGpuManager.AllocateGPU(newPod, &newPod.Spec.Containers[0]) - as.Nil(err) - as.Equal(len(devices1), 1) - - podLister.activePods = append(podLister.activePods, newPod) - - devices2, err = testGpuManager.AllocateGPU(newPod, &newPod.Spec.Containers[1]) - as.Nil(err) - as.Equal(len(devices2), 1) - - as.NotEqual(devices1, devices2, "expected containers to get different devices") -} - -func TestMultiPodGPUAllocation(t *testing.T) { - podLister := &testActivePodsLister{} - - testGpuManager := &nvidiaGPUManager{ - activePodsLister: podLister, - allGPUs: sets.NewString("/dev/nvidia0", "/dev/nvidia1"), - allocated: newPodGPUs(), - } - - // Expect that no devices are in use. - gpusInUse := testGpuManager.gpusInUse() - as := assert.New(t) - as.Equal(len(gpusInUse.devices()), 0) - - // Allocated GPUs for a pod with two containers. - podA := makeTestPod(1, 1) - // Allocate for the first container. - devicesA, err := testGpuManager.AllocateGPU(podA, &podA.Spec.Containers[0]) - as.Nil(err) - as.Equal(len(devicesA), 1) - - podLister.activePods = append(podLister.activePods, podA) - - // further allocations should fail. - podB := makeTestPod(1, 1) - // Allocate for the first container. - devicesB, err := testGpuManager.AllocateGPU(podB, &podB.Spec.Containers[0]) - as.Nil(err) - as.Equal(len(devicesB), 1) - as.NotEqual(devicesA, devicesB, "expected pods to get different devices") -} - -func TestPodContainerRestart(t *testing.T) { - podLister := &testActivePodsLister{} - - testGpuManager := &nvidiaGPUManager{ - activePodsLister: podLister, - allGPUs: sets.NewString("/dev/nvidia0", "/dev/nvidia1"), - allocated: newPodGPUs(), - defaultDevices: []string{"/dev/nvidia-smi"}, - } - - // Expect that no devices are in use. - gpusInUse := testGpuManager.gpusInUse() - as := assert.New(t) - as.Equal(len(gpusInUse.devices()), 0) - - // Make a pod with one containers that requests two GPUs. - podA := makeTestPod(1, 2) - // Allocate GPUs - devicesA, err := testGpuManager.AllocateGPU(podA, &podA.Spec.Containers[0]) - as.Nil(err) - as.Equal(len(devicesA), 3) - - podLister.activePods = append(podLister.activePods, podA) - - // further allocations should fail. - podB := makeTestPod(1, 1) - _, err = testGpuManager.AllocateGPU(podB, &podB.Spec.Containers[0]) - as.NotNil(err) - - // Allcate GPU for existing Pod A. - // The same gpus must be returned. - devicesAretry, err := testGpuManager.AllocateGPU(podA, &podA.Spec.Containers[0]) - as.Nil(err) - as.Equal(len(devicesA), 3) - as.True(sets.NewString(devicesA...).Equal(sets.NewString(devicesAretry...))) -} diff --git a/pkg/kubelet/gpu/types.go b/pkg/kubelet/gpu/types.go deleted file mode 100644 index b834c9b52b3..00000000000 --- a/pkg/kubelet/gpu/types.go +++ /dev/null @@ -1,32 +0,0 @@ -/* -Copyright 2017 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package gpu - -import "k8s.io/api/core/v1" - -// GPUManager manages GPUs on a local node. -// Implementations are expected to be thread safe. 
-type GPUManager interface { - // Start logically initializes GPUManager - Start() error - // Capacity returns the total number of GPUs on the node. - Capacity() v1.ResourceList - // AllocateGPU attempts to allocate GPUs for input container. - // Returns paths to allocated GPUs and nil on success. - // Returns an error on failure. - AllocateGPU(*v1.Pod, *v1.Container) ([]string, error) -} diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 3b967e8235d..1f232a36828 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -69,8 +69,6 @@ import ( dockerremote "k8s.io/kubernetes/pkg/kubelet/dockershim/remote" "k8s.io/kubernetes/pkg/kubelet/events" "k8s.io/kubernetes/pkg/kubelet/eviction" - "k8s.io/kubernetes/pkg/kubelet/gpu" - "k8s.io/kubernetes/pkg/kubelet/gpu/nvidia" "k8s.io/kubernetes/pkg/kubelet/images" "k8s.io/kubernetes/pkg/kubelet/kubeletconfig" "k8s.io/kubernetes/pkg/kubelet/kuberuntime" @@ -866,20 +864,6 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration, klet.appArmorValidator = apparmor.NewValidator(containerRuntime) klet.softAdmitHandlers.AddPodAdmitHandler(lifecycle.NewAppArmorAdmitHandler(klet.appArmorValidator)) klet.softAdmitHandlers.AddPodAdmitHandler(lifecycle.NewNoNewPrivsAdmitHandler(klet.containerRuntime)) - if utilfeature.DefaultFeatureGate.Enabled(features.Accelerators) { - if containerRuntime == kubetypes.DockerContainerRuntime { - glog.Warningln("Accelerators feature is deprecated and will be removed in v1.11. Please use device plugins instead. They can be enabled using the DevicePlugins feature gate.") - if klet.gpuManager, err = nvidia.NewNvidiaGPUManager(klet, kubeDeps.DockerClientConfig); err != nil { - return nil, err - } - } else { - glog.Errorf("Accelerators feature is supported with docker runtime only. Disabling this feature internally.") - } - } - // Set GPU manager to a stub implementation if it is not enabled or cannot be supported. - if klet.gpuManager == nil { - klet.gpuManager = gpu.NewGPUManagerStub() - } // Finally, put the most recent version of the config on the Kubelet, so // people can see how it was configured. klet.kubeletConfiguration = *kubeCfg @@ -1152,9 +1136,6 @@ type Kubelet struct { // experimental behavior is desired. experimentalHostUserNamespaceDefaulting bool - // GPU Manager - gpuManager gpu.GPUManager - // dockerLegacyService contains some legacy methods for backward compatibility. // It should be set only when docker is using non json-file logging driver. dockerLegacyService dockershim.DockerLegacyService @@ -1292,11 +1273,6 @@ func (kl *Kubelet) initializeModules() error { return fmt.Errorf("Failed to start OOM watcher %v", err) } - // Initialize GPUs - if err := kl.gpuManager.Start(); err != nil { - glog.Errorf("Failed to start gpuManager %v", err) - } - // Start resource analyzer kl.resourceAnalyzer.Start() diff --git a/pkg/kubelet/kubelet_node_status.go b/pkg/kubelet/kubelet_node_status.go index 85e7ca7d17c..6c51b3561b1 100644 --- a/pkg/kubelet/kubelet_node_status.go +++ b/pkg/kubelet/kubelet_node_status.go @@ -540,14 +540,6 @@ func (kl *Kubelet) setNodeStatusMachineInfo(node *v1.Node) { node.Status.Capacity = v1.ResourceList{} } - // populate GPU capacity. 
- gpuCapacity := kl.gpuManager.Capacity() - if gpuCapacity != nil { - for k, v := range gpuCapacity { - node.Status.Capacity[k] = v - } - } - var devicePluginAllocatable v1.ResourceList var devicePluginCapacity v1.ResourceList var removedDevicePlugins []string diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go index 76f59f3b3c6..c1beb58c405 100644 --- a/pkg/kubelet/kubelet_pods.go +++ b/pkg/kubelet/kubelet_pods.go @@ -90,26 +90,6 @@ func (kl *Kubelet) GetActivePods() []*v1.Pod { return activePods } -// makeGPUDevices determines the devices for the given container. -// Experimental. -func (kl *Kubelet) makeGPUDevices(pod *v1.Pod, container *v1.Container) ([]kubecontainer.DeviceInfo, error) { - if container.Resources.Limits.NvidiaGPU().IsZero() { - return nil, nil - } - - nvidiaGPUPaths, err := kl.gpuManager.AllocateGPU(pod, container) - if err != nil { - return nil, err - } - var devices []kubecontainer.DeviceInfo - for _, path := range nvidiaGPUPaths { - // Devices have to be mapped one to one because of nvidia CUDA library requirements. - devices = append(devices, kubecontainer.DeviceInfo{PathOnHost: path, PathInContainer: path, Permissions: "mrw"}) - } - - return devices, nil -} - func makeAbsolutePath(goos, path string) string { if goos != "windows" { return "/" + path @@ -470,12 +450,6 @@ func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Contai volumes := kl.volumeManager.GetMountedVolumesForPod(podName) opts.PortMappings = kubecontainer.MakePortMappings(container) - // TODO(random-liu): Move following convert functions into pkg/kubelet/container - devices, err := kl.makeGPUDevices(pod, container) - if err != nil { - return nil, nil, err - } - opts.Devices = append(opts.Devices, devices...) // TODO: remove feature gate check after no longer needed if utilfeature.DefaultFeatureGate.Enabled(features.BlockVolume) { diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go index b849f91dafd..0a3bad14c34 100644 --- a/pkg/kubelet/kubelet_test.go +++ b/pkg/kubelet/kubelet_test.go @@ -49,7 +49,6 @@ import ( kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" containertest "k8s.io/kubernetes/pkg/kubelet/container/testing" "k8s.io/kubernetes/pkg/kubelet/eviction" - "k8s.io/kubernetes/pkg/kubelet/gpu" "k8s.io/kubernetes/pkg/kubelet/images" "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/logs" @@ -325,7 +324,6 @@ func newTestKubeletWithImageList( kubelet.AddPodSyncLoopHandler(activeDeadlineHandler) kubelet.AddPodSyncHandler(activeDeadlineHandler) - kubelet.gpuManager = gpu.NewGPUManagerStub() return &TestKubelet{kubelet, fakeRuntime, mockCadvisor, fakeKubeClient, fakeMirrorClient, fakeClock, nil, plug} } diff --git a/pkg/kubelet/preemption/preemption.go b/pkg/kubelet/preemption/preemption.go index 96d829e4f8c..fdccd2c40a0 100644 --- a/pkg/kubelet/preemption/preemption.go +++ b/pkg/kubelet/preemption/preemption.go @@ -248,7 +248,6 @@ func sortPodsByQOS(pods []*v1.Pod) (bestEffort, burstable, guaranteed []*v1.Pod) // returns true if pod1 has a smaller request than pod2 func smallerResourceRequest(pod1 *v1.Pod, pod2 *v1.Pod) bool { priorityList := []v1.ResourceName{ - v1.ResourceNvidiaGPU, v1.ResourceMemory, v1.ResourceCPU, } diff --git a/pkg/scheduler/algorithm/predicates/predicates.go b/pkg/scheduler/algorithm/predicates/predicates.go index c03b6b412b3..0a8f8c02f32 100644 --- a/pkg/scheduler/algorithm/predicates/predicates.go +++ b/pkg/scheduler/algorithm/predicates/predicates.go @@ -682,10 +682,6 @@ func 
GetResourceRequest(pod *v1.Pod) *schedulercache.Resource {
 			if cpu := rQuantity.MilliValue(); cpu > result.MilliCPU {
 				result.MilliCPU = cpu
 			}
-		case v1.ResourceNvidiaGPU:
-			if gpu := rQuantity.Value(); gpu > result.NvidiaGPU {
-				result.NvidiaGPU = gpu
-			}
 		default:
 			if v1helper.IsScalarResourceName(rName) {
 				value := rQuantity.Value()
@@ -734,7 +730,6 @@ func PodFitsResources(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *s
 	}
 	if podRequest.MilliCPU == 0 &&
 		podRequest.Memory == 0 &&
-		podRequest.NvidiaGPU == 0 &&
 		podRequest.EphemeralStorage == 0 &&
 		len(podRequest.ScalarResources) == 0 {
 		return len(predicateFails) == 0, predicateFails, nil
@@ -747,10 +742,6 @@ func PodFitsResources(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *s
 	if allocatable.Memory < podRequest.Memory+nodeInfo.RequestedResource().Memory {
 		predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceMemory, podRequest.Memory, nodeInfo.RequestedResource().Memory, allocatable.Memory))
 	}
-	if allocatable.NvidiaGPU < podRequest.NvidiaGPU+nodeInfo.RequestedResource().NvidiaGPU {
-		predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceNvidiaGPU, podRequest.NvidiaGPU, nodeInfo.RequestedResource().NvidiaGPU, allocatable.NvidiaGPU))
-	}
-
 	if allocatable.EphemeralStorage < podRequest.EphemeralStorage+nodeInfo.RequestedResource().EphemeralStorage {
 		predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceEphemeralStorage, podRequest.EphemeralStorage, nodeInfo.RequestedResource().EphemeralStorage, allocatable.EphemeralStorage))
 	}
diff --git a/pkg/scheduler/algorithm/predicates/predicates_test.go b/pkg/scheduler/algorithm/predicates/predicates_test.go
index 736c864650e..6b9e6f0a0b4 100644
--- a/pkg/scheduler/algorithm/predicates/predicates_test.go
+++ b/pkg/scheduler/algorithm/predicates/predicates_test.go
@@ -44,13 +44,12 @@ var (
 	hugePageResourceA = v1helper.HugePageResourceName(resource.MustParse("2Mi"))
 )

-func makeResources(milliCPU, memory, nvidiaGPUs, pods, extendedA, storage, hugePageA int64) v1.NodeResources {
+func makeResources(milliCPU, memory, pods, extendedA, storage, hugePageA int64) v1.NodeResources {
 	return v1.NodeResources{
 		Capacity: v1.ResourceList{
 			v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
 			v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
 			v1.ResourcePods: *resource.NewQuantity(pods, resource.DecimalSI),
-			v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
 			extendedResourceA: *resource.NewQuantity(extendedA, resource.DecimalSI),
 			v1.ResourceEphemeralStorage: *resource.NewQuantity(storage, resource.BinarySI),
 			hugePageResourceA: *resource.NewQuantity(hugePageA, resource.BinarySI),
@@ -58,12 +57,11 @@ func makeResources(milliCPU, memory, nvidiaGPUs, pods, extendedA, storage, hugeP
 	}
 }

-func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, extendedA, storage, hugePageA int64) v1.ResourceList {
+func makeAllocatableResources(milliCPU, memory, pods, extendedA, storage, hugePageA int64) v1.ResourceList {
 	return v1.ResourceList{
 		v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
 		v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
 		v1.ResourcePods: *resource.NewQuantity(pods, resource.DecimalSI),
-		v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
 		extendedResourceA: *resource.NewQuantity(extendedA, resource.DecimalSI),
 		v1.ResourceEphemeralStorage: *resource.NewQuantity(storage, resource.BinarySI),
 		hugePageResourceA: *resource.NewQuantity(hugePageA, resource.BinarySI),
@@ -357,7 +355,7 @@ func TestPodFitsResources(t *testing.T) {
 	}

 	for _, test := range enoughPodsTests {
-		node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20, 5)}}
+		node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 5, 20, 5)}}
 		test.nodeInfo.SetNode(&node)
 		RegisterPredicateMetadataProducerWithExtendedResourceOptions(test.ignoredExtendedResources)
 		meta := PredicateMetadata(test.pod, nil)
@@ -414,7 +412,7 @@ func TestPodFitsResources(t *testing.T) {
 		},
 	}
 	for _, test := range notEnoughPodsTests {
-		node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1, 0, 0, 0)}}
+		node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 1, 0, 0, 0)}}
 		test.nodeInfo.SetNode(&node)
 		fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
 		if err != nil {
@@ -472,7 +470,7 @@ func TestPodFitsResources(t *testing.T) {
 	}

 	for _, test := range storagePodsTests {
-		node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20, 5)}}
+		node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 5, 20, 5)}}
 		test.nodeInfo.SetNode(&node)
 		fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
 		if err != nil {
@@ -2062,7 +2060,7 @@ func TestRunGeneralPredicates(t *testing.T) {
 				newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
 			node: &v1.Node{
 				ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
-				Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
+				Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 0, 0, 0)},
 			},
 			fits: true,
 			wErr: nil,
@@ -2074,7 +2072,7 @@ func TestRunGeneralPredicates(t *testing.T) {
 				newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 19})),
 			node: &v1.Node{
 				ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
-				Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
+				Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 0, 0, 0)},
 			},
 			fits: false,
 			wErr: nil,
@@ -2084,34 +2082,6 @@ func TestRunGeneralPredicates(t *testing.T) {
 			},
 			test: "not enough cpu and memory resource",
 		},
-		{
-			pod: &v1.Pod{},
-			nodeInfo: schedulercache.NewNodeInfo(
-				newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
-			node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
-			fits: true,
-			wErr: nil,
-			test: "no resources/port/host requested always fits on GPU machine",
-		},
-		{
-			pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
-			nodeInfo: schedulercache.NewNodeInfo(
-				newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 1})),
-			node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
-			fits: false,
-			wErr: nil,
-			reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(v1.ResourceNvidiaGPU, 1, 1, 1)},
-			test: "not enough GPU resource",
-		},
-		{
-			pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
-			nodeInfo: schedulercache.NewNodeInfo(
-				newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 0})),
-			node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
-			fits: true,
-			wErr: nil,
-			test: "enough GPU resource",
-		},
 		{
 			pod: &v1.Pod{
 				Spec: v1.PodSpec{
@@ -2121,7 +2091,7 @@ func TestRunGeneralPredicates(t *testing.T) {
 			nodeInfo: schedulercache.NewNodeInfo(),
 			node: &v1.Node{
 				ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
-				Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
+				Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 0, 0, 0)},
 			},
 			fits: false,
 			wErr: nil,
@@ -2133,7 +2103,7 @@ func TestRunGeneralPredicates(t *testing.T) {
 			nodeInfo: schedulercache.NewNodeInfo(newPodWithPort(123)),
 			node: &v1.Node{
 				ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
-				Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
+				Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 0, 0, 0)},
 			},
 			fits: false,
 			wErr: nil,
@@ -3443,7 +3413,7 @@ func TestPodSchedulesOnNodeWithMemoryPressureCondition(t *testing.T) {
 					ImagePullPolicy: "Always",
 					// at least one requirement -> burstable pod
 					Resources: v1.ResourceRequirements{
-						Requests: makeAllocatableResources(100, 100, 100, 100, 0, 0, 0),
+						Requests: makeAllocatableResources(100, 100, 100, 0, 0, 0),
 					},
 				},
 			},
diff --git a/pkg/scheduler/algorithm/priorities/resource_limits.go b/pkg/scheduler/algorithm/priorities/resource_limits.go
index 3267368d2f9..6f440d8f1dd 100644
--- a/pkg/scheduler/algorithm/priorities/resource_limits.go
+++ b/pkg/scheduler/algorithm/priorities/resource_limits.go
@@ -109,10 +109,6 @@ func getResourceLimits(pod *v1.Pod) *schedulercache.Resource {
 			if ephemeralStorage := rQuantity.Value(); ephemeralStorage > result.EphemeralStorage {
 				result.EphemeralStorage = ephemeralStorage
 			}
-		case v1.ResourceNvidiaGPU:
-			if gpu := rQuantity.Value(); gpu > result.NvidiaGPU {
-				result.NvidiaGPU = gpu
-			}
 		default:
 			if v1helper.IsScalarResourceName(rName) {
 				value := rQuantity.Value()
diff --git a/pkg/scheduler/schedulercache/node_info.go b/pkg/scheduler/schedulercache/node_info.go
index 9d09be74285..a98e9cdcabc 100644
--- a/pkg/scheduler/schedulercache/node_info.go
+++ b/pkg/scheduler/schedulercache/node_info.go
@@ -114,7 +114,6 @@ func (transientSchedInfo *transientSchedulerInfo) resetTransientSchedulerInfo()
 type Resource struct {
 	MilliCPU         int64
 	Memory           int64
-	NvidiaGPU        int64
 	EphemeralStorage int64
 	// We store allowedPodNumber (which is Node.Status.Allocatable.Pods().Value())
 	// explicitly as int, to avoid conversions and improve performance.
@@ -142,8 +141,6 @@ func (r *Resource) Add(rl v1.ResourceList) {
 			r.MilliCPU += rQuant.MilliValue()
 		case v1.ResourceMemory:
 			r.Memory += rQuant.Value()
-		case v1.ResourceNvidiaGPU:
-			r.NvidiaGPU += rQuant.Value()
 		case v1.ResourcePods:
 			r.AllowedPodNumber += int(rQuant.Value())
 		case v1.ResourceEphemeralStorage:
@@ -161,7 +158,6 @@ func (r *Resource) ResourceList() v1.ResourceList {
 	result := v1.ResourceList{
 		v1.ResourceCPU: *resource.NewMilliQuantity(r.MilliCPU, resource.DecimalSI),
 		v1.ResourceMemory: *resource.NewQuantity(r.Memory, resource.BinarySI),
-		v1.ResourceNvidiaGPU: *resource.NewQuantity(r.NvidiaGPU, resource.DecimalSI),
 		v1.ResourcePods: *resource.NewQuantity(int64(r.AllowedPodNumber), resource.BinarySI),
 		v1.ResourceEphemeralStorage: *resource.NewQuantity(r.EphemeralStorage, resource.BinarySI),
 	}
@@ -180,7 +176,6 @@ func (r *Resource) Clone() *Resource {
 	res := &Resource{
 		MilliCPU: r.MilliCPU,
 		Memory: r.Memory,
-		NvidiaGPU: r.NvidiaGPU,
 		AllowedPodNumber: r.AllowedPodNumber,
 		EphemeralStorage: r.EphemeralStorage,
 	}
@@ -369,7 +364,6 @@ func (n *NodeInfo) AddPod(pod *v1.Pod) {
 	res, non0CPU, non0Mem := calculateResource(pod)
 	n.requestedResource.MilliCPU += res.MilliCPU
 	n.requestedResource.Memory += res.Memory
-	n.requestedResource.NvidiaGPU += res.NvidiaGPU
 	n.requestedResource.EphemeralStorage += res.EphemeralStorage
 	if n.requestedResource.ScalarResources == nil && len(res.ScalarResources) > 0 {
 		n.requestedResource.ScalarResources = map[v1.ResourceName]int64{}
@@ -425,7 +419,6 @@ func (n *NodeInfo) RemovePod(pod *v1.Pod) error {

 			n.requestedResource.MilliCPU -= res.MilliCPU
 			n.requestedResource.Memory -= res.Memory
-			n.requestedResource.NvidiaGPU -= res.NvidiaGPU
 			n.requestedResource.EphemeralStorage -= res.EphemeralStorage
 			if len(res.ScalarResources) > 0 && n.requestedResource.ScalarResources == nil {
 				n.requestedResource.ScalarResources = map[v1.ResourceName]int64{}
diff --git a/pkg/scheduler/schedulercache/node_info_test.go b/pkg/scheduler/schedulercache/node_info_test.go
index 03b3646a8c2..40a9e5afbac 100644
--- a/pkg/scheduler/schedulercache/node_info_test.go
+++ b/pkg/scheduler/schedulercache/node_info_test.go
@@ -41,7 +41,6 @@ func TestNewResource(t *testing.T) {
 			resourceList: map[v1.ResourceName]resource.Quantity{
 				v1.ResourceCPU: *resource.NewScaledQuantity(4, -3),
 				v1.ResourceMemory: *resource.NewQuantity(2000, resource.BinarySI),
-				v1.ResourceNvidiaGPU: *resource.NewQuantity(1000, resource.DecimalSI),
 				v1.ResourcePods: *resource.NewQuantity(80, resource.BinarySI),
 				v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
 				"scalar.test/" + "scalar1": *resource.NewQuantity(1, resource.DecimalSI),
@@ -50,7 +49,6 @@
 			expected: &Resource{
 				MilliCPU: 4,
 				Memory: 2000,
-				NvidiaGPU: 1000,
 				EphemeralStorage: 5000,
 				AllowedPodNumber: 80,
 				ScalarResources: map[v1.ResourceName]int64{"scalar.test/scalar1": 1, "hugepages-test": 2},
@@ -76,7 +74,6 @@ func TestResourceList(t *testing.T) {
 			expected: map[v1.ResourceName]resource.Quantity{
 				v1.ResourceCPU: *resource.NewScaledQuantity(0, -3),
 				v1.ResourceMemory: *resource.NewQuantity(0, resource.BinarySI),
-				v1.ResourceNvidiaGPU: *resource.NewQuantity(0, resource.DecimalSI),
 				v1.ResourcePods: *resource.NewQuantity(0, resource.BinarySI),
 				v1.ResourceEphemeralStorage: *resource.NewQuantity(0, resource.BinarySI),
 			},
@@ -85,7 +82,6 @@
 			resource: &Resource{
 				MilliCPU: 4,
 				Memory: 2000,
-				NvidiaGPU: 1000,
 				EphemeralStorage: 5000,
 				AllowedPodNumber: 80,
 				ScalarResources: map[v1.ResourceName]int64{"scalar.test/scalar1": 1, "hugepages-test": 2},
@@ -93,7 +89,6 @@ func TestResourceList(t *testing.T) {
 			expected: map[v1.ResourceName]resource.Quantity{
 				v1.ResourceCPU: *resource.NewScaledQuantity(4, -3),
 				v1.ResourceMemory: *resource.NewQuantity(2000, resource.BinarySI),
-				v1.ResourceNvidiaGPU: *resource.NewQuantity(1000, resource.DecimalSI),
 				v1.ResourcePods: *resource.NewQuantity(80, resource.BinarySI),
 				v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
 				"scalar.test/" + "scalar1": *resource.NewQuantity(1, resource.DecimalSI),
@@ -123,7 +118,6 @@ func TestResourceClone(t *testing.T) {
 			resource: &Resource{
 				MilliCPU: 4,
 				Memory: 2000,
-				NvidiaGPU: 1000,
 				EphemeralStorage: 5000,
 				AllowedPodNumber: 80,
 				ScalarResources: map[v1.ResourceName]int64{"scalar.test/scalar1": 1, "hugepages-test": 2},
@@ -131,7 +125,6 @@
 			expected: &Resource{
 				MilliCPU: 4,
 				Memory: 2000,
-				NvidiaGPU: 1000,
 				EphemeralStorage: 5000,
 				AllowedPodNumber: 80,
 				ScalarResources: map[v1.ResourceName]int64{"scalar.test/scalar1": 1, "hugepages-test": 2},
@@ -168,7 +161,6 @@ func TestResourceAddScalar(t *testing.T) {
 			resource: &Resource{
 				MilliCPU: 4,
 				Memory: 2000,
-				NvidiaGPU: 1000,
 				EphemeralStorage: 5000,
 				AllowedPodNumber: 80,
 				ScalarResources: map[v1.ResourceName]int64{"hugepages-test": 2},
@@ -178,7 +170,6 @@
 			expected: &Resource{
 				MilliCPU: 4,
 				Memory: 2000,
-				NvidiaGPU: 1000,
 				EphemeralStorage: 5000,
 				AllowedPodNumber: 80,
 				ScalarResources: map[v1.ResourceName]int64{"hugepages-test": 2, "scalar2": 200},
@@ -205,7 +196,6 @@ func TestNewNodeInfo(t *testing.T) {
 		requestedResource: &Resource{
 			MilliCPU: 300,
 			Memory: 1524,
-			NvidiaGPU: 0,
 			EphemeralStorage: 0,
 			AllowedPodNumber: 0,
 			ScalarResources: map[v1.ResourceName]int64(nil),
@@ -213,7 +203,6 @@
 		nonzeroRequest: &Resource{
 			MilliCPU: 300,
 			Memory: 1524,
-			NvidiaGPU: 0,
 			EphemeralStorage: 0,
 			AllowedPodNumber: 0,
 			ScalarResources: map[v1.ResourceName]int64(nil),
@@ -516,7 +505,6 @@ func TestNodeInfoAddPod(t *testing.T) {
 		requestedResource: &Resource{
 			MilliCPU: 300,
 			Memory: 1524,
-			NvidiaGPU: 0,
 			EphemeralStorage: 0,
 			AllowedPodNumber: 0,
 			ScalarResources: map[v1.ResourceName]int64(nil),
@@ -524,7 +512,6 @@
 		nonzeroRequest: &Resource{
 			MilliCPU: 300,
 			Memory: 1524,
-			NvidiaGPU: 0,
 			EphemeralStorage: 0,
 			AllowedPodNumber: 0,
 			ScalarResources: map[v1.ResourceName]int64(nil),
@@ -630,7 +617,6 @@ func TestNodeInfoRemovePod(t *testing.T) {
 		requestedResource: &Resource{
 			MilliCPU: 300,
 			Memory: 1524,
-			NvidiaGPU: 0,
 			EphemeralStorage: 0,
 			AllowedPodNumber: 0,
 			ScalarResources: map[v1.ResourceName]int64(nil),
@@ -638,7 +624,6 @@
 		nonzeroRequest: &Resource{
 			MilliCPU: 300,
 			Memory: 1524,
-			NvidiaGPU: 0,
 			EphemeralStorage: 0,
 			AllowedPodNumber: 0,
 			ScalarResources: map[v1.ResourceName]int64(nil),
@@ -748,7 +733,6 @@ func TestNodeInfoRemovePod(t *testing.T) {
 		requestedResource: &Resource{
 			MilliCPU: 200,
 			Memory: 1024,
-			NvidiaGPU: 0,
 			EphemeralStorage: 0,
 			AllowedPodNumber: 0,
 			ScalarResources: map[v1.ResourceName]int64(nil),
@@ -756,7 +740,6 @@
 		nonzeroRequest: &Resource{
 			MilliCPU: 200,
 			Memory: 1024,
-			NvidiaGPU: 0,
 			EphemeralStorage: 0,
 			AllowedPodNumber: 0,
 			ScalarResources: map[v1.ResourceName]int64(nil),
diff --git a/staging/src/k8s.io/api/core/v1/resource.go b/staging/src/k8s.io/api/core/v1/resource.go
index 3bd6fec62fa..bb804125462 100644
--- a/staging/src/k8s.io/api/core/v1/resource.go
+++ b/staging/src/k8s.io/api/core/v1/resource.go
@@ -48,13 +48,6 @@ func (self *ResourceList) Pods() *resource.Quantity {
 	return &resource.Quantity{}
 }

-func (self *ResourceList) NvidiaGPU() *resource.Quantity {
-	if val, ok := (*self)[ResourceNvidiaGPU]; ok {
-		return &val
-	}
-	return &resource.Quantity{}
-}
-
 func (self *ResourceList) StorageEphemeral() *resource.Quantity {
 	if val, ok := (*self)[ResourceEphemeralStorage]; ok {
 		return &val
diff --git a/staging/src/k8s.io/api/core/v1/types.go b/staging/src/k8s.io/api/core/v1/types.go
index 85b83de04b6..18b5cf90d8b 100644
--- a/staging/src/k8s.io/api/core/v1/types.go
+++ b/staging/src/k8s.io/api/core/v1/types.go
@@ -4076,8 +4076,6 @@ const (
 	// Local ephemeral storage, in bytes. (500Gi = 500GiB = 500 * 1024 * 1024 * 1024)
 	// The resource name for ResourceEphemeralStorage is alpha and it can change across releases.
 	ResourceEphemeralStorage ResourceName = "ephemeral-storage"
-	// NVIDIA GPU, in devices. Alpha, might change: although fractional and allowing values >1, only one whole device per node is assigned.
-	ResourceNvidiaGPU ResourceName = "alpha.kubernetes.io/nvidia-gpu"
 )

 const (
diff --git a/test/e2e/scheduling/nvidia-gpus.go b/test/e2e/scheduling/nvidia-gpus.go
index 93dc4e1eee0..5aec0331d54 100644
--- a/test/e2e/scheduling/nvidia-gpus.go
+++ b/test/e2e/scheduling/nvidia-gpus.go
@@ -40,54 +40,11 @@ const (
 	driverInstallTimeout = 10 * time.Minute
 )

-type podCreationFuncType func() *v1.Pod
-
 var (
 	gpuResourceName v1.ResourceName
 	dsYamlUrl string
-	podCreationFunc podCreationFuncType
 )

-func makeCudaAdditionTestPod() *v1.Pod {
-	podName := testPodNamePrefix + string(uuid.NewUUID())
-	testPod := &v1.Pod{
-		ObjectMeta: metav1.ObjectMeta{
-			Name: podName,
-		},
-		Spec: v1.PodSpec{
-			RestartPolicy: v1.RestartPolicyNever,
-			Containers: []v1.Container{
-				{
-					Name: "vector-addition",
-					Image: imageutils.GetE2EImage(imageutils.CudaVectorAdd),
-					Resources: v1.ResourceRequirements{
-						Limits: v1.ResourceList{
-							gpuResourceName: *resource.NewQuantity(1, resource.DecimalSI),
-						},
-					},
-					VolumeMounts: []v1.VolumeMount{
-						{
-							Name: "nvidia-libraries",
-							MountPath: "/usr/local/nvidia/lib64",
-						},
-					},
-				},
-			},
-			Volumes: []v1.Volume{
-				{
-					Name: "nvidia-libraries",
-					VolumeSource: v1.VolumeSource{
-						HostPath: &v1.HostPathVolumeSource{
-							Path: "/home/kubernetes/bin/nvidia/lib",
-						},
-					},
-				},
-			},
-		},
-	}
-	return testPod
-}
-
 func makeCudaAdditionDevicePluginTestPod() *v1.Pod {
 	podName := testPodNamePrefix + string(uuid.NewUUID())
 	testPod := &v1.Pod{
@@ -163,20 +120,13 @@ func SetupNVIDIAGPUNode(f *framework.Framework, setupResourceGatherer bool) *fra
 	}
 	framework.Logf("Cluster is running on COS. Proceeding with test")
-	if f.BaseName == "gpus" {
-		dsYamlUrl = "https://raw.githubusercontent.com/ContainerEngine/accelerators/master/cos-nvidia-gpu-installer/daemonset.yaml"
-		gpuResourceName = v1.ResourceNvidiaGPU
-		podCreationFunc = makeCudaAdditionTestPod
+	dsYamlUrlFromEnv := os.Getenv("NVIDIA_DRIVER_INSTALLER_DAEMONSET")
+	if dsYamlUrlFromEnv != "" {
+		dsYamlUrl = dsYamlUrlFromEnv
 	} else {
-		dsYamlUrlFromEnv := os.Getenv("NVIDIA_DRIVER_INSTALLER_DAEMONSET")
-		if dsYamlUrlFromEnv != "" {
-			dsYamlUrl = dsYamlUrlFromEnv
-		} else {
-			dsYamlUrl = "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/daemonset.yaml"
-		}
-		gpuResourceName = framework.NVIDIAGPUResourceName
-		podCreationFunc = makeCudaAdditionDevicePluginTestPod
+		dsYamlUrl = "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/daemonset.yaml"
 	}
+	gpuResourceName = framework.NVIDIAGPUResourceName
 	framework.Logf("Using %v", dsYamlUrl)

 	// Creates the DaemonSet that installs Nvidia Drivers.
@@ -218,7 +168,7 @@ func testNvidiaGPUsOnCOS(f *framework.Framework) {
 	framework.Logf("Creating as many pods as there are Nvidia GPUs and have the pods run a CUDA app")
 	podList := []*v1.Pod{}
 	for i := int64(0); i < getGPUsAvailable(f); i++ {
-		podList = append(podList, f.PodClient().Create(podCreationFunc()))
+		podList = append(podList, f.PodClient().Create(makeCudaAdditionDevicePluginTestPod()))
 	}
 	framework.Logf("Wait for all test pods to succeed")
 	// Wait for all pods to succeed
@@ -234,13 +184,6 @@ func testNvidiaGPUsOnCOS(f *framework.Framework) {
 	framework.ExpectNoError(err, "getting resource usage summary")
 }

-var _ = SIGDescribe("[Feature:GPU]", func() {
-	f := framework.NewDefaultFramework("gpus")
-	It("run Nvidia GPU tests on Container Optimized OS only", func() {
-		testNvidiaGPUsOnCOS(f)
-	})
-})
-
 var _ = SIGDescribe("[Feature:GPUDevicePlugin]", func() {
 	f := framework.NewDefaultFramework("device-plugin-gpus")
 	It("run Nvidia GPU Device Plugin tests on Container Optimized OS only", func() {
diff --git a/test/e2e_node/BUILD b/test/e2e_node/BUILD
index ec7f37a26cc..53e775dd672 100644
--- a/test/e2e_node/BUILD
+++ b/test/e2e_node/BUILD
@@ -11,7 +11,6 @@ go_library(
         "docker_util.go",
         "framework.go",
         "gpu_device_plugin.go",
-        "gpus.go",
        "image_list.go",
         "simple_mount.go",
         "util.go",
diff --git a/test/e2e_node/gpu_device_plugin.go b/test/e2e_node/gpu_device_plugin.go
index 69845201685..198d94b0645 100644
--- a/test/e2e_node/gpu_device_plugin.go
+++ b/test/e2e_node/gpu_device_plugin.go
@@ -17,6 +17,7 @@ limitations under the License.
 package e2e_node

 import (
+	"os/exec"
 	"strconv"
 	"time"

@@ -132,6 +133,16 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi
 	})
 })

+func checkIfNvidiaGPUsExistOnNode() bool {
+	// Cannot use `lspci` because it is not installed on all distros by default.
+	err := exec.Command("/bin/sh", "-c", "find /sys/devices/pci* -type f | grep vendor | xargs cat | grep 0x10de").Run()
+	if err != nil {
+		framework.Logf("check for nvidia GPUs failed. Got Error: %v", err)
+		return false
+	}
+	return true
+}
+
 func logDevicePluginMetrics() {
 	ms, err := metrics.GrabKubeletMetricsWithoutProxy(framework.TestContext.NodeName + ":10255")
 	framework.ExpectNoError(err)
diff --git a/test/e2e_node/gpus.go b/test/e2e_node/gpus.go
deleted file mode 100644
index 41c364db619..00000000000
--- a/test/e2e_node/gpus.go
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
-Copyright 2017 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package e2e_node
-
-import (
-	"fmt"
-	"os/exec"
-	"time"
-
-	"k8s.io/api/core/v1"
-	"k8s.io/apimachinery/pkg/api/resource"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/kubernetes/pkg/features"
-	"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
-	"k8s.io/kubernetes/test/e2e/framework"
-
-	. "github.com/onsi/ginkgo"
-	. "github.com/onsi/gomega"
-)
-
-func getGPUsAvailable(f *framework.Framework) int64 {
-	nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
-	framework.ExpectNoError(err, "getting node list")
-	var gpusAvailable int64
-	for _, node := range nodeList.Items {
-		gpusAvailable += node.Status.Capacity.NvidiaGPU().Value()
-	}
-	return gpusAvailable
-}
-
-func gpusExistOnAllNodes(f *framework.Framework) bool {
-	nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
-	framework.ExpectNoError(err, "getting node list")
-	for _, node := range nodeList.Items {
-		if node.Name == "kubernetes-master" {
-			continue
-		}
-		if node.Status.Capacity.NvidiaGPU().Value() == 0 {
-			return false
-		}
-	}
-	return true
-}
-
-func checkIfNvidiaGPUsExistOnNode() bool {
-	// Cannot use `lspci` because it is not installed on all distros by default.
-	err := exec.Command("/bin/sh", "-c", "find /sys/devices/pci* -type f | grep vendor | xargs cat | grep 0x10de").Run()
-	if err != nil {
-		framework.Logf("check for nvidia GPUs failed. Got Error: %v", err)
-		return false
-	}
-	return true
-}
-
-// Serial because the test updates kubelet configuration.
-var _ = framework.KubeDescribe("GPU [Serial]", func() {
-	f := framework.NewDefaultFramework("gpu-test")
-	Context("attempt to use GPUs if available", func() {
-		It("setup the node and create pods to test gpus", func() {
-			By("ensuring that Nvidia GPUs exist on the node")
-			if !checkIfNvidiaGPUsExistOnNode() {
-				Skip("Nvidia GPUs do not exist on the node. Skipping test.")
-			}
-			By("ensuring that dynamic kubelet configuration is enabled")
-			enabled, err := isKubeletConfigEnabled(f)
-			framework.ExpectNoError(err)
-			if !enabled {
-				Skip("Dynamic Kubelet configuration is not enabled. Skipping test.")
-			}
-
-			By("enabling support for GPUs")
-			var oldCfg *kubeletconfig.KubeletConfiguration
-			defer func() {
-				if oldCfg != nil {
-					framework.ExpectNoError(setKubeletConfiguration(f, oldCfg))
-				}
-			}()
-
-			// Enable Accelerators
-			oldCfg, err = getCurrentKubeletConfig()
-			framework.ExpectNoError(err)
-			newCfg := oldCfg.DeepCopy()
-			newCfg.FeatureGates[string(features.Accelerators)] = true
-			framework.ExpectNoError(setKubeletConfiguration(f, newCfg))
-
-			By("Waiting for GPUs to become available on the local node")
-			Eventually(gpusExistOnAllNodes(f), 10*time.Minute, time.Second).Should(BeTrue())
-
-			By("Creating a pod that will consume all GPUs")
-			podSuccess := makePod(getGPUsAvailable(f), "gpus-success")
-			podSuccess = f.PodClient().CreateSync(podSuccess)
-
-			By("Checking the containers in the pod had restarted at-least twice successfully thereby ensuring GPUs are reused")
-			const minContainerRestartCount = 2
-			Eventually(func() bool {
-				p, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(podSuccess.Name, metav1.GetOptions{})
-				if err != nil {
-					framework.Logf("failed to get pod status: %v", err)
-					return false
-				}
-				if p.Status.ContainerStatuses[0].RestartCount < minContainerRestartCount {
-					return false
-				}
-				return true
-			}, time.Minute, time.Second).Should(BeTrue())
-
-			By("Checking if the pod outputted Success to its logs")
-			framework.ExpectNoError(f.PodClient().MatchContainerOutput(podSuccess.Name, podSuccess.Name, "Success"))
-
-			By("Creating a new pod requesting a GPU and noticing that it is rejected by the Kubelet")
-			podFailure := makePod(1, "gpu-failure")
-			framework.WaitForPodCondition(f.ClientSet, f.Namespace.Name, podFailure.Name, "pod rejected", framework.PodStartTimeout, func(pod *v1.Pod) (bool, error) {
-				if pod.Status.Phase == v1.PodFailed {
-					return true, nil
-
-				}
-				return false, nil
-			})
-
-			By("stopping the original Pod with GPUs")
-			gp := int64(0)
-			deleteOptions := metav1.DeleteOptions{
-				GracePeriodSeconds: &gp,
-			}
-			f.PodClient().DeleteSync(podSuccess.Name, &deleteOptions, framework.DefaultPodDeletionTimeout)
-
-			By("attempting to start the failed pod again")
-			f.PodClient().DeleteSync(podFailure.Name, &deleteOptions, framework.DefaultPodDeletionTimeout)
-			podFailure = f.PodClient().CreateSync(podFailure)
-
-			By("Checking if the pod outputted Success to its logs")
-			framework.ExpectNoError(f.PodClient().MatchContainerOutput(podFailure.Name, podFailure.Name, "Success"))
-		})
-	})
-})
-
-func makePod(gpus int64, name string) *v1.Pod {
-	resources := v1.ResourceRequirements{
-		Limits: v1.ResourceList{
-			v1.ResourceNvidiaGPU: *resource.NewQuantity(gpus, resource.DecimalSI),
-		},
-	}
-	gpuverificationCmd := fmt.Sprintf("if [[ %d -ne $(ls /dev/ | egrep '^nvidia[0-9]+$' | wc -l) ]]; then exit 1; else echo Success; fi", gpus)
-	return &v1.Pod{
-		ObjectMeta: metav1.ObjectMeta{
-			Name: name,
-		},
-		Spec: v1.PodSpec{
-			RestartPolicy: v1.RestartPolicyAlways,
-			Containers: []v1.Container{
-				{
-					Image: busyboxImage,
-					Name: name,
-					Command: []string{"sh", "-c", gpuverificationCmd},
-					Resources: resources,
-				},
-			},
-		},
-	}
-}
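
Reviewer note (not part of the patch): after this change the only remaining way to ask for
NVIDIA GPUs is the extended resource advertised by the device plugin, which the e2e
framework refers to as framework.NVIDIAGPUResourceName (nvidia.com/gpu). Such requests go
through the ScalarResources/IsScalarResourceName path that this patch keeps in the
scheduler, so no resource-specific handling is left in-tree. Below is a minimal sketch of
what a replacement for the deleted makePod helper could look like under that assumption;
the package, function name, and the name/image arguments are illustrative only and are not
taken from the tree.

	package example

	import (
		"k8s.io/api/core/v1"
		"k8s.io/apimachinery/pkg/api/resource"
		metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	)

	// makeDevicePluginGPUPod requests `gpus` GPUs through the device-plugin
	// extended resource instead of the removed alpha.kubernetes.io/nvidia-gpu name.
	func makeDevicePluginGPUPod(gpus int64, name, image string) *v1.Pod {
		return &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{Name: name},
			Spec: v1.PodSpec{
				RestartPolicy: v1.RestartPolicyNever,
				Containers: []v1.Container{
					{
						Name:  name,
						Image: image,
						Resources: v1.ResourceRequirements{
							Limits: v1.ResourceList{
								// Extended resource served by the NVIDIA device plugin;
								// the kubelet only sees it once the plugin registers.
								"nvidia.com/gpu": *resource.NewQuantity(gpus, resource.DecimalSI),
							},
						},
					},
				},
			},
		}
	}

A pod built this way stays Pending until a node actually advertises the extended resource,
which is the behavior the remaining [Feature:GPUDevicePlugin] tests rely on.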