From 13582a65aa165fc446a1b89cf90f8a785b33cc2f Mon Sep 17 00:00:00 2001
From: Vishnu kannan
Date: Mon, 27 Feb 2017 22:13:43 -0800
Subject: [PATCH] fix a bug in nvidia gpu allocation and added unit test

Signed-off-by: Vishnu kannan
---
 pkg/kubelet/gpu/nvidia/BUILD                      |  16 ++
 pkg/kubelet/gpu/nvidia/helpers.go                 |   2 +-
 pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go      |   9 +-
 .../gpu/nvidia/nvidia_gpu_manager_test.go         | 144 ++++++++++++++++++
 pkg/kubelet/kubelet_node_status_test.go           |   7 +-
 5 files changed, 170 insertions(+), 8 deletions(-)
 create mode 100644 pkg/kubelet/gpu/nvidia/nvidia_gpu_manager_test.go

diff --git a/pkg/kubelet/gpu/nvidia/BUILD b/pkg/kubelet/gpu/nvidia/BUILD
index c7f03202945..e9827578ac5 100644
--- a/pkg/kubelet/gpu/nvidia/BUILD
+++ b/pkg/kubelet/gpu/nvidia/BUILD
@@ -5,6 +5,7 @@ licenses(["notice"])
 load(
     "@io_bazel_rules_go//go:def.bzl",
     "go_library",
+    "go_test",
 )
 
 go_library(
@@ -36,3 +37,18 @@ filegroup(
     srcs = [":package-srcs"],
     tags = ["automanaged"],
 )
+
+go_test(
+    name = "go_default_test",
+    srcs = ["nvidia_gpu_manager_test.go"],
+    library = ":go_default_library",
+    tags = ["automanaged"],
+    deps = [
+        "//pkg/api/v1:go_default_library",
+        "//vendor:github.com/stretchr/testify/assert",
+        "//vendor:k8s.io/apimachinery/pkg/api/resource",
+        "//vendor:k8s.io/apimachinery/pkg/apis/meta/v1",
+        "//vendor:k8s.io/apimachinery/pkg/util/sets",
+        "//vendor:k8s.io/apimachinery/pkg/util/uuid",
+    ],
+)
diff --git a/pkg/kubelet/gpu/nvidia/helpers.go b/pkg/kubelet/gpu/nvidia/helpers.go
index 5b1b2f490b4..0abedbf4014 100644
--- a/pkg/kubelet/gpu/nvidia/helpers.go
+++ b/pkg/kubelet/gpu/nvidia/helpers.go
@@ -53,7 +53,7 @@ func (pgpu *podGPUs) delete(pods []string) {
 func (pgpu *podGPUs) devices() sets.String {
 	ret := sets.NewString()
 	for _, devices := range pgpu.podGPUMapping {
-		ret.Union(devices)
+		ret = ret.Union(devices)
 	}
 	return ret
 }
diff --git a/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go b/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go
index da9f7f0da16..43b0e3b32a0 100644
--- a/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go
+++ b/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go
@@ -80,7 +80,7 @@ func NewNvidiaGPUManager(activePodsLister activePodsLister, dockerClient dockert
 // Initialize the GPU devices, so far only needed to discover the GPU paths.
 func (ngm *nvidiaGPUManager) Start() error {
 	if ngm.dockerClient == nil {
-		return fmt.Errorf("invalid docker client specified")
+		return fmt.Errorf("Invalid docker client specified in GPU Manager")
 	}
 	ngm.Lock()
 	defer ngm.Unlock()
@@ -94,7 +94,7 @@ func (ngm *nvidiaGPUManager) Start() error {
 	}
 	ngm.defaultDevices = []string{nvidiaCtlDevice, nvidiaUVMDevice}
 	_, err := os.Stat(nvidiaUVMToolsDevice)
-	if os.IsNotExist(err) {
+	if !os.IsNotExist(err) {
 		ngm.defaultDevices = append(ngm.defaultDevices, nvidiaUVMToolsDevice)
 	}
 
@@ -154,12 +154,14 @@ func (ngm *nvidiaGPUManager) AllocateGPU(pod *v1.Pod, container *v1.Container) (
 	}
 	// Get GPU devices in use.
 	devicesInUse := ngm.allocated.devices()
+	glog.V(5).Infof("gpus in use: %v", devicesInUse.List())
 	// Get a list of available GPUs.
 	available := ngm.allGPUs.Difference(devicesInUse)
+	glog.V(5).Infof("gpus available: %v", available.List())
 	if int64(available.Len()) < gpusNeeded {
 		return nil, fmt.Errorf("requested number of GPUs unavailable. Requested: %d, Available: %d", gpusNeeded, available.Len())
 	}
-	ret := available.List()[:gpusNeeded]
+	ret := available.UnsortedList()[:gpusNeeded]
 	for _, device := range ret {
 		// Update internal allocated GPU cache.
 		ngm.allocated.insert(string(pod.UID), device)
@@ -184,6 +186,7 @@ func (ngm *nvidiaGPUManager) updateAllocatedGPUs() error {
 	}
 	allocatedPodUids := ngm.allocated.pods()
 	podsToBeRemoved := allocatedPodUids.Difference(activePodUids)
+	glog.V(5).Infof("pods to be removed: %v", podsToBeRemoved.List())
 	ngm.allocated.delete(podsToBeRemoved.List())
 	return nil
 }
diff --git a/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager_test.go b/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager_test.go
new file mode 100644
index 00000000000..aea168ba568
--- /dev/null
+++ b/pkg/kubelet/gpu/nvidia/nvidia_gpu_manager_test.go
@@ -0,0 +1,144 @@
+/*
+Copyright 2017 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package nvidia
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/apimachinery/pkg/util/uuid"
+	"k8s.io/kubernetes/pkg/api/v1"
+)
+
+type testActivePodsLister struct {
+	activePods []*v1.Pod
+}
+
+func (tapl *testActivePodsLister) GetRunningPods() ([]*v1.Pod, error) {
+	return tapl.activePods, nil
+}
+
+func makeTestPod(numContainers int) *v1.Pod {
+	quantity := resource.NewQuantity(1, resource.DecimalSI)
+	resources := v1.ResourceRequirements{
+		Limits: v1.ResourceList{
+			v1.ResourceNvidiaGPU: *quantity,
+		},
+	}
+	pod := &v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			UID: uuid.NewUUID(),
+		},
+		Spec: v1.PodSpec{
+			Containers: []v1.Container{},
+		},
+	}
+	for ; numContainers > 0; numContainers-- {
+		pod.Spec.Containers = append(pod.Spec.Containers, v1.Container{
+			Resources: resources,
+		})
+	}
+	return pod
+}
+
+func TestMultiContainerPodGPUAllocation(t *testing.T) {
+	podLister := &testActivePodsLister{}
+
+	testGpuManager := &nvidiaGPUManager{
+		activePodsLister: podLister,
+		allGPUs:          sets.NewString("/dev/nvidia0", "/dev/nvidia1"),
+		allocated:        newPodGPUs(),
+	}
+
+	// Expect that no devices are in use.
+	gpusInUse, err := testGpuManager.gpusInUse()
+	as := assert.New(t)
+	as.Nil(err)
+	as.Equal(len(gpusInUse.devices()), 0)
+
+	// Allocate GPUs for a pod with two containers.
+	pod := makeTestPod(2)
+	// Allocate for the first container.
+	devices1, err := testGpuManager.AllocateGPU(pod, &pod.Spec.Containers[0])
+	as.Nil(err)
+	as.Equal(len(devices1), 1)
+
+	podLister.activePods = append(podLister.activePods, pod)
+	// Allocate for the second container.
+	devices2, err := testGpuManager.AllocateGPU(pod, &pod.Spec.Containers[1])
+	as.Nil(err)
+	as.Equal(len(devices2), 1)
+
+	as.NotEqual(devices1, devices2, "expected containers to get different devices")
+
+	// Further allocations should fail.
+	newPod := makeTestPod(2)
+	devices1, err = testGpuManager.AllocateGPU(newPod, &newPod.Spec.Containers[0])
+	as.NotNil(err, "expected gpu allocation to fail. got: %v", devices1)
+
+	// Now terminate the original pod and observe that GPU allocation for the new pod succeeds.
+	podLister.activePods = podLister.activePods[:0]
+
+	devices1, err = testGpuManager.AllocateGPU(newPod, &newPod.Spec.Containers[0])
+	as.Nil(err)
+	as.Equal(len(devices1), 1)
+
+	podLister.activePods = append(podLister.activePods, newPod)
+
+	devices2, err = testGpuManager.AllocateGPU(newPod, &newPod.Spec.Containers[1])
+	as.Nil(err)
+	as.Equal(len(devices2), 1)
+
+	as.NotEqual(devices1, devices2, "expected containers to get different devices")
+}
+
+func TestMultiPodGPUAllocation(t *testing.T) {
+	podLister := &testActivePodsLister{}
+
+	testGpuManager := &nvidiaGPUManager{
+		activePodsLister: podLister,
+		allGPUs:          sets.NewString("/dev/nvidia0", "/dev/nvidia1"),
+		allocated:        newPodGPUs(),
+	}
+
+	// Expect that no devices are in use.
+	gpusInUse, err := testGpuManager.gpusInUse()
+	as := assert.New(t)
+	as.Nil(err)
+	as.Equal(len(gpusInUse.devices()), 0)
+
+	// Allocate a GPU for a pod with a single container.
+	podA := makeTestPod(1)
+	// Allocate for the first container.
+	devicesA, err := testGpuManager.AllocateGPU(podA, &podA.Spec.Containers[0])
+	as.Nil(err)
+	as.Equal(len(devicesA), 1)
+
+	podLister.activePods = append(podLister.activePods, podA)
+
+	// A second pod should be assigned the remaining GPU.
+	podB := makeTestPod(1)
+	// Allocate for the first container.
+	devicesB, err := testGpuManager.AllocateGPU(podB, &podB.Spec.Containers[0])
+	as.Nil(err)
+	as.Equal(len(devicesB), 1)
+	as.NotEqual(devicesA, devicesB, "expected pods to get different devices")
+}
diff --git a/pkg/kubelet/kubelet_node_status_test.go b/pkg/kubelet/kubelet_node_status_test.go
index 24b479079f3..1f57051a932 100644
--- a/pkg/kubelet/kubelet_node_status_test.go
+++ b/pkg/kubelet/kubelet_node_status_test.go
@@ -400,10 +400,9 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
 				v1.ResourcePods:   *resource.NewQuantity(0, resource.DecimalSI),
 			},
 			Allocatable: v1.ResourceList{
-				v1.ResourceCPU:       *resource.NewMilliQuantity(2800, resource.DecimalSI),
-				v1.ResourceMemory:    *resource.NewQuantity(19900E6, resource.BinarySI),
-				v1.ResourcePods:      *resource.NewQuantity(0, resource.DecimalSI),
-				v1.ResourceNvidiaGPU: *resource.NewQuantity(0, resource.DecimalSI),
+				v1.ResourceCPU:    *resource.NewMilliQuantity(2800, resource.DecimalSI),
+				v1.ResourceMemory: *resource.NewQuantity(19900E6, resource.BinarySI),
+				v1.ResourcePods:   *resource.NewQuantity(0, resource.DecimalSI),
 			},
 		},
 	}