mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-31 07:20:13 +00:00
Add support for multiple nvidia gpus
This commit is contained in:
parent
81d01a84e0
commit
57c77ffbdd
@ -206,7 +206,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
|
||||
fs.BoolVar(&s.BabysitDaemons, "babysit-daemons", s.BabysitDaemons, "If true, the node has babysitter process monitoring docker and kubelet.")
|
||||
fs.MarkDeprecated("babysit-daemons", "Will be removed in a future version.")
|
||||
fs.Int32Var(&s.MaxPods, "max-pods", s.MaxPods, "Number of Pods that can run on this Kubelet.")
|
||||
fs.Int32Var(&s.NvidiaGPUs, "experimental-nvidia-gpus", s.NvidiaGPUs, "Number of NVIDIA GPU devices on this node. Only 0 (default) and 1 are currently supported.")
|
||||
fs.BoolVar(&s.EnableExperimentalNvidiaGPU, "experimental-enable-nvidia-gpu", s.EnableExperimentalNvidiaGPU, "Enable experimental Nvidia GPU support.")
|
||||
// TODO(#40229): Remove the docker-exec-handler flag.
|
||||
fs.StringVar(&s.DockerExecHandlerName, "docker-exec-handler", s.DockerExecHandlerName, "Handler to use when executing a command in a container. Valid values are 'native' and 'nsenter'. Defaults to 'native'.")
|
||||
fs.MarkDeprecated("docker-exec-handler", "this flag will be removed and only the 'native' handler will be supported in the future.")
|
||||
|
@ -362,8 +362,8 @@ type KubeletConfiguration struct {
|
||||
BabysitDaemons bool
|
||||
// maxPods is the number of pods that can run on this Kubelet.
|
||||
MaxPods int32
|
||||
// nvidiaGPUs is the number of NVIDIA GPU devices on this node.
|
||||
NvidiaGPUs int32
|
||||
// Enable experimental Nvidia GPU
|
||||
EnableExperimentalNvidiaGPU bool
|
||||
// dockerExecHandlerName is the handler to use when executing a command
|
||||
// in a container. Valid values are 'native' and 'nsenter'. Defaults to
|
||||
// 'native'.
|
||||
|
@ -407,8 +407,8 @@ type KubeletConfiguration struct {
|
||||
BabysitDaemons bool `json:"babysitDaemons"`
|
||||
// maxPods is the number of pods that can run on this Kubelet.
|
||||
MaxPods int32 `json:"maxPods"`
|
||||
// nvidiaGPUs is the number of NVIDIA GPU devices on this node.
|
||||
NvidiaGPUs int32 `json:"nvidiaGPUs"`
|
||||
// Enable Nvidia GPU support on this node.
|
||||
EnableExperimentalNvidiaGPU bool `json:"enableExperimentalNvidiaGPU"`
|
||||
// dockerExecHandlerName is the handler to use when executing a command
|
||||
// in a container. Valid values are 'native' and 'nsenter'. Defaults to
|
||||
// 'native'.
|
||||
|
181
pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go
Normal file
181
pkg/kubelet/gpu/nvidia/nvidia_gpu_manager.go
Normal file
@ -0,0 +1,181 @@
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package nvidia
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sync"
|
||||
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockertools"
|
||||
)
|
||||
|
||||
// TODO: Switching to NVML in the future would make this implementation more complex,
|
||||
// but also more powerful.
|
||||
|
||||
// Device nodes that accompany every NVIDIA GPU card. When the driver is
// installed correctly both of them are expected to exist on the host.
const (
	// NvidiaCtlDevice is the host path of the nvidiactl device node.
	NvidiaCtlDevice string = "/dev/nvidiactl"
	// NvidiaUVMDevice is the host path of the nvidia-uvm device node.
	NvidiaUVMDevice string = "/dev/nvidia-uvm"
)
|
||||
|
||||
// Manage GPU devices.
|
||||
type NvidiaGPUManager struct {
|
||||
gpuPaths []string
|
||||
gpuMutex sync.Mutex
|
||||
|
||||
// The interface which could get GPU mapping from all the containers.
|
||||
// TODO: Should make this independent of Docker in the future.
|
||||
dockerClient dockertools.DockerInterface
|
||||
}
|
||||
|
||||
// Get all the paths of NVIDIA GPU card from /dev/
|
||||
// TODO: Without NVML support we only can check whether there has GPU devices, but
|
||||
// could not give a health check or get more information like GPU cores, memory, or
|
||||
// family name. Need to support NVML in the future. But we do not need NVML until
|
||||
// we want more features, features like schedule containers according to GPU family
|
||||
// name.
|
||||
func (ngm *NvidiaGPUManager) discovery() (err error) {
|
||||
if ngm.gpuPaths == nil {
|
||||
err = filepath.Walk("/dev", func(path string, f os.FileInfo, err error) error {
|
||||
reg := regexp.MustCompile(`^nvidia[0-9]*$`)
|
||||
gpupath := reg.FindAllString(f.Name(), -1)
|
||||
if gpupath != nil && gpupath[0] != "" {
|
||||
ngm.gpuPaths = append(ngm.gpuPaths, "/dev/"+gpupath[0])
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// validGPUPathRegexp matches the host path of an NVIDIA GPU device node.
// It is compiled once at package initialization instead of on every call.
var validGPUPathRegexp = regexp.MustCompile(`^/dev/nvidia[0-9]*$`)

// Valid reports whether path has the form of an NVIDIA GPU device node path,
// i.e. "/dev/nvidia" followed by zero or more decimal digits. Control devices
// such as /dev/nvidiactl and /dev/nvidia-uvm do not match.
func Valid(path string) bool {
	return validGPUPathRegexp.MatchString(path)
}
|
||||
|
||||
// Initialize the GPU devices, so far only needed to discover the GPU paths.
|
||||
func (ngm *NvidiaGPUManager) Init(dc dockertools.DockerInterface) error {
|
||||
if _, err := os.Stat(NvidiaCtlDevice); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := os.Stat(NvidiaUVMDevice); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ngm.gpuMutex.Lock()
|
||||
defer ngm.gpuMutex.Unlock()
|
||||
|
||||
err := ngm.discovery()
|
||||
|
||||
ngm.dockerClient = dc
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (ngm *NvidiaGPUManager) Shutdown() {
|
||||
ngm.gpuMutex.Lock()
|
||||
defer ngm.gpuMutex.Unlock()
|
||||
|
||||
ngm.gpuPaths = nil
|
||||
}
|
||||
|
||||
// Get how many GPU cards we have.
|
||||
func (ngm *NvidiaGPUManager) Capacity() int {
|
||||
ngm.gpuMutex.Lock()
|
||||
defer ngm.gpuMutex.Unlock()
|
||||
|
||||
return len(ngm.gpuPaths)
|
||||
}
|
||||
|
||||
// Check whether the GPU device could be assigned to a container.
|
||||
func (ngm *NvidiaGPUManager) isAvailable(path string) bool {
|
||||
containers, err := dockertools.GetKubeletDockerContainers(ngm.dockerClient, false)
|
||||
|
||||
if err != nil {
|
||||
return true
|
||||
}
|
||||
|
||||
for i := range containers {
|
||||
containerJSON, err := ngm.dockerClient.InspectContainer(containers[i].ID)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
devices := containerJSON.HostConfig.Devices
|
||||
if devices == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
for _, device := range devices {
|
||||
if Valid(device.PathOnHost) && path == device.PathOnHost {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// Return the GPU paths as needed, otherwise, return error.
|
||||
func (ngm *NvidiaGPUManager) AllocateGPUs(num int) (paths []string, err error) {
|
||||
if num <= 0 {
|
||||
return
|
||||
}
|
||||
|
||||
ngm.gpuMutex.Lock()
|
||||
defer ngm.gpuMutex.Unlock()
|
||||
|
||||
for _, path := range ngm.gpuPaths {
|
||||
if ngm.isAvailable(path) {
|
||||
paths = append(paths, path)
|
||||
if len(paths) == num {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
err = fmt.Errorf("Not enough GPUs!")
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Return the count of GPUs which are free.
|
||||
func (ngm *NvidiaGPUManager) AvailableGPUs() (num int) {
|
||||
ngm.gpuMutex.Lock()
|
||||
defer ngm.gpuMutex.Unlock()
|
||||
|
||||
for _, path := range ngm.gpuPaths {
|
||||
if ngm.isAvailable(path) {
|
||||
num++
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
@ -67,6 +67,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockertools"
|
||||
"k8s.io/kubernetes/pkg/kubelet/events"
|
||||
"k8s.io/kubernetes/pkg/kubelet/eviction"
|
||||
"k8s.io/kubernetes/pkg/kubelet/gpu/nvidia"
|
||||
"k8s.io/kubernetes/pkg/kubelet/images"
|
||||
"k8s.io/kubernetes/pkg/kubelet/kuberuntime"
|
||||
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
||||
@ -449,8 +450,8 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
|
||||
writer: kubeDeps.Writer,
|
||||
nonMasqueradeCIDR: kubeCfg.NonMasqueradeCIDR,
|
||||
maxPods: int(kubeCfg.MaxPods),
|
||||
enableNvidiaGPU: kubeCfg.EnableNvidiaGPU,
|
||||
podsPerCore: int(kubeCfg.PodsPerCore),
|
||||
nvidiaGPUs: int(kubeCfg.NvidiaGPUs),
|
||||
syncLoopMonitor: atomic.Value{},
|
||||
resolverConfig: kubeCfg.ResolverConfig,
|
||||
cpuCFSQuota: kubeCfg.CPUCFSQuota,
|
||||
@ -981,8 +982,8 @@ type Kubelet struct {
|
||||
// Maximum Number of Pods which can be run by this Kubelet
|
||||
maxPods int
|
||||
|
||||
// Number of NVIDIA GPUs on this node
|
||||
nvidiaGPUs int
|
||||
// Enable experimental Nvidia GPU
|
||||
enableExperimentalNvidiaGPU bool
|
||||
|
||||
// Monitor Kubelet's sync loop
|
||||
syncLoopMonitor atomic.Value
|
||||
@ -1089,6 +1090,9 @@ type Kubelet struct {
|
||||
// This should only be enabled when the container runtime is performing user remapping AND if the
|
||||
// experimental behavior is desired.
|
||||
experimentalHostUserNamespaceDefaulting bool
|
||||
|
||||
// NVIDIA GPU Manager
|
||||
nvidiaGPUManager nvidia.NvidiaGPUManager
|
||||
}
|
||||
|
||||
// setupDataDirs creates:
|
||||
@ -1182,7 +1186,13 @@ func (kl *Kubelet) initializeModules() error {
|
||||
return fmt.Errorf("Failed to start OOM watcher %v", err)
|
||||
}
|
||||
|
||||
// Step 7: Start resource analyzer
|
||||
// Step 7: Init Nvidia Manager. Do not need to return err until we use NVML instead.
|
||||
// Only works when user give true to EnableExperimentalNvidiaGPU
|
||||
if kl.enableExperimentalNvidiaGPU {
|
||||
kl.nvidiaGPUManager.Init(kl.dockerClient)
|
||||
}
|
||||
|
||||
// Step 8: Start resource analyzer
|
||||
kl.resourceAnalyzer.Start()
|
||||
|
||||
return nil
|
||||
|
@ -482,6 +482,11 @@ func (kl *Kubelet) setNodeStatusMachineInfo(node *v1.Node) {
|
||||
node.Status.Capacity = v1.ResourceList{}
|
||||
}
|
||||
|
||||
nvidiaGPUCapacity := 0
|
||||
if kl.enableExperimentalNvidiaGPU {
|
||||
nvidiaGPUCapacity = kl.nvidiaGPUManager.Capacity()
|
||||
}
|
||||
|
||||
// TODO: Post NotReady if we cannot get MachineInfo from cAdvisor. This needs to start
|
||||
// cAdvisor locally, e.g. for test-cmd.sh, and in integration test.
|
||||
info, err := kl.GetCachedMachineInfo()
|
||||
@ -491,7 +496,7 @@ func (kl *Kubelet) setNodeStatusMachineInfo(node *v1.Node) {
|
||||
node.Status.Capacity[v1.ResourceCPU] = *resource.NewMilliQuantity(0, resource.DecimalSI)
|
||||
node.Status.Capacity[v1.ResourceMemory] = resource.MustParse("0Gi")
|
||||
node.Status.Capacity[v1.ResourcePods] = *resource.NewQuantity(int64(kl.maxPods), resource.DecimalSI)
|
||||
node.Status.Capacity[v1.ResourceNvidiaGPU] = *resource.NewQuantity(int64(kl.nvidiaGPUs), resource.DecimalSI)
|
||||
node.Status.Capacity[v1.ResourceNvidiaGPU] = *resource.NewQuantity(int64(nvidiaGPUCapacity), resource.DecimalSI)
|
||||
|
||||
glog.Errorf("Error getting machine info: %v", err)
|
||||
} else {
|
||||
@ -510,7 +515,7 @@ func (kl *Kubelet) setNodeStatusMachineInfo(node *v1.Node) {
|
||||
int64(kl.maxPods), resource.DecimalSI)
|
||||
}
|
||||
node.Status.Capacity[v1.ResourceNvidiaGPU] = *resource.NewQuantity(
|
||||
int64(kl.nvidiaGPUs), resource.DecimalSI)
|
||||
int64(nvidiaGPUCapacity), resource.DecimalSI)
|
||||
if node.Status.NodeInfo.BootID != "" &&
|
||||
node.Status.NodeInfo.BootID != info.BootID {
|
||||
// TODO: This requires a transaction, either both node status is updated
|
||||
|
@ -28,6 +28,7 @@ import (
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@ -48,6 +49,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/envvars"
|
||||
"k8s.io/kubernetes/pkg/kubelet/gpu/nvidia"
|
||||
"k8s.io/kubernetes/pkg/kubelet/images"
|
||||
"k8s.io/kubernetes/pkg/kubelet/qos"
|
||||
"k8s.io/kubernetes/pkg/kubelet/server/portforward"
|
||||
@ -84,16 +86,25 @@ func (kl *Kubelet) getActivePods() []*v1.Pod {
|
||||
}
|
||||
|
||||
// makeDevices determines the devices for the given container.
|
||||
// Experimental. For now, we hardcode /dev/nvidia0 no matter what the user asks for
|
||||
// (we only support one device per node).
|
||||
// TODO: add support for more than 1 GPU after #28216.
|
||||
func makeDevices(container *v1.Container) []kubecontainer.DeviceInfo {
|
||||
// Experimental.
|
||||
func (kl *Kubelet) makeDevices(container *v1.Container) []kubecontainer.DeviceInfo {
|
||||
if !kl.enableExperimentalNvidiaGPU {
|
||||
return nil
|
||||
}
|
||||
|
||||
nvidiaGPULimit := container.Resources.Limits.NvidiaGPU()
|
||||
|
||||
if nvidiaGPULimit.Value() != 0 {
|
||||
return []kubecontainer.DeviceInfo{
|
||||
{PathOnHost: "/dev/nvidia0", PathInContainer: "/dev/nvidia0", Permissions: "mrw"},
|
||||
{PathOnHost: "/dev/nvidiactl", PathInContainer: "/dev/nvidiactl", Permissions: "mrw"},
|
||||
{PathOnHost: "/dev/nvidia-uvm", PathInContainer: "/dev/nvidia-uvm", Permissions: "mrw"},
|
||||
if nvidiaGPUPaths, err := kl.nvidiaGPUManager.AllocateGPUs(int(nvidiaGPULimit.Value())); err == nil {
|
||||
devices := []kubecontainer.DeviceInfo{{PathOnHost: nvidia.NvidiaCtlDevice, PathInContainer: nvidia.NvidiaCtlDevice, Permissions: "mrw"},
|
||||
{PathOnHost: nvidia.NvidiaUVMDevice, PathInContainer: nvidia.NvidiaUVMDevice, Permissions: "mrw"}}
|
||||
|
||||
for i, path := range nvidiaGPUPaths {
|
||||
devices = append(devices, kubecontainer.DeviceInfo{PathOnHost: path, PathInContainer: "/dev/nvidia" + strconv.Itoa(i), Permissions: "mrw"})
|
||||
}
|
||||
|
||||
return devices
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -285,7 +296,7 @@ func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Contai
|
||||
|
||||
opts.PortMappings = kubecontainer.MakePortMappings(container)
|
||||
// TODO(random-liu): Move following convert functions into pkg/kubelet/container
|
||||
opts.Devices = makeDevices(container)
|
||||
opts.Devices = kl.makeDevices(container)
|
||||
|
||||
opts.Mounts, err = makeMounts(pod, kl.getPodDir(pod.UID), container, hostname, hostDomainName, podIP, volumes)
|
||||
if err != nil {
|
||||
|
@ -150,7 +150,7 @@ func GetHollowKubeletConfig(
|
||||
c.MaxContainerCount = 100
|
||||
c.MaxOpenFiles = 1024
|
||||
c.MaxPerPodContainerCount = 2
|
||||
c.NvidiaGPUs = 0
|
||||
c.EnableExperimentalNvidiaGPU = false
|
||||
c.RegisterNode = true
|
||||
c.RegisterSchedulable = true
|
||||
c.RegistryBurst = 10
|
||||
|
Loading…
Reference in New Issue
Block a user