mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-19 09:52:49 +00:00
Merge pull request #93285 from aarnaud/windows-devicemanager
Port deviceManager to windows container manager to enable GPU access
This commit is contained in:
commit
e20300b1a0
@ -169,6 +169,7 @@ go_library(
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:windows": [
|
||||
"//pkg/kubelet/cadvisor:go_default_library",
|
||||
"//pkg/kubelet/cm/devicemanager:go_default_library",
|
||||
"//staging/src/k8s.io/client-go/tools/record:go_default_library",
|
||||
"//staging/src/k8s.io/mount-utils:go_default_library",
|
||||
],
|
||||
|
@ -36,6 +36,7 @@ import (
|
||||
kubefeatures "k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
@ -52,6 +53,10 @@ type containerManagerImpl struct {
|
||||
cadvisorInterface cadvisor.Interface
|
||||
// Config of this node.
|
||||
nodeConfig NodeConfig
|
||||
// Interface for exporting and allocating devices reported by device plugins.
|
||||
deviceManager devicemanager.Manager
|
||||
// Interface for Topology resource co-ordination
|
||||
topologyManager topologymanager.Manager
|
||||
}
|
||||
|
||||
type noopWindowsResourceAllocator struct{}
|
||||
@ -79,6 +84,11 @@ func (cm *containerManagerImpl) Start(node *v1.Node,
|
||||
}
|
||||
}
|
||||
|
||||
// Starts device manager.
|
||||
if err := cm.deviceManager.Start(devicemanager.ActivePodsFunc(activePods), sourcesReady); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -93,11 +103,26 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
|
||||
}
|
||||
capacity := cadvisor.CapacityFromMachineInfo(machineInfo)
|
||||
|
||||
return &containerManagerImpl{
|
||||
cm := &containerManagerImpl{
|
||||
capacity: capacity,
|
||||
nodeConfig: nodeConfig,
|
||||
cadvisorInterface: cadvisorInterface,
|
||||
}, nil
|
||||
}
|
||||
|
||||
cm.topologyManager = topologymanager.NewFakeManager()
|
||||
|
||||
klog.Infof("Creating device plugin manager: %t", devicePluginEnabled)
|
||||
if devicePluginEnabled {
|
||||
cm.deviceManager, err = devicemanager.NewManagerImpl(nil, cm.topologyManager)
|
||||
cm.topologyManager.AddHintProvider(cm.deviceManager)
|
||||
} else {
|
||||
cm.deviceManager, err = devicemanager.NewManagerStub()
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return cm, nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) SystemCgroupsLimit() v1.ResourceList {
|
||||
@ -150,11 +175,11 @@ func (cm *containerManagerImpl) GetCapacity() v1.ResourceList {
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetPluginRegistrationHandler() cache.PluginHandler {
|
||||
return nil
|
||||
return cm.deviceManager.GetWatcherHandler()
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetDevicePluginResourceCapacity() (v1.ResourceList, v1.ResourceList, []string) {
|
||||
return nil, nil, []string{}
|
||||
return cm.deviceManager.GetCapacity()
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager {
|
||||
@ -162,11 +187,24 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager {
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error) {
|
||||
return &kubecontainer.RunContainerOptions{}, nil
|
||||
opts := &kubecontainer.RunContainerOptions{}
|
||||
// Allocate should already be called during predicateAdmitHandler.Admit(),
|
||||
// just try to fetch device runtime information from cached state here
|
||||
devOpts, err := cm.deviceManager.GetDeviceRunContainerOptions(pod, container)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
} else if devOpts == nil {
|
||||
return opts, nil
|
||||
}
|
||||
opts.Devices = append(opts.Devices, devOpts.Devices...)
|
||||
opts.Mounts = append(opts.Mounts, devOpts.Mounts...)
|
||||
opts.Envs = append(opts.Envs, devOpts.Envs...)
|
||||
opts.Annotations = append(opts.Annotations, devOpts.Annotations...)
|
||||
return opts, nil
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) UpdatePluginResources(*schedulerframework.NodeInfo, *lifecycle.PodAdmitAttributes) error {
|
||||
return nil
|
||||
func (cm *containerManagerImpl) UpdatePluginResources(node *schedulerframework.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
|
||||
return cm.deviceManager.UpdatePluginResources(node, attrs)
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) InternalContainerLifecycle() InternalContainerLifecycle {
|
||||
@ -177,12 +215,12 @@ func (cm *containerManagerImpl) GetPodCgroupRoot() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetDevices(_, _ string) []*podresourcesapi.ContainerDevices {
|
||||
return nil
|
||||
func (cm *containerManagerImpl) GetDevices(podUID, containerName string) []*podresourcesapi.ContainerDevices {
|
||||
return cm.deviceManager.GetDevices(podUID, containerName)
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) ShouldResetExtendedResourceCapacity() bool {
|
||||
return false
|
||||
return cm.deviceManager.ShouldResetExtendedResourceCapacity()
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetAllocateResourcesPodAdmitHandler() lifecycle.PodAdmitHandler {
|
||||
|
@ -22,6 +22,7 @@ import (
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
@ -126,7 +127,11 @@ func (s *sourcesReadyStub) AllReady() bool { return true }
|
||||
|
||||
// NewManagerImpl creates a new manager.
|
||||
func NewManagerImpl(topology []cadvisorapi.Node, topologyAffinityStore topologymanager.Store) (*ManagerImpl, error) {
|
||||
return newManagerImpl(pluginapi.KubeletSocket, topology, topologyAffinityStore)
|
||||
socketPath := pluginapi.KubeletSocket
|
||||
if runtime.GOOS == "windows" {
|
||||
socketPath = os.Getenv("SYSTEMDRIVE") + pluginapi.KubeletSocketWindows
|
||||
}
|
||||
return newManagerImpl(socketPath, topology, topologyAffinityStore)
|
||||
}
|
||||
|
||||
func newManagerImpl(socketPath string, topology []cadvisorapi.Node, topologyAffinityStore topologymanager.Store) (*ManagerImpl, error) {
|
||||
|
@ -30,7 +30,17 @@ const (
|
||||
DevicePluginPath = "/var/lib/kubelet/device-plugins/"
|
||||
// KubeletSocket is the path of the Kubelet registry socket
|
||||
KubeletSocket = DevicePluginPath + "kubelet.sock"
|
||||
|
||||
// DevicePluginPathWindows Avoid failed to run Kubelet: bad socketPath,
|
||||
// must be an absolute path: /var/lib/kubelet/device-plugins/kubelet.sock
|
||||
// https://github.com/kubernetes/kubernetes/issues/93262
|
||||
// https://github.com/kubernetes/kubernetes/pull/93285#discussion_r458140701
|
||||
DevicePluginPathWindows = "\\var\\lib\\kubelet\\device-plugins\\"
|
||||
// KubeletSocketWindows is the path of the Kubelet registry socket on windows
|
||||
KubeletSocketWindows = DevicePluginPathWindows + "kubelet.sock"
|
||||
|
||||
// KubeletPreStartContainerRPCTimeoutInSecs is the timeout duration in secs for PreStartContainer RPC
|
||||
// Timeout duration in secs for PreStartContainer RPC
|
||||
KubeletPreStartContainerRPCTimeoutInSecs = 30
|
||||
)
|
||||
|
||||
|
@ -7,6 +7,7 @@ go_library(
|
||||
srcs = [
|
||||
"cpu_limits.go",
|
||||
"density.go",
|
||||
"device_plugin.go",
|
||||
"dns.go",
|
||||
"framework.go",
|
||||
"gmsa_full.go",
|
||||
@ -22,6 +23,7 @@ go_library(
|
||||
importpath = "k8s.io/kubernetes/test/e2e/windows",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//staging/src/k8s.io/api/apps/v1:go_default_library",
|
||||
"//staging/src/k8s.io/api/core/v1:go_default_library",
|
||||
"//staging/src/k8s.io/api/rbac/v1:go_default_library",
|
||||
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
|
||||
|
157
test/e2e/windows/device_plugin.go
Normal file
157
test/e2e/windows/device_plugin.go
Normal file
@ -0,0 +1,157 @@
|
||||
/*
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package windows
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/kubernetes/test/e2e/framework"
|
||||
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
|
||||
e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
|
||||
imageutils "k8s.io/kubernetes/test/utils/image"
|
||||
|
||||
"github.com/onsi/ginkgo"
|
||||
)
|
||||
|
||||
const (
|
||||
testSlowMultiplier = 60
|
||||
)
|
||||
|
||||
var _ = SIGDescribe("Device Plugin", func() {
|
||||
f := framework.NewDefaultFramework("device-plugin")
|
||||
|
||||
var cs clientset.Interface
|
||||
|
||||
ginkgo.BeforeEach(func() {
|
||||
//Only for Windows containers
|
||||
e2eskipper.SkipUnlessNodeOSDistroIs("windows")
|
||||
cs = f.ClientSet
|
||||
})
|
||||
ginkgo.It("should be able to create a functioning device plugin for Windows", func() {
|
||||
ginkgo.By("creating Windows device plugin daemonset")
|
||||
dsName := "directx-device-plugin"
|
||||
daemonsetNameLabel := "daemonset-name"
|
||||
image := "e2eteam/k8s-directx-device-plugin:0.9.0-1809"
|
||||
mountName := "device-plugin"
|
||||
mountPath := "/var/lib/kubelet/device-plugins"
|
||||
labels := map[string]string{
|
||||
daemonsetNameLabel: dsName,
|
||||
}
|
||||
ds := &appsv1.DaemonSet{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: dsName,
|
||||
Namespace: "kube-system",
|
||||
},
|
||||
Spec: appsv1.DaemonSetSpec{
|
||||
Selector: &metav1.LabelSelector{
|
||||
MatchLabels: labels,
|
||||
},
|
||||
Template: v1.PodTemplateSpec{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Annotations: map[string]string{
|
||||
"scheduler.alpha.kubernetes.io/critical-pod": "",
|
||||
},
|
||||
Labels: labels,
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
Tolerations: []v1.Toleration{
|
||||
{
|
||||
Key: "CriticalAddonsOnly",
|
||||
Operator: "Exists",
|
||||
},
|
||||
},
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Name: "hostdev",
|
||||
Image: image,
|
||||
VolumeMounts: []v1.VolumeMount{
|
||||
{
|
||||
Name: mountName,
|
||||
MountPath: mountPath,
|
||||
},
|
||||
},
|
||||
Env: []v1.EnvVar{
|
||||
{
|
||||
Name: "DIRECTX_GPU_MATCH_NAME",
|
||||
Value: " ",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Volumes: []v1.Volume{
|
||||
{
|
||||
Name: mountName,
|
||||
VolumeSource: v1.VolumeSource{
|
||||
HostPath: &v1.HostPathVolumeSource{
|
||||
Path: mountPath,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
NodeSelector: map[string]string{
|
||||
"kubernetes.io/os": "windows",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
sysNs := "kube-system"
|
||||
_, err := cs.AppsV1().DaemonSets(sysNs).Create(context.TODO(), ds, metav1.CreateOptions{})
|
||||
framework.ExpectNoError(err)
|
||||
|
||||
ginkgo.By("creating Windows testing Pod")
|
||||
windowsPod := createTestPod(f, imageutils.GetE2EImage(imageutils.WindowsServer), windowsOS)
|
||||
windowsPod.Spec.Containers[0].Args = []string{"powershell.exe", "Start-Sleep", "3600"}
|
||||
windowsPod.Spec.Containers[0].Resources.Limits = v1.ResourceList{
|
||||
"microsoft.com/directx": resource.MustParse("1"),
|
||||
}
|
||||
windowsPod, err = cs.CoreV1().Pods(f.Namespace.Name).Create(context.TODO(), windowsPod, metav1.CreateOptions{})
|
||||
framework.ExpectNoError(err)
|
||||
ginkgo.By("Waiting for the pod Running")
|
||||
err = e2epod.WaitTimeoutForPodRunningInNamespace(cs, windowsPod.Name, f.Namespace.Name, testSlowMultiplier*framework.PodStartTimeout)
|
||||
framework.ExpectNoError(err)
|
||||
|
||||
ginkgo.By("verifying device access in Windows testing Pod")
|
||||
dxdiagCommand := []string{"cmd.exe", "/c", "dxdiag", "/t", "dxdiag_output.txt", "&", "type", "dxdiag_output.txt"}
|
||||
//If DirectX version issues caused by supsequent windows releases occur, these tests need to do version checks
|
||||
//based on the windows version running the test.
|
||||
dxdiagDirectxVersion := "DirectX Version: DirectX 12"
|
||||
defaultNs := f.Namespace.Name
|
||||
_, dxdiagDirectxVersionErr := framework.LookForStringInPodExec(defaultNs, windowsPod.Name, dxdiagCommand, dxdiagDirectxVersion, time.Minute)
|
||||
framework.ExpectNoError(dxdiagDirectxVersionErr, "failed: didn't find directX version dxdiag output.")
|
||||
|
||||
dxdiagDdiVersion := "DDI Version: 12"
|
||||
_, dxdiagDdiVersionErr := framework.LookForStringInPodExec(defaultNs, windowsPod.Name, dxdiagCommand, dxdiagDdiVersion, time.Minute)
|
||||
framework.ExpectNoError(dxdiagDdiVersionErr, "failed: didn't find DDI version in dxdiag output.")
|
||||
|
||||
dxdiagVendorID := "Vendor ID: 0x"
|
||||
_, dxdiagVendorIDErr := framework.LookForStringInPodExec(defaultNs, windowsPod.Name, dxdiagCommand, dxdiagVendorID, time.Minute)
|
||||
framework.ExpectNoError(dxdiagVendorIDErr, "failed: didn't find vendorID in dxdiag output.")
|
||||
|
||||
envVarCommand := []string{"cmd.exe", "/c", "set", "DIRECTX_GPU_Name"}
|
||||
envVarDirectxGpuName := "DIRECTX_GPU_Name="
|
||||
_, envVarDirectxGpuNameErr := framework.LookForStringInPodExec(defaultNs, windowsPod.Name, envVarCommand, envVarDirectxGpuName, time.Minute)
|
||||
framework.ExpectNoError(envVarDirectxGpuNameErr, "failed: didn't find expected environment variable.")
|
||||
})
|
||||
})
|
@ -40,6 +40,7 @@ type RegistryList struct {
|
||||
GcrReleaseRegistry string `yaml:"gcrReleaseRegistry"`
|
||||
PrivateRegistry string `yaml:"privateRegistry"`
|
||||
SampleRegistry string `yaml:"sampleRegistry"`
|
||||
MicrosoftRegistry string `yaml:"microsoftRegistry"`
|
||||
}
|
||||
|
||||
// Config holds an images registry, name, and version
|
||||
@ -79,6 +80,7 @@ func initReg() RegistryList {
|
||||
GcrReleaseRegistry: "gcr.io/gke-release",
|
||||
PrivateRegistry: "gcr.io/k8s-authenticated-test",
|
||||
SampleRegistry: "gcr.io/google-samples",
|
||||
MicrosoftRegistry: "mcr.microsoft.com",
|
||||
}
|
||||
repoList := os.Getenv("KUBE_TEST_REPO_LIST")
|
||||
if repoList == "" {
|
||||
@ -116,6 +118,8 @@ var (
|
||||
|
||||
// Preconfigured image configs
|
||||
imageConfigs = initImageConfigs()
|
||||
|
||||
microsoftRegistry = registry.MicrosoftRegistry
|
||||
)
|
||||
|
||||
const (
|
||||
@ -198,6 +202,8 @@ const (
|
||||
VolumeGlusterServer
|
||||
// VolumeRBDServer image
|
||||
VolumeRBDServer
|
||||
// WindowsServer image
|
||||
WindowsServer
|
||||
)
|
||||
|
||||
func initImageConfigs() map[int]Config {
|
||||
@ -241,6 +247,7 @@ func initImageConfigs() map[int]Config {
|
||||
configs[VolumeISCSIServer] = Config{e2eVolumeRegistry, "iscsi", "2.0"}
|
||||
configs[VolumeGlusterServer] = Config{e2eVolumeRegistry, "gluster", "1.0"}
|
||||
configs[VolumeRBDServer] = Config{e2eVolumeRegistry, "rbd", "1.0.1"}
|
||||
configs[WindowsServer] = Config{microsoftRegistry, "windows", "1809"}
|
||||
return configs
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user