diff --git a/pkg/kubelet/cm/BUILD b/pkg/kubelet/cm/BUILD index 6935ba92d09..1122bba86f7 100644 --- a/pkg/kubelet/cm/BUILD +++ b/pkg/kubelet/cm/BUILD @@ -169,6 +169,7 @@ go_library( ], "@io_bazel_rules_go//go/platform:windows": [ "//pkg/kubelet/cadvisor:go_default_library", + "//pkg/kubelet/cm/devicemanager:go_default_library", "//staging/src/k8s.io/client-go/tools/record:go_default_library", "//staging/src/k8s.io/mount-utils:go_default_library", ], diff --git a/pkg/kubelet/cm/container_manager_windows.go b/pkg/kubelet/cm/container_manager_windows.go index e6637b4d443..072ea63fabe 100644 --- a/pkg/kubelet/cm/container_manager_windows.go +++ b/pkg/kubelet/cm/container_manager_windows.go @@ -36,6 +36,7 @@ import ( kubefeatures "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/kubelet/cadvisor" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager" + "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/config" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" @@ -52,6 +53,10 @@ type containerManagerImpl struct { cadvisorInterface cadvisor.Interface // Config of this node. nodeConfig NodeConfig + // Interface for exporting and allocating devices reported by device plugins. + deviceManager devicemanager.Manager + // Interface for Topology resource co-ordination + topologyManager topologymanager.Manager } type noopWindowsResourceAllocator struct{} @@ -79,6 +84,11 @@ func (cm *containerManagerImpl) Start(node *v1.Node, } } + // Starts device manager. + if err := cm.deviceManager.Start(devicemanager.ActivePodsFunc(activePods), sourcesReady); err != nil { + return err + } + return nil } @@ -93,11 +103,26 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I } capacity := cadvisor.CapacityFromMachineInfo(machineInfo) - return &containerManagerImpl{ + cm := &containerManagerImpl{ capacity: capacity, nodeConfig: nodeConfig, cadvisorInterface: cadvisorInterface, - }, nil + } + + cm.topologyManager = topologymanager.NewFakeManager() + + klog.Infof("Creating device plugin manager: %t", devicePluginEnabled) + if devicePluginEnabled { + cm.deviceManager, err = devicemanager.NewManagerImpl(nil, cm.topologyManager) + cm.topologyManager.AddHintProvider(cm.deviceManager) + } else { + cm.deviceManager, err = devicemanager.NewManagerStub() + } + if err != nil { + return nil, err + } + + return cm, nil } func (cm *containerManagerImpl) SystemCgroupsLimit() v1.ResourceList { @@ -150,11 +175,11 @@ func (cm *containerManagerImpl) GetCapacity() v1.ResourceList { } func (cm *containerManagerImpl) GetPluginRegistrationHandler() cache.PluginHandler { - return nil + return cm.deviceManager.GetWatcherHandler() } func (cm *containerManagerImpl) GetDevicePluginResourceCapacity() (v1.ResourceList, v1.ResourceList, []string) { - return nil, nil, []string{} + return cm.deviceManager.GetCapacity() } func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager { @@ -162,11 +187,24 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager { } func (cm *containerManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error) { - return &kubecontainer.RunContainerOptions{}, nil + opts := &kubecontainer.RunContainerOptions{} + // Allocate should already be called during predicateAdmitHandler.Admit(), + // just try to fetch device runtime information from cached state here + devOpts, err := cm.deviceManager.GetDeviceRunContainerOptions(pod, container) + if err != nil { + return nil, err + } else if devOpts == nil { + return opts, nil + } + opts.Devices = append(opts.Devices, devOpts.Devices...) + opts.Mounts = append(opts.Mounts, devOpts.Mounts...) + opts.Envs = append(opts.Envs, devOpts.Envs...) + opts.Annotations = append(opts.Annotations, devOpts.Annotations...) + return opts, nil } -func (cm *containerManagerImpl) UpdatePluginResources(*schedulerframework.NodeInfo, *lifecycle.PodAdmitAttributes) error { - return nil +func (cm *containerManagerImpl) UpdatePluginResources(node *schedulerframework.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error { + return cm.deviceManager.UpdatePluginResources(node, attrs) } func (cm *containerManagerImpl) InternalContainerLifecycle() InternalContainerLifecycle { @@ -177,12 +215,12 @@ func (cm *containerManagerImpl) GetPodCgroupRoot() string { return "" } -func (cm *containerManagerImpl) GetDevices(_, _ string) []*podresourcesapi.ContainerDevices { - return nil +func (cm *containerManagerImpl) GetDevices(podUID, containerName string) []*podresourcesapi.ContainerDevices { + return cm.deviceManager.GetDevices(podUID, containerName) } func (cm *containerManagerImpl) ShouldResetExtendedResourceCapacity() bool { - return false + return cm.deviceManager.ShouldResetExtendedResourceCapacity() } func (cm *containerManagerImpl) GetAllocateResourcesPodAdmitHandler() lifecycle.PodAdmitHandler { diff --git a/pkg/kubelet/cm/devicemanager/manager.go b/pkg/kubelet/cm/devicemanager/manager.go index 95cf058f1a7..e103b875bda 100644 --- a/pkg/kubelet/cm/devicemanager/manager.go +++ b/pkg/kubelet/cm/devicemanager/manager.go @@ -22,6 +22,7 @@ import ( "net" "os" "path/filepath" + "runtime" "sort" "sync" "time" @@ -126,7 +127,11 @@ func (s *sourcesReadyStub) AllReady() bool { return true } // NewManagerImpl creates a new manager. func NewManagerImpl(topology []cadvisorapi.Node, topologyAffinityStore topologymanager.Store) (*ManagerImpl, error) { - return newManagerImpl(pluginapi.KubeletSocket, topology, topologyAffinityStore) + socketPath := pluginapi.KubeletSocket + if runtime.GOOS == "windows" { + socketPath = os.Getenv("SYSTEMDRIVE") + pluginapi.KubeletSocketWindows + } + return newManagerImpl(socketPath, topology, topologyAffinityStore) } func newManagerImpl(socketPath string, topology []cadvisorapi.Node, topologyAffinityStore topologymanager.Store) (*ManagerImpl, error) { diff --git a/staging/src/k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1/constants.go b/staging/src/k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1/constants.go index 4c40f21240e..3bed2148033 100644 --- a/staging/src/k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1/constants.go +++ b/staging/src/k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1/constants.go @@ -30,7 +30,17 @@ const ( DevicePluginPath = "/var/lib/kubelet/device-plugins/" // KubeletSocket is the path of the Kubelet registry socket KubeletSocket = DevicePluginPath + "kubelet.sock" + + // DevicePluginPathWindows Avoid failed to run Kubelet: bad socketPath, + // must be an absolute path: /var/lib/kubelet/device-plugins/kubelet.sock + // https://github.com/kubernetes/kubernetes/issues/93262 + // https://github.com/kubernetes/kubernetes/pull/93285#discussion_r458140701 + DevicePluginPathWindows = "\\var\\lib\\kubelet\\device-plugins\\" + // KubeletSocketWindows is the path of the Kubelet registry socket on windows + KubeletSocketWindows = DevicePluginPathWindows + "kubelet.sock" + // KubeletPreStartContainerRPCTimeoutInSecs is the timeout duration in secs for PreStartContainer RPC + // Timeout duration in secs for PreStartContainer RPC KubeletPreStartContainerRPCTimeoutInSecs = 30 ) diff --git a/test/e2e/windows/BUILD b/test/e2e/windows/BUILD index 1054b7d68de..9a22dfec2ab 100644 --- a/test/e2e/windows/BUILD +++ b/test/e2e/windows/BUILD @@ -7,6 +7,7 @@ go_library( srcs = [ "cpu_limits.go", "density.go", + "device_plugin.go", "dns.go", "framework.go", "gmsa_full.go", @@ -22,6 +23,7 @@ go_library( importpath = "k8s.io/kubernetes/test/e2e/windows", visibility = ["//visibility:public"], deps = [ + "//staging/src/k8s.io/api/apps/v1:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/api/rbac/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", diff --git a/test/e2e/windows/device_plugin.go b/test/e2e/windows/device_plugin.go new file mode 100644 index 00000000000..b901a3d984e --- /dev/null +++ b/test/e2e/windows/device_plugin.go @@ -0,0 +1,157 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package windows + +import ( + "context" + "time" + + appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/kubernetes/test/e2e/framework" + e2epod "k8s.io/kubernetes/test/e2e/framework/pod" + e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" + imageutils "k8s.io/kubernetes/test/utils/image" + + "github.com/onsi/ginkgo" +) + +const ( + testSlowMultiplier = 60 +) + +var _ = SIGDescribe("Device Plugin", func() { + f := framework.NewDefaultFramework("device-plugin") + + var cs clientset.Interface + + ginkgo.BeforeEach(func() { + //Only for Windows containers + e2eskipper.SkipUnlessNodeOSDistroIs("windows") + cs = f.ClientSet + }) + ginkgo.It("should be able to create a functioning device plugin for Windows", func() { + ginkgo.By("creating Windows device plugin daemonset") + dsName := "directx-device-plugin" + daemonsetNameLabel := "daemonset-name" + image := "e2eteam/k8s-directx-device-plugin:0.9.0-1809" + mountName := "device-plugin" + mountPath := "/var/lib/kubelet/device-plugins" + labels := map[string]string{ + daemonsetNameLabel: dsName, + } + ds := &appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: dsName, + Namespace: "kube-system", + }, + Spec: appsv1.DaemonSetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: labels, + }, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + "scheduler.alpha.kubernetes.io/critical-pod": "", + }, + Labels: labels, + }, + Spec: v1.PodSpec{ + Tolerations: []v1.Toleration{ + { + Key: "CriticalAddonsOnly", + Operator: "Exists", + }, + }, + Containers: []v1.Container{ + { + Name: "hostdev", + Image: image, + VolumeMounts: []v1.VolumeMount{ + { + Name: mountName, + MountPath: mountPath, + }, + }, + Env: []v1.EnvVar{ + { + Name: "DIRECTX_GPU_MATCH_NAME", + Value: " ", + }, + }, + }, + }, + Volumes: []v1.Volume{ + { + Name: mountName, + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: mountPath, + }, + }, + }, + }, + NodeSelector: map[string]string{ + "kubernetes.io/os": "windows", + }, + }, + }, + }, + } + + sysNs := "kube-system" + _, err := cs.AppsV1().DaemonSets(sysNs).Create(context.TODO(), ds, metav1.CreateOptions{}) + framework.ExpectNoError(err) + + ginkgo.By("creating Windows testing Pod") + windowsPod := createTestPod(f, imageutils.GetE2EImage(imageutils.WindowsServer), windowsOS) + windowsPod.Spec.Containers[0].Args = []string{"powershell.exe", "Start-Sleep", "3600"} + windowsPod.Spec.Containers[0].Resources.Limits = v1.ResourceList{ + "microsoft.com/directx": resource.MustParse("1"), + } + windowsPod, err = cs.CoreV1().Pods(f.Namespace.Name).Create(context.TODO(), windowsPod, metav1.CreateOptions{}) + framework.ExpectNoError(err) + ginkgo.By("Waiting for the pod Running") + err = e2epod.WaitTimeoutForPodRunningInNamespace(cs, windowsPod.Name, f.Namespace.Name, testSlowMultiplier*framework.PodStartTimeout) + framework.ExpectNoError(err) + + ginkgo.By("verifying device access in Windows testing Pod") + dxdiagCommand := []string{"cmd.exe", "/c", "dxdiag", "/t", "dxdiag_output.txt", "&", "type", "dxdiag_output.txt"} + //If DirectX version issues caused by supsequent windows releases occur, these tests need to do version checks + //based on the windows version running the test. + dxdiagDirectxVersion := "DirectX Version: DirectX 12" + defaultNs := f.Namespace.Name + _, dxdiagDirectxVersionErr := framework.LookForStringInPodExec(defaultNs, windowsPod.Name, dxdiagCommand, dxdiagDirectxVersion, time.Minute) + framework.ExpectNoError(dxdiagDirectxVersionErr, "failed: didn't find directX version dxdiag output.") + + dxdiagDdiVersion := "DDI Version: 12" + _, dxdiagDdiVersionErr := framework.LookForStringInPodExec(defaultNs, windowsPod.Name, dxdiagCommand, dxdiagDdiVersion, time.Minute) + framework.ExpectNoError(dxdiagDdiVersionErr, "failed: didn't find DDI version in dxdiag output.") + + dxdiagVendorID := "Vendor ID: 0x" + _, dxdiagVendorIDErr := framework.LookForStringInPodExec(defaultNs, windowsPod.Name, dxdiagCommand, dxdiagVendorID, time.Minute) + framework.ExpectNoError(dxdiagVendorIDErr, "failed: didn't find vendorID in dxdiag output.") + + envVarCommand := []string{"cmd.exe", "/c", "set", "DIRECTX_GPU_Name"} + envVarDirectxGpuName := "DIRECTX_GPU_Name=" + _, envVarDirectxGpuNameErr := framework.LookForStringInPodExec(defaultNs, windowsPod.Name, envVarCommand, envVarDirectxGpuName, time.Minute) + framework.ExpectNoError(envVarDirectxGpuNameErr, "failed: didn't find expected environment variable.") + }) +}) diff --git a/test/utils/image/manifest.go b/test/utils/image/manifest.go index 86caa0218fc..2b9026c9ed7 100644 --- a/test/utils/image/manifest.go +++ b/test/utils/image/manifest.go @@ -40,6 +40,7 @@ type RegistryList struct { GcrReleaseRegistry string `yaml:"gcrReleaseRegistry"` PrivateRegistry string `yaml:"privateRegistry"` SampleRegistry string `yaml:"sampleRegistry"` + MicrosoftRegistry string `yaml:"microsoftRegistry"` } // Config holds an images registry, name, and version @@ -79,6 +80,7 @@ func initReg() RegistryList { GcrReleaseRegistry: "gcr.io/gke-release", PrivateRegistry: "gcr.io/k8s-authenticated-test", SampleRegistry: "gcr.io/google-samples", + MicrosoftRegistry: "mcr.microsoft.com", } repoList := os.Getenv("KUBE_TEST_REPO_LIST") if repoList == "" { @@ -116,6 +118,8 @@ var ( // Preconfigured image configs imageConfigs = initImageConfigs() + + microsoftRegistry = registry.MicrosoftRegistry ) const ( @@ -198,6 +202,8 @@ const ( VolumeGlusterServer // VolumeRBDServer image VolumeRBDServer + // WindowsServer image + WindowsServer ) func initImageConfigs() map[int]Config { @@ -241,6 +247,7 @@ func initImageConfigs() map[int]Config { configs[VolumeISCSIServer] = Config{e2eVolumeRegistry, "iscsi", "2.0"} configs[VolumeGlusterServer] = Config{e2eVolumeRegistry, "gluster", "1.0"} configs[VolumeRBDServer] = Config{e2eVolumeRegistry, "rbd", "1.0.1"} + configs[WindowsServer] = Config{microsoftRegistry, "windows", "1809"} return configs }