mirror of https://github.com/k8snetworkplumbingwg/multus-cni.git
synced 2025-07-31 07:32:09 +00:00

Merge pull request #1078 from moshe010/dra

add support for Dynamic Resource Allocation

This commit is contained in commit 9f5c0239a8
.github/workflows/kind-e2e.yml (vendored, 4 lines changed)
@@ -85,6 +85,10 @@ jobs:
        working-directory: ./e2e
        run: ./test-default-route1.sh

      - name: Test DRA integration
        working-directory: ./e2e
        run: ./test-dra-integration.sh

      - name: Export kind logs
        if: always()
        run: |
.gitignore (vendored, 1 line changed)
@@ -2,6 +2,7 @@
bin/
e2e/bin/
e2e/yamls/
e2e/repos/

# GOPATH created by the build script
gopath/
@@ -511,7 +511,7 @@ spec:
EOF
```

We can then create a pod which uses the `default-route` key in the JSON formatted `k8s.v1.cni.cncf.io/networks` annotation.

```
cat <<EOF | kubectl create -f -
@@ -537,9 +537,9 @@ This will set `192.168.2.1` as the default route over the `net1` interface, such
```
kubectl exec -it samplepod -- ip route

default via 192.168.2.1 dev net1
10.244.0.0/24 dev eth0 proto kernel scope link src 10.244.0.169
10.244.0.0/16 via 10.244.0.1 dev eth0
```

## Entrypoint Parameters
@@ -634,3 +634,123 @@ Sometimes, you may wish to not have the entrypoint copy the binary file onto the
If you wish to have auto configuration use the `readinessindicatorfile` in the configuration, you can use the `--readiness-indicator-file` flag to express which file should be used as the readiness indicator.

```
--readiness-indicator-file=/path/to/file
```
### Run pod with network annotation and Dynamic Resource Allocation driver

> :warning: Dynamic Resource Allocation (DRA) is [currently an alpha feature](https://kubernetes.io/docs/concepts/scheduling-eviction/dynamic-resource-allocation/)
> and is subject to change. Please consider this functionality a preview. The architecture and usage of DRA in
> Multus CNI may change in the future as this technology matures.
Dynamic Resource Allocation is an alternative to the device plugin mechanism that allows pods and containers to request resources.

The following sections describe how to use DRA with Multus and the NVIDIA DRA driver. Other DRA networking driver vendors should follow similar concepts to make use of Multus DRA support.
#### Prerequisites

1. Kubernetes 1.27
2. Container runtime with CDI support enabled
3. Kubernetes `runtime-config=resource.k8s.io/v1alpha2`
4. Kubernetes `feature-gates=DynamicResourceAllocation=true,KubeletPodResourcesDynamicResources=true`
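As a quick sanity check (a minimal sketch; assumes `kubectl` access to a cluster configured as above), you can confirm the alpha resource API group is actually being served:

```
# Should print resource.k8s.io/v1alpha2 once the runtime config
# and feature gates above are in effect
kubectl api-versions | grep resource.k8s.io
```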
#### Install DRA driver

This example uses the NVIDIA DRA driver for networking, which is not publicly available. An open source alternative is available at [dra-example-driver](https://github.com/kubernetes-sigs/dra-example-driver).
#### Create dynamic resource class with NVIDIA network DRA driver

The `ResourceClass` below defines the resource pool named `sf-pool-1`.

```
# Execute following command at Kubernetes master
cat <<EOF | kubectl create -f -
apiVersion: resource.k8s.io/v1alpha2
kind: ResourceClass
metadata:
  name: sf-pool-1
driverName: net.resource.nvidia.com
EOF
```
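To verify that the class was created (a hedged check; the plural resource name follows the `resource.k8s.io/v1alpha2` API):

```
# sf-pool-1 should appear in the list
kubectl get resourceclasses
```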
#### Create network attachment definition with resource name

The `k8s.v1.cni.cncf.io/resourceName` annotation should match the `ResourceClass` name defined in the section above,
in this example `sf-pool-1`. Multus queries the Kubernetes PodResources API to fetch the resource class name, and also
reads the `k8s.v1.cni.cncf.io/resourceName` annotation from the NetworkAttachmentDefinition. If both have the same
name, Multus sends the CDI device name to the delegate CNI plugin in the `DeviceID` argument.
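Conceptually, when the names match, Multus injects the device identifier into the delegate plugin's CNI configuration. A rough sketch of what the delegate might receive (illustrative only; the exact field placement is internal to Multus, and `sf-1` is a made-up device name):

```
# Hypothetical delegate CNI config after Multus adds the device name
# extracted from the claim's CDI device (a sketch, not exact output)
{
  "cniVersion": "0.4.0",
  "name": "ovn-kubernetes",
  "type": "ovn-k8s-cni-overlay",
  "deviceID": "sf-1"
}
```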
##### NetworkAttachmentDefinition for ovn-kubernetes example

The following command creates the NetworkAttachmentDefinition. The CNI config is in the `config:` field.

```
# Execute following command at Kubernetes master
cat <<EOF | kubectl create -f -
apiVersion: "k8s.cni.cncf.io/v1"
kind: NetworkAttachmentDefinition
metadata:
  name: default
  annotations:
    k8s.v1.cni.cncf.io/resourceName: sf-pool-1
spec:
  config: '{
    "cniVersion": "0.4.0",
    "dns": {},
    "ipam": {},
    "logFile": "/var/log/ovn-kubernetes/ovn-k8s-cni-overlay.log",
    "logLevel": "4",
    "logfile-maxage": 5,
    "logfile-maxbackups": 5,
    "logfile-maxsize": 100,
    "name": "ovn-kubernetes",
    "type": "ovn-k8s-cni-overlay"
  }'
EOF
```
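A quick hedged check that the annotation landed on the object:

```
kubectl get network-attachment-definitions default -o yaml | grep resourceName
```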
#### Create DRA Resource Claim

The following command creates the `ResourceClaim` `sf`, which requests a resource from the `ResourceClass` `sf-pool-1`.

```
# Execute following command at Kubernetes master
cat <<EOF | kubectl create -f -
apiVersion: resource.k8s.io/v1alpha2
kind: ResourceClaim
metadata:
  namespace: default
  name: sf
spec:
  resourceClassName: sf-pool-1
EOF
```
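By default a claim is typically allocated only once the first consuming pod is scheduled, so it is normal for it to show as unallocated at this point; a hedged way to inspect its state:

```
# Shows the claim and its allocation state
kubectl get resourceclaim sf -n default
```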
#### Launch pod with DRA Resource Claim

The following manifest launches a Pod with the primary network `default` and the `ResourceClaim` `sf`.

```
apiVersion: v1
kind: Pod
metadata:
  namespace: default
  name: test-sf-claim
  annotations:
    v1.multus-cni.io/default-network: default
spec:
  restartPolicy: Always
  containers:
  - name: with-resource
    image: docker.io/library/ubuntu:22.04
    command: ["/bin/sh", "-ec", "while :; do echo '.'; sleep 5 ; done"]
    resources:
      claims:
      - name: resource
  resourceClaims:
  - name: resource
    source:
      resourceClaimName: sf
```
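After applying the manifest (for example with `kubectl create -f -`), a hedged verification that the pod is running and the claim was reserved for it:

```
kubectl get pod test-sf-claim -n default
# status.reservedFor should reference the pod once it is scheduled
kubectl get resourceclaim sf -n default -o yaml
```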
@@ -13,3 +13,4 @@ curl -Lo ./bin/koko https://github.com/redhat-nfvpe/koko/releases/download/v0.83
chmod +x ./bin/koko
curl -Lo ./bin/jq https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64
chmod +x ./bin/jq
wget -qO- https://get.helm.sh/helm-v3.14.3-linux-amd64.tar.gz | tar xvzf - --strip-components=1 -C ./bin linux-amd64/helm
@@ -34,7 +34,21 @@ nodes:
    nodeRegistration:
      kubeletExtraArgs:
        pod-manifest-path: "/etc/kubernetes/manifests/"
        feature-gates: "DynamicResourceAllocation=true,KubeletPodResourcesDynamicResources=true"
- role: worker
# Required by DRA Integration
##
featureGates:
  DynamicResourceAllocation: true
runtimeConfig:
  "api/alpha": "true"
containerdConfigPatches:
# Enable CDI as described in
# https://github.com/container-orchestrated-devices/container-device-interface#containerd-configuration
- |-
  [plugins."io.containerd.grpc.v1.cri"]
    enable_cdi = true
##
EOF

# load multus image from container host to kind node
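The hunk ends just before the image-load step; with kind this typically looks like the sketch below (the image tag here is hypothetical, the e2e scripts build and tag their own image):

```
# Load a locally built multus image into the kind cluster nodes
kind load docker-image localhost:5000/multus:e2e --name kind
```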
e2e/templates/dra-integration.yml.j2 (new file, 49 lines)
@@ -0,0 +1,49 @@
---
apiVersion: resource.k8s.io/v1alpha2
kind: ResourceClaimTemplate
metadata:
  name: gpu.example.com
spec:
  spec:
    resourceClassName: gpu.example.com
---
apiVersion: "k8s.cni.cncf.io/v1"
kind: NetworkAttachmentDefinition
metadata:
  name: dra-net
  annotations:
    k8s.v1.cni.cncf.io/resourceName: gpu.example.com
spec:
  config: '{
    "cniVersion": "{{ CNI_VERSION }}",
    "plugins": [{
      "name": "mynet",
      "type": "dummy",
      "ipam": {
        "type": "host-local",
        "subnet": "10.1.2.0/24"
      }
    }]
  }'
---
apiVersion: v1
kind: Pod
metadata:
  name: dra-integration
  labels:
    app: dra-integration
  annotations:
    k8s.v1.cni.cncf.io/networks: default/dra-net
spec:
  containers:
  - name: ctr0
    image: ubuntu:22.04
    command: ["bash", "-c"]
    args: ["export; sleep 9999"]
    resources:
      claims:
      - name: gpu
  resourceClaims:
  - name: gpu
    source:
      resourceClaimTemplateName: gpu.example.com
@@ -158,6 +158,9 @@ spec:
        - name: multus-daemon-config
          mountPath: /etc/cni/net.d/multus.d
          readOnly: true
        - name: kubelet-pod-resources
          mountPath: /var/lib/kubelet/pod-resources
          readOnly: true
        env:
        - name: MULTUS_NODE_NAME
          valueFrom:

@@ -187,6 +190,9 @@ spec:

      - name: cnibin
        hostPath:
          path: /opt/cni/bin
      - name: kubelet-pod-resources
        hostPath:
          path: /var/lib/kubelet/pod-resources
      - name: multus-daemon-config
        configMap:
          name: multus-daemon-config
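These mounts give the Multus daemon read access to the kubelet's PodResources gRPC endpoint; a hedged way to confirm the socket exists on a node:

```
# The kubelet serves the PodResources API over this Unix socket
ls -l /var/lib/kubelet/pod-resources/kubelet.sock
```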
e2e/test-dra-integration.sh (new executable file, 59 lines)
@@ -0,0 +1,59 @@
#!/bin/sh
set -o errexit

export PATH=${PATH}:./bin

# This test is using an example implementation of a DRA driver. This driver is mocking GPU resources. In our test we
# don't care about what these resources are. We want to ensure that such a resource is correctly passed into the Pod
# using Multus configurations. A couple of notes:
# - We explicitly don't pin the revision of the dra-example-driver to a specific commit to ensure that the integration
#   continues to work even when the dra-example-driver is updated (which may also indicate API changes on the DRA).
# - The chart and latest image are not published anywhere, therefore we have to build locally. This leads to slower
#   e2e suite runs.
echo "installing dra-example-driver"
repo_path="repos/dra-example-driver"

rm -rf $repo_path || true
git clone https://github.com/kubernetes-sigs/dra-example-driver.git ${repo_path}
${repo_path}/demo/build-driver.sh
KIND_CLUSTER_NAME=kind ${repo_path}/demo/scripts/load-driver-image-into-kind.sh
chart_path=${repo_path}/deployments/helm/dra-example-driver/
overriden_values_path=${chart_path}/overriden_values.yaml

# With the thick plugin, in kind, the primary network on the control plane is not always working as expected. The pods
# sometimes are not able to communicate with the control plane and the error looks like this:
# failed to list *v1alpha2.PodSchedulingContext: Get "https://10.96.0.1:443/apis/resource.k8s.io/v1alpha2/podschedulingcontexts?limit=500&resourceVersion=0": dial tcp 10.96.0.1:443: connect: no route to host
# We override the values here to schedule the controller on the worker nodes where the network is working as expected.
cat <<EOF >> ${overriden_values_path}
controller:
  nodeSelector: null
  tolerations: null
EOF

helm install \
  -n dra-example-driver \
  --create-namespace \
  -f ${overriden_values_path} \
  dra-example-driver \
  ${chart_path}

echo "installing testing pods"
kubectl create -f yamls/dra-integration.yml
kubectl wait --for=condition=ready -l app=dra-integration --timeout=300s pod

echo "check dra-integration pod for DRA injected environment variable"

# We can validate that the resource is correctly injected by checking an environment variable this DRA driver injects
# into the Pod.
# https://github.com/kubernetes-sigs/dra-example-driver/blob/be2b8b1db47b8c757440e955ce5ced88c23bfe86/cmd/dra-example-kubeletplugin/cdi.go#L71C20-L71C44
# Run the check inside an if so that a failing grep does not trip errexit
# before we can report the failure.
if kubectl exec dra-integration -- bash -c "echo \$DRA_RESOURCE_DRIVER_NAME | grep -q gpu.resource.example.com"; then
  echo "dra-integration pod has DRA injected environment variable"
else
  echo "dra-integration pod doesn't have DRA injected environment variable"
  exit 1
fi

echo "cleanup resources"
kubectl delete -f yamls/dra-integration.yml
helm uninstall -n dra-example-driver dra-example-driver
@@ -21,6 +21,7 @@ import (
	"net/url"
	"os"
	"path/filepath"
	"strings"
	"time"

	"golang.org/x/net/context"
@@ -137,19 +138,45 @@ func (rc *kubeletClient) GetPodResourceMap(pod *v1.Pod) (map[string]*types.Resou
	for _, pr := range rc.resources {
		if pr.Name == name && pr.Namespace == ns {
			for _, cnt := range pr.Containers {
-				for _, dev := range cnt.Devices {
-					if rInfo, ok := resourceMap[dev.ResourceName]; ok {
-						rInfo.DeviceIDs = append(rInfo.DeviceIDs, dev.DeviceIds...)
-					} else {
-						resourceMap[dev.ResourceName] = &types.ResourceInfo{DeviceIDs: dev.DeviceIds}
-					}
-				}
+				rc.getDevicePluginResources(cnt.Devices, resourceMap)
+				rc.getDRAResources(cnt.DynamicResources, resourceMap)
			}
		}
	}
	return resourceMap, nil
}

// getDevicePluginResources aggregates device-plugin device IDs into
// resourceMap, keyed by resource name.
func (rc *kubeletClient) getDevicePluginResources(devices []*podresourcesapi.ContainerDevices, resourceMap map[string]*types.ResourceInfo) {
	for _, dev := range devices {
		if rInfo, ok := resourceMap[dev.ResourceName]; ok {
			rInfo.DeviceIDs = append(rInfo.DeviceIDs, dev.DeviceIds...)
		} else {
			resourceMap[dev.ResourceName] = &types.ResourceInfo{DeviceIDs: dev.DeviceIds}
		}
	}
}

// getDRAResources extracts CDI device names from DRA claim resources and
// aggregates them into resourceMap, keyed by resource class name.
func (rc *kubeletClient) getDRAResources(dynamicResources []*podresourcesapi.DynamicResource, resourceMap map[string]*types.ResourceInfo) {
	for _, dynamicResource := range dynamicResources {
		var deviceIDs []string
		for _, claimResource := range dynamicResource.ClaimResources {
			for _, cdiDevice := range claimResource.CDIDevices {
				// CDI names are qualified as "vendor/class=device";
				// keep only the device part after the "=".
				res := strings.Split(cdiDevice.Name, "=")
				if len(res) == 2 {
					deviceIDs = append(deviceIDs, res[1])
				} else {
					logging.Errorf("GetPodResourceMap: Invalid CDI format")
				}
			}
		}
		if rInfo, ok := resourceMap[dynamicResource.ClassName]; ok {
			rInfo.DeviceIDs = append(rInfo.DeviceIDs, deviceIDs...)
		} else {
			resourceMap[dynamicResource.ClassName] = &types.ResourceInfo{DeviceIDs: deviceIDs}
		}
	}
}
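For reference, CDI device names follow the `vendor/class=device` pattern from the CDI specification; a standalone sketch of the split performed above (the device name here is a made-up example):

```
package main

import (
	"fmt"
	"strings"
)

func main() {
	// Hypothetical fully qualified CDI device name
	cdiName := "example.com/net=sf-1"
	if parts := strings.Split(cdiName, "="); len(parts) == 2 {
		// "sf-1" is what would end up in ResourceInfo.DeviceIDs
		fmt.Println(parts[1])
	}
}
```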
func hasKubeletAPIEndpoint(url *url.URL) bool {
	// Check for kubelet resource API socket file
	if _, err := os.Stat(url.Path); err != nil {
@@ -60,10 +60,6 @@ func (m *fakeResourceServer) Get(_ context.Context, _ *podresourcesapi.GetPodRes
}

func (m *fakeResourceServer) List(_ context.Context, _ *podresourcesapi.ListPodResourcesRequest) (*podresourcesapi.ListPodResourcesResponse, error) {
-	podName := "pod-name"
-	podNamespace := "pod-namespace"
-	containerName := "container-name"
-
	devs := []*podresourcesapi.ContainerDevices{
		{
			ResourceName: "resource",
@@ -71,18 +67,49 @@ func (m *fakeResourceServer) List(_ context.Context, _ *podresourcesapi.ListPodR
		},
	}

	cdiDevices := []*podresourcesapi.CDIDevice{
		{
			Name: "cdi-kind=cdi-resource",
		},
	}

	claimsResource := []*podresourcesapi.ClaimResource{
		{
			CDIDevices: cdiDevices,
		},
	}

	dynamicResources := []*podresourcesapi.DynamicResource{
		{
			ClassName:      "resource-class",
			ClaimName:      "resource-claim",
			ClaimNamespace: "dynamic-resource-pod-namespace",
			ClaimResources: claimsResource,
		},
	}

	resp := &podresourcesapi.ListPodResourcesResponse{
		PodResources: []*podresourcesapi.PodResources{
			{
-				Name:      podName,
-				Namespace: podNamespace,
+				Name:      "pod-name",
+				Namespace: "pod-namespace",
				Containers: []*podresourcesapi.ContainerResources{
					{
-						Name:    containerName,
+						Name:    "container-name",
						Devices: devs,
					},
				},
			},
			{
				Name:      "dynamic-resource-pod-name",
				Namespace: "dynamic-resource-pod-namespace",
				Containers: []*podresourcesapi.ContainerResources{
					{
						Name:             "dynamic-resource-container-name",
						DynamicResources: dynamicResources,
					},
				},
			},
		},
	}
	return resp, nil
@@ -188,7 +215,7 @@ var _ = Describe("Kubelet resource endpoint data read operations", func() {
		})
	})
	Context("GetPodResourceMap() with valid pod name and namespace", func() {
-		It("should return no error", func() {
+		It("should return no error with device plugin resource", func() {
			podUID := k8sTypes.UID("970a395d-bb3b-11e8-89df-408d5c537d23")
			fakePod := &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
@@ -216,6 +243,34 @@ var _ = Describe("Kubelet resource endpoint data read operations", func() {
			Expect(resourceMap).To(Equal(outputRMap))
		})

		It("should return no error with dynamic resource", func() {
			podUID := k8sTypes.UID("9f94e27b-4233-43d6-bd10-f73b4de6f456")
			fakePod := &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "dynamic-resource-pod-name",
					Namespace: "dynamic-resource-pod-namespace",
					UID:       podUID,
				},
				Spec: v1.PodSpec{
					Containers: []v1.Container{
						{
							Name: "dynamic-resource-container-name",
						},
					},
				},
			}
			client, err := getKubeletClient(testKubeletSocket)
			Expect(err).NotTo(HaveOccurred())

			outputRMap := map[string]*mtypes.ResourceInfo{
				"resource-class": {DeviceIDs: []string{"cdi-resource"}},
			}
			resourceMap, err := client.GetPodResourceMap(fakePod)
			Expect(err).NotTo(HaveOccurred())
			Expect(resourceMap).ShouldNot(BeNil())
			Expect(resourceMap).To(Equal(outputRMap))
		})

		It("should return an error with garbage socket value", func() {
			u, err := url.Parse("/badfilepath!?//")
			Expect(err).NotTo(HaveOccurred())