mirror of
https://github.com/k8snetworkplumbingwg/multus-cni.git
synced 2025-08-31 08:33:06 +00:00
Add DRA Integration E2E test
Signed-off-by: Vasilis Remmas <vremmas@nvidia.com>
This commit is contained in:
4
.github/workflows/kind-e2e.yml
vendored
4
.github/workflows/kind-e2e.yml
vendored
@@ -85,6 +85,10 @@ jobs:
|
||||
working-directory: ./e2e
|
||||
run: ./test-default-route1.sh
|
||||
|
||||
- name: Test DRA integration
|
||||
working-directory: ./e2e
|
||||
run: ./test-dra-integration.sh
|
||||
|
||||
- name: Export kind logs
|
||||
if: always()
|
||||
run: |
|
||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@@ -2,6 +2,7 @@
|
||||
bin/
|
||||
e2e/bin/
|
||||
e2e/yamls/
|
||||
e2e/repos/
|
||||
|
||||
# GOPATH created by the build script
|
||||
gopath/
|
||||
|
@@ -13,3 +13,4 @@ curl -Lo ./bin/koko https://github.com/redhat-nfvpe/koko/releases/download/v0.83
|
||||
chmod +x ./bin/koko
|
||||
curl -Lo ./bin/jq https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64
|
||||
chmod +x ./bin/jq
|
||||
wget -qO- https://get.helm.sh/helm-v3.14.3-linux-amd64.tar.gz | tar xvzf - --strip-components=1 -C ./bin linux-amd64/helm
|
||||
|
@@ -34,7 +34,21 @@ nodes:
|
||||
nodeRegistration:
|
||||
kubeletExtraArgs:
|
||||
pod-manifest-path: "/etc/kubernetes/manifests/"
|
||||
feature-gates: "DynamicResourceAllocation=true,KubeletPodResourcesDynamicResources=true"
|
||||
- role: worker
|
||||
# Required by DRA Integration
|
||||
##
|
||||
featureGates:
|
||||
DynamicResourceAllocation: true
|
||||
runtimeConfig:
|
||||
"api/alpha": "true"
|
||||
containerdConfigPatches:
|
||||
# Enable CDI as described in
|
||||
# https://github.com/container-orchestrated-devices/container-device-interface#containerd-configuration
|
||||
- |-
|
||||
[plugins."io.containerd.grpc.v1.cri"]
|
||||
enable_cdi = true
|
||||
##
|
||||
EOF
|
||||
|
||||
# load multus image from container host to kind node
|
||||
|
49
e2e/templates/dra-integration.yml.j2
Normal file
49
e2e/templates/dra-integration.yml.j2
Normal file
@@ -0,0 +1,49 @@
|
||||
---
|
||||
apiVersion: resource.k8s.io/v1alpha2
|
||||
kind: ResourceClaimTemplate
|
||||
metadata:
|
||||
name: gpu.example.com
|
||||
spec:
|
||||
spec:
|
||||
resourceClassName: gpu.example.com
|
||||
---
|
||||
apiVersion: "k8s.cni.cncf.io/v1"
|
||||
kind: NetworkAttachmentDefinition
|
||||
metadata:
|
||||
name: dra-net
|
||||
annotations:
|
||||
k8s.v1.cni.cncf.io/resourceName: gpu.example.com
|
||||
spec:
|
||||
config: '{
|
||||
"cniVersion": "{{ CNI_VERSION }}",
|
||||
"plugins": [{
|
||||
"name": "mynet",
|
||||
"type": "dummy",
|
||||
"ipam": {
|
||||
"type": "host-local",
|
||||
"subnet": "10.1.2.0/24"
|
||||
}
|
||||
}]
|
||||
}'
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: dra-integration
|
||||
labels:
|
||||
app: dra-integration
|
||||
annotations:
|
||||
k8s.v1.cni.cncf.io/networks: default/dra-net
|
||||
spec:
|
||||
containers:
|
||||
- name: ctr0
|
||||
image: ubuntu:22.04
|
||||
command: ["bash", "-c"]
|
||||
args: ["export; sleep 9999"]
|
||||
resources:
|
||||
claims:
|
||||
- name: gpu
|
||||
resourceClaims:
|
||||
- name: gpu
|
||||
source:
|
||||
resourceClaimTemplateName: gpu.example.com
|
@@ -158,6 +158,9 @@ spec:
|
||||
- name: multus-daemon-config
|
||||
mountPath: /etc/cni/net.d/multus.d
|
||||
readOnly: true
|
||||
- name: kubelet-pod-resources
|
||||
mountPath: /var/lib/kubelet/pod-resources
|
||||
readOnly: true
|
||||
env:
|
||||
- name: MULTUS_NODE_NAME
|
||||
valueFrom:
|
||||
@@ -187,6 +190,9 @@ spec:
|
||||
- name: cnibin
|
||||
hostPath:
|
||||
path: /opt/cni/bin
|
||||
- name: kubelet-pod-resources
|
||||
hostPath:
|
||||
path: /var/lib/kubelet/pod-resources
|
||||
- name: multus-daemon-config
|
||||
configMap:
|
||||
name: multus-daemon-config
|
||||
|
59
e2e/test-dra-integration.sh
Executable file
59
e2e/test-dra-integration.sh
Executable file
@@ -0,0 +1,59 @@
|
||||
#!/bin/sh
|
||||
set -o errexit
|
||||
|
||||
export PATH=${PATH}:./bin
|
||||
|
||||
# This test is using an example implementation of a DRA driver. This driver is mocking GPU resources. At our test we
|
||||
# don't care about what these resources are. We want to ensure that such resource is correctly passed in the Pod using
|
||||
# Multus configurations. A couple of notes:
|
||||
# - We explitictly don't pin the revision of the dra-example-driver to a specific commit to ensure that the integration
|
||||
# continues to work even when the dra-example-driver is updated (which may also indicate API changes on the DRA).
|
||||
# - The chart and latest is image is not published somewhere, therefore we have to build locally. This leads to slower
|
||||
# e2e suite runs.
|
||||
echo "installing dra-example-driver"
|
||||
repo_path="repos/dra-example-driver"
|
||||
|
||||
rm -rf $repo_path || true
|
||||
git clone https://github.com/kubernetes-sigs/dra-example-driver.git ${repo_path}
|
||||
${repo_path}/demo/build-driver.sh
|
||||
KIND_CLUSTER_NAME=kind ${repo_path}/demo/scripts/load-driver-image-into-kind.sh
|
||||
chart_path=${repo_path}/deployments/helm/dra-example-driver/
|
||||
overriden_values_path=${chart_path}/overriden_values.yaml
|
||||
|
||||
# With the thick plugin, in kind, the primary network on the control plane is not always working as expected. The pods
|
||||
# sometimes are not able to communicate with the control plane and the error looks like this:
|
||||
# failed to list *v1alpha2.PodSchedulingContext: Get "https://10.96.0.1:443/apis/resource.k8s.io/v1alpha2/podschedulingcontexts?limit=500&resourceVersion=0": dial tcp 10.96.0.1:443: connect: no route to host
|
||||
# We override the values here to schedule the controller on the worker nodes where the network is working as expected.
|
||||
cat <<EOF >> ${overriden_values_path}
|
||||
controller:
|
||||
nodeSelector: null
|
||||
tolerations: null
|
||||
EOF
|
||||
|
||||
helm install \
|
||||
-n dra-example-driver \
|
||||
--create-namespace \
|
||||
-f ${overriden_values_path} \
|
||||
dra-example-driver \
|
||||
${chart_path}
|
||||
|
||||
echo "installing testing pods"
|
||||
kubectl create -f yamls/dra-integration.yml
|
||||
kubectl wait --for=condition=ready -l app=dra-integration --timeout=300s pod
|
||||
|
||||
echo "check dra-integration pod for DRA injected environment variable"
|
||||
|
||||
# We can validate that the resource is correctly injected by checking an environment variable this dra driver is injecting
|
||||
# in the Pod.
|
||||
# https://github.com/kubernetes-sigs/dra-example-driver/blob/be2b8b1db47b8c757440e955ce5ced88c23bfe86/cmd/dra-example-kubeletplugin/cdi.go#L71C20-L71C44
|
||||
env_variable=$(kubectl exec dra-integration -- bash -c "echo \$DRA_RESOURCE_DRIVER_NAME | grep gpu.resource.example.com")
|
||||
if [ $? -eq 0 ];then
|
||||
echo "dra-integration pod has DRA injected environment variable"
|
||||
else
|
||||
echo "dra-integration pod doesn't have DRA injected environment variable"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "cleanup resources"
|
||||
kubectl delete -f yamls/dra-integration.yml
|
||||
helm uninstall -n dra-example-driver dra-example-driver
|
Reference in New Issue
Block a user