mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-04-27 11:31:05 +00:00
gpu: Add driver version selection
Besides the latest and lts options, add an option to specify the exact driver version. Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
parent
311c3638c6
commit
f153229865
@ -13,14 +13,53 @@ shopt -s extglob
|
||||
run_file_name=$2
|
||||
run_fm_file_name=$3
|
||||
arch_target=$4
|
||||
driver_version="$5"
|
||||
driver_type="open"
|
||||
nvidia_gpu_stack="$5"
|
||||
driver_version=""
|
||||
driver_type="-open"
|
||||
supported_gpu_devids="/supported-gpu.devids"
|
||||
|
||||
APT_INSTALL="apt -o Dpkg::Options::='--force-confdef' -o Dpkg::Options::='--force-confold' -yqq --no-install-recommends install"
|
||||
|
||||
export DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# Report whether a feature is listed in the comma-separated GPU stack spec.
# Globals:
#   nvidia_gpu_stack (read) - comma-separated list, e.g. "latest,compute,dcgm"
# Arguments:
#   $1 - feature name to look for (matched as a whole list entry)
# Returns:
#   0 if the feature is present, 1 otherwise (including an empty name)
is_feature_enabled() {
	local feature="${1:-}"

	# An empty name would otherwise match an empty stack or an empty entry
	# (",," contains ",,"), reporting a non-existent feature as enabled.
	[[ -n "${feature}" ]] || return 1

	# Pad both sides with commas so only complete entries match, never
	# substrings of a longer entry.
	[[ ",${nvidia_gpu_stack:-}," == *",${feature},"* ]]
}
|
||||
|
||||
# Derive the driver version and driver flavour from the GPU stack spec.
# Globals:
#   nvidia_gpu_stack (read)    - comma-separated list, e.g.
#                                "version=550.127.1,compute,driver=closed"
#   driver_version   (written) - "latest", "lts", or the <VERSION> taken from
#                                a "version=<VERSION>" entry (checked in that
#                                order of precedence)
#   driver_type      (written) - "-open" for "driver=open", "" for
#                                "driver=closed"; left unchanged when neither
#                                entry is present
# Outputs:
#   Progress messages on stdout; the fatal error message on stderr.
# Exits:
#   1 when the spec contains no "latest", "lts" or "version=<VERSION>" entry.
set_driver_version_type() {
	echo "chroot: Setting the correct driver version"

	# Pad with commas so every entry, including the first and last, is
	# delimited on both sides and can be matched as a whole entry.
	local stack=",${nvidia_gpu_stack:-},"
	# Require the leading comma so keys that merely end in "version="
	# (e.g. "fw_version=1") are not mistaken for the driver version.
	local version_re=',version=([^,]+),'

	if [[ "${stack}" == *",latest,"* ]]; then
		driver_version="latest"
	elif [[ "${stack}" == *",lts,"* ]]; then
		driver_version="lts"
	elif [[ "${stack}" =~ ${version_re} ]]; then
		driver_version="${BASH_REMATCH[1]}"
	else
		echo "No known driver spec found. Please specify \"latest\", \"lts\", or \"version=<VERSION>\"." >&2
		exit 1
	fi

	echo "chroot: driver_version: ${driver_version}"

	echo "chroot: Setting the correct driver type"

	# driver -> enable open or closed drivers
	if [[ "${stack}" == *",driver=open,"* ]]; then
		driver_type="-open"
	elif [[ "${stack}" == *",driver=closed,"* ]]; then
		driver_type=""
	fi

	echo "chroot: driver_type: ${driver_type}"
}
|
||||
|
||||
install_nvidia_ctk() {
|
||||
echo "chroot: Installing NVIDIA GPU container runtime"
|
||||
apt list nvidia-container-toolkit-base -a
|
||||
@ -29,6 +68,10 @@ install_nvidia_ctk() {
|
||||
}
|
||||
|
||||
install_nvidia_fabricmanager() {
|
||||
is_feature_enabled "nvswitch" || {
|
||||
echo "chroot: Skipping NVIDIA fabricmanager installation"
|
||||
return
|
||||
}
|
||||
# if run_fm_file_name exists run it
|
||||
if [ -f /"${run_fm_file_name}" ]; then
|
||||
install_nvidia_fabricmanager_from_run_file
|
||||
@ -52,6 +95,11 @@ install_nvidia_fabricmanager_from_distribution() {
|
||||
}
|
||||
|
||||
build_nvidia_drivers() {
|
||||
is_feature_enabled "compute" || {
|
||||
echo "chroot: Skipping NVIDIA drivers build"
|
||||
return
|
||||
}
|
||||
|
||||
echo "chroot: Build NVIDIA drivers"
|
||||
pushd "${driver_source_files}" >> /dev/null
|
||||
|
||||
@ -129,7 +177,7 @@ prepare_distribution_drivers() {
|
||||
fi
|
||||
|
||||
echo "chroot: Prepare NVIDIA distribution drivers"
|
||||
eval "${APT_INSTALL}" nvidia-headless-no-dkms-"${driver_version}-${driver_type}" \
|
||||
eval "${APT_INSTALL}" nvidia-headless-no-dkms-"${driver_version}${driver_type}" \
|
||||
libnvidia-cfg1-"${driver_version}" \
|
||||
nvidia-compute-utils-"${driver_version}" \
|
||||
nvidia-utils-"${driver_version}" \
|
||||
@ -152,7 +200,7 @@ prepare_nvidia_drivers() {
|
||||
|
||||
for source_dir in /NVIDIA-*; do
|
||||
if [ -d "${source_dir}" ]; then
|
||||
driver_source_files="${source_dir}"/kernel-${driver_type}
|
||||
driver_source_files="${source_dir}"/kernel${driver_type}
|
||||
driver_source_dir="${source_dir}"
|
||||
break
|
||||
fi
|
||||
@ -245,6 +293,11 @@ export_driver_version() {
|
||||
|
||||
|
||||
install_nvidia_dcgm() {
|
||||
is_feature_enabled "dcgm" || {
|
||||
echo "chroot: Skipping NVIDIA DCGM installation"
|
||||
return
|
||||
}
|
||||
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb
|
||||
dpkg -i cuda-keyring_1.0-1_all.deb && rm -f cuda-keyring_1.0-1_all.deb
|
||||
|
||||
@ -292,11 +345,11 @@ cleanup_rootfs() {
|
||||
|
||||
apt purge -yqq jq make gcc wget libc6-dev git xz-utils curl gpg \
|
||||
python3-pip software-properties-common ca-certificates \
|
||||
linux-libc-dev nuitka python3-minimal cuda-keyring
|
||||
linux-libc-dev nuitka python3-minimal
|
||||
|
||||
if [ -n "${driver_version}" ]; then
|
||||
apt purge -yqq nvidia-headless-no-dkms-"${driver_version}-${driver_type}" \
|
||||
nvidia-kernel-source-"${driver_version}-${driver_type}" -yqq
|
||||
apt purge -yqq nvidia-headless-no-dkms-"${driver_version}${driver_type}" \
|
||||
nvidia-kernel-source-"${driver_version}${driver_type}" -yqq
|
||||
fi
|
||||
|
||||
apt autoremove -yqq
|
||||
@ -325,7 +378,7 @@ cleanup_rootfs() {
|
||||
# Start of script
|
||||
echo "chroot: Setup NVIDIA GPU rootfs stage one"
|
||||
|
||||
|
||||
set_driver_version_type
|
||||
setup_apt_repositories
|
||||
install_kernel_dependencies
|
||||
install_build_dependencies
|
||||
|
@ -17,6 +17,7 @@ DEBUG=""
|
||||
|
||||
setup_nvidia-nvrc() {
|
||||
local TARGET="nvidia-nvrc"
|
||||
local TARGET_VERSION="main"
|
||||
local PROJECT="nvrc"
|
||||
local TARGET_BUILD_DIR="${BUILD_DIR}/${TARGET}/builddir"
|
||||
local TARGET_DEST_DIR="${BUILD_DIR}/${TARGET}/destdir"
|
||||
@ -45,6 +46,7 @@ setup_nvidia-nvrc() {
|
||||
|
||||
setup_nvidia-gpu-admin-tools() {
|
||||
local TARGET="nvidia-gpu-admin-tools"
|
||||
local TARGET_VERSION="v2024.12.06"
|
||||
local TARGET_GIT="https://github.com/NVIDIA/gpu-admin-tools"
|
||||
local TARGET_BUILD_DIR="${BUILD_DIR}/${TARGET}/builddir"
|
||||
local TARGET_DEST_DIR="${BUILD_DIR}/${TARGET}/destdir"
|
||||
@ -72,6 +74,7 @@ setup_nvidia-gpu-admin-tools() {
|
||||
|
||||
setup_nvidia-dcgm-exporter() {
|
||||
local TARGET="nvidia-dcgm-exporter"
|
||||
local TARGET_VERSION="3.3.9-3.6.1"
|
||||
local TARGET_BUILD_DIR="${BUILD_DIR}/${TARGET}/builddir"
|
||||
local TARGET_DEST_DIR="${BUILD_DIR}/${TARGET}/destdir"
|
||||
local TARBALL="${BUILD_DIR}/kata-static-${TARGET}.tar.zst"
|
||||
@ -85,7 +88,7 @@ setup_nvidia-dcgm-exporter() {
|
||||
local dex="dcgm-exporter"
|
||||
|
||||
rm -rf "${dex}"
|
||||
git clone https://github.com/NVIDIA/${dex}
|
||||
git clone --branch "${TARGET_VERSION}" https://github.com/NVIDIA/${dex}
|
||||
make -C ${dex} binary
|
||||
|
||||
mkdir -p ../destdir/bin
|
||||
@ -151,14 +154,8 @@ setup_nvidia_gpu_rootfs_stage_one() {
|
||||
mount --make-rslave ./dev
|
||||
mount -t proc /proc ./proc
|
||||
|
||||
local driver_version="latest"
|
||||
if echo "$NVIDIA_GPU_STACK" | grep -q '\<latest\>'; then
|
||||
driver_version="latest"
|
||||
elif echo "$NVIDIA_GPU_STACK" | grep -q '\<lts\>'; then
|
||||
driver_version="lts"
|
||||
fi
|
||||
|
||||
chroot . /bin/bash -c "/nvidia_chroot.sh $(uname -r) ${run_file_name} ${run_fm_file_name} ${ARCH} ${driver_version}"
|
||||
chroot . /bin/bash -c "/nvidia_chroot.sh $(uname -r) ${run_file_name} \
|
||||
${run_fm_file_name} ${ARCH} ${NVIDIA_GPU_STACK}"
|
||||
|
||||
umount -R ./dev
|
||||
umount ./proc
|
||||
|
@ -475,7 +475,10 @@ install_initrd_confidential() {
|
||||
# For all nvidia_gpu targets we can customize the stack that is enabled
|
||||
# in the VM by setting the NVIDIA_GPU_STACK= environment variable
|
||||
#
|
||||
# latest | lts -> use the latest and greatest driver or lts release
|
||||
# latest | lts | version
|
||||
# -> use the latest and greatest driver,
|
||||
# lts release or e.g. version=550.127.1
|
||||
# driver -> enable open or closed drivers
|
||||
# debug -> enable debugging support
|
||||
# compute -> enable the compute GPU stack, includes utility
|
||||
# graphics -> enable the graphics GPU stack, includes compute
|
||||
|
Loading…
Reference in New Issue
Block a user