mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-04-27 19:35:32 +00:00
gpu: Add driver version selection
Besides latest and lts options add an option to specify the exact driver version. Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
parent
311c3638c6
commit
f153229865
@ -13,14 +13,53 @@ shopt -s extglob
|
|||||||
run_file_name=$2
|
run_file_name=$2
|
||||||
run_fm_file_name=$3
|
run_fm_file_name=$3
|
||||||
arch_target=$4
|
arch_target=$4
|
||||||
driver_version="$5"
|
nvidia_gpu_stack="$5"
|
||||||
driver_type="open"
|
driver_version=""
|
||||||
|
driver_type="-open"
|
||||||
supported_gpu_devids="/supported-gpu.devids"
|
supported_gpu_devids="/supported-gpu.devids"
|
||||||
|
|
||||||
APT_INSTALL="apt -o Dpkg::Options::='--force-confdef' -o Dpkg::Options::='--force-confold' -yqq --no-install-recommends install"
|
APT_INSTALL="apt -o Dpkg::Options::='--force-confdef' -o Dpkg::Options::='--force-confold' -yqq --no-install-recommends install"
|
||||||
|
|
||||||
export DEBIAN_FRONTEND=noninteractive
|
export DEBIAN_FRONTEND=noninteractive
|
||||||
|
|
||||||
|
# Report whether a feature token is present in the NVIDIA GPU stack spec.
# Globals:   nvidia_gpu_stack (read) - comma-separated list of tokens
# Arguments: $1 - feature name to look up (exact, whole-token match)
# Returns:   0 if the token is listed, 1 otherwise (also for an empty name)
is_feature_enabled() {
	local feature="${1:-}"

	# An empty name must never count as enabled: with an empty list the
	# haystack is ",," which would otherwise contain the needle ",,".
	[[ -n "${feature}" ]] || return 1

	# Wrap both sides in commas so only whole tokens match ("comp" must not
	# match "compute"). The quoted pattern part is compared literally.
	[[ ",${nvidia_gpu_stack:-}," == *",${feature},"* ]]
}
|
||||||
|
|
||||||
|
# Derive the driver version and driver flavour from the GPU stack spec.
# Globals:
#   nvidia_gpu_stack (read)    - comma-separated list of tokens
#   driver_version   (written) - "latest", "lts", or the value of version=<V>
#   driver_type      (written) - "-open" for driver=open, "" for driver=closed;
#                                left at its current value if neither is given
# Outputs:   progress messages on stdout, the fatal diagnostic on stderr
# Returns:   exits the script with status 1 if no version spec is present
set_driver_version_type() {
	# Anchor the key to a token boundary, like the driver= checks below, so
	# that e.g. "cuda_version=12" is not mistaken for the driver version.
	local version_re='(^|,)version=([^,]+)(,|$)'

	echo "chroot: Setting the correct driver version"

	# Precedence is latest, then lts, then an explicit version=<VERSION>.
	if [[ ",${nvidia_gpu_stack}," == *",latest,"* ]]; then
		driver_version="latest"
	elif [[ ",${nvidia_gpu_stack}," == *",lts,"* ]]; then
		driver_version="lts"
	elif [[ "${nvidia_gpu_stack}" =~ ${version_re} ]]; then
		# Group 1 is the leading boundary, group 2 the version value.
		driver_version="${BASH_REMATCH[2]}"
	else
		echo "No known driver spec found. Please specify \"latest\", \"lts\", or \"version=<VERSION>\"." >&2
		exit 1
	fi

	echo "chroot: driver_version: ${driver_version}"

	echo "chroot: Setting the correct driver type"

	# driver -> enable open or closed drivers
	if [[ "${nvidia_gpu_stack}" =~ (^|,)driver=open($|,) ]]; then
		driver_type="-open"
	elif [[ "${nvidia_gpu_stack}" =~ (^|,)driver=closed($|,) ]]; then
		driver_type=""
	fi

	echo "chroot: driver_type: ${driver_type}"
}
|
||||||
|
|
||||||
install_nvidia_ctk() {
|
install_nvidia_ctk() {
|
||||||
echo "chroot: Installing NVIDIA GPU container runtime"
|
echo "chroot: Installing NVIDIA GPU container runtime"
|
||||||
apt list nvidia-container-toolkit-base -a
|
apt list nvidia-container-toolkit-base -a
|
||||||
@ -29,6 +68,10 @@ install_nvidia_ctk() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
install_nvidia_fabricmanager() {
|
install_nvidia_fabricmanager() {
|
||||||
|
is_feature_enabled "nvswitch" || {
|
||||||
|
echo "chroot: Skipping NVIDIA fabricmanager installation"
|
||||||
|
return
|
||||||
|
}
|
||||||
# if run_fm_file_name exists run it
|
# if run_fm_file_name exists run it
|
||||||
if [ -f /"${run_fm_file_name}" ]; then
|
if [ -f /"${run_fm_file_name}" ]; then
|
||||||
install_nvidia_fabricmanager_from_run_file
|
install_nvidia_fabricmanager_from_run_file
|
||||||
@ -52,6 +95,11 @@ install_nvidia_fabricmanager_from_distribution() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
build_nvidia_drivers() {
|
build_nvidia_drivers() {
|
||||||
|
is_feature_enabled "compute" || {
|
||||||
|
echo "chroot: Skipping NVIDIA drivers build"
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
echo "chroot: Build NVIDIA drivers"
|
echo "chroot: Build NVIDIA drivers"
|
||||||
pushd "${driver_source_files}" >> /dev/null
|
pushd "${driver_source_files}" >> /dev/null
|
||||||
|
|
||||||
@ -129,7 +177,7 @@ prepare_distribution_drivers() {
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
echo "chroot: Prepare NVIDIA distribution drivers"
|
echo "chroot: Prepare NVIDIA distribution drivers"
|
||||||
eval "${APT_INSTALL}" nvidia-headless-no-dkms-"${driver_version}-${driver_type}" \
|
eval "${APT_INSTALL}" nvidia-headless-no-dkms-"${driver_version}${driver_type}" \
|
||||||
libnvidia-cfg1-"${driver_version}" \
|
libnvidia-cfg1-"${driver_version}" \
|
||||||
nvidia-compute-utils-"${driver_version}" \
|
nvidia-compute-utils-"${driver_version}" \
|
||||||
nvidia-utils-"${driver_version}" \
|
nvidia-utils-"${driver_version}" \
|
||||||
@ -152,7 +200,7 @@ prepare_nvidia_drivers() {
|
|||||||
|
|
||||||
for source_dir in /NVIDIA-*; do
|
for source_dir in /NVIDIA-*; do
|
||||||
if [ -d "${source_dir}" ]; then
|
if [ -d "${source_dir}" ]; then
|
||||||
driver_source_files="${source_dir}"/kernel-${driver_type}
|
driver_source_files="${source_dir}"/kernel${driver_type}
|
||||||
driver_source_dir="${source_dir}"
|
driver_source_dir="${source_dir}"
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
@ -245,6 +293,11 @@ export_driver_version() {
|
|||||||
|
|
||||||
|
|
||||||
install_nvidia_dcgm() {
|
install_nvidia_dcgm() {
|
||||||
|
is_feature_enabled "dcgm" || {
|
||||||
|
echo "chroot: Skipping NVIDIA DCGM installation"
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb
|
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb
|
||||||
dpkg -i cuda-keyring_1.0-1_all.deb && rm -f cuda-keyring_1.0-1_all.deb
|
dpkg -i cuda-keyring_1.0-1_all.deb && rm -f cuda-keyring_1.0-1_all.deb
|
||||||
|
|
||||||
@ -292,11 +345,11 @@ cleanup_rootfs() {
|
|||||||
|
|
||||||
apt purge -yqq jq make gcc wget libc6-dev git xz-utils curl gpg \
|
apt purge -yqq jq make gcc wget libc6-dev git xz-utils curl gpg \
|
||||||
python3-pip software-properties-common ca-certificates \
|
python3-pip software-properties-common ca-certificates \
|
||||||
linux-libc-dev nuitka python3-minimal cuda-keyring
|
linux-libc-dev nuitka python3-minimal
|
||||||
|
|
||||||
if [ -n "${driver_version}" ]; then
|
if [ -n "${driver_version}" ]; then
|
||||||
apt purge -yqq nvidia-headless-no-dkms-"${driver_version}-${driver_type}" \
|
apt purge -yqq nvidia-headless-no-dkms-"${driver_version}${driver_type}" \
|
||||||
nvidia-kernel-source-"${driver_version}-${driver_type}" -yqq
|
nvidia-kernel-source-"${driver_version}${driver_type}" -yqq
|
||||||
fi
|
fi
|
||||||
|
|
||||||
apt autoremove -yqq
|
apt autoremove -yqq
|
||||||
@ -325,7 +378,7 @@ cleanup_rootfs() {
|
|||||||
# Start of script
|
# Start of script
|
||||||
echo "chroot: Setup NVIDIA GPU rootfs stage one"
|
echo "chroot: Setup NVIDIA GPU rootfs stage one"
|
||||||
|
|
||||||
|
set_driver_version_type
|
||||||
setup_apt_repositories
|
setup_apt_repositories
|
||||||
install_kernel_dependencies
|
install_kernel_dependencies
|
||||||
install_build_dependencies
|
install_build_dependencies
|
||||||
|
@ -17,6 +17,7 @@ DEBUG=""
|
|||||||
|
|
||||||
setup_nvidia-nvrc() {
|
setup_nvidia-nvrc() {
|
||||||
local TARGET="nvidia-nvrc"
|
local TARGET="nvidia-nvrc"
|
||||||
|
local TARGET_VERSION="main"
|
||||||
local PROJECT="nvrc"
|
local PROJECT="nvrc"
|
||||||
local TARGET_BUILD_DIR="${BUILD_DIR}/${TARGET}/builddir"
|
local TARGET_BUILD_DIR="${BUILD_DIR}/${TARGET}/builddir"
|
||||||
local TARGET_DEST_DIR="${BUILD_DIR}/${TARGET}/destdir"
|
local TARGET_DEST_DIR="${BUILD_DIR}/${TARGET}/destdir"
|
||||||
@ -45,6 +46,7 @@ setup_nvidia-nvrc() {
|
|||||||
|
|
||||||
setup_nvidia-gpu-admin-tools() {
|
setup_nvidia-gpu-admin-tools() {
|
||||||
local TARGET="nvidia-gpu-admin-tools"
|
local TARGET="nvidia-gpu-admin-tools"
|
||||||
|
local TARGET_VERSION="v2024.12.06"
|
||||||
local TARGET_GIT="https://github.com/NVIDIA/gpu-admin-tools"
|
local TARGET_GIT="https://github.com/NVIDIA/gpu-admin-tools"
|
||||||
local TARGET_BUILD_DIR="${BUILD_DIR}/${TARGET}/builddir"
|
local TARGET_BUILD_DIR="${BUILD_DIR}/${TARGET}/builddir"
|
||||||
local TARGET_DEST_DIR="${BUILD_DIR}/${TARGET}/destdir"
|
local TARGET_DEST_DIR="${BUILD_DIR}/${TARGET}/destdir"
|
||||||
@ -72,6 +74,7 @@ setup_nvidia-gpu-admin-tools() {
|
|||||||
|
|
||||||
setup_nvidia-dcgm-exporter() {
|
setup_nvidia-dcgm-exporter() {
|
||||||
local TARGET="nvidia-dcgm-exporter"
|
local TARGET="nvidia-dcgm-exporter"
|
||||||
|
local TARGET_VERSION="3.3.9-3.6.1"
|
||||||
local TARGET_BUILD_DIR="${BUILD_DIR}/${TARGET}/builddir"
|
local TARGET_BUILD_DIR="${BUILD_DIR}/${TARGET}/builddir"
|
||||||
local TARGET_DEST_DIR="${BUILD_DIR}/${TARGET}/destdir"
|
local TARGET_DEST_DIR="${BUILD_DIR}/${TARGET}/destdir"
|
||||||
local TARBALL="${BUILD_DIR}/kata-static-${TARGET}.tar.zst"
|
local TARBALL="${BUILD_DIR}/kata-static-${TARGET}.tar.zst"
|
||||||
@ -85,7 +88,7 @@ setup_nvidia-dcgm-exporter() {
|
|||||||
local dex="dcgm-exporter"
|
local dex="dcgm-exporter"
|
||||||
|
|
||||||
rm -rf "${dex}"
|
rm -rf "${dex}"
|
||||||
git clone https://github.com/NVIDIA/${dex}
|
git clone --branch "${TARGET_VERSION}" https://github.com/NVIDIA/${dex}
|
||||||
make -C ${dex} binary
|
make -C ${dex} binary
|
||||||
|
|
||||||
mkdir -p ../destdir/bin
|
mkdir -p ../destdir/bin
|
||||||
@ -151,14 +154,8 @@ setup_nvidia_gpu_rootfs_stage_one() {
|
|||||||
mount --make-rslave ./dev
|
mount --make-rslave ./dev
|
||||||
mount -t proc /proc ./proc
|
mount -t proc /proc ./proc
|
||||||
|
|
||||||
local driver_version="latest"
|
chroot . /bin/bash -c "/nvidia_chroot.sh $(uname -r) ${run_file_name} \
|
||||||
if echo "$NVIDIA_GPU_STACK" | grep -q '\<latest\>'; then
|
${run_fm_file_name} ${ARCH} ${NVIDIA_GPU_STACK}"
|
||||||
driver_version="latest"
|
|
||||||
elif echo "$NVIDIA_GPU_STACK" | grep -q '\<lts\>'; then
|
|
||||||
driver_version="lts"
|
|
||||||
fi
|
|
||||||
|
|
||||||
chroot . /bin/bash -c "/nvidia_chroot.sh $(uname -r) ${run_file_name} ${run_fm_file_name} ${ARCH} ${driver_version}"
|
|
||||||
|
|
||||||
umount -R ./dev
|
umount -R ./dev
|
||||||
umount ./proc
|
umount ./proc
|
||||||
|
@ -475,7 +475,10 @@ install_initrd_confidential() {
|
|||||||
# For all nvidia_gpu targets we can customize the stack that is enabled
|
# For all nvidia_gpu targets we can customize the stack that is enabled
|
||||||
# in the VM by setting the NVIDIA_GPU_STACK= environment variable
|
# in the VM by setting the NVIDIA_GPU_STACK= environment variable
|
||||||
#
|
#
|
||||||
# latest | lts -> use the latest and greatest driver or lts release
|
# latest | lts | version
|
||||||
|
# -> use the latest and greatest driver,
|
||||||
|
# lts release or e.g. version=550.127.1
|
||||||
|
# driver -> enable open or closed drivers
|
||||||
# debug -> enable debugging support
|
# debug -> enable debugging support
|
||||||
# compute -> enable the compute GPU stack, includes utility
|
# compute -> enable the compute GPU stack, includes utility
|
||||||
# graphics -> enable the graphics GPU stack, includes compute
|
# graphics -> enable the graphics GPU stack, includes compute
|
||||||
|
Loading…
Reference in New Issue
Block a user