mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-10-22 12:29:49 +00:00
NVRC introduced the confidential feature flag and we haven't updated the rootfs build to accomodate. If rootfs_type==confidential user --feature=confidential Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
369 lines
11 KiB
Bash
369 lines
11 KiB
Bash
#!/usr/bin/env bash
|
|
#
|
|
# Copyright (c) 2024 NVIDIA Corporation
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
#!/bin/bash
|
|
set -euo pipefail
|
|
|
|
shopt -s nullglob
|
|
shopt -s extglob
|
|
|
|
run_file_name=$2
|
|
run_fm_file_name=$3
|
|
arch_target=$4
|
|
nvidia_gpu_stack="$5"
|
|
driver_version=""
|
|
driver_type="-open"
|
|
supported_gpu_devids="/supported-gpu.devids"
|
|
base_os="noble"
|
|
|
|
APT_INSTALL="apt -o Dpkg::Options::='--force-confdef' -o Dpkg::Options::='--force-confold' -yqq --no-install-recommends install"
|
|
|
|
export KBUILD_SIGN_PIN="${6:-}"
|
|
|
|
export DEBIAN_FRONTEND=noninteractive
|
|
|
|
is_feature_enabled() {
|
|
local feature="$1"
|
|
# Check if feature is in the comma-separated list
|
|
if [[ ",${nvidia_gpu_stack}," == *",${feature},"* ]]; then
|
|
return 0
|
|
else
|
|
return 1
|
|
fi
|
|
}
|
|
|
|
set_driver_version_type() {
|
|
echo "chroot: Setting the correct driver version"
|
|
|
|
if [[ ",${nvidia_gpu_stack}," == *",latest,"* ]]; then
|
|
driver_version="latest"
|
|
elif [[ ",${nvidia_gpu_stack}," == *",lts,"* ]]; then
|
|
driver_version="lts"
|
|
elif [[ "${nvidia_gpu_stack}" =~ version=([^,]+) ]]; then
|
|
driver_version="${BASH_REMATCH[1]}"
|
|
else
|
|
echo "No known driver spec found. Please specify \"latest\", \"lts\", or \"version=<VERSION>\"."
|
|
exit 1
|
|
fi
|
|
|
|
echo "chroot: driver_version: ${driver_version}"
|
|
|
|
echo "chroot: Setting the correct driver type"
|
|
|
|
# driver -> enable open or closed drivers
|
|
if [[ "${nvidia_gpu_stack}" =~ (^|,)driver=open($|,) ]]; then
|
|
driver_type="-open"
|
|
elif [[ "${nvidia_gpu_stack}" =~ (^|,)driver=closed($|,) ]]; then
|
|
driver_type=""
|
|
fi
|
|
|
|
echo "chroot: driver_type: ${driver_type}"
|
|
}
|
|
|
|
install_nvidia_ctk() {
|
|
echo "chroot: Installing NVIDIA GPU container runtime"
|
|
apt list nvidia-container-toolkit-base -a
|
|
# Base gives a nvidia-ctk and the nvidia-container-runtime
|
|
eval "${APT_INSTALL}" nvidia-container-toolkit-base=1.17.6-1
|
|
}
|
|
|
|
install_nvidia_fabricmanager() {
|
|
is_feature_enabled "nvswitch" || {
|
|
echo "chroot: Skipping NVIDIA fabricmanager installation"
|
|
return
|
|
}
|
|
# if run_fm_file_name exists run it
|
|
if [[ -f /"${run_fm_file_name}" ]]; then
|
|
install_nvidia_fabricmanager_from_run_file
|
|
else
|
|
install_nvidia_fabricmanager_from_distribution
|
|
fi
|
|
}
|
|
|
|
install_nvidia_fabricmanager_from_run_file() {
|
|
echo "chroot: Install NVIDIA fabricmanager from run file"
|
|
pushd / >> /dev/null
|
|
chmod +x "${run_fm_file_name}"
|
|
./"${run_fm_file_name}" --nox11
|
|
popd >> /dev/null
|
|
}
|
|
|
|
install_nvidia_fabricmanager_from_distribution() {
|
|
echo "chroot: Install NVIDIA fabricmanager from distribution"
|
|
eval "${APT_INSTALL}" nvidia-fabricmanager-"${driver_version}" libnvidia-nscq-"${driver_version}"
|
|
apt-mark hold nvidia-fabricmanager-"${driver_version}" libnvidia-nscq-"${driver_version}"
|
|
}
|
|
|
|
build_nvidia_drivers() {
|
|
is_feature_enabled "compute" || {
|
|
echo "chroot: Skipping NVIDIA drivers build"
|
|
return
|
|
}
|
|
|
|
echo "chroot: Build NVIDIA drivers"
|
|
pushd "${driver_source_files}" >> /dev/null
|
|
|
|
local certs_dir
|
|
local kernel_version
|
|
local ARCH
|
|
for version in /lib/modules/*; do
|
|
kernel_version=$(basename "${version}")
|
|
certs_dir=/lib/modules/"${kernel_version}"/build/certs
|
|
signing_key=${certs_dir}/signing_key.pem
|
|
|
|
echo "chroot: Building GPU modules for: ${kernel_version}"
|
|
cp /boot/System.map-"${kernel_version}" /lib/modules/"${kernel_version}"/build/System.map
|
|
|
|
if [[ "${arch_target}" == "aarch64" ]]; then
|
|
ln -sf /lib/modules/"${kernel_version}"/build/arch/arm64 /lib/modules/"${kernel_version}"/build/arch/aarch64
|
|
ARCH=arm64
|
|
fi
|
|
|
|
if [[ "${arch_target}" == "x86_64" ]]; then
|
|
ln -sf /lib/modules/"${kernel_version}"/build/arch/x86 /lib/modules/"${kernel_version}"/build/arch/amd64
|
|
ARCH=x86_64
|
|
fi
|
|
|
|
echo "chroot: Building GPU modules for: ${kernel_version} ${ARCH}"
|
|
|
|
make -j "$(nproc)" CC=gcc SYSSRC=/lib/modules/"${kernel_version}"/build > /dev/null
|
|
|
|
if [[ -n "${KBUILD_SIGN_PIN}" ]]; then
|
|
mkdir -p "${certs_dir}" && mv /signing_key.* "${certs_dir}"/.
|
|
fi
|
|
|
|
make INSTALL_MOD_STRIP=1 -j "$(nproc)" CC=gcc SYSSRC=/lib/modules/"${kernel_version}"/build modules_install
|
|
make -j "$(nproc)" CC=gcc SYSSRC=/lib/modules/"${kernel_version}"/build clean > /dev/null
|
|
# The make clean above should clear also the certs directory but just in case something
|
|
# went wroing make sure the signing_key.pem is removed
|
|
[[ -e "${signing_key}" ]] && rm -f "${signing_key}"
|
|
done
|
|
popd >> /dev/null
|
|
}
|
|
|
|
install_userspace_components() {
|
|
if [[ ! -f /"${run_file_name}" ]]; then
|
|
echo "chroot: Skipping NVIDIA userspace runfile components installation"
|
|
return
|
|
fi
|
|
|
|
pushd /NVIDIA-* >> /dev/null
|
|
# if aarch64 we need to remove --no-install-compat32-libs
|
|
if [[ "${arch_target}" == "aarch64" ]]; then
|
|
./nvidia-installer --no-kernel-modules --no-systemd --no-nvidia-modprobe -s --x-prefix=/root
|
|
else
|
|
./nvidia-installer --no-kernel-modules --no-systemd --no-nvidia-modprobe -s --x-prefix=/root --no-install-compat32-libs
|
|
fi
|
|
popd >> /dev/null
|
|
|
|
}
|
|
|
|
prepare_run_file_drivers() {
|
|
if [[ "${driver_version}" == "latest" ]]; then
|
|
driver_version=""
|
|
echo "chroot: Resetting driver version not supported with run-file"
|
|
elif [[ "${driver_version}" == "lts" ]]; then
|
|
driver_version=""
|
|
echo "chroot: Resetting driver version not supported with run-file"
|
|
fi
|
|
|
|
echo "chroot: Prepare NVIDIA run file drivers"
|
|
pushd / >> /dev/null
|
|
chmod +x "${run_file_name}"
|
|
./"${run_file_name}" -x
|
|
|
|
mkdir -p /usr/share/nvidia/rim/
|
|
|
|
# Sooner or later RIM files will be only available remotely
|
|
RIMFILE=$(ls NVIDIA-*/RIM_GH100PROD.swidtag)
|
|
if [[ -e "${RIMFILE}" ]]; then
|
|
cp NVIDIA-*/RIM_GH100PROD.swidtag /usr/share/nvidia/rim/.
|
|
fi
|
|
popd >> /dev/null
|
|
}
|
|
|
|
prepare_distribution_drivers() {
|
|
if [[ "${driver_version}" == "latest" ]]; then
|
|
driver_version=$(apt-cache search --names-only 'nvidia-headless-no-dkms-.?.?.?-open' | sort | awk '{ print $1 }' | tail -n 1 | cut -d'-' -f5)
|
|
elif [[ "${driver_version}" == "lts" ]]; then
|
|
driver_version="550"
|
|
fi
|
|
|
|
echo "chroot: Prepare NVIDIA distribution drivers"
|
|
|
|
eval "${APT_INSTALL}" nvidia-utils-"${driver_version}"
|
|
|
|
eval "${APT_INSTALL}" nvidia-headless-no-dkms-"${driver_version}${driver_type}" \
|
|
nvidia-firmware-"${driver_version}" \
|
|
nvidia-imex-"${driver_version}" \
|
|
libnvidia-cfg1-"${driver_version}" \
|
|
libnvidia-gl-"${driver_version}" \
|
|
libnvidia-extra-"${driver_version}" \
|
|
libnvidia-decode-"${driver_version}" \
|
|
libnvidia-fbc1-"${driver_version}" \
|
|
libnvidia-encode-"${driver_version}"
|
|
}
|
|
|
|
prepare_nvidia_drivers() {
|
|
local driver_source_dir=""
|
|
|
|
if [[ -f /"${run_file_name}" ]]; then
|
|
prepare_run_file_drivers
|
|
|
|
for source_dir in /NVIDIA-*; do
|
|
if [[ -d "${source_dir}" ]]; then
|
|
driver_source_files="${source_dir}"/kernel${driver_type}
|
|
driver_source_dir="${source_dir}"
|
|
break
|
|
fi
|
|
done
|
|
get_supported_gpus_from_run_file "${driver_source_dir}"
|
|
|
|
else
|
|
prepare_distribution_drivers
|
|
|
|
for source_dir in /usr/src/nvidia*; do
|
|
if [[ -d "${source_dir}" ]]; then
|
|
driver_source_files="${source_dir}"
|
|
driver_source_dir="${source_dir}"
|
|
break
|
|
fi
|
|
done
|
|
get_supported_gpus_from_distro_drivers "${driver_source_dir}"
|
|
fi
|
|
|
|
}
|
|
|
|
install_build_dependencies() {
|
|
echo "chroot: Install NVIDIA drivers build dependencies"
|
|
eval "${APT_INSTALL}" make gcc gawk kmod libvulkan1 pciutils jq zstd linuxptp xz-utils
|
|
}
|
|
|
|
setup_apt_repositories() {
|
|
echo "chroot: Setup APT repositories"
|
|
mkdir -p /var/cache/apt/archives/partial
|
|
mkdir -p /var/log/apt
|
|
mkdir -p /var/lib/dpkg/info
|
|
mkdir -p /var/lib/dpkg/updates
|
|
mkdir -p /var/lib/dpkg/alternatives
|
|
mkdir -p /var/lib/dpkg/triggers
|
|
mkdir -p /var/lib/dpkg/parts
|
|
touch /var/lib/dpkg/status
|
|
rm -f /etc/apt/sources.list.d/*
|
|
|
|
if [[ "${arch_target}" == "x86_64" ]]; then
|
|
cat <<-CHROOT_EOF > /etc/apt/sources.list.d/"${base_os}".list
|
|
deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu ${base_os} main restricted universe multiverse
|
|
deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu ${base_os}-updates main restricted universe multiverse
|
|
deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu ${base_os}-security main restricted universe multiverse
|
|
deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu ${base_os}-backports main restricted universe multiverse
|
|
CHROOT_EOF
|
|
fi
|
|
|
|
if [[ "${arch_target}" == "aarch64" ]]; then
|
|
cat <<-CHROOT_EOF > /etc/apt/sources.list.d/"${base_os}".list
|
|
deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports ${base_os} main restricted universe multiverse
|
|
deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports ${base_os}-updates main restricted universe multiverse
|
|
deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports ${base_os}-security main restricted universe multiverse
|
|
deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports ${base_os}-backports main restricted universe multiverse
|
|
CHROOT_EOF
|
|
fi
|
|
|
|
local arch="${arch_target}"
|
|
[[ ${arch_target} == "aarch64" ]] && arch="sbsa"
|
|
# shellcheck disable=SC2015
|
|
[[ ${base_os} == "noble" ]] && osver="ubuntu2404" || die "Unknown base_os ${base_os} used"
|
|
|
|
keyring="cuda-keyring_1.1-1_all.deb"
|
|
curl -O "https://developer.download.nvidia.com/compute/cuda/repos/${osver}/${arch}/${keyring}"
|
|
dpkg -i "${keyring}" && rm -f "${keyring}"
|
|
|
|
# Set repository priorities, prefere NVIDIA repositories over Ubuntu ones
|
|
cat <<-CHROOT_EOF > /etc/apt/preferences.d/nvidia-priority
|
|
# Prioritize NVIDIA CUDA repository
|
|
Package: *
|
|
Pin: origin developer.download.nvidia.com
|
|
Pin-Priority: 1000
|
|
|
|
# Prioritize NVIDIA Container Toolkit repository
|
|
Package: *
|
|
Pin: origin nvidia.github.io
|
|
Pin-Priority: 950
|
|
|
|
# Lower priority for Ubuntu repositories
|
|
Package: *
|
|
Pin: origin us.archive.ubuntu.com
|
|
Pin-Priority: 500
|
|
|
|
Package: *
|
|
Pin: origin ports.ubuntu.com
|
|
Pin-Priority: 500
|
|
CHROOT_EOF
|
|
|
|
apt update
|
|
}
|
|
|
|
install_kernel_dependencies() {
|
|
dpkg -i /linux-*deb
|
|
}
|
|
|
|
get_supported_gpus_from_run_file() {
|
|
local source_dir="$1"
|
|
local supported_gpus_json="${source_dir}"/supported-gpus/supported-gpus.json
|
|
|
|
jq . < "${supported_gpus_json}" | grep '"devid"' | awk '{ print $2 }' | tr -d ',"' > "${supported_gpu_devids}"
|
|
}
|
|
|
|
get_supported_gpus_from_distro_drivers() {
|
|
local supported_gpus_json=./usr/share/doc/nvidia-driver-"${driver_version}"/supported-gpus.json
|
|
|
|
mkdir _tmp
|
|
pushd _tmp >> /dev/null
|
|
|
|
apt download nvidia-driver-"${driver_version}"
|
|
ar -x nvidia-driver-"${driver_version}"*.deb
|
|
tar -xvf data.tar.xz
|
|
|
|
jq . < "${supported_gpus_json}" | grep '"devid"' | awk '{ print $2 }' | tr -d ',"' > "${supported_gpu_devids}"
|
|
|
|
popd >> /dev/null
|
|
rm -rf _tmp
|
|
}
|
|
|
|
export_driver_version() {
|
|
for modules_version in /lib/modules/*; do
|
|
modinfo "${modules_version}"/kernel/drivers/video/nvidia.ko | grep ^version | awk '{ print $2 }' > /nvidia_driver_version
|
|
break
|
|
done
|
|
}
|
|
|
|
install_nvidia_dcgm() {
|
|
is_feature_enabled "dcgm" || {
|
|
echo "chroot: Skipping NVIDIA DCGM installation"
|
|
return
|
|
}
|
|
|
|
echo "chroot: Install NVIDIA DCGM"
|
|
|
|
eval "${APT_INSTALL}" datacenter-gpu-manager \
|
|
datacenter-gpu-manager-exporter
|
|
}
|
|
|
|
# Start of script
|
|
echo "chroot: Setup NVIDIA GPU rootfs stage one"
|
|
|
|
set_driver_version_type
|
|
setup_apt_repositories
|
|
install_kernel_dependencies
|
|
install_build_dependencies
|
|
prepare_nvidia_drivers
|
|
build_nvidia_drivers
|
|
install_userspace_components
|
|
install_nvidia_fabricmanager
|
|
install_nvidia_ctk
|
|
export_driver_version
|
|
install_nvidia_dcgm
|