mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-04-27 19:35:32 +00:00
Merge pull request #10464 from zvonkok/nvidia-gpu-rootfs
gpu: NVIDIA GPU initrd/image build
This commit is contained in:
commit
c3d1b3c5e3
339
tools/osbuilder/rootfs-builder/nvidia/nvidia_chroot.sh
Normal file
339
tools/osbuilder/rootfs-builder/nvidia/nvidia_chroot.sh
Normal file
@ -0,0 +1,339 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Copyright (c) 2024 NVIDIA Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Abort on the first failing command and trace every command that is run.
set -o errexit
set -o xtrace

# nullglob: a glob without matches expands to nothing.
# extglob: enable extended pattern matching operators.
shopt -s nullglob extglob

# Positional parameters of this script ($1 is not read here):
#   $2 - file name of the NVIDIA driver run file, looked up under /
#   $3 - file name of the NVIDIA fabricmanager run file, looked up under /
#   $4 - target architecture, compared against "aarch64" and "x86_64"
#   $5 - driver version selector, compared against "latest" and "lts"
run_file_name=$2
run_fm_file_name=$3
arch_target=$4
driver_version="$5"
driver_type="open"
supported_gpu_devids="/supported-gpu.devids"

# Kept as one string and run through eval so that the single quotes embedded
# around the dpkg options are interpreted by the shell.
APT_INSTALL="apt -o Dpkg::Options::='--force-confdef' -o Dpkg::Options::='--force-confold' -yqq --no-install-recommends install"

export DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# Install the NVIDIA container toolkit base package.
# Globals: APT_INSTALL (read)
install_nvidia_ctk() {
	local -r ctk_package="nvidia-container-toolkit-base"

	echo "chroot: Installing NVIDIA GPU container runtime"
	# Print all versions of the package known to apt (informational only).
	apt list "${ctk_package}" -a
	# Base gives a nvidia-ctk and the nvidia-container-runtime
	eval "${APT_INSTALL}" "${ctk_package}"
}
|
||||
|
||||
# Install the NVIDIA fabricmanager, preferring a run file found under / over
# the distribution packages.
# Globals: run_fm_file_name (read)
install_nvidia_fabricmanager() {
	# No run file present: fall back to the distribution packages.
	if [ ! -f /"${run_fm_file_name}" ]; then
		install_nvidia_fabricmanager_from_distribution
		return
	fi

	install_nvidia_fabricmanager_from_run_file
}
|
||||
|
||||
# Make the fabricmanager run file executable and execute it from /.
# Globals: run_fm_file_name (read)
install_nvidia_fabricmanager_from_run_file() {
	local -r fm_run_file="./${run_fm_file_name}"

	echo "chroot: Install NVIDIA fabricmanager from run file"

	pushd / > /dev/null
	chmod +x "${fm_run_file}"
	"${fm_run_file}" --nox11
	popd > /dev/null
}
|
||||
|
||||
# Install the fabricmanager and NSCQ packages matching the driver version and
# put them on hold.
# Globals: APT_INSTALL (read), driver_version (read)
install_nvidia_fabricmanager_from_distribution() {
	local -a fm_packages=(
		"nvidia-fabricmanager-${driver_version}"
		"libnvidia-nscq-${driver_version}"
	)

	echo "chroot: Install NVIDIA fabricmanager from distribution"
	eval "${APT_INSTALL}" "${fm_packages[@]}"
	apt-mark hold "${fm_packages[@]}"
}
|
||||
|
||||
# Build and install the NVIDIA kernel modules for every kernel found under
# /lib/modules, then archive /lib/modules for later restoration.
# Globals: driver_source_files (read, must be non-empty), arch_target (read)
build_nvidia_drivers() {
	echo "chroot: Build NVIDIA drivers"
	# Fail with a clear message rather than handing an empty path to pushd.
	pushd "${driver_source_files:?NVIDIA driver source directory is not set}" >> /dev/null

	local modules_dir kernel_version kernel_build
	local -a make_args
	for modules_dir in /lib/modules/*; do
		kernel_version="${modules_dir##*/}"
		kernel_build="/lib/modules/${kernel_version}/build"
		echo "chroot: Building GPU modules for: ${kernel_version}"
		cp /boot/System.map-"${kernel_version}" "${kernel_build}"/System.map

		# Provide the architecture directory under the alternative name as well.
		if [ "${arch_target}" == "aarch64" ]; then
			ln -sf "${kernel_build}"/arch/arm64 "${kernel_build}"/arch/aarch64
		fi

		if [ "${arch_target}" == "x86_64" ]; then
			ln -sf "${kernel_build}"/arch/x86 "${kernel_build}"/arch/amd64
		fi

		make_args=(-j "$(nproc)" CC=gcc SYSSRC="${kernel_build}")
		make "${make_args[@]}" > /dev/null
		make INSTALL_MOD_STRIP=1 "${make_args[@]}" modules_install
		make "${make_args[@]}" clean > /dev/null
	done

	# Save the modules for later so that a linux-image purge does not remove it
	tar cvfa /lib/modules.save_from_purge.tar.zst /lib/modules
	popd >> /dev/null
}
|
||||
|
||||
# Run the nvidia-installer from the extracted run file directory to install
# the userspace components; does nothing when no driver run file is present.
# Globals: run_file_name (read), arch_target (read)
install_userspace_components() {
	[ -f /"${run_file_name}" ] || {
		echo "chroot: Skipping NVIDIA userspace runfile components installation"
		return
	}

	local -a installer_opts=(
		--no-kernel-modules
		--no-systemd
		--no-nvidia-modprobe
		-s
		--x-prefix=/root
	)
	# if aarch64 we need to remove --no-install-compat32-libs
	if [ "${arch_target}" != "aarch64" ]; then
		installer_opts+=(--no-install-compat32-libs)
	fi

	pushd /NVIDIA-* >> /dev/null
	./nvidia-installer "${installer_opts[@]}"
	popd >> /dev/null
}
|
||||
|
||||
# Extract the NVIDIA driver run file under / and copy the RIM file, when the
# extracted tree ships one, to /usr/share/nvidia/rim/.
# Globals: driver_version (read, cleared for "latest"/"lts"),
#          run_file_name (read)
prepare_run_file_drivers() {
	case "${driver_version}" in
	latest | lts)
		driver_version=""
		echo "chroot: Resetting driver version not supported with run-file"
		;;
	esac

	echo "chroot: Prepare NVIDIA run file drivers"
	pushd / >> /dev/null
	chmod +x "${run_file_name}"
	./"${run_file_name}" -x

	mkdir -p /usr/share/nvidia/rim/

	# Sooner or later RIM files will be only available remotely
	# Iterate over the glob directly instead of parsing ls output: a pattern
	# without matches then simply copies nothing.
	local rim_file
	for rim_file in NVIDIA-*/RIM_GH100PROD.swidtag; do
		if [ -e "${rim_file}" ]; then
			cp "${rim_file}" /usr/share/nvidia/rim/.
		fi
	done
	popd >> /dev/null
}
|
||||
|
||||
# Resolve the driver version selector and install the matching NVIDIA
# distribution packages.
# Globals: driver_version (read, rewritten to the resolved version),
#          driver_type (read), APT_INSTALL (read)
# Returns: non-zero when no driver version could be determined
prepare_distribution_drivers() {
	if [ "${driver_version}" == "latest" ]; then
		# Take the version field of the last matching package name.
		driver_version=$(apt-cache search --names-only 'nvidia-headless-no-dkms-.?.?.?-open' |
			awk '{ print $1 }' | tail -n 1 | cut -d'-' -f5)
	elif [ "${driver_version}" == "lts" ]; then
		driver_version="550"
	fi

	# An empty version would only produce invalid package names below.
	if [ -z "${driver_version}" ]; then
		echo "chroot: Unable to determine the NVIDIA driver version" >&2
		return 1
	fi

	echo "chroot: Prepare NVIDIA distribution drivers"

	local -a driver_packages=(
		"nvidia-headless-no-dkms-${driver_version}-${driver_type}"
		"libnvidia-cfg1-${driver_version}"
		"nvidia-compute-utils-${driver_version}"
		"nvidia-utils-${driver_version}"
		"nvidia-kernel-common-${driver_version}"
		"nvidia-imex-${driver_version}"
		"libnvidia-compute-${driver_version}"
		"libnvidia-gl-${driver_version}"
		"libnvidia-extra-${driver_version}"
		"libnvidia-decode-${driver_version}"
		"libnvidia-fbc1-${driver_version}"
		"libnvidia-encode-${driver_version}"
	)
	eval "${APT_INSTALL}" "${driver_packages[@]}"
}
|
||||
|
||||
# Prepare the NVIDIA driver sources, either from a run file found under / or
# from the distribution packages, and record the supported GPU device IDs.
# Globals: run_file_name (read), driver_type (read),
#          driver_source_files (written; intentionally global, it is read
#          by the driver build step)
# Returns: non-zero when no driver source directory could be located
prepare_nvidia_drivers() {
	local driver_source_dir=""
	local source_dir

	if [ -f /"${run_file_name}" ]; then
		prepare_run_file_drivers

		# Use the first extracted run file directory.
		for source_dir in /NVIDIA-*; do
			if [ -d "${source_dir}" ]; then
				driver_source_files="${source_dir}"/kernel-${driver_type}
				driver_source_dir="${source_dir}"
				break
			fi
		done

		if [ -z "${driver_source_dir}" ]; then
			echo "chroot: No extracted NVIDIA run file directory found under /" >&2
			return 1
		fi
		get_supported_gpus_from_run_file "${driver_source_dir}"
	else
		prepare_distribution_drivers

		# Use the first driver source directory installed by the packages.
		for source_dir in /usr/src/nvidia*; do
			if [ -d "${source_dir}" ]; then
				driver_source_files="${source_dir}"
				driver_source_dir="${source_dir}"
				break
			fi
		done

		if [ -z "${driver_source_dir}" ]; then
			echo "chroot: No NVIDIA driver sources found under /usr/src" >&2
			return 1
		fi
		get_supported_gpus_from_distro_drivers "${driver_source_dir}"
	fi
}
|
||||
|
||||
# Install the packages used while building the NVIDIA drivers.
# Globals: APT_INSTALL (read)
install_build_dependencies() {
	local -a build_deps=(make gcc gawk kmod libvulkan1 pciutils jq zstd linuxptp)

	echo "chroot: Install NVIDIA drivers build dependencies"
	eval "${APT_INSTALL}" "${build_deps[@]}"
}
|
||||
|
||||
# Create the directory layout apt/dpkg expect, write the Ubuntu jammy
# package sources for the target architecture and add the NVIDIA container
# toolkit repository together with its signing key.
# Globals: arch_target (read), APT_INSTALL (read)
setup_apt_repositories() {
	echo "chroot: Setup APT repositories"

	mkdir -p /var/cache/apt/archives/partial \
		/var/log/apt \
		/var/lib/dpkg/info \
		/var/lib/dpkg/updates \
		/var/lib/dpkg/alternatives \
		/var/lib/dpkg/triggers \
		/var/lib/dpkg/parts
	touch /var/lib/dpkg/status
	rm -f /etc/apt/sources.list.d/*

	local ubuntu_mirror="http://us.archive.ubuntu.com/ubuntu/"
	if [ "${arch_target}" == "aarch64" ]; then
		ubuntu_mirror="http://ports.ubuntu.com/ubuntu-ports/"
	fi

	# Both architectures use the jammy suites, so both write jammy.list.
	local suite
	for suite in jammy jammy-updates jammy-security jammy-backports; do
		printf 'deb %s %s main restricted universe multiverse\n' "${ubuntu_mirror}" "${suite}"
	done > /etc/apt/sources.list.d/jammy.list

	apt update

	eval "${APT_INSTALL}" curl gpg ca-certificates

	local -r ctk_url="https://nvidia.github.io/libnvidia-container"
	local ctk_key ctk_list

	# Download into variables first: a failed download then aborts the
	# script instead of being hidden behind the last command of a pipeline.
	ctk_key="$(curl -fsSL "${ctk_url}/gpgkey")"
	printf '%s\n' "${ctk_key}" |
		gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg

	ctk_list="$(curl -fsSL "${ctk_url}/stable/deb/nvidia-container-toolkit.list")"
	printf '%s\n' "${ctk_list}" |
		sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' |
		tee /etc/apt/sources.list.d/nvidia-container-toolkit.list

	apt update
}
|
||||
|
||||
# Install every linux-*deb package found directly under /.
install_kernel_dependencies() {
	local -a kernel_debs=(/linux-*deb)

	dpkg -i "${kernel_debs[@]}"
}
|
||||
|
||||
# Write every "devid" value found in the run file's supported-gpus.json,
# one per line, to the file named by supported_gpu_devids.
# Arguments: $1 - extracted run file directory
# Globals:   supported_gpu_devids (read)
# Returns:   non-zero when the JSON file cannot be read or parsed
get_supported_gpus_from_run_file() {
	local source_dir="$1"
	local supported_gpus_json="${source_dir}"/supported-gpus/supported-gpus.json

	# Let jq select the values itself: visit every object in the document
	# and print its "devid" member when it has one.
	jq -r '.. | objects | .devid? // empty' "${supported_gpus_json}" > "${supported_gpu_devids}"
}
|
||||
|
||||
# Write every "devid" value found in the supported-gpus.json shipped in the
# nvidia-kernel-common documentation directory, one per line, to the file
# named by supported_gpu_devids. Positional arguments are not read.
# Globals: driver_version (read), supported_gpu_devids (read)
# Returns: non-zero when the JSON file cannot be read or parsed
get_supported_gpus_from_distro_drivers() {
	local supported_gpus_json=/usr/share/doc/nvidia-kernel-common-"${driver_version}"/supported-gpus.json

	# Let jq select the values itself: visit every object in the document
	# and print its "devid" member when it has one.
	jq -r '.. | objects | .devid? // empty' "${supported_gpus_json}" > "${supported_gpu_devids}"
}
|
||||
|
||||
# Write the version of the nvidia.ko module of the first kernel found under
# /lib/modules to /nvidia_driver_version.
export_driver_version() {
	local modules_version

	for modules_version in /lib/modules/*; do
		# Query the field directly so that a modinfo failure is not masked
		# by a pipeline and cannot leave an empty version file behind.
		modinfo -F version "${modules_version}"/kernel/drivers/video/nvidia.ko > /nvidia_driver_version
		break
	done
}
|
||||
|
||||
|
||||
# Install the CUDA repository keyring, register the CUDA apt repository for
# the target architecture and install the datacenter-gpu-manager package.
# Globals: arch_target (read), APT_INSTALL (read)
install_nvidia_dcgm() {
	local -r cuda_repos="https://developer.download.nvidia.com/compute/cuda/repos"
	local -r keyring_deb="cuda-keyring_1.0-1_all.deb"

	# --fail: stop on an HTTP error instead of saving the error page as .deb
	curl -fLO "${cuda_repos}/ubuntu2004/x86_64/${keyring_deb}"
	# Two separate commands: inside an "a && b" list a failing dpkg would not
	# abort the script under set -e.
	dpkg -i "${keyring_deb}"
	rm -f "${keyring_deb}"

	local repo_arch="x86_64"
	if [ "${arch_target}" == "aarch64" ]; then
		repo_arch="arm64"
	fi

	printf 'deb [signed-by=/usr/share/keyrings/cuda-archive-keyring.gpg] %s/ubuntu2204/%s/ /\n' \
		"${cuda_repos}" "${repo_arch}" > /etc/apt/sources.list.d/cuda.list

	apt update
	eval "${APT_INSTALL}" datacenter-gpu-manager
}
|
||||
|
||||
# Put the runtime packages on hold, purge kernel and build packages, remove
# apt state and logs, regenerate the ld cache and finally restore the kernel
# modules from the archive saved before the purge.
# Globals: driver_version (read), driver_type (read)
cleanup_rootfs() {
	echo "chroot: Cleanup NVIDIA GPU rootfs"

	apt-mark hold libstdc++6 libzstd1 libgnutls30 pciutils
	# noble=libgnutls30t64

	if [ -n "${driver_version}" ]; then
		apt-mark hold libnvidia-cfg1-"${driver_version}" \
			nvidia-compute-utils-"${driver_version}" \
			nvidia-utils-"${driver_version}" \
			nvidia-kernel-common-"${driver_version}" \
			nvidia-imex-"${driver_version}" \
			libnvidia-compute-"${driver_version}" \
			libnvidia-gl-"${driver_version}" \
			libnvidia-extra-"${driver_version}" \
			libnvidia-decode-"${driver_version}" \
			libnvidia-fbc1-"${driver_version}" \
			libnvidia-encode-"${driver_version}" \
			libnvidia-nscq-"${driver_version}" \
			linuxptp libnftnl11
	fi

	# grep exits non-zero when nothing matches; "|| true" keeps an empty
	# result from aborting the script under set -e.
	local kernel_headers linux_images kernel_package
	kernel_headers=$(dpkg --get-selections | cut -f1 | grep linux-headers || true)
	linux_images=$(dpkg --get-selections | cut -f1 | grep linux-image || true)
	# Unquoted on purpose: one package name per word.
	for kernel_package in ${kernel_headers} ${linux_images}; do
		apt purge -yqq "${kernel_package}"
	done

	apt purge -yqq jq make gcc wget libc6-dev git xz-utils curl gpg \
		python3-pip software-properties-common ca-certificates \
		linux-libc-dev nuitka python3-minimal cuda-keyring

	if [ -n "${driver_version}" ]; then
		apt purge -yqq nvidia-headless-no-dkms-"${driver_version}-${driver_type}" \
			nvidia-kernel-source-"${driver_version}-${driver_type}"
	fi

	apt autoremove -yqq

	apt clean
	apt autoclean

	local modules_version
	for modules_version in /lib/modules/*; do
		# Make the modules reachable under the running kernel's release name
		# before depmod is run.
		ln -sf "${modules_version}" /lib/modules/"$(uname -r)"
		touch "${modules_version}"/modules.order
		touch "${modules_version}"/modules.builtin
		depmod -a
	done

	rm -rf /etc/apt/sources.list* /var/lib/apt /var/log/apt /var/cache/debconf
	rm -f /usr/bin/nvidia-ngx-updater /usr/bin/nvidia-container-runtime
	rm -f /var/log/{nvidia-installer.log,dpkg.log,alternatives.log}

	# Clear and regenerate the ld cache
	rm -f /etc/ld.so.cache
	ldconfig

	tar xvf /lib/modules.save_from_purge.tar.zst -C /
}
|
||||
# Start of script
echo "chroot: Setup NVIDIA GPU rootfs stage one"

# The stage-one steps, executed strictly in the listed order.
for chroot_step in \
	setup_apt_repositories \
	install_kernel_dependencies \
	install_build_dependencies \
	prepare_nvidia_drivers \
	build_nvidia_drivers \
	install_userspace_components \
	install_nvidia_fabricmanager \
	install_nvidia_ctk \
	export_driver_version \
	install_nvidia_dcgm \
	cleanup_rootfs; do
	"${chroot_step}"
done
|
348
tools/osbuilder/rootfs-builder/nvidia/nvidia_rootfs.sh
Normal file
348
tools/osbuilder/rootfs-builder/nvidia/nvidia_rootfs.sh
Normal file
@ -0,0 +1,348 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Copyright (c) 2024 NVIDIA Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Abort on the first failing command and trace every command that is run.
set -o errexit
set -o xtrace

readonly BUILD_DIR="/kata-containers/tools/packaging/kata-deploy/local-build/build/"

# Assign first and mark readonly afterwards so that a failure of the command
# substitution is caught.
script_dir="$(dirname "$(readlink -f "$0")")"
readonly SCRIPT_DIR="${script_dir}/nvidia"

# This will control how much output the initrd/image will produce
DEBUG=""
|
||||
|
||||
# Clone and build NVRC and pack the resulting binary into
# ${BUILD_DIR}/kata-static-nvidia-nvrc.tar.zst.
# Globals: BUILD_DIR (read)
setup_nvidia-nvrc() {
	local -r component="nvidia-nvrc"
	local -r repo_name="nvrc"
	local -r rust_target="x86_64-unknown-linux-musl"
	local -r build_dir="${BUILD_DIR}/${component}/builddir"
	local -r dest_dir="${BUILD_DIR}/${component}/destdir"
	local -r tarball="${BUILD_DIR}/kata-static-${component}.tar.zst"

	mkdir -p "${build_dir}" "${dest_dir}/bin"

	pushd "${build_dir}" > /dev/null || exit 1

	# Always build from a fresh clone.
	rm -rf "${repo_name}"
	git clone "https://github.com/NVIDIA/${repo_name}.git"

	pushd "${repo_name}" > /dev/null || exit 1
	cargo build --release --target="${rust_target}"
	# Two levels up from the clone is the component directory holding destdir.
	cp "target/${rust_target}/release/NVRC" ../../destdir/bin/.
	popd > /dev/null || exit 1

	tar cvfa "${tarball}" -C ../destdir .
	tar tvf "${tarball}"

	popd > /dev/null || exit 1
}
|
||||
|
||||
# Clone gpu-admin-tools, bundle nvidia_gpu_tools.py into a single executable
# with pyinstaller and pack it into
# ${BUILD_DIR}/kata-static-nvidia-gpu-admin-tools.tar.zst.
# Globals: BUILD_DIR (read), HOME (read)
setup_nvidia-gpu-admin-tools() {
	local -r component="nvidia-gpu-admin-tools"
	local -r repo_url="https://github.com/NVIDIA/gpu-admin-tools"
	local -r build_dir="${BUILD_DIR}/${component}/builddir"
	local -r dest_dir="${BUILD_DIR}/${component}/destdir"
	local -r tarball="${BUILD_DIR}/kata-static-${component}.tar.zst"

	mkdir -p "${build_dir}" "${dest_dir}/sbin"

	pushd "${build_dir}" > /dev/null || exit 1

	# Always build from a fresh clone and a clean pyinstaller output directory.
	rm -rf "${repo_url##*/}"
	git clone "${repo_url}"
	rm -rf dist

	# Installed via pipx local python environment
	# NOTE(review): the path is ${HOME}/local/bin, not ${HOME}/.local/bin —
	# confirm this matches where the build environment installs pyinstaller.
	"${HOME}"/local/bin/pyinstaller -s -F gpu-admin-tools/nvidia_gpu_tools.py

	cp dist/nvidia_gpu_tools ../destdir/sbin/.

	tar cvfa "${tarball}" -C ../destdir .
	tar tvf "${tarball}"

	popd > /dev/null || exit 1
}
|
||||
|
||||
# Clone and build dcgm-exporter and pack its binary and CSV configuration
# files into ${BUILD_DIR}/kata-static-nvidia-dcgm-exporter.tar.zst.
# Globals: BUILD_DIR (read)
setup_nvidia-dcgm-exporter() {
	local -r component="nvidia-dcgm-exporter"
	local -r build_dir="${BUILD_DIR}/${component}/builddir"
	local -r dest_dir="${BUILD_DIR}/${component}/destdir"
	local -r tarball="${BUILD_DIR}/kata-static-${component}.tar.zst"

	mkdir -p "${build_dir}" "${dest_dir}/bin" "${dest_dir}/etc"

	pushd "${build_dir}" > /dev/null || exit 1

	local dex="dcgm-exporter"

	# Always build from a fresh clone.
	rm -rf "${dex}"
	git clone "https://github.com/NVIDIA/${dex}"
	make -C "${dex}" binary

	mkdir -p ../destdir/bin "../destdir/etc/${dex}"

	cp "${dex}/cmd/${dex}/${dex}" ../destdir/bin/.
	cp "${dex}"/etc/*.csv "../destdir/etc/${dex}/."

	tar cvfa "${tarball}" -C ../destdir .
	tar tvf "${tarball}"

	popd > /dev/null || exit 1
}
|
||||
|
||||
# Download and unpack the pinned UPX release into BUILD_DIR unless its binary
# is already present there.
# Globals: BUILD_DIR (read)
_nvidia_fetch_upx() {
	local -r upx_release="upx-4.2.4-amd64_linux"

	if [ -x "${BUILD_DIR}/${upx_release}/upx" ]; then
		return 0
	fi

	pushd "${BUILD_DIR}" >> /dev/null
	# --fail: stop on an HTTP error instead of saving the error page.
	curl -fLO "https://github.com/upx/upx/releases/download/v4.2.4/${upx_release}.tar.xz"
	tar xvf "${upx_release}.tar.xz"
	popd >> /dev/null
}

# Build the stage-one NVIDIA GPU rootfs: run nvidia_chroot.sh inside
# ROOTFS_DIR and move the result into
# ${BUILD_DIR}/kata-static-rootfs-nvidia-gpu-stage-one.tar.zst.
# Returns early when that tarball already exists.
# Arguments: $1 - optional rootfs type; "confidential" selects the
#                 confidential kernel headers tarball
# Globals:   BUILD_DIR, ROOTFS_DIR, SCRIPT_DIR, NVIDIA_GPU_STACK, ARCH (read)
setup_nvidia_gpu_rootfs_stage_one() {
	# Must be the same file name that is written at the end of this function.
	local -r stage_one_tarball="${BUILD_DIR}/kata-static-rootfs-nvidia-gpu-stage-one.tar.zst"

	if [ -e "${stage_one_tarball}" ]; then
		info "nvidia: GPU rootfs stage one already exists"
		_nvidia_fetch_upx
		return
	fi

	pushd "${ROOTFS_DIR:?}" >> /dev/null

	local rootfs_type=${1:-""}

	info "nvidia: Setup GPU rootfs type=$rootfs_type"

	# Build every helper component whose tarball is not available yet.
	local component
	for component in "nvidia-gpu-admin-tools" "nvidia-dcgm-exporter" "nvidia-nvrc"; do
		if [ ! -e "${BUILD_DIR}/kata-static-${component}.tar.zst" ]; then
			"setup_${component}"
		fi
	done

	cp "${SCRIPT_DIR}/nvidia_chroot.sh" ./nvidia_chroot.sh
	chmod +x ./nvidia_chroot.sh

	local appendix=""
	if [ "$rootfs_type" == "confidential" ]; then
		appendix="-${rootfs_type}"
	fi
	if echo "$NVIDIA_GPU_STACK" | grep -q '\<dragonball\>'; then
		appendix="-dragonball-experimental"
	fi

	# We need the kernel packages for building the drivers cleanly; they will
	# be deinstalled and removed from the rootfs once the build finishes.
	tar -xvf "${BUILD_DIR}/kata-static-kernel-nvidia-gpu${appendix}-headers.tar.xz" -C .

	# If we find a local downloaded run file build the kernel modules
	# with it, otherwise use the distribution packages. Run files may have
	# more recent drivers available than the distribution packages.
	local run_file_name="nvidia-driver.run"
	if [ -f "${BUILD_DIR}/${run_file_name}" ]; then
		cp -L "${BUILD_DIR}/${run_file_name}" "./${run_file_name}"
	fi

	local run_fm_file_name="nvidia-fabricmanager.run"
	if [ -f "${BUILD_DIR}/${run_fm_file_name}" ]; then
		cp -L "${BUILD_DIR}/${run_fm_file_name}" "./${run_fm_file_name}"
	fi

	mount --rbind /dev ./dev
	mount --make-rslave ./dev
	mount -t proc /proc ./proc

	# "latest" is the default and wins when both selectors are present.
	local driver_version="latest"
	if echo "$NVIDIA_GPU_STACK" | grep -q '\<latest\>'; then
		driver_version="latest"
	elif echo "$NVIDIA_GPU_STACK" | grep -q '\<lts\>'; then
		driver_version="lts"
	fi

	chroot . /bin/bash -c "/nvidia_chroot.sh $(uname -r) ${run_file_name} ${run_fm_file_name} ${ARCH} ${driver_version}"

	umount -R ./dev
	umount ./proc

	rm ./nvidia_chroot.sh
	rm ./*.deb

	tar cfa "${stage_one_tarball}" --remove-files -- *

	popd >> /dev/null

	_nvidia_fetch_upx
}
|
||||
|
||||
# Copy xtables-nft-multi and the libraries listed below from the stage-one
# rootfs into the current directory and create the iptables-restore and
# iptables-save symlinks.
# Globals: stage_one (read)
chisseled_iptables() {
	echo "nvidia: chisseling iptables"
	cp -a "${stage_one}"/usr/sbin/xtables-nft-multi sbin/.

	ln -s ../sbin/xtables-nft-multi sbin/iptables-restore
	ln -s ../sbin/xtables-nft-multi sbin/iptables-save

	# Local, so the caller's variables are left untouched.
	local libdir="lib/x86_64-linux-gnu"
	cp -a "${stage_one}/${libdir}"/libmnl.so.0* lib/.

	libdir="usr/lib/x86_64-linux-gnu"
	cp -a "${stage_one}/${libdir}"/libnftnl.so.11* lib/.
	cp -a "${stage_one}/${libdir}"/libxtables.so.12* lib/.
}
|
||||
|
||||
# Placeholder for the NVSwitch component: reports that it is not implemented
# and terminates the shell with status 1.
chisseled_nvswitch() {
	printf '%s\n' \
		"nvidia: chisseling NVSwitch" \
		"nvidia: not implemented yet"
	exit 1
}
|
||||
|
||||
# Copy the DCGM libraries and nv-hostengine from the stage-one rootfs into
# the current directory and unpack the dcgm-exporter tarball on top.
# Globals: stage_one (read), BUILD_DIR (read)
chisseled_dcgm() {
	echo "nvidia: chisseling DCGM"

	mkdir -p etc/dcgm-exporter

	# Local, so the caller's variables are left untouched.
	local libdir="lib/x86_64-linux-gnu"

	cp -a "${stage_one}/usr/${libdir}"/libdcgm.* "${libdir}"/.
	cp -a "${stage_one}/${libdir}"/libgcc_s.so.1* "${libdir}"/.
	cp -a "${stage_one}"/usr/bin/nv-hostengine bin/.

	tar xvf "${BUILD_DIR}"/kata-static-nvidia-dcgm-exporter.tar.zst -C .
}
|
||||
|
||||
# compute always includes utility per default
#
# Copy the driver version file, kernel modules, the libraries listed below
# and the basic GPU tools from the stage-one rootfs into the current
# directory, and unpack the gpu-admin-tools tarball.
# Globals: stage_one (read), BUILD_DIR (read)
chisseled_compute() {
	echo "nvidia: chisseling GPU"

	cp -a "${stage_one}"/nvidia_driver_version .

	tar xvf "${BUILD_DIR}"/kata-static-nvidia-gpu-admin-tools.tar.zst -C .

	cp -a "${stage_one}"/lib/modules/* lib/modules/.

	# Local, so the caller's variables are left untouched.
	local libdir="lib/x86_64-linux-gnu"
	local lib
	for lib in libdl.so.2 libz.so.1 libpthread.so.0 libresolv.so.2 \
		libc.so.6 libm.so.6 librt.so.1; do
		cp -a "${stage_one}/${libdir}/${lib}"* lib/x86_64-linux-gnu/.
	done

	# -L: copy the file the program interpreter link points to.
	cp -aL "${stage_one}"/lib64/ld-linux-x86-64.so.* lib64/.

	libdir="usr/lib/x86_64-linux-gnu"
	for lib in libnvidia-ml.so. libcuda.so. libnvidia-cfg.so.; do
		cp -a "${stage_one}/${libdir}/${lib}"* lib/x86_64-linux-gnu/.
	done

	# basic GPU admin tools
	local tool
	for tool in nvidia-persistenced nvidia-smi nvidia-ctk nvidia-cdi-hook; do
		cp -a "${stage_one}/usr/bin/${tool}" bin/.
	done
	ln -s ../bin usr/bin
}
|
||||
|
||||
# Placeholder for the GPUDirect component: reports that it is not implemented
# and terminates the shell with status 1.
chisseled_gpudirect() {
	printf '%s\n' \
		"nvidia: chisseling GPUDirect" \
		"nvidia: not implemented yet"
	exit 1
}
|
||||
|
||||
# Lay out the base of the rootfs in the current directory: unpack busybox,
# create the directory skeleton, unpack NVRC and link it as init, and copy
# the files listed below from the stage-one rootfs.
# Globals: stage_one (read), BUILD_DIR (read)
chisseled_init() {
	echo "nvidia: chisseling init"
	tar xvf "${BUILD_DIR}"/kata-static-busybox.tar.xz -C .

	local -a skeleton_dirs=(
		dev etc proc run/cdi sys tmp usr var lib/modules lib/firmware
		usr/share/nvidia lib/x86_64-linux-gnu lib64
	)
	mkdir -p "${skeleton_dirs[@]}"

	ln -sf ../run var/run

	tar xvf "${BUILD_DIR}"/kata-static-nvidia-nvrc.tar.zst -C .

	ln -sf /bin/NVRC init

	cp -a "${stage_one}"/sbin/init sbin/.

	local etc_entry
	for etc_entry in kata-opa resolv.conf; do
		cp -a "${stage_one}/etc/${etc_entry}" etc/.
	done
	cp -a "${stage_one}"/supported-gpu.devids .

	cp -a "${stage_one}"/lib/firmware/nvidia lib/firmware/.
	# ldconfig.real is installed under the name ldconfig.
	cp -a "${stage_one}"/sbin/ldconfig.real sbin/ldconfig
}
|
||||
|
||||
# Strip the shared libraries and executables below the current directory and
# compress the executables with UPX.
# Globals: BUILD_DIR (read)
compress_rootfs() {
	echo "nvidia: compressing rootfs"

	local file

	# For some unobvious reason libc has executable bit set
	# clean this up otherwise the find -executable will not work correctly
	while IFS= read -r file; do
		chmod -x "${file}"
		strip "${file}"
	done < <(find . -type f -name "*.so.*")

	while IFS= read -r file; do
		strip "${file}"
		"${BUILD_DIR}/upx-4.2.4-amd64_linux/upx" --best --lzma "${file}"
	done < <(find . -type f -executable)

	# While I was playing with compression the executable flag on
	# /lib64/ld-linux-x86-64.so.2 was lost...
	# Since this is the program interpreter, it needs to be executable
	# as well.. sigh
	chmod +x lib64/ld-linux-x86-64.so.2
}
|
||||
|
||||
# Export DEBUG="true" when NVIDIA_GPU_STACK contains the word "debug".
# Globals: NVIDIA_GPU_STACK (read), DEBUG (written and exported)
toggle_debug() {
	if grep -q '\<debug\>' <<< "$NVIDIA_GPU_STACK"; then
		export DEBUG="true"
	fi
}
|
||||
|
||||
# Build the final rootfs in ROOTFS_DIR: unpack the stage-one tarball into a
# scratch directory, copy the pieces selected by NVIDIA_GPU_STACK, compress
# the result and run ldconfig inside it.
# Globals: BUILD_DIR, VARIANT, ROOTFS_DIR, NVIDIA_GPU_STACK (read, the first,
#          third and fourth must be set and non-empty);
#          stage_one, stage_two, stack (set readonly)
setup_nvidia_gpu_rootfs_stage_two() {
	readonly stage_one="${BUILD_DIR:?}/rootfs-${VARIANT}-stage-one"
	readonly stage_two="${ROOTFS_DIR:?}"
	readonly stack="${NVIDIA_GPU_STACK:?}"

	echo "nvidia: chisseling the following stack components: $stack"

	# Start from an empty scratch directory.
	if [ -e "${stage_one}" ]; then
		rm -rf "${stage_one}"
	fi
	if [ ! -e "${stage_one}" ]; then
		mkdir -p "${stage_one}"
	fi

	tar -C "${stage_one}" -xf "${BUILD_DIR}/kata-static-rootfs-nvidia-gpu-stage-one.tar.zst"

	pushd "${stage_two}" >> /dev/null

	toggle_debug
	chisseled_init
	chisseled_iptables

	IFS=',' read -r -a stack_components <<< "$NVIDIA_GPU_STACK"

	# Entries without a matching arm below are skipped.
	for component in "${stack_components[@]}"; do
		case "${component}" in
		compute)
			echo "nvidia: processing \"compute\" component"
			chisseled_compute
			;;
		dcgm)
			echo "nvidia: processing DCGM component"
			chisseled_dcgm
			;;
		nvswitch)
			echo "nvidia: processing NVSwitch component"
			chisseled_nvswitch
			;;
		gpudirect)
			echo "nvidia: processing GPUDirect component"
			chisseled_gpudirect
			;;
		esac
	done

	compress_rootfs

	chroot . ldconfig

	popd >> /dev/null
}
|
@ -43,6 +43,10 @@ if [[ "${AGENT_POLICY}" == "yes" ]]; then
|
||||
agent_policy_file="$(readlink -f -v "${AGENT_POLICY_FILE:-"${script_dir}/../../../src/kata-opa/allow-all.rego"}")"
|
||||
fi
|
||||
|
||||
NVIDIA_GPU_STACK=${NVIDIA_GPU_STACK:-""}
|
||||
nvidia_rootfs="${script_dir}/nvidia/nvidia_rootfs.sh"
|
||||
source "$nvidia_rootfs"
|
||||
|
||||
#For cross build
|
||||
CROSS_BUILD=${CROSS_BUILD:-false}
|
||||
BUILDX=""
|
||||
@ -516,6 +520,7 @@ build_rootfs_distro()
|
||||
--env EXTRA_PKGS="${EXTRA_PKGS}" \
|
||||
--env OSBUILDER_VERSION="${OSBUILDER_VERSION}" \
|
||||
--env OS_VERSION="${OS_VERSION}" \
|
||||
--env VARIANT="${VARIANT}" \
|
||||
--env INSIDE_CONTAINER=1 \
|
||||
--env SECCOMP="${SECCOMP}" \
|
||||
--env SELINUX="${SELINUX}" \
|
||||
@ -525,6 +530,7 @@ build_rootfs_distro()
|
||||
--env HOME="/root" \
|
||||
--env AGENT_POLICY="${AGENT_POLICY}" \
|
||||
--env CONFIDENTIAL_GUEST="${CONFIDENTIAL_GUEST}" \
|
||||
--env NVIDIA_GPU_STACK="${NVIDIA_GPU_STACK}" \
|
||||
-v "${repo_dir}":"/kata-containers" \
|
||||
-v "${ROOTFS_DIR}":"/rootfs" \
|
||||
-v "${script_dir}/../scripts":"/scripts" \
|
||||
@ -819,6 +825,18 @@ main()
|
||||
|
||||
init="${ROOTFS_DIR}/sbin/init"
|
||||
setup_rootfs
|
||||
|
||||
if [ "${VARIANT}" = "nvidia-gpu" ]; then
|
||||
setup_nvidia_gpu_rootfs_stage_one
|
||||
setup_nvidia_gpu_rootfs_stage_two
|
||||
return $?
|
||||
fi
|
||||
|
||||
if [ "${VARIANT}" = "nvidia-gpu-confidential" ]; then
|
||||
setup_nvidia_gpu_rootfs_stage_one "confidential"
|
||||
setup_nvidia_gpu_rootfs_stage_two "confidential"
|
||||
return $?
|
||||
fi
|
||||
}
|
||||
|
||||
main $*
|
||||
|
@ -80,8 +80,9 @@ agent-tarball: copy-scripts-for-the-agent-build
|
||||
agent-ctl-tarball: copy-scripts-for-the-tools-build
|
||||
${MAKE} $@-build
|
||||
|
||||
BUSYBOX_CONF_FILE ?= busybox.nvidia.conf
|
||||
busybox-tarball:
|
||||
${MAKE} $@-build
|
||||
${MAKE} BUSYBOX_CONF_FILE=${BUSYBOX_CONF_FILE} $@-build
|
||||
|
||||
coco-guest-components-tarball:
|
||||
${MAKE} $@-build
|
||||
@ -163,6 +164,17 @@ rootfs-initrd-tarball: agent-tarball
|
||||
|
||||
runk-tarball: copy-scripts-for-the-tools-build
|
||||
${MAKE} $@-build
|
||||
rootfs-nvidia-gpu-image-tarball: agent-tarball busybox-tarball
|
||||
${MAKE} $@-build
|
||||
|
||||
rootfs-nvidia-gpu-initrd-tarball: agent-tarball busybox-tarball
|
||||
${MAKE} $@-build
|
||||
|
||||
rootfs-nvidia-gpu-confidential-image-tarball: agent-tarball busybox-tarball pause-image-tarball coco-guest-components-tarball kernel-nvidia-gpu-confidential-tarball
|
||||
${MAKE} $@-build
|
||||
|
||||
rootfs-nvidia-gpu-confidential-initrd-tarball: agent-tarball busybox-tarball pause-image-tarball coco-guest-components-tarball kernel-nvidia-gpu-confidential-tarball
|
||||
${MAKE} $@-build
|
||||
|
||||
shim-v2-tarball:
|
||||
${MAKE} $@-build
|
||||
|
@ -57,6 +57,9 @@ RUN apt-get update && \
|
||||
cpio \
|
||||
gcc \
|
||||
unzip \
|
||||
git \
|
||||
make \
|
||||
wget \
|
||||
xz-utils && \
|
||||
if [ "${ARCH}" != "$(uname -m)" ] && [ "${ARCH}" == "s390x" ]; then \
|
||||
apt-get install -y --no-install-recommends \
|
||||
|
@ -102,6 +102,7 @@ MEASURED_ROOTFS="${MEASURED_ROOTFS:-}"
|
||||
PULL_TYPE="${PULL_TYPE:-default}"
|
||||
USE_CACHE="${USE_CACHE:-}"
|
||||
BUSYBOX_CONF_FILE=${BUSYBOX_CONF_FILE:-}
|
||||
NVIDIA_GPU_STACK="${NVIDIA_GPU_STACK:-}"
|
||||
|
||||
docker run \
|
||||
-v $HOME/.docker:/root/.docker \
|
||||
@ -131,6 +132,7 @@ docker run \
|
||||
--env PULL_TYPE="${PULL_TYPE}" \
|
||||
--env USE_CACHE="${USE_CACHE}" \
|
||||
--env BUSYBOX_CONF_FILE="${BUSYBOX_CONF_FILE}" \
|
||||
--env NVIDIA_GPU_STACK="${NVIDIA_GPU_STACK}" \
|
||||
--env AA_KBC="${AA_KBC:-}" \
|
||||
--env HKD_PATH="$(realpath "${HKD_PATH:-}" 2> /dev/null || true)" \
|
||||
--env SE_KERNEL_PARAMS="${SE_KERNEL_PARAMS:-}" \
|
||||
|
@ -448,7 +448,7 @@ install_initrd() {
|
||||
os_name="$(get_from_kata_deps ".assets.initrd.architecture.${ARCH}.${variant}.name")"
|
||||
os_version="$(get_from_kata_deps ".assets.initrd.architecture.${ARCH}.${variant}.version")"
|
||||
|
||||
if [ "${variant}" == "confidential" ]; then
|
||||
if [[ "${variant}" == *-confidential ]]; then
|
||||
export COCO_GUEST_COMPONENTS_TARBALL="$(get_coco_guest_components_tarball_path)"
|
||||
export PAUSE_IMAGE_TARBALL="$(get_pause_image_tarball_path)"
|
||||
fi
|
||||
@ -470,35 +470,57 @@ install_initrd_confidential() {
|
||||
install_initrd "confidential"
|
||||
}
|
||||
|
||||
#Instal NVIDIA GPU image
|
||||
# For all nvidia_gpu targets we can customize the stack that is enabled
|
||||
# in the VM by setting the NVIDIA_GPU_STACK= environment variable
|
||||
#
|
||||
# latest | lts -> use the latest and greatest driver or lts release
|
||||
# debug -> enable debugging support
|
||||
# compute -> enable the compute GPU stack, includes utility
|
||||
# graphics -> enable the graphics GPU stack, includes compute
|
||||
# dcgm -> enable the DCGM stack + DCGM exporter
|
||||
# nvswitch -> enable DGX like systems
|
||||
# gpudirect -> enable use-cases like GPUDirect RDMA, GPUDirect GDS
|
||||
# dragonball -> enable dragonball support
|
||||
#
|
||||
# The full stack can be enabled by setting all the options like:
|
||||
#
|
||||
# NVIDIA_GPU_STACK="latest,compute,dcgm,nvswitch,gpudirect"
|
||||
#
|
||||
# Install NVIDIA GPU image
|
||||
install_image_nvidia_gpu() {
|
||||
export AGENT_POLICY="yes"
|
||||
export AGENT_INIT="yes"
|
||||
export EXTRA_PKGS="apt udev"
|
||||
export EXTRA_PKGS="apt"
|
||||
NVIDIA_GPU_STACK=${NVIDIA_GPU_STACK:-"latest,compute,dcgm"}
|
||||
install_image "nvidia-gpu"
|
||||
}
|
||||
|
||||
#Install NVIDIA GPU initrd
|
||||
# Install NVIDIA GPU initrd
|
||||
install_initrd_nvidia_gpu() {
|
||||
export AGENT_POLICY="yes"
|
||||
export AGENT_INIT="yes"
|
||||
export EXTRA_PKGS="apt udev"
|
||||
export EXTRA_PKGS="apt"
|
||||
NVIDIA_GPU_STACK=${NVIDIA_GPU_STACK:-"latest,compute,dcgm"}
|
||||
install_initrd "nvidia-gpu"
|
||||
}
|
||||
|
||||
#Instal NVIDIA GPU confidential image
|
||||
# Install NVIDIA GPU confidential image
|
||||
install_image_nvidia_gpu_confidential() {
|
||||
export AGENT_POLICY="yes"
|
||||
export AGENT_INIT="yes"
|
||||
export EXTRA_PKGS="apt udev"
|
||||
export EXTRA_PKGS="apt"
|
||||
# TODO: export MEASURED_ROOTFS=yes
|
||||
NVIDIA_GPU_STACK=${NVIDIA_GPU_STACK:-"latest,compute"}
|
||||
install_image "nvidia-gpu-confidential"
|
||||
}
|
||||
|
||||
#Install NVIDIA GPU confidential initrd
|
||||
# Install NVIDIA GPU confidential initrd
|
||||
install_initrd_nvidia_gpu_confidential() {
|
||||
export AGENT_POLICY="yes"
|
||||
export AGENT_INIT="yes"
|
||||
export EXTRA_PKGS="apt udev"
|
||||
export EXTRA_PKGS="apt"
|
||||
# TODO: export MEASURED_ROOTFS=yes
|
||||
NVIDIA_GPU_STACK=${NVIDIA_GPU_STACK:-"latest,compute"}
|
||||
install_initrd "nvidia-gpu-confidential"
|
||||
}
|
||||
|
||||
@ -1122,6 +1144,7 @@ handle_build() {
|
||||
kernel-confidential) install_kernel_confidential ;;
|
||||
|
||||
kernel-dragonball-experimental) install_kernel_dragonball_experimental ;;
|
||||
|
||||
kernel-nvidia-gpu-dragonball-experimental) install_kernel_nvidia_gpu_dragonball_experimental ;;
|
||||
|
||||
kernel-nvidia-gpu) install_kernel_nvidia_gpu ;;
|
||||
|
Loading…
Reference in New Issue
Block a user