gpu: Add more debugging to CI/CD

Capture NVRC logs via journalctl

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
Zvonko Kaiser 2025-08-19 16:15:28 +00:00 committed by Fabiano Fidêncio
parent b7d2973ce5
commit 60d87b7785

View File

@ -58,6 +58,13 @@ create_inference_embedqa_pods() {
echo "# POD_IP_EMBEDQA=${POD_IP_EMBEDQA}" >&3 echo "# POD_IP_EMBEDQA=${POD_IP_EMBEDQA}" >&3
} }
# Turn on NVRC trace logging for the NVIDIA GPU runtime class.
#
# Appends "nvrc.log=trace" to the kernel_params line of the Kata
# configuration file that belongs to the selected runtime class.
#
# Globals:   RUNTIME_CLASS_NAME (read)
# Arguments: none
# Returns:   0 when there is nothing to do or the file was updated,
#            otherwise the exit status of the sed invocation.
enable_nvrc_trace() {
	local config_file=""

	if [[ "${RUNTIME_CLASS_NAME:-}" == "kata-qemu-nvidia-gpu" ]]; then
		config_file="/opt/kata/share/defaults/kata-containers/configuration-qemu-nvidia-gpu.toml"
	fi

	# Only the runtime class above has a known configuration file; for any
	# other value there is nothing to patch, so do not run sed on an empty path.
	[[ -n "${config_file}" ]] || return 0

	# Skip lines that already carry nvrc.log=trace so repeated runs do not
	# keep appending the same parameter.
	sudo sed -i \
		-e '/^kernel_params = ".*nvrc\.log=trace/!s/^kernel_params = "\(.*\)"/kernel_params = "\1 nvrc.log=trace"/' \
		"${config_file}"
}
setup_file() { setup_file() {
dpkg -s jq >/dev/null 2>&1 || sudo apt -y install jq dpkg -s jq >/dev/null 2>&1 || sudo apt -y install jq
@ -82,6 +89,8 @@ setup_file() {
export POD_INSTRUCT_YAML="${pod_instruct_yaml}" export POD_INSTRUCT_YAML="${pod_instruct_yaml}"
export POD_EMBEDQA_YAML="${pod_embedqa_yaml}" export POD_EMBEDQA_YAML="${pod_embedqa_yaml}"
enable_nvrc_trace
setup_langchain_flow setup_langchain_flow
create_inference_embedqa_pods create_inference_embedqa_pods
} }
@ -324,4 +333,5 @@ EOF
# Per-file teardown: delete the resources described by the two pod
# manifests whose paths setup_file exported.
#
# Globals: POD_INSTRUCT_YAML (read), POD_EMBEDQA_YAML (read)
teardown_file() {
	kubectl delete -f "${POD_INSTRUCT_YAML}"
	kubectl delete -f "${POD_EMBEDQA_YAML}"
}