mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-03-16 01:32:26 +00:00
Compare commits
11 Commits
dependabot
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
99f32de1e5 | ||
|
|
6a853a9684 | ||
|
|
8ff5d164c6 | ||
|
|
d4c21f50b5 | ||
|
|
5c9683f006 | ||
|
|
d22c314e91 | ||
|
|
7fe84c8038 | ||
|
|
1fd66db271 | ||
|
|
9332b75c04 | ||
|
|
4a7022d2f4 | ||
|
|
f25fa6ab25 |
@@ -483,16 +483,12 @@ ifneq (,$(QEMUCMD))
|
||||
KERNELPATH_CONFIDENTIAL_NV = $(KERNELDIR)/$(KERNELNAME_CONFIDENTIAL_NV)
|
||||
|
||||
DEFAULTVCPUS_NV = 1
|
||||
DEFAULTMEMORY_NV = 2048
|
||||
DEFAULTMEMORY_NV = 8192
|
||||
DEFAULTTIMEOUT_NV = 1200
|
||||
DEFAULTVFIOPORT_NV = root-port
|
||||
DEFAULTPCIEROOTPORT_NV = 8
|
||||
|
||||
# Disable the devtmpfs mount in guest. NVRC does this, and later kata-agent
|
||||
# attempts this as well in a non-failing manner. Otherwise, NVRC fails when
|
||||
# using an image and /dev is already mounted.
|
||||
KERNELPARAMS_NV = "cgroup_no_v1=all"
|
||||
KERNELPARAMS_NV += "devtmpfs.mount=0"
|
||||
KERNELPARAMS_NV += "pci=realloc"
|
||||
KERNELPARAMS_NV += "pci=nocrs"
|
||||
KERNELPARAMS_NV += "pci=assign-busses"
|
||||
|
||||
@@ -599,7 +599,7 @@ debug_console_enabled = false
|
||||
|
||||
# Agent connection dialing timeout value in seconds
|
||||
# (default: 90)
|
||||
dial_timeout = 90
|
||||
dial_timeout = @DEFAULTTIMEOUT_NV@
|
||||
|
||||
[runtime]
|
||||
# If enabled, the runtime will log additional debug messages to the
|
||||
|
||||
@@ -576,7 +576,7 @@ debug_console_enabled = false
|
||||
|
||||
# Agent connection dialing timeout value in seconds
|
||||
# (default: 90)
|
||||
dial_timeout = 90
|
||||
dial_timeout = @DEFAULTTIMEOUT_NV@
|
||||
|
||||
[runtime]
|
||||
# If enabled, the runtime will log additional debug messages to the
|
||||
|
||||
@@ -578,7 +578,7 @@ debug_console_enabled = false
|
||||
|
||||
# Agent connection dialing timeout value in seconds
|
||||
# (default: 90)
|
||||
dial_timeout = 90
|
||||
dial_timeout = @DEFAULTTIMEOUT_NV@
|
||||
|
||||
[runtime]
|
||||
# If enabled, the runtime will log additional debug messages to the
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
module github.com/kata-containers/kata-containers/src/runtime
|
||||
|
||||
// Keep in sync with version in versions.yaml
|
||||
go 1.25.7
|
||||
go 1.25.8
|
||||
|
||||
// WARNING: Do NOT use `replace` directives as those break dependabot:
|
||||
// https://github.com/kata-containers/kata-containers/issues/11020
|
||||
|
||||
@@ -72,7 +72,7 @@ func IsPCIeDevice(bdf string) bool {
|
||||
}
|
||||
|
||||
// read from /sys/bus/pci/devices/xxx/property
|
||||
func getPCIDeviceProperty(bdf string, property PCISysFsProperty) string {
|
||||
func GetPCIDeviceProperty(bdf string, property PCISysFsProperty) string {
|
||||
if len(strings.Split(bdf, ":")) == 2 {
|
||||
bdf = PCIDomain + ":" + bdf
|
||||
}
|
||||
@@ -220,9 +220,9 @@ func GetDeviceFromVFIODev(device config.DeviceInfo) ([]*config.VFIODev, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
vendorID := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesVendor)
|
||||
deviceID := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesDevice)
|
||||
pciClass := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass)
|
||||
vendorID := GetPCIDeviceProperty(deviceBDF, PCISysFsDevicesVendor)
|
||||
deviceID := GetPCIDeviceProperty(deviceBDF, PCISysFsDevicesDevice)
|
||||
pciClass := GetPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass)
|
||||
|
||||
i, err := extractIndex(device.HostPath)
|
||||
if err != nil {
|
||||
@@ -276,7 +276,7 @@ func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo) ([]*config.VFIODe
|
||||
switch vfioDeviceType {
|
||||
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
||||
// This is vfio-pci and vfio-mdev specific
|
||||
pciClass := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass)
|
||||
pciClass := GetPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass)
|
||||
// We need to ignore Host or PCI Bridges that are in the same IOMMU group as the
|
||||
// passed-through devices. One CANNOT pass-through a PCI bridge or Host bridge.
|
||||
// Class 0x0604 is PCI bridge, 0x0600 is Host bridge
|
||||
@@ -288,8 +288,8 @@ func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo) ([]*config.VFIODe
|
||||
continue
|
||||
}
|
||||
// Fetch the PCI Vendor ID and Device ID
|
||||
vendorID := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesVendor)
|
||||
deviceID := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesDevice)
|
||||
vendorID := GetPCIDeviceProperty(deviceBDF, PCISysFsDevicesVendor)
|
||||
deviceID := GetPCIDeviceProperty(deviceBDF, PCISysFsDevicesDevice)
|
||||
|
||||
// Do not directly assign to `vfio` -- need to access field still
|
||||
vfio = config.VFIODev{
|
||||
|
||||
@@ -7,6 +7,7 @@ package virtcontainers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
@@ -1135,7 +1136,9 @@ func (c *Container) createDevices(ctx context.Context, contConfig *ContainerConf
|
||||
|
||||
// If we're hot-plugging this will be a no-op because at this stage
|
||||
// no devices are attached to the root-port or switch-port
|
||||
c.annotateContainerWithVFIOMetadata(vfioColdPlugDevices)
|
||||
if err := c.annotateContainerWithVFIOMetadata(vfioColdPlugDevices); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1194,11 +1197,40 @@ func sortContainerVFIODevices(devices []config.DeviceInfo) []config.DeviceInfo {
|
||||
return vfioDevices
|
||||
}
|
||||
|
||||
// errNoSiblingFound is returned by siblingAnnotation when the VFIO device is
|
||||
// not of a supported CDI device type, i.e. it has no entry in the cdiDeviceKind
|
||||
// table (e.g. NVSwitches). Callers should treat this as a non-fatal "device not
|
||||
// applicable" condition rather than a sibling-matching failure.
|
||||
var errNoSiblingFound = fmt.Errorf("no suitable sibling found")
|
||||
|
||||
// cdiDeviceKey identifies a device type by vendor ID and PCI class prefix.
|
||||
type cdiDeviceKey struct {
|
||||
VendorID string
|
||||
ClassPrefix string
|
||||
}
|
||||
|
||||
// cdiDeviceKind maps known device types to their CDI annotation kind.
|
||||
var cdiDeviceKind = map[cdiDeviceKey]string{
|
||||
{VendorID: "0x10de", ClassPrefix: "0x030"}: "nvidia.com/gpu",
|
||||
}
|
||||
|
||||
// cdiKindForDevice returns the CDI kind for a given vendor ID and PCI class,
|
||||
// or empty string and false if the device is not recognized.
|
||||
func cdiKindForDevice(vendorID, class string) (string, bool) {
|
||||
for key, kind := range cdiDeviceKind {
|
||||
if vendorID == key.VendorID && strings.Contains(class, key.ClassPrefix) {
|
||||
return kind, true
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
type DeviceRelation struct {
|
||||
Bus string
|
||||
Path string
|
||||
Index int
|
||||
BDF string
|
||||
Bus string
|
||||
Path string
|
||||
Index int
|
||||
BDF string
|
||||
CDIKind string
|
||||
}
|
||||
|
||||
// Depending on the HW we might need to inject metadata into the container
|
||||
@@ -1223,15 +1255,13 @@ func (c *Container) annotateContainerWithVFIOMetadata(devices interface{}) error
|
||||
// so lets first iterate over all root-port devices and then
|
||||
// switch-port devices no special handling for bridge-port (PCI)
|
||||
for _, dev := range config.PCIeDevicesPerPort["root-port"] {
|
||||
// For the NV GPU we need special handling let's use only those
|
||||
if dev.VendorID == "0x10de" && strings.Contains(dev.Class, "0x030") {
|
||||
siblings = append(siblings, DeviceRelation{Bus: dev.Bus, Path: dev.HostPath, BDF: dev.BDF})
|
||||
if kind, ok := cdiKindForDevice(dev.VendorID, dev.Class); ok {
|
||||
siblings = append(siblings, DeviceRelation{Bus: dev.Bus, Path: dev.HostPath, BDF: dev.BDF, CDIKind: kind})
|
||||
}
|
||||
}
|
||||
for _, dev := range config.PCIeDevicesPerPort["switch-port"] {
|
||||
// For the NV GPU we need special handling let's use only those
|
||||
if dev.VendorID == "0x10de" && strings.Contains(dev.Class, "0x030") {
|
||||
siblings = append(siblings, DeviceRelation{Bus: dev.Bus, Path: dev.HostPath, BDF: dev.BDF})
|
||||
if kind, ok := cdiKindForDevice(dev.VendorID, dev.Class); ok {
|
||||
siblings = append(siblings, DeviceRelation{Bus: dev.Bus, Path: dev.HostPath, BDF: dev.BDF, CDIKind: kind})
|
||||
}
|
||||
}
|
||||
// We need to sort the VFIO devices by bus to get the correct
|
||||
@@ -1244,48 +1274,53 @@ func (c *Container) annotateContainerWithVFIOMetadata(devices interface{}) error
|
||||
siblings[i].Index = i
|
||||
}
|
||||
|
||||
// Now that we have the index lets connect the /dev/vfio/<num>
|
||||
// to the correct index
|
||||
if devices, ok := devices.([]ContainerDevice); ok {
|
||||
for _, dev := range devices {
|
||||
if dev.ContainerPath == "/dev/vfio/vfio" {
|
||||
c.Logger().Infof("skipping /dev/vfio/vfio for vfio_mode=guest-kernel")
|
||||
continue
|
||||
}
|
||||
err := c.siblingAnnotation(dev.ContainerPath, siblings)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Collect container paths from either hot-plug or cold-plug devices
|
||||
var containerPaths []string
|
||||
if devs, ok := devices.([]ContainerDevice); ok {
|
||||
for _, dev := range devs {
|
||||
containerPaths = append(containerPaths, dev.ContainerPath)
|
||||
}
|
||||
}
|
||||
if devs, ok := devices.([]config.DeviceInfo); ok {
|
||||
for _, dev := range devs {
|
||||
containerPaths = append(containerPaths, dev.ContainerPath)
|
||||
}
|
||||
}
|
||||
|
||||
if devices, ok := devices.([]config.DeviceInfo); ok {
|
||||
for _, dev := range devices {
|
||||
if dev.ContainerPath == "/dev/vfio/vfio" {
|
||||
c.Logger().Infof("skipping /dev/vfio/vfio for vfio_mode=guest-kernel")
|
||||
// Now that we have the index lets connect the /dev/vfio/<num>
|
||||
// to the correct index
|
||||
for _, devPath := range containerPaths {
|
||||
if !strings.HasPrefix(devPath, "/dev/vfio") {
|
||||
c.Logger().Infof("skipping guest annotations for non-VFIO device %q", devPath)
|
||||
continue
|
||||
}
|
||||
if devPath == "/dev/vfio/vfio" {
|
||||
c.Logger().Infof("skipping /dev/vfio/vfio for vfio_mode=guest-kernel")
|
||||
continue
|
||||
}
|
||||
if err := c.siblingAnnotation(devPath, siblings); err != nil {
|
||||
if errors.Is(err, errNoSiblingFound) {
|
||||
c.Logger().Infof("no CDI annotation for device %s (not a known CDI device type)", devPath)
|
||||
continue
|
||||
}
|
||||
err := c.siblingAnnotation(dev.ContainerPath, siblings)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// createCDIAnnotation adds a container annotation mapping a VFIO device to a GPU index.
|
||||
// createCDIAnnotation adds a container annotation mapping a VFIO device to a device index.
|
||||
//
|
||||
// devPath is the path to the VFIO device, which can be in the format
|
||||
// "/dev/vfio/<num>" or "/dev/vfio/devices/vfio<num>". The function extracts
|
||||
// the device number from the path and creates an annotation with the key
|
||||
// "cdi.k8s.io/vfio<num>" and the value "nvidia.com/gpu=<index>", where
|
||||
// <num> is the device number and <index> is the provided GPU index.
|
||||
// "cdi.k8s.io/vfio<num>" and the value "<cdiKind>=<index>", where
|
||||
// <cdiKind> is the CDI device kind (e.g. "nvidia.com/gpu"),
|
||||
// <num> is the device number and <index> is the provided device index.
|
||||
// The annotation is stored in c.config.CustomSpec.Annotations.
|
||||
func (c *Container) createCDIAnnotation(devPath string, index int) {
|
||||
func (c *Container) createCDIAnnotation(devPath string, index int, cdiKind string) {
|
||||
// We have here either /dev/vfio/<num> or /dev/vfio/devices/vfio<num>
|
||||
baseName := filepath.Base(devPath)
|
||||
vfioNum := baseName
|
||||
@@ -1294,66 +1329,68 @@ func (c *Container) createCDIAnnotation(devPath string, index int) {
|
||||
vfioNum = strings.TrimPrefix(baseName, "vfio")
|
||||
}
|
||||
annoKey := fmt.Sprintf("cdi.k8s.io/vfio%s", vfioNum)
|
||||
annoValue := fmt.Sprintf("nvidia.com/gpu=%d", index)
|
||||
annoValue := fmt.Sprintf("%s=%d", cdiKind, index)
|
||||
if c.config.CustomSpec.Annotations == nil {
|
||||
c.config.CustomSpec.Annotations = make(map[string]string)
|
||||
}
|
||||
c.config.CustomSpec.Annotations[annoKey] = annoValue
|
||||
c.Logger().Infof("annotated container with %s: %s", annoKey, annoValue)
|
||||
}
|
||||
|
||||
func (c *Container) siblingAnnotation(devPath string, siblings []DeviceRelation) error {
|
||||
for _, sibling := range siblings {
|
||||
if sibling.Path == devPath {
|
||||
c.createCDIAnnotation(devPath, sibling.Index)
|
||||
return nil
|
||||
// Resolve the device's BDFs once upfront. This serves two purposes:
|
||||
// 1. Determine if the device is a known CDI type (if not, skip it)
|
||||
// 2. Reuse the BDFs for sibling matching without redundant sysfs reads
|
||||
isKnownCDIDevice := false
|
||||
var devBDFs []string
|
||||
|
||||
if strings.HasPrefix(filepath.Base(devPath), "vfio") {
|
||||
// IOMMUFD device (/dev/vfio/devices/vfio<NUM>): single device per char dev
|
||||
major, minor, err := deviceUtils.GetMajorMinorFromDevPath(devPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// If the sandbox has cold-plugged an IOMMUFD device and if the
|
||||
// device-plugins sends us a /dev/vfio/<NUM> device we need to
|
||||
// check if the IOMMUFD device and the VFIO device are the same
|
||||
// We have the sibling.BDF we now need to extract the BDF of the
|
||||
// devPath that is either /dev/vfio/<NUM> or
|
||||
// /dev/vfio/devices/vfio<NUM>
|
||||
if strings.HasPrefix(filepath.Base(devPath), "vfio") {
|
||||
// IOMMUFD device format (/dev/vfio/devices/vfio<NUM>), extract BDF from sysfs
|
||||
major, minor, err := deviceUtils.GetMajorMinorFromDevPath(devPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
iommufdBDF, err := deviceUtils.GetBDFFromVFIODev(major, minor)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if sibling.BDF == iommufdBDF {
|
||||
c.createCDIAnnotation(devPath, sibling.Index)
|
||||
// exit handling IOMMUFD device
|
||||
return nil
|
||||
}
|
||||
bdf, err := deviceUtils.GetBDFFromVFIODev(major, minor)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Legacy VFIO group device (/dev/vfio/<GROUP_NUM>), extract BDF from sysfs
|
||||
devBDFs = []string{bdf}
|
||||
vendorID := deviceUtils.GetPCIDeviceProperty(bdf, deviceUtils.PCISysFsDevicesVendor)
|
||||
class := deviceUtils.GetPCIDeviceProperty(bdf, deviceUtils.PCISysFsDevicesClass)
|
||||
_, isKnownCDIDevice = cdiKindForDevice(vendorID, class)
|
||||
} else {
|
||||
// Legacy VFIO group (/dev/vfio/<GROUP>): may contain multiple devices
|
||||
vfioGroup := filepath.Base(devPath)
|
||||
iommuDevicesPath := filepath.Join(config.SysIOMMUGroupPath, vfioGroup, "devices")
|
||||
|
||||
deviceFiles, err := os.ReadDir(iommuDevicesPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
vfioBDFs := make([]string, 0)
|
||||
for _, deviceFile := range deviceFiles {
|
||||
// Get bdf of device eg 0000:00:1c.0
|
||||
deviceBDF, _, _, err := deviceUtils.GetVFIODetails(deviceFile.Name(), iommuDevicesPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
vfioBDFs = append(vfioBDFs, deviceBDF)
|
||||
devBDFs = append(devBDFs, deviceBDF)
|
||||
if !isKnownCDIDevice {
|
||||
vendorID := deviceUtils.GetPCIDeviceProperty(deviceBDF, deviceUtils.PCISysFsDevicesVendor)
|
||||
class := deviceUtils.GetPCIDeviceProperty(deviceBDF, deviceUtils.PCISysFsDevicesClass)
|
||||
if _, ok := cdiKindForDevice(vendorID, class); ok {
|
||||
isKnownCDIDevice = true
|
||||
}
|
||||
}
|
||||
}
|
||||
if slices.Contains(vfioBDFs, sibling.BDF) {
|
||||
c.createCDIAnnotation(devPath, sibling.Index)
|
||||
// exit handling legacy VFIO device
|
||||
}
|
||||
if !isKnownCDIDevice {
|
||||
return fmt.Errorf("device %s: %w", devPath, errNoSiblingFound)
|
||||
}
|
||||
|
||||
for _, sibling := range siblings {
|
||||
if sibling.Path == devPath || slices.Contains(devBDFs, sibling.BDF) {
|
||||
c.createCDIAnnotation(devPath, sibling.Index, sibling.CDIKind)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("failed to match device %s with any cold-plugged GPU device by path or BDF; no suitable sibling found", devPath)
|
||||
return fmt.Errorf("device %s is a known CDI device type but failed to match any sibling by path or BDF", devPath)
|
||||
}
|
||||
|
||||
// create creates and starts a container inside a Sandbox. It has to be
|
||||
@@ -1382,7 +1419,9 @@ func (c *Container) create(ctx context.Context) (err error) {
|
||||
return
|
||||
}
|
||||
|
||||
c.annotateContainerWithVFIOMetadata(c.devices)
|
||||
if err := c.annotateContainerWithVFIOMetadata(c.devices); err != nil {
|
||||
return fmt.Errorf("annotating VFIO devices: %w", err)
|
||||
}
|
||||
|
||||
// Deduce additional system mount info that should be handled by the agent
|
||||
// inside the VM
|
||||
|
||||
@@ -841,7 +841,6 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
|
||||
// /dev/vfio/devices/vfio0
|
||||
// (1) Check if we have the new IOMMUFD or old container based VFIO
|
||||
if strings.HasPrefix(dev.HostPath, pkgDevice.IommufdDevPath) {
|
||||
q.Logger().Infof("### IOMMUFD Path: %s", dev.HostPath)
|
||||
vfioDevices, err = drivers.GetDeviceFromVFIODev(dev)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Cannot get VFIO device from IOMMUFD with device: %v err: %v", dev, err)
|
||||
|
||||
@@ -113,27 +113,6 @@ setup_langchain_flow() {
|
||||
[[ "$(pip show beautifulsoup4 2>/dev/null | awk '/^Version:/{print $2}')" = "4.13.4" ]] || pip install beautifulsoup4==4.13.4
|
||||
}
|
||||
|
||||
# Create Docker config for genpolicy so it can authenticate to nvcr.io when
|
||||
# pulling image manifests (avoids "UnauthorizedError" from genpolicy's registry pull).
|
||||
# Genpolicy (src/tools/genpolicy) uses docker_credential::get_credential() in
|
||||
# src/tools/genpolicy/src/registry.rs build_auth(). The docker_credential crate
|
||||
# reads config from DOCKER_CONFIG (directory) + "/config.json", so we set
|
||||
# DOCKER_CONFIG to a directory containing config.json with nvcr.io auth.
|
||||
setup_genpolicy_registry_auth() {
|
||||
if [[ -z "${NGC_API_KEY:-}" ]]; then
|
||||
return
|
||||
fi
|
||||
local auth_dir
|
||||
auth_dir="${BATS_SUITE_TMPDIR}/.docker-genpolicy"
|
||||
mkdir -p "${auth_dir}"
|
||||
# Docker config format: auths -> registry -> auth (base64 of "user:password")
|
||||
echo -n "{\"auths\":{\"nvcr.io\":{\"username\":\"\$oauthtoken\",\"password\":\"${NGC_API_KEY}\",\"auth\":\"$(echo -n "\$oauthtoken:${NGC_API_KEY}" | base64 -w0)\"}}}" \
|
||||
> "${auth_dir}/config.json"
|
||||
export DOCKER_CONFIG="${auth_dir}"
|
||||
# REGISTRY_AUTH_FILE (containers-auth.json format) is the same structure for auths
|
||||
export REGISTRY_AUTH_FILE="${auth_dir}/config.json"
|
||||
}
|
||||
|
||||
# Create initdata TOML file for genpolicy with CDH configuration.
|
||||
# This file is used by genpolicy via --initdata-path. Genpolicy will add the
|
||||
# generated policy.rego to it and set it as the cc_init_data annotation.
|
||||
@@ -243,9 +222,6 @@ setup_file() {
|
||||
add_requests_to_policy_settings "${policy_settings_dir}" "ReadStreamRequest"
|
||||
|
||||
if [ "${TEE}" = "true" ]; then
|
||||
# So genpolicy can pull nvcr.io image manifests when generating policy (avoids UnauthorizedError).
|
||||
setup_genpolicy_registry_auth
|
||||
|
||||
setup_kbs_credentials
|
||||
# Overwrite the empty default-initdata.toml with our CDH configuration.
|
||||
# This must happen AFTER create_tmp_policy_settings_dir() copies the empty
|
||||
|
||||
@@ -51,6 +51,27 @@ kernel_params = "${new_params}"
|
||||
EOF
|
||||
}
|
||||
|
||||
# Create Docker config for genpolicy so it can authenticate to nvcr.io when
|
||||
# pulling image manifests (avoids "UnauthorizedError" from genpolicy's registry pull).
|
||||
# Genpolicy (src/tools/genpolicy) uses docker_credential::get_credential() in
|
||||
# src/tools/genpolicy/src/registry.rs build_auth(). The docker_credential crate
|
||||
# reads config from DOCKER_CONFIG (directory) + "/config.json", so we set
|
||||
# DOCKER_CONFIG to a directory containing config.json with nvcr.io auth.
|
||||
setup_genpolicy_registry_auth() {
|
||||
if [[ -z "${NGC_API_KEY:-}" ]]; then
|
||||
return
|
||||
fi
|
||||
local auth_dir
|
||||
auth_dir="${kubernetes_dir}/.docker-genpolicy"
|
||||
mkdir -p "${auth_dir}"
|
||||
# Docker config format: auths -> registry -> auth (base64 of "user:password")
|
||||
echo -n "{\"auths\":{\"nvcr.io\":{\"username\":\"\$oauthtoken\",\"password\":\"${NGC_API_KEY}\",\"auth\":\"$(echo -n "\$oauthtoken:${NGC_API_KEY}" | base64 -w0)\"}}}" \
|
||||
> "${auth_dir}/config.json"
|
||||
export DOCKER_CONFIG="${auth_dir}"
|
||||
# REGISTRY_AUTH_FILE (containers-auth.json format) is the same structure for auths
|
||||
export REGISTRY_AUTH_FILE="${auth_dir}/config.json"
|
||||
}
|
||||
|
||||
cleanup() {
|
||||
true
|
||||
}
|
||||
@@ -84,6 +105,9 @@ if [[ "${ENABLE_NVRC_TRACE:-true}" == "true" ]]; then
|
||||
enable_nvrc_trace
|
||||
fi
|
||||
|
||||
# So genpolicy can pull nvcr.io image manifests when generating policy (avoids UnauthorizedError).
|
||||
setup_genpolicy_registry_auth
|
||||
|
||||
# Use common bats test runner with proper reporting
|
||||
export BATS_TEST_FAIL_FAST="${K8S_TEST_FAIL_FAST}"
|
||||
run_bats_tests "${kubernetes_dir}" K8S_TEST_NV
|
||||
|
||||
@@ -61,12 +61,12 @@ install_userspace_components() {
|
||||
eval "${APT_INSTALL}" nvidia-imex nvidia-firmware \
|
||||
libnvidia-cfg1 libnvidia-gl libnvidia-extra \
|
||||
libnvidia-decode libnvidia-fbc1 libnvidia-encode \
|
||||
libnvidia-nscq
|
||||
libnvidia-nscq libnvidia-compute nvidia-settings
|
||||
|
||||
apt-mark hold nvidia-imex nvidia-firmware \
|
||||
libnvidia-cfg1 libnvidia-gl libnvidia-extra \
|
||||
libnvidia-decode libnvidia-fbc1 libnvidia-encode \
|
||||
libnvidia-nscq
|
||||
libnvidia-nscq libnvidia-compute nvidia-settings
|
||||
}
|
||||
|
||||
setup_apt_repositories() {
|
||||
|
||||
@@ -151,14 +151,8 @@ chisseled_nvswitch() {
|
||||
cp -a "${stage_one}"/usr/share/nvidia/nvswitch usr/share/nvidia/.
|
||||
|
||||
libdir=usr/lib/"${machine_arch}"-linux-gnu
|
||||
|
||||
cp -a "${stage_one}/${libdir}"/libnvidia-nscq.so.* lib/"${machine_arch}"-linux-gnu/.
|
||||
|
||||
# Logs will be redirected to console(stderr)
|
||||
# if the specified log file can't be opened or the path is empty.
|
||||
# LOG_FILE_NAME=/var/log/fabricmanager.log -> setting to empty for stderr -> kmsg
|
||||
sed -i 's|^LOG_FILE_NAME=.*|LOG_FILE_NAME=|' usr/share/nvidia/nvswitch/fabricmanager.cfg
|
||||
|
||||
# NVLINK SubnetManager dependencies
|
||||
local nvlsm=usr/share/nvidia/nvlsm
|
||||
mkdir -p "${nvlsm}"
|
||||
@@ -166,6 +160,8 @@ chisseled_nvswitch() {
|
||||
cp -a "${stage_one}"/opt/nvidia/nvlsm/lib/libgrpc_mgr.so lib/.
|
||||
cp -a "${stage_one}"/opt/nvidia/nvlsm/sbin/nvlsm sbin/.
|
||||
cp -a "${stage_one}/${nvlsm}"/*.conf "${nvlsm}"/.
|
||||
# Redirect all the logs to syslog instead of logging to file
|
||||
sed -i 's|^LOG_USE_SYSLOG=.*|LOG_USE_SYSLOG=1|' usr/share/nvidia/nvswitch/fabricmanager.cfg
|
||||
}
|
||||
|
||||
chisseled_dcgm() {
|
||||
@@ -211,9 +207,8 @@ chisseled_compute() {
|
||||
cp -aL "${stage_one}/${libdir}"/ld-linux-* "${libdir}"/.
|
||||
|
||||
libdir=usr/lib/"${machine_arch}"-linux-gnu
|
||||
cp -a "${stage_one}/${libdir}"/libnvidia-ml.so.* lib/"${machine_arch}"-linux-gnu/.
|
||||
cp -a "${stage_one}/${libdir}"/libnv* lib/"${machine_arch}"-linux-gnu/.
|
||||
cp -a "${stage_one}/${libdir}"/libcuda.so.* lib/"${machine_arch}"-linux-gnu/.
|
||||
cp -a "${stage_one}/${libdir}"/libnvidia-cfg.so.* lib/"${machine_arch}"-linux-gnu/.
|
||||
|
||||
# basic GPU admin tools
|
||||
cp -a "${stage_one}"/usr/bin/nvidia-persistenced bin/.
|
||||
@@ -245,6 +240,8 @@ chisseled_init() {
|
||||
usr/bin etc/modprobe.d etc/ssl/certs
|
||||
|
||||
ln -sf ../run var/run
|
||||
ln -sf ../run var/log
|
||||
ln -sf ../run var/cache
|
||||
|
||||
# Needed for various RUST static builds with LIBC=gnu
|
||||
libdir=lib/"${machine_arch}"-linux-gnu
|
||||
|
||||
@@ -96,9 +96,9 @@ scheduling:
|
||||
"qemu-snp-runtime-rs" (dict "memory" "2048Mi" "cpu" "1.0")
|
||||
"qemu-tdx" (dict "memory" "2048Mi" "cpu" "1.0")
|
||||
"qemu-tdx-runtime-rs" (dict "memory" "2048Mi" "cpu" "1.0")
|
||||
"qemu-nvidia-gpu" (dict "memory" "4096Mi" "cpu" "1.0")
|
||||
"qemu-nvidia-gpu-snp" (dict "memory" "20480Mi" "cpu" "1.0")
|
||||
"qemu-nvidia-gpu-tdx" (dict "memory" "20480Mi" "cpu" "1.0")
|
||||
"qemu-nvidia-gpu" (dict "memory" "10240Mi" "cpu" "1.0")
|
||||
"qemu-nvidia-gpu-snp" (dict "memory" "10240Mi" "cpu" "1.0")
|
||||
"qemu-nvidia-gpu-tdx" (dict "memory" "10240Mi" "cpu" "1.0")
|
||||
"qemu-cca" (dict "memory" "2048Mi" "cpu" "1.0")
|
||||
"stratovirt" (dict "memory" "130Mi" "cpu" "250m")
|
||||
"remote" (dict "memory" "120Mi" "cpu" "250m")
|
||||
|
||||
@@ -234,7 +234,7 @@ externals:
|
||||
nvrc:
|
||||
# yamllint disable-line rule:line-length
|
||||
desc: "The NVRC project provides a Rust binary that implements a simple init system for microVMs"
|
||||
version: "v0.1.1"
|
||||
version: "v0.1.3"
|
||||
url: "https://github.com/NVIDIA/nvrc/releases/download/"
|
||||
|
||||
nvidia:
|
||||
|
||||
Reference in New Issue
Block a user