mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-08-13 21:56:32 +00:00
agent: Enable VFIO and initContainers
We had a static mapping of host-to-guest PCI addresses, which prevented the use of VFIO devices in initContainers. We now track the host-guest mapping per container and remove that mapping when the container is removed. Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
parent
545022f295
commit
248d04c20c
@ -9,6 +9,7 @@ use self::nvdimm_device_handler::VirtioNvdimmDeviceHandler;
|
|||||||
use self::scsi_device_handler::ScsiDeviceHandler;
|
use self::scsi_device_handler::ScsiDeviceHandler;
|
||||||
use self::vfio_device_handler::{VfioApDeviceHandler, VfioPciDeviceHandler};
|
use self::vfio_device_handler::{VfioApDeviceHandler, VfioPciDeviceHandler};
|
||||||
use crate::pci;
|
use crate::pci;
|
||||||
|
use crate::sandbox::PciHostGuestMapping;
|
||||||
use crate::sandbox::Sandbox;
|
use crate::sandbox::Sandbox;
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
use cdi::annotations::parse_annotations;
|
use cdi::annotations::parse_annotations;
|
||||||
@ -180,6 +181,7 @@ lazy_static! {
|
|||||||
|
|
||||||
#[instrument]
|
#[instrument]
|
||||||
pub async fn add_devices(
|
pub async fn add_devices(
|
||||||
|
cid: &String,
|
||||||
logger: &Logger,
|
logger: &Logger,
|
||||||
devices: &[Device],
|
devices: &[Device],
|
||||||
spec: &mut Spec,
|
spec: &mut Spec,
|
||||||
@ -211,8 +213,9 @@ pub async fn add_devices(
|
|||||||
}
|
}
|
||||||
|
|
||||||
let mut sb = sandbox.lock().await;
|
let mut sb = sandbox.lock().await;
|
||||||
|
let mut host_guest: PciHostGuestMapping = HashMap::new();
|
||||||
for (host, guest) in update.pci {
|
for (host, guest) in update.pci {
|
||||||
if let Some(other_guest) = sb.pcimap.insert(host, guest) {
|
if let Some(other_guest) = host_guest.insert(host, guest) {
|
||||||
return Err(anyhow!(
|
return Err(anyhow!(
|
||||||
"Conflicting guest address for host device {} ({} versus {})",
|
"Conflicting guest address for host device {} ({} versus {})",
|
||||||
host,
|
host,
|
||||||
@ -221,6 +224,9 @@ pub async fn add_devices(
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Save all the host -> guest mappings per container; upon
|
||||||
|
// removal of the container, the mappings will be removed
|
||||||
|
sb.pcimap.insert(cid.clone(), host_guest);
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!(logger, "failed to add devices, error: {e:?}");
|
error!(logger, "failed to add devices, error: {e:?}");
|
||||||
@ -238,7 +244,7 @@ pub async fn add_devices(
|
|||||||
if let Some(process) = spec.process_mut() {
|
if let Some(process) = spec.process_mut() {
|
||||||
let env_vec: &mut Vec<String> =
|
let env_vec: &mut Vec<String> =
|
||||||
&mut process.env_mut().get_or_insert_with(Vec::new).to_vec();
|
&mut process.env_mut().get_or_insert_with(Vec::new).to_vec();
|
||||||
update_env_pci(env_vec, &sandbox.lock().await.pcimap)?
|
update_env_pci(cid, env_vec, &sandbox.lock().await.pcimap)?
|
||||||
}
|
}
|
||||||
update_spec_devices(logger, spec, dev_updates)
|
update_spec_devices(logger, spec, dev_updates)
|
||||||
}
|
}
|
||||||
@ -391,8 +397,9 @@ pub fn insert_devices_cgroup_rule(
|
|||||||
// given a map of (host address => guest address)
|
// given a map of (host address => guest address)
|
||||||
#[instrument]
|
#[instrument]
|
||||||
pub fn update_env_pci(
|
pub fn update_env_pci(
|
||||||
|
cid: &String,
|
||||||
env: &mut [String],
|
env: &mut [String],
|
||||||
pcimap: &HashMap<pci::Address, pci::Address>,
|
pcimap: &HashMap<String, PciHostGuestMapping>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
// SR-IOV device plugin may add two environment variables for one resource:
|
// SR-IOV device plugin may add two environment variables for one resource:
|
||||||
// - PCIDEVICE_<prefix>_<resource-name>: a list of PCI device ids separated by comma
|
// - PCIDEVICE_<prefix>_<resource-name>: a list of PCI device ids separated by comma
|
||||||
@ -418,7 +425,10 @@ pub fn update_env_pci(
|
|||||||
for host_addr_str in val.split(',') {
|
for host_addr_str in val.split(',') {
|
||||||
let host_addr = pci::Address::from_str(host_addr_str)
|
let host_addr = pci::Address::from_str(host_addr_str)
|
||||||
.with_context(|| format!("Can't parse {} environment variable", name))?;
|
.with_context(|| format!("Can't parse {} environment variable", name))?;
|
||||||
let guest_addr = pcimap
|
let host_guest = pcimap
|
||||||
|
.get(cid)
|
||||||
|
.ok_or_else(|| anyhow!("No PCI mapping found for container {}", cid))?;
|
||||||
|
let guest_addr = host_guest
|
||||||
.get(&host_addr)
|
.get(&host_addr)
|
||||||
.ok_or_else(|| anyhow!("Unable to translate host PCI address {}", host_addr))?;
|
.ok_or_else(|| anyhow!("Unable to translate host PCI address {}", host_addr))?;
|
||||||
|
|
||||||
@ -1052,7 +1062,7 @@ mod tests {
|
|||||||
"NOTAPCIDEVICE_blah=abcd:ef:01.0".to_string(),
|
"NOTAPCIDEVICE_blah=abcd:ef:01.0".to_string(),
|
||||||
];
|
];
|
||||||
|
|
||||||
let pci_fixups = example_map
|
let _pci_fixups = example_map
|
||||||
.iter()
|
.iter()
|
||||||
.map(|(h, g)| {
|
.map(|(h, g)| {
|
||||||
(
|
(
|
||||||
@ -1062,7 +1072,11 @@ mod tests {
|
|||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let res = update_env_pci(&mut env, &pci_fixups);
|
let cid = "0".to_string();
|
||||||
|
let mut pci_fixups: HashMap<String, HashMap<pci::Address, pci::Address>> = HashMap::new();
|
||||||
|
pci_fixups.insert(cid.clone(), _pci_fixups);
|
||||||
|
|
||||||
|
let res = update_env_pci(&cid, &mut env, &pci_fixups);
|
||||||
assert!(res.is_ok(), "error: {}", res.err().unwrap());
|
assert!(res.is_ok(), "error: {}", res.err().unwrap());
|
||||||
|
|
||||||
assert_eq!(env[0], "PCIDEVICE_x=0000:01:01.0,0000:01:02.0");
|
assert_eq!(env[0], "PCIDEVICE_x=0000:01:01.0,0000:01:02.0");
|
||||||
|
@ -230,7 +230,7 @@ impl AgentService {
|
|||||||
// updates the devices listed in the OCI spec, so that they actually
|
// updates the devices listed in the OCI spec, so that they actually
|
||||||
// match real devices inside the VM. This step is necessary since we
|
// match real devices inside the VM. This step is necessary since we
|
||||||
// cannot predict everything from the caller.
|
// cannot predict everything from the caller.
|
||||||
add_devices(&sl(), &req.devices, &mut oci, &self.sandbox).await?;
|
add_devices(&cid, &sl(), &req.devices, &mut oci, &self.sandbox).await?;
|
||||||
|
|
||||||
// In guest-kernel mode some devices need extra handling. Taking the
|
// In guest-kernel mode some devices need extra handling. Taking the
|
||||||
// GPU as an example the shim will inject CDI annotations that will
|
// GPU as an example the shim will inject CDI annotations that will
|
||||||
@ -377,6 +377,9 @@ impl AgentService {
|
|||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let cid = req.container_id;
|
let cid = req.container_id;
|
||||||
|
|
||||||
|
// Drop the host guest mapping for this container so we can reuse the
|
||||||
|
// PCI slots for the next containers
|
||||||
|
|
||||||
if req.timeout == 0 {
|
if req.timeout == 0 {
|
||||||
let mut sandbox = self.sandbox.lock().await;
|
let mut sandbox = self.sandbox.lock().await;
|
||||||
sandbox.bind_watcher.remove_container(&cid).await;
|
sandbox.bind_watcher.remove_container(&cid).await;
|
||||||
@ -432,7 +435,7 @@ impl AgentService {
|
|||||||
.ok_or_else(|| anyhow!("Unable to parse process from ExecProcessRequest"))?;
|
.ok_or_else(|| anyhow!("Unable to parse process from ExecProcessRequest"))?;
|
||||||
|
|
||||||
// Apply any necessary corrections for PCI addresses
|
// Apply any necessary corrections for PCI addresses
|
||||||
update_env_pci(&mut process.Env, &sandbox.pcimap)?;
|
update_env_pci(&cid, &mut process.Env, &sandbox.pcimap)?;
|
||||||
|
|
||||||
let pipe_size = AGENT_CONFIG.container_pipe_size;
|
let pipe_size = AGENT_CONFIG.container_pipe_size;
|
||||||
let ocip = process.into();
|
let ocip = process.into();
|
||||||
@ -1878,6 +1881,8 @@ async fn remove_container_resources(sandbox: &mut Sandbox, cid: &str) -> Result<
|
|||||||
|
|
||||||
sandbox.container_mounts.remove(cid);
|
sandbox.container_mounts.remove(cid);
|
||||||
sandbox.containers.remove(cid);
|
sandbox.containers.remove(cid);
|
||||||
|
// Remove any host -> guest mappings for this container
|
||||||
|
sandbox.pcimap.remove(cid);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -95,6 +95,8 @@ impl StorageState {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Map from a host PCI address (key) to the guest PCI address (value) it
/// corresponds to inside the VM.
pub type PciHostGuestMapping = HashMap<pci::Address, pci::Address>;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Sandbox {
|
pub struct Sandbox {
|
||||||
pub logger: Logger,
|
pub logger: Logger,
|
||||||
@ -118,7 +120,7 @@ pub struct Sandbox {
|
|||||||
pub event_rx: Arc<Mutex<Receiver<String>>>,
|
pub event_rx: Arc<Mutex<Receiver<String>>>,
|
||||||
pub event_tx: Option<Sender<String>>,
|
pub event_tx: Option<Sender<String>>,
|
||||||
pub bind_watcher: BindWatcher,
|
pub bind_watcher: BindWatcher,
|
||||||
pub pcimap: HashMap<pci::Address, pci::Address>,
|
pub pcimap: HashMap<String, PciHostGuestMapping>,
|
||||||
pub devcg_info: Arc<RwLock<DevicesCgroupInfo>>,
|
pub devcg_info: Arc<RwLock<DevicesCgroupInfo>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user