mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-07-11 06:13:43 +00:00
Merge pull request #4582 from BbolroC/vfio-ap
agent: Bring in VFIO-AP device handling again
This commit is contained in:
commit
2fe0733dcb
1
src/agent/Cargo.lock
generated
1
src/agent/Cargo.lock
generated
@ -801,6 +801,7 @@ dependencies = [
|
|||||||
"async-recursion",
|
"async-recursion",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
"capctl",
|
"capctl",
|
||||||
|
"cfg-if 1.0.0",
|
||||||
"cgroups-rs",
|
"cgroups-rs",
|
||||||
"clap",
|
"clap",
|
||||||
"futures",
|
"futures",
|
||||||
|
@ -48,6 +48,7 @@ slog-scope = "4.1.2"
|
|||||||
slog-stdlog = "4.0.0"
|
slog-stdlog = "4.0.0"
|
||||||
log = "0.4.11"
|
log = "0.4.11"
|
||||||
|
|
||||||
|
cfg-if = "1.0.0"
|
||||||
prometheus = { version = "0.13.0", features = ["process"] }
|
prometheus = { version = "0.13.0", features = ["process"] }
|
||||||
procfs = "0.12.0"
|
procfs = "0.12.0"
|
||||||
anyhow = "1.0.32"
|
anyhow = "1.0.32"
|
||||||
|
79
src/agent/src/ap.rs
Normal file
79
src/agent/src/ap.rs
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
// Copyright (c) IBM Corp. 2023
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
use std::fmt;
|
||||||
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
use anyhow::{anyhow, Context};
|
||||||
|
|
||||||
|
// IBM Adjunct Processor (AP) is used for cryptographic operations
|
||||||
|
// by IBM Crypto Express hardware security modules on IBM zSystem & LinuxONE (s390x).
|
||||||
|
// In Linux, virtual cryptographic devices are called AP queues.
|
||||||
|
// The name of an AP queue respects a format <xx>.<xxxx> in hexadecimal notation [1, p.467]:
|
||||||
|
// - <xx> is an adapter ID
|
||||||
|
// - <xxxx> is an adapter domain ID
|
||||||
|
// [1] https://www.ibm.com/docs/en/linuxonibm/pdf/lku5dd05.pdf
|
||||||
|
|
||||||
|
/// Address of an AP queue on the AP bus, written `<xx>.<xxxx>` in hexadecimal notation.
///
/// The type is a plain pair of small integers, so it is cheap to copy and can be compared
/// directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Address {
    /// Adapter ID, the `<xx>` part of the queue name.
    pub adapter_id: u8,
    /// Adapter domain ID, the `<xxxx>` part of the queue name.
    pub adapter_domain: u16,
}
|
||||||
|
|
||||||
|
impl Address {
|
||||||
|
pub fn new(adapter_id: u8, adapter_domain: u16) -> Address {
|
||||||
|
Address {
|
||||||
|
adapter_id,
|
||||||
|
adapter_domain,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FromStr for Address {
|
||||||
|
type Err = anyhow::Error;
|
||||||
|
|
||||||
|
fn from_str(s: &str) -> anyhow::Result<Self> {
|
||||||
|
let split: Vec<&str> = s.split('.').collect();
|
||||||
|
if split.len() != 2 {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"Wrong AP bus format. It needs to be in the form <xx>.<xxxx> (e.g. 0a.003f), got {:?}",
|
||||||
|
s
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let adapter_id = u8::from_str_radix(split[0], 16).context(format!(
|
||||||
|
"Wrong AP bus format. AP ID needs to be in the form <xx> (e.g. 0a), got {:?}",
|
||||||
|
split[0]
|
||||||
|
))?;
|
||||||
|
let adapter_domain = u16::from_str_radix(split[1], 16).context(format!(
|
||||||
|
"Wrong AP bus format. AP domain needs to be in the form <xxxx> (e.g. 003f), got {:?}",
|
||||||
|
split[1]
|
||||||
|
))?;
|
||||||
|
|
||||||
|
Ok(Address::new(adapter_id, adapter_domain))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for Address {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||||
|
write!(f, "{:02x}.{:04x}", self.adapter_id, self.adapter_domain)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// A short-form address parses and is rendered zero-padded; malformed inputs are rejected.
    #[test]
    fn test_from_str() {
        let device: Address = "a.1".parse().unwrap();
        assert_eq!(device.to_string(), "0a.0001");

        // Empty input, missing parts, too many parts, non-hex digit, domain out of range.
        let malformed = ["", ".", "0.0.0", "0g.0000", "0a.10000"];
        for input in malformed.iter() {
            assert!(
                input.parse::<Address>().is_err(),
                "{:?} should be rejected",
                input
            );
        }
    }
}
|
@ -16,13 +16,12 @@ use std::str::FromStr;
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tokio::sync::Mutex;
|
use tokio::sync::Mutex;
|
||||||
|
|
||||||
#[cfg(target_arch = "s390x")]
|
|
||||||
use crate::ccw;
|
|
||||||
use crate::linux_abi::*;
|
use crate::linux_abi::*;
|
||||||
use crate::pci;
|
use crate::pci;
|
||||||
use crate::sandbox::Sandbox;
|
use crate::sandbox::Sandbox;
|
||||||
use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher};
|
use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher};
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
|
use cfg_if::cfg_if;
|
||||||
use oci::{LinuxDeviceCgroup, LinuxResources, Spec};
|
use oci::{LinuxDeviceCgroup, LinuxResources, Spec};
|
||||||
use protocols::agent::Device;
|
use protocols::agent::Device;
|
||||||
use tracing::instrument;
|
use tracing::instrument;
|
||||||
@ -46,14 +45,22 @@ pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm";
|
|||||||
pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral";
|
pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral";
|
||||||
pub const DRIVER_LOCAL_TYPE: &str = "local";
|
pub const DRIVER_LOCAL_TYPE: &str = "local";
|
||||||
pub const DRIVER_WATCHABLE_BIND_TYPE: &str = "watchable-bind";
|
pub const DRIVER_WATCHABLE_BIND_TYPE: &str = "watchable-bind";
|
||||||
// VFIO device to be bound to a guest kernel driver
|
// VFIO PCI device to be bound to a guest kernel driver
|
||||||
pub const DRIVER_VFIO_GK_TYPE: &str = "vfio-gk";
|
pub const DRIVER_VFIO_PCI_GK_TYPE: &str = "vfio-pci-gk";
|
||||||
// VFIO device to be bound to vfio-pci and made available inside the
|
// VFIO PCI device to be bound to vfio-pci and made available inside the
|
||||||
// container as a VFIO device node
|
// container as a VFIO device node
|
||||||
pub const DRIVER_VFIO_TYPE: &str = "vfio";
|
pub const DRIVER_VFIO_PCI_TYPE: &str = "vfio-pci";
|
||||||
|
pub const DRIVER_VFIO_AP_TYPE: &str = "vfio-ap";
|
||||||
pub const DRIVER_OVERLAYFS_TYPE: &str = "overlayfs";
|
pub const DRIVER_OVERLAYFS_TYPE: &str = "overlayfs";
|
||||||
pub const FS_TYPE_HUGETLB: &str = "hugetlbfs";
|
pub const FS_TYPE_HUGETLB: &str = "hugetlbfs";
|
||||||
|
|
||||||
|
cfg_if! {
|
||||||
|
if #[cfg(target_arch = "s390x")] {
|
||||||
|
use crate::ap;
|
||||||
|
use crate::ccw;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[instrument]
|
#[instrument]
|
||||||
pub fn online_device(path: &str) -> Result<()> {
|
pub fn online_device(path: &str) -> Result<()> {
|
||||||
fs::write(path, "1")?;
|
fs::write(path, "1")?;
|
||||||
@ -280,7 +287,7 @@ pub async fn get_virtio_blk_ccw_device_name(
|
|||||||
sandbox: &Arc<Mutex<Sandbox>>,
|
sandbox: &Arc<Mutex<Sandbox>>,
|
||||||
device: &ccw::Device,
|
device: &ccw::Device,
|
||||||
) -> Result<String> {
|
) -> Result<String> {
|
||||||
let matcher = VirtioBlkCCWMatcher::new(&create_ccw_root_bus_path(), device);
|
let matcher = VirtioBlkCCWMatcher::new(CCW_ROOT_BUS_PATH, device);
|
||||||
let uev = wait_for_uevent(sandbox, matcher).await?;
|
let uev = wait_for_uevent(sandbox, matcher).await?;
|
||||||
let devname = uev.devname;
|
let devname = uev.devname;
|
||||||
return match Path::new(SYSTEM_DEV_PATH).join(&devname).to_str() {
|
return match Path::new(SYSTEM_DEV_PATH).join(&devname).to_str() {
|
||||||
@ -401,6 +408,39 @@ async fn get_vfio_device_name(sandbox: &Arc<Mutex<Sandbox>>, grp: IommuGroup) ->
|
|||||||
Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname))
|
Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Uevent matcher for the "add" event of a single AP queue.
#[cfg(target_arch = "s390x")]
#[derive(Debug)]
struct ApMatcher {
    /// Expected uevent devpath: `<AP_ROOT_BUS_PATH>/card<xx>/<xx>.<xxxx>`.
    syspath: String,
}

#[cfg(target_arch = "s390x")]
impl ApMatcher {
    /// Builds a matcher for the AP queue at `address`.
    fn new(address: ap::Address) -> ApMatcher {
        ApMatcher {
            // The card directory is named after the adapter ID; the queue entry below it uses
            // the full `<xx>.<xxxx>` rendering of the address.
            syspath: format!(
                "{}/card{:02x}/{}",
                AP_ROOT_BUS_PATH, address.adapter_id, address
            ),
        }
    }
}

#[cfg(target_arch = "s390x")]
impl UeventMatcher for ApMatcher {
    /// Matches only "add" events whose devpath is exactly the expected queue path.
    fn is_match(&self, uev: &Uevent) -> bool {
        // Use the shared uevent action constant rather than a string literal.
        uev.action == U_EVENT_ACTION_ADD && uev.devpath == self.syspath
    }
}
|
||||||
|
|
||||||
|
/// Waits until `wait_for_uevent` reports an uevent matching the AP queue at `address`.
///
/// The matched uevent itself is not needed by callers and is dropped.
#[cfg(target_arch = "s390x")]
#[instrument]
async fn wait_for_ap_device(sandbox: &Arc<Mutex<Sandbox>>, address: ap::Address) -> Result<()> {
    let _uevent = wait_for_uevent(sandbox, ApMatcher::new(address)).await?;
    Ok(())
}
|
||||||
|
|
||||||
/// Scan SCSI bus for the given SCSI address(SCSI-Id and LUN)
|
/// Scan SCSI bus for the given SCSI address(SCSI-Id and LUN)
|
||||||
#[instrument]
|
#[instrument]
|
||||||
fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
|
fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
|
||||||
@ -699,7 +739,7 @@ async fn virtio_nvdimm_device_handler(
|
|||||||
Ok(DevNumUpdate::from_vm_path(&device.vm_path)?.into())
|
Ok(DevNumUpdate::from_vm_path(&device.vm_path)?.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn split_vfio_option(opt: &str) -> Option<(&str, &str)> {
|
fn split_vfio_pci_option(opt: &str) -> Option<(&str, &str)> {
|
||||||
let mut tokens = opt.split('=');
|
let mut tokens = opt.split('=');
|
||||||
let hostbdf = tokens.next()?;
|
let hostbdf = tokens.next()?;
|
||||||
let path = tokens.next()?;
|
let path = tokens.next()?;
|
||||||
@ -714,14 +754,18 @@ fn split_vfio_option(opt: &str) -> Option<(&str, &str)> {
|
|||||||
// Each option should have the form "DDDD:BB:DD.F=<pcipath>"
|
// Each option should have the form "DDDD:BB:DD.F=<pcipath>"
|
||||||
// DDDD:BB:DD.F is the device's PCI address in the host
|
// DDDD:BB:DD.F is the device's PCI address in the host
|
||||||
// <pcipath> is a PCI path to the device in the guest (see pci.rs)
|
// <pcipath> is a PCI path to the device in the guest (see pci.rs)
|
||||||
async fn vfio_device_handler(device: &Device, sandbox: &Arc<Mutex<Sandbox>>) -> Result<SpecUpdate> {
|
#[instrument]
|
||||||
let vfio_in_guest = device.field_type != DRIVER_VFIO_GK_TYPE;
|
async fn vfio_pci_device_handler(
|
||||||
|
device: &Device,
|
||||||
|
sandbox: &Arc<Mutex<Sandbox>>,
|
||||||
|
) -> Result<SpecUpdate> {
|
||||||
|
let vfio_in_guest = device.field_type != DRIVER_VFIO_PCI_GK_TYPE;
|
||||||
let mut pci_fixups = Vec::<(pci::Address, pci::Address)>::new();
|
let mut pci_fixups = Vec::<(pci::Address, pci::Address)>::new();
|
||||||
let mut group = None;
|
let mut group = None;
|
||||||
|
|
||||||
for opt in device.options.iter() {
|
for opt in device.options.iter() {
|
||||||
let (host, pcipath) =
|
let (host, pcipath) = split_vfio_pci_option(opt)
|
||||||
split_vfio_option(opt).ok_or_else(|| anyhow!("Malformed VFIO option {:?}", opt))?;
|
.ok_or_else(|| anyhow!("Malformed VFIO PCI option {:?}", opt))?;
|
||||||
let host =
|
let host =
|
||||||
pci::Address::from_str(host).context("Bad host PCI address in VFIO option {:?}")?;
|
pci::Address::from_str(host).context("Bad host PCI address in VFIO option {:?}")?;
|
||||||
let pcipath = pci::Path::from_str(pcipath)?;
|
let pcipath = pci::Path::from_str(pcipath)?;
|
||||||
@ -763,6 +807,28 @@ async fn vfio_device_handler(device: &Device, sandbox: &Arc<Mutex<Sandbox>>) ->
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// VFIO AP (Adjunct Processor) device handler.
///
/// Forces a rescan of the AP bus and then waits, one after another, for every APQN passed in
/// the device options to show up. No changes to the container spec are requested.
///
/// # Errors
///
/// Fails if the rescan cannot be triggered, if an option is not a valid APQN of the form
/// `<xx>.<xxxx>`, or if waiting for one of the AP queues fails.
#[cfg(target_arch = "s390x")]
#[instrument]
async fn vfio_ap_device_handler(
    device: &Device,
    sandbox: &Arc<Mutex<Sandbox>>,
) -> Result<SpecUpdate> {
    // Force AP bus rescan
    fs::write(AP_SCANS_PATH, "1")
        .with_context(|| format!("Failed to trigger AP bus rescan via {}", AP_SCANS_PATH))?;
    for apqn in device.options.iter() {
        let address = ap::Address::from_str(apqn)
            .with_context(|| format!("Bad APQN in VFIO AP option {:?}", apqn))?;
        wait_for_ap_device(sandbox, address).await?;
    }
    Ok(Default::default())
}
|
||||||
|
|
||||||
|
#[cfg(not(target_arch = "s390x"))]
|
||||||
|
async fn vfio_ap_device_handler(_: &Device, _: &Arc<Mutex<Sandbox>>) -> Result<SpecUpdate> {
|
||||||
|
Err(anyhow!("AP is only supported on s390x"))
|
||||||
|
}
|
||||||
|
|
||||||
#[instrument]
|
#[instrument]
|
||||||
pub async fn add_devices(
|
pub async fn add_devices(
|
||||||
devices: &[Device],
|
devices: &[Device],
|
||||||
@ -828,7 +894,10 @@ async fn add_device(device: &Device, sandbox: &Arc<Mutex<Sandbox>>) -> Result<Sp
|
|||||||
DRIVER_MMIO_BLK_TYPE => virtiommio_blk_device_handler(device, sandbox).await,
|
DRIVER_MMIO_BLK_TYPE => virtiommio_blk_device_handler(device, sandbox).await,
|
||||||
DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, sandbox).await,
|
DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, sandbox).await,
|
||||||
DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, sandbox).await,
|
DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, sandbox).await,
|
||||||
DRIVER_VFIO_GK_TYPE | DRIVER_VFIO_TYPE => vfio_device_handler(device, sandbox).await,
|
DRIVER_VFIO_PCI_GK_TYPE | DRIVER_VFIO_PCI_TYPE => {
|
||||||
|
vfio_pci_device_handler(device, sandbox).await
|
||||||
|
}
|
||||||
|
DRIVER_VFIO_AP_TYPE => vfio_ap_device_handler(device, sandbox).await,
|
||||||
_ => Err(anyhow!("Unknown device type {}", device.field_type)),
|
_ => Err(anyhow!("Unknown device type {}", device.field_type)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1378,7 +1447,7 @@ mod tests {
|
|||||||
#[cfg(target_arch = "s390x")]
|
#[cfg(target_arch = "s390x")]
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_virtio_blk_ccw_matcher() {
|
async fn test_virtio_blk_ccw_matcher() {
|
||||||
let root_bus = create_ccw_root_bus_path();
|
let root_bus = CCW_ROOT_BUS_PATH;
|
||||||
let subsystem = "block";
|
let subsystem = "block";
|
||||||
let devname = "vda";
|
let devname = "vda";
|
||||||
let relpath = "0.0.0002";
|
let relpath = "0.0.0002";
|
||||||
@ -1487,13 +1556,13 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_split_vfio_option() {
|
fn test_split_vfio_pci_option() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
split_vfio_option("0000:01:00.0=02/01"),
|
split_vfio_pci_option("0000:01:00.0=02/01"),
|
||||||
Some(("0000:01:00.0", "02/01"))
|
Some(("0000:01:00.0", "02/01"))
|
||||||
);
|
);
|
||||||
assert_eq!(split_vfio_option("0000:01:00.0=02/01=rubbish"), None);
|
assert_eq!(split_vfio_pci_option("0000:01:00.0=02/01=rubbish"), None);
|
||||||
assert_eq!(split_vfio_option("0000:01:00.0"), None);
|
assert_eq!(split_vfio_pci_option("0000:01:00.0"), None);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -1572,4 +1641,35 @@ mod tests {
|
|||||||
// Test dev2
|
// Test dev2
|
||||||
assert!(pci_iommu_group(&syspci, dev2).is_err());
|
assert!(pci_iommu_group(&syspci, dev2).is_err());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(target_arch = "s390x")]
#[tokio::test]
async fn test_vfio_ap_matcher() {
    let card = "0a";
    let queue = format!("{}.0001", card);
    let other_queue = format!("{}.0002", card);

    // "add" event for the queue the matcher is built for.
    let mut uev = Uevent::default();
    uev.action = U_EVENT_ACTION_ADD.to_string();
    uev.subsystem = "ap".to_string();
    uev.devpath = format!("{}/card{}/{}", AP_ROOT_BUS_PATH, card, queue);

    let matcher = ApMatcher::new(ap::Address::from_str(&queue).unwrap());
    assert!(matcher.is_match(&uev));

    // Same device, but a "remove" event must not match.
    let uev_remove = {
        let mut event = uev.clone();
        event.action = U_EVENT_ACTION_REMOVE.to_string();
        event
    };
    assert!(!matcher.is_match(&uev_remove));

    // "add" event for a different queue on the same card must not match.
    let uev_other_device = {
        let mut event = uev.clone();
        event.devpath = format!("{}/card{}/{}", AP_ROOT_BUS_PATH, card, other_queue);
        event
    };
    assert!(!matcher.is_match(&uev_other_device));
}
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,8 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
//
|
//
|
||||||
|
|
||||||
|
use cfg_if::cfg_if;
|
||||||
|
|
||||||
/// Linux ABI related constants.
|
/// Linux ABI related constants.
|
||||||
|
|
||||||
#[cfg(target_arch = "aarch64")]
|
#[cfg(target_arch = "aarch64")]
|
||||||
@ -64,10 +66,14 @@ pub fn create_pci_root_bus_path() -> String {
|
|||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(target_arch = "s390x")]
|
cfg_if! {
|
||||||
pub fn create_ccw_root_bus_path() -> String {
|
if #[cfg(target_arch = "s390x")] {
|
||||||
String::from("/devices/css0")
|
pub const CCW_ROOT_BUS_PATH: &str = "/devices/css0";
|
||||||
|
pub const AP_ROOT_BUS_PATH: &str = "/devices/ap";
|
||||||
|
pub const AP_SCANS_PATH: &str = "/sys/bus/ap/scans";
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// From https://www.kernel.org/doc/Documentation/acpi/namespace.txt
|
// From https://www.kernel.org/doc/Documentation/acpi/namespace.txt
|
||||||
// The Linux kernel's core ACPI subsystem creates struct acpi_device
|
// The Linux kernel's core ACPI subsystem creates struct acpi_device
|
||||||
// objects for ACPI namespace objects representing devices, power resources
|
// objects for ACPI namespace objects representing devices, power resources
|
||||||
|
@ -20,6 +20,7 @@ extern crate scopeguard;
|
|||||||
extern crate slog;
|
extern crate slog;
|
||||||
|
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
|
use cfg_if::cfg_if;
|
||||||
use clap::{AppSettings, Parser};
|
use clap::{AppSettings, Parser};
|
||||||
use nix::fcntl::OFlag;
|
use nix::fcntl::OFlag;
|
||||||
use nix::sys::socket::{self, AddressFamily, SockFlag, SockType, VsockAddr};
|
use nix::sys::socket::{self, AddressFamily, SockFlag, SockType, VsockAddr};
|
||||||
@ -34,8 +35,6 @@ use std::process::exit;
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tracing::{instrument, span};
|
use tracing::{instrument, span};
|
||||||
|
|
||||||
#[cfg(target_arch = "s390x")]
|
|
||||||
mod ccw;
|
|
||||||
mod config;
|
mod config;
|
||||||
mod console;
|
mod console;
|
||||||
mod device;
|
mod device;
|
||||||
@ -74,6 +73,13 @@ use tokio::{
|
|||||||
mod rpc;
|
mod rpc;
|
||||||
mod tracer;
|
mod tracer;
|
||||||
|
|
||||||
|
cfg_if! {
|
||||||
|
if #[cfg(target_arch = "s390x")] {
|
||||||
|
mod ap;
|
||||||
|
mod ccw;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const NAME: &str = "kata-agent";
|
const NAME: &str = "kata-agent";
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
|
@ -258,15 +258,24 @@ const (
|
|||||||
// VFIODeviceErrorType is the error type of VFIO device
|
// VFIODeviceErrorType is the error type of VFIO device
|
||||||
VFIODeviceErrorType VFIODeviceType = iota
|
VFIODeviceErrorType VFIODeviceType = iota
|
||||||
|
|
||||||
// VFIODeviceNormalType is a normal VFIO device type
|
// VFIOPCIDeviceNormalType is a normal VFIO PCI device type
|
||||||
VFIODeviceNormalType
|
VFIOPCIDeviceNormalType
|
||||||
|
|
||||||
// VFIODeviceMediatedType is a VFIO mediated device type
|
// VFIOPCIDeviceMediatedType is a VFIO PCI mediated device type
|
||||||
VFIODeviceMediatedType
|
VFIOPCIDeviceMediatedType
|
||||||
|
|
||||||
|
// VFIOAPDeviceMediatedType is a VFIO AP mediated device type
|
||||||
|
VFIOAPDeviceMediatedType
|
||||||
)
|
)
|
||||||
|
|
||||||
// VFIODev represents a VFIO drive used for hotplugging
|
type VFIODev interface {
|
||||||
type VFIODev struct {
|
GetID() *string
|
||||||
|
GetType() VFIODeviceType
|
||||||
|
GetSysfsDev() *string
|
||||||
|
}
|
||||||
|
|
||||||
|
// VFIOPCIDev represents a VFIO PCI device used for hotplugging
|
||||||
|
type VFIOPCIDev struct {
|
||||||
// ID is used to identify this drive in the hypervisor options.
|
// ID is used to identify this drive in the hypervisor options.
|
||||||
ID string
|
ID string
|
||||||
|
|
||||||
@ -298,6 +307,44 @@ type VFIODev struct {
|
|||||||
IsPCIe bool
|
IsPCIe bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (d VFIOPCIDev) GetID() *string {
|
||||||
|
return &d.ID
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d VFIOPCIDev) GetType() VFIODeviceType {
|
||||||
|
return d.Type
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d VFIOPCIDev) GetSysfsDev() *string {
|
||||||
|
return &d.SysfsDev
|
||||||
|
}
|
||||||
|
|
||||||
|
type VFIOAPDev struct {
|
||||||
|
// ID is used to identify this drive in the hypervisor options.
|
||||||
|
ID string
|
||||||
|
|
||||||
|
// sysfsdev of VFIO mediated device
|
||||||
|
SysfsDev string
|
||||||
|
|
||||||
|
// APDevices are the Adjunct Processor devices assigned to the mdev
|
||||||
|
APDevices []string
|
||||||
|
|
||||||
|
// Type of VFIO device
|
||||||
|
Type VFIODeviceType
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d VFIOAPDev) GetID() *string {
|
||||||
|
return &d.ID
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d VFIOAPDev) GetType() VFIODeviceType {
|
||||||
|
return d.Type
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d VFIOAPDev) GetSysfsDev() *string {
|
||||||
|
return &d.SysfsDev
|
||||||
|
}
|
||||||
|
|
||||||
// RNGDev represents a random number generator device
|
// RNGDev represents a random number generator device
|
||||||
type RNGDev struct {
|
type RNGDev struct {
|
||||||
// ID is used to identify the device in the hypervisor options.
|
// ID is used to identify the device in the hypervisor options.
|
||||||
|
@ -89,18 +89,47 @@ func readPCIProperty(propertyPath string) (string, error) {
|
|||||||
return strings.Split(string(buf), "\n")[0], nil
|
return strings.Split(string(buf), "\n")[0], nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetVFIODeviceType(deviceFileName string) config.VFIODeviceType {
|
func GetVFIODeviceType(deviceFilePath string) (config.VFIODeviceType, error) {
|
||||||
|
deviceFileName := filepath.Base(deviceFilePath)
|
||||||
|
|
||||||
//For example, 0000:04:00.0
|
//For example, 0000:04:00.0
|
||||||
tokens := strings.Split(deviceFileName, ":")
|
tokens := strings.Split(deviceFileName, ":")
|
||||||
vfioDeviceType := config.VFIODeviceErrorType
|
|
||||||
if len(tokens) == 3 {
|
if len(tokens) == 3 {
|
||||||
vfioDeviceType = config.VFIODeviceNormalType
|
return config.VFIOPCIDeviceNormalType, nil
|
||||||
} else {
|
}
|
||||||
|
|
||||||
//For example, 83b8f4f2-509f-382f-3c1e-e6bfe0fa1001
|
//For example, 83b8f4f2-509f-382f-3c1e-e6bfe0fa1001
|
||||||
tokens = strings.Split(deviceFileName, "-")
|
tokens = strings.Split(deviceFileName, "-")
|
||||||
if len(tokens) == 5 {
|
if len(tokens) != 5 {
|
||||||
vfioDeviceType = config.VFIODeviceMediatedType
|
return config.VFIODeviceErrorType, fmt.Errorf("Incorrect tokens found while parsing VFIO details: %s", deviceFileName)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
deviceSysfsDev, err := GetSysfsDev(deviceFilePath)
|
||||||
|
if err != nil {
|
||||||
|
return config.VFIODeviceErrorType, err
|
||||||
}
|
}
|
||||||
return vfioDeviceType
|
|
||||||
|
if strings.HasPrefix(deviceSysfsDev, vfioAPSysfsDir) {
|
||||||
|
return config.VFIOAPDeviceMediatedType, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return config.VFIOPCIDeviceMediatedType, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetSysfsDev returns the sysfsdev of a mediated device by resolving every
// symbolic link in the given path.
//
// The expected input is the absolute path to the sysfs dev node, e.g.
// /sys/kernel/iommu_groups/0/devices/f79944e4-5a3d-11e8-99ce-479cbab002e4
func GetSysfsDev(sysfsDevStr string) (string, error) {
	resolved, err := filepath.EvalSymlinks(sysfsDevStr)
	return resolved, err
}
|
||||||
|
|
||||||
|
// GetAPVFIODevices retrieves all APQNs associated with a mediated VFIO-AP
|
||||||
|
// device
|
||||||
|
func GetAPVFIODevices(sysfsdev string) ([]string, error) {
|
||||||
|
data, err := os.ReadFile(filepath.Join(sysfsdev, "matrix"))
|
||||||
|
if err != nil {
|
||||||
|
return []string{}, err
|
||||||
|
}
|
||||||
|
// Split by newlines, omitting final newline
|
||||||
|
return strings.Split(string(data[:len(data)-1]), "\n"), nil
|
||||||
}
|
}
|
||||||
|
@ -30,6 +30,7 @@ const (
|
|||||||
iommuGroupPath = "/sys/bus/pci/devices/%s/iommu_group"
|
iommuGroupPath = "/sys/bus/pci/devices/%s/iommu_group"
|
||||||
vfioDevPath = "/dev/vfio/%s"
|
vfioDevPath = "/dev/vfio/%s"
|
||||||
pcieRootPortPrefix = "rp"
|
pcieRootPortPrefix = "rp"
|
||||||
|
vfioAPSysfsDir = "/sys/devices/vfio_ap"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@ -85,19 +86,42 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
vfio := &config.VFIODev{
|
id := utils.MakeNameID("vfio", device.DeviceInfo.ID+strconv.Itoa(i), maxDevIDSize)
|
||||||
ID: utils.MakeNameID("vfio", device.DeviceInfo.ID+strconv.Itoa(i), maxDevIDSize),
|
|
||||||
|
var vfio config.VFIODev
|
||||||
|
|
||||||
|
switch vfioDeviceType {
|
||||||
|
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
||||||
|
isPCIe := isPCIeDevice(deviceBDF)
|
||||||
|
// Do not directly assign to `vfio` -- need to access field still
|
||||||
|
vfioPCI := config.VFIOPCIDev{
|
||||||
|
ID: id,
|
||||||
Type: vfioDeviceType,
|
Type: vfioDeviceType,
|
||||||
BDF: deviceBDF,
|
BDF: deviceBDF,
|
||||||
SysfsDev: deviceSysfsDev,
|
SysfsDev: deviceSysfsDev,
|
||||||
IsPCIe: isPCIeDevice(deviceBDF),
|
IsPCIe: isPCIe,
|
||||||
Class: getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass),
|
Class: getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass),
|
||||||
}
|
}
|
||||||
device.VfioDevs = append(device.VfioDevs, vfio)
|
if isPCIe {
|
||||||
if vfio.IsPCIe {
|
vfioPCI.Bus = fmt.Sprintf("%s%d", pcieRootPortPrefix, len(AllPCIeDevs))
|
||||||
vfio.Bus = fmt.Sprintf("%s%d", pcieRootPortPrefix, len(AllPCIeDevs))
|
AllPCIeDevs[deviceBDF] = true
|
||||||
AllPCIeDevs[vfio.BDF] = true
|
|
||||||
}
|
}
|
||||||
|
vfio = vfioPCI
|
||||||
|
case config.VFIOAPDeviceMediatedType:
|
||||||
|
devices, err := GetAPVFIODevices(deviceSysfsDev)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
vfio = config.VFIOAPDev{
|
||||||
|
ID: id,
|
||||||
|
SysfsDev: deviceSysfsDev,
|
||||||
|
Type: config.VFIOAPDeviceMediatedType,
|
||||||
|
APDevices: devices,
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("Failed to append device: VFIO device type unrecognized")
|
||||||
|
}
|
||||||
|
device.VfioDevs = append(device.VfioDevs, &vfio)
|
||||||
}
|
}
|
||||||
|
|
||||||
coldPlug := device.DeviceInfo.ColdPlug
|
coldPlug := device.DeviceInfo.ColdPlug
|
||||||
@ -192,31 +216,60 @@ func (device *VFIODevice) Load(ds config.DeviceState) {
|
|||||||
device.GenericDevice.Load(ds)
|
device.GenericDevice.Load(ds)
|
||||||
|
|
||||||
for _, dev := range ds.VFIODevs {
|
for _, dev := range ds.VFIODevs {
|
||||||
device.VfioDevs = append(device.VfioDevs, &config.VFIODev{
|
var vfio config.VFIODev
|
||||||
ID: dev.ID,
|
|
||||||
Type: config.VFIODeviceType(dev.Type),
|
vfioDeviceType := (*device.VfioDevs[0]).GetType()
|
||||||
BDF: dev.BDF,
|
switch vfioDeviceType {
|
||||||
SysfsDev: dev.SysfsDev,
|
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
||||||
})
|
bdf := ""
|
||||||
|
if pciDev, ok := (*dev).(config.VFIOPCIDev); ok {
|
||||||
|
bdf = pciDev.BDF
|
||||||
|
}
|
||||||
|
vfio = config.VFIOPCIDev{
|
||||||
|
ID: *(*dev).GetID(),
|
||||||
|
Type: config.VFIODeviceType((*dev).GetType()),
|
||||||
|
BDF: bdf,
|
||||||
|
SysfsDev: *(*dev).GetSysfsDev(),
|
||||||
|
}
|
||||||
|
case config.VFIOAPDeviceMediatedType:
|
||||||
|
vfio = config.VFIOAPDev{
|
||||||
|
ID: *(*dev).GetID(),
|
||||||
|
SysfsDev: *(*dev).GetSysfsDev(),
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
deviceLogger().WithError(
|
||||||
|
fmt.Errorf("VFIO device type unrecognized"),
|
||||||
|
).Error("Failed to append device")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
device.VfioDevs = append(device.VfioDevs, &vfio)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// It should implement GetAttachCount() and DeviceID() as api.Device implementation
|
// It should implement GetAttachCount() and DeviceID() as api.Device implementation
|
||||||
// here it shares function from *GenericDevice so we don't need duplicate codes
|
// here it shares function from *GenericDevice so we don't need duplicate codes
|
||||||
func getVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceSysfsDev string, vfioDeviceType config.VFIODeviceType, err error) {
|
func getVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceSysfsDev string, vfioDeviceType config.VFIODeviceType, err error) {
|
||||||
vfioDeviceType = GetVFIODeviceType(deviceFileName)
|
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
|
||||||
|
vfioDeviceType, err = GetVFIODeviceType(sysfsDevStr)
|
||||||
|
if err != nil {
|
||||||
|
return deviceBDF, deviceSysfsDev, vfioDeviceType, err
|
||||||
|
}
|
||||||
|
|
||||||
switch vfioDeviceType {
|
switch vfioDeviceType {
|
||||||
case config.VFIODeviceNormalType:
|
case config.VFIOPCIDeviceNormalType:
|
||||||
// Get bdf of device eg. 0000:00:1c.0
|
// Get bdf of device eg. 0000:00:1c.0
|
||||||
deviceBDF = getBDF(deviceFileName)
|
deviceBDF = getBDF(deviceFileName)
|
||||||
// Get sysfs path used by cloud-hypervisor
|
// Get sysfs path used by cloud-hypervisor
|
||||||
deviceSysfsDev = filepath.Join(config.SysBusPciDevicesPath, deviceFileName)
|
deviceSysfsDev = filepath.Join(config.SysBusPciDevicesPath, deviceFileName)
|
||||||
case config.VFIODeviceMediatedType:
|
case config.VFIOPCIDeviceMediatedType:
|
||||||
// Get sysfsdev of device eg. /sys/devices/pci0000:00/0000:00:02.0/f79944e4-5a3d-11e8-99ce-479cbab002e4
|
// Get sysfsdev of device eg. /sys/devices/pci0000:00/0000:00:02.0/f79944e4-5a3d-11e8-99ce-479cbab002e4
|
||||||
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
|
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
|
||||||
deviceSysfsDev, err = getSysfsDev(sysfsDevStr)
|
deviceSysfsDev, err = GetSysfsDev(sysfsDevStr)
|
||||||
deviceBDF = getBDF(getMediatedBDF(deviceSysfsDev))
|
deviceBDF = getBDF(getMediatedBDF(deviceSysfsDev))
|
||||||
|
case config.VFIOAPDeviceMediatedType:
|
||||||
|
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
|
||||||
|
deviceSysfsDev, err = GetSysfsDev(sysfsDevStr)
|
||||||
default:
|
default:
|
||||||
err = fmt.Errorf("Incorrect tokens found while parsing vfio details: %s", deviceFileName)
|
err = fmt.Errorf("Incorrect tokens found while parsing vfio details: %s", deviceFileName)
|
||||||
}
|
}
|
||||||
@ -244,13 +297,6 @@ func getBDF(deviceSysStr string) string {
|
|||||||
return tokens[1]
|
return tokens[1]
|
||||||
}
|
}
|
||||||
|
|
||||||
// getSysfsDev returns the sysfsdev of mediated device
|
|
||||||
// Expected input string format is absolute path to the sysfs dev node
|
|
||||||
// eg. /sys/kernel/iommu_groups/0/devices/f79944e4-5a3d-11e8-99ce-479cbab002e4
|
|
||||||
func getSysfsDev(sysfsDevStr string) (string, error) {
|
|
||||||
return filepath.EvalSymlinks(sysfsDevStr)
|
|
||||||
}
|
|
||||||
|
|
||||||
// BindDevicetoVFIO binds the device to vfio driver after unbinding from host.
|
// BindDevicetoVFIO binds the device to vfio driver after unbinding from host.
|
||||||
// Will be called by a network interface or a generic pcie device.
|
// Will be called by a network interface or a generic pcie device.
|
||||||
func BindDevicetoVFIO(bdf, hostDriver, vendorDeviceID string) (string, error) {
|
func BindDevicetoVFIO(bdf, hostDriver, vendorDeviceID string) (string, error) {
|
||||||
|
@ -32,9 +32,9 @@ func TestGetVFIODetails(t *testing.T) {
|
|||||||
deviceBDF, deviceSysfsDev, vfioDeviceType, err := getVFIODetails(d.deviceStr, "")
|
deviceBDF, deviceSysfsDev, vfioDeviceType, err := getVFIODetails(d.deviceStr, "")
|
||||||
|
|
||||||
switch vfioDeviceType {
|
switch vfioDeviceType {
|
||||||
case config.VFIODeviceNormalType:
|
case config.VFIOPCIDeviceNormalType:
|
||||||
assert.Equal(t, d.expectedStr, deviceBDF)
|
assert.Equal(t, d.expectedStr, deviceBDF)
|
||||||
case config.VFIODeviceMediatedType:
|
case config.VFIOPCIDeviceMediatedType, config.VFIOAPDeviceMediatedType:
|
||||||
assert.Equal(t, d.expectedStr, deviceSysfsDev)
|
assert.Equal(t, d.expectedStr, deviceSysfsDev)
|
||||||
default:
|
default:
|
||||||
assert.NotNil(t, err)
|
assert.NotNil(t, err)
|
||||||
|
@ -857,12 +857,12 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error {
|
|||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
// Create the clh device config via the constructor to ensure default values are properly assigned
|
// Create the clh device config via the constructor to ensure default values are properly assigned
|
||||||
clhDevice := *chclient.NewDeviceConfig(device.SysfsDev)
|
clhDevice := *chclient.NewDeviceConfig(*(*device).GetSysfsDev())
|
||||||
pciInfo, _, err := cl.VmAddDevicePut(ctx, clhDevice)
|
pciInfo, _, err := cl.VmAddDevicePut(ctx, clhDevice)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err))
|
return fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err))
|
||||||
}
|
}
|
||||||
clh.devicesIds[device.ID] = pciInfo.GetId()
|
clh.devicesIds[*(*device).GetID()] = pciInfo.GetId()
|
||||||
|
|
||||||
// clh doesn't use bridges, so the PCI path is simply the slot
|
// clh doesn't use bridges, so the PCI path is simply the slot
|
||||||
// number of the device. This will break if clh starts using
|
// number of the device. This will break if clh starts using
|
||||||
@ -879,7 +879,14 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error {
|
|||||||
return fmt.Errorf("Unexpected PCI address %q from clh hotplug", pciInfo.Bdf)
|
return fmt.Errorf("Unexpected PCI address %q from clh hotplug", pciInfo.Bdf)
|
||||||
}
|
}
|
||||||
|
|
||||||
device.GuestPciPath, err = types.PciPathFromString(tokens[0])
|
guestPciPath, err := types.PciPathFromString(tokens[0])
|
||||||
|
|
||||||
|
pciDevice, ok := (*device).(config.VFIOPCIDev)
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("VFIO device %+v is not PCI, only PCI is supported in Cloud Hypervisor", device)
|
||||||
|
}
|
||||||
|
pciDevice.GuestPciPath = guestPciPath
|
||||||
|
*device = pciDevice
|
||||||
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -923,7 +930,7 @@ func (clh *cloudHypervisor) HotplugRemoveDevice(ctx context.Context, devInfo int
|
|||||||
case BlockDev:
|
case BlockDev:
|
||||||
deviceID = clhDriveIndexToID(devInfo.(*config.BlockDrive).Index)
|
deviceID = clhDriveIndexToID(devInfo.(*config.BlockDrive).Index)
|
||||||
case VfioDev:
|
case VfioDev:
|
||||||
deviceID = devInfo.(*config.VFIODev).ID
|
deviceID = *devInfo.(config.VFIODev).GetID()
|
||||||
default:
|
default:
|
||||||
clh.Logger().WithFields(log.Fields{"devInfo": devInfo,
|
clh.Logger().WithFields(log.Fields{"devInfo": devInfo,
|
||||||
"deviceType": devType}).Error("HotplugRemoveDevice: unsupported device")
|
"deviceType": devType}).Error("HotplugRemoveDevice: unsupported device")
|
||||||
|
@ -624,7 +624,7 @@ func TestCloudHypervisorHotplugRemoveDevice(t *testing.T) {
|
|||||||
_, err = clh.HotplugRemoveDevice(context.Background(), &config.BlockDrive{}, BlockDev)
|
_, err = clh.HotplugRemoveDevice(context.Background(), &config.BlockDrive{}, BlockDev)
|
||||||
assert.NoError(err, "Hotplug remove block device expected no error")
|
assert.NoError(err, "Hotplug remove block device expected no error")
|
||||||
|
|
||||||
_, err = clh.HotplugRemoveDevice(context.Background(), &config.VFIODev{}, VfioDev)
|
_, err = clh.HotplugRemoveDevice(context.Background(), &config.VFIOPCIDev{}, VfioDev)
|
||||||
assert.NoError(err, "Hotplug remove vfio block device expected no error")
|
assert.NoError(err, "Hotplug remove vfio block device expected no error")
|
||||||
|
|
||||||
_, err = clh.HotplugRemoveDevice(context.Background(), nil, NetDev)
|
_, err = clh.HotplugRemoveDevice(context.Background(), nil, NetDev)
|
||||||
|
@ -97,8 +97,9 @@ var (
|
|||||||
kataVirtioFSDevType = "virtio-fs"
|
kataVirtioFSDevType = "virtio-fs"
|
||||||
kataOverlayDevType = "overlayfs"
|
kataOverlayDevType = "overlayfs"
|
||||||
kataWatchableBindDevType = "watchable-bind"
|
kataWatchableBindDevType = "watchable-bind"
|
||||||
kataVfioDevType = "vfio" // VFIO device to used as VFIO in the container
|
kataVfioPciDevType = "vfio-pci" // VFIO PCI device to used as VFIO in the container
|
||||||
kataVfioGuestKernelDevType = "vfio-gk" // VFIO device for consumption by the guest kernel
|
kataVfioPciGuestKernelDevType = "vfio-pci-gk" // VFIO PCI device for consumption by the guest kernel
|
||||||
|
kataVfioApDevType = "vfio-ap"
|
||||||
sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"}
|
sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"}
|
||||||
sharedDirVirtioFSOptions = []string{}
|
sharedDirVirtioFSOptions = []string{}
|
||||||
sharedDirVirtioFSDaxOptions = "dax"
|
sharedDirVirtioFSDaxOptions = "dax"
|
||||||
@ -1117,20 +1118,25 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c *
|
|||||||
|
|
||||||
groupNum := filepath.Base(dev.ContainerPath)
|
groupNum := filepath.Base(dev.ContainerPath)
|
||||||
|
|
||||||
// Each /dev/vfio/NN device represents a VFIO group, which
|
// For VFIO-PCI, each /dev/vfio/NN device represents a VFIO group,
|
||||||
// could include several PCI devices. So we give group
|
// which could include several PCI devices. So we give group
|
||||||
// information in the main structure, then list each
|
// information in the main structure, then list each individual PCI
|
||||||
// individual PCI device in the Options array.
|
// device in the Options array.
|
||||||
//
|
//
|
||||||
// Each option is formatted as "DDDD:BB:DD.F=<pcipath>"
|
// Each option is formatted as "DDDD:BB:DD.F=<pcipath>"
|
||||||
// DDDD:BB:DD.F is the device's PCI address on the
|
// DDDD:BB:DD.F is the device's PCI address on the
|
||||||
// *host*. <pcipath> is the device's PCI path in the guest
|
// *host*. <pcipath> is the device's PCI path in the guest
|
||||||
// (see qomGetPciPath() for details).
|
// (see qomGetPciPath() for details).
|
||||||
|
//
|
||||||
|
// For VFIO-AP, one VFIO group could include several queue devices. They are
|
||||||
|
// identified by APQNs (Adjunct Processor Queue Numbers), which do not differ
|
||||||
|
// between host and guest. They are passed as options so they can be awaited
|
||||||
|
// by the agent.
|
||||||
kataDevice := &grpc.Device{
|
kataDevice := &grpc.Device{
|
||||||
ContainerPath: dev.ContainerPath,
|
ContainerPath: dev.ContainerPath,
|
||||||
Type: kataVfioDevType,
|
Type: kataVfioPciDevType,
|
||||||
Id: groupNum,
|
Id: groupNum,
|
||||||
Options: make([]string, len(devList)),
|
Options: nil,
|
||||||
}
|
}
|
||||||
|
|
||||||
// We always pass the device information to the agent, since
|
// We always pass the device information to the agent, since
|
||||||
@ -1138,11 +1144,18 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c *
|
|||||||
// on the vfio_mode, we need to use a different device type so
|
// on the vfio_mode, we need to use a different device type so
|
||||||
// the agent can handle it properly
|
// the agent can handle it properly
|
||||||
if c.sandbox.config.VfioMode == config.VFIOModeGuestKernel {
|
if c.sandbox.config.VfioMode == config.VFIOModeGuestKernel {
|
||||||
kataDevice.Type = kataVfioGuestKernelDevType
|
kataDevice.Type = kataVfioPciGuestKernelDevType
|
||||||
}
|
}
|
||||||
|
|
||||||
for i, pciDev := range devList {
|
if (*devList[0]).GetType() == config.VFIOAPDeviceMediatedType {
|
||||||
kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", pciDev.BDF, pciDev.GuestPciPath)
|
kataDevice.Type = kataVfioApDevType
|
||||||
|
kataDevice.Options = (*devList[0]).(config.VFIOAPDev).APDevices
|
||||||
|
} else {
|
||||||
|
kataDevice.Options = make([]string, len(devList))
|
||||||
|
for i, device := range devList {
|
||||||
|
pciDevice := (*device).(config.VFIOPCIDev)
|
||||||
|
kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", pciDevice.BDF, pciDevice.GuestPciPath)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return kataDevice
|
return kataDevice
|
||||||
|
@ -1713,7 +1713,7 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
devID := device.ID
|
devID := *(*device).GetID()
|
||||||
machineType := q.HypervisorConfig().HypervisorMachineType
|
machineType := q.HypervisorConfig().HypervisorMachineType
|
||||||
|
|
||||||
if op == AddDevice {
|
if op == AddDevice {
|
||||||
@ -1730,29 +1730,31 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
|
|||||||
// for pc machine type instead of bridge. This is useful for devices that require
|
// for pc machine type instead of bridge. This is useful for devices that require
|
||||||
// a large PCI BAR which is currently a limitation with PCI bridges.
|
// a large PCI BAR which is currently a limitation with PCI bridges.
|
||||||
if q.state.HotplugVFIOOnRootBus {
|
if q.state.HotplugVFIOOnRootBus {
|
||||||
|
switch (*device).GetType() {
|
||||||
|
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
||||||
// In case MachineType is q35, a PCIe device is hotplugged on a PCIe Root Port.
|
// In case MachineType is q35, a PCIe device is hotplugged on a PCIe Root Port.
|
||||||
|
pciDevice, ok := (*device).(config.VFIOPCIDev)
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("VFIO device %+v is not PCI, but its Type said otherwise", device)
|
||||||
|
}
|
||||||
switch machineType {
|
switch machineType {
|
||||||
case QemuQ35:
|
case QemuQ35:
|
||||||
if device.IsPCIe && q.state.PCIeRootPort <= 0 {
|
if pciDevice.IsPCIe && q.state.PCIeRootPort <= 0 {
|
||||||
q.Logger().WithField("dev-id", device.ID).Warn("VFIO device is a PCIe device. It's recommended to add the PCIe Root Port by setting the pcie_root_port parameter in the configuration for q35")
|
q.Logger().WithField("dev-id", (*device).GetID()).Warn("VFIO device is a PCIe device. It's recommended to add the PCIe Root Port by setting the pcie_root_port parameter in the configuration for q35")
|
||||||
device.Bus = ""
|
pciDevice.Bus = ""
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
device.Bus = ""
|
pciDevice.Bus = ""
|
||||||
}
|
}
|
||||||
|
*device = pciDevice
|
||||||
|
|
||||||
switch device.Type {
|
if pciDevice.Type == config.VFIOPCIDeviceNormalType {
|
||||||
case config.VFIODeviceNormalType:
|
err = q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, pciDevice.BDF, pciDevice.Bus, romFile)
|
||||||
err = q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, device.Bus, romFile)
|
|
||||||
case config.VFIODeviceMediatedType:
|
|
||||||
if utils.IsAPVFIOMediatedDevice(device.SysfsDev) {
|
|
||||||
err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev)
|
|
||||||
} else {
|
} else {
|
||||||
err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, "", device.Bus, romFile)
|
err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, *(*device).GetSysfsDev(), "", pciDevice.Bus, romFile)
|
||||||
}
|
}
|
||||||
default:
|
case config.VFIOAPDeviceMediatedType:
|
||||||
return fmt.Errorf("Incorrect VFIO device type found")
|
err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, *(*device).GetSysfsDev())
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
addr, bridge, err := q.arch.addDeviceToBridge(ctx, devID, types.PCI)
|
addr, bridge, err := q.arch.addDeviceToBridge(ctx, devID, types.PCI)
|
||||||
@ -1766,15 +1768,17 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
switch device.Type {
|
switch (*device).GetType() {
|
||||||
case config.VFIODeviceNormalType:
|
case config.VFIOPCIDeviceNormalType:
|
||||||
err = q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, addr, bridge.ID, romFile)
|
pciDevice, ok := (*device).(config.VFIOPCIDev)
|
||||||
case config.VFIODeviceMediatedType:
|
if !ok {
|
||||||
if utils.IsAPVFIOMediatedDevice(device.SysfsDev) {
|
return fmt.Errorf("VFIO device %+v is not PCI, but its Type said otherwise", device)
|
||||||
err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev)
|
|
||||||
} else {
|
|
||||||
err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, addr, bridge.ID, romFile)
|
|
||||||
}
|
}
|
||||||
|
err = q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, pciDevice.BDF, addr, bridge.ID, romFile)
|
||||||
|
case config.VFIOPCIDeviceMediatedType:
|
||||||
|
err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, *(*device).GetSysfsDev(), addr, bridge.ID, romFile)
|
||||||
|
case config.VFIOAPDeviceMediatedType:
|
||||||
|
err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, *(*device).GetSysfsDev())
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("Incorrect VFIO device type found")
|
return fmt.Errorf("Incorrect VFIO device type found")
|
||||||
}
|
}
|
||||||
@ -1782,13 +1786,24 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch (*device).GetType() {
|
||||||
|
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
||||||
|
pciDevice, ok := (*device).(config.VFIOPCIDev)
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("VFIO device %+v is not PCI, but its Type said otherwise", device)
|
||||||
|
}
|
||||||
// XXX: Depending on whether we're doing root port or
|
// XXX: Depending on whether we're doing root port or
|
||||||
// bridge hotplug, and how the bridge is set up in
|
// bridge hotplug, and how the bridge is set up in
|
||||||
// other parts of the code, we may or may not already
|
// other parts of the code, we may or may not already
|
||||||
// have information about the slot number of the
|
// have information about the slot number of the
|
||||||
// bridge and/or the device. For simplicity, just
|
// bridge and/or the device. For simplicity, just
|
||||||
// query both of them back from qemu
|
// query both of them back from qemu
|
||||||
device.GuestPciPath, err = q.qomGetPciPath(devID)
|
guestPciPath, err := q.qomGetPciPath(devID)
|
||||||
|
pciDevice.GuestPciPath = guestPciPath
|
||||||
|
*device = pciDevice
|
||||||
|
return err
|
||||||
|
}
|
||||||
return err
|
return err
|
||||||
} else {
|
} else {
|
||||||
q.Logger().WithField("dev-id", devID).Info("Start hot-unplug VFIO device")
|
q.Logger().WithField("dev-id", devID).Info("Start hot-unplug VFIO device")
|
||||||
|
@ -675,16 +675,17 @@ func (q *qemuArchBase) appendVhostUserDevice(ctx context.Context, devices []govm
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (q *qemuArchBase) appendVFIODevice(devices []govmmQemu.Device, vfioDev config.VFIODev) []govmmQemu.Device {
|
func (q *qemuArchBase) appendVFIODevice(devices []govmmQemu.Device, vfioDev config.VFIODev) []govmmQemu.Device {
|
||||||
if vfioDev.BDF == "" {
|
pciDevice := vfioDev.(config.VFIOPCIDev)
|
||||||
|
if pciDevice.BDF == "" {
|
||||||
return devices
|
return devices
|
||||||
}
|
}
|
||||||
|
|
||||||
devices = append(devices,
|
devices = append(devices,
|
||||||
govmmQemu.VFIODevice{
|
govmmQemu.VFIODevice{
|
||||||
BDF: vfioDev.BDF,
|
BDF: pciDevice.BDF,
|
||||||
VendorID: vfioDev.VendorID,
|
VendorID: pciDevice.VendorID,
|
||||||
DeviceID: vfioDev.DeviceID,
|
DeviceID: pciDevice.DeviceID,
|
||||||
Bus: vfioDev.Bus,
|
Bus: pciDevice.Bus,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -463,7 +463,7 @@ func TestQemuArchBaseAppendVFIODevice(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
vfDevice := config.VFIODev{
|
vfDevice := config.VFIOPCIDev{
|
||||||
BDF: bdf,
|
BDF: bdf,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -483,7 +483,7 @@ func TestQemuArchBaseAppendVFIODeviceWithVendorDeviceID(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
vfDevice := config.VFIODev{
|
vfDevice := config.VFIOPCIDev{
|
||||||
BDF: bdf,
|
BDF: bdf,
|
||||||
VendorID: vendorID,
|
VendorID: vendorID,
|
||||||
DeviceID: deviceID,
|
DeviceID: deviceID,
|
||||||
|
@ -1856,11 +1856,15 @@ func (s *Sandbox) HotplugAddDevice(ctx context.Context, device api.Device, devTy
|
|||||||
// adding a group of VFIO devices
|
// adding a group of VFIO devices
|
||||||
for _, dev := range vfioDevices {
|
for _, dev := range vfioDevices {
|
||||||
if _, err := s.hypervisor.HotplugAddDevice(ctx, dev, VfioDev); err != nil {
|
if _, err := s.hypervisor.HotplugAddDevice(ctx, dev, VfioDev); err != nil {
|
||||||
|
bdf := ""
|
||||||
|
if pciDevice, ok := (*dev).(config.VFIOPCIDev); ok {
|
||||||
|
bdf = pciDevice.BDF
|
||||||
|
}
|
||||||
s.Logger().
|
s.Logger().
|
||||||
WithFields(logrus.Fields{
|
WithFields(logrus.Fields{
|
||||||
"sandbox": s.id,
|
"sandbox": s.id,
|
||||||
"vfio-device-ID": dev.ID,
|
"vfio-device-ID": (*dev).GetID(),
|
||||||
"vfio-device-BDF": dev.BDF,
|
"vfio-device-BDF": bdf,
|
||||||
}).WithError(err).Error("failed to hotplug VFIO device")
|
}).WithError(err).Error("failed to hotplug VFIO device")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -1909,11 +1913,15 @@ func (s *Sandbox) HotplugRemoveDevice(ctx context.Context, device api.Device, de
|
|||||||
// remove a group of VFIO devices
|
// remove a group of VFIO devices
|
||||||
for _, dev := range vfioDevices {
|
for _, dev := range vfioDevices {
|
||||||
if _, err := s.hypervisor.HotplugRemoveDevice(ctx, dev, VfioDev); err != nil {
|
if _, err := s.hypervisor.HotplugRemoveDevice(ctx, dev, VfioDev); err != nil {
|
||||||
|
bdf := ""
|
||||||
|
if pciDevice, ok := (*dev).(config.VFIOPCIDev); ok {
|
||||||
|
bdf = pciDevice.BDF
|
||||||
|
}
|
||||||
s.Logger().WithError(err).
|
s.Logger().WithError(err).
|
||||||
WithFields(logrus.Fields{
|
WithFields(logrus.Fields{
|
||||||
"sandbox": s.id,
|
"sandbox": s.id,
|
||||||
"vfio-device-ID": dev.ID,
|
"vfio-device-ID": (*dev).GetID(),
|
||||||
"vfio-device-BDF": dev.BDF,
|
"vfio-device-BDF": bdf,
|
||||||
}).Error("failed to hot unplug VFIO device")
|
}).Error("failed to hot unplug VFIO device")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -90,7 +90,6 @@ const (
|
|||||||
procMountsFile = "/proc/mounts"
|
procMountsFile = "/proc/mounts"
|
||||||
|
|
||||||
fieldsPerLine = 6
|
fieldsPerLine = 6
|
||||||
vfioAPSysfsDir = "vfio_ap"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -142,18 +141,6 @@ func GetDevicePathAndFsTypeOptions(mountPoint string) (devicePath, fsType string
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsAPVFIOMediatedDevice decides whether a device is a VFIO-AP device
|
|
||||||
// by checking for the existence of "vfio_ap" in the path
|
|
||||||
func IsAPVFIOMediatedDevice(sysfsdev string) bool {
|
|
||||||
split := strings.Split(sysfsdev, string(os.PathSeparator))
|
|
||||||
for _, el := range split {
|
|
||||||
if el == vfioAPSysfsDir {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func waitProcessUsingPidfd(pid int, timeoutSecs uint, logger *logrus.Entry) (bool, error) {
|
func waitProcessUsingPidfd(pid int, timeoutSecs uint, logger *logrus.Entry) (bool, error) {
|
||||||
pidfd, err := unix.PidfdOpen(pid, 0)
|
pidfd, err := unix.PidfdOpen(pid, 0)
|
||||||
|
|
||||||
|
@ -63,19 +63,3 @@ func TestGetDevicePathAndFsTypeOptionsSuccessful(t *testing.T) {
|
|||||||
assert.Equal(fstype, fstypeOut)
|
assert.Equal(fstype, fstypeOut)
|
||||||
assert.Equal(fsOptions, optsOut)
|
assert.Equal(fsOptions, optsOut)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestIsAPVFIOMediatedDeviceFalse(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
|
|
||||||
// Should be false for a PCI device
|
|
||||||
isAPMdev := IsAPVFIOMediatedDevice("/sys/bus/pci/devices/0000:00:02.0/a297db4a-f4c2-11e6-90f6-d3b88d6c9525")
|
|
||||||
assert.False(isAPMdev)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestIsAPVFIOMediatedDeviceTrue(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
|
|
||||||
// Typical AP sysfsdev
|
|
||||||
isAPMdev := IsAPVFIOMediatedDevice("/sys/devices/vfio_ap/matrix/a297db4a-f4c2-11e6-90f6-d3b88d6c9525")
|
|
||||||
assert.True(isAPMdev)
|
|
||||||
}
|
|
||||||
|
Loading…
Reference in New Issue
Block a user