mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-07-10 13:54:12 +00:00
Merge pull request #4582 from BbolroC/vfio-ap
agent: Bring in VFIO-AP device handling again
This commit is contained in:
commit
2fe0733dcb
1
src/agent/Cargo.lock
generated
1
src/agent/Cargo.lock
generated
@ -801,6 +801,7 @@ dependencies = [
|
||||
"async-recursion",
|
||||
"async-trait",
|
||||
"capctl",
|
||||
"cfg-if 1.0.0",
|
||||
"cgroups-rs",
|
||||
"clap",
|
||||
"futures",
|
||||
|
@ -48,6 +48,7 @@ slog-scope = "4.1.2"
|
||||
slog-stdlog = "4.0.0"
|
||||
log = "0.4.11"
|
||||
|
||||
cfg-if = "1.0.0"
|
||||
prometheus = { version = "0.13.0", features = ["process"] }
|
||||
procfs = "0.12.0"
|
||||
anyhow = "1.0.32"
|
||||
|
79
src/agent/src/ap.rs
Normal file
79
src/agent/src/ap.rs
Normal file
@ -0,0 +1,79 @@
|
||||
// Copyright (c) IBM Corp. 2023
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
|
||||
use anyhow::{anyhow, Context};
|
||||
|
||||
// IBM Adjunct Processor (AP) is used for cryptographic operations
|
||||
// by IBM Crypto Express hardware security modules on IBM zSystem & LinuxONE (s390x).
|
||||
// In Linux, virtual cryptographic devices are called AP queues.
|
||||
// The name of an AP queue respects a format <xx>.<xxxx> in hexadecimal notation [1, p.467]:
|
||||
// - <xx> is an adapter ID
|
||||
// - <xxxx> is an adapter domain ID
|
||||
// [1] https://www.ibm.com/docs/en/linuxonibm/pdf/lku5dd05.pdf
|
||||
|
||||
/// Address of an AP queue: an adapter ID paired with an adapter domain ID.
///
/// The textual form is `<xx>.<xxxx>` in hexadecimal notation (e.g. `0a.003f`).
/// The type is a pair of plain integers, so it is cheap to copy and can be
/// compared and hashed directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Address {
    /// Adapter ID, the `<xx>` part of the queue name.
    pub adapter_id: u8,
    /// Adapter domain ID, the `<xxxx>` part of the queue name.
    pub adapter_domain: u16,
}

impl Address {
    /// Creates an address from an adapter ID and an adapter domain ID.
    pub fn new(adapter_id: u8, adapter_domain: u16) -> Address {
        Address {
            adapter_id,
            adapter_domain,
        }
    }
}
|
||||
|
||||
impl FromStr for Address {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> anyhow::Result<Self> {
|
||||
let split: Vec<&str> = s.split('.').collect();
|
||||
if split.len() != 2 {
|
||||
return Err(anyhow!(
|
||||
"Wrong AP bus format. It needs to be in the form <xx>.<xxxx> (e.g. 0a.003f), got {:?}",
|
||||
s
|
||||
));
|
||||
}
|
||||
|
||||
let adapter_id = u8::from_str_radix(split[0], 16).context(format!(
|
||||
"Wrong AP bus format. AP ID needs to be in the form <xx> (e.g. 0a), got {:?}",
|
||||
split[0]
|
||||
))?;
|
||||
let adapter_domain = u16::from_str_radix(split[1], 16).context(format!(
|
||||
"Wrong AP bus format. AP domain needs to be in the form <xxxx> (e.g. 003f), got {:?}",
|
||||
split[1]
|
||||
))?;
|
||||
|
||||
Ok(Address::new(adapter_id, adapter_domain))
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Address {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
write!(f, "{:02x}.{:04x}", self.adapter_id, self.adapter_domain)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_from_str() {
        // Short forms are accepted and rendered back zero-padded.
        let device = Address::from_str("a.1").unwrap();
        assert_eq!(device.to_string(), "0a.0001");

        // Missing fields, too many fields, a non-hex digit and an
        // out-of-range domain must all be rejected.
        let invalid_inputs = ["", ".", "0.0.0", "0g.0000", "0a.10000"];
        for input in invalid_inputs.iter() {
            assert!(Address::from_str(input).is_err());
        }
    }
}
|
@ -16,13 +16,12 @@ use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
#[cfg(target_arch = "s390x")]
|
||||
use crate::ccw;
|
||||
use crate::linux_abi::*;
|
||||
use crate::pci;
|
||||
use crate::sandbox::Sandbox;
|
||||
use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use cfg_if::cfg_if;
|
||||
use oci::{LinuxDeviceCgroup, LinuxResources, Spec};
|
||||
use protocols::agent::Device;
|
||||
use tracing::instrument;
|
||||
@ -46,14 +45,22 @@ pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm";
|
||||
pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral";
|
||||
pub const DRIVER_LOCAL_TYPE: &str = "local";
|
||||
pub const DRIVER_WATCHABLE_BIND_TYPE: &str = "watchable-bind";
|
||||
// VFIO device to be bound to a guest kernel driver
|
||||
pub const DRIVER_VFIO_GK_TYPE: &str = "vfio-gk";
|
||||
// VFIO device to be bound to vfio-pci and made available inside the
|
||||
// VFIO PCI device to be bound to a guest kernel driver
|
||||
pub const DRIVER_VFIO_PCI_GK_TYPE: &str = "vfio-pci-gk";
|
||||
// VFIO PCI device to be bound to vfio-pci and made available inside the
|
||||
// container as a VFIO device node
|
||||
pub const DRIVER_VFIO_TYPE: &str = "vfio";
|
||||
pub const DRIVER_VFIO_PCI_TYPE: &str = "vfio-pci";
|
||||
pub const DRIVER_VFIO_AP_TYPE: &str = "vfio-ap";
|
||||
pub const DRIVER_OVERLAYFS_TYPE: &str = "overlayfs";
|
||||
pub const FS_TYPE_HUGETLB: &str = "hugetlbfs";
|
||||
|
||||
cfg_if! {
|
||||
if #[cfg(target_arch = "s390x")] {
|
||||
use crate::ap;
|
||||
use crate::ccw;
|
||||
}
|
||||
}
|
||||
|
||||
#[instrument]
|
||||
pub fn online_device(path: &str) -> Result<()> {
|
||||
fs::write(path, "1")?;
|
||||
@ -280,7 +287,7 @@ pub async fn get_virtio_blk_ccw_device_name(
|
||||
sandbox: &Arc<Mutex<Sandbox>>,
|
||||
device: &ccw::Device,
|
||||
) -> Result<String> {
|
||||
let matcher = VirtioBlkCCWMatcher::new(&create_ccw_root_bus_path(), device);
|
||||
let matcher = VirtioBlkCCWMatcher::new(CCW_ROOT_BUS_PATH, device);
|
||||
let uev = wait_for_uevent(sandbox, matcher).await?;
|
||||
let devname = uev.devname;
|
||||
return match Path::new(SYSTEM_DEV_PATH).join(&devname).to_str() {
|
||||
@ -401,6 +408,39 @@ async fn get_vfio_device_name(sandbox: &Arc<Mutex<Sandbox>>, grp: IommuGroup) ->
|
||||
Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname))
|
||||
}
|
||||
|
||||
/// Uevent matcher for a single AP queue, identified by the device path the
/// queue is expected to show up at.
#[cfg(target_arch = "s390x")]
#[derive(Debug)]
struct ApMatcher {
    /// Device path a uevent must carry to be considered a match.
    syspath: String,
}

#[cfg(target_arch = "s390x")]
impl ApMatcher {
    /// Builds a matcher for `address`, expecting the queue below
    /// `AP_ROOT_BUS_PATH` at `card<xx>/<address>`, where `<xx>` is the
    /// zero-padded hexadecimal adapter ID.
    fn new(address: ap::Address) -> ApMatcher {
        let card_dir = format!("card{:02x}", address.adapter_id);
        let syspath = format!("{}/{}/{}", AP_ROOT_BUS_PATH, card_dir, address);

        ApMatcher { syspath }
    }
}
|
||||
|
||||
#[cfg(target_arch = "s390x")]
impl UeventMatcher for ApMatcher {
    /// Matches only "add" uevents whose device path equals the expected
    /// path of the AP queue.
    fn is_match(&self, uev: &Uevent) -> bool {
        let is_add = uev.action == "add";
        is_add && uev.devpath == self.syspath
    }
}
|
||||
|
||||
/// Waits until a uevent announcing the AP queue at `address` has been seen.
///
/// Any error reported by `wait_for_uevent` is propagated to the caller; the
/// matched uevent itself is not needed and is dropped.
#[cfg(target_arch = "s390x")]
#[instrument]
async fn wait_for_ap_device(sandbox: &Arc<Mutex<Sandbox>>, address: ap::Address) -> Result<()> {
    let _uev = wait_for_uevent(sandbox, ApMatcher::new(address)).await?;
    Ok(())
}
|
||||
|
||||
/// Scan SCSI bus for the given SCSI address(SCSI-Id and LUN)
|
||||
#[instrument]
|
||||
fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
|
||||
@ -699,7 +739,7 @@ async fn virtio_nvdimm_device_handler(
|
||||
Ok(DevNumUpdate::from_vm_path(&device.vm_path)?.into())
|
||||
}
|
||||
|
||||
fn split_vfio_option(opt: &str) -> Option<(&str, &str)> {
|
||||
fn split_vfio_pci_option(opt: &str) -> Option<(&str, &str)> {
|
||||
let mut tokens = opt.split('=');
|
||||
let hostbdf = tokens.next()?;
|
||||
let path = tokens.next()?;
|
||||
@ -714,14 +754,18 @@ fn split_vfio_option(opt: &str) -> Option<(&str, &str)> {
|
||||
// Each option should have the form "DDDD:BB:DD.F=<pcipath>"
|
||||
// DDDD:BB:DD.F is the device's PCI address in the host
|
||||
// <pcipath> is a PCI path to the device in the guest (see pci.rs)
|
||||
async fn vfio_device_handler(device: &Device, sandbox: &Arc<Mutex<Sandbox>>) -> Result<SpecUpdate> {
|
||||
let vfio_in_guest = device.field_type != DRIVER_VFIO_GK_TYPE;
|
||||
#[instrument]
|
||||
async fn vfio_pci_device_handler(
|
||||
device: &Device,
|
||||
sandbox: &Arc<Mutex<Sandbox>>,
|
||||
) -> Result<SpecUpdate> {
|
||||
let vfio_in_guest = device.field_type != DRIVER_VFIO_PCI_GK_TYPE;
|
||||
let mut pci_fixups = Vec::<(pci::Address, pci::Address)>::new();
|
||||
let mut group = None;
|
||||
|
||||
for opt in device.options.iter() {
|
||||
let (host, pcipath) =
|
||||
split_vfio_option(opt).ok_or_else(|| anyhow!("Malformed VFIO option {:?}", opt))?;
|
||||
let (host, pcipath) = split_vfio_pci_option(opt)
|
||||
.ok_or_else(|| anyhow!("Malformed VFIO PCI option {:?}", opt))?;
|
||||
let host =
|
||||
pci::Address::from_str(host).context("Bad host PCI address in VFIO option {:?}")?;
|
||||
let pcipath = pci::Path::from_str(pcipath)?;
|
||||
@ -763,6 +807,28 @@ async fn vfio_device_handler(device: &Device, sandbox: &Arc<Mutex<Sandbox>>) ->
|
||||
})
|
||||
}
|
||||
|
||||
// The VFIO AP (Adjunct Processor) device handler forces a rescan of the AP bus
// and then awaits, one after another, all the APQNs provided as device options.
// It does not modify the container spec, so an empty update is returned.
#[cfg(target_arch = "s390x")]
#[instrument]
async fn vfio_ap_device_handler(
    device: &Device,
    sandbox: &Arc<Mutex<Sandbox>>,
) -> Result<SpecUpdate> {
    // Force AP bus rescan; attach the path so a failing write is attributable.
    fs::write(AP_SCANS_PATH, "1")
        .with_context(|| format!("Failed to force AP bus rescan via {}", AP_SCANS_PATH))?;

    // Every option is expected to be an APQN in the form <xx>.<xxxx>.
    for apqn in device.options.iter() {
        let address = ap::Address::from_str(apqn)?;
        wait_for_ap_device(sandbox, address).await?;
    }

    Ok(Default::default())
}
|
||||
|
||||
#[cfg(not(target_arch = "s390x"))]
|
||||
async fn vfio_ap_device_handler(_: &Device, _: &Arc<Mutex<Sandbox>>) -> Result<SpecUpdate> {
|
||||
Err(anyhow!("AP is only supported on s390x"))
|
||||
}
|
||||
|
||||
#[instrument]
|
||||
pub async fn add_devices(
|
||||
devices: &[Device],
|
||||
@ -828,7 +894,10 @@ async fn add_device(device: &Device, sandbox: &Arc<Mutex<Sandbox>>) -> Result<Sp
|
||||
DRIVER_MMIO_BLK_TYPE => virtiommio_blk_device_handler(device, sandbox).await,
|
||||
DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, sandbox).await,
|
||||
DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, sandbox).await,
|
||||
DRIVER_VFIO_GK_TYPE | DRIVER_VFIO_TYPE => vfio_device_handler(device, sandbox).await,
|
||||
DRIVER_VFIO_PCI_GK_TYPE | DRIVER_VFIO_PCI_TYPE => {
|
||||
vfio_pci_device_handler(device, sandbox).await
|
||||
}
|
||||
DRIVER_VFIO_AP_TYPE => vfio_ap_device_handler(device, sandbox).await,
|
||||
_ => Err(anyhow!("Unknown device type {}", device.field_type)),
|
||||
}
|
||||
}
|
||||
@ -1378,7 +1447,7 @@ mod tests {
|
||||
#[cfg(target_arch = "s390x")]
|
||||
#[tokio::test]
|
||||
async fn test_virtio_blk_ccw_matcher() {
|
||||
let root_bus = create_ccw_root_bus_path();
|
||||
let root_bus = CCW_ROOT_BUS_PATH;
|
||||
let subsystem = "block";
|
||||
let devname = "vda";
|
||||
let relpath = "0.0.0002";
|
||||
@ -1487,13 +1556,13 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_split_vfio_option() {
|
||||
fn test_split_vfio_pci_option() {
|
||||
assert_eq!(
|
||||
split_vfio_option("0000:01:00.0=02/01"),
|
||||
split_vfio_pci_option("0000:01:00.0=02/01"),
|
||||
Some(("0000:01:00.0", "02/01"))
|
||||
);
|
||||
assert_eq!(split_vfio_option("0000:01:00.0=02/01=rubbish"), None);
|
||||
assert_eq!(split_vfio_option("0000:01:00.0"), None);
|
||||
assert_eq!(split_vfio_pci_option("0000:01:00.0=02/01=rubbish"), None);
|
||||
assert_eq!(split_vfio_pci_option("0000:01:00.0"), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -1572,4 +1641,35 @@ mod tests {
|
||||
// Test dev2
|
||||
assert!(pci_iommu_group(&syspci, dev2).is_err());
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "s390x")]
#[tokio::test]
async fn test_vfio_ap_matcher() {
    let card = "0a";
    let queue = format!("{}.0001", card);
    // Device path of a queue that sits below the card used in this test.
    let queue_path = |name: &str| format!("{}/card{}/{}", AP_ROOT_BUS_PATH, card, name);

    let mut uev = Uevent::default();
    uev.action = U_EVENT_ACTION_ADD.to_string();
    uev.subsystem = "ap".to_string();
    uev.devpath = queue_path(&queue);

    let matcher = ApMatcher::new(ap::Address::from_str(&queue).unwrap());

    // The "add" event for the expected queue matches.
    assert!(matcher.is_match(&uev));

    // The same queue being removed does not match.
    let mut uev_remove = uev.clone();
    uev_remove.action = U_EVENT_ACTION_REMOVE.to_string();
    assert!(!matcher.is_match(&uev_remove));

    // Another queue on the same card does not match either.
    let mut uev_other_device = uev.clone();
    uev_other_device.devpath = queue_path(&format!("{}.0002", card));
    assert!(!matcher.is_match(&uev_other_device));
}
|
||||
}
|
||||
|
@ -3,6 +3,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use cfg_if::cfg_if;
|
||||
|
||||
/// Linux ABI related constants.
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
@ -64,10 +66,14 @@ pub fn create_pci_root_bus_path() -> String {
|
||||
ret
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "s390x")]
|
||||
pub fn create_ccw_root_bus_path() -> String {
|
||||
String::from("/devices/css0")
|
||||
cfg_if! {
|
||||
if #[cfg(target_arch = "s390x")] {
|
||||
pub const CCW_ROOT_BUS_PATH: &str = "/devices/css0";
|
||||
pub const AP_ROOT_BUS_PATH: &str = "/devices/ap";
|
||||
pub const AP_SCANS_PATH: &str = "/sys/bus/ap/scans";
|
||||
}
|
||||
}
|
||||
|
||||
// From https://www.kernel.org/doc/Documentation/acpi/namespace.txt
|
||||
// The Linux kernel's core ACPI subsystem creates struct acpi_device
|
||||
// objects for ACPI namespace objects representing devices, power resources
|
||||
|
@ -20,6 +20,7 @@ extern crate scopeguard;
|
||||
extern crate slog;
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use cfg_if::cfg_if;
|
||||
use clap::{AppSettings, Parser};
|
||||
use nix::fcntl::OFlag;
|
||||
use nix::sys::socket::{self, AddressFamily, SockFlag, SockType, VsockAddr};
|
||||
@ -34,8 +35,6 @@ use std::process::exit;
|
||||
use std::sync::Arc;
|
||||
use tracing::{instrument, span};
|
||||
|
||||
#[cfg(target_arch = "s390x")]
|
||||
mod ccw;
|
||||
mod config;
|
||||
mod console;
|
||||
mod device;
|
||||
@ -74,6 +73,13 @@ use tokio::{
|
||||
mod rpc;
|
||||
mod tracer;
|
||||
|
||||
cfg_if! {
|
||||
if #[cfg(target_arch = "s390x")] {
|
||||
mod ap;
|
||||
mod ccw;
|
||||
}
|
||||
}
|
||||
|
||||
const NAME: &str = "kata-agent";
|
||||
|
||||
lazy_static! {
|
||||
|
@ -258,15 +258,24 @@ const (
|
||||
// VFIODeviceErrorType is the error type of VFIO device
|
||||
VFIODeviceErrorType VFIODeviceType = iota
|
||||
|
||||
// VFIODeviceNormalType is a normal VFIO device type
|
||||
VFIODeviceNormalType
|
||||
// VFIOPCIDeviceNormalType is a normal VFIO PCI device type
|
||||
VFIOPCIDeviceNormalType
|
||||
|
||||
// VFIODeviceMediatedType is a VFIO mediated device type
|
||||
VFIODeviceMediatedType
|
||||
// VFIOPCIDeviceMediatedType is a VFIO PCI mediated device type
|
||||
VFIOPCIDeviceMediatedType
|
||||
|
||||
// VFIOAPDeviceMediatedType is a VFIO AP mediated device type
|
||||
VFIOAPDeviceMediatedType
|
||||
)
|
||||
|
||||
// VFIODev represents a VFIO drive used for hotplugging
|
||||
type VFIODev struct {
|
||||
type VFIODev interface {
|
||||
GetID() *string
|
||||
GetType() VFIODeviceType
|
||||
GetSysfsDev() *string
|
||||
}
|
||||
|
||||
// VFIOPCIDev represents a VFIO PCI device used for hotplugging
|
||||
type VFIOPCIDev struct {
|
||||
// ID is used to identify this drive in the hypervisor options.
|
||||
ID string
|
||||
|
||||
@ -298,6 +307,44 @@ type VFIODev struct {
|
||||
IsPCIe bool
|
||||
}
|
||||
|
||||
func (d VFIOPCIDev) GetID() *string {
|
||||
return &d.ID
|
||||
}
|
||||
|
||||
func (d VFIOPCIDev) GetType() VFIODeviceType {
|
||||
return d.Type
|
||||
}
|
||||
|
||||
func (d VFIOPCIDev) GetSysfsDev() *string {
|
||||
return &d.SysfsDev
|
||||
}
|
||||
|
||||
type VFIOAPDev struct {
|
||||
// ID is used to identify this drive in the hypervisor options.
|
||||
ID string
|
||||
|
||||
// sysfsdev of VFIO mediated device
|
||||
SysfsDev string
|
||||
|
||||
// APDevices are the Adjunct Processor devices assigned to the mdev
|
||||
APDevices []string
|
||||
|
||||
// Type of VFIO device
|
||||
Type VFIODeviceType
|
||||
}
|
||||
|
||||
func (d VFIOAPDev) GetID() *string {
|
||||
return &d.ID
|
||||
}
|
||||
|
||||
func (d VFIOAPDev) GetType() VFIODeviceType {
|
||||
return d.Type
|
||||
}
|
||||
|
||||
func (d VFIOAPDev) GetSysfsDev() *string {
|
||||
return &d.SysfsDev
|
||||
}
|
||||
|
||||
// RNGDev represents a random number generator device
|
||||
type RNGDev struct {
|
||||
// ID is used to identify the device in the hypervisor options.
|
||||
|
@ -89,18 +89,47 @@ func readPCIProperty(propertyPath string) (string, error) {
|
||||
return strings.Split(string(buf), "\n")[0], nil
|
||||
}
|
||||
|
||||
func GetVFIODeviceType(deviceFileName string) config.VFIODeviceType {
|
||||
func GetVFIODeviceType(deviceFilePath string) (config.VFIODeviceType, error) {
|
||||
deviceFileName := filepath.Base(deviceFilePath)
|
||||
|
||||
//For example, 0000:04:00.0
|
||||
tokens := strings.Split(deviceFileName, ":")
|
||||
vfioDeviceType := config.VFIODeviceErrorType
|
||||
if len(tokens) == 3 {
|
||||
vfioDeviceType = config.VFIODeviceNormalType
|
||||
} else {
|
||||
//For example, 83b8f4f2-509f-382f-3c1e-e6bfe0fa1001
|
||||
tokens = strings.Split(deviceFileName, "-")
|
||||
if len(tokens) == 5 {
|
||||
vfioDeviceType = config.VFIODeviceMediatedType
|
||||
}
|
||||
return config.VFIOPCIDeviceNormalType, nil
|
||||
}
|
||||
return vfioDeviceType
|
||||
|
||||
//For example, 83b8f4f2-509f-382f-3c1e-e6bfe0fa1001
|
||||
tokens = strings.Split(deviceFileName, "-")
|
||||
if len(tokens) != 5 {
|
||||
return config.VFIODeviceErrorType, fmt.Errorf("Incorrect tokens found while parsing VFIO details: %s", deviceFileName)
|
||||
}
|
||||
|
||||
deviceSysfsDev, err := GetSysfsDev(deviceFilePath)
|
||||
if err != nil {
|
||||
return config.VFIODeviceErrorType, err
|
||||
}
|
||||
|
||||
if strings.HasPrefix(deviceSysfsDev, vfioAPSysfsDir) {
|
||||
return config.VFIOAPDeviceMediatedType, nil
|
||||
}
|
||||
|
||||
return config.VFIOPCIDeviceMediatedType, nil
|
||||
}
|
||||
|
||||
// GetSysfsDev returns the sysfsdev of mediated device
|
||||
// Expected input string format is absolute path to the sysfs dev node
|
||||
// eg. /sys/kernel/iommu_groups/0/devices/f79944e4-5a3d-11e8-99ce-479cbab002e4
|
||||
func GetSysfsDev(sysfsDevStr string) (string, error) {
|
||||
return filepath.EvalSymlinks(sysfsDevStr)
|
||||
}
|
||||
|
||||
// GetAPVFIODevices retrieves all APQNs associated with a mediated VFIO-AP
|
||||
// device
|
||||
func GetAPVFIODevices(sysfsdev string) ([]string, error) {
|
||||
data, err := os.ReadFile(filepath.Join(sysfsdev, "matrix"))
|
||||
if err != nil {
|
||||
return []string{}, err
|
||||
}
|
||||
// Split by newlines, omitting final newline
|
||||
return strings.Split(string(data[:len(data)-1]), "\n"), nil
|
||||
}
|
||||
|
@ -30,6 +30,7 @@ const (
|
||||
iommuGroupPath = "/sys/bus/pci/devices/%s/iommu_group"
|
||||
vfioDevPath = "/dev/vfio/%s"
|
||||
pcieRootPortPrefix = "rp"
|
||||
vfioAPSysfsDir = "/sys/devices/vfio_ap"
|
||||
)
|
||||
|
||||
var (
|
||||
@ -85,19 +86,42 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
vfio := &config.VFIODev{
|
||||
ID: utils.MakeNameID("vfio", device.DeviceInfo.ID+strconv.Itoa(i), maxDevIDSize),
|
||||
Type: vfioDeviceType,
|
||||
BDF: deviceBDF,
|
||||
SysfsDev: deviceSysfsDev,
|
||||
IsPCIe: isPCIeDevice(deviceBDF),
|
||||
Class: getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass),
|
||||
}
|
||||
device.VfioDevs = append(device.VfioDevs, vfio)
|
||||
if vfio.IsPCIe {
|
||||
vfio.Bus = fmt.Sprintf("%s%d", pcieRootPortPrefix, len(AllPCIeDevs))
|
||||
AllPCIeDevs[vfio.BDF] = true
|
||||
id := utils.MakeNameID("vfio", device.DeviceInfo.ID+strconv.Itoa(i), maxDevIDSize)
|
||||
|
||||
var vfio config.VFIODev
|
||||
|
||||
switch vfioDeviceType {
|
||||
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
||||
isPCIe := isPCIeDevice(deviceBDF)
|
||||
// Do not directly assign to `vfio` -- need to access field still
|
||||
vfioPCI := config.VFIOPCIDev{
|
||||
ID: id,
|
||||
Type: vfioDeviceType,
|
||||
BDF: deviceBDF,
|
||||
SysfsDev: deviceSysfsDev,
|
||||
IsPCIe: isPCIe,
|
||||
Class: getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass),
|
||||
}
|
||||
if isPCIe {
|
||||
vfioPCI.Bus = fmt.Sprintf("%s%d", pcieRootPortPrefix, len(AllPCIeDevs))
|
||||
AllPCIeDevs[deviceBDF] = true
|
||||
}
|
||||
vfio = vfioPCI
|
||||
case config.VFIOAPDeviceMediatedType:
|
||||
devices, err := GetAPVFIODevices(deviceSysfsDev)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
vfio = config.VFIOAPDev{
|
||||
ID: id,
|
||||
SysfsDev: deviceSysfsDev,
|
||||
Type: config.VFIOAPDeviceMediatedType,
|
||||
APDevices: devices,
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("Failed to append device: VFIO device type unrecognized")
|
||||
}
|
||||
device.VfioDevs = append(device.VfioDevs, &vfio)
|
||||
}
|
||||
|
||||
coldPlug := device.DeviceInfo.ColdPlug
|
||||
@ -192,31 +216,60 @@ func (device *VFIODevice) Load(ds config.DeviceState) {
|
||||
device.GenericDevice.Load(ds)
|
||||
|
||||
for _, dev := range ds.VFIODevs {
|
||||
device.VfioDevs = append(device.VfioDevs, &config.VFIODev{
|
||||
ID: dev.ID,
|
||||
Type: config.VFIODeviceType(dev.Type),
|
||||
BDF: dev.BDF,
|
||||
SysfsDev: dev.SysfsDev,
|
||||
})
|
||||
var vfio config.VFIODev
|
||||
|
||||
vfioDeviceType := (*device.VfioDevs[0]).GetType()
|
||||
switch vfioDeviceType {
|
||||
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
||||
bdf := ""
|
||||
if pciDev, ok := (*dev).(config.VFIOPCIDev); ok {
|
||||
bdf = pciDev.BDF
|
||||
}
|
||||
vfio = config.VFIOPCIDev{
|
||||
ID: *(*dev).GetID(),
|
||||
Type: config.VFIODeviceType((*dev).GetType()),
|
||||
BDF: bdf,
|
||||
SysfsDev: *(*dev).GetSysfsDev(),
|
||||
}
|
||||
case config.VFIOAPDeviceMediatedType:
|
||||
vfio = config.VFIOAPDev{
|
||||
ID: *(*dev).GetID(),
|
||||
SysfsDev: *(*dev).GetSysfsDev(),
|
||||
}
|
||||
default:
|
||||
deviceLogger().WithError(
|
||||
fmt.Errorf("VFIO device type unrecognized"),
|
||||
).Error("Failed to append device")
|
||||
return
|
||||
}
|
||||
|
||||
device.VfioDevs = append(device.VfioDevs, &vfio)
|
||||
}
|
||||
}
|
||||
|
||||
// It should implement GetAttachCount() and DeviceID() as api.Device implementation
|
||||
// here it shares function from *GenericDevice so we don't need duplicate codes
|
||||
func getVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceSysfsDev string, vfioDeviceType config.VFIODeviceType, err error) {
|
||||
vfioDeviceType = GetVFIODeviceType(deviceFileName)
|
||||
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
|
||||
vfioDeviceType, err = GetVFIODeviceType(sysfsDevStr)
|
||||
if err != nil {
|
||||
return deviceBDF, deviceSysfsDev, vfioDeviceType, err
|
||||
}
|
||||
|
||||
switch vfioDeviceType {
|
||||
case config.VFIODeviceNormalType:
|
||||
case config.VFIOPCIDeviceNormalType:
|
||||
// Get bdf of device eg. 0000:00:1c.0
|
||||
deviceBDF = getBDF(deviceFileName)
|
||||
// Get sysfs path used by cloud-hypervisor
|
||||
deviceSysfsDev = filepath.Join(config.SysBusPciDevicesPath, deviceFileName)
|
||||
case config.VFIODeviceMediatedType:
|
||||
case config.VFIOPCIDeviceMediatedType:
|
||||
// Get sysfsdev of device eg. /sys/devices/pci0000:00/0000:00:02.0/f79944e4-5a3d-11e8-99ce-479cbab002e4
|
||||
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
|
||||
deviceSysfsDev, err = getSysfsDev(sysfsDevStr)
|
||||
deviceSysfsDev, err = GetSysfsDev(sysfsDevStr)
|
||||
deviceBDF = getBDF(getMediatedBDF(deviceSysfsDev))
|
||||
case config.VFIOAPDeviceMediatedType:
|
||||
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
|
||||
deviceSysfsDev, err = GetSysfsDev(sysfsDevStr)
|
||||
default:
|
||||
err = fmt.Errorf("Incorrect tokens found while parsing vfio details: %s", deviceFileName)
|
||||
}
|
||||
@ -244,13 +297,6 @@ func getBDF(deviceSysStr string) string {
|
||||
return tokens[1]
|
||||
}
|
||||
|
||||
// getSysfsDev returns the sysfsdev of mediated device
|
||||
// Expected input string format is absolute path to the sysfs dev node
|
||||
// eg. /sys/kernel/iommu_groups/0/devices/f79944e4-5a3d-11e8-99ce-479cbab002e4
|
||||
func getSysfsDev(sysfsDevStr string) (string, error) {
|
||||
return filepath.EvalSymlinks(sysfsDevStr)
|
||||
}
|
||||
|
||||
// BindDevicetoVFIO binds the device to vfio driver after unbinding from host.
|
||||
// Will be called by a network interface or a generic pcie device.
|
||||
func BindDevicetoVFIO(bdf, hostDriver, vendorDeviceID string) (string, error) {
|
||||
|
@ -32,9 +32,9 @@ func TestGetVFIODetails(t *testing.T) {
|
||||
deviceBDF, deviceSysfsDev, vfioDeviceType, err := getVFIODetails(d.deviceStr, "")
|
||||
|
||||
switch vfioDeviceType {
|
||||
case config.VFIODeviceNormalType:
|
||||
case config.VFIOPCIDeviceNormalType:
|
||||
assert.Equal(t, d.expectedStr, deviceBDF)
|
||||
case config.VFIODeviceMediatedType:
|
||||
case config.VFIOPCIDeviceMediatedType, config.VFIOAPDeviceMediatedType:
|
||||
assert.Equal(t, d.expectedStr, deviceSysfsDev)
|
||||
default:
|
||||
assert.NotNil(t, err)
|
||||
|
@ -857,12 +857,12 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error {
|
||||
defer cancel()
|
||||
|
||||
// Create the clh device config via the constructor to ensure default values are properly assigned
|
||||
clhDevice := *chclient.NewDeviceConfig(device.SysfsDev)
|
||||
clhDevice := *chclient.NewDeviceConfig(*(*device).GetSysfsDev())
|
||||
pciInfo, _, err := cl.VmAddDevicePut(ctx, clhDevice)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err))
|
||||
}
|
||||
clh.devicesIds[device.ID] = pciInfo.GetId()
|
||||
clh.devicesIds[*(*device).GetID()] = pciInfo.GetId()
|
||||
|
||||
// clh doesn't use bridges, so the PCI path is simply the slot
|
||||
// number of the device. This will break if clh starts using
|
||||
@ -879,7 +879,14 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error {
|
||||
return fmt.Errorf("Unexpected PCI address %q from clh hotplug", pciInfo.Bdf)
|
||||
}
|
||||
|
||||
device.GuestPciPath, err = types.PciPathFromString(tokens[0])
|
||||
guestPciPath, err := types.PciPathFromString(tokens[0])
|
||||
|
||||
pciDevice, ok := (*device).(config.VFIOPCIDev)
|
||||
if !ok {
|
||||
return fmt.Errorf("VFIO device %+v is not PCI, only PCI is supported in Cloud Hypervisor", device)
|
||||
}
|
||||
pciDevice.GuestPciPath = guestPciPath
|
||||
*device = pciDevice
|
||||
|
||||
return err
|
||||
}
|
||||
@ -923,7 +930,7 @@ func (clh *cloudHypervisor) HotplugRemoveDevice(ctx context.Context, devInfo int
|
||||
case BlockDev:
|
||||
deviceID = clhDriveIndexToID(devInfo.(*config.BlockDrive).Index)
|
||||
case VfioDev:
|
||||
deviceID = devInfo.(*config.VFIODev).ID
|
||||
deviceID = *devInfo.(config.VFIODev).GetID()
|
||||
default:
|
||||
clh.Logger().WithFields(log.Fields{"devInfo": devInfo,
|
||||
"deviceType": devType}).Error("HotplugRemoveDevice: unsupported device")
|
||||
|
@ -624,7 +624,7 @@ func TestCloudHypervisorHotplugRemoveDevice(t *testing.T) {
|
||||
_, err = clh.HotplugRemoveDevice(context.Background(), &config.BlockDrive{}, BlockDev)
|
||||
assert.NoError(err, "Hotplug remove block device expected no error")
|
||||
|
||||
_, err = clh.HotplugRemoveDevice(context.Background(), &config.VFIODev{}, VfioDev)
|
||||
_, err = clh.HotplugRemoveDevice(context.Background(), &config.VFIOPCIDev{}, VfioDev)
|
||||
assert.NoError(err, "Hotplug remove vfio block device expected no error")
|
||||
|
||||
_, err = clh.HotplugRemoveDevice(context.Background(), nil, NetDev)
|
||||
|
@ -77,38 +77,39 @@ const (
|
||||
)
|
||||
|
||||
var (
|
||||
checkRequestTimeout = 30 * time.Second
|
||||
defaultRequestTimeout = 60 * time.Second
|
||||
errorMissingOCISpec = errors.New("Missing OCI specification")
|
||||
defaultKataHostSharedDir = "/run/kata-containers/shared/sandboxes/"
|
||||
defaultKataGuestSharedDir = "/run/kata-containers/shared/containers/"
|
||||
defaultKataGuestNydusRootDir = "/run/kata-containers/shared/"
|
||||
mountGuestTag = "kataShared"
|
||||
defaultKataGuestSandboxDir = "/run/kata-containers/sandbox/"
|
||||
type9pFs = "9p"
|
||||
typeVirtioFS = "virtiofs"
|
||||
typeOverlayFS = "overlay"
|
||||
kata9pDevType = "9p"
|
||||
kataMmioBlkDevType = "mmioblk"
|
||||
kataBlkDevType = "blk"
|
||||
kataBlkCCWDevType = "blk-ccw"
|
||||
kataSCSIDevType = "scsi"
|
||||
kataNvdimmDevType = "nvdimm"
|
||||
kataVirtioFSDevType = "virtio-fs"
|
||||
kataOverlayDevType = "overlayfs"
|
||||
kataWatchableBindDevType = "watchable-bind"
|
||||
kataVfioDevType = "vfio" // VFIO device to used as VFIO in the container
|
||||
kataVfioGuestKernelDevType = "vfio-gk" // VFIO device for consumption by the guest kernel
|
||||
sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"}
|
||||
sharedDirVirtioFSOptions = []string{}
|
||||
sharedDirVirtioFSDaxOptions = "dax"
|
||||
shmDir = "shm"
|
||||
kataEphemeralDevType = "ephemeral"
|
||||
defaultEphemeralPath = filepath.Join(defaultKataGuestSandboxDir, kataEphemeralDevType)
|
||||
grpcMaxDataSize = int64(1024 * 1024)
|
||||
localDirOptions = []string{"mode=0777"}
|
||||
maxHostnameLen = 64
|
||||
GuestDNSFile = "/etc/resolv.conf"
|
||||
checkRequestTimeout = 30 * time.Second
|
||||
defaultRequestTimeout = 60 * time.Second
|
||||
errorMissingOCISpec = errors.New("Missing OCI specification")
|
||||
defaultKataHostSharedDir = "/run/kata-containers/shared/sandboxes/"
|
||||
defaultKataGuestSharedDir = "/run/kata-containers/shared/containers/"
|
||||
defaultKataGuestNydusRootDir = "/run/kata-containers/shared/"
|
||||
mountGuestTag = "kataShared"
|
||||
defaultKataGuestSandboxDir = "/run/kata-containers/sandbox/"
|
||||
type9pFs = "9p"
|
||||
typeVirtioFS = "virtiofs"
|
||||
typeOverlayFS = "overlay"
|
||||
kata9pDevType = "9p"
|
||||
kataMmioBlkDevType = "mmioblk"
|
||||
kataBlkDevType = "blk"
|
||||
kataBlkCCWDevType = "blk-ccw"
|
||||
kataSCSIDevType = "scsi"
|
||||
kataNvdimmDevType = "nvdimm"
|
||||
kataVirtioFSDevType = "virtio-fs"
|
||||
kataOverlayDevType = "overlayfs"
|
||||
kataWatchableBindDevType = "watchable-bind"
|
||||
kataVfioPciDevType = "vfio-pci" // VFIO PCI device to used as VFIO in the container
|
||||
kataVfioPciGuestKernelDevType = "vfio-pci-gk" // VFIO PCI device for consumption by the guest kernel
|
||||
kataVfioApDevType = "vfio-ap"
|
||||
sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"}
|
||||
sharedDirVirtioFSOptions = []string{}
|
||||
sharedDirVirtioFSDaxOptions = "dax"
|
||||
shmDir = "shm"
|
||||
kataEphemeralDevType = "ephemeral"
|
||||
defaultEphemeralPath = filepath.Join(defaultKataGuestSandboxDir, kataEphemeralDevType)
|
||||
grpcMaxDataSize = int64(1024 * 1024)
|
||||
localDirOptions = []string{"mode=0777"}
|
||||
maxHostnameLen = 64
|
||||
GuestDNSFile = "/etc/resolv.conf"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -1117,20 +1118,25 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c *
|
||||
|
||||
groupNum := filepath.Base(dev.ContainerPath)
|
||||
|
||||
// Each /dev/vfio/NN device represents a VFIO group, which
|
||||
// could include several PCI devices. So we give group
|
||||
// information in the main structure, then list each
|
||||
// individual PCI device in the Options array.
|
||||
// For VFIO-PCI, each /dev/vfio/NN device represents a VFIO group,
|
||||
// which could include several PCI devices. So we give group
|
||||
// information in the main structure, then list each individual PCI
|
||||
// device in the Options array.
|
||||
//
|
||||
// Each option is formatted as "DDDD:BB:DD.F=<pcipath>"
|
||||
// DDDD:BB:DD.F is the device's PCI address on the
|
||||
// *host*. <pcipath> is the device's PCI path in the guest
|
||||
// (see qomGetPciPath() for details).
|
||||
//
|
||||
// For VFIO-AP, one VFIO group could include several queue devices. They are
|
||||
// identified by APQNs (Adjunct Processor Queue Numbers), which do not differ
|
||||
// between host and guest. They are passed as options so they can be awaited
|
||||
// by the agent.
|
||||
kataDevice := &grpc.Device{
|
||||
ContainerPath: dev.ContainerPath,
|
||||
Type: kataVfioDevType,
|
||||
Type: kataVfioPciDevType,
|
||||
Id: groupNum,
|
||||
Options: make([]string, len(devList)),
|
||||
Options: nil,
|
||||
}
|
||||
|
||||
// We always pass the device information to the agent, since
|
||||
@ -1138,11 +1144,18 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c *
|
||||
// on the vfio_mode, we need to use a different device type so
|
||||
// the agent can handle it properly
|
||||
if c.sandbox.config.VfioMode == config.VFIOModeGuestKernel {
|
||||
kataDevice.Type = kataVfioGuestKernelDevType
|
||||
kataDevice.Type = kataVfioPciGuestKernelDevType
|
||||
}
|
||||
|
||||
for i, pciDev := range devList {
|
||||
kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", pciDev.BDF, pciDev.GuestPciPath)
|
||||
if (*devList[0]).GetType() == config.VFIOAPDeviceMediatedType {
|
||||
kataDevice.Type = kataVfioApDevType
|
||||
kataDevice.Options = (*devList[0]).(config.VFIOAPDev).APDevices
|
||||
} else {
|
||||
kataDevice.Options = make([]string, len(devList))
|
||||
for i, device := range devList {
|
||||
pciDevice := (*device).(config.VFIOPCIDev)
|
||||
kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", pciDevice.BDF, pciDevice.GuestPciPath)
|
||||
}
|
||||
}
|
||||
|
||||
return kataDevice
|
||||
|
@ -1713,7 +1713,7 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
|
||||
return err
|
||||
}
|
||||
|
||||
devID := device.ID
|
||||
devID := *(*device).GetID()
|
||||
machineType := q.HypervisorConfig().HypervisorMachineType
|
||||
|
||||
if op == AddDevice {
|
||||
@ -1730,29 +1730,31 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
|
||||
// for pc machine type instead of bridge. This is useful for devices that require
|
||||
// a large PCI BAR which is currently a limitation with PCI bridges.
|
||||
if q.state.HotplugVFIOOnRootBus {
|
||||
|
||||
// In case MachineType is q35, a PCIe device is hotplugged on a PCIe Root Port.
|
||||
switch machineType {
|
||||
case QemuQ35:
|
||||
if device.IsPCIe && q.state.PCIeRootPort <= 0 {
|
||||
q.Logger().WithField("dev-id", device.ID).Warn("VFIO device is a PCIe device. It's recommended to add the PCIe Root Port by setting the pcie_root_port parameter in the configuration for q35")
|
||||
device.Bus = ""
|
||||
switch (*device).GetType() {
|
||||
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
||||
// In case MachineType is q35, a PCIe device is hotplugged on a PCIe Root Port.
|
||||
pciDevice, ok := (*device).(config.VFIOPCIDev)
|
||||
if !ok {
|
||||
return fmt.Errorf("VFIO device %+v is not PCI, but its Type said otherwise", device)
|
||||
}
|
||||
default:
|
||||
device.Bus = ""
|
||||
}
|
||||
switch machineType {
|
||||
case QemuQ35:
|
||||
if pciDevice.IsPCIe && q.state.PCIeRootPort <= 0 {
|
||||
q.Logger().WithField("dev-id", (*device).GetID()).Warn("VFIO device is a PCIe device. It's recommended to add the PCIe Root Port by setting the pcie_root_port parameter in the configuration for q35")
|
||||
pciDevice.Bus = ""
|
||||
}
|
||||
default:
|
||||
pciDevice.Bus = ""
|
||||
}
|
||||
*device = pciDevice
|
||||
|
||||
switch device.Type {
|
||||
case config.VFIODeviceNormalType:
|
||||
err = q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, device.Bus, romFile)
|
||||
case config.VFIODeviceMediatedType:
|
||||
if utils.IsAPVFIOMediatedDevice(device.SysfsDev) {
|
||||
err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev)
|
||||
if pciDevice.Type == config.VFIOPCIDeviceNormalType {
|
||||
err = q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, pciDevice.BDF, pciDevice.Bus, romFile)
|
||||
} else {
|
||||
err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, "", device.Bus, romFile)
|
||||
err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, *(*device).GetSysfsDev(), "", pciDevice.Bus, romFile)
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("Incorrect VFIO device type found")
|
||||
case config.VFIOAPDeviceMediatedType:
|
||||
err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, *(*device).GetSysfsDev())
|
||||
}
|
||||
} else {
|
||||
addr, bridge, err := q.arch.addDeviceToBridge(ctx, devID, types.PCI)
|
||||
@ -1766,15 +1768,17 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
|
||||
}
|
||||
}()
|
||||
|
||||
switch device.Type {
|
||||
case config.VFIODeviceNormalType:
|
||||
err = q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, addr, bridge.ID, romFile)
|
||||
case config.VFIODeviceMediatedType:
|
||||
if utils.IsAPVFIOMediatedDevice(device.SysfsDev) {
|
||||
err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev)
|
||||
} else {
|
||||
err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, addr, bridge.ID, romFile)
|
||||
switch (*device).GetType() {
|
||||
case config.VFIOPCIDeviceNormalType:
|
||||
pciDevice, ok := (*device).(config.VFIOPCIDev)
|
||||
if !ok {
|
||||
return fmt.Errorf("VFIO device %+v is not PCI, but its Type said otherwise", device)
|
||||
}
|
||||
err = q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, pciDevice.BDF, addr, bridge.ID, romFile)
|
||||
case config.VFIOPCIDeviceMediatedType:
|
||||
err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, *(*device).GetSysfsDev(), addr, bridge.ID, romFile)
|
||||
case config.VFIOAPDeviceMediatedType:
|
||||
err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, *(*device).GetSysfsDev())
|
||||
default:
|
||||
return fmt.Errorf("Incorrect VFIO device type found")
|
||||
}
|
||||
@ -1782,13 +1786,24 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// XXX: Depending on whether we're doing root port or
|
||||
// bridge hotplug, and how the bridge is set up in
|
||||
// other parts of the code, we may or may not already
|
||||
// have information about the slot number of the
|
||||
// bridge and/or the device. For simplicity, just
|
||||
// query both of them back from qemu
|
||||
device.GuestPciPath, err = q.qomGetPciPath(devID)
|
||||
|
||||
switch (*device).GetType() {
|
||||
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
||||
pciDevice, ok := (*device).(config.VFIOPCIDev)
|
||||
if !ok {
|
||||
return fmt.Errorf("VFIO device %+v is not PCI, but its Type said otherwise", device)
|
||||
}
|
||||
// XXX: Depending on whether we're doing root port or
|
||||
// bridge hotplug, and how the bridge is set up in
|
||||
// other parts of the code, we may or may not already
|
||||
// have information about the slot number of the
|
||||
// bridge and/or the device. For simplicity, just
|
||||
// query both of them back from qemu
|
||||
guestPciPath, err := q.qomGetPciPath(devID)
|
||||
pciDevice.GuestPciPath = guestPciPath
|
||||
*device = pciDevice
|
||||
return err
|
||||
}
|
||||
return err
|
||||
} else {
|
||||
q.Logger().WithField("dev-id", devID).Info("Start hot-unplug VFIO device")
|
||||
|
@ -675,16 +675,17 @@ func (q *qemuArchBase) appendVhostUserDevice(ctx context.Context, devices []govm
|
||||
}
|
||||
|
||||
func (q *qemuArchBase) appendVFIODevice(devices []govmmQemu.Device, vfioDev config.VFIODev) []govmmQemu.Device {
|
||||
if vfioDev.BDF == "" {
|
||||
pciDevice := vfioDev.(config.VFIOPCIDev)
|
||||
if pciDevice.BDF == "" {
|
||||
return devices
|
||||
}
|
||||
|
||||
devices = append(devices,
|
||||
govmmQemu.VFIODevice{
|
||||
BDF: vfioDev.BDF,
|
||||
VendorID: vfioDev.VendorID,
|
||||
DeviceID: vfioDev.DeviceID,
|
||||
Bus: vfioDev.Bus,
|
||||
BDF: pciDevice.BDF,
|
||||
VendorID: pciDevice.VendorID,
|
||||
DeviceID: pciDevice.DeviceID,
|
||||
Bus: pciDevice.Bus,
|
||||
},
|
||||
)
|
||||
|
||||
|
@ -463,7 +463,7 @@ func TestQemuArchBaseAppendVFIODevice(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
vfDevice := config.VFIODev{
|
||||
vfDevice := config.VFIOPCIDev{
|
||||
BDF: bdf,
|
||||
}
|
||||
|
||||
@ -483,7 +483,7 @@ func TestQemuArchBaseAppendVFIODeviceWithVendorDeviceID(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
vfDevice := config.VFIODev{
|
||||
vfDevice := config.VFIOPCIDev{
|
||||
BDF: bdf,
|
||||
VendorID: vendorID,
|
||||
DeviceID: deviceID,
|
||||
|
@ -1856,11 +1856,15 @@ func (s *Sandbox) HotplugAddDevice(ctx context.Context, device api.Device, devTy
|
||||
// adding a group of VFIO devices
|
||||
for _, dev := range vfioDevices {
|
||||
if _, err := s.hypervisor.HotplugAddDevice(ctx, dev, VfioDev); err != nil {
|
||||
bdf := ""
|
||||
if pciDevice, ok := (*dev).(config.VFIOPCIDev); ok {
|
||||
bdf = pciDevice.BDF
|
||||
}
|
||||
s.Logger().
|
||||
WithFields(logrus.Fields{
|
||||
"sandbox": s.id,
|
||||
"vfio-device-ID": dev.ID,
|
||||
"vfio-device-BDF": dev.BDF,
|
||||
"vfio-device-ID": (*dev).GetID(),
|
||||
"vfio-device-BDF": bdf,
|
||||
}).WithError(err).Error("failed to hotplug VFIO device")
|
||||
return err
|
||||
}
|
||||
@ -1909,11 +1913,15 @@ func (s *Sandbox) HotplugRemoveDevice(ctx context.Context, device api.Device, de
|
||||
// remove a group of VFIO devices
|
||||
for _, dev := range vfioDevices {
|
||||
if _, err := s.hypervisor.HotplugRemoveDevice(ctx, dev, VfioDev); err != nil {
|
||||
bdf := ""
|
||||
if pciDevice, ok := (*dev).(config.VFIOPCIDev); ok {
|
||||
bdf = pciDevice.BDF
|
||||
}
|
||||
s.Logger().WithError(err).
|
||||
WithFields(logrus.Fields{
|
||||
"sandbox": s.id,
|
||||
"vfio-device-ID": dev.ID,
|
||||
"vfio-device-BDF": dev.BDF,
|
||||
"vfio-device-ID": (*dev).GetID(),
|
||||
"vfio-device-BDF": bdf,
|
||||
}).Error("failed to hot unplug VFIO device")
|
||||
return err
|
||||
}
|
||||
|
@ -89,8 +89,7 @@ func FindContextID() (*os.File, uint64, error) {
|
||||
const (
|
||||
procMountsFile = "/proc/mounts"
|
||||
|
||||
fieldsPerLine = 6
|
||||
vfioAPSysfsDir = "vfio_ap"
|
||||
fieldsPerLine = 6
|
||||
)
|
||||
|
||||
const (
|
||||
@ -142,18 +141,6 @@ func GetDevicePathAndFsTypeOptions(mountPoint string) (devicePath, fsType string
|
||||
}
|
||||
}
|
||||
|
||||
// IsAPVFIOMediatedDevice decides whether a device is a VFIO-AP device
|
||||
// by checking for the existence of "vfio_ap" in the path
|
||||
func IsAPVFIOMediatedDevice(sysfsdev string) bool {
|
||||
split := strings.Split(sysfsdev, string(os.PathSeparator))
|
||||
for _, el := range split {
|
||||
if el == vfioAPSysfsDir {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func waitProcessUsingPidfd(pid int, timeoutSecs uint, logger *logrus.Entry) (bool, error) {
|
||||
pidfd, err := unix.PidfdOpen(pid, 0)
|
||||
|
||||
|
@ -63,19 +63,3 @@ func TestGetDevicePathAndFsTypeOptionsSuccessful(t *testing.T) {
|
||||
assert.Equal(fstype, fstypeOut)
|
||||
assert.Equal(fsOptions, optsOut)
|
||||
}
|
||||
|
||||
func TestIsAPVFIOMediatedDeviceFalse(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
// Should be false for a PCI device
|
||||
isAPMdev := IsAPVFIOMediatedDevice("/sys/bus/pci/devices/0000:00:02.0/a297db4a-f4c2-11e6-90f6-d3b88d6c9525")
|
||||
assert.False(isAPMdev)
|
||||
}
|
||||
|
||||
func TestIsAPVFIOMediatedDeviceTrue(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
// Typical AP sysfsdev
|
||||
isAPMdev := IsAPVFIOMediatedDevice("/sys/devices/vfio_ap/matrix/a297db4a-f4c2-11e6-90f6-d3b88d6c9525")
|
||||
assert.True(isAPMdev)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user