mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-07-05 19:47:53 +00:00
Merge pull request #1682 from dgibson/rescan
Remove forced PCI rescans from agent
This commit is contained in:
commit
b57613f53e
@ -17,10 +17,6 @@ use tokio::sync::Mutex;
|
||||
#[cfg(target_arch = "s390x")]
|
||||
use crate::ccw;
|
||||
use crate::linux_abi::*;
|
||||
use crate::mount::{
|
||||
DRIVER_BLK_CCW_TYPE, DRIVER_BLK_TYPE, DRIVER_MMIO_BLK_TYPE, DRIVER_NVDIMM_TYPE,
|
||||
DRIVER_SCSI_TYPE,
|
||||
};
|
||||
use crate::pci;
|
||||
use crate::sandbox::Sandbox;
|
||||
use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher};
|
||||
@ -38,6 +34,19 @@ macro_rules! sl {
|
||||
|
||||
const VM_ROOTFS: &str = "/";
|
||||
|
||||
pub const DRIVER_9P_TYPE: &str = "9p";
|
||||
pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs";
|
||||
pub const DRIVER_BLK_TYPE: &str = "blk";
|
||||
pub const DRIVER_BLK_CCW_TYPE: &str = "blk-ccw";
|
||||
pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk";
|
||||
pub const DRIVER_SCSI_TYPE: &str = "scsi";
|
||||
pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm";
|
||||
pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral";
|
||||
pub const DRIVER_LOCAL_TYPE: &str = "local";
|
||||
pub const DRIVER_WATCHABLE_BIND_TYPE: &str = "watchable-bind";
|
||||
// VFIO device to be bound to a guest kernel driver
|
||||
pub const DRIVER_VFIO_GK_TYPE: &str = "vfio-gk";
|
||||
|
||||
#[derive(Debug)]
|
||||
struct DevIndexEntry {
|
||||
idx: usize,
|
||||
@ -47,11 +56,6 @@ struct DevIndexEntry {
|
||||
#[derive(Debug)]
|
||||
struct DevIndex(HashMap<String, DevIndexEntry>);
|
||||
|
||||
#[instrument]
|
||||
pub fn rescan_pci_bus() -> Result<()> {
|
||||
online_device(SYSFS_PCI_BUS_RESCAN_FILE)
|
||||
}
|
||||
|
||||
#[instrument]
|
||||
pub fn online_device(path: &str) -> Result<()> {
|
||||
fs::write(path, "1")?;
|
||||
@ -162,8 +166,6 @@ pub async fn get_virtio_blk_pci_device_name(
|
||||
let sysfs_rel_path = pcipath_to_sysfs(&root_bus_sysfs, pcipath)?;
|
||||
let matcher = VirtioBlkPciMatcher::new(&sysfs_rel_path);
|
||||
|
||||
rescan_pci_bus()?;
|
||||
|
||||
let uev = wait_for_uevent(sandbox, matcher).await?;
|
||||
Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname))
|
||||
}
|
||||
@ -255,6 +257,35 @@ pub async fn wait_for_pmem_device(sandbox: &Arc<Mutex<Sandbox>>, devpath: &str)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct PciMatcher {
|
||||
devpath: String,
|
||||
}
|
||||
|
||||
impl PciMatcher {
|
||||
fn new(relpath: &str) -> Result<PciMatcher> {
|
||||
let root_bus = create_pci_root_bus_path();
|
||||
Ok(PciMatcher {
|
||||
devpath: format!("{}{}", root_bus, relpath),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl UeventMatcher for PciMatcher {
|
||||
fn is_match(&self, uev: &Uevent) -> bool {
|
||||
uev.devpath == self.devpath
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn wait_for_pci_device(sandbox: &Arc<Mutex<Sandbox>>, pcipath: &pci::Path) -> Result<()> {
|
||||
let root_bus_sysfs = format!("{}{}", SYSFS_DIR, create_pci_root_bus_path());
|
||||
let sysfs_rel_path = pcipath_to_sysfs(&root_bus_sysfs, pcipath)?;
|
||||
let matcher = PciMatcher::new(&sysfs_rel_path)?;
|
||||
|
||||
let _ = wait_for_uevent(sandbox, matcher).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Scan SCSI bus for the given SCSI address(SCSI-Id and LUN)
|
||||
#[instrument]
|
||||
fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
|
||||
@ -451,6 +482,37 @@ async fn virtio_nvdimm_device_handler(
|
||||
update_spec_device_list(device, spec, devidx)
|
||||
}
|
||||
|
||||
fn split_vfio_option(opt: &str) -> Option<(&str, &str)> {
|
||||
let mut tokens = opt.split('=');
|
||||
let hostbdf = tokens.next()?;
|
||||
let path = tokens.next()?;
|
||||
if tokens.next().is_some() {
|
||||
None
|
||||
} else {
|
||||
Some((hostbdf, path))
|
||||
}
|
||||
}
|
||||
|
||||
// device.options should have one entry for each PCI device in the VFIO group
|
||||
// Each option should have the form "DDDD:BB:DD.F=<pcipath>"
|
||||
// DDDD:BB:DD.F is the device's PCI address in the host
|
||||
// <pcipath> is a PCI path to the device in the guest (see pci.rs)
|
||||
async fn vfio_gk_device_handler(
|
||||
device: &Device,
|
||||
_: &mut Spec,
|
||||
sandbox: &Arc<Mutex<Sandbox>>,
|
||||
_: &DevIndex,
|
||||
) -> Result<()> {
|
||||
for opt in device.options.iter() {
|
||||
let (_, pcipath) =
|
||||
split_vfio_option(opt).ok_or_else(|| anyhow!("Malformed VFIO option {:?}", opt))?;
|
||||
let pcipath = pci::Path::from_str(pcipath)?;
|
||||
|
||||
wait_for_pci_device(sandbox, &pcipath).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
impl DevIndex {
|
||||
fn new(spec: &Spec) -> DevIndex {
|
||||
let mut map = HashMap::new();
|
||||
@ -520,6 +582,7 @@ async fn add_device(
|
||||
DRIVER_MMIO_BLK_TYPE => virtiommio_blk_device_handler(device, spec, sandbox, devidx).await,
|
||||
DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, spec, sandbox, devidx).await,
|
||||
DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, spec, sandbox, devidx).await,
|
||||
DRIVER_VFIO_GK_TYPE => vfio_gk_device_handler(device, spec, sandbox, devidx).await,
|
||||
_ => Err(anyhow!("Unknown device type {}", device.field_type)),
|
||||
}
|
||||
}
|
||||
@ -1068,4 +1131,14 @@ mod tests {
|
||||
assert!(!matcher_b.is_match(&uev_a));
|
||||
assert!(!matcher_a.is_match(&uev_b));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_split_vfio_option() {
|
||||
assert_eq!(
|
||||
split_vfio_option("0000:01:00.0=02/01"),
|
||||
Some(("0000:01:00.0", "02/01"))
|
||||
);
|
||||
assert_eq!(split_vfio_option("0000:01:00.0=02/01=rubbish"), None);
|
||||
assert_eq!(split_vfio_option("0000:01:00.0"), None);
|
||||
}
|
||||
}
|
||||
|
@ -9,7 +9,6 @@
|
||||
use std::fs;
|
||||
|
||||
pub const SYSFS_DIR: &str = "/sys";
|
||||
pub const SYSFS_PCI_BUS_RESCAN_FILE: &str = "/sys/bus/pci/rescan";
|
||||
#[cfg(any(
|
||||
target_arch = "powerpc64",
|
||||
target_arch = "s390x",
|
||||
|
@ -22,6 +22,9 @@ use regex::Regex;
|
||||
|
||||
use crate::device::{
|
||||
get_scsi_device_name, get_virtio_blk_pci_device_name, online_device, wait_for_pmem_device,
|
||||
DRIVER_9P_TYPE, DRIVER_BLK_CCW_TYPE, DRIVER_BLK_TYPE, DRIVER_EPHEMERAL_TYPE, DRIVER_LOCAL_TYPE,
|
||||
DRIVER_MMIO_BLK_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_SCSI_TYPE, DRIVER_VIRTIOFS_TYPE,
|
||||
DRIVER_WATCHABLE_BIND_TYPE,
|
||||
};
|
||||
use crate::linux_abi::*;
|
||||
use crate::pci;
|
||||
@ -33,17 +36,6 @@ use anyhow::{anyhow, Context, Result};
|
||||
use slog::Logger;
|
||||
use tracing::instrument;
|
||||
|
||||
pub const DRIVER_9P_TYPE: &str = "9p";
|
||||
pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs";
|
||||
pub const DRIVER_BLK_TYPE: &str = "blk";
|
||||
pub const DRIVER_BLK_CCW_TYPE: &str = "blk-ccw";
|
||||
pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk";
|
||||
pub const DRIVER_SCSI_TYPE: &str = "scsi";
|
||||
pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm";
|
||||
pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral";
|
||||
pub const DRIVER_LOCAL_TYPE: &str = "local";
|
||||
pub const DRIVER_WATCHABLE_BIND_TYPE: &str = "watchable-bind";
|
||||
|
||||
pub const TYPE_ROOTFS: &str = "rootfs";
|
||||
|
||||
pub const MOUNT_GUEST_TAG: &str = "kataShared";
|
||||
|
@ -3,7 +3,6 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use crate::pci;
|
||||
use async_trait::async_trait;
|
||||
use rustjail::{pipestream::PipeStream, process::StreamType};
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt, ReadHalf};
|
||||
@ -44,12 +43,13 @@ use nix::sys::stat;
|
||||
use nix::unistd::{self, Pid};
|
||||
use rustjail::process::ProcessOperations;
|
||||
|
||||
use crate::device::{add_devices, pcipath_to_sysfs, rescan_pci_bus, update_device_cgroup};
|
||||
use crate::device::{add_devices, get_virtio_blk_pci_device_name, update_device_cgroup};
|
||||
use crate::linux_abi::*;
|
||||
use crate::metrics::get_metrics;
|
||||
use crate::mount::{add_storages, baremount, remove_mounts, STORAGE_HANDLER_LIST};
|
||||
use crate::namespace::{NSTYPEIPC, NSTYPEPID, NSTYPEUTS};
|
||||
use crate::network::setup_guest_dns;
|
||||
use crate::pci;
|
||||
use crate::random;
|
||||
use crate::sandbox::Sandbox;
|
||||
use crate::version::{AGENT_VERSION, API_VERSION};
|
||||
@ -134,10 +134,6 @@ impl AgentService {
|
||||
|
||||
info!(sl!(), "receive createcontainer, spec: {:?}", &oci);
|
||||
|
||||
// re-scan PCI bus
|
||||
// looking for hidden devices
|
||||
rescan_pci_bus().context("Could not rescan PCI bus")?;
|
||||
|
||||
// Some devices need some extra processing (the ones invoked with
|
||||
// --device for instance), and that's what this call is doing. It
|
||||
// updates the devices listed in the OCI spec, so that they actually
|
||||
@ -1204,7 +1200,9 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
|
||||
) -> ttrpc::Result<Empty> {
|
||||
trace_rpc_call!(ctx, "add_swap", req);
|
||||
|
||||
do_add_swap(&req).map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
|
||||
do_add_swap(&self.sandbox, &req)
|
||||
.await
|
||||
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
|
||||
|
||||
Ok(Empty::new())
|
||||
}
|
||||
@ -1557,43 +1555,13 @@ fn do_copy_file(req: &CopyFileRequest) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn path_name_lookup<P: std::clone::Clone + AsRef<Path> + std::fmt::Debug>(
|
||||
path: P,
|
||||
lookup: &str,
|
||||
) -> Result<(PathBuf, String)> {
|
||||
for entry in fs::read_dir(path.clone())? {
|
||||
let entry = entry?;
|
||||
if let Some(name) = entry.path().file_name() {
|
||||
if let Some(name) = name.to_str() {
|
||||
if Some(0) == name.find(lookup) {
|
||||
return Ok((entry.path(), name.to_string()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(anyhow!("cannot get {} dir in {:?}", lookup, path))
|
||||
}
|
||||
|
||||
fn do_add_swap(req: &AddSwapRequest) -> Result<()> {
|
||||
// re-scan PCI bus
|
||||
// looking for hidden devices
|
||||
rescan_pci_bus().context("Could not rescan PCI bus")?;
|
||||
|
||||
async fn do_add_swap(sandbox: &Arc<Mutex<Sandbox>>, req: &AddSwapRequest) -> Result<()> {
|
||||
let mut slots = Vec::new();
|
||||
for slot in &req.PCIPath {
|
||||
slots.push(pci::Slot::new(*slot as u8)?);
|
||||
slots.push(pci::Slot::new(*slot)?);
|
||||
}
|
||||
let pcipath = pci::Path::new(slots)?;
|
||||
let root_bus_sysfs = format!("{}{}", SYSFS_DIR, create_pci_root_bus_path());
|
||||
let sysfs_rel_path = format!(
|
||||
"{}{}",
|
||||
root_bus_sysfs,
|
||||
pcipath_to_sysfs(&root_bus_sysfs, &pcipath)?
|
||||
);
|
||||
let (mut virtio_path, _) = path_name_lookup(sysfs_rel_path, "virtio")?;
|
||||
virtio_path.push("block");
|
||||
let (_, dev_name) = path_name_lookup(virtio_path, "vd")?;
|
||||
let dev_name = format!("/dev/{}", dev_name);
|
||||
let dev_name = get_virtio_blk_pci_device_name(sandbox, &pcipath).await?;
|
||||
|
||||
let c_str = CString::new(dev_name)?;
|
||||
let ret = unsafe { libc::swapon(c_str.as_ptr() as *const c_char, 0) };
|
||||
|
@ -111,10 +111,13 @@ pub async fn wait_for_uevent(
|
||||
sandbox: &Arc<Mutex<Sandbox>>,
|
||||
matcher: impl UeventMatcher,
|
||||
) -> Result<Uevent> {
|
||||
let logprefix = format!("Waiting for {:?}", &matcher);
|
||||
|
||||
info!(sl!(), "{}", logprefix);
|
||||
let mut sb = sandbox.lock().await;
|
||||
for uev in sb.uevent_map.values() {
|
||||
if matcher.is_match(uev) {
|
||||
info!(sl!(), "Device {:?} found in device map", uev);
|
||||
info!(sl!(), "{}: found {:?} in uevent map", logprefix, &uev);
|
||||
return Ok(uev.clone());
|
||||
}
|
||||
}
|
||||
@ -129,7 +132,8 @@ pub async fn wait_for_uevent(
|
||||
sb.uevent_watchers.push(Some((Box::new(matcher), tx)));
|
||||
drop(sb); // unlock
|
||||
|
||||
info!(sl!(), "Waiting on channel for uevent notification\n");
|
||||
info!(sl!(), "{}: waiting on channel", logprefix);
|
||||
|
||||
let hotplug_timeout = AGENT_CONFIG.read().await.hotplug_timeout;
|
||||
|
||||
let uev = match tokio::time::timeout(hotplug_timeout, rx).await {
|
||||
@ -146,6 +150,7 @@ pub async fn wait_for_uevent(
|
||||
}
|
||||
};
|
||||
|
||||
info!(sl!(), "{}: found {:?} on channel", logprefix, &uev);
|
||||
Ok(uev)
|
||||
}
|
||||
|
||||
|
@ -503,7 +503,7 @@ func (clh *cloudHypervisor) hotplugAddBlockDevice(drive *config.BlockDrive) erro
|
||||
return err
|
||||
}
|
||||
|
||||
func (clh *cloudHypervisor) hotPlugVFIODevice(device config.VFIODev) error {
|
||||
func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error {
|
||||
cl := clh.client()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), clhHotPlugAPITimeout*time.Second)
|
||||
defer cancel()
|
||||
@ -512,10 +512,28 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device config.VFIODev) error {
|
||||
clhDevice := *chclient.NewVmAddDevice()
|
||||
clhDevice.Path = &device.SysfsDev
|
||||
clhDevice.Id = &device.ID
|
||||
_, _, err := cl.VmAddDevicePut(ctx, clhDevice)
|
||||
pciInfo, _, err := cl.VmAddDevicePut(ctx, clhDevice)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err))
|
||||
return fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err))
|
||||
}
|
||||
|
||||
// clh doesn't use bridges, so the PCI path is simply the slot
|
||||
// number of the device. This will break if clh starts using
|
||||
// bridges (including PCI-E root ports), but so will the clh
|
||||
// API, since there's no way it can reliably predict a guest
|
||||
// Bdf when bridges are present.
|
||||
tokens := strings.Split(pciInfo.Bdf, ":")
|
||||
if len(tokens) != 3 || tokens[0] != "0000" || tokens[1] != "00" {
|
||||
return fmt.Errorf("Unexpected PCI address %q from clh hotplug", pciInfo.Bdf)
|
||||
}
|
||||
|
||||
tokens = strings.Split(tokens[2], ".")
|
||||
if len(tokens) != 2 || tokens[1] != "0" || len(tokens[0]) != 2 {
|
||||
return fmt.Errorf("Unexpected PCI address %q from clh hotplug", pciInfo.Bdf)
|
||||
}
|
||||
|
||||
device.GuestPciPath, err = vcTypes.PciPathFromString(tokens[0])
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
@ -529,7 +547,7 @@ func (clh *cloudHypervisor) hotplugAddDevice(ctx context.Context, devInfo interf
|
||||
return nil, clh.hotplugAddBlockDevice(drive)
|
||||
case vfioDev:
|
||||
device := devInfo.(*config.VFIODev)
|
||||
return nil, clh.hotPlugVFIODevice(*device)
|
||||
return nil, clh.hotPlugVFIODevice(device)
|
||||
default:
|
||||
return nil, fmt.Errorf("cannot hotplug device: unsupported device type '%v'", devType)
|
||||
}
|
||||
|
@ -224,6 +224,9 @@ type VFIODev struct {
|
||||
// Bus of VFIO PCIe device
|
||||
Bus string
|
||||
|
||||
// Guest PCI path of device
|
||||
GuestPciPath vcTypes.PciPath
|
||||
|
||||
// Type of VFIO device
|
||||
Type VFIODeviceType
|
||||
|
||||
|
@ -92,6 +92,7 @@ var (
|
||||
kataNvdimmDevType = "nvdimm"
|
||||
kataVirtioFSDevType = "virtio-fs"
|
||||
kataWatchableBindDevType = "watchable-bind"
|
||||
kataVfioGuestKernelDevType = "vfio-gk" // VFIO device for consumption by the guest kernel
|
||||
sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"}
|
||||
sharedDirVirtioFSOptions = []string{}
|
||||
sharedDirVirtioFSDaxOptions = "dax"
|
||||
@ -1144,9 +1145,7 @@ func (k *kataAgent) handleShm(mounts []specs.Mount, sandbox *Sandbox) {
|
||||
}
|
||||
}
|
||||
|
||||
func (k *kataAgent) appendBlockDevice(dev ContainerDevice, c *Container) *grpc.Device {
|
||||
device := c.sandbox.devManager.GetDeviceByID(dev.ID)
|
||||
|
||||
func (k *kataAgent) appendBlockDevice(dev ContainerDevice, device api.Device, c *Container) *grpc.Device {
|
||||
d, ok := device.GetDeviceInfo().(*config.BlockDrive)
|
||||
if !ok || d == nil {
|
||||
k.Logger().WithField("device", device).Error("malformed block drive")
|
||||
@ -1187,9 +1186,7 @@ func (k *kataAgent) appendBlockDevice(dev ContainerDevice, c *Container) *grpc.D
|
||||
return kataDevice
|
||||
}
|
||||
|
||||
func (k *kataAgent) appendVhostUserBlkDevice(dev ContainerDevice, c *Container) *grpc.Device {
|
||||
device := c.sandbox.devManager.GetDeviceByID(dev.ID)
|
||||
|
||||
func (k *kataAgent) appendVhostUserBlkDevice(dev ContainerDevice, device api.Device, c *Container) *grpc.Device {
|
||||
d, ok := device.GetDeviceInfo().(*config.VhostUserDeviceAttrs)
|
||||
if !ok || d == nil {
|
||||
k.Logger().WithField("device", device).Error("malformed vhost-user-blk drive")
|
||||
@ -1205,6 +1202,38 @@ func (k *kataAgent) appendVhostUserBlkDevice(dev ContainerDevice, c *Container)
|
||||
return kataDevice
|
||||
}
|
||||
|
||||
func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c *Container) *grpc.Device {
|
||||
devList, ok := device.GetDeviceInfo().([]*config.VFIODev)
|
||||
if !ok || devList == nil {
|
||||
k.Logger().WithField("device", device).Error("malformed vfio device")
|
||||
return nil
|
||||
}
|
||||
|
||||
groupNum := filepath.Base(dev.ContainerPath)
|
||||
|
||||
// Each /dev/vfio/NN device represents a VFIO group, which
|
||||
// could include several PCI devices. So we give group
|
||||
// information in the main structure, then list each
|
||||
// individual PCI device in the Options array.
|
||||
//
|
||||
// Each option is formatted as "DDDD:BB:DD.F=<pcipath>"
|
||||
// DDDD:BB:DD.F is the device's PCI address on the
|
||||
// *host*. <pcipath> is the device's PCI path in the guest
|
||||
// (see qomGetPciPath() for details).
|
||||
kataDevice := &grpc.Device{
|
||||
ContainerPath: dev.ContainerPath,
|
||||
Type: kataVfioGuestKernelDevType,
|
||||
Id: groupNum,
|
||||
Options: make([]string, len(devList)),
|
||||
}
|
||||
|
||||
for i, pciDev := range devList {
|
||||
kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", pciDev.BDF, pciDev.GuestPciPath)
|
||||
}
|
||||
|
||||
return kataDevice
|
||||
}
|
||||
|
||||
func (k *kataAgent) appendDevices(deviceList []*grpc.Device, c *Container) []*grpc.Device {
|
||||
var kataDevice *grpc.Device
|
||||
|
||||
@ -1217,9 +1246,11 @@ func (k *kataAgent) appendDevices(deviceList []*grpc.Device, c *Container) []*gr
|
||||
|
||||
switch device.DeviceType() {
|
||||
case config.DeviceBlock:
|
||||
kataDevice = k.appendBlockDevice(dev, c)
|
||||
kataDevice = k.appendBlockDevice(dev, device, c)
|
||||
case config.VhostUserBlk:
|
||||
kataDevice = k.appendVhostUserBlkDevice(dev, c)
|
||||
kataDevice = k.appendVhostUserBlkDevice(dev, device, c)
|
||||
case config.DeviceVFIO:
|
||||
kataDevice = k.appendVfioDevice(dev, device, c)
|
||||
}
|
||||
|
||||
if kataDevice == nil {
|
||||
|
@ -1440,6 +1440,69 @@ func (q *qemu) hotplugVhostUserDevice(ctx context.Context, vAttr *config.VhostUs
|
||||
}
|
||||
}
|
||||
|
||||
// Query QMP to find the PCI slot of a device, given its QOM path or ID
|
||||
func (q *qemu) qomGetSlot(qomPath string) (vcTypes.PciSlot, error) {
|
||||
addr, err := q.qmpMonitorCh.qmp.ExecQomGet(q.qmpMonitorCh.ctx, qomPath, "addr")
|
||||
if err != nil {
|
||||
return vcTypes.PciSlot{}, err
|
||||
}
|
||||
addrf, ok := addr.(float64)
|
||||
// XXX going via float makes no real sense, but that's how
|
||||
// JSON works, and we'll get away with it for the small values
|
||||
// we have here
|
||||
if !ok {
|
||||
return vcTypes.PciSlot{}, fmt.Errorf("addr QOM property of %q is %T not a number", qomPath, addr)
|
||||
}
|
||||
addri := int(addrf)
|
||||
|
||||
slotNum, funcNum := addri>>3, addri&0x7
|
||||
if funcNum != 0 {
|
||||
return vcTypes.PciSlot{}, fmt.Errorf("Unexpected non-zero PCI function (%02x.%1x) on %q",
|
||||
slotNum, funcNum, qomPath)
|
||||
}
|
||||
|
||||
return vcTypes.PciSlotFromInt(slotNum)
|
||||
}
|
||||
|
||||
// Query QMP to find a device's PCI path given its QOM path or ID
|
||||
func (q *qemu) qomGetPciPath(qemuID string) (vcTypes.PciPath, error) {
|
||||
// XXX: For now we assume there's exactly one bridge, since
|
||||
// that's always how we configure qemu from Kata for now. It
|
||||
// would be good to generalize this to different PCI
|
||||
// topologies
|
||||
devSlot, err := q.qomGetSlot(qemuID)
|
||||
if err != nil {
|
||||
return vcTypes.PciPath{}, err
|
||||
}
|
||||
|
||||
busq, err := q.qmpMonitorCh.qmp.ExecQomGet(q.qmpMonitorCh.ctx, qemuID, "parent_bus")
|
||||
if err != nil {
|
||||
return vcTypes.PciPath{}, err
|
||||
}
|
||||
|
||||
bus, ok := busq.(string)
|
||||
if !ok {
|
||||
return vcTypes.PciPath{}, fmt.Errorf("parent_bus QOM property of %s is %t not a string", qemuID, busq)
|
||||
}
|
||||
|
||||
// `bus` is the QOM path of the QOM bus object, but we need
|
||||
// the PCI bridge which manages that bus. There doesn't seem
|
||||
// to be a way to get that other than to simply drop the last
|
||||
// path component.
|
||||
idx := strings.LastIndex(bus, "/")
|
||||
if idx == -1 {
|
||||
return vcTypes.PciPath{}, fmt.Errorf("Bus has unexpected QOM path %s", bus)
|
||||
}
|
||||
bridge := bus[:idx]
|
||||
|
||||
bridgeSlot, err := q.qomGetSlot(bridge)
|
||||
if err != nil {
|
||||
return vcTypes.PciPath{}, err
|
||||
}
|
||||
|
||||
return vcTypes.PciPathFromSlots(bridgeSlot, devSlot)
|
||||
}
|
||||
|
||||
func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op operation) (err error) {
|
||||
if err = q.qmpSetup(); err != nil {
|
||||
return err
|
||||
@ -1476,17 +1539,17 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
|
||||
|
||||
switch device.Type {
|
||||
case config.VFIODeviceNormalType:
|
||||
return q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, device.Bus, romFile)
|
||||
err = q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, device.Bus, romFile)
|
||||
case config.VFIODeviceMediatedType:
|
||||
if utils.IsAPVFIOMediatedDevice(device.SysfsDev) {
|
||||
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev)
|
||||
err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev)
|
||||
} else {
|
||||
err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, "", device.Bus, romFile)
|
||||
}
|
||||
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, "", device.Bus, romFile)
|
||||
default:
|
||||
return fmt.Errorf("Incorrect VFIO device type found")
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
addr, bridge, err := q.arch.addDeviceToBridge(ctx, devID, types.PCI)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -1500,15 +1563,28 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
|
||||
|
||||
switch device.Type {
|
||||
case config.VFIODeviceNormalType:
|
||||
return q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, addr, bridge.ID, romFile)
|
||||
err = q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, addr, bridge.ID, romFile)
|
||||
case config.VFIODeviceMediatedType:
|
||||
if utils.IsAPVFIOMediatedDevice(device.SysfsDev) {
|
||||
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev)
|
||||
err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev)
|
||||
} else {
|
||||
err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, addr, bridge.ID, romFile)
|
||||
}
|
||||
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, addr, bridge.ID, romFile)
|
||||
default:
|
||||
return fmt.Errorf("Incorrect VFIO device type found")
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// XXX: Depending on whether we're doing root port or
|
||||
// bridge hotplug, and how the bridge is set up in
|
||||
// other parts of the code, we may or may not already
|
||||
// have information about the slot number of the
|
||||
// bridge and or the device. For simplicity, just
|
||||
// query both of them back from qemu
|
||||
device.GuestPciPath, err = q.qomGetPciPath(devID)
|
||||
return err
|
||||
} else {
|
||||
q.Logger().WithField("dev-id", devID).Info("Start hot-unplug VFIO device")
|
||||
|
||||
|
@ -8,8 +8,8 @@ package virtcontainers
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
govmmQemu "github.com/kata-containers/govmm/qemu"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
|
||||
|
Loading…
Reference in New Issue
Block a user