mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-07-02 07:02:16 +00:00
For QEMU cold-plug + guest-kernel mode the guest BDF of a cold-plugged VFIO device is auto-allocated at boot (each pcie-root-port is added with chassis=N,slot=N but no pinned addr=, so QEMU picks the next free slot on pcie.0). The hot-plug path already queries QMP via qomGetPciPath; reuse that same mechanism for cold-plugged devices. Add ResolveColdPlugVFIOGuestPciPaths to the Hypervisor interface. Implement it in qemu.go using qomGetPciPath. Add no-op stubs for all other hypervisors. Call it at the start of setupNetworks so that the PCI paths are resolved before generateVCNetworkStructures emits the agent Interface proto. Also stamp the resolved path onto PhysicalEndpoints (used by SR-IOV VFs exposed as physical network devices) so that update_interface carries a non-empty devicePath. Without devicePath the agent falls back to a by-MAC link lookup which fails when the VF firmware MAC differs from the CNI-assigned MAC after the vfio-pci unbind/rebind cycle. Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com> Assisted-by: Cursor <cursoragent@cursor.com>
304 lines
8.1 KiB
Go
304 lines
8.1 KiB
Go
// Copyright (c) 2022 IBM Corporation
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
|
|
package virtcontainers
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net"
|
|
"os"
|
|
"strconv"
|
|
"time"
|
|
|
|
cri "github.com/containerd/containerd/pkg/cri/annotations"
|
|
"github.com/containerd/ttrpc"
|
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
|
persistapi "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
|
pb "github.com/kata-containers/kata-containers/src/runtime/protocols/hypervisor"
|
|
hypannotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
|
|
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
|
|
"github.com/pkg/errors"
|
|
)
|
|
|
|
// defaultMinTimeout is the minimum StartVM timeout, in seconds, used when the
// hypervisor configuration does not provide a positive RemoteHypervisorTimeout.
const defaultMinTimeout = 60
|
|
|
|
type remoteHypervisor struct {
|
|
sandboxID remoteHypervisorSandboxID
|
|
agentSocketPath string
|
|
config HypervisorConfig
|
|
}
|
|
|
|
// remoteHypervisorSandboxID is the sandbox identifier used in requests sent to
// the remote hypervisor service.
type remoteHypervisorSandboxID string
|
|
|
|
type remoteService struct {
|
|
conn net.Conn
|
|
client pb.HypervisorService
|
|
}
|
|
|
|
func openRemoteService(socketPath string) (*remoteService, error) {
|
|
|
|
conn, err := net.Dial("unix", socketPath)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to connect to remote hypervisor socket: %w", err)
|
|
}
|
|
|
|
ttrpcClient := ttrpc.NewClient(conn)
|
|
|
|
client := pb.NewHypervisorClient(ttrpcClient)
|
|
|
|
s := &remoteService{
|
|
conn: conn,
|
|
client: client,
|
|
}
|
|
|
|
return s, nil
|
|
}
|
|
|
|
func (s *remoteService) Close() error {
|
|
return s.conn.Close()
|
|
}
|
|
|
|
func (rh *remoteHypervisor) CreateVM(ctx context.Context, id string, network Network, hypervisorConfig *HypervisorConfig) error {
|
|
|
|
rh.sandboxID = remoteHypervisorSandboxID(id)
|
|
|
|
if err := rh.setConfig(hypervisorConfig); err != nil {
|
|
return err
|
|
}
|
|
|
|
s, err := openRemoteService(hypervisorConfig.RemoteHypervisorSocket)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer s.Close()
|
|
|
|
annotations := map[string]string{}
|
|
annotations[cri.SandboxName] = hypervisorConfig.SandboxName
|
|
annotations[cri.SandboxNamespace] = hypervisorConfig.SandboxNamespace
|
|
annotations[hypannotations.MachineType] = hypervisorConfig.HypervisorMachineType
|
|
annotations[hypannotations.ImagePath] = hypervisorConfig.ImagePath
|
|
annotations[hypannotations.DefaultVCPUs] = strconv.FormatUint(uint64(hypervisorConfig.NumVCPUs()), 10)
|
|
annotations[hypannotations.DefaultMemory] = strconv.FormatUint(uint64(hypervisorConfig.MemorySize), 10)
|
|
annotations[hypannotations.Initdata] = hypervisorConfig.Initdata
|
|
annotations[hypannotations.DefaultGPUs] = strconv.FormatUint(uint64(hypervisorConfig.DefaultGPUs), 10)
|
|
annotations[hypannotations.DefaultGPUModel] = hypervisorConfig.DefaultGPUModel
|
|
|
|
req := &pb.CreateVMRequest{
|
|
Id: id,
|
|
Annotations: annotations,
|
|
NetworkNamespacePath: network.NetworkID(),
|
|
}
|
|
|
|
res, err := s.client.CreateVM(ctx, req)
|
|
if err != nil {
|
|
return fmt.Errorf("remote hypervisor call failed: %w", err)
|
|
}
|
|
|
|
if res.AgentSocketPath == "" {
|
|
return errors.New("remote hypervisor does not return tunnel socket path")
|
|
}
|
|
|
|
rh.agentSocketPath = res.AgentSocketPath
|
|
|
|
return nil
|
|
}
|
|
|
|
func (rh *remoteHypervisor) StartVM(ctx context.Context, timeout int) error {
|
|
|
|
minTimeout := defaultMinTimeout
|
|
if rh.config.RemoteHypervisorTimeout > 0 {
|
|
minTimeout = int(rh.config.RemoteHypervisorTimeout)
|
|
}
|
|
|
|
if timeout < minTimeout {
|
|
timeout = minTimeout
|
|
}
|
|
|
|
s, err := openRemoteService(rh.config.RemoteHypervisorSocket)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer s.Close()
|
|
|
|
req := &pb.StartVMRequest{
|
|
Id: string(rh.sandboxID),
|
|
}
|
|
|
|
ctx2, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
|
|
defer cancel()
|
|
|
|
hvLogger.Infof("calling remote hypervisor StartVM (timeout: %d)", timeout)
|
|
|
|
if _, err := s.client.StartVM(ctx2, req); err != nil {
|
|
return fmt.Errorf("remote hypervisor call failed: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (rh *remoteHypervisor) AttestVM(ctx context.Context) error {
|
|
return nil
|
|
}
|
|
|
|
func (rh *remoteHypervisor) StopVM(ctx context.Context, waitOnly bool) error {
|
|
|
|
s, err := openRemoteService(rh.config.RemoteHypervisorSocket)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer s.Close()
|
|
|
|
req := &pb.StopVMRequest{
|
|
Id: string(rh.sandboxID),
|
|
}
|
|
|
|
if _, err := s.client.StopVM(ctx, req); err != nil {
|
|
return fmt.Errorf("remote hypervisor call failed: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (rh *remoteHypervisor) GenerateSocket(id string) (interface{}, error) {
|
|
|
|
socketPath := rh.agentSocketPath
|
|
if len(socketPath) == 0 {
|
|
return nil, errors.New("failed to generate remote sock: TunnelSocketPath is not set")
|
|
}
|
|
|
|
remoteSock := types.RemoteSock{
|
|
SandboxID: id,
|
|
TunnelSocketPath: socketPath,
|
|
}
|
|
|
|
return remoteSock, nil
|
|
}
|
|
|
|
func notImplemented(name string) error {
|
|
|
|
err := errors.Errorf("%s: not implemented", name)
|
|
|
|
hvLogger.Error(err.Error())
|
|
|
|
if tracer, ok := err.(interface{ StackTrace() errors.StackTrace }); ok {
|
|
for _, f := range tracer.StackTrace() {
|
|
hvLogger.Errorf("%+s:%d\n", f, f)
|
|
}
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
func (rh *remoteHypervisor) PauseVM(ctx context.Context) error {
|
|
return notImplemented("PauseVM")
|
|
}
|
|
|
|
func (rh *remoteHypervisor) SaveVM() error {
|
|
return notImplemented("SaveVM")
|
|
}
|
|
|
|
func (rh *remoteHypervisor) ResumeVM(ctx context.Context) error {
|
|
return notImplemented("ResumeVM")
|
|
}
|
|
|
|
func (rh *remoteHypervisor) AddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) error {
|
|
// TODO should we return notImplemented("AddDevice"), rather than nil and ignoring it?
|
|
hvLogger.Infof("addDevice: deviceType=%v devInfo=%#v", devType, devInfo)
|
|
return nil
|
|
}
|
|
|
|
func (rh *remoteHypervisor) HotplugAddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error) {
|
|
return nil, notImplemented("HotplugAddDevice")
|
|
}
|
|
|
|
func (rh *remoteHypervisor) HotplugRemoveDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error) {
|
|
return nil, notImplemented("HotplugRemoveDevice")
|
|
}
|
|
|
|
func (rh *remoteHypervisor) ResizeMemory(ctx context.Context, memMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, MemoryDevice, error) {
|
|
return memMB, MemoryDevice{}, nil
|
|
}
|
|
|
|
func (rh *remoteHypervisor) GetTotalMemoryMB(ctx context.Context) uint32 {
|
|
//The remote hypervisor uses the peer pod config to determine the memory of the VM, so we need to use static resource management
|
|
hvLogger.Error("GetTotalMemoryMB - remote hypervisor cannot update resources")
|
|
return 0
|
|
}
|
|
|
|
func (rh *remoteHypervisor) ResizeVCPUs(ctx context.Context, vcpus uint32) (uint32, uint32, error) {
|
|
return vcpus, vcpus, nil
|
|
}
|
|
|
|
func (rh *remoteHypervisor) GetVMConsole(ctx context.Context, sandboxID string) (string, string, error) {
|
|
return "", "", notImplemented("GetVMConsole")
|
|
}
|
|
|
|
func (rh *remoteHypervisor) Disconnect(ctx context.Context) {
|
|
notImplemented("Disconnect")
|
|
}
|
|
|
|
func (rh *remoteHypervisor) Capabilities(ctx context.Context) types.Capabilities {
|
|
var caps types.Capabilities
|
|
caps.SetBlockDeviceHotplugSupport()
|
|
return caps
|
|
}
|
|
|
|
func (rh *remoteHypervisor) HypervisorConfig() HypervisorConfig {
|
|
return rh.config
|
|
}
|
|
|
|
func (rh *remoteHypervisor) GetThreadIDs(ctx context.Context) (VcpuThreadIDs, error) {
|
|
// Not supported. return success
|
|
// Just allocating an empty map
|
|
return VcpuThreadIDs{}, nil
|
|
}
|
|
|
|
func (rh *remoteHypervisor) Cleanup(ctx context.Context) error {
|
|
return nil
|
|
}
|
|
|
|
func (rh *remoteHypervisor) setConfig(config *HypervisorConfig) error {
|
|
// Create a Validator specific for remote hypervisor
|
|
rh.config = *config
|
|
|
|
return nil
|
|
}
|
|
|
|
func (rh *remoteHypervisor) GetPids() []int {
|
|
// let's use shim pid as it used by crio to fetch start time
|
|
return []int{os.Getpid()}
|
|
}
|
|
|
|
func (rh *remoteHypervisor) GetVirtioFsPid() *int {
|
|
return nil
|
|
}
|
|
|
|
func (rh *remoteHypervisor) fromGrpc(ctx context.Context, hypervisorConfig *HypervisorConfig, j []byte) error {
|
|
panic(notImplemented("fromGrpc"))
|
|
}
|
|
|
|
func (rh *remoteHypervisor) toGrpc(ctx context.Context) ([]byte, error) {
|
|
panic(notImplemented("toGrpc"))
|
|
}
|
|
|
|
func (rh *remoteHypervisor) Check() error {
|
|
return nil
|
|
}
|
|
|
|
func (rh *remoteHypervisor) Save() persistapi.HypervisorState {
|
|
return persistapi.HypervisorState{}
|
|
}
|
|
|
|
func (rh *remoteHypervisor) Load(persistapi.HypervisorState) {
|
|
notImplemented("Load")
|
|
}
|
|
|
|
func (rh *remoteHypervisor) IsRateLimiterBuiltin() bool {
|
|
return false
|
|
}
|
|
|
|
func (rh *remoteHypervisor) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error {
|
|
return nil
|
|
}
|