dragonball: Add patches for 6.1.x

Ported the 5.10 patches to 6.1.x

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
Zvonko Kaiser 2024-07-04 17:06:39 +00:00
parent ddb8a94677
commit ad574b7e10
8 changed files with 2111 additions and 0 deletions

View File

@ -0,0 +1,522 @@
From 691186a091ecfc1777531a61594b88394d384cff Mon Sep 17 00:00:00 2001
From: Chao Wu <chaowu@linux.alibaba.com>
Date: Wed, 9 Nov 2022 11:38:36 +0800
Subject: [PATCH 1/4] upcall: establish upcall server
Upcall is a direct communication tool between hypervisor and guest. This
patch introduces the server side in the upcall system.
At the start of the upcall server, A kthread `db-vsock-srv` will be
created. In this kthread, a vsock listener is established upon specific
port(currently that port is 0xDB, DB refers to Dragonball). After socket
is created, it will start accepting the connection from the client side.
If the connection is established, upcall server will try to get cmd from
the client and that cmd could determine which upcall service will handle
the request from the client.
Besides, different service needs to be registered into upcall server so
that it could handle the request from the client. There is a
`register_db_vsock_service` in this commit provided for every service to
register service into service_entry list during initialization and we will
introduce device manager service in the following commits.
Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
Signed-off-by: Zizheng Bian <zizheng.bian@linux.alibaba.com>
Signed-off-by: Chao Wu <chaowu@linux.alibaba.com>
Signed-off-by: WangYu <WangYu@linux.alibaba.com>
Signed-off-by: Xingjun Liu <xingjun.liu@linux.alibaba.com>
---
drivers/misc/Kconfig | 1 +
drivers/misc/Makefile | 1 +
drivers/misc/dragonball/Kconfig | 21 ++
drivers/misc/dragonball/Makefile | 6 +
drivers/misc/dragonball/upcall_srv/Kconfig | 14 +
drivers/misc/dragonball/upcall_srv/Makefile | 13 +
.../upcall_srv/dragonball_upcall_srv.c | 323 ++++++++++++++++++
include/dragonball/upcall_srv.h | 42 +++
8 files changed, 421 insertions(+)
create mode 100644 drivers/misc/dragonball/Kconfig
create mode 100644 drivers/misc/dragonball/Makefile
create mode 100644 drivers/misc/dragonball/upcall_srv/Kconfig
create mode 100644 drivers/misc/dragonball/upcall_srv/Makefile
create mode 100644 drivers/misc/dragonball/upcall_srv/dragonball_upcall_srv.c
create mode 100644 include/dragonball/upcall_srv.h
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 0cef98319..297d896f6 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -515,4 +515,5 @@ source "drivers/misc/habanalabs/Kconfig"
source "drivers/misc/uacce/Kconfig"
source "drivers/misc/pvpanic/Kconfig"
source "drivers/misc/mchp_pci1xxxx/Kconfig"
+source "drivers/misc/dragonball/Kconfig"
endmenu
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index ac9b3e757..6ef1ddc50 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -62,3 +62,4 @@ obj-$(CONFIG_HI6421V600_IRQ) += hi6421v600-irq.o
obj-$(CONFIG_OPEN_DICE) += open-dice.o
obj-$(CONFIG_GP_PCI1XXXX) += mchp_pci1xxxx/
obj-$(CONFIG_VCPU_STALL_DETECTOR) += vcpu_stall_detector.o
+obj-$(CONFIG_DRAGONBALL_DRIVERS) += dragonball/
diff --git a/drivers/misc/dragonball/Kconfig b/drivers/misc/dragonball/Kconfig
new file mode 100644
index 000000000000..f81be3721908
--- /dev/null
+++ b/drivers/misc/dragonball/Kconfig
@@ -0,0 +1,21 @@
+#
+# Alibaba Dragonball Secure Container Runtime Drivers
+#
+
+menuconfig DRAGONBALL_DRIVERS
+ bool "Alibaba Dragonball Secure Container Runtime Drivers"
+ depends on X86_64 || ARM64
+ default n
+ help
+ Alibaba Dragonball is a secure container runtime with an embedded micro-vmm
+ to securely isolate container workloads.
+
+ Say Y here to get to see options for various misc drivers to support the
+ Alibaba Dragonball secure container runtime. This option alone does not
+ add any kernel code.
+
+ If unsure, say N.
+
+if DRAGONBALL_DRIVERS
+source "drivers/misc/dragonball/upcall_srv/Kconfig"
+endif # DRAGONBALL_DRIVERS
diff --git a/drivers/misc/dragonball/Makefile b/drivers/misc/dragonball/Makefile
new file mode 100644
index 000000000000..b7bd86d73ade
--- /dev/null
+++ b/drivers/misc/dragonball/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Dragonball misc drivers
+#
+
+obj-$(CONFIG_DRAGONBALL_UPCALL_SRV) += upcall_srv/
diff --git a/drivers/misc/dragonball/upcall_srv/Kconfig b/drivers/misc/dragonball/upcall_srv/Kconfig
new file mode 100644
index 000000000000..b00bf1f8637d
--- /dev/null
+++ b/drivers/misc/dragonball/upcall_srv/Kconfig
@@ -0,0 +1,14 @@
+#
+# Alibaba Dragonball Secure Container Runtime Drivers for vsock
+#
+
+config DRAGONBALL_UPCALL_SRV
+ bool "Dragonball in-kernel Virtual Sockets Server"
+ depends on VIRTIO_VSOCKETS
+ default y
+ help
+ This configure implements an in-kernel vsock server to dispatch Dragonball
+ requests to registered service handlers, based on the reliable Virtual
+ Sockets communication channels between guest and host/vmm.
+
+ If unsure, say N.
diff --git a/drivers/misc/dragonball/upcall_srv/Makefile b/drivers/misc/dragonball/upcall_srv/Makefile
new file mode 100644
index 000000000000..4102e6c7edef
--- /dev/null
+++ b/drivers/misc/dragonball/upcall_srv/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the in-kernel vsock server.
+#
+# Copyright (C) 2022 Alibaba Cloud, Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version
+# 2 of the License, or (at your option) any later version.
+#
+
+obj-$(CONFIG_DRAGONBALL_UPCALL_SRV) += dragonball_upcall_srv.o
diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_upcall_srv.c b/drivers/misc/dragonball/upcall_srv/dragonball_upcall_srv.c
new file mode 100644
index 000000000000..1670bd8597f0
--- /dev/null
+++ b/drivers/misc/dragonball/upcall_srv/dragonball_upcall_srv.c
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * drivers/misc/dragonball/upcall_srv/dragonball_upcall_srv.c
+ * Dragonball upcall server
+ *
+ * Copyright (C) 2022 Alibaba Cloud, Inc
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define pr_fmt(fmt) "db-upcall-srv: " fmt
+
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/vm_sockets.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <dragonball/upcall_srv.h>
+
+struct db_conn_info { /* One accepted client connection, queued as a work item. */
+ struct work_struct work; /* initialised with db_conn_service() as its handler */
+ struct socket *sock; /* the accepted client socket */
+};
+
+struct db_service_entry { /* One registered upcall service. */
+ char cmd; /* command byte that selects this service */
+ db_vsock_svc_handler_t handler; /* invoked with the client socket */
+ struct list_head list; /* link in db_service_list */
+};
+
+/* Protects registered command. */
+static DEFINE_RWLOCK(db_service_lock);
+static LIST_HEAD(db_service_list);
+
+static struct task_struct *db_service_task;
+static unsigned int db_server_port = DB_SERVER_PORT;
+
+/*
+ * Create a kernel AF_VSOCK stream socket, bind it to @port on
+ * VMADDR_CID_ANY and start listening with a backlog of 10.
+ *
+ * Returns the listening socket, or an ERR_PTR() carrying the error of the
+ * step that failed. The socket is released before a bind/listen error is
+ * returned.
+ */
+struct socket *db_create_vsock_listener(unsigned int port)
+{
+	struct sockaddr_vm svm = {
+		.svm_family = AF_VSOCK,
+		.svm_port = port,
+		.svm_cid = VMADDR_CID_ANY,
+	};
+	struct socket *listener;
+	int err;
+
+	err = sock_create_kern(&init_net, AF_VSOCK, SOCK_STREAM, 0, &listener);
+	if (err) {
+		pr_err("Server vsock create failed, err: %d\n", err);
+		return ERR_PTR(err);
+	}
+
+	err = listener->ops->bind(listener, (struct sockaddr *)&svm,
+				  sizeof(svm));
+	if (err) {
+		pr_err("Server vsock bind failed, err: %d\n", err);
+		sock_release(listener);
+		return ERR_PTR(err);
+	}
+
+	err = listener->ops->listen(listener, 10);
+	if (err < 0) {
+		pr_err("Server vsock listen error: %d\n", err);
+		sock_release(listener);
+		return ERR_PTR(err);
+	}
+
+	return listener;
+}
+EXPORT_SYMBOL_GPL(db_create_vsock_listener);
+
+/*
+ * Send @len bytes starting at @buf over @sock as a single kvec.
+ * Returns the result of kernel_sendmsg() unchanged.
+ */
+int db_vsock_sendmsg(struct socket *sock, char *buf, size_t len)
+{
+	struct msghdr hdr;
+	struct kvec iov = {
+		.iov_base = buf,
+		.iov_len = len,
+	};
+
+	memset(&hdr, 0, sizeof(hdr));
+	return kernel_sendmsg(sock, &hdr, &iov, 1, len);
+}
+EXPORT_SYMBOL_GPL(db_vsock_sendmsg);
+
+/*
+ * Receive up to @len bytes from @sock into @buf as a single kvec, passing
+ * @flags through. Returns the result of kernel_recvmsg() unchanged.
+ */
+int db_vsock_recvmsg(struct socket *sock, char *buf, size_t len, int flags)
+{
+	struct msghdr hdr;
+	struct kvec iov = {
+		.iov_base = buf,
+		.iov_len = len,
+	};
+
+	memset(&hdr, 0, sizeof(hdr));
+	return kernel_recvmsg(sock, &hdr, &iov, 1, len, flags);
+}
+EXPORT_SYMBOL_GPL(db_vsock_recvmsg);
+
+/*
+ * Receive the one-byte service command from a freshly accepted client.
+ *
+ * The socket's receive timeout is temporarily set to DB_INIT_TIMEOUT
+ * seconds for this read and restored afterwards.
+ *
+ * Returns the result of kernel_recvmsg(). *@cmd is written only when a
+ * byte was actually received (return value > 0).
+ */
+static int db_vsock_recvcmd(struct socket *cli_socket, char *cmd)
+{
+	int ret;
+	char rcv = 0;
+	long timeout;
+	struct kvec vec;
+	struct msghdr msg;
+
+	memset(&vec, 0, sizeof(vec));
+	memset(&msg, 0, sizeof(msg));
+	vec.iov_base = &rcv;
+	vec.iov_len = 1;
+
+	timeout = cli_socket->sk->sk_rcvtimeo;
+	cli_socket->sk->sk_rcvtimeo = DB_INIT_TIMEOUT * HZ;
+	ret = kernel_recvmsg(cli_socket, &msg, &vec, 1, 1, 0);
+	cli_socket->sk->sk_rcvtimeo = timeout;
+
+	/* Do not hand an unreceived byte to the caller on error/timeout. */
+	if (ret > 0)
+		*cmd = rcv;
+
+	return ret;
+}
+
+/*
+ * The workqueue handler for vsock work_struct.
+ *
+ * Each worker-pool bound to an actual CPU implements concurrency management
+ * by hooking into the scheduler. The worker-pool is notified whenever an
+ * active worker wakes up or sleeps and keeps track of the number of the
+ * currently runnable workers. Generally, work items are not expected to hog
+ * a CPU and consume many cycles. That means maintaining just enough concurrency
+ * to prevent work processing from stalling should be optimal.
+ *
+ * So it's OK to sleep in a workqueue handler, it won't cause too many worker
+ * threads.
+ *
+ * Reads the command byte from the client, looks up the matching registered
+ * service and calls its handler. The handler runs after db_service_lock has
+ * been dropped, because a handler may sleep and the rwlock must not be held
+ * across a sleeping call. If no byte arrives, no service matches, or the
+ * handler returns non-zero, the client socket is released here. The
+ * db_conn_info is always freed.
+ */
+static void db_conn_service(struct work_struct *work)
+{
+	struct db_conn_info *conn_info =
+		container_of(work, struct db_conn_info, work);
+	struct db_service_entry *service_entry;
+	db_vsock_svc_handler_t handler = NULL;
+	int len, ret = -1;
+	char cmd;
+
+	len = db_vsock_recvcmd(conn_info->sock, &cmd);
+	if (len <= 0)
+		goto recv_failed;
+
+	/* Only the lookup happens under the lock; copy the handler out. */
+	read_lock(&db_service_lock);
+	list_for_each_entry(service_entry, &db_service_list, list) {
+		if (cmd == service_entry->cmd) {
+			handler = service_entry->handler;
+			break;
+		}
+	}
+	read_unlock(&db_service_lock);
+
+	if (handler)
+		ret = handler(conn_info->sock);
+
+recv_failed:
+	if (ret) {
+		sock_release(conn_info->sock);
+		pr_info("Client connection closed, error code: %d\n", ret);
+	}
+	kfree(conn_info);
+}
+
+/*
+ * Wrap an accepted client socket in a db_conn_info and queue it on the
+ * system workqueue for db_conn_service().
+ *
+ * Returns 0 on success or -ENOMEM if the wrapper cannot be allocated; in
+ * the failure case @sock is left untouched.
+ */
+static int db_create_cli_conn(struct socket *sock)
+{
+	struct db_conn_info *info = kmalloc(sizeof(*info), GFP_KERNEL);
+
+	if (info == NULL)
+		return -ENOMEM;
+
+	info->sock = sock;
+	INIT_WORK(&info->work, db_conn_service);
+	schedule_work(&info->work);
+
+	return 0;
+}
+
+/*
+ * Body of the "db-vsock-srv" kthread: listen on db_server_port and hand
+ * every accepted connection to db_create_cli_conn().
+ *
+ * Returns the listener creation error, -ENOMEM if a socket for an incoming
+ * connection cannot be allocated, or 0 when the thread is asked to stop.
+ * The listening socket is released on every exit path, and an accepted
+ * socket is released if it cannot be queued for service.
+ */
+static int db_vsock_server(void *data)
+{
+	struct socket *sock;
+	int err;
+	int ret = 0;
+
+	sock = db_create_vsock_listener(db_server_port);
+	if (IS_ERR(sock)) {
+		err = PTR_ERR(sock);
+		pr_err("Init server err: %d\n", err);
+		return err;
+	}
+
+	while (!kthread_should_stop()) {
+		struct socket *conn;
+
+		conn = sock_alloc();
+		if (!conn) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		conn->type = sock->type;
+		conn->ops = sock->ops;
+
+		/* third argument: flags = 0; fourth argument: kern = 1 */
+		err = sock->ops->accept(sock, conn, 0, 1);
+		if (err < 0) {
+			pr_err("Server accept err: %d\n", err);
+			sock_release(conn);
+			continue;
+		}
+
+		err = db_create_cli_conn(conn);
+		if (err) {
+			pr_err("Create client connetion err: %d\n", err);
+			/* Nobody owns the connection now: do not leak it. */
+			sock_release(conn);
+		}
+	}
+
+	sock_release(sock);
+	return ret;
+}
+
+/*
+ * Create and start the "db-vsock-srv" kthread running db_vsock_server().
+ * The task is recorded in db_service_task before it is woken up.
+ *
+ * Returns 0 on success or the kthread_create() error.
+ */
+static int db_create_service(void)
+{
+	struct task_struct *task;
+
+	task = kthread_create(db_vsock_server, NULL, "db-vsock-srv");
+	if (IS_ERR(task)) {
+		int err = PTR_ERR(task);
+
+		pr_err("Server task create failed, err: %d\n", err);
+		return err;
+	}
+
+	db_service_task = task;
+	wake_up_process(task);
+	return 0;
+}
+
+/*
+ * Setter for the "port" parameter. Accepts exactly "@<port>" with a port
+ * in the range (0, 1024] and stores it in db_server_port.
+ *
+ * Returns 0 on success, -EINVAL for any other input.
+ */
+static int db_vsock_srv_cmdline_set(const char *device,
+				    const struct kernel_param *kp)
+{
+	unsigned int port = 0;
+	int consumed = 0;
+	bool valid;
+
+	/* Get "@<port>"; nothing may follow the number. */
+	valid = sscanf(device, "@%u%n", &port, &consumed) >= 1 &&
+		device[consumed] == '\0' &&
+		port != 0 && port <= 1024;
+	if (!valid) {
+		pr_err("Using @<port> format and port range (0, 1024].\n");
+		return -EINVAL;
+	}
+
+	db_server_port = port;
+	return 0;
+}
+
+static const struct kernel_param_ops db_vsock_srv_cmdline_param_ops = { /* only .set is provided */
+ .set = db_vsock_srv_cmdline_set,
+};
+
+device_param_cb(port, &db_vsock_srv_cmdline_param_ops, NULL, 0400); /* "port" parameter, parsed by db_vsock_srv_cmdline_set() */
+
+/*
+ * Register @handler as the service for command byte @cmd.
+ *
+ * The entry is allocated before db_service_lock is taken, because a
+ * GFP_KERNEL allocation may sleep and the rwlock must not be held across a
+ * sleeping call.
+ *
+ * Returns 0 on success, -EEXIST if @cmd is already registered, or -ENOMEM
+ * if the entry cannot be allocated.
+ */
+int register_db_vsock_service(const char cmd, db_vsock_svc_handler_t handler)
+{
+	int rc = 0;
+	struct db_service_entry *service_entry, *new_entry;
+
+	new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
+	if (!new_entry)
+		return -ENOMEM;
+	new_entry->cmd = cmd;
+	new_entry->handler = handler;
+
+	write_lock(&db_service_lock);
+	list_for_each_entry(service_entry, &db_service_list, list) {
+		if (cmd == service_entry->cmd) {
+			rc = -EEXIST;
+			break;
+		}
+	}
+	if (!rc)
+		list_add_tail(&new_entry->list, &db_service_list);
+	write_unlock(&db_service_lock);
+
+	/* Duplicate command: the pre-allocated entry was never linked. */
+	if (rc)
+		kfree(new_entry);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(register_db_vsock_service);
+
+/*
+ * Remove the service registered for command byte @cmd and free its entry.
+ *
+ * Returns 0 if an entry was removed, -EEXIST if no service is registered
+ * for @cmd (error code kept as callers already see it).
+ */
+int unregister_db_vsock_service(const char cmd)
+{
+	int rc = -EEXIST;
+	struct db_service_entry *service_entry, *n;
+	struct db_service_entry *removed = NULL;
+
+	write_lock(&db_service_lock);
+	list_for_each_entry_safe(service_entry, n, &db_service_list, list) {
+		if (cmd == service_entry->cmd) {
+			list_del(&service_entry->list);
+			removed = service_entry;
+			rc = 0;
+			break;
+		}
+	}
+	write_unlock(&db_service_lock);
+
+	/* The entry is unlinked, so no list walker can reach it any more. */
+	kfree(removed);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(unregister_db_vsock_service);
+
+static int __init db_vsock_srv_init(void)
+{
+ return db_create_service(); /* creates and wakes the "db-vsock-srv" kthread */
+}
+
+late_initcall(db_vsock_srv_init); /* registered as a late initcall */
+
+MODULE_AUTHOR("Alibaba, Inc.");
+MODULE_DESCRIPTION("Dragonball vsock server");
+MODULE_LICENSE("GPL v2");
diff --git a/include/dragonball/upcall_srv.h b/include/dragonball/upcall_srv.h
new file mode 100644
index 000000000000..1c733982cc30
--- /dev/null
+++ b/include/dragonball/upcall_srv.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * db_upcall_srv.h Virtual Sockets Server for Dragonball
+ *
+ * Copyright (C) 2022 Alibaba Cloud, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#ifndef _DB_UPCALL_SRV_H
+#define _DB_UPCALL_SRV_H
+
+#include <linux/workqueue.h>
+#include <linux/net.h>
+
+/* Vsock port to listen for incoming connections. */
+#define DB_SERVER_PORT 0xDB
+#define DB_RECVBUF_SIZE 0x400
+#define DB_INIT_TIMEOUT 10
+
+/*
+ * Vsock service handler to handle new incoming connections.
+ *
+ * Return:
+ * 0: on success; the callback takes ownership of the sock.
+ * !0: on failure; the callback must leave the sock untouched, because the
+ *     upcall server releases it.
+ */
+typedef int (*db_vsock_svc_handler_t) (struct socket *sock);
+
+extern int register_db_vsock_service(const char cmd,
+ db_vsock_svc_handler_t handler);
+extern int unregister_db_vsock_service(const char cmd);
+
+extern struct socket *db_create_vsock_listener(unsigned int port);
+extern int db_vsock_sendmsg(struct socket *sock, char *buf, size_t len);
+extern int db_vsock_recvmsg(struct socket *sock, char *buf, size_t len,
+ int flags);
+
+#endif /* _DB_UPCALL_SRV_H */
--
2.19.1.6.gb485710b

View File

@ -0,0 +1,330 @@
From 66e67cc3ebbebc9d138fff084c487d6b9d21f60c Mon Sep 17 00:00:00 2001
From: Chao Wu <chaowu@linux.alibaba.com>
Date: Mon, 21 Nov 2022 19:19:26 +0800
Subject: [PATCH 2/4] upcall: introduce device manager upcall service
Different services are registered into upcall server to handle the
request from the client side. This commit introduces device manager
upcall service and when new message gets into upcall server, cmd `d` is
used for identifying the device manager service.
After a request is sent to device manager service, db_devmgr_handler
will start handle the request. A kthread `db_devmgr_server` will be
created and it will send CONNECT message to the client side to notify
the client start sending message for device management operations.
`db_devmgr_process` will be used for determining which device operations
will be triggered through msg_type. `get_action` will find out the
action for dealing with the operation and `action` fn will execute the
actual device management operation in the device manager service.
Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
Signed-off-by: Zizheng Bian <zizheng.bian@linux.alibaba.com>
Signed-off-by: Chao Wu <chaowu@linux.alibaba.com>
Signed-off-by: WangYu <WangYu@linux.alibaba.com>
Signed-off-by: Xingjun Liu <xingjun.liu@linux.alibaba.com>
---
drivers/misc/dragonball/upcall_srv/Kconfig | 12 +
drivers/misc/dragonball/upcall_srv/Makefile | 1 +
.../upcall_srv/dragonball_device_manager.c | 235 ++++++++++++++++++
include/dragonball/device_manager.h | 18 ++
4 files changed, 266 insertions(+)
create mode 100644 drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
create mode 100644 include/dragonball/device_manager.h
diff --git a/drivers/misc/dragonball/upcall_srv/Kconfig b/drivers/misc/dragonball/upcall_srv/Kconfig
index b00bf1f8637d..6554a9741c00 100644
--- a/drivers/misc/dragonball/upcall_srv/Kconfig
+++ b/drivers/misc/dragonball/upcall_srv/Kconfig
@@ -12,3 +12,15 @@ config DRAGONBALL_UPCALL_SRV
Sockets communication channels between guest and host/vmm.
If unsure, say N.
+
+config DRAGONBALL_DEVICE_MANAGER
+ bool "Vsock Service to Handle Dragonball Device Management Requests"
+ depends on DRAGONBALL_UPCALL_SRV
+ depends on VIRTIO_VSOCKETS
+ default y
+ help
+ This configure implements a vsock service to handle Dragonball device
+ management requests, such as getting device information, hot-plugging
+ devices etc.
+
+ If unsure, say N.
diff --git a/drivers/misc/dragonball/upcall_srv/Makefile b/drivers/misc/dragonball/upcall_srv/Makefile
index 4102e6c7edef..409c0c11e2e6 100644
--- a/drivers/misc/dragonball/upcall_srv/Makefile
+++ b/drivers/misc/dragonball/upcall_srv/Makefile
@@ -11,3 +11,4 @@
#
obj-$(CONFIG_DRAGONBALL_UPCALL_SRV) += dragonball_upcall_srv.o
+obj-$(CONFIG_DRAGONBALL_DEVICE_MANAGER) += dragonball_device_manager.o
diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
new file mode 100644
index 000000000000..ebcb6ef74285
--- /dev/null
+++ b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * drivers/misc/dragonball/vsock_srv/dragonball_device_manager.c
+ * vsock service for device management.
+ *
+ * Copyright (C) 2022 Alibaba Cloud, Inc
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define pr_fmt(fmt) "db-dev-mgr: " fmt
+
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/virtio_mmio.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/cpuhotplug.h>
+#include <asm/cpu.h>
+#include <dragonball/upcall_srv.h>
+#include <dragonball/device_manager.h>
+#ifdef CONFIG_ARM64
+#include <linux/irqdomain.h>
+#include <linux/irq.h>
+#endif
+#include <linux/percpu.h>
+#include <linux/device.h>
+#include <asm/numa.h>
+
+/*
+ * Following designs are adopted to simplify implementation:
+ * 1) fix size messages with padding to ease receiving logic.
+ * 2) binary encoding instead of string encoding because it's on the same host.
+ * 3) synchronous communication in ping-pong mode, one in-fly request at most.
+ * 4) do not support module unloading
+ */
+
+/* These definitions are synchronized with dragonball */
+#define DEV_MGR_MSG_SIZE 0x400
+#define DEVMGR_CMD_BYTE 'd'
+#define DEVMGR_MAGIC_VERSION 0x444D0100 /* 'DM' + Version 1.0 */
+#define SHARED_IRQ_NO 5
+
+/* Type of request and reply messages. */
+enum devmgr_msg_type { /* value carried in devmgr_msg_header.msg_type */
+ CONNECT = 0x00000000, /* first message db_devmgr_server() sends to the client */
+ ADD_CPU = 0x00000001,
+ DEL_CPU = 0x00000002,
+ ADD_MEM = 0x00000003,
+ DEL_MEM = 0x00000004,
+ ADD_MMIO = 0x00000005,
+ DEL_MMIO = 0x00000006,
+ ADD_PCI = 0x00000007,
+ DEL_PCI = 0x00000008,
+};
+
+struct devmgr_msg_header { /* header at the start of both devmgr_req and devmgr_reply */
+ /* magic version for identifying upcall */
+ uint32_t magic_version; /* checked against DEVMGR_MAGIC_VERSION on requests */
+ /* size of the upcall message */
+ uint32_t msg_size;
+ /* type for the message to identify its usage */
+ uint32_t msg_type; /* an enum devmgr_msg_type value */
+ /* flag for extra information */
+ uint32_t msg_flags;
+};
+
+struct devmgr_req { /* request message: header followed by a payload union */
+ struct devmgr_msg_header msg_header;
+ union {
+ char pad[DEV_MGR_MSG_SIZE - sizeof(struct devmgr_msg_header)]; /* sizes header + pad to DEV_MGR_MSG_SIZE bytes */
+ } msg_load;
+};
+
+struct devmgr_reply { /* reply message: header, result code, then a payload union */
+ struct devmgr_msg_header msg_header;
+ /*
+ * if ret is 0, it means the operation is successful.
+ * if ret is not 0, return value will be error code.
+ */
+ int32_t ret;
+ union {
+ char pad[DEV_MGR_MSG_SIZE - sizeof(struct devmgr_msg_header) - sizeof(int32_t)]; /* sizes header + ret + pad to DEV_MGR_MSG_SIZE bytes */
+ } msg_load;
+};
+
+struct task_res { /* per-connection state, allocated in db_devmgr_handler() and freed by db_devmgr_server() */
+ struct task_struct *task; /* the "db_dev_mgr" kthread serving this connection */
+ struct socket *sock; /* client socket handed over by the upcall server */
+ struct devmgr_req req; /* receive buffer for the current request */
+ struct devmgr_reply reply; /* send buffer for the current reply */
+};
+
+typedef int (*action_route_t) (struct devmgr_req *req,
+ struct devmgr_reply *rep); /* request handler: non-zero return is reported by db_devmgr_process() as the reply's ret */
+
+/*
+ * Fill every field of @msg: the magic is always DEVMGR_MAGIC_VERSION, the
+ * remaining fields come from the arguments.
+ */
+static void _fill_msg_header(struct devmgr_msg_header *msg, uint32_t msg_size,
+			     uint32_t msg_type, uint32_t msg_flags)
+{
+	*msg = (struct devmgr_msg_header) {
+		.magic_version = DEVMGR_MAGIC_VERSION,
+		.msg_size = msg_size,
+		.msg_type = msg_type,
+		.msg_flags = msg_flags,
+	};
+}
+
+static struct { /* message type -> handler table searched by get_action() */
+ enum devmgr_msg_type cmd;
+ action_route_t fn;
+} opt_map[] = {
+};
+
+/*
+ * Look up the handler registered in opt_map for the request's msg_type.
+ * Returns the handler, or NULL when no entry matches.
+ */
+static action_route_t get_action(struct devmgr_req *req)
+{
+	int nr_entries = ARRAY_SIZE(opt_map);
+	int idx;
+
+	for (idx = 0; idx < nr_entries; idx++) {
+		if (opt_map[idx].cmd == req->msg_header.msg_type)
+			return opt_map[idx].fn;
+	}
+
+	return NULL;
+}
+
+/*
+ * Validate one request and dispatch it to its handler, leaving the outcome
+ * in @rep.
+ *
+ * On any failure (bad magic/version, unknown message type, handler error)
+ * rep->ret is set to a non-zero value and the reply header is filled with
+ * msg_size 0 and the request's msg_type. On success the handler itself is
+ * responsible for filling @rep.
+ */
+static void db_devmgr_process(struct devmgr_req *req,
+			      struct devmgr_reply *rep)
+{
+	int err;
+	action_route_t action;
+	struct devmgr_msg_header *req_mh = &req->msg_header;
+	struct devmgr_msg_header *rep_mh = &rep->msg_header;
+
+	if (req_mh->magic_version != DEVMGR_MAGIC_VERSION) {
+		/*
+		 * Report the rejection explicitly; otherwise ret keeps the
+		 * value of the previous reply and may read as success.
+		 */
+		rep->ret = -EINVAL;
+		_fill_msg_header(rep_mh, 0, req->msg_header.msg_type, 0);
+		return;
+	}
+
+	action = get_action(req);
+	if (action == NULL) {
+		pr_err("db_devmgr_process : Not found valid command");
+		rep->ret = -1;
+		_fill_msg_header(rep_mh, 0, req->msg_header.msg_type, 0);
+		return;
+	}
+
+	err = action(req, rep);
+	if (err) {
+		pr_err("db_devmgr_process : Command run failed, err: %d", err);
+		rep->ret = err;
+		_fill_msg_header(rep_mh, 0, req->msg_header.msg_type, 0);
+		return;
+	}
+}
+
+/*
+ * Body of the per-connection "db_dev_mgr" kthread.
+ *
+ * Sends a CONNECT message to the client, then serves fixed-size
+ * request/reply pairs until a send or receive returns <= 0 or the thread
+ * is asked to stop. The socket and the task_res passed in @data are always
+ * released before returning.
+ *
+ * Returns the failing result of the initial CONNECT send, otherwise 0.
+ */
+static int db_devmgr_server(void *data)
+{
+	struct task_res *ctx = data;
+	char *req_buf = (char *)&ctx->req;
+	char *rep_buf = (char *)&ctx->reply;
+	int exit_code = 0;
+	int n;
+
+	_fill_msg_header(&ctx->reply.msg_header, 0, CONNECT, 0);
+	n = db_vsock_sendmsg(ctx->sock, rep_buf, DEV_MGR_MSG_SIZE);
+	if (n <= 0) {
+		pr_err("db_devmgr_server : Server send message failed, err: %d", n);
+		exit_code = n;
+		goto teardown;
+	}
+
+	while (!kthread_should_stop()) {
+		n = db_vsock_recvmsg(ctx->sock, req_buf, DEV_MGR_MSG_SIZE, 0);
+		if (n <= 0)
+			break;
+
+		/* The result(OK or Error) will fill into ctx->reply field */
+		db_devmgr_process(&ctx->req, &ctx->reply);
+
+		n = db_vsock_sendmsg(ctx->sock, rep_buf, DEV_MGR_MSG_SIZE);
+		if (n <= 0)
+			break;
+	}
+
+	/* TODO: check who shutdown the socket, receiving or sending. */
+teardown:
+	sock_release(ctx->sock);
+	kfree(ctx);
+	return exit_code;
+}
+
+/*
+ * Upcall service handler for the device manager.
+ *
+ * Allocates the per-connection state and starts a "db_dev_mgr" kthread
+ * running db_devmgr_server() on @sock.
+ *
+ * Returns 0 once the thread has been started, -ENOMEM if the state cannot
+ * be allocated, or the kthread_create() error; @sock is not touched on
+ * failure.
+ */
+static int db_devmgr_handler(struct socket *sock)
+{
+	struct task_struct *worker;
+	struct task_res *ctx;
+
+	/* TODO: ensure singleton, only one server exists */
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (ctx == NULL)
+		return -ENOMEM;
+	ctx->sock = sock;
+
+	worker = kthread_create(db_devmgr_server, ctx, "db_dev_mgr");
+	if (IS_ERR(worker)) {
+		pr_err("db_devmgr_handler : Client process thread create failed, err: %d",
+		       (int)PTR_ERR(worker));
+		kfree(ctx);
+		return PTR_ERR(worker);
+	}
+
+	ctx->task = worker;
+	wake_up_process(worker);
+	return 0;
+}
+
+static int __init db_device_manager_init(void)
+{
+ return register_db_vsock_service(DEVMGR_CMD_BYTE, db_devmgr_handler); /* command byte 'd' selects the device manager */
+}
+
+late_initcall(db_device_manager_init); /* registered as a late initcall */
+
+MODULE_AUTHOR("Alibaba, Inc.");
+MODULE_DESCRIPTION("Dragonball Device Manager");
+MODULE_LICENSE("GPL v2");
diff --git a/include/dragonball/device_manager.h b/include/dragonball/device_manager.h
new file mode 100644
index 000000000000..a1713e9f026d
--- /dev/null
+++ b/include/dragonball/device_manager.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * device_manager.h Device Manager for Dragonball
+ *
+ * Copyright (C) 2022 Alibaba Cloud, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#ifndef _DB_DEVICE_MANAGER_H
+#define _DB_DEVICE_MANAGER_H
+
+#include <linux/device.h>
+
+#endif /* _DB_DEVICE_MANAGER_H */
--
2.19.1.6.gb485710b

View File

@ -0,0 +1,301 @@
From 901fb71c77144b170465611617f0f25099d8a780 Mon Sep 17 00:00:00 2001
From: Chao Wu <chaowu@linux.alibaba.com>
Date: Mon, 21 Nov 2022 19:44:50 +0800
Subject: [PATCH 3/4] upcall: add cpu hotplug/hot-unplug into device manager
service
Add cpu hotplug and hot-unplug support into device manager. In the
`devmgr_req` message, `msg_type` ADD_CPU in `msg_header` will trigger
`add_cpu_dev` action and DEL_CPU will trigger `del_cpu_dev` action, and
we use `apic_ids` and `count` delivered in `cpu_dev_info` to notify
which and how many cpus will be hotplugged / hot-unplugged.
`add_cpu_dev` and `del_cpu_dev` will eventually trigger `add_cpu_upcall`
and `del_cpu_upcall` to trigger the cpu hotplug / hot-unplug process in
the kernel. After the cpu hotplug / hot-unplug process,
`cpu_event_notification` will generate device manager reply to the
client side.
Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
Signed-off-by: Zizheng Bian <zizheng.bian@linux.alibaba.com>
Signed-off-by: Chao Wu <chaowu@linux.alibaba.com>
Signed-off-by: WangYu <WangYu@linux.alibaba.com>
Signed-off-by: Xingjun Liu <xingjun.liu@linux.alibaba.com>
---
drivers/misc/dragonball/upcall_srv/Kconfig | 11 +
.../upcall_srv/dragonball_device_manager.c | 219 ++++++++++++++++++
2 files changed, 230 insertions(+)
diff --git a/drivers/misc/dragonball/upcall_srv/Kconfig b/drivers/misc/dragonball/upcall_srv/Kconfig
index 6554a9741c00..b237882a2928 100644
--- a/drivers/misc/dragonball/upcall_srv/Kconfig
+++ b/drivers/misc/dragonball/upcall_srv/Kconfig
@@ -24,3 +24,14 @@ config DRAGONBALL_DEVICE_MANAGER
devices etc.
If unsure, say N.
+
+config DRAGONBALL_HOTPLUG_CPU
+ bool "CPU hotplug/hotunplug support"
+ depends on DRAGONBALL_DEVICE_MANAGER
+ default y
+ help
+ This option implements vCPU hotplug/hot-unplug support. The VMM
+ sends hotplug requests over vsock, using a dedicated data
+ structure that carries the command and parameters for the vCPU.
+
+ If unsure, say N.
diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
index ebcb6ef74285..210ef5d6c9d5 100644
--- a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+++ b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
@@ -75,9 +75,20 @@ struct devmgr_req {
struct devmgr_msg_header msg_header;
union {
char pad[DEV_MGR_MSG_SIZE - sizeof(struct devmgr_msg_header)];
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
+ struct {
+ uint8_t count;
+ uint8_t apic_ver;
+ uint8_t apic_ids[256];
+ } cpu_dev_info;
+#endif
} msg_load;
};
+struct cpu_dev_reply_info {
+ uint32_t apic_index;
+};
+
struct devmgr_reply {
struct devmgr_msg_header msg_header;
/*
@@ -87,6 +98,9 @@ struct devmgr_reply {
int32_t ret;
union {
char pad[DEV_MGR_MSG_SIZE - sizeof(struct devmgr_msg_header) - sizeof(int32_t)];
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
+ struct cpu_dev_reply_info cpu_dev_info;
+#endif
} msg_load;
};
@@ -109,10 +123,215 @@ static void _fill_msg_header(struct devmgr_msg_header *msg, uint32_t msg_size,
msg->msg_flags = msg_flags;
}
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+/*
+ * Map an APIC id to its logical CPU number.
+ *
+ * Walks the present CPUs rather than 0..num_processors-1: the hot-unplug
+ * path decrements num_processors and clears the present bit of the removed
+ * CPU, so logical CPU numbers are not guaranteed to stay contiguous.
+ *
+ * Returns the logical CPU number, or -1 if no present CPU has @apic_id.
+ */
+static int get_cpu_id(int apic_id)
+{
+	int cpu;
+
+	for_each_present_cpu(cpu) {
+		if (cpu_physical_id(cpu) == apic_id)
+			return cpu;
+	}
+	return -1;
+}
+
+/*
+ * Build the reply for a CPU hotplug request.
+ *
+ * @apic_ids_index is the index into the request's apic_ids[] that is
+ * reported back to dragonball: the first entry that failed, or the number
+ * of requested vcpus when all of them succeeded. If it differs from the
+ * requested count, dragonball rolls back the vcpus apic_ids[0] up to
+ * apic_ids[index - 1].
+ */
+static void cpu_event_notification(uint8_t apic_ids_index, int ret,
+				   uint32_t action_type,
+				   struct devmgr_reply *rep)
+{
+	struct cpu_dev_reply_info *info = &rep->msg_load.cpu_dev_info;
+
+	pr_info("cpu event notification: apic ids index %d", apic_ids_index);
+
+	info->apic_index = apic_ids_index;
+	rep->ret = ret;
+	_fill_msg_header(&rep->msg_header, sizeof(*info), action_type, 0);
+}
+#endif
+
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+/*
+ * Hot-add the vCPU identified by @apic_id / @apic_ver and bring it online.
+ *
+ * Registration (generic_processor_info(), NUMA mapping,
+ * arch_register_cpu()) runs with the device-hotplug lock, the cpu-maps
+ * lock and the cpu-hotplug lock held. The locks are dropped before
+ * add_cpu() is called and taken again if add_cpu() fails, so that the
+ * rollback runs under the same protection del_cpu_upcall() uses for the
+ * identical teardown.
+ *
+ * Returns 0 on success or a negative error code; steps already performed
+ * are undone before an error is returned.
+ */
+static int add_cpu_upcall(int apic_id, uint8_t apic_ver)
+{
+	int cpu_id, node_id;
+	int ret;
+
+	pr_info("adding vcpu apic_id %d", apic_id);
+
+	/*
+	 * Get the mutex lock for hotplug and cpu update and cpu write lock.
+	 * So that other threads won't influence the hotplug process.
+	 */
+	lock_device_hotplug();
+	cpu_maps_update_begin();
+	cpu_hotplug_begin();
+
+	cpu_id = generic_processor_info(apic_id, apic_ver);
+	if (cpu_id < 0) {
+		pr_err("cpu (apic id %d) cannot be added, generic processor info failed", apic_id);
+		ret = -EINVAL;
+		goto rollback_generic_cpu;
+	}
+
+	/* update numa mapping for hot-plugged cpus. */
+	node_id = numa_cpu_node(cpu_id);
+	if (node_id != NUMA_NO_NODE)
+		numa_set_node(cpu_id, node_id);
+
+	ret = arch_register_cpu(cpu_id);
+	if (ret) {
+		pr_err("cpu %d cannot be added, register cpu failed %d", cpu_id, ret);
+		goto rollback_register_cpu;
+	}
+
+	cpu_hotplug_done();
+	cpu_maps_update_done();
+	unlock_device_hotplug();
+
+	ret = add_cpu(cpu_id);
+	if (!ret)
+		return 0;
+
+	pr_err("cpu %d cannot be added, cpu up failed: %d", cpu_id, ret);
+
+	/* Bringing the CPU up failed: undo the registration under the locks. */
+	lock_device_hotplug();
+	cpu_maps_update_begin();
+	cpu_hotplug_begin();
+	arch_unregister_cpu(cpu_id);
+
+rollback_register_cpu:
+	set_cpu_present(cpu_id, false);
+	per_cpu(x86_cpu_to_apicid, cpu_id) = -1;
+	num_processors--;
+rollback_generic_cpu:
+	cpu_hotplug_done();
+	cpu_maps_update_done();
+	unlock_device_hotplug();
+	return ret;
+}
+
+/*
+ * Hot-remove the vCPU identified by @apic_id.
+ *
+ * The CPU is taken offline with remove_cpu() first; its registration,
+ * present bit and APIC id mapping are then torn down with the
+ * device-hotplug, cpu-maps and cpu-hotplug locks held.
+ *
+ * Returns 0 on success, -EINVAL if @apic_id does not map to a CPU or maps
+ * to the bootstrap processor, or the remove_cpu() error.
+ */
+static int del_cpu_upcall(int apic_id)
+{
+	int cpu_id = get_cpu_id(apic_id);
+	int ret;
+
+	/* get_cpu_id() returns -1 for an unknown APIC id. */
+	if (cpu_id < 0) {
+		pr_err("cannot del vcpu: no cpu found for apic id %d", apic_id);
+		return -EINVAL;
+	}
+	if (cpu_id == 0) {
+		pr_err("cannot del bootstrap processor.");
+		return -EINVAL;
+	}
+	pr_info("deleting vcpu %d", cpu_id);
+	ret = remove_cpu(cpu_id);
+	if (ret) {
+		pr_err("del vcpu failed, err: %d", ret);
+		return ret;
+	}
+
+	lock_device_hotplug();
+	cpu_maps_update_begin();
+	cpu_hotplug_begin();
+
+	arch_unregister_cpu(cpu_id);
+	set_cpu_present(cpu_id, false);
+	per_cpu(x86_cpu_to_apicid, cpu_id) = -1;
+	num_processors--;
+
+	cpu_hotplug_done();
+	cpu_maps_update_done();
+	unlock_device_hotplug();
+
+	return ret;
+}
+
+/*
+ * ADD_CPU handler: hot-add the vcpus listed in the request.
+ *
+ * All requested APIC ids are first checked to be unused; the vcpus are
+ * then added one by one, stopping at the first failure. The reply is
+ * filled through cpu_event_notification() only when every vcpu was added
+ * (including the empty request, count == 0).
+ *
+ * Returns 0 on success, -EINVAL if an APIC id is already in use, or the
+ * error of the first add_cpu_upcall() that failed.
+ */
+static int add_cpu_dev(struct devmgr_req *req,
+		       struct devmgr_reply *rep)
+{
+	/* Stays 0 when count == 0 and the add loop never runs. */
+	int ret = 0;
+	uint8_t i;
+	int apic_id;
+
+	uint8_t count = req->msg_load.cpu_dev_info.count;
+	uint8_t apic_ver = req->msg_load.cpu_dev_info.apic_ver;
+	uint8_t *apic_ids = req->msg_load.cpu_dev_info.apic_ids;
+
+	pr_info("add vcpu number: %d", count);
+
+	for (i = 0; i < count; ++i) {
+		apic_id = apic_ids[i];
+		if (get_cpu_id(apic_id) != -1) {
+			pr_err("cpu cannot be added: apci_id %d is already been used.", apic_id);
+			ret = -EINVAL;
+			return ret;
+		}
+	}
+
+	for (i = 0; i < count; ++i) {
+		apic_id = apic_ids[i];
+		ret = add_cpu_upcall(apic_id, apic_ver);
+		if (ret != 0)
+			break;
+	}
+
+	if (!ret)
+		cpu_event_notification(i, ret, ADD_CPU, rep);
+	return ret;
+}
+
+/*
+ * DEL_CPU handler: hot-remove the vcpus listed in the request.
+ *
+ * The request is rejected if it would remove all processors or if any
+ * APIC id does not map to a possible CPU. The vcpus are then removed one
+ * by one, stopping at the first failure. The reply is filled through
+ * cpu_event_notification() on the validation failures and when every vcpu
+ * was removed (including the empty request, count == 0).
+ *
+ * Returns 0 on success, -EINVAL on a validation failure, or the error of
+ * the first del_cpu_upcall() that failed.
+ */
+static int del_cpu_dev(struct devmgr_req *req,
+		       struct devmgr_reply *rep)
+{
+	/* Stays 0 when count == 0 and the removal loop never runs. */
+	int ret = 0;
+	uint8_t i;
+	int cpu_id;
+
+	uint8_t count = req->msg_load.cpu_dev_info.count;
+	uint8_t *apic_ids = req->msg_load.cpu_dev_info.apic_ids;
+
+	pr_info("del vcpu number : %d", count);
+
+	if (count >= num_processors) {
+		pr_err("cpu del parameter check error: cannot remove all vcpus");
+		ret = -EINVAL;
+		cpu_event_notification(0, ret, DEL_CPU, rep);
+		return ret;
+	}
+
+	for (i = 0; i < count; ++i) {
+		cpu_id = get_cpu_id(apic_ids[i]);
+		/* -1 means "unknown APIC id"; never test it as a CPU number. */
+		if (cpu_id < 0 || !cpu_possible(cpu_id)) {
+			pr_err("cpu %d cannot be deleted: cpu not possible", cpu_id);
+			ret = -EINVAL;
+			cpu_event_notification(0, ret, DEL_CPU, rep);
+			return ret;
+		}
+	}
+
+	for (i = 0; i < count; ++i) {
+		ret = del_cpu_upcall(apic_ids[i]);
+		if (ret != 0)
+			break;
+	}
+
+	if (!ret)
+		cpu_event_notification(i, ret, DEL_CPU, rep);
+	return ret;
+}
+#endif
+
static struct {
enum devmgr_msg_type cmd;
action_route_t fn;
} opt_map[] = {
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+ {ADD_CPU, add_cpu_dev},
+ {DEL_CPU, del_cpu_dev},
+#endif
};
static action_route_t get_action(struct devmgr_req *req)
--
2.19.1.6.gb485710b

View File

@ -0,0 +1,417 @@
From 90ced3463137076b9df2200b5c6ad720660c6bfc Mon Sep 17 00:00:00 2001
From: Chao Wu <chaowu@linux.alibaba.com>
Date: Wed, 23 Nov 2022 19:23:47 +0800
Subject: [PATCH 4/4] upcall: add virtio-mmio hotplug/hot-unplug into device
manager service
Add virtio-mmio hotplug/hot-unplug support into device manager. In the
`devmgr_req` message, `msg_type` ADD_MMIO in `msg_header` will trigger
`add_mmio_dev` action and DEL_MMIO will trigger `del_mmio_dev` action,
and we use `mmio_base`, `mmio_size` and `mmio_irq` delivered in
`add_mmio_dev` to notify how to hotplug the virtio-mmio device
Also `virtio_mmio_add_device` and `virtio_mmio_del_device` are
introduced under /drivers/virtio/virtio_mmio.c, and we extract
`vm_add_device` from `vm_cmdline_set` to help hotplug virtio-mmio
device.
Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
Signed-off-by: Zizheng Bian <zizheng.bian@linux.alibaba.com>
Signed-off-by: Chao Wu <chaowu@linux.alibaba.com>
Signed-off-by: WangYu <WangYu@linux.alibaba.com>
Signed-off-by: Xingjun Liu <xingjun.liu@linux.alibaba.com>
---
drivers/misc/dragonball/upcall_srv/Kconfig | 12 ++
.../upcall_srv/dragonball_device_manager.c | 112 ++++++++++++++
drivers/virtio/Kconfig | 14 ++
drivers/virtio/virtio_mmio.c | 138 +++++++++++++++---
include/dragonball/device_manager.h | 5 +
5 files changed, 259 insertions(+), 22 deletions(-)
diff --git a/drivers/misc/dragonball/upcall_srv/Kconfig b/drivers/misc/dragonball/upcall_srv/Kconfig
index b237882a2928..fc83f03c2edd 100644
--- a/drivers/misc/dragonball/upcall_srv/Kconfig
+++ b/drivers/misc/dragonball/upcall_srv/Kconfig
@@ -25,6 +25,18 @@ config DRAGONBALL_DEVICE_MANAGER
If unsure, say N.
+config DRAGONBALL_HOTPLUG_VIRTIO_MMIO
+	bool "Virtio-MMIO device hotplug/hotunplug support"
+	depends on DRAGONBALL_DEVICE_MANAGER && VIRTIO_MMIO_CMDLINE_DEVICES
+	default y
+	help
+	  This option implements Virtio-MMIO device hotplug/hotunplug
+	  support. The VMM sends hotplug requests over vsock, using a
+	  dedicated message format (command plus parameters), to hot-plug
+	  an MMIO device.
+
+	  If unsure, say N.
+
config DRAGONBALL_HOTPLUG_CPU
bool "CPU hotplug/hotunplug support"
depends on DRAGONBALL_DEVICE_MANAGER
diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
index 210ef5d6c9d5..5a95b2ba63e8 100644
--- a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+++ b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
@@ -75,6 +75,13 @@ struct devmgr_req {
struct devmgr_msg_header msg_header;
union {
char pad[DEV_MGR_MSG_SIZE - sizeof(struct devmgr_msg_header)];
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO)
+ struct {
+ uint64_t mmio_base;
+ uint64_t mmio_size;
+ uint32_t mmio_irq;
+ } add_mmio_dev;
+#endif
#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
struct {
uint8_t count;
@@ -98,6 +105,10 @@ struct devmgr_reply {
int32_t ret;
union {
char pad[DEV_MGR_MSG_SIZE - sizeof(struct devmgr_msg_header) - sizeof(int32_t)];
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO)
+ struct {
+ } add_mmio_dev;
+#endif
#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
struct cpu_dev_reply_info cpu_dev_info;
#endif
@@ -114,6 +125,62 @@ struct task_res {
typedef int (*action_route_t) (struct devmgr_req *req,
struct devmgr_reply *rep);
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO)
+#ifdef CONFIG_ARM64
+/* Map @pin to a virq through the DT node named "intc"; 0 if the node is missing. */
+static uint32_t get_device_virq(uint32_t pin)
+{
+	struct device_node *intc = of_find_node_by_name(NULL, "intc");
+	struct irq_fwspec spec = {
+		.param_count = 3,
+		.param = {0, pin, IRQ_TYPE_EDGE_RISING}
+	};
+	uint32_t mapped;
+
+	if (!intc) {
+		pr_err("interrupt controller device node not found.");
+		return 0;
+	}
+	spec.fwnode = of_node_to_fwnode(intc);
+	mapped = irq_create_fwspec_mapping(&spec);
+	of_node_put(intc);
+	return mapped;
+}
+#elif defined(CONFIG_X86_64)
+/* x86-64: the number carried in the request is returned unchanged. */
+static inline uint32_t get_device_virq(uint32_t irq)
+{
+	return irq;
+}
+#endif
+
+/* Fill res[0] (MMIO window) and res[1] (IRQ) from an MMIO hotplug request. */
+static int get_dev_resource(struct devmgr_req *req, struct resource *res)
+{
+	uint64_t base = req->msg_load.add_mmio_dev.mmio_base;
+	uint64_t size = req->msg_load.add_mmio_dev.mmio_size;
+	uint32_t irq = req->msg_load.add_mmio_dev.mmio_irq;
+	uint32_t virq;
+
+	/* Reject wrong-sized messages and empty or wrapping MMIO windows. */
+	if (req->msg_header.msg_size != sizeof(req->msg_load.add_mmio_dev) ||
+	    !size || base + size - 1 < base)
+		return -EINVAL;
+	res[0].flags = IORESOURCE_MEM;
+	res[0].start = base;
+	res[0].end = base + size - 1;
+	res[1].flags = IORESOURCE_IRQ;
+	virq = get_device_virq(irq);
+	if (!virq)
+		return -EINVAL;
+	res[1].start = res[1].end = virq;
+	/* detect the irq sharing mode */
+	if (irq == SHARED_IRQ_NO)
+		res[1].flags |= IORESOURCE_IRQ_SHAREABLE;
+	return 0;
+}
+#endif
+
static void _fill_msg_header(struct devmgr_msg_header *msg, uint32_t msg_size,
uint32_t msg_type, uint32_t msg_flags)
{
@@ -154,6 +221,47 @@ static void cpu_event_notification(
}
#endif
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO)
+/* ADD_MMIO handler: register the virtio-mmio device described by @req. */
+static int add_mmio_dev(struct devmgr_req *req,
+			struct devmgr_reply *rep)
+{
+	struct resource window[2] = {};
+	int err = get_dev_resource(req, window);
+
+	/* Resources decoded: hand them to virtio-mmio. */
+	if (!err)
+		err = virtio_mmio_add_device(window, ARRAY_SIZE(window));
+	if (err)
+		return err;
+
+	/* Only a successful add fills in the reply. */
+	rep->ret = 0;
+	_fill_msg_header(&rep->msg_header, 0, ADD_MMIO, 0);
+	return 0;
+}
+
+/* DEL_MMIO handler: unregister the virtio-mmio device described by @req. */
+static int del_mmio_dev(struct devmgr_req *req,
+			struct devmgr_reply *rep)
+{
+	struct resource res[2] = {};
+	int ret = get_dev_resource(req, res);
+
+	/* Resources decoded: try the actual removal. */
+	if (!ret)
+		ret = virtio_mmio_del_device(res, ARRAY_SIZE(res));
+
+	/* Only a successful removal fills in the reply. */
+	if (!ret) {
+		rep->ret = 0;
+		_fill_msg_header(&rep->msg_header, 0, DEL_MMIO, 0);
+	}
+	return ret;
+}
+#endif
+
+
#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
static int add_cpu_upcall(int apic_id, uint8_t apic_ver)
{
@@ -328,6 +436,10 @@ static struct {
enum devmgr_msg_type cmd;
action_route_t fn;
} opt_map[] = {
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO)
+ {ADD_MMIO, add_mmio_dev},
+ {DEL_MMIO, del_mmio_dev},
+#endif
#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
{ADD_CPU, add_cpu_dev},
{DEL_CPU, del_cpu_dev},
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 0a53a6123..f599e80a5 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -173,4 +173,18 @@ config VIRTIO_DMA_SHARED_BUFFER
This option adds a flavor of dma buffers that are backed by
virtio resources.
+config VIRTIO_MMIO_DRAGONBALL
+	bool "Enable features for Dragonball virtio MMIO devices"
+ default n
+ depends on VIRTIO_MMIO
+ depends on X86_64 || ARM64
+ select X86_PLATFORM_MSI
+ select VIRTIO_MMIO_MSI
+ help
+ The Dragonball VMM implements several optimizations for MMIO virtio
+ devices. This option enables support of those optimization features:
+ - virtio-mmio hotplug through upcall
+
+ If unsure, say N
+
endif # VIRTIO_MENU
diff --git a/include/dragonball/device_manager.h b/include/dragonball/device_manager.h
index a1713e9f026d..785761c47f97 100644
--- a/include/dragonball/device_manager.h
+++ b/include/dragonball/device_manager.h
@@ -15,4 +15,9 @@
#include <linux/device.h>
+#if defined(CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES)
+int virtio_mmio_add_device(struct resource *resources, size_t res_size);
+int virtio_mmio_del_device(struct resource *resources, size_t res_size);
+#endif
+
#endif /* _DB_DEVICE_MANAGER_H */
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index de1a08108..2d13dd6e3 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -700,16 +700,41 @@ static struct device vm_cmdline_parent = {
static int vm_cmdline_parent_registered;
static int vm_cmdline_id;
+/* Register one virtio-mmio platform device under vm_cmdline_parent. */
+static int vm_add_device(struct resource *resources, size_t res_size)
+{
+	int err;
+	struct platform_device *pdev;
+
+	if (!vm_cmdline_parent_registered) {
+		err = device_register(&vm_cmdline_parent);
+		if (err) {
+			/* A failed device_register() still needs its reference dropped. */
+			put_device(&vm_cmdline_parent);
+			pr_err("Failed to register parent device!\n");
+			return err;
+		}
+		vm_cmdline_parent_registered = 1;
+	}
+	pr_info("Registering device virtio-mmio.%d at 0x%llx-0x%llx, IRQ %d.\n",
+		       vm_cmdline_id,
+		       (unsigned long long)resources[0].start,
+		       (unsigned long long)resources[0].end,
+		       (int)resources[1].start);
+	pdev = platform_device_register_resndata(&vm_cmdline_parent,
+			"virtio-mmio", vm_cmdline_id++,
+			resources, res_size, NULL, 0);
+	return PTR_ERR_OR_ZERO(pdev);
+}
+
static int vm_cmdline_set(const char *device,
const struct kernel_param *kp)
{
- int err;
struct resource resources[2] = {};
char *str;
long long base, size;
unsigned int irq;
int processed, consumed = 0;
- struct platform_device *pdev;
/* Consume "size" part of the command line parameter */
size = memparse(device, &str);
@@ -734,27 +759,7 @@ static int vm_cmdline_set(const char *device,
resources[1].flags = IORESOURCE_IRQ;
resources[1].start = resources[1].end = irq;
- if (!vm_cmdline_parent_registered) {
- err = device_register(&vm_cmdline_parent);
- if (err) {
- put_device(&vm_cmdline_parent);
- pr_err("Failed to register parent device!\n");
- return err;
- }
- vm_cmdline_parent_registered = 1;
- }
-
- pr_info("Registering device virtio-mmio.%d at 0x%llx-0x%llx, IRQ %d.\n",
- vm_cmdline_id,
- (unsigned long long)resources[0].start,
- (unsigned long long)resources[0].end,
- (int)resources[1].start);
-
- pdev = platform_device_register_resndata(&vm_cmdline_parent,
- "virtio-mmio", vm_cmdline_id++,
- resources, ARRAY_SIZE(resources), NULL, 0);
-
- return PTR_ERR_OR_ZERO(pdev);
+ return vm_add_device(resources, ARRAY_SIZE(resources));
}
static int vm_cmdline_get_device(struct device *dev, void *data)
@@ -804,6 +809,94 @@ static void vm_unregister_cmdline_devices(void)
}
}
+#ifdef CONFIG_DRAGONBALL_DEVICE_MANAGER
+/* device_find_child() callback: match on MMIO start/end and IRQ start. */
+static int vm_match_device(struct device *dev, void *data)
+{
+	const struct resource *want = data;
+	const struct resource *have = to_platform_device(dev)->resource;
+
+	return have[0].start == want[0].start &&
+	       have[0].end == want[0].end &&
+	       have[1].start == want[1].start;
+}
+
+/* Look up the child of vm_cmdline_parent whose resources equal @res, or NULL. */
+static struct device *vm_find_device(struct resource *res)
+{
+	return device_find_child(&vm_cmdline_parent, res, vm_match_device);
+}
+
+/* device_find_child() callback: 1 if @dev's IRQ or MMIO window clashes with @data. */
+static int vm_device_overlap(struct device *dev, void *data)
+{
+	struct resource *res = (struct resource *)data;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	/* Detect IRQ number conflicts except shared IRQs. */
+	if (!(res[1].flags & IORESOURCE_IRQ_SHAREABLE) &&
+	    (pdev->resource[1].start == res[1].start)) {
+		return 1;
+	}
+
+	/* MMIO windows are inclusive [start, end]: sharing one byte is an overlap. */
+	if ((pdev->resource[0].start <= res[0].end) &&
+	    (pdev->resource[0].end >= res[0].start)) {
+		return 1;
+	}
+	return 0;
+}
+
+/* Find a child that conflicts with @res; return NULL if no resource overlapped. */
+static struct device *vm_detect_resource(struct resource *res)
+{
+	return device_find_child(&vm_cmdline_parent, res, vm_device_overlap);
+}
+
+/* Hot-add a virtio-mmio device; -EEXIST if @resources clash with a child. */
+int virtio_mmio_add_device(struct resource *resources, size_t res_size)
+{
+	int err = -EEXIST;
+	struct device *dev = NULL;
+
+	if (res_size < 2 || !resources)
+		return -EINVAL;
+
+	/* Check and add under one lock so the conflict check cannot go stale. */
+	lock_device_hotplug();
+	if (vm_cmdline_parent_registered)	/* else: no children to search */
+		dev = vm_detect_resource(resources);
+	if (dev)
+		put_device(dev);
+	else
+		err = vm_add_device(resources, res_size);
+	unlock_device_hotplug();
+	return err;
+}
+EXPORT_SYMBOL_GPL(virtio_mmio_add_device);
+
+/* Hot-remove the virtio-mmio device matching @resources; -ENODEV if none. */
+int virtio_mmio_del_device(struct resource *resources, size_t res_size)
+{
+	int ret = -ENODEV;
+	struct device *dev = NULL;
+
+	if (res_size < 2 || !resources)
+		return -EINVAL;
+	lock_device_hotplug();
+	if (vm_cmdline_parent_registered)	/* else: no children to search */
+		dev = vm_find_device(resources);
+	if (dev) {
+		ret = vm_unregister_cmdline_device(dev, NULL);
+		/* Drop the lookup reference only after the last use of dev. */
+		put_device(dev);
+	}
+	unlock_device_hotplug();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(virtio_mmio_del_device);
+#endif /* CONFIG_DRAGONBALL_DEVICE_MANAGER */
+
#else
static void vm_unregister_cmdline_devices(void)
--
2.19.1.6.gb485710b

View File

@ -0,0 +1,163 @@
From 16e3b3da9fb8b79b006d8c9d1f68b2dec9980d72 Mon Sep 17 00:00:00 2001
Message-Id: <16e3b3da9fb8b79b006d8c9d1f68b2dec9980d72.1685428663.git.jiyunxue@linux.alibaba.com>
From: xuejun-xj <jiyunxue@linux.alibaba.com>
Date: Wed, 10 May 2023 13:55:43 +0800
Subject: [PATCH 1/3] upcall: dragonball-devmgr supports cpu hotplug on arm64
Enable vcpuhotplug feature on aarch64 in guest kernel. It communicates
with dragonball by using upcall. This commit does these changes:
1. Wraps x86 related fields with CONFIG_X86_64.
2. Add "cpu_event_notification" for arm64.
3. Add "add_cpu_dev" and "del_cpu_dev" for arm64.
Signed-off-by: xuejun-xj <jiyunxue@linux.alibaba.com>
Reviewed-by : Chao Wu <chaowu@linux.alibaba.com>
Reviewed-by: Zizheng Bian <zizheng.bian@linux.alibaba.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
.../upcall_srv/dragonball_device_manager.c | 84 ++++++++++++++++++-
1 file changed, 81 insertions(+), 3 deletions(-)
diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
index 5a95b2ba63e8..088d38623b8d 100644
--- a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+++ b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
@@ -85,15 +85,21 @@ struct devmgr_req {
#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
struct {
uint8_t count;
+#ifdef CONFIG_X86_64
uint8_t apic_ver;
uint8_t apic_ids[256];
+#endif
} cpu_dev_info;
#endif
} msg_load;
};
struct cpu_dev_reply_info {
+#if defined(CONFIG_X86_64)
uint32_t apic_index;
+#elif defined(CONFIG_ARM64)
+ uint32_t cpu_id;
+#endif
};
struct devmgr_reply {
@@ -190,7 +196,8 @@ static void _fill_msg_header(struct devmgr_msg_header *msg, uint32_t msg_size,
msg->msg_flags = msg_flags;
}
-#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
+#if defined(CONFIG_X86_64)
static int get_cpu_id(int apic_id)
{
int i;
@@ -219,6 +226,24 @@ static void cpu_event_notification(
_fill_msg_header(&rep->msg_header,
sizeof(struct cpu_dev_reply_info), action_type, 0);
}
+#elif defined(CONFIG_ARM64)
+/*
+ * Report a CPU hotplug/hot-unplug result to dragonball. The callers pass
+ * the CPU count reached so far, which equals the expected count on success.
+ */
+static void cpu_event_notification(
+	unsigned int cpu_id,	/* wide enough for "online count + i" */
+	int ret,
+	uint32_t action_type,
+	struct devmgr_reply *rep)
+{
+	pr_info("cpu event notification: cpu_id %u\n", cpu_id);
+	rep->msg_load.cpu_dev_info.cpu_id = cpu_id;
+	rep->ret = ret;
+	_fill_msg_header(&rep->msg_header,
+			 sizeof(struct cpu_dev_reply_info), action_type, 0);
+}
+#endif
#endif
#if defined(CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO)
@@ -262,7 +287,8 @@ static int del_mmio_dev(struct devmgr_req *req,
#endif
-#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
+#if defined(CONFIG_X86_64)
static int add_cpu_upcall(int apic_id, uint8_t apic_ver)
{
int cpu_id, node_id;
@@ -430,6 +456,58 @@ static int del_cpu_dev(struct devmgr_req *req,
cpu_event_notification(i, ret, DEL_CPU, rep);
return ret;
}
+#elif defined(CONFIG_ARM64)
+/* ADD_CPU handler (arm64): online @count CPUs numbered from the online count up. */
+static int add_cpu_dev(struct devmgr_req *req, struct devmgr_reply *rep)
+{
+	int i, ret = 0;
+	unsigned int cpu_id, nr_online_cpus = num_online_cpus();
+	uint8_t count = req->msg_load.cpu_dev_info.count;
+
+	pr_info("Current vcpu number: %d, Add vcpu number: %d\n",
+		nr_online_cpus, count);
+	for (i = 0; i < count; ++i) {
+		cpu_id = nr_online_cpus + i;
+		/* Reject ids outside the possible set instead of passing them on. */
+		ret = (cpu_id < nr_cpu_ids && cpu_possible(cpu_id)) ?
+			add_cpu(cpu_id) : -EINVAL;
+		if (ret != 0)
+			break;
+	}
+
+	cpu_event_notification(nr_online_cpus + i, ret, ADD_CPU, rep);
+	return ret;
+}
+
+/* DEL_CPU handler (arm64): remove @count CPUs, counting down from id online-1. */
+static int del_cpu_dev(struct devmgr_req *req, struct devmgr_reply *rep)
+{
+	const unsigned int online = num_online_cpus();
+	const uint8_t count = req->msg_load.cpu_dev_info.count;
+	unsigned int removed = 0;
+	int ret = 0;
+
+	pr_info("Current vcpu number: %d, Delete vcpu number: %d\n",
+		online, count);
+
+	if (count >= online) {
+		pr_err("cpu del parameter check error: cannot remove all vcpus\n");
+		cpu_event_notification(0, -EINVAL, DEL_CPU, rep);
+		return -EINVAL;
+	}
+
+	/* Stop at the first failure; "removed" counts the successful steps. */
+	while (removed < count) {
+		ret = remove_cpu(online - removed - 1);
+		if (ret != 0)
+			break;
+		removed++;
+	}
+
+	cpu_event_notification(online - removed, ret, DEL_CPU, rep);
+	return ret;
+}
+#endif
#endif
static struct {
@@ -440,7 +518,7 @@ static struct {
{ADD_MMIO, add_mmio_dev},
{DEL_MMIO, del_mmio_dev},
#endif
-#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
{ADD_CPU, add_cpu_dev},
{DEL_CPU, del_cpu_dev},
#endif
--
2.28.0

View File

@ -0,0 +1,66 @@
From 6e07ca77fe7b5c15e0e98d9e86294c7dd2553a5a Mon Sep 17 00:00:00 2001
Message-Id: <6e07ca77fe7b5c15e0e98d9e86294c7dd2553a5a.1685428663.git.jiyunxue@linux.alibaba.com>
In-Reply-To: <16e3b3da9fb8b79b006d8c9d1f68b2dec9980d72.1685428663.git.jiyunxue@linux.alibaba.com>
References: <16e3b3da9fb8b79b006d8c9d1f68b2dec9980d72.1685428663.git.jiyunxue@linux.alibaba.com>
From: xuejun-xj <jiyunxue@linux.alibaba.com>
Date: Wed, 10 May 2023 14:51:40 +0800
Subject: [PATCH 2/3] msi: control msi irq number activated
When passthroughing pci device, kernel will initialize and activate
(max_cpu_count+1) msi irq. However, in vcpu hotplugging situation,
because of vgic, max_cpu_count may be greater than online_cpu_count.
Those offline cpus will also be activated by kernel, which cause failure
of passthroughing pci device.
To solve this problem, this patch adds a function
"check_affinity_mask_online" to check whether msi_desc->affinity contains
any online cpu. If it contains none, the for loop continues and
skips activating the related irq.
Signed-off-by: xuejun-xj <jiyunxue@linux.alibaba.com>
Reviewed-by: Shuo Tan <shuo.tan@linux.alibaba.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
kernel/irq/msi.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 77e513e2e..3a35011ce 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -850,6 +850,23 @@ static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflag
return 0;
}
+/*
+ * check_affinity_mask_online - does @affinity contain any online CPU?
+ *
+ * When NVMe devices are passed through, the kernel allocates maxcpus+1
+ * MSI irqs and then activates them. In vcpu hotplug setups bootcpus may
+ * be smaller than maxcpus, so some of those irqs only target CPUs that
+ * are still offline. The allocation loop uses this helper to skip them.
+ *
+ * Returns true if at least one CPU in the mask is online, false
+ * otherwise.
+ */
+static inline bool check_affinity_mask_online(struct irq_affinity_desc *affinity)
+{
+	/* A non-empty intersection with the online mask means some CPU is online. */
+	return cpumask_intersects(&affinity->mask, cpu_online_mask);
+}
+
int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
int nvec)
{
@@ -897,6 +914,9 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
return msi_handle_pci_fail(domain, desc, allocated);
for (i = 0; i < desc->nvec_used; i++) {
+ if (desc->affinity
+ && !check_affinity_mask_online(desc->affinity))
+ continue;
irq_set_msi_desc_off(virq, i, desc);
irq_debugfs_copy_devname(virq + i, dev);
ret = msi_init_virq(domain, virq + i, vflags);
--
2.28.0

View File

@ -0,0 +1,139 @@
From a05086142be13d43c7fc92500bcb870a2f37e485 Mon Sep 17 00:00:00 2001
Message-Id: <a05086142be13d43c7fc92500bcb870a2f37e485.1685428663.git.jiyunxue@linux.alibaba.com>
In-Reply-To: <16e3b3da9fb8b79b006d8c9d1f68b2dec9980d72.1685428663.git.jiyunxue@linux.alibaba.com>
References: <16e3b3da9fb8b79b006d8c9d1f68b2dec9980d72.1685428663.git.jiyunxue@linux.alibaba.com>
From: xuejun-xj <jiyunxue@linux.alibaba.com>
Date: Tue, 23 May 2023 09:43:02 +0800
Subject: [PATCH 3/3] smp: update bringup_nonboot_cpus parameters
On aarch64, kvm doesn't allow vmm to call KVM_CREATE_VCPU ioctls after
vm has already started, which is caused by vgic_initialized check in
kvm_arch_vcpu_precreate() function. Therefore, to support vcpu hotplug
feature on aarch64, all the vcpus should be created and configured ready
for start at booting procedure.
To solve the problem, dragonball will add a property in each cpu node,
called "boot-onlined". This property indicates whether this cpu should
be onlined at first boot. It has two values: 0 and 1. 0 means offline,
while 1 means online.
This commit also add a helper function called "of_get_cpu_boot_onlined",
which parse the cpu node and get the value of boot-onlined property.
Then update the global variable "boot_onlined_cpu".
When kernel calling smp_init(), bringup_nonboot_cpus will start all the
other cpus except cpu0. The activated cpu number equals setup_max_cpus.
In vcpu hotplug scenario, vmm will create all the vcpufd before vm is
initialized, while activating only a few vcpus at first boot. The
setup_max_cpus variable will be initialized as all vcpu count. This
cause that the other cpus cannot find enough cpu threads, and they will
wait for 5 seconds each cpu.
Therefore, we use boot_onlined_cpu instead of setup_max_cpus to give
"bringup_nonboot_cpus" correct cpu number it needs.
Signed-off-by: xuejun-xj <jiyunxue@linux.alibaba.com>
---
.../devicetree/bindings/arm/cpus.yaml | 11 +++++++++
arch/arm64/kernel/smp.c | 24 +++++++++++++++++++
kernel/smp.c | 10 +++++++-
3 files changed, 44 insertions(+), 1 deletion(-)
diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml
index 14cd727d3c4b..691bb352d842 100644
--- a/Documentation/devicetree/bindings/arm/cpus.yaml
+++ b/Documentation/devicetree/bindings/arm/cpus.yaml
@@ -316,6 +316,17 @@ properties:
formed by encoding the target CPU id into the low bits of the
physical start address it should jump to.
+ boot-onlined:
+ $ref: '/schemas/types.yaml#/definitions/uint32'
+ description: |
+ The boot-onlined property is an optional u32 value that indicates
+ whether the cpu device should be activated at first boot. This is
+ useful in vcpu hotplug scenario to pass correct value of activated
+ cpu number.
+
+ This property has two values: 0 and 1. 1 means the cpu should be
+ activated while 0 means it shouldn't.
+
if:
# If the enable-method property contains one of those values
properties:
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index d323621d1..e0708c9ac 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -620,6 +620,28 @@ static void __init acpi_parse_and_init_cpus(void)
#define acpi_parse_and_init_cpus(...) do { } while (0)
#endif
+
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_ARM64)
+extern unsigned int boot_onlined_cpu;
+/* Count @dn towards boot_onlined_cpu if its "boot-onlined" property is non-zero. */
+static void __init of_get_cpu_boot_onlined(struct device_node *dn)
+{
+	unsigned int boot_onlined;
+	int r;
+
+	r = of_property_read_u32(dn, "boot-onlined", &boot_onlined);
+	if (r) {
+		pr_err("%pOF: missing boot-onlined property\n", dn);
+		return;
+	}
+	/*
+	 * boot-onlined is 0 (offline) or 1 (online). Count a node at most
+	 * once so a stray larger value cannot inflate the boot CPU count.
+	 */
+	boot_onlined_cpu += !!boot_onlined;
+}
+#endif
+
/*
* Enumerate the possible CPU set from the device tree and build the
* cpu logical map array containing MPIDR values related to logical
@@ -630,6 +652,9 @@ static void __init of_parse_and_init_cpus(void)
struct device_node *dn;
for_each_of_cpu_node(dn) {
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_ARM64)
+ of_get_cpu_boot_onlined(dn);
+#endif
u64 hwid = of_get_cpu_hwid(dn, 0);
if (hwid & ~MPIDR_HWID_BITMASK)
diff --git a/kernel/smp.c b/kernel/smp.c
index 25240fb2df94..567615b9a008 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -801,17 +801,25 @@ void __init setup_nr_cpu_ids(void)
nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
}
+/* Setup number of CPUs to activate */
+unsigned int boot_onlined_cpu = 0;
+
/* Called by boot processor to activate the rest. */
void __init smp_init(void)
{
int num_nodes, num_cpus;
+ int num_onlined_cpu = setup_max_cpus;
idle_threads_init();
cpuhp_threads_init();
pr_info("Bringing up secondary CPUs ...\n");
- bringup_nonboot_cpus(setup_max_cpus);
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_ARM64)
+ if (boot_onlined_cpu != 0)
+ num_onlined_cpu = boot_onlined_cpu;
+#endif
+ bringup_nonboot_cpus(num_onlined_cpu);
num_nodes = num_online_nodes();
num_cpus = num_online_cpus();
--
2.28.0

View File

@ -0,0 +1,173 @@
From 4ed40d8ce3793129ba9c0b7b663a5e137aceb70c Mon Sep 17 00:00:00 2001
From: Chao Wu <chaowu@linux.alibaba.com>
Date: Wed, 27 Dec 2023 14:43:47 +0800
Subject: [PATCH] upcall: add pci hotplug / hot-unplug support
add two new upcall functions add_pci_dev and del_pci_dev, mainly for hotplugging
and hot-unplugging pci device in the guest kernel through the upcall server.
Users could implement upcall client side with add_pci or del_pci command and trigger
those commands in the hypervisor side.
As always, Dragonball hypervisor will implement the client side to do pci hotplug and
hot-unplug as an example
Signed-off-by: Gerry Liu <gerry@linux.alibaba.com>
Signed-off-by: Helin Guo <helinguo@linux.alibaba.com>
Signed-off-by: Chao Wu <chaowu@linux.alibaba.com>
---
drivers/misc/dragonball/upcall_srv/Kconfig | 11 +++
.../upcall_srv/dragonball_device_manager.c | 90 +++++++++++++++++++
2 files changed, 101 insertions(+)
diff --git a/drivers/misc/dragonball/upcall_srv/Kconfig b/drivers/misc/dragonball/upcall_srv/Kconfig
index fc83f03c2edd..19a6ca957ea6 100644
--- a/drivers/misc/dragonball/upcall_srv/Kconfig
+++ b/drivers/misc/dragonball/upcall_srv/Kconfig
@@ -47,3 +47,14 @@ config DRAGONBALL_HOTPLUG_CPU
structure with command and parameter to hot-pluging an vCPU.
If unsure, say N.
+
+config DRAGONBALL_HOTPLUG_PCI
+ bool "PCI hotplug/hotunplug support"
+ depends on DRAGONBALL_DEVICE_MANAGER
+ default y
+ help
+	  This option implements PCI hotplug/hotunplug support. The VMM
+	  sends hotplug requests over vsock, using a dedicated message
+	  format (command plus parameters), to hot-plug a PCI device.
+
+ If unsure, say N.
diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
index 088d38623b8d..3544afefa2a9 100644
--- a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+++ b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
@@ -22,6 +22,7 @@
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpuhotplug.h>
+#include <linux/pci.h>
#include <asm/cpu.h>
#include <dragonball/upcall_srv.h>
#include <dragonball/device_manager.h>
@@ -90,6 +91,12 @@ struct devmgr_req {
uint8_t apic_ids[256];
#endif
} cpu_dev_info;
+#endif
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI)
+ struct {
+ uint8_t busno;
+ uint8_t devfn;
+ } pci_dev_info;
#endif
} msg_load;
};
@@ -117,6 +124,9 @@ struct devmgr_reply {
#endif
#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
struct cpu_dev_reply_info cpu_dev_info;
+#endif
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI)
+ struct {} pci_dev_info;
#endif
} msg_load;
};
@@ -286,6 +296,82 @@ static int del_mmio_dev(struct devmgr_req *req,
}
#endif
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI)
+/* ADD_PCI handler: scan the requested slot and add what is found on that bus. */
+static int add_pci_dev(struct devmgr_req *req,
+			struct devmgr_reply *rep)
+{
+	int ret = 0;
+	struct devmgr_msg_header *rep_mh = &rep->msg_header;
+	uint8_t busno = req->msg_load.pci_dev_info.busno;
+	uint8_t devfn = req->msg_load.pci_dev_info.devfn;
+	struct pci_bus *bus;
+	struct pci_dev *dev;
+
+	pr_info("add pci device of busno: %02x, devfn: %02x\n", busno, devfn);
+
+	pci_lock_rescan_remove();
+	/* It is similar to pci_rescan_bus */
+	bus = pci_find_bus(0, busno);
+	if (!bus) {
+		pr_err("Could not find PCI bus for busno %02x\n", busno);
+		ret = -ENODEV;
+		goto out;
+	}
+
+	pci_scan_slot(bus, devfn);
+	dev = pci_get_slot(bus, devfn);
+	if (!dev) {
+		pr_err("Could not find PCI device for slot %02x\n", devfn);
+		ret = -ENODEV;
+		goto out;
+	}
+
+	pci_bus_claim_resources(bus);
+	pci_bus_add_devices(bus);
+	/* dev was only looked up as an existence check; release it again. */
+	pci_dev_put(dev);
+out:
+	pci_unlock_rescan_remove();
+	if (!ret) {
+		/* Set the status explicitly instead of trusting rep's initial state. */
+		rep->ret = ret;
+		_fill_msg_header(rep_mh, 0, ADD_PCI, 0);
+	}
+	return ret;
+}
+
+/* DEL_PCI handler: stop and remove the device at the requested bus/devfn. */
+static int del_pci_dev(struct devmgr_req *req,
+			struct devmgr_reply *rep)
+{
+	int ret = 0;
+	struct devmgr_msg_header *rep_mh = &rep->msg_header;
+	uint8_t busno = req->msg_load.pci_dev_info.busno;
+	uint8_t devfn = req->msg_load.pci_dev_info.devfn;
+	struct pci_dev *dev;
+
+	pr_info("remove pci device of busno: %02x, devfn: %02x\n", busno, devfn);
+	pci_lock_rescan_remove();
+	dev = pci_get_domain_bus_and_slot(0, busno, devfn);
+	if (!dev) {
+		pr_err("Could not find PCI device for slot %02x\n", devfn);
+		ret = -ENODEV;
+		goto out;
+	}
+
+	pci_stop_and_remove_bus_device(dev);
+	pci_dev_put(dev);
+out:
+	pci_unlock_rescan_remove();
+	if (!ret) {
+		/* Set the status explicitly instead of trusting rep's initial state. */
+		rep->ret = ret;
+		_fill_msg_header(rep_mh, 0, DEL_PCI, 0);
+	}
+	return ret;
+}
+#endif
#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
#if defined(CONFIG_X86_64)
@@ -522,6 +608,10 @@ static struct {
{ADD_CPU, add_cpu_dev},
{DEL_CPU, del_cpu_dev},
#endif
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI)
+ {ADD_PCI, add_pci_dev},
+ {DEL_PCI, del_pci_dev},
+#endif
};
static action_route_t get_action(struct devmgr_req *req)
--
2.31.1