dm: update the iothread option to specify the CPU affinity

This patch updates the `iothread` option to specify the CPU affinity
of the iothread. Setting the iothread's CPU affinity could benefit the
Service VM's CPU utilization when Service VM owns limited dedicated CPUs.

It can also help guarantee the Quality of Service (QoS) of the I/O mediator.
Once performance tuning is done, the chosen CPU affinity configuration can be
passed to acrn-dm directly, which makes deployment easier.

The format looks like below:
iothread=<num_iothread>@<cpu_affinity>
"@" is used to separate the following two settings:
 - the number of iothread instances
 - the CPU affinity settings for each iothread instance.

The format of `cpu_affinity` looks like below:
<cpu_affinity_0>/<cpu_affinity_1>/<cpu_affinity_2>/...
1. "/" is used to separate the CPU affinity setting for each iothread instance
   (sequentially).
2. char '*' can be used to skip the setting for the specific iothread instance.
3. the number of cpu_affinity_x vs. the number of iothread instances
   - If there are fewer cpu_affinity_x entries than iothread instances,
     the remaining iothread instances get no CPU affinity setting.
   - If there are more cpu_affinity_x entries than iothread instances,
     the extra cpu_affinity_x entries are discarded.
4. ":" is used to separate different CPU cores for each CPU affinity setting.

Examples to specify the CPU affinity of the iothread:
1. iothread=3@0:1:2/0:1
   `add_virtual_device    9 virtio-blk iothread=3@0:1:2/0:1,mq=3,/dev/nvme1n1`
   a) 3 iothread instances are created.
   b) CPU affinity of iothread instances for this virtio-blk device:
      - 1st iothread instance <-> pins to Service VM CPU 0,1,2
      - 2nd iothread instance <-> pins to Service VM CPU 0,1
      - 3rd iothread instance <-> No CPU affinity settings

2. iothread=3@0/*/1
   `add_virtual_device    9 virtio-blk iothread=3@0/*/1,mq=3,/dev/nvme1n1`
   a) 3 iothread instances are created.
   b) CPU affinity of iothread instances for this virtio-blk device:
      - 1st iothread instance <-> pins to Service VM CPU 0
      - 2nd iothread instance <-> No CPU affinity settings
      - 3rd iothread instance <-> pins to Service VM CPU 1

v1 -> v2:
 * encapsulate one API in iothread.c to parse the iothread options, so that
   other BE can also use it.

v2 -> v3:
 * introduce one API iothread_free_options to free the elements that
   are allocated dynamically in iothread_parse_options().

Tracked-On: #8612

Signed-off-by: Shiqing Gao <shiqing.gao@intel.com>
Acked-by: Wang, Yu1 <yu1.wang@intel.com>
This commit is contained in:
Shiqing Gao 2023-10-31 00:45:17 +08:00 committed by acrnsi-robot
parent a90aa4fd26
commit 5306d9e7db
3 changed files with 173 additions and 37 deletions

View File

@ -14,6 +14,7 @@
#include <sys/queue.h>
#include <pthread.h>
#include <signal.h>
#include <string.h>
#include "iothread.h"
#include "log.h"
@ -63,6 +64,8 @@ io_thread(void *arg)
static int
iothread_start(struct iothread_ctx *ioctx_x)
{
int ret;
pthread_mutex_lock(&ioctx_x->mtx);
if (ioctx_x->started) {
@ -78,6 +81,14 @@ iothread_start(struct iothread_ctx *ioctx_x)
ioctx_x->started = true;
pthread_setname_np(ioctx_x->tid, ioctx_x->name);
if (CPU_COUNT(&(ioctx_x->cpuset)) != 0) {
ret = pthread_setaffinity_np(ioctx_x->tid, sizeof(cpuset_t), &(ioctx_x->cpuset));
if (ret != 0) {
pr_err("pthread_setaffinity_np fails %d \n", ret);
}
}
pthread_mutex_unlock(&ioctx_x->mtx);
pr_info("%s started\n", ioctx_x->name);
@ -165,9 +176,18 @@ iothread_deinit(void)
/*
* Create @ioctx_num iothread context instances
* Return NULL if fails. Otherwise, return the base of those iothread context instances.
*
* Notes:
* The caller of iothread_create() shall call iothread_free_options() afterwards to free the resources that
* are dynamically allocated during iothread_parse_options(), such as iothr_opt->cpusets.
*
* A general calling sequence from the virtual device owner is like:
* 1. Call iothread_parse_options() to parse the options from the user.
* 2. Call iothread_create() to create the iothread instances.
* 3. Call iothread_free_options() to free the dynamic resources.
*/
struct iothread_ctx *
iothread_create(int ioctx_num, const char *ioctx_tag)
iothread_create(struct iothreads_option *iothr_opt)
{
pthread_mutexattr_t attr;
int i, ret, base, end;
@ -175,9 +195,14 @@ iothread_create(int ioctx_num, const char *ioctx_tag)
struct iothread_ctx *ioctx_base = NULL;
ret = 0;
if (iothr_opt == NULL) {
pr_err("%s: iothr_opt is NULL \n", __func__);
return ioctx_base;
}
pthread_mutex_lock(&ioctxes_mutex);
base = ioctx_active_cnt;
end = base + ioctx_num;
end = base + iothr_opt->num;
if (end > IOTHREAD_NUM) {
ret = -1;
@ -197,8 +222,13 @@ iothread_create(int ioctx_num, const char *ioctx_tag)
ioctx_x->started = false;
ioctx_x->epfd = epoll_create1(0);
CPU_ZERO(&(ioctx_x->cpuset));
if (iothr_opt->cpusets != NULL) {
memcpy(&(ioctx_x->cpuset), iothr_opt->cpusets + (i - base), sizeof(cpu_set_t));
}
if (snprintf(ioctx_x->name, PTHREAD_NAME_MAX_LEN,
"iothr-%d-%s", ioctx_x->idx, ioctx_tag) >= PTHREAD_NAME_MAX_LEN) {
"iothr-%d-%s", ioctx_x->idx, iothr_opt->tag) >= PTHREAD_NAME_MAX_LEN) {
pr_err("%s: iothread name too long \n", __func__);
}
@ -218,3 +248,115 @@ iothread_create(int ioctx_num, const char *ioctx_tag)
return ioctx_base;
}
/*
* Parse the iothread options from @str and fill the options in @iothr_opt if successes.
* Return -1 if fails to parse. Otherwise, return 0.
*/
int
iothread_parse_options(char *str, struct iothreads_option *iothr_opt)
{
char *tmp_num = NULL;
char *tmp_cpusets = NULL;
char *tmp_cpux = NULL;
int service_vm_cpuid, iothread_sub_idx, num;
cpu_set_t *cpuset_list = NULL;
/*
* Create one iothread instance if DM parameters contain 'iothread', but the number is not specified.
*/
num = 1;
/*
* Valid 'iothread' setting examples:
* - create 1 iothread instance for virtio-blk
* ... virtio-blk iothread,...
*
* - create 1 iothread instance for virtio-blk
* ... virtio-blk iothread=1,...
*
* - create 3 iothread instances for virtio-blk
* ... virtio-blk iothread=3,...
*
* - create 3 iothread instances for virtio-blk with CPU affinity settings
* ... virtio-blk iothread=3@0:1:2/0:1,...
* CPU affinity of iothread instances for this virtio-blk device:
* - 1st iothread instance <-> Service VM CPU 0,1,2
* - 2nd iothread instance <-> Service VM CPU 0,1
* - 3rd iothread instance <-> No CPU affinity settings
*
*/
if (str != NULL) {
/*
* "@" is used to separate the following two settings:
* - the number of iothread instances
* - the CPU affinity settings for each iothread instance.
*/
tmp_num = strsep(&str, "@");
if (tmp_num != NULL) {
if (dm_strtoi(tmp_num, &tmp_num, 10, &num) || (num <= 0)) {
pr_err("%s: invalid iothread number %s \n", __func__, tmp_num);
return -1;
}
cpuset_list = calloc(num, sizeof(cpu_set_t));
if (cpuset_list == NULL) {
pr_err("%s: calloc cpuset_list returns NULL \n", __func__);
return -1;
}
iothread_sub_idx = 0;
while ((str != NULL) && (*str !='\0') && (iothread_sub_idx < num)) {
/* "/" is used to separate the CPU affinity setting for each iothread instance. */
tmp_cpusets = strsep(&str, "/");
CPU_ZERO(cpuset_list + iothread_sub_idx);
while ((tmp_cpusets != NULL) && (*tmp_cpusets !='\0')) {
/* ":" is used to separate different CPU cores. */
tmp_cpux = strsep(&tmp_cpusets, ":");
/*
* char '*' can be used to skip the setting for the
* specific iothread instance.
*/
if (*tmp_cpux == '*') {
break;
}
if (dm_strtoi(tmp_cpux, &tmp_cpux, 10, &service_vm_cpuid) ||
(service_vm_cpuid < 0)) {
pr_err("%s: invalid CPU affinity setting %s \n",
__func__, tmp_cpux);
free(cpuset_list);
return -1;
}
CPU_SET(service_vm_cpuid, cpuset_list + iothread_sub_idx);
pr_err("%s: iothread[%d]: set service_vm_cpuid %d \n",
__func__, iothread_sub_idx, service_vm_cpuid);
}
iothread_sub_idx++;
}
}
}
iothr_opt->num = num;
iothr_opt->cpusets = cpuset_list;
return 0;
}
/*
 * Release the members of @iothr_opt that iothread_parse_options() allocated
 * dynamically, such as iothr_opt->cpusets, and reset them to NULL.
 * Nothing is done when @iothr_opt is NULL or holds no allocation.
 */
void iothread_free_options(struct iothreads_option *iothr_opt)
{
	if (iothr_opt == NULL) {
		return;
	}

	if (iothr_opt->cpusets == NULL) {
		return;
	}

	free(iothr_opt->cpusets);
	/* Clear the pointer so a repeated call does not free it twice. */
	iothr_opt->cpusets = NULL;
}

View File

@ -469,7 +469,6 @@ virtio_blk_init(struct vmctx *ctx, struct pci_vdev *dev, char *opts)
{
bool dummy_bctxt;
char bident[16];
char ioctx_tag[16];
struct blockif_ctxt *bctxt;
char *opts_tmp = NULL;
char *opts_start = NULL;
@ -479,10 +478,13 @@ virtio_blk_init(struct vmctx *ctx, struct pci_vdev *dev, char *opts)
bool use_iothread;
struct iothread_ctx *ioctx_base = NULL;
struct iothreads_info iothrds_info;
int num_vqs, num_iothread;
int num_vqs;
int i, j;
pthread_mutexattr_t attr;
int rc;
struct iothreads_option iot_opt;
memset(&iot_opt, 0, sizeof(iot_opt));
bctxt = NULL;
/* Assume the bctxt is valid, until identified otherwise */
@ -490,11 +492,6 @@ virtio_blk_init(struct vmctx *ctx, struct pci_vdev *dev, char *opts)
use_iothread = false;
num_vqs = 1;
/*
* Create one iothread instance if DM parameters contain 'iothread', but the number is not specified.
*/
num_iothread = 1;
if (opts == NULL) {
pr_err("virtio_blk: backing device required\n");
return -1;
@ -527,29 +524,16 @@ virtio_blk_init(struct vmctx *ctx, struct pci_vdev *dev, char *opts)
char *p = opts_start;
while (opts_tmp != NULL) {
opt = strsep(&opts_tmp, ",");
/*
* Valid 'iothread' setting examples:
* - create 1 iothread instance for virtio-blk
* ... virtio-blk iothread,...
*
* - create 1 iothread instance for virtio-blk
* ... virtio-blk iothread=1,...
*
* - create 3 iothread instances for virtio-blk
* ... virtio-blk iothread=3,...
*/
if (!strncmp(opt, "iothread", strlen("iothread"))) {
use_iothread = true;
strsep(&opt, "=");
if (opt != NULL) {
if (dm_strtoi(opt, &opt, 10, &num_iothread) ||
(num_iothread <= 0)) {
WPRINTF(("%s: incorrect iothread number %s\n",
__func__, opt));
free(opts_start);
return -1;
}
if (iothread_parse_options(opt, &iot_opt) < 0) {
free(opts_start);
return -1;
}
p = opts_tmp;
} else if (!strncmp(opt, "mq", strlen("mq"))) {
strsep(&opt, "=");
@ -582,22 +566,23 @@ virtio_blk_init(struct vmctx *ctx, struct pci_vdev *dev, char *opts)
* - One or more vqs can be handled in one iothread.
* - The mapping between virtqueues and iothreads is based on round robin.
*/
if (num_iothread > num_vqs) {
num_iothread = num_vqs;
if (iot_opt.num > num_vqs) {
iot_opt.num = num_vqs;
}
if (snprintf(ioctx_tag, sizeof(ioctx_tag), "blk%s", bident) >= sizeof(ioctx_tag)) {
if (snprintf(iot_opt.tag, sizeof(iot_opt.tag), "blk%s", bident) >= sizeof(iot_opt.tag)) {
pr_err("%s: virtio-blk ioctx_tag too long \n", __func__);
}
ioctx_base = iothread_create(num_iothread, ioctx_tag);
ioctx_base = iothread_create(&iot_opt);
iothread_free_options(&iot_opt);
if (ioctx_base == NULL) {
pr_err("%s: Fails to create iothread context instance \n", __func__);
return -1;
}
}
iothrds_info.ioctx_base = ioctx_base;
iothrds_info.num = num_iothread;
iothrds_info.num = iot_opt.num;
bctxt = blockif_open(p, bident, num_vqs, &iothrds_info);
if (bctxt == NULL) {
@ -619,7 +604,7 @@ virtio_blk_init(struct vmctx *ctx, struct pci_vdev *dev, char *opts)
}
blk->iothrds_info.ioctx_base = ioctx_base;
blk->iothrds_info.num = num_iothread;
blk->iothrds_info.num = iot_opt.num;
blk->bc = bctxt;
/* Update virtio-blk device struct of dummy ctxt*/
@ -677,7 +662,7 @@ virtio_blk_init(struct vmctx *ctx, struct pci_vdev *dev, char *opts)
blk->vqs[j].qsize = VIRTIO_BLK_RINGSZ;
blk->vqs[j].notify = virtio_blk_notify;
if (use_iothread) {
blk->vqs[j].viothrd.ioctx = ioctx_base + j % num_iothread;
blk->vqs[j].viothrd.ioctx = ioctx_base + j % (iot_opt.num);
}
}

View File

@ -29,9 +29,16 @@ struct iothread_ctx {
bool started;
pthread_mutex_t mtx;
int idx;
cpu_set_t cpuset;
char name[PTHREAD_NAME_MAX_LEN];
};
/*
 * Options describing a group of iothread instances to be created.
 * Filled by iothread_parse_options() (num, cpusets) and by the virtual device
 * owner (tag), then passed to iothread_create().
 */
struct iothreads_option {
/* Suffix used by iothread_create() when building each thread name ("iothr-<idx>-<tag>"). */
char tag[PTHREAD_NAME_MAX_LEN];
/* Number of iothread instances to create. */
int num;
/*
 * CPU affinity sets, indexed by iothread instance; NULL when none were parsed.
 * Allocated in iothread_parse_options() and released by iothread_free_options().
 */
cpu_set_t *cpusets;
};
struct iothreads_info {
struct iothread_ctx *ioctx_base;
int num;
@ -40,6 +47,8 @@ struct iothreads_info {
int iothread_add(struct iothread_ctx *ioctx_x, int fd, struct iothread_mevent *aevt);
int iothread_del(struct iothread_ctx *ioctx_x, int fd);
void iothread_deinit(void);
struct iothread_ctx *iothread_create(int ioctx_num, const char *ioctx_tag);
struct iothread_ctx *iothread_create(struct iothreads_option *iothr_opt);
int iothread_parse_options(char *str, struct iothreads_option *iothr_opt);
void iothread_free_options(struct iothreads_option *iothr_opt);
#endif