acrn-hypervisor/devicemodel/core/monitor.c
Vijay Dhanraj 71b56e0eed DM: Virtio-Blk Rescan
This patch adds support for triggering a rescan of a virtio-blk device in the
guest VM. It is an alternative to hot-plugging a virtio-blk device. The
feature stems from the Kata Containers requirement of hot-plugging the
container rootfs after the VM is launched.

As part of a virtio-blk rescan (a minimal registration sketch follows this list):
1. Update the backing file for the virtio-blk device with a valid file.
   Essentially, replace the empty file (with a dummy bctxt) that was passed
   during VM launch.
2. Update the virtio-blk device configuration for the updated backing file.
3. Update the size associated with the valid backing file in the config space.
4. Notify the guest OS of the config change.
5. On this notification, the guest will:
   (i)   update the virtio-blk capacity,
   (ii)  revalidate the disk, and
   (iii) identify the newly plugged block device.
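
For illustration, a minimal sketch of how the virtio-blk backend could hook
into the DM monitor. The vm_monitor_rescan and virtio_rescan_ops names come
from this changelog, and monitor_register_vm_ops() plus the .rescan callback
are taken from devicemodel/core/monitor.c below; the callback signature and
the per-device context are assumptions, not the actual virtio-blk code:

	#include "monitor.h"

	/* sketch: rescan callback invoked by the DM monitor on DM_BLKRESCAN */
	static int vm_monitor_rescan(void *arg, char *devargs)
	{
		/* reopen the backing file named by devargs, refresh the
		 * virtio-blk config-space capacity, then notify the guest
		 * so it revalidates the disk (steps 1-4 above) */
		return 0;
	}

	static struct monitor_vm_ops virtio_rescan_ops = {
		.rescan = vm_monitor_rescan,
	};

	/* registered once while the virtio-blk device is initialized;
	 * blk_dev is a hypothetical per-device context handed back to .rescan */
	static void virtio_blk_register_rescan(void *blk_dev)
	{
		monitor_register_vm_ops(&virtio_rescan_ops, blk_dev, "virtio_blk_rescan");
	}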

v5 -> v6:
- Removed the use of a dummy file and added a new parameter, "nodisk",
  to virtio-blk, which indicates that the user wants to create a virtio-blk
  device with a dummy backend.
- Moved vm_monitor_rescan from the PCI core to virtio-blk, as it currently
  applies only to virtio-blk.

v4 -> v5:
- Reverted the logic so that blkrescan is only supported when the
  VM is launched with an empty backend file.

v3 -> v4:
- Close the block context before allocating a new one.
- Allow a backend filepath with additional options, to be more generic.
- Remove blank lines introduced by previous patches.

v2 -> v3:
- Renamed the vdev_blk rescan op to vdev_rescan.
- Renamed the monitor ops virtio_blkrescan_ops to virtio_rescan_ops.
- Consolidated the virtio-blk configuration-specific part into
  a separate function.
- Removed the size requirement from the acrnctl command.

v1 -> v2:
- Added more comments in the code.
- Renamed APIs from displug to blkrescan, in line with the acrnctl command.
- Split the patch into two; this one corresponds to the changes in acrn-dm.

Tracked-On: #3051
Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
Acked-by: Yu Wang <yu1.wang@intel.com>
2019-05-07 09:08:50 +08:00

/*
 * Project Acrn
 * Acrn-dm-monitor
 *
 * Copyright (C) 2018 Intel Corporation. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *
 *
 * Author: TaoYuhong <yuhong.tao@intel.com>
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/queue.h>
#include <unistd.h>
#include <pthread.h>
#include "dm.h"
#include "dm_string.h"
#include "monitor.h"
#include "acrn_mngr.h"
#include "pm.h"
#include "vmmapi.h"
#include "log.h"
#define INTR_STORM_MONITOR_PERIOD 10 /* 10 seconds */
#define INTR_STORM_THRESHOLD 100000 /* 100K per 10s probe period, i.e. 10K times per second */
#define DELAY_INTR_TIME 1 /* 1ms */
#define DELAY_DURATION 100000 /* 100ms of total duration for delay intr */
#define TIME_TO_CHECK_AGAIN 2 /* 2seconds */

struct intr_monitor_setting_t {
	bool enable;
	uint32_t threshold;      /* intr count in probe_period when intr storm happens */
	uint32_t probe_period;   /* seconds: the period to probe intr data */
	uint32_t delay_time;     /* ms: the time to delay each intr injection */
	uint32_t delay_duration; /* us: the delay duration, after it, intr injection restore to normal */
};

union intr_monitor_t {
	struct acrn_intr_monitor monitor;
	char reserved[4096];
} __aligned(4096);

static union intr_monitor_t intr_data;
static uint64_t intr_cnt_buf[MAX_PTDEV_NUM * 2];
static pthread_t intr_storm_monitor_pid;
static struct intr_monitor_setting_t intr_monitor_setting = {
	.enable = false,
};

/* debug switch: enable only when debugging */
/* #define INTR_MONITOR_DBG */

#ifdef INTR_MONITOR_DBG
static FILE *dbg_file;
#define DPRINTF(format, args...) \
	do { fprintf(dbg_file, format, args); fflush(dbg_file); } while (0)

/* this is a debug function */
static void write_intr_data_to_file(const struct acrn_intr_monitor *hdr)
{
	static int wr_cnt;
	int j;

	wr_cnt++;
	fprintf(dbg_file, "\n==%d time devs=%d==\n", wr_cnt, hdr->buf_cnt / 2);
	fprintf(dbg_file, "IRQ\t\tCount\n");
	for (j = 0; j < hdr->buf_cnt; j += 2) {
		if (hdr->buffer[j + 1] != 0) {
			fprintf(dbg_file, "%ld\t\t%ld\n", hdr->buffer[j], hdr->buffer[j + 1]);
		}
	}
	fflush(dbg_file);
}
#else
#define DPRINTF(format, arg...)
#endif

static void *intr_storm_monitor_thread(void *arg)
{
	struct vmctx *ctx = (struct vmctx *)arg;
	struct acrn_intr_monitor *hdr = &intr_data.monitor;
	uint64_t delta = 0UL;
	int ret, i;

#ifdef INTR_MONITOR_DBG
	dbg_file = fopen("/tmp/intr_log", "w+");
#endif
	sleep(intr_monitor_setting.probe_period);

	/* first to get interrupt data */
	hdr->cmd = INTR_CMD_GET_DATA;
	hdr->buf_cnt = MAX_PTDEV_NUM * 2;
	memset(hdr->buffer, 0, sizeof(uint64_t) * hdr->buf_cnt);
	ret = vm_intr_monitor(ctx, hdr);
	if (ret) {
		DPRINTF("first get intr data failed, ret: %d\n", ret);
		intr_storm_monitor_pid = 0;
		return NULL;
	}

	while (1) {
#ifdef INTR_MONITOR_DBG
		write_intr_data_to_file(hdr);
#endif
		memcpy(intr_cnt_buf, hdr->buffer, sizeof(uint64_t) * hdr->buf_cnt);
		sleep(intr_monitor_setting.probe_period);

		/* next time to get interrupt data */
		memset(hdr->buffer, 0, sizeof(uint64_t) * hdr->buf_cnt);
		ret = vm_intr_monitor(ctx, hdr);
		if (ret) {
			pr_err("next get intr data failed, ret: %d\n", ret);
			intr_storm_monitor_pid = 0;
			break;
		}

		/*
		 * Calculate the delta between the two interrupt-count samples.
		 * Compare the IRQ number first; if it differs, just drop the
		 * entry -- this only happens rarely, when devices are
		 * dynamically allocated in the SOS or UOS, and the delta can
		 * be calculated next time.
		 */
		for (i = 0; i < hdr->buf_cnt; i += 2) {
			if (hdr->buffer[i] != intr_cnt_buf[i])
				continue;

			/* avoid delta overflow */
			if (hdr->buffer[i + 1] < intr_cnt_buf[i + 1])
				continue;

			delta = hdr->buffer[i + 1] - intr_cnt_buf[i + 1];
			if (delta > intr_monitor_setting.threshold) {
#ifdef INTR_MONITOR_DBG
				write_intr_data_to_file(hdr);
#endif
				break;
			}
		}

		/* storm detected, handle the intr abnormal status */
		if (i < hdr->buf_cnt) {
			pr_notice("irq=%ld, delta=%ld\n", intr_cnt_buf[i], delta);

			hdr->cmd = INTR_CMD_DELAY_INT;
			hdr->buffer[0] = intr_monitor_setting.delay_time;
			vm_intr_monitor(ctx, hdr);
			usleep(intr_monitor_setting.delay_duration); /* sleep-delay intr */
			hdr->buffer[0] = 0; /* cancel to delay intr */
			vm_intr_monitor(ctx, hdr);

			sleep(TIME_TO_CHECK_AGAIN); /* time to get data again */
			hdr->cmd = INTR_CMD_GET_DATA;
			hdr->buf_cnt = MAX_PTDEV_NUM * 2;
			memset(hdr->buffer, 0, sizeof(uint64_t) * hdr->buf_cnt);
			vm_intr_monitor(ctx, hdr);
		}
	}

	return NULL;
}

static void start_intr_storm_monitor(struct vmctx *ctx)
{
	if (intr_monitor_setting.enable) {
		int ret = pthread_create(&intr_storm_monitor_pid, NULL, intr_storm_monitor_thread, ctx);

		if (ret) {
			pr_err("failed %s %d\n", __func__, __LINE__);
			intr_storm_monitor_pid = 0;
			return;
		}
		pthread_setname_np(intr_storm_monitor_pid, "storm_monitor");
		pr_info("start monitoring interrupt data...\n");
	}
}

static void stop_intr_storm_monitor(void)
{
	if (intr_storm_monitor_pid) {
		void *ret;

		pthread_cancel(intr_storm_monitor_pid);
		pthread_join(intr_storm_monitor_pid, &ret);
		intr_storm_monitor_pid = 0;
	}
}

/*
 * Interrupt monitor setting params.  The current interrupt mitigation delays
 * interrupt injection for the UOS's pass-through devices; the settings come
 * from acrn-dm:
 * params:
 *   threshold:      intr count per second at which an intr storm is declared;
 *   probe_period:   seconds -- the period used to probe intr data;
 *   delay_time:     ms -- the time to delay each intr injection;
 *   delay_duration: us -- the delay duration; after it, intr injection restores to normal
 */
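
/*
 * Example: with "--intr_monitor 10000,10,1,100" (the format suggested in the
 * error hint below), acrn_parse_intr_monitor() produces
 *   threshold      = 10000 * 10 = 100000 interrupts per 10-second probe window
 *   probe_period   = 10 seconds between samples
 *   delay_time     = 1 ms of delay per interrupt injection while mitigating
 *   delay_duration = 100 * 1000 = 100000 us of total mitigation time
 */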

int acrn_parse_intr_monitor(const char *opt)
{
	uint32_t threshold, period, delay, duration;
	char *cp;

	if ((!dm_strtoui(opt, &cp, 10, &threshold) && *cp == ',') &&
		(!dm_strtoui(cp + 1, &cp, 10, &period) && *cp == ',') &&
		(!dm_strtoui(cp + 1, &cp, 10, &delay) && *cp == ',') &&
		(!dm_strtoui(cp + 1, &cp, 10, &duration))) {
		pr_info("interrupt storm monitor params: %d, %d, %d, %d\n", threshold, period, delay, duration);
	} else {
		pr_err("%s: not correct, it should be like: --intr_monitor 10000,10,1,100, please check!\n", opt);
		return -1;
	}

	intr_monitor_setting.enable = true;
	intr_monitor_setting.threshold = threshold * period;
	intr_monitor_setting.probe_period = period;
	intr_monitor_setting.delay_time = delay;
	intr_monitor_setting.delay_duration = duration * 1000;

	return 0;
}

struct vm_ops {
	char name[16];
	void *arg;
	struct monitor_vm_ops *ops;
	LIST_ENTRY(vm_ops) list;
};

static unsigned wakeup_reason = 0;

unsigned get_wakeup_reason(void)
{
	return wakeup_reason;
}

int set_wakeup_timer(time_t t)
{
	int acrnd_fd;
	struct mngr_msg req;
	struct mngr_msg ack;
	int ret;

	acrnd_fd = mngr_open_un("acrnd", MNGR_CLIENT);
	if (acrnd_fd < 0) {
		return -1;
	}

	req.magic = MNGR_MSG_MAGIC;
	req.msgid = ACRND_TIMER;
	req.timestamp = time(NULL);
	req.data.rtc_timer.t = t;
	strncpy(req.data.rtc_timer.vmname, vmname,
		sizeof(req.data.rtc_timer.vmname));

	memset(&ack, 0, sizeof(struct mngr_msg));
	ret = mngr_send_msg(acrnd_fd, &req, &ack, 2);
	mngr_close(acrnd_fd);
	if (ret != sizeof(ack)) {
		pr_err("%s %d\r\n", __func__, __LINE__);
		return -1;
	}

	return ack.data.err;
}

static LIST_HEAD(vm_ops_list, vm_ops) vm_ops_head;
static pthread_mutex_t vm_ops_mtx = PTHREAD_MUTEX_INITIALIZER;

int monitor_register_vm_ops(struct monitor_vm_ops *mops, void *arg,
			    const char *name)
{
	struct vm_ops *ops;

	if (!mops) {
		pr_err("%s %d\r\n", __func__, __LINE__);
		return -1;
	}

	ops = calloc(1, sizeof(*ops));
	if (!ops) {
		pr_err("Alloc ops");
		return -1;
	}

	if (name)
		strncpy(ops->name, name, sizeof(ops->name) - 1);

	ops->ops = mops;
	ops->arg = arg;

	pthread_mutex_lock(&vm_ops_mtx);
	LIST_INSERT_HEAD(&vm_ops_head, ops, list);
	pthread_mutex_unlock(&vm_ops_mtx);

	return 0;
}

static int monitor_fd = -1;

/* handlers */
#define ACK_TIMEOUT 1

#define DEFINE_HANDLER(name, func) \
static void name(struct mngr_msg *msg, int client_fd, void *param) \
{ \
	struct mngr_msg ack; \
	struct vm_ops *ops; \
 \
	int ret = 0; \
	int count = 0; \
 \
	ack.magic = MNGR_MSG_MAGIC; \
	ack.msgid = msg->msgid; \
	ack.timestamp = msg->timestamp; \
 \
	LIST_FOREACH(ops, &vm_ops_head, list) { \
		if (ops->ops->func) { \
			ret += ops->ops->func(ops->arg); \
			count++; \
		} \
	} \
 \
	if (!count) { \
		ack.data.err = -1; \
		pr_err("No handler for id:%u\r\n", msg->msgid); \
	} else \
		ack.data.err = ret; \
 \
	mngr_send_msg(client_fd, &ack, NULL, ACK_TIMEOUT); \
}

DEFINE_HANDLER(handle_stop, stop);
DEFINE_HANDLER(handle_suspend, suspend);
DEFINE_HANDLER(handle_pause, pause);
DEFINE_HANDLER(handle_continue, unpause);

static void handle_resume(struct mngr_msg *msg, int client_fd, void *param)
{
	struct mngr_msg ack;
	struct vm_ops *ops;
	int ret = 0;
	int count = 0;

	ack.magic = MNGR_MSG_MAGIC;
	ack.msgid = msg->msgid;
	ack.timestamp = msg->timestamp;

	wakeup_reason = msg->data.reason;

	LIST_FOREACH(ops, &vm_ops_head, list) {
		if (ops->ops->resume) {
			ret += ops->ops->resume(ops->arg);
			count++;
		}
	}

	if (!count) {
		ack.data.err = -1;
		pr_err("No handler for id:%u\r\n", msg->msgid);
	} else
		ack.data.err = ret;

	mngr_send_msg(client_fd, &ack, NULL, ACK_TIMEOUT);
}

static void handle_query(struct mngr_msg *msg, int client_fd, void *param)
{
	struct mngr_msg ack;
	struct vm_ops *ops;

	ack.magic = MNGR_MSG_MAGIC;
	ack.msgid = msg->msgid;
	ack.timestamp = msg->timestamp;
	ack.data.state = -1;

	LIST_FOREACH(ops, &vm_ops_head, list) {
		if (ops->ops->query) {
			ack.data.state = ops->ops->query(ops->arg);
			break;
		}
	}

	mngr_send_msg(client_fd, &ack, NULL, ACK_TIMEOUT);
}

static void handle_blkrescan(struct mngr_msg *msg, int client_fd, void *param)
{
	struct mngr_msg ack;
	struct vm_ops *ops;
	int ret = 0;
	int count = 0;

	ack.magic = MNGR_MSG_MAGIC;
	ack.msgid = msg->msgid;
	ack.timestamp = msg->timestamp;
	wakeup_reason = msg->data.reason;

	LIST_FOREACH(ops, &vm_ops_head, list) {
		if (ops->ops->rescan) {
			ret += ops->ops->rescan(ops->arg, msg->data.devargs);
			count++;
		}
	}

	if (!count) {
		ack.data.err = -1;
		pr_err("No handler for id:%u\r\n", msg->msgid);
	} else
		ack.data.err = ret;

	mngr_send_msg(client_fd, &ack, NULL, ACK_TIMEOUT);
}

static struct monitor_vm_ops pmc_ops = {
	.stop = NULL,
	.resume = vm_monitor_resume,
	.suspend = NULL,
	.pause = NULL,
	.unpause = NULL,
	.query = vm_monitor_query,
};

int monitor_init(struct vmctx *ctx)
{
	int ret;
	char path[128] = {};

	ret = check_dir(ACRN_DM_BASE_PATH, CHK_CREAT);
	if (ret) {
		pr_err("%s %d\r\n", __func__, __LINE__);
		goto dir_err;
	}

	ret = check_dir(ACRN_DM_SOCK_PATH, CHK_CREAT);
	if (ret) {
		pr_err("%s %d\r\n", __func__, __LINE__);
		goto dir_err;
	}

	snprintf(path, sizeof(path) - 1, "%s.monitor", vmname);
	monitor_fd = mngr_open_un(path, MNGR_SERVER);
	if (monitor_fd < 0) {
		pr_err("%s %d\r\n", __func__, __LINE__);
		goto server_err;
	}

	ret = 0;
	ret += mngr_add_handler(monitor_fd, DM_STOP, handle_stop, NULL);
	ret += mngr_add_handler(monitor_fd, DM_SUSPEND, handle_suspend, NULL);
	ret += mngr_add_handler(monitor_fd, DM_RESUME, handle_resume, NULL);
	ret += mngr_add_handler(monitor_fd, DM_PAUSE, handle_pause, NULL);
	ret += mngr_add_handler(monitor_fd, DM_CONTINUE, handle_continue, NULL);
	ret += mngr_add_handler(monitor_fd, DM_QUERY, handle_query, NULL);
	ret += mngr_add_handler(monitor_fd, DM_BLKRESCAN, handle_blkrescan, NULL);

	if (ret) {
		pr_err("%s %d\r\n", __func__, __LINE__);
		goto handlers_err;
	}

	monitor_register_vm_ops(&pmc_ops, ctx, "PMC_VM_OPs");
	start_intr_storm_monitor(ctx);

	return 0;

handlers_err:
	mngr_close(monitor_fd);
	monitor_fd = -1;
server_err:
dir_err:
	return -1;
}

void monitor_close(void)
{
	if (monitor_fd >= 0)
		mngr_close(monitor_fd);

	stop_intr_storm_monitor();
}
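
For reference, a minimal client-side sketch of how a management tool such as
acrnctl could drive the DM_BLKRESCAN path above, mirroring the request/ack
pattern of set_wakeup_timer(). The layout of msg.data.devargs and the exact
headers a client would include are assumptions inferred from this file, not
the real acrnctl implementation:

	#include <stdio.h>
	#include <string.h>
	#include <time.h>
	#include "acrn_mngr.h"
	#include "monitor.h"

	/* sketch: ask a VM's device model to rescan its virtio-blk backend */
	static int request_blkrescan(const char *vmname, const char *devargs)
	{
		struct mngr_msg req, ack;
		char sock[128];
		int fd, ret;

		/* monitor_init() listens on "<vmname>.monitor" */
		snprintf(sock, sizeof(sock), "%s.monitor", vmname);
		fd = mngr_open_un(sock, MNGR_CLIENT);
		if (fd < 0)
			return -1;

		memset(&req, 0, sizeof(req));
		req.magic = MNGR_MSG_MAGIC;
		req.msgid = DM_BLKRESCAN;
		req.timestamp = time(NULL);
		/* new backend path plus options, consumed by the .rescan callback */
		strncpy(req.data.devargs, devargs, sizeof(req.data.devargs) - 1);

		memset(&ack, 0, sizeof(ack));
		ret = mngr_send_msg(fd, &req, &ack, 2);
		mngr_close(fd);

		return (ret == sizeof(ack)) ? ack.data.err : -1;
	}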