diff --git a/hypervisor/common/hv_main.c b/hypervisor/common/hv_main.c
index 1fab9b8c1..8ff5aaea9 100644
--- a/hypervisor/common/hv_main.c
+++ b/hypervisor/common/hv_main.c
@@ -17,6 +17,9 @@
 
 void vcpu_thread(struct thread_object *obj)
 {
+#ifdef HV_DEBUG
+	uint64_t vmexit_begin = 0UL, vmexit_end = 0UL;
+#endif
 	struct acrn_vcpu *vcpu = container_of(obj, struct acrn_vcpu, thread_obj);
 	uint32_t basic_exit_reason = 0U;
 	int32_t ret = 0;
@@ -43,6 +46,35 @@ void vcpu_thread(struct thread_object *obj)
 		reset_event(&vcpu->events[VCPU_EVENT_VIRTUAL_INTERRUPT]);
 
 		profiling_vmenter_handler(vcpu);
+#ifdef HV_DEBUG
+		vmexit_end = rdtsc();
+		if (vmexit_begin != 0UL) {
+			uint64_t delta = vmexit_end - vmexit_begin;
+			uint32_t us = (uint32_t)ticks_to_us(delta);
+			uint16_t fls = (uint16_t)(fls32(us) + 1); /* fls32(0) returns 0xFFFF, so fls wraps to 0 when us == 0 */
+			uint16_t index = 0;
+
+			if (fls >= MAX_VMEXIT_LEVEL) {
+				index = MAX_VMEXIT_LEVEL - 1;
+			} else if (fls > 0) { /* fls == 0 means us == 0, keep index at 0 */
+				index = fls - 1;
+			}
+
+			get_cpu_var(vmexit_cnt)[basic_exit_reason][index]++;
+			get_cpu_var(vmexit_time)[basic_exit_reason][0] += delta;
+
+			vcpu->vmexit_cnt[basic_exit_reason][index]++;
+			vcpu->vmexit_time[basic_exit_reason][0] += delta;
+
+			if (us > get_cpu_var(vmexit_time)[basic_exit_reason][1]) {
+				get_cpu_var(vmexit_time)[basic_exit_reason][1] = us;
+			}
+
+			if (us > vcpu->vmexit_time[basic_exit_reason][1]) {
+				vcpu->vmexit_time[basic_exit_reason][1] = us;
+			}
+		}
+#endif
 		TRACE_2L(TRACE_VM_ENTER, 0UL, 0UL);
 		ret = run_vcpu(vcpu);
 		if (ret != 0) {
@@ -54,6 +86,11 @@ void vcpu_thread(struct thread_object *obj)
 
 		basic_exit_reason = vcpu->arch.exit_reason & 0xFFFFU;
 		TRACE_2L(TRACE_VM_EXIT, basic_exit_reason, vcpu_get_rip(vcpu));
+#ifdef HV_DEBUG
+		vmexit_begin = rdtsc();
+		get_cpu_var(vmexit_cnt)[basic_exit_reason][TOTAL_ARRAY_LEVEL - 1]++;
+		vcpu->vmexit_cnt[basic_exit_reason][TOTAL_ARRAY_LEVEL - 1]++;
+#endif
 		vcpu->arch.nrexits++;
 
 		profiling_pre_vmexit_handler(vcpu);
diff --git a/hypervisor/common/schedule.c b/hypervisor/common/schedule.c
index 74c75338b..6ac409349 100644
--- a/hypervisor/common/schedule.c
+++ b/hypervisor/common/schedule.c
@@ -163,6 +163,10 @@ void schedule(void)
 	struct thread_object *prev = ctl->curr_obj;
 	uint64_t rflag;
 
+#ifdef HV_DEBUG
+	get_cpu_var(resched_times)++;
+#endif
+
 	obtain_schedule_lock(pcpu_id, &rflag);
 	if (ctl->scheduler->pick_next != NULL) {
 		next = ctl->scheduler->pick_next(ctl);
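Note: the HV_DEBUG block added to vcpu_thread() buckets each exit-to-entry latency by its most significant bit: the latency is converted to microseconds and fls32() selects one of the MAX_VMEXIT_LEVEL power-of-two ranges (0-2us, 2-4us, ..., 8192us and above). A minimal sketch of the same mapping, for illustration only (latency_bucket is a hypothetical helper name, not part of the patch; fls32() is the hypervisor's find-last-set primitive, which returns 0xFFFFU for a zero input):

	/* illustrative sketch, not part of the patch: map a latency in us to a bucket index */
	static inline uint16_t latency_bucket(uint32_t us)
	{
		uint16_t fls = (uint16_t)(fls32(us) + 1U); /* fls32(0) = 0xFFFFU, so us == 0 wraps to fls == 0 */
		uint16_t index = 0U;

		if (fls >= MAX_VMEXIT_LEVEL) {
			index = MAX_VMEXIT_LEVEL - 1U; /* 8192us and above share the last bucket */
		} else if (fls > 0U) {
			index = fls - 1U; /* e.g. us = 100 -> fls32 = 6 -> bucket 6 (64us - 128us) */
		}
		return index;
	}

This index selects the per-pCPU and per-vCPU vmexit_cnt[reason][index] histogram slots updated above.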
diff --git a/hypervisor/debug/shell.c b/hypervisor/debug/shell.c
index f441343ee..0ea36d89f 100644
--- a/hypervisor/debug/shell.c
+++ b/hypervisor/debug/shell.c
@@ -27,7 +27,7 @@
 #define MAX_STR_SIZE		256U
 #define SHELL_PROMPT_STR	"ACRN:\\>"
 
-#define SHELL_LOG_BUF_SIZE	(PAGE_SIZE * MAX_PCPU_NUM / 2U)
+#define SHELL_LOG_BUF_SIZE	(PAGE_SIZE * MAX_PCPU_NUM)
 static char shell_log_buf[SHELL_LOG_BUF_SIZE];
 
 /* Input Line Other - Switch to the "other" input line (there are only two
@@ -52,6 +52,7 @@ static int32_t shell_cpuid(int32_t argc, char **argv);
 static int32_t shell_reboot(int32_t argc, char **argv);
 static int32_t shell_rdmsr(int32_t argc, char **argv);
 static int32_t shell_wrmsr(int32_t argc, char **argv);
+static int32_t shell_show_vmexit_profile(int32_t argc, char **argv);
 
 static struct shell_cmd shell_cmds[] = {
 	{
@@ -156,6 +157,12 @@ static struct shell_cmd shell_cmds[] = {
 		.help_str = SHELL_CMD_WRMSR_HELP,
 		.fcn = shell_wrmsr,
 	},
+	{
+		.str = SHELL_CMD_VMEXIT,
+		.cmd_param = SHELL_CMD_VMEXIT_PARAM,
+		.help_str = SHELL_CMD_VMEXIT_HELP,
+		.fcn = shell_show_vmexit_profile,
+	},
 };
 
 /* The initial log level*/
@@ -1452,3 +1459,477 @@ static int32_t shell_wrmsr(int32_t argc, char **argv)
 
 	return ret;
 }
+
+static void get_vmexit_profile_per_pcpu(char *str_arg, size_t str_max)
+{
+	char *str = str_arg;
+	uint16_t cpu, i;
+	size_t len, size = str_max;
+	uint16_t pcpu_num = get_pcpu_nums();
+
+	len = snprintf(str, size, "\r\nNow(us) = %16lld\r\n", ticks_to_us(rdtsc()));
+	if (len >= size) {
+		goto overflow;
+	}
+	size -= len;
+	str += len;
+
+	len = snprintf(str, size, "\r\nREASON");
+	if (len >= size) {
+		goto overflow;
+	}
+	size -= len;
+	str += len;
+
+	for (cpu = 0U; cpu < pcpu_num; cpu++) {
+		len = snprintf(str, size, "\t CPU%hu\t US", cpu);
+		if (len >= size) {
+			goto overflow;
+		}
+		size -= len;
+		str += len;
+	}
+
+	for (i = 0U; i < 64U; i++) {
+
+		/* to ignore the 0 count vmexit */
+		for (cpu = 0U; cpu < pcpu_num; cpu++) {
+			if (per_cpu(vmexit_cnt, cpu)[i][TOTAL_ARRAY_LEVEL - 1] != 0)
+				break;
+		}
+
+		if (cpu == pcpu_num)
+			continue;
+
+		len = snprintf(str, size, "\r\n0x%02x", i);
+		if (len >= size) {
+			goto overflow;
+		}
+		size -= len;
+		str += len;
+
+		for (cpu = 0U; cpu < pcpu_num; cpu++) {
+			len = snprintf(str, size, "\t%10lld\t%10lld",
+				per_cpu(vmexit_cnt, cpu)[i][TOTAL_ARRAY_LEVEL - 1],
+				ticks_to_us(per_cpu(vmexit_time, cpu)[i][0]));
+			if (len >= size) {
+				goto overflow;
+			}
+
+			size -= len;
+			str += len;
+		}
+	}
+
+	for (cpu = 0U; cpu < pcpu_num; cpu++) {
+		len = snprintf(str, size, "\r\ncpu%d thread resched: %ld", cpu, per_cpu(resched_times, cpu));
+
+		if (len >= size) {
+			goto overflow;
+		}
+		size -= len;
+		str += len;
+	}
+
+	snprintf(str, size, "\r\n");
+	return;
+
+overflow:
+	printf("buffer size is not enough! please check!\n");
+}
+
+static void get_vmexit_profile_per_vcpu(char *str_arg, size_t str_max)
+{
+	char *str = str_arg;
+	uint16_t vm_id, i, vcpu_id;
+	size_t len, size = str_max;
+	struct acrn_vm *vm;
+	struct acrn_vcpu *vcpu;
+	bool has_vmexit = false;
+
+	len = snprintf(str, size, "\r\nNow(us) = %16lld; total vmexit per vm/vCPU (count & time):\r\n",
+		ticks_to_us(rdtsc()));
+	if (len >= size) {
+		goto overflow;
+	}
+	size -= len;
+	str += len;
+
+	len = snprintf(str, size, "\r\nREASON");
+	if (len >= size) {
+		goto overflow;
+	}
+	size -= len;
+	str += len;
+
+	for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
+		vm = get_vm_from_vmid(vm_id);
+		if (is_poweroff_vm(vm))
+			continue;
+
+		foreach_vcpu(vcpu_id, vm, vcpu) {
+			len = snprintf(str, size, "\t VM%u/vCPU%u\t US", vm_id, vcpu_id);
+			if (len >= size) {
+				goto overflow;
+			}
+			size -= len;
+			str += len;
+		}
+	}
+
+	for (i = 0U; i < 64U; i++) {
+
+		for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
+			vm = get_vm_from_vmid(vm_id);
+
+			if (is_poweroff_vm(vm))
+				continue;
+
+			has_vmexit = false;
+			foreach_vcpu(vcpu_id, vm, vcpu) {
+				if (vcpu->vmexit_cnt[i][TOTAL_ARRAY_LEVEL - 1] != 0) {
+					has_vmexit = true;
+					break;
+				}
+			}
+
+			if (has_vmexit)
+				break;
+		}
+
+		if (vm_id == CONFIG_MAX_VM_NUM)
+			continue;
+
+		len = snprintf(str, size, "\r\n0x%02x", i);
+		if (len >= size) {
+			goto overflow;
+		}
+		size -= len;
+		str += len;
+
+		for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
+			vm = get_vm_from_vmid(vm_id);
+			if (is_poweroff_vm(vm))
+				continue;
+
+			foreach_vcpu(vcpu_id, vm, vcpu) {
+				len = snprintf(str, size, "\t%10lld\t%10lld",
+					vcpu->vmexit_cnt[i][TOTAL_ARRAY_LEVEL - 1],
+					ticks_to_us(vcpu->vmexit_time[i][0]));
+				if (len >= size) {
+					goto overflow;
+				}
+
+				size -= len;
+				str += len;
+			}
+		}
+	}
+
+	snprintf(str, size, "\r\n");
+	return;
+
+overflow:
+	printf("buffer size is not enough! please check!\n");
+}
+
+static const char *level_info[MAX_VMEXIT_LEVEL] = {
+	" 0us - 2us",
+	" 2us - 4us",
+	" 4us - 8us",
+	" 8us - 16us",
+	" 16us - 32us",
+	" 32us - 64us",
+	" 64us - 128us",
+	" 128us - 256us",
+	" 256us - 512us",
+	" 512us -1024us",
+	"1024us -2048us",
+	"2048us -4096us",
+	"4096us -8192us",
+	"8192us - more",
+};
+
+static void get_vmexit_details_per_pcpu(char *str_arg, size_t str_max)
+{
+	char *str = str_arg;
+	uint16_t cpu, i, level;
+	size_t len, size = str_max;
+	uint16_t pcpu_num = get_pcpu_nums();
+
+	len = snprintf(str, size, "\r\nNow=%lldus, for detailed latency of each vmexit on each cpu:",
+		ticks_to_us(rdtsc()));
+	if (len >= size) {
+		goto overflow;
+	}
+	size -= len;
+	str += len;
+
+	for (i = 0U; i < 64U; i++) {
+
+		/* to ignore the 0 count vmexit */
+		for (cpu = 0U; cpu < pcpu_num; cpu++) {
+			if (per_cpu(vmexit_cnt, cpu)[i][TOTAL_ARRAY_LEVEL - 1] != 0)
+				break;
+		}
+
+		if (cpu == pcpu_num)
+			continue;
+
+		len = snprintf(str, size, "\r\n\r\n VMEXIT/0x%02x", i);
+		if (len >= size) {
+			goto overflow;
+		}
+		size -= len;
+		str += len;
+
+		for (cpu = 0U; cpu < pcpu_num; cpu++) {
+			len = snprintf(str, size, " CPU%hu", cpu);
+			if (len >= size) {
+				goto overflow;
+			}
+			size -= len;
+			str += len;
+		}
+
+		for (level = 0; level < MAX_VMEXIT_LEVEL; level++) {
+
+			/* to ignore the 0 count vmexit */
+			for (cpu = 0U; cpu < pcpu_num; cpu++) {
+				if (per_cpu(vmexit_cnt, cpu)[i][level] != 0)
+					break;
+			}
+
+			if (cpu == pcpu_num)
+				continue;
+
+			len = snprintf(str, size, "\r\n%s", level_info[level]);
+			if (len >= size) {
+				goto overflow;
+			}
+			size -= len;
+			str += len;
+
+			for (cpu = 0U; cpu < pcpu_num; cpu++) {
+				len = snprintf(str, size, "%12lld", per_cpu(vmexit_cnt, cpu)[i][level]);
+				if (len >= size) {
+					goto overflow;
+				}
+
+				size -= len;
+				str += len;
+			}
+		}
+
+		len = snprintf(str, size, "\r\n Max Lat(us):");
+		if (len >= size) {
+			goto overflow;
+		}
+		size -= len;
+		str += len;
+
+		for (cpu = 0U; cpu < pcpu_num; cpu++) {
+			len = snprintf(str, size, "%12lld", per_cpu(vmexit_time, cpu)[i][1]);
+			if (len >= size) {
+				goto overflow;
+			}
+			size -= len;
+			str += len;
+		}
+
+	}
+
+	snprintf(str, size, "\r\n");
+	return;
+
+overflow:
+	printf("buffer size is not enough! please check!\n");
+}
+
+
+static void get_vmexit_details_per_vcpu(char *str_arg, size_t str_max)
+{
+	char *str = str_arg;
+	uint16_t vm_id, i, level, vcpu_id;
+	size_t len, size = str_max;
+	struct acrn_vm *vm;
+	struct acrn_vcpu *vcpu;
+	bool has_vmexit = false;
+
+	len = snprintf(str, size, "\r\nNow=%lldus, for detailed latency of each vmexit on each VM/vCPU:",
+		ticks_to_us(rdtsc()));
+	if (len >= size) {
+		goto overflow;
+	}
+	size -= len;
+	str += len;
+
+	for (i = 0U; i < 64U; i++) {
+
+		/* to ignore the 0 count vmexit */
+		for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
+			vm = get_vm_from_vmid(vm_id);
+			if (is_poweroff_vm(vm))
+				continue;
+
+			has_vmexit = false;
+			foreach_vcpu(vcpu_id, vm, vcpu) {
+				if (vcpu->vmexit_cnt[i][TOTAL_ARRAY_LEVEL - 1] != 0) {
+					has_vmexit = true;
+					break;
+				}
+			}
+
+			if (has_vmexit)
+				break;
+		}
+
+		if (vm_id == CONFIG_MAX_VM_NUM)
+			continue;
+
+		len = snprintf(str, size, "\r\n\r\n VMEXIT/0x%02x", i);
+		if (len >= size) {
+			goto overflow;
+		}
+		size -= len;
+		str += len;
+
+		for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
+			vm = get_vm_from_vmid(vm_id);
+			if (is_poweroff_vm(vm))
+				continue;
+
+			foreach_vcpu(vcpu_id, vm, vcpu) {
+				len = snprintf(str, size, " VM%u/vCPU%u", vm_id, vcpu_id);
+				if (len >= size) {
+					goto overflow;
+				}
+				size -= len;
+				str += len;
+			}
+		}
+
+		for (level = 0; level < MAX_VMEXIT_LEVEL; level++) {
+
+			/* to ignore the 0 count vmexit */
+			for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
+				vm = get_vm_from_vmid(vm_id);
+
+				if (is_poweroff_vm(vm))
+					continue;
+
+				has_vmexit = false;
+				foreach_vcpu(vcpu_id, vm, vcpu) {
+					if (vcpu->vmexit_cnt[i][level] != 0) {
+						has_vmexit = true;
+						break;
+					}
+				}
+
+				if (has_vmexit)
+					break;
+			}
+
+			if (vm_id == CONFIG_MAX_VM_NUM)
+				continue;
+
+			len = snprintf(str, size, "\r\n%s", level_info[level]);
+			if (len >= size) {
+				goto overflow;
+			}
+			size -= len;
+			str += len;
+
+			for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
+				vm = get_vm_from_vmid(vm_id);
+				if (is_poweroff_vm(vm))
+					continue;
+
+				foreach_vcpu(vcpu_id, vm, vcpu) {
+					len = snprintf(str, size, "%12lld", vcpu->vmexit_cnt[i][level]);
+					if (len >= size) {
+						goto overflow;
+					}
+
+					size -= len;
+					str += len;
+				}
+			}
+
+		}
+
+		len = snprintf(str, size, "\r\n Max Lat(us):");
+		if (len >= size) {
+			goto overflow;
+		}
+		size -= len;
+		str += len;
+
+		for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
+			vm = get_vm_from_vmid(vm_id);
+			if (is_poweroff_vm(vm))
+				continue;
+
+			foreach_vcpu(vcpu_id, vm, vcpu) {
+				len = snprintf(str, size, "%12lld", vcpu->vmexit_time[i][1]);
+				if (len >= size) {
+					goto overflow;
+				}
+				size -= len;
+				str += len;
+			}
+		}
+	}
+
+	snprintf(str, size, "\r\n");
+	return;
+
+overflow:
+	printf("buffer size is not enough! please check!\n");
+}
+
+static void clear_vmexit_info_buffer(void)
+{
+	uint16_t cpu, vm_id, vcpu_id;
+	uint16_t pcpu_num = get_pcpu_nums();
+	struct acrn_vm *vm;
+	struct acrn_vcpu *vcpu;
+
+	for (cpu = 0U; cpu < pcpu_num; cpu++) {
+		memset(per_cpu(vmexit_cnt, cpu), 0, sizeof(per_cpu(vmexit_cnt, cpu)));
+		memset(per_cpu(vmexit_time, cpu), 0, sizeof(per_cpu(vmexit_time, cpu)));
+		per_cpu(resched_times, cpu) = 0;
+	}
+
+	for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
+		vm = get_vm_from_vmid(vm_id);
+		if (!is_poweroff_vm(vm)) {
+			foreach_vcpu(vcpu_id, vm, vcpu) {
+				memset(vcpu->vmexit_cnt, 0, sizeof(vcpu->vmexit_cnt));
+				memset(vcpu->vmexit_time, 0, sizeof(vcpu->vmexit_time));
+			}
+		}
+	}
+
+}
+
+static int32_t shell_show_vmexit_profile(int32_t argc, char **argv)
+{
+	if ((argc == 2) && (strcmp(argv[1], "clear") == 0)) {
+		clear_vmexit_info_buffer();
+	}
+
+	get_vmexit_profile_per_pcpu(shell_log_buf, SHELL_LOG_BUF_SIZE);
+	shell_puts(shell_log_buf);
+
+	get_vmexit_details_per_pcpu(shell_log_buf, SHELL_LOG_BUF_SIZE);
+	shell_puts(shell_log_buf);
+
+	get_vmexit_profile_per_vcpu(shell_log_buf, SHELL_LOG_BUF_SIZE);
+	shell_puts(shell_log_buf);
+
+	get_vmexit_details_per_vcpu(shell_log_buf, SHELL_LOG_BUF_SIZE);
+	shell_puts(shell_log_buf);
+
+	return 0;
+}
diff --git a/hypervisor/debug/shell_priv.h b/hypervisor/debug/shell_priv.h
index af741aa26..21a010d21 100644
--- a/hypervisor/debug/shell_priv.h
+++ b/hypervisor/debug/shell_priv.h
@@ -106,4 +106,9 @@ struct shell {
 #define SHELL_CMD_WRMSR_PARAM	"[-p] "
 #define SHELL_CMD_WRMSR_HELP	"Write value (in hexadecimal) to the MSR at msr_index (in hexadecimal) for CPU"\
 				" ID pcpu_id"
+
+#define SHELL_CMD_VMEXIT	"vmexit"
+#define SHELL_CMD_VMEXIT_PARAM	NULL
+#define SHELL_CMD_VMEXIT_HELP	"show vmexit profiling ('vmexit clear' resets the counters)"
+
 #endif /* SHELL_PRIV_H */
diff --git a/hypervisor/include/arch/x86/guest/vcpu.h b/hypervisor/include/arch/x86/guest/vcpu.h
index 052a69e1e..4b192cbe6 100644
--- a/hypervisor/include/arch/x86/guest/vcpu.h
+++ b/hypervisor/include/arch/x86/guest/vcpu.h
@@ -274,6 +274,12 @@ struct acrn_vcpu {
 	uint64_t reg_updated;
 
 	struct sched_event events[VCPU_EVENT_NUM];
+
+#ifdef HV_DEBUG
+	uint64_t vmexit_cnt[64][TOTAL_ARRAY_LEVEL];
+	uint64_t vmexit_time[64][2]; /* 0 for total latency (in TSC ticks), 1 for max latency (in us) */
+#endif
+
 } __aligned(PAGE_SIZE);
 
 struct vcpu_dump {
diff --git a/hypervisor/include/arch/x86/guest/vm.h b/hypervisor/include/arch/x86/guest/vm.h
index b0e5b797b..1bef94bc4 100644
--- a/hypervisor/include/arch/x86/guest/vm.h
+++ b/hypervisor/include/arch/x86/guest/vm.h
@@ -156,6 +156,7 @@ struct acrn_vm {
 	uint8_t vrtc_offset;
 
 	uint64_t intr_inject_delay_delta; /* delay of intr injection */
+
 } __aligned(PAGE_SIZE);
 
 /*
diff --git a/hypervisor/include/arch/x86/per_cpu.h b/hypervisor/include/arch/x86/per_cpu.h
index 05dfbf794..b7b0f3c56 100644
--- a/hypervisor/include/arch/x86/per_cpu.h
+++ b/hypervisor/include/arch/x86/per_cpu.h
@@ -19,6 +19,8 @@
 #include
 #include
 
+#include
+
 struct per_cpu_region {
 	/* vmxon_region MUST be 4KB-aligned */
 	uint8_t vmxon_region[PAGE_SIZE];
@@ -26,6 +28,12 @@ struct per_cpu_region {
 #ifdef HV_DEBUG
 	struct shared_buf *sbuf[ACRN_SBUF_ID_MAX];
 	char logbuf[LOG_MESSAGE_MAX_SIZE];
+
+	uint64_t vmexit_cnt[64][TOTAL_ARRAY_LEVEL];
+	uint64_t vmexit_time[64][2]; /* 0 for total latency (in TSC ticks), 1 for max latency (in us) */
+
+	uint64_t resched_times;
+
 	uint32_t npk_log_ref;
 #endif
 	uint64_t irq_count[NR_IRQS];
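For reference, the per-pCPU (struct per_cpu_region) and per-vCPU (struct acrn_vcpu) counters share one layout: vmexit_cnt[reason][0..MAX_VMEXIT_LEVEL-1] is the latency histogram, vmexit_cnt[reason][TOTAL_ARRAY_LEVEL-1] is the total exit count, vmexit_time[reason][0] accumulates latency in TSC ticks, and vmexit_time[reason][1] holds the maximum observed latency in microseconds. A hedged sketch of how a reader could combine these fields into an average latency (avg_exit_latency_us is a hypothetical helper, not added by this patch; ticks_to_us() is the conversion already used above):

	/* illustrative sketch: average latency in us for one exit reason */
	static uint64_t avg_exit_latency_us(uint64_t cnt[][TOTAL_ARRAY_LEVEL],
			uint64_t time[][2], uint32_t reason)
	{
		uint64_t total = cnt[reason][TOTAL_ARRAY_LEVEL - 1];

		return (total != 0UL) ? (ticks_to_us(time[reason][0]) / total) : 0UL;
	}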
diff --git a/hypervisor/include/public/acrn_common.h b/hypervisor/include/public/acrn_common.h
index eddb03401..19cde685a 100644
--- a/hypervisor/include/public/acrn_common.h
+++ b/hypervisor/include/public/acrn_common.h
@@ -46,6 +46,11 @@
 #error "VIOAPIC_RTE_NUM must be larger than 23"
 #endif
 
+#ifdef HV_DEBUG
+#define MAX_VMEXIT_LEVEL 14 /* 14 latency buckets (index 0..13); the extra slot (index 14) holds the total count */
+#define TOTAL_ARRAY_LEVEL (MAX_VMEXIT_LEVEL + 1)
+#endif
+
 /* Generic VM flags from guest OS */
 #define GUEST_FLAG_SECURE_WORLD_ENABLED	(1UL << 0U)	/* Whether secure world is enabled */
 #define GUEST_FLAG_LAPIC_PASSTHROUGH	(1UL << 1U)	/* Whether LAPIC is passed through */
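Usage note: with this patch the hypervisor debug shell gains a vmexit command (registered in shell_cmds[] above). Based on shell_show_vmexit_profile(), running it with no argument dumps the per-pCPU summary, the per-pCPU latency details, and the per-VM/vCPU equivalents; passing clear first resets the per-CPU and per-vCPU counters (including resched_times) and then prints a fresh snapshot:

	ACRN:\> vmexit
	ACRN:\> vmexit clear

All four report sections are staged through shell_log_buf, which is why SHELL_LOG_BUF_SIZE grows from PAGE_SIZE * MAX_PCPU_NUM / 2U to PAGE_SIZE * MAX_PCPU_NUM in this change.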