Mirror of https://github.com/projectacrn/acrn-hypervisor.git (synced 2025-09-21 00:38:28 +00:00)
initial import
internal commit: 14ac2bc2299032fa6714d1fefa7cf0987b3e3085
Signed-off-by: Eddie Dong <eddie.dong@intel.com>
1015  hypervisor/arch/x86/assign.c  Normal file
File diff suppressed because it is too large
650  hypervisor/arch/x86/cpu.c  Normal file
@@ -0,0 +1,650 @@
/*
 * Copyright (C) 2018 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <hypervisor.h>
#include <hv_lib.h>
#include <acrn_common.h>
#include <bsp_extern.h>
#include <hv_arch.h>
#include <schedule.h>
#include <version.h>
#include <hv_debug.h>

#ifdef CONFIG_EFI_STUB
extern uint32_t efi_physical_available_ap_bitmap;
#endif

uint64_t tsc_clock_freq = 1000000000;

spinlock_t cpu_secondary_spinlock = {
	.head = 0,
	.tail = 0
};

spinlock_t up_count_spinlock = {
	.head = 0,
	.tail = 0
};

void *per_cpu_data_base_ptr;
int phy_cpu_num;
unsigned long pcpu_sync = 0;
uint32_t up_count = 0;

DEFINE_CPU_DATA(uint8_t[STACK_SIZE], stack) __aligned(16);
DEFINE_CPU_DATA(uint8_t, lapic_id);
DEFINE_CPU_DATA(void *, vcpu);
DEFINE_CPU_DATA(int, state);

/* TODO: add more capability per requirement */
struct cpu_capability {
	bool tsc_adjust_supported;
	bool ibrs_ibpb_supported;
	bool stibp_supported;
	bool apicv_supported;
	bool monitor_supported;
};
static struct cpu_capability cpu_caps;

static void apicv_cap_detect(void);
static void cpu_set_logical_id(uint32_t logical_id);
static void print_hv_banner(void);
bool check_monitor_support(void);
int cpu_find_logical_id(uint32_t lapic_id);
#ifndef CONFIG_EFI_STUB
static void start_cpus();
#endif
static void pcpu_sync_sleep(unsigned long *sync, int mask_bit);
int ibrs_type;
static void check_cpu_capability(void)
{
	uint32_t eax, ebx, ecx, edx;

	memset(&cpu_caps, 0, sizeof(struct cpu_capability));

	cpuid(CPUID_EXTEND_FEATURE, &eax, &ebx, &ecx, &edx);

	cpu_caps.tsc_adjust_supported = (ebx & CPUID_EBX_TSC_ADJ) ?
					(true) : (false);
	cpu_caps.ibrs_ibpb_supported = (edx & CPUID_EDX_IBRS_IBPB) ?
					(true) : (false);
	cpu_caps.stibp_supported = (edx & CPUID_EDX_STIBP) ?
					(true) : (false);

	/* For speculation defence.
	 * The default way is to set IBRS at vmexit and then do IBPB at vcpu
	 * context switch (ibrs_type == IBRS_RAW).
	 * Now provide an optimized way (ibrs_type == IBRS_OPT) which sets
	 * STIBP and does IBPB at vmexit, since having STIBP always set has
	 * less impact than having IBRS always set. Also, since IBPB is
	 * already done at vmexit, it is not necessary to do so at vcpu
	 * context switch.
	 */
	ibrs_type = IBRS_NONE;

	/* Currently for APL, if retpoline is enabled, then IBRS should not
	 * take effect.
	 * TODO: add IA32_ARCH_CAPABILITIES[1] check; if this bit is set, IBRS
	 * should be set all the time instead of relying on retpoline.
	 */
#ifndef CONFIG_RETPOLINE
	if (cpu_caps.ibrs_ibpb_supported) {
		ibrs_type = IBRS_RAW;
		if (cpu_caps.stibp_supported)
			ibrs_type = IBRS_OPT;
	}
#endif
}

bool check_tsc_adjust_support(void)
{
	return cpu_caps.tsc_adjust_supported;
}

bool check_ibrs_ibpb_support(void)
{
	return cpu_caps.ibrs_ibpb_supported;
}

bool check_stibp_support(void)
{
	return cpu_caps.stibp_supported;
}

static void alloc_phy_cpu_data(int pcpu_num)
{
	phy_cpu_num = pcpu_num;

	per_cpu_data_base_ptr = calloc(1, PER_CPU_DATA_SIZE * pcpu_num);
	ASSERT(per_cpu_data_base_ptr != NULL, "");
}

int __attribute__((weak)) parse_madt(uint8_t *lapic_id_base)
{
	static const uint32_t lapic_id[] = {0, 2, 4, 6};
	uint32_t i;

	for (i = 0; i < ARRAY_SIZE(lapic_id); i++)
		*lapic_id_base++ = lapic_id[i];

	return ARRAY_SIZE(lapic_id);
}

static int init_phy_cpu_storage(void)
{
	int i, pcpu_num = 0;
	int bsp_cpu_id;
	uint8_t bsp_lapic_id = 0;
	uint8_t *lapic_id_base;

	/*
	 * allocate memory to save all lapic_ids detected in parse_madt.
	 * We allocate one 4K page, which can hold the lapic_id info for
	 * up to 4K CPUs.
	 */
	lapic_id_base = alloc_page(CPU_PAGE_SIZE);
	ASSERT(lapic_id_base != NULL, "fail to alloc page");

	pcpu_num = parse_madt(lapic_id_base);
	alloc_phy_cpu_data(pcpu_num);

	for (i = 0; i < pcpu_num; i++) {
		per_cpu(lapic_id, i) = *lapic_id_base++;
#ifdef CONFIG_EFI_STUB
		efi_physical_available_ap_bitmap |= 1 << per_cpu(lapic_id, i);
#endif
	}

	/* free memory after lapic_ids are saved in per_cpu data */
	free(lapic_id_base);

	bsp_lapic_id = get_cur_lapic_id();

#ifdef CONFIG_EFI_STUB
	efi_physical_available_ap_bitmap &= ~(1 << bsp_lapic_id);
#endif

	bsp_cpu_id = cpu_find_logical_id(bsp_lapic_id);
	ASSERT(bsp_cpu_id >= 0, "fail to get phy cpu id");

	return bsp_cpu_id;
}

static void cpu_set_current_state(uint32_t logical_id, int state)
{
	spinlock_obtain(&up_count_spinlock);

	/* Check if state is initializing */
	if (state == CPU_STATE_INITIALIZING) {
		/* Increment CPU up count */
		up_count++;

		/* Save this CPU's logical ID to the TSC AUX MSR */
		cpu_set_logical_id(logical_id);
	}

	/* Set state for the specified CPU */
	per_cpu(state, logical_id) = state;

	spinlock_release(&up_count_spinlock);
}

#ifdef STACK_PROTECTOR
struct stack_canary {
	/* Gcc generates extra code, using [fs:40] to access canary */
	uint8_t reserved[40];
	uint64_t canary;
};

static DEFINE_CPU_DATA(struct stack_canary, stack_canary);

static uint64_t get_random_value(void)
{
	uint64_t random = 0;

	asm volatile ("1: rdrand %%rax\n"
		"jnc 1b\n"
		"mov %%rax, %0\n"
		: "=r"(random) :: );
	return random;
}

static void set_fs_base(void)
{
	struct stack_canary *psc = &get_cpu_var(stack_canary);

	psc->canary = get_random_value();
	msr_write(MSR_IA32_FS_BASE, (uint64_t)psc);
}
#endif

void bsp_boot_init(void)
{
#ifdef HV_DEBUG
	uint64_t start_tsc = rdtsc();
#endif

	/* Clear BSS */
	memset(_ld_bss_start, 0, _ld_bss_end - _ld_bss_start);

	/* Build-time sanity checks to make sure the hard-coded offsets
	 * match the actual offsets!
	 */
	STATIC_ASSERT(offsetof(struct cpu_regs, rax) ==
		VMX_MACHINE_T_GUEST_RAX_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, rbx) ==
		VMX_MACHINE_T_GUEST_RBX_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, rcx) ==
		VMX_MACHINE_T_GUEST_RCX_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, rdx) ==
		VMX_MACHINE_T_GUEST_RDX_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, rbp) ==
		VMX_MACHINE_T_GUEST_RBP_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, rsi) ==
		VMX_MACHINE_T_GUEST_RSI_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, rdi) ==
		VMX_MACHINE_T_GUEST_RDI_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, r8) ==
		VMX_MACHINE_T_GUEST_R8_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, r9) ==
		VMX_MACHINE_T_GUEST_R9_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, r10) ==
		VMX_MACHINE_T_GUEST_R10_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, r11) ==
		VMX_MACHINE_T_GUEST_R11_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, r12) ==
		VMX_MACHINE_T_GUEST_R12_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, r13) ==
		VMX_MACHINE_T_GUEST_R13_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, r14) ==
		VMX_MACHINE_T_GUEST_R14_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, r15) ==
		VMX_MACHINE_T_GUEST_R15_OFFSET);
	STATIC_ASSERT(offsetof(struct run_context, cr2) ==
		VMX_MACHINE_T_GUEST_CR2_OFFSET);
	STATIC_ASSERT(offsetof(struct run_context, ia32_spec_ctrl) ==
		VMX_MACHINE_T_GUEST_SPEC_CTRL_OFFSET);

	/* Initialize the hypervisor paging */
	init_paging();

	early_init_lapic();

	init_phy_cpu_storage();

	load_gdtr_and_tr();

	/* Switch to run-time stack */
	CPU_SP_WRITE(&get_cpu_var(stack)[STACK_SIZE - 1]);

#ifdef STACK_PROTECTOR
	set_fs_base();
#endif

	check_cpu_capability();

	apicv_cap_detect();

	/* Set state for this CPU to initializing */
	cpu_set_current_state(CPU_BOOT_ID, CPU_STATE_INITIALIZING);

	/* Perform any necessary BSP initialization */
	init_bsp();

	/* Initialize Serial */
	serial_init();

	/* Initialize console */
	console_init();

	/* Print Hypervisor Banner */
	print_hv_banner();

	/* Make sure rdtsc is enabled */
	check_tsc();

	/* Calculate TSC Frequency */
	tsc_clock_freq = tsc_cycles_in_period(1000) / 1000 * 1000000;

	/* Enable logging */
	init_logmsg(LOG_BUF_SIZE,
		LOG_DESTINATION);

#ifdef HV_DEBUG
	/* Log first messages */
	printf("HV version %d.%d-%s-%s build by %s, start time %lluus\r\n",
		HV_MAJOR_VERSION, HV_MINOR_VERSION, HV_BUILD_TIME,
		HV_BUILD_VERSION, HV_BUILD_USER,
		TICKS_TO_US(start_tsc));
#endif
	pr_dbg("Core %d is up", CPU_BOOT_ID);

	/* Warn if the security features are not ready */
	if (!check_ibrs_ibpb_support() && !check_stibp_support()) {
		pr_fatal("SECURITY WARNING!!!!!!");
		pr_fatal("Please apply the latest CPU uCode patch!");
	}

	/* Initialize the shell */
	shell_init();

	/* Initialize interrupts */
	interrupt_init(CPU_BOOT_ID);

	timer_init();
	setup_notification();
	ptdev_init();

	init_scheduler();

#ifndef CONFIG_EFI_STUB
	/* Start all secondary cores */
	start_cpus();

	/* Trigger event to allow secondary CPUs to continue */
	bitmap_set(0, &pcpu_sync);
#else
	memcpy_s(_ld_cpu_secondary_reset_start,
		(unsigned long)&_ld_cpu_secondary_reset_size,
		_ld_cpu_secondary_reset_load,
		(unsigned long)&_ld_cpu_secondary_reset_size);
#endif

	ASSERT(get_cpu_id() == CPU_BOOT_ID, "");

	init_iommu();

	console_setup_timer();

	/* Start initializing the VM for this CPU */
	hv_main(CPU_BOOT_ID);

	/* Control should not come here */
	cpu_halt(CPU_BOOT_ID);
}

void cpu_secondary_init(void)
{
	/* NOTE: Use of local / stack variables in this function is problematic
	 * since the stack is switched in the middle of the function. For this
	 * reason, the logical id is only temporarily stored in a static
	 * variable, but this will be over-written once subsequent CPUs
	 * start-up. Once the spin-lock is released, the cpu_logical_id_get()
	 * API is used to obtain the logical ID
	 */

	/* Switch this CPU to use the same page tables set-up by the
	 * primary/boot CPU
	 */
	enable_paging(get_paging_pml4());
	early_init_lapic();

	/* Find the logical ID of this CPU given the LAPIC ID
	 * temp_logical_id =
	 * cpu_find_logical_id(get_cur_lapic_id());
	 */
	cpu_find_logical_id(get_cur_lapic_id());

	/* Set state for this CPU to initializing */
	cpu_set_current_state(cpu_find_logical_id
		(get_cur_lapic_id()),
		CPU_STATE_INITIALIZING);

	/* Switch to run-time stack */
	CPU_SP_WRITE(&get_cpu_var(stack)[STACK_SIZE - 1]);

#ifdef STACK_PROTECTOR
	set_fs_base();
#endif

	load_gdtr_and_tr();

	/* Make sure rdtsc is enabled */
	check_tsc();

	pr_dbg("Core %d is up", get_cpu_id());

	/* Release secondary boot spin-lock to allow one of the next CPU(s) to
	 * perform this common initialization
	 */
	spinlock_release(&cpu_secondary_spinlock);

	/* Initialize secondary processor interrupts. */
	interrupt_init(get_cpu_id());

	timer_init();

	/* Wait for boot processor to signal all secondary cores to continue */
	pcpu_sync_sleep(&pcpu_sync, 0);

#ifdef CONFIG_EFI_STUB
	bitmap_clr(0, &pcpu_sync);
#endif

	hv_main(get_cpu_id());

	/* Control will only come here for secondary CPUs not configured for
	 * use or if an error occurs in hv_main
	 */
	cpu_halt(get_cpu_id());
}

int cpu_find_logical_id(uint32_t lapic_id)
{
	int i;

	for (i = 0; i < phy_cpu_num; i++) {
		if (per_cpu(lapic_id, i) == lapic_id)
			return i;
	}

	return -1;
}

#ifndef CONFIG_EFI_STUB
/*
 * Start all secondary CPUs.
 */
static void start_cpus()
{
	uint32_t timeout;
	uint32_t expected_up;

	/* Copy segment for AP initialization code below 1MB */
	memcpy_s(_ld_cpu_secondary_reset_start,
		(unsigned long)&_ld_cpu_secondary_reset_size,
		_ld_cpu_secondary_reset_load,
		(unsigned long)&_ld_cpu_secondary_reset_size);

	/* Set the number of CPUs expected to come up */
	expected_up = phy_cpu_num;

	/* Broadcast IPIs to all other CPUs */
	send_startup_ipi(INTR_CPU_STARTUP_ALL_EX_SELF,
		-1U, ((paddr_t) cpu_secondary_reset));

	/* Wait until global count is equal to expected CPU up count or
	 * configured time-out has expired
	 */
	timeout = CPU_UP_TIMEOUT * 1000;
	while ((up_count != expected_up) && (timeout != 0)) {
		/* Delay 10us */
		udelay(10);

		/* Decrement timeout value */
		timeout -= 10;
	}

	/* Check to see if all expected CPUs are actually up */
	if (up_count != expected_up) {
		/* Print error */
		pr_fatal("Secondary CPUs failed to come up");

		/* Error condition - loop endlessly for now */
		do {
		} while (1);
	}
}
#endif

void cpu_halt(uint32_t logical_id)
{
	/* For debug purposes, using a stack variable in the while loop enables
	 * us to modify the value using a JTAG probe and resume if needed.
	 */
	int halt = 1;

	/* Set state to show CPU is halted */
	cpu_set_current_state(logical_id, CPU_STATE_HALTED);

	/* Halt the CPU */
	do {
		asm volatile ("hlt");
	} while (halt);
}

static void cpu_set_logical_id(uint32_t logical_id)
{
	/* Write TSC AUX register */
	msr_write(MSR_IA32_TSC_AUX, (uint64_t) logical_id);
}

static void print_hv_banner(void)
{
	char *boot_msg = "ACRN Hypervisor\n\r";

	/* Print the boot message */
	printf(boot_msg);
}

static void pcpu_sync_sleep(unsigned long *sync, int mask_bit)
{
	int wake_sync = (1 << mask_bit);

	if (check_monitor_support()) {
		/* Wait for the event to be set using monitor/mwait */
		asm volatile ("1: cmpl %%ebx,(%%eax)\n"
			" je 2f\n"
			" monitor\n"
			" mwait\n"
			" jmp 1b\n"
			"2:\n"
			:
			: "a" (sync), "d"(0), "c"(0),
			"b"(wake_sync)
			: "cc");
	} else {
		/* Wait for the event to be set using pause */
		asm volatile ("1: cmpl %%ebx,(%%eax)\n"
			" je 2f\n"
			" pause\n"
			" jmp 1b\n"
			"2:\n"
			:
			: "a" (sync), "d"(0), "c"(0),
			"b"(wake_sync)
			: "cc");
	}
}

/* Check allowed ONE-settings in VMX control */
static bool is_ctrl_setting_allowed(uint64_t msr_val, uint32_t ctrl)
{
	/*
	 * Intel SDM Appendix A.3
	 * - bit X in ctrl can be set to 1
	 *   only if bit 32+X in msr_val is 1
	 */
	return ((((uint32_t)(msr_val >> 32)) & ctrl) == ctrl);
}

static void apicv_cap_detect(void)
{
	uint64_t val64;
	uint32_t ctrl;
	bool result;

	ctrl = VMX_PROCBASED_CTLS_TPR_SHADOW;
	val64 = msr_read(MSR_IA32_VMX_PROCBASED_CTLS);

	result = is_ctrl_setting_allowed(val64, ctrl);
	if (result) {
		ctrl = VMX_PROCBASED_CTLS2_VAPIC |
			VMX_PROCBASED_CTLS2_VAPIC_REGS |
			VMX_PROCBASED_CTLS2_VIRQ;

		val64 = msr_read(MSR_IA32_VMX_PROCBASED_CTLS2);
		result = is_ctrl_setting_allowed(val64, ctrl);
	}

	cpu_caps.apicv_supported = result;
}

bool is_apicv_enabled(void)
{
	return cpu_caps.apicv_supported;
}

static void monitor_cap_detect(void)
{
	uint32_t eax, ebx, ecx, edx;
	uint32_t family;
	uint32_t model;

	/* Run CPUID to determine if MONITOR support is available */
	cpuid(CPUID_FEATURES, &eax, &ebx, &ecx, &edx);

	/* See if MONITOR feature bit is set in ECX */
	if (ecx & CPUID_ECX_MONITOR)
		cpu_caps.monitor_supported = true;

	/* don't use monitor for CPU (family: 0x6 model: 0x5c)
	 * in hypervisor, but still expose it to the guests and
	 * let them handle it correctly
	 */
	family = (eax >> 8) & 0xff;
	if (family == 0xF)
		family += (eax >> 20) & 0xff;

	model = (eax >> 4) & 0xf;
	if (family >= 0x06)
		model += ((eax >> 16) & 0xf) << 4;

	if (cpu_caps.monitor_supported &&
		(family == 0x06) &&
		(model == 0x5c)) {
		cpu_caps.monitor_supported = false;
	}
}

bool check_monitor_support(void)
{
	return cpu_caps.monitor_supported;
}

228  hypervisor/arch/x86/cpu_primary.S  Normal file
@@ -0,0 +1,228 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <cpu.h>
|
||||
#include <mmu.h>
|
||||
#include <gdt.h>
|
||||
#include <idt.h>
|
||||
#include <msr.h>
|
||||
|
||||
/* MULTIBOOT HEADER */
|
||||
#define MULTIBOOT_HEADER_MAGIC 0x1badb002
|
||||
#define MULTIBOOT_HEADER_FLAGS 0x00000002 /*flags bit 1 : enable mem_*, mmap_**/
|
||||
|
||||
.section multiboot_header, "a"
|
||||
|
||||
.align 4
|
||||
|
||||
/* header magic */
|
||||
.long MULTIBOOT_HEADER_MAGIC
|
||||
/* header flags - flags bit 1 : enable mem_*, mmap_* */
|
||||
.long MULTIBOOT_HEADER_FLAGS
|
||||
/* header checksum = -(magic + flags) */
|
||||
.long -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
|
||||
|
||||
.section entry, "ax"
|
||||
|
||||
.align 8
|
||||
.code32
|
||||
|
||||
.global cpu_primary_start_32
|
||||
cpu_primary_start_32:
|
||||
/* Disable interrupts */
|
||||
cli
|
||||
|
||||
/* Clear direction flag */
|
||||
cld
|
||||
|
||||
/* save eax and ebx */
|
||||
movl %eax, %esp
|
||||
movl %ebx, %ebp
|
||||
|
||||
/* detect whether it is in long mode */
|
||||
movl $MSR_IA32_EFER, %ecx
|
||||
rdmsr
|
||||
test $MSR_IA32_EFER_LMA_BIT, %eax
|
||||
|
||||
/* jump to 64bit entry if it is already in long mode */
|
||||
jne cpu_primary_start_64
|
||||
|
||||
/* save the MULTIBOOT magic number & MBI */
|
||||
movl %esp, (boot_regs)
|
||||
movl %ebp, (boot_regs+4)
|
||||
|
||||
/* Disable paging */
|
||||
mov %cr0, %ebx
|
||||
andl $~CR0_PG, %ebx
|
||||
mov %ebx, %cr0
|
||||
|
||||
/* Set DE, PAE, MCE and OS support bits in CR4 */
|
||||
movl $(CR4_DE | CR4_PAE | CR4_MCE | CR4_OSFXSR | CR4_OSXMMEXCPT), %eax
|
||||
mov %eax, %cr4
|
||||
|
||||
/* Set CR3 to PML4 table address */
|
||||
movl $cpu_boot32_page_tables_start, %edi
|
||||
mov %edi, %cr3
|
||||
|
||||
/* Set LME bit in EFER */
|
||||
movl $MSR_IA32_EFER, %ecx
|
||||
rdmsr
|
||||
orl $MSR_IA32_EFER_LME_BIT, %eax
|
||||
wrmsr
|
||||
|
||||
/* Enable paging, protection, numeric error and co-processor
|
||||
monitoring in CR0 to enter long mode */
|
||||
mov %cr0, %ebx
|
||||
orl $(CR0_PG | CR0_PE | CR0_MP | CR0_NE), %ebx
|
||||
mov %ebx, %cr0
|
||||
|
||||
/* Load temporary GDT pointer value */
|
||||
mov $cpu_primary32_gdt_ptr, %ebx
|
||||
lgdt (%ebx)
|
||||
|
||||
/* Perform a long jump to start executing in 64-bit mode */
|
||||
ljmp $HOST_GDT_RING0_CODE_SEL, $primary_start_long_mode
|
||||
|
||||
.code64
|
||||
.org 0x200
|
||||
.global cpu_primary_start_64
|
||||
cpu_primary_start_64:
|
||||
/* save the MULTIBOOT magic number & MBI */
|
||||
movl %edi, (boot_regs)
|
||||
movl %esi, (boot_regs+4)
|
||||
#ifdef CONFIG_EFI_STUB
|
||||
movl %edx, (boot_regs+8)
|
||||
#endif
|
||||
|
||||
primary_start_long_mode:
|
||||
|
||||
/* Fix up the IDT descriptors */
|
||||
movl $HOST_IDT, %edx
|
||||
movl $HOST_IDT_ENTRIES, %ecx
|
||||
.LFixUpIDT_Entries:
|
||||
xorl %eax, %eax
|
||||
xchgl %eax, 12(%edx) /* Set rsvd bits to 0; eax now has
|
||||
high 32 of entry point */
|
||||
xchgl %eax, 8(%edx) /* Set bits 63..32 of entry point;
|
||||
eax now has low 32 of entry point */
|
||||
movw %ax, (%edx) /* Set bits 0-15 of procedure entry
|
||||
point */
|
||||
shr $16, %eax
|
||||
movw %ax, 6(%edx) /* Set bits 16-31 of entry point */
|
||||
addl $X64_IDT_DESC_SIZE,%edx
|
||||
loop .LFixUpIDT_Entries
|
||||
|
||||
/* Load IDT */
|
||||
mov $HOST_IDTR, %rcx
|
||||
lidtq (%rcx)
|
||||
|
||||
/* Load temporary GDT pointer value */
|
||||
mov $cpu_primary32_gdt_ptr, %ebx
|
||||
lgdt (%ebx)
|
||||
|
||||
/* Replace CS with the correct value should we need it */
|
||||
mov $HOST_GDT_RING0_CODE_SEL, %bx
|
||||
mov %bx, jcs
|
||||
movabsq $jmpbuf, %rax
|
||||
rex.w ljmp *(%rax)
|
||||
.data
|
||||
jmpbuf: .quad after
|
||||
jcs: .word 0
|
||||
.text
|
||||
after:
|
||||
|
||||
/* Initialize temporary stack pointer */
|
||||
movq $_ld_bss_end, %rsp
|
||||
add $CPU_PAGE_SIZE,%rsp
|
||||
and $(~(CPU_STACK_ALIGN - 1)),%rsp
|
||||
|
||||
// load all selector registers with appropriate values
|
||||
xor %edx, %edx
|
||||
lldt %dx
|
||||
movl $HOST_GDT_RING0_DATA_SEL,%eax
|
||||
mov %eax,%ss // Was 32bit POC Stack
|
||||
mov %eax,%ds // Was 32bit POC Data
|
||||
mov %eax,%es // Was 32bit POC Data
|
||||
mov %edx,%fs // Was 32bit POC Data
|
||||
mov %edx,%gs // Was 32bit POC CLS
|
||||
|
||||
/* Push sp magic to top of stack for call trace */
|
||||
pushq $SP_BOTTOM_MAGIC
|
||||
/* continue with chipset level initialization */
|
||||
call bsp_boot_init
|
||||
|
||||
loop:
|
||||
jmp loop
|
||||
|
||||
.align 4
|
||||
.global boot_regs
|
||||
boot_regs:
|
||||
.long 0x00000000
|
||||
.long 0x00000000
|
||||
#ifdef CONFIG_EFI_STUB
|
||||
.long 0x00000000
|
||||
#endif
|
||||
|
||||
/* GDT table */
|
||||
.align 4
|
||||
cpu_primary32_gdt:
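/* Descriptor layout (decoded for reference): a null descriptor, then a flat
 * 64-bit ring-0 code segment (0x00af9b000000ffff, G=1, L=1, type 0x9b) and a
 * flat ring-0 data segment (0x00cf93000000ffff, G=1, D/B=1, type 0x93).
 */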
|
||||
.quad 0x0000000000000000
|
||||
.quad 0x00af9b000000ffff
|
||||
.quad 0x00cf93000000ffff
|
||||
cpu_primary32_gdt_end:
|
||||
|
||||
/* GDT pointer */
|
||||
.align 2
|
||||
cpu_primary32_gdt_ptr:
|
||||
.short (cpu_primary32_gdt_end - cpu_primary32_gdt) - 1
|
||||
.quad cpu_primary32_gdt
|
||||
|
||||
/* PML4, PDPT, and PD tables initialized to map first 4 GBytes of memory */
|
||||
.align CPU_PAGE_SIZE
|
||||
.global cpu_boot32_page_tables_start
|
||||
cpu_boot32_page_tables_start:
|
||||
.quad cpu_primary32_pdpt_addr + (IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT)
|
||||
.align CPU_PAGE_SIZE
|
||||
cpu_primary32_pdpt_addr:
|
||||
address = 0
|
||||
.rept 4
|
||||
.quad cpu_primary32_pdt_addr + address + \
|
||||
(IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT)
|
||||
address = address + CPU_PAGE_SIZE
|
||||
.endr
|
||||
.align CPU_PAGE_SIZE
|
||||
cpu_primary32_pdt_addr:
|
||||
address = 0
|
||||
.rept 2048
|
||||
.quad address + (IA32E_PDPTE_PS_BIT | IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT)
|
||||
address = address + 0x200000
|
||||
.endr
|
||||
|
197  hypervisor/arch/x86/cpu_secondary.S  Normal file
@@ -0,0 +1,197 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <spinlock.h>
|
||||
#include <gdt.h>
|
||||
#include <cpu.h>
|
||||
#include <mmu.h>
|
||||
#include <msr.h>
|
||||
|
||||
|
||||
.extern cpu_secondary_init
|
||||
.extern cpu_logical_id
|
||||
.extern _ld_bss_end
|
||||
.extern HOST_GDTR
|
||||
|
||||
.section .cpu_secondary_reset,"ax"
|
||||
|
||||
.align 4
|
||||
.code16
|
||||
.global cpu_secondary_reset
|
||||
cpu_secondary_reset:
|
||||
|
||||
/* Disable local interrupts */
|
||||
|
||||
cli
|
||||
|
||||
/* Set DE, PAE, MCE and OS support bits in CR4 */
|
||||
|
||||
movl $(CR4_DE | CR4_PAE | CR4_MCE | CR4_OSFXSR | CR4_OSXMMEXCPT), %eax
|
||||
mov %eax, %cr4
|
||||
|
||||
/* Set CR3 to PML4 table address */
|
||||
|
||||
movl $CPU_Boot_Page_Tables_Start, %edi
|
||||
mov %edi, %cr3
|
||||
|
||||
/* Set LME bit in EFER */
|
||||
|
||||
movl $MSR_IA32_EFER, %ecx
|
||||
rdmsr
|
||||
orl $MSR_IA32_EFER_LME_BIT, %eax
|
||||
wrmsr
|
||||
|
||||
/* Enable paging, protection, numeric error and co-processor
|
||||
monitoring in CR0 to enter long mode */
|
||||
|
||||
mov %cr0, %ebx
|
||||
orl $(CR0_PG | CR0_PE | CR0_MP | CR0_NE), %ebx
|
||||
mov %ebx, %cr0
|
||||
|
||||
/* Load temporary GDT pointer value */
|
||||
|
||||
mov $cpu_secondary_gdt_ptr, %ebx
|
||||
lgdt (%ebx)
|
||||
|
||||
/* Perform a long jump to start executing in 64-bit mode */
|
||||
|
||||
data32 ljmp $HOST_GDT_RING0_CODE_SEL, $cpu_secondary_long_mode
|
||||
|
||||
.code64
|
||||
cpu_secondary_long_mode:
|
||||
|
||||
/* Set up all other data segment registers */
|
||||
|
||||
movl $HOST_GDT_RING0_DATA_SEL, %eax
|
||||
mov %eax, %ss
|
||||
mov %eax, %ds
|
||||
mov %eax, %es
|
||||
mov %eax, %fs
|
||||
mov %eax, %gs
|
||||
|
||||
/* Obtain secondary CPU spin-lock to serialize
|
||||
booting of secondary cores for a bit */
|
||||
|
||||
spinlock_obtain(cpu_secondary_spinlock)
|
||||
|
||||
/* Initialize temporary stack pointer
|
||||
NOTE: Using the PML4 memory (PDPT address is top of memory
|
||||
for the PML4 page) for the temporary stack
|
||||
as we are only using the very first entry in
|
||||
this page and the stack is growing down from
|
||||
the top of this page. This stack is only
|
||||
used for a VERY short period of time, so
|
||||
this reuse of PML4 memory should be acceptable. */
|
||||
|
||||
movq $cpu_secondary_pdpt_addr, %rsp
|
||||
|
||||
/* Push sp magic to top of stack for call trace */
|
||||
pushq $SP_BOTTOM_MAGIC
|
||||
|
||||
/* Jump to C entry for the AP */
|
||||
|
||||
call cpu_secondary_init
|
||||
|
||||
cpu_secondary_error:
|
||||
|
||||
/* Error condition trap */
|
||||
|
||||
jmp cpu_secondary_error
|
||||
|
||||
/* GDT table */
|
||||
.align 4
|
||||
cpu_secondary_gdt:
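/* Same layout as cpu_primary32_gdt (decoded for reference): a null
 * descriptor, a flat 64-bit ring-0 code segment (0x00af9b000000ffff) and a
 * flat ring-0 data segment (0x00cf93000000ffff).
 */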
|
||||
.quad 0x0000000000000000
|
||||
.quad 0x00af9b000000ffff
|
||||
.quad 0x00cf93000000ffff
|
||||
cpu_secondary_gdt_end:
|
||||
|
||||
/* GDT pointer */
|
||||
.align 2
|
||||
cpu_secondary_gdt_ptr:
|
||||
.short (cpu_secondary_gdt_end - cpu_secondary_gdt) - 1
|
||||
.quad cpu_secondary_gdt
|
||||
|
||||
/* PML4, PDPT, and PD tables initialized to map first 4 GBytes of memory */
|
||||
|
||||
.align CPU_PAGE_SIZE
|
||||
.global CPU_Boot_Page_Tables_Start
|
||||
CPU_Boot_Page_Tables_Start:
|
||||
.quad cpu_secondary_pdpt_addr + (IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT)
|
||||
.align CPU_PAGE_SIZE
|
||||
cpu_secondary_pdpt_addr:
|
||||
address = 0
|
||||
.rept 4
|
||||
.quad cpu_secondary_pdt_addr + address + \
|
||||
(IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT)
|
||||
address = address + CPU_PAGE_SIZE
|
||||
.endr
|
||||
.align CPU_PAGE_SIZE
|
||||
cpu_secondary_pdt_addr:
|
||||
address = 0
|
||||
.rept 2048
|
||||
.quad address + (IA32E_PDPTE_PS_BIT | IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT)
|
||||
address = address + 0x200000
|
||||
.endr
|
||||
|
||||
|
||||
/*******************************************************************
|
||||
* GUEST initial 4G page table
|
||||
*
|
||||
* The guest starts in long mode, so the HV needs to prepare a guest
* identity-mapped page table.
|
||||
*
|
||||
* The guest page tables cover 4G of address space, with 2M page size.
|
||||
*
|
||||
* The HV copies this page table (6 pages) to the guest address
* CPU_Boot_Page_Tables_Start_VM before executing guest instructions.
|
||||
*
|
||||
******************************************************************/
|
||||
.align CPU_PAGE_SIZE
|
||||
.global CPU_Boot_Page_Tables_Start_VM
|
||||
CPU_Boot_Page_Tables_Start_VM:
|
||||
.quad vm_cpu_pdpt_addr + (IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT)
|
||||
.align CPU_PAGE_SIZE
|
||||
vm_cpu_pdpt_addr:
|
||||
address = 0
|
||||
.rept 4
|
||||
.quad vm_cpu_pdt_addr + address + (IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT)
|
||||
address = address + CPU_PAGE_SIZE
|
||||
.endr
|
||||
.align CPU_PAGE_SIZE
|
||||
vm_cpu_pdt_addr:
|
||||
address = 0
|
||||
.rept 2048
|
||||
.quad address + (IA32E_PDPTE_PS_BIT | IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT)
|
||||
address = address + 0x200000
|
||||
.endr
|
||||
|
||||
.end
|
195  hypervisor/arch/x86/cpuid.c  Normal file
@@ -0,0 +1,195 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <cpu.h>
|
||||
|
||||
void emulate_cpuid(struct vcpu *vcpu, uint32_t src_op, uint32_t *eax_ptr,
|
||||
uint32_t *ebx_ptr, uint32_t *ecx_ptr, uint32_t *edx_ptr)
|
||||
{
|
||||
uint32_t apicid = vlapic_get_id(vcpu->arch_vcpu.vlapic);
|
||||
static const char sig[12] = "ACRNACRNACRN";
|
||||
const uint32_t *sigptr = (const uint32_t *)sig;
|
||||
uint32_t count = *ecx_ptr;
|
||||
|
||||
if ((src_op != 0x40000000) && (src_op != 0x40000010))
|
||||
cpuid_count(src_op, count, eax_ptr, ebx_ptr, ecx_ptr, edx_ptr);
|
||||
|
||||
switch (src_op) {
|
||||
/* Virtualize cpuid 0x01 */
|
||||
case 0x01:
|
||||
/* Patching initial APIC ID */
|
||||
*ebx_ptr &= ~APIC_ID_MASK;
|
||||
*ebx_ptr |= (apicid & APIC_ID_MASK);
|
||||
|
||||
/* mask mtrr */
|
||||
*edx_ptr &= ~CPUID_EDX_MTRR;
|
||||
|
||||
/* Patching X2APIC, X2APIC mode is disabled by default. */
|
||||
if (x2apic_enabled)
|
||||
*ecx_ptr |= CPUID_ECX_x2APIC;
|
||||
else
|
||||
*ecx_ptr &= ~CPUID_ECX_x2APIC;
|
||||
|
||||
/* mask pcid */
|
||||
*ecx_ptr &= ~CPUID_ECX_PCID;
|
||||
|
||||
/*mask vmx to guest os */
|
||||
*ecx_ptr &= ~CPUID_ECX_VMX;
|
||||
|
||||
break;
|
||||
|
||||
/* Virtualize cpuid 0x07 */
|
||||
case 0x07:
|
||||
/* mask invpcid */
|
||||
*ebx_ptr &= ~CPUID_EBX_INVPCID;
|
||||
|
||||
break;
|
||||
|
||||
case 0x0a:
|
||||
/* PMU is not supported */
|
||||
*eax_ptr &= ~0xff;
|
||||
break;
|
||||
|
||||
/* Virtualize cpuid 0x0b */
|
||||
case 0x0b:
|
||||
/* Patching X2APIC */
|
||||
if (!x2apic_enabled) {
|
||||
*eax_ptr = 0;
|
||||
*ebx_ptr = 0;
|
||||
*ecx_ptr = 0;
|
||||
*edx_ptr = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
/*
|
||||
* Leaf 0x40000000
|
||||
* This leaf returns the CPUID leaf range supported by the
|
||||
* hypervisor and the hypervisor vendor signature.
|
||||
*
|
||||
* EAX: The maximum input value for CPUID supported by the
|
||||
* hypervisor.
|
||||
* EBX, ECX, EDX: Hypervisor vendor ID signature.
|
||||
*/
|
||||
case 0x40000000:
|
||||
*eax_ptr = 0x40000010;
|
||||
*ebx_ptr = sigptr[0];
|
||||
*ecx_ptr = sigptr[1];
|
||||
*edx_ptr = sigptr[2];
|
||||
break;
|
||||
|
||||
/*
|
||||
* Leaf 0x40000010 - Timing Information.
|
||||
* This leaf returns the current TSC frequency and
|
||||
* current Bus frequency in kHz.
|
||||
*
|
||||
* EAX: (Virtual) TSC frequency in kHz.
|
||||
* TSC frequency is calculated from PIT in ACRN
|
||||
* EBX: (Virtual) Bus (local apic timer) frequency in kHz.
|
||||
* Bus (local apic timer) frequency is hardcoded as
|
||||
* (128 * 1024 * 1024) in ACRN
|
||||
* ECX, EDX: RESERVED (reserved fields are set to zero).
|
||||
*/
|
||||
case 0x40000010:
|
||||
*eax_ptr = (uint32_t)(tsc_clock_freq / 1000);
|
||||
*ebx_ptr = (128 * 1024 * 1024) / 1000;
|
||||
*ecx_ptr = 0;
|
||||
*edx_ptr = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static DEFINE_CPU_DATA(struct cpuid_cache_entry[CPUID_EXTEND_FEATURE_CACHE_MAX],
|
||||
cpuid_cache);
|
||||
|
||||
static inline struct cpuid_cache_entry *find_cpuid_cache_entry(uint32_t op,
|
||||
uint32_t count)
|
||||
{
|
||||
int pcpu_id = get_cpu_id();
|
||||
enum cpuid_cache_idx idx = CPUID_EXTEND_FEATURE_CACHE_MAX;
|
||||
|
||||
if ((count != 0))
|
||||
return NULL;
|
||||
|
||||
switch (op) {
|
||||
case CPUID_VENDORSTRING:
|
||||
idx = CPUID_VENDORSTRING_CACHE_IDX;
|
||||
break;
|
||||
|
||||
case CPUID_FEATURES:
|
||||
idx = CPUID_FEATURES_CACHE_IDX;
|
||||
break;
|
||||
|
||||
case CPUID_EXTEND_FEATURE:
|
||||
idx = CPUID_EXTEND_FEATURE_CACHE_IDX;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (idx == CPUID_EXTEND_FEATURE_CACHE_MAX)
|
||||
return NULL;
|
||||
|
||||
return &per_cpu(cpuid_cache, pcpu_id)[idx];
|
||||
}
|
||||
|
||||
inline void cpuid_count(uint32_t op, uint32_t count,
|
||||
uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
|
||||
{
|
||||
struct cpuid_cache_entry *entry;
|
||||
|
||||
entry = find_cpuid_cache_entry(op, count);
|
||||
|
||||
if (entry == NULL) {
|
||||
native_cpuid_count(op, count, a, b, c, d);
|
||||
} else if (entry->inited) {
|
||||
*a = entry->a;
|
||||
*b = entry->b;
|
||||
*c = entry->c;
|
||||
*d = entry->d;
|
||||
} else {
|
||||
native_cpuid_count(op, count, a, b, c, d);
|
||||
|
||||
entry->a = *a;
|
||||
entry->b = *b;
|
||||
entry->c = *c;
|
||||
entry->d = *d;
|
||||
|
||||
entry->inited = 1;
|
||||
}
|
||||
}
|
||||
|
569  hypervisor/arch/x86/ept.c  Normal file
@@ -0,0 +1,569 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <acrn_hv_defs.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hypercall.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
#include "guest/instr_emul_wrapper.h"
|
||||
#include "guest/instr_emul.h"
|
||||
|
||||
#define ACRN_DBG_EPT 6
|
||||
|
||||
void *create_guest_paging(struct vm *vm)
|
||||
{
|
||||
void *hva_dest;
|
||||
void *hva_src;
|
||||
|
||||
/* copy guest identity mapped 4G page table to guest */
|
||||
hva_dest = GPA2HVA(vm,
|
||||
(uint64_t)CPU_Boot_Page_Tables_Start_VM);
|
||||
hva_src = (void *)(_ld_cpu_secondary_reset_load
|
||||
+ (CPU_Boot_Page_Tables_Start_VM
|
||||
- _ld_cpu_secondary_reset_start));
|
||||
/* 2MB page size, need to copy 6 pages */
|
||||
memcpy_s(hva_dest, 6 * CPU_PAGE_SIZE, hva_src, 6 * CPU_PAGE_SIZE);
|
||||
return (void *)CPU_Boot_Page_Tables_Start_VM;
|
||||
}
|
||||
|
||||
static void *find_next_table(uint32_t table_offset,
|
||||
void *table_base)
|
||||
{
|
||||
uint64_t table_entry;
|
||||
uint64_t table_present;
|
||||
void *sub_table_addr = 0;
|
||||
|
||||
/* Read the table entry */
|
||||
table_entry = MEM_READ64(table_base
|
||||
+ (table_offset * IA32E_COMM_ENTRY_SIZE));
|
||||
|
||||
/* If bit 7 is set, entry is not a subtable. */
|
||||
if ((table_entry & IA32E_PDPTE_PS_BIT)
|
||||
|| (table_entry & IA32E_PDE_PS_BIT))
|
||||
return sub_table_addr;
|
||||
|
||||
/* Set table present bits to any of the read/write/execute bits */
|
||||
table_present = (IA32E_EPT_R_BIT | IA32E_EPT_W_BIT | IA32E_EPT_X_BIT);
|
||||
|
||||
/* Determine if a valid entry exists */
|
||||
if ((table_entry & table_present) == 0) {
|
||||
/* No entry present */
|
||||
return sub_table_addr;
|
||||
}
|
||||
|
||||
/* Get address of the sub-table */
|
||||
sub_table_addr = (void *)(table_entry & IA32E_REF_MASK);
|
||||
|
||||
/* Return the next table in the walk */
|
||||
return sub_table_addr;
|
||||
}
|
||||
|
||||
|
||||
void free_ept_mem(void *pml4_addr)
|
||||
{
|
||||
void *pdpt_addr;
|
||||
void *pde_addr;
|
||||
void *pte_addr;
|
||||
uint32_t pml4_index;
|
||||
uint32_t pdpt_index;
|
||||
uint32_t pde_index;
|
||||
|
||||
for (pml4_index = 0; pml4_index < IA32E_NUM_ENTRIES; pml4_index++) {
|
||||
/* Walk from the PML4 table to the PDPT table */
|
||||
pdpt_addr = find_next_table(pml4_index, pml4_addr);
|
||||
if (pdpt_addr == NULL)
|
||||
continue;
|
||||
|
||||
for (pdpt_index = 0; pdpt_index < IA32E_NUM_ENTRIES;
|
||||
pdpt_index++) {
|
||||
/* Walk from the PDPT table to the PD table */
|
||||
pde_addr = find_next_table(pdpt_index, pdpt_addr);
|
||||
|
||||
if (pde_addr == NULL)
|
||||
continue;
|
||||
|
||||
for (pde_index = 0; pde_index < IA32E_NUM_ENTRIES;
|
||||
pde_index++) {
|
||||
/* Walk from the PD table to the page table */
|
||||
pte_addr = find_next_table(pde_index,
|
||||
pde_addr);
|
||||
|
||||
/* Free page table entry table */
|
||||
if (pte_addr)
|
||||
free(pte_addr);
|
||||
}
|
||||
/* Free page directory entry table */
|
||||
if (pde_addr)
|
||||
free(pde_addr);
|
||||
}
|
||||
free(pdpt_addr);
|
||||
}
|
||||
free(pml4_addr);
|
||||
}
|
||||
|
||||
void destroy_ept(struct vm *vm)
|
||||
{
|
||||
free_ept_mem(vm->arch_vm.ept);
|
||||
free_ept_mem(vm->arch_vm.m2p);
|
||||
}
|
||||
|
||||
uint64_t gpa2hpa_check(struct vm *vm, uint64_t gpa,
|
||||
uint64_t size, int *found, bool assert)
|
||||
{
|
||||
uint64_t hpa = 0;
|
||||
int _found = 0;
|
||||
struct entry_params entry;
|
||||
struct map_params map_params;
|
||||
|
||||
map_params.page_table_type = PT_EPT;
|
||||
map_params.pml4_base = vm->arch_vm.ept;
|
||||
map_params.pml4_inverted = vm->arch_vm.m2p;
|
||||
obtain_last_page_table_entry(&map_params, &entry,
|
||||
(void *)gpa, true);
|
||||
if (entry.entry_present == PT_PRESENT
|
||||
/* if the access crosses several pages, it is not handled for now;
* only print error info
*/
|
||||
&& ((gpa % entry.page_size) + size) <= entry.page_size) {
|
||||
_found = 1;
|
||||
hpa = ((entry.entry_val & (~(entry.page_size - 1)))
|
||||
| (gpa & (entry.page_size - 1)));
|
||||
}
|
||||
|
||||
if (found != NULL)
|
||||
*found = _found;
|
||||
|
||||
if (_found == 0 && assert) {
|
||||
pr_err("VM %d GPA2HPA: failed for gpa 0x%llx",
|
||||
vm->attr.boot_idx, gpa);
|
||||
ASSERT(_found != 0, "GPA2HPA not found");
|
||||
}
|
||||
|
||||
pr_dbg("GPA2HPA: 0x%llx->0x%llx", gpa, hpa);
|
||||
|
||||
return hpa;
|
||||
}
|
||||
|
||||
uint64_t gpa2hpa(struct vm *vm, uint64_t gpa)
|
||||
{
|
||||
return gpa2hpa_check(vm, gpa, 0, NULL, true);
|
||||
}
|
||||
|
||||
uint64_t hpa2gpa(struct vm *vm, uint64_t hpa)
|
||||
{
|
||||
struct entry_params entry;
|
||||
struct map_params map_params;
|
||||
|
||||
map_params.page_table_type = PT_EPT;
|
||||
map_params.pml4_base = vm->arch_vm.ept;
|
||||
map_params.pml4_inverted = vm->arch_vm.m2p;
|
||||
|
||||
obtain_last_page_table_entry(&map_params, &entry,
|
||||
(void *)hpa, false);
|
||||
|
||||
if (entry.entry_present == PT_NOT_PRESENT) {
|
||||
pr_err("VM %d hpa2gpa: failed for hpa 0x%llx",
|
||||
vm->attr.boot_idx, hpa);
|
||||
ASSERT(false, "hpa2gpa not found");
|
||||
}
|
||||
return ((entry.entry_val & (~(entry.page_size - 1)))
|
||||
| (hpa & (entry.page_size - 1)));
|
||||
}
|
||||
|
||||
int is_ept_supported(void)
|
||||
{
|
||||
uint16_t status;
|
||||
uint64_t tmp64;
|
||||
|
||||
/* Read primary processor based VM control. */
|
||||
tmp64 = msr_read(MSR_IA32_VMX_PROCBASED_CTLS);
|
||||
|
||||
/* Check if secondary processor based VM control is available. */
|
||||
if (tmp64 & MMU_MEM_ATTR_BIT_EXECUTE_DISABLE) {
|
||||
/* Read primary processor based VM control. */
|
||||
tmp64 = msr_read(MSR_IA32_VMX_PROCBASED_CTLS2);
|
||||
|
||||
/* Check if EPT is supported. */
|
||||
if (tmp64 & (((uint64_t)VMX_PROCBASED_CTLS2_EPT) << 32)) {
|
||||
/* EPT is present. */
|
||||
status = 1;
|
||||
} else {
|
||||
status = 0;
|
||||
}
|
||||
|
||||
} else {
|
||||
/* Secondary processor based VM control is not present */
|
||||
status = 0;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static int check_hv_mmio_range(struct vm *vm, struct mem_io *mmio)
|
||||
{
|
||||
int status = false;
|
||||
struct list_head *pos;
|
||||
struct mem_io_node *mmio_node;
|
||||
|
||||
|
||||
list_for_each(pos, &vm->mmio_list) {
|
||||
mmio_node = list_entry(pos, struct mem_io_node, list);
|
||||
/* Check if this handler's range covers this memory access */
|
||||
if ((mmio->paddr >= mmio_node->range_start) &&
|
||||
(mmio->paddr + mmio->access_size <=
|
||||
mmio_node->range_end)) {
|
||||
status = true;
|
||||
|
||||
/* Break from loop - only 1 handler allowed to support
|
||||
* a given memory range
|
||||
*/
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return success for now */
|
||||
return status;
|
||||
}
|
||||
|
||||
static int hv_emulate_mmio(struct vcpu *vcpu, struct mem_io *mmio)
|
||||
{
|
||||
int status = -EINVAL;
|
||||
struct list_head *pos;
|
||||
struct mem_io_node *mmio_node;
|
||||
struct vm *vm = vcpu->vm;
|
||||
|
||||
list_for_each(pos, &vm->mmio_list) {
|
||||
mmio_node = list_entry(pos, struct mem_io_node, list);
|
||||
/* Check if this handler's range covers this memory access */
|
||||
if ((mmio->paddr >= mmio_node->range_start) &&
|
||||
(mmio->paddr + mmio->access_size
|
||||
<= mmio_node->range_end)) {
|
||||
|
||||
ASSERT((mmio->paddr % mmio->access_size) == 0,
|
||||
"access size not align with paddr");
|
||||
|
||||
/* Handle this MMIO operation */
|
||||
status = mmio_node->read_write(vcpu, mmio,
|
||||
mmio_node->handler_private_data);
|
||||
|
||||
/* Break from loop - only 1 handler allowed to support
|
||||
* given memory range
|
||||
*/
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return success for now */
|
||||
return status;
|
||||
}
|
||||
|
||||
int register_mmio_emulation_handler(struct vm *vm,
|
||||
hv_mem_io_handler_t read_write, uint64_t start,
|
||||
uint64_t end, void *handler_private_data)
|
||||
{
|
||||
int status = -EINVAL;
|
||||
struct mem_io_node *mmio_node;
|
||||
|
||||
if (vm->hw.created_vcpus > 0 && vm->hw.vcpu_array[0]->launched) {
|
||||
ASSERT(0, "register mmio handler after vm launched");
|
||||
return status;
|
||||
}
|
||||
|
||||
/* Ensure both a read/write handler and range check function exist */
|
||||
if ((read_write != HV_NULL) && (end > start)) {
|
||||
/* Allocate memory for node */
|
||||
mmio_node =
|
||||
(struct mem_io_node *)calloc(1, sizeof(struct mem_io_node));
|
||||
|
||||
/* Ensure memory successfully allocated */
|
||||
if (mmio_node) {
|
||||
/* Fill in information for this node */
|
||||
mmio_node->read_write = read_write;
|
||||
mmio_node->handler_private_data = handler_private_data;
|
||||
|
||||
INIT_LIST_HEAD(&mmio_node->list);
|
||||
list_add(&mmio_node->list, &vm->mmio_list);
|
||||
|
||||
mmio_node->range_start = start;
|
||||
mmio_node->range_end = end;
|
||||
ept_mmap(vm, start, start, end - start,
|
||||
MAP_UNMAP, 0);
|
||||
|
||||
/* Return success */
|
||||
status = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return status to caller */
|
||||
return status;
|
||||
}
|
||||
|
||||
void unregister_mmio_emulation_handler(struct vm *vm, uint64_t start,
|
||||
uint64_t end)
|
||||
{
|
||||
struct list_head *pos, *tmp;
|
||||
struct mem_io_node *mmio_node;
|
||||
|
||||
list_for_each_safe(pos, tmp, &vm->mmio_list) {
|
||||
mmio_node = list_entry(pos, struct mem_io_node, list);
|
||||
|
||||
if ((mmio_node->range_start == start) &&
|
||||
(mmio_node->range_end == end)) {
|
||||
/* assume only one entry found in mmio_list */
|
||||
list_del_init(&mmio_node->list);
|
||||
free(mmio_node);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int dm_emulate_mmio_post(struct vcpu *vcpu)
|
||||
{
|
||||
int ret = 0;
|
||||
int cur = vcpu->vcpu_id;
|
||||
struct vhm_request_buffer *req_buf =
|
||||
(void *)HPA2HVA(vcpu->vm->sw.req_buf);
|
||||
|
||||
vcpu->req.reqs.mmio_request.value =
|
||||
req_buf->req_queue[cur].reqs.mmio_request.value;
|
||||
|
||||
/* VHM emulation data already copy to req, mark to free slot now */
|
||||
req_buf->req_queue[cur].valid = false;
|
||||
|
||||
if (req_buf->req_queue[cur].processed == REQ_STATE_SUCCESS)
|
||||
vcpu->mmio.mmio_status = MMIO_TRANS_VALID;
|
||||
else {
|
||||
vcpu->mmio.mmio_status = MMIO_TRANS_INVALID;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (vcpu->mmio.read_write == HV_MEM_IO_READ) {
|
||||
vcpu->mmio.value = vcpu->req.reqs.mmio_request.value;
|
||||
/* Emulate instruction and update vcpu register set */
|
||||
ret = emulate_instruction(vcpu, &vcpu->mmio);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int dm_emulate_mmio_pre(struct vcpu *vcpu, uint64_t exit_qual)
|
||||
{
|
||||
int status;
|
||||
|
||||
status = analyze_instruction(vcpu, &vcpu->mmio);
|
||||
if (status != 0)
|
||||
return status;
|
||||
|
||||
if (vcpu->mmio.read_write == HV_MEM_IO_WRITE) {
|
||||
status = emulate_instruction(vcpu, &vcpu->mmio);
|
||||
if (status != 0)
|
||||
return status;
|
||||
vcpu->req.reqs.mmio_request.value = vcpu->mmio.value;
|
||||
/* XXX: write access while EPT perm RX -> WP */
|
||||
if ((exit_qual & 0x38) == 0x28)
|
||||
vcpu->req.type = REQ_WP;
|
||||
}
|
||||
|
||||
if (vcpu->req.type == 0)
|
||||
vcpu->req.type = REQ_MMIO;
|
||||
vcpu->req.reqs.mmio_request.direction = vcpu->mmio.read_write;
|
||||
vcpu->req.reqs.mmio_request.address = (long)vcpu->mmio.paddr;
|
||||
vcpu->req.reqs.mmio_request.size = vcpu->mmio.access_size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ept_violation_handler(struct vcpu *vcpu)
|
||||
{
|
||||
int status;
|
||||
uint64_t exit_qual;
|
||||
uint64_t gpa;
|
||||
|
||||
/* Handle page fault from guest */
|
||||
exit_qual = exec_vmread(VMX_EXIT_QUALIFICATION);
|
||||
|
||||
memset(&vcpu->req, 0, sizeof(struct vhm_request));
|
||||
|
||||
/* Specify if read or write operation */
|
||||
if (exit_qual & 0x2) {
|
||||
/* Write operation */
|
||||
vcpu->mmio.read_write = HV_MEM_IO_WRITE;
|
||||
|
||||
/* Get write value from appropriate register in context */
|
||||
/* TODO: Need to figure out how to determine value being
|
||||
* written
|
||||
*/
|
||||
vcpu->mmio.value = 0;
|
||||
} else {
|
||||
/* Read operation */
|
||||
vcpu->mmio.read_write = HV_MEM_IO_READ;
|
||||
|
||||
/* Get sign extension requirements for read */
|
||||
/* TODO: Need to determine how sign extension is determined for
|
||||
* reads
|
||||
*/
|
||||
vcpu->mmio.sign_extend_read = 0;
|
||||
}
|
||||
|
||||
/* Get the guest physical address */
|
||||
gpa = exec_vmread64(VMX_GUEST_PHYSICAL_ADDR_FULL);
|
||||
|
||||
TRACE_2L(TRC_VMEXIT_EPT_VIOLATION, exit_qual, gpa);
|
||||
|
||||
/* Adjust IPA appropriately and OR page offset to get full IPA of abort
|
||||
*/
|
||||
vcpu->mmio.paddr = gpa;
|
||||
|
||||
/* Check if the MMIO access has a HV registered handler */
|
||||
status = check_hv_mmio_range((struct vm *) vcpu->vm, &vcpu->mmio);
|
||||
|
||||
if (status == true) {
|
||||
/* Fetch and decode current vcpu instruction */
|
||||
status = analyze_instruction(vcpu, &vcpu->mmio);
|
||||
|
||||
if (status != 0)
|
||||
goto out;
|
||||
|
||||
if (vcpu->mmio.read_write == HV_MEM_IO_WRITE) {
|
||||
status = emulate_instruction(vcpu, &vcpu->mmio);
|
||||
if (status != 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Call generic memory emulation handler
|
||||
* For MMIO write, call hv_emulate_mmio after
|
||||
* instruction emulation. For MMIO read,
|
||||
* call hv_emulate_mmio at first.
|
||||
*/
|
||||
status = hv_emulate_mmio(vcpu, &vcpu->mmio);
|
||||
|
||||
if (vcpu->mmio.read_write == HV_MEM_IO_READ) {
|
||||
/* Emulate instruction and update vcpu register set */
|
||||
status = emulate_instruction(vcpu, &vcpu->mmio);
|
||||
if (status != 0)
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
/*
* No MMIO handler on the HV side; search the VHM in Dom0 instead.
*
* ACRN inserts the request into the VHM and injects an upcall.
* For an MMIO write, ask the DM to run MMIO emulation after
* instruction emulation; for an MMIO read, ask the DM to run
* MMIO emulation first.
*/
|
||||
status = dm_emulate_mmio_pre(vcpu, exit_qual);
|
||||
if (status != 0)
|
||||
goto out;
|
||||
status = acrn_insert_request_wait(vcpu, &vcpu->req);
|
||||
}
|
||||
|
||||
return status;
|
||||
|
||||
out:
|
||||
pr_fatal("Guest Linear Address: 0x%016llx",
|
||||
exec_vmread(VMX_GUEST_LINEAR_ADDR));
|
||||
|
||||
pr_fatal("Guest Physical Address address: 0x%016llx",
|
||||
gpa);
|
||||
|
||||
ASSERT(status == true, "EPT violation");
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
int ept_misconfig_handler(__unused struct vcpu *vcpu)
|
||||
{
|
||||
int status;
|
||||
|
||||
status = -EINVAL;
|
||||
|
||||
/* TODO - EPT Misconfiguration handler */
|
||||
pr_info("%s, Guest linear address: 0x%016llx ",
|
||||
__func__, exec_vmread64(VMX_GUEST_LINEAR_ADDR));
|
||||
|
||||
pr_info("%s, Guest physical address: 0x%016llx ",
|
||||
__func__, exec_vmread64(VMX_GUEST_PHYSICAL_ADDR_FULL));
|
||||
|
||||
ASSERT(status == 0, "EPT Misconfiguration is not handled.\n");
|
||||
|
||||
TRACE_2L(TRC_VMEXIT_EPT_MISCONFIGURATION, 0, 0);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
int ept_mmap(struct vm *vm, uint64_t hpa,
|
||||
uint64_t gpa, uint64_t size, uint32_t type, uint32_t prot)
|
||||
{
|
||||
struct map_params map_params;
|
||||
int i;
|
||||
struct vcpu *vcpu;
|
||||
|
||||
/* Setup memory map parameters */
|
||||
map_params.page_table_type = PT_EPT;
|
||||
if (vm->arch_vm.ept) {
|
||||
map_params.pml4_base = vm->arch_vm.ept;
|
||||
map_params.pml4_inverted = vm->arch_vm.m2p;
|
||||
} else {
|
||||
map_params.pml4_base =
|
||||
alloc_paging_struct();
|
||||
vm->arch_vm.ept = map_params.pml4_base;
|
||||
map_params.pml4_inverted = alloc_paging_struct();
|
||||
vm->arch_vm.m2p = map_params.pml4_inverted;
|
||||
}
|
||||
|
||||
if (type == MAP_MEM || type == MAP_MMIO) {
|
||||
map_mem(&map_params, (void *)hpa,
|
||||
(void *)gpa, size, prot);
|
||||
|
||||
} else if (type == MAP_UNMAP) {
|
||||
unmap_mem(&map_params, (void *)hpa, (void *)gpa,
|
||||
size, prot);
|
||||
} else
|
||||
ASSERT(0, "unknown map type");
|
||||
|
||||
foreach_vcpu(i, vm, vcpu) {
|
||||
vcpu_make_request(vcpu, ACRN_REQUEST_TLB_FLUSH);
|
||||
}
|
||||
|
||||
dev_dbg(ACRN_DBG_EPT, "ept map: %s hpa: 0x%016llx gpa: 0x%016llx ",
|
||||
type == MAP_UNMAP ? "unmap" : "map", hpa, gpa);
|
||||
dev_dbg(ACRN_DBG_EPT, "size: 0x%016llx prot: 0x%x\n", size, prot);
|
||||
|
||||
return 0;
|
||||
}
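As an aside, here is a minimal usage sketch of ept_mmap (an editor's illustration, not part of this source file): it identity-maps a guest RAM range with write-back attributes and later removes the mapping, mirroring what prepare_vm0_memmap_and_e820 in guest/guest.c below does. The helper name example_map_guest_ram is hypothetical.

/* illustrative only: identity-map one RAM region for a VM with
 * write-back caching, then drop the mapping again
 */
static void example_map_guest_ram(struct vm *vm, uint64_t base, uint64_t len)
{
    uint32_t attr_wb = (MMU_MEM_ATTR_READ |
            MMU_MEM_ATTR_WRITE |
            MMU_MEM_ATTR_EXECUTE |
            MMU_MEM_ATTR_WB_CACHE);

    /* hpa == gpa here, i.e. an identity mapping */
    ept_mmap(vm, base, base, len, MAP_MEM, attr_wb);

    /* ... later, remove the mapping; ept_mmap requests a TLB flush
     * on every vcpu of this VM
     */
    ept_mmap(vm, base, base, len, MAP_UNMAP, 0);
}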
|
84
hypervisor/arch/x86/gdt.c
Normal file
84
hypervisor/arch/x86/gdt.c
Normal file
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hv_lib.h>
|
||||
#include <cpu.h>
|
||||
#include <gdt.h>
|
||||
|
||||
DEFINE_CPU_DATA(struct tss_64, tss);
|
||||
DEFINE_CPU_DATA(struct host_gdt, gdt);
|
||||
DEFINE_CPU_DATA(uint8_t[STACK_SIZE], mc_stack) __aligned(16);
|
||||
DEFINE_CPU_DATA(uint8_t[STACK_SIZE], df_stack) __aligned(16);
|
||||
DEFINE_CPU_DATA(uint8_t[STACK_SIZE], sf_stack) __aligned(16);
|
||||
|
||||
static void set_tss_desc(union tss_64_descriptor *desc,
|
||||
void *tss, int tss_limit, int type)
|
||||
{
|
||||
uint32_t u1, u2, u3;
|
||||
|
||||
u1 = ((uint64_t)tss << 16) & 0xFFFFFFFF;
|
||||
u2 = (uint64_t)tss & 0xFF000000;
|
||||
u3 = ((uint64_t)tss & 0x00FF0000) >> 16;
|
||||
|
||||
|
||||
desc->low32.value = u1 | (tss_limit & 0xFFFF);
|
||||
desc->base_addr_63_32 = (uint32_t)((uint64_t)tss >> 32);
|
||||
desc->high32.value = (u2 | ((uint32_t)type << 8) | 0x8000 | u3);
|
||||
}
|
||||
|
||||
void load_gdtr_and_tr(void)
|
||||
{
|
||||
struct host_gdt *gdt = &get_cpu_var(gdt);
|
||||
struct host_gdt_descriptor gdtr;
|
||||
struct tss_64 *tss = &get_cpu_var(tss);
|
||||
|
||||
/* first entry is not used */
|
||||
gdt->rsvd = 0xAAAAAAAAAAAAAAAA;
|
||||
/* ring 0 code sel descriptor */
|
||||
gdt->host_gdt_code_descriptor.value = 0x00Af9b000000ffff;
|
||||
/* ring 0 data sel descriptor */
|
||||
gdt->host_gdt_data_descriptor.value = 0x00cf93000000ffff;
|
||||
|
||||
tss->ist1 = (uint64_t)get_cpu_var(mc_stack) + STACK_SIZE;
|
||||
tss->ist2 = (uint64_t)get_cpu_var(df_stack) + STACK_SIZE;
|
||||
tss->ist3 = (uint64_t)get_cpu_var(sf_stack) + STACK_SIZE;
|
||||
tss->ist4 = 0L;
|
||||
|
||||
/* tss descriptor */
|
||||
set_tss_desc(&gdt->host_gdt_tss_descriptors,
|
||||
(void *)tss, sizeof(struct tss_64), TSS_AVAIL);
|
||||
|
||||
gdtr.len = sizeof(struct host_gdt) - 1;
|
||||
gdtr.gdt = gdt;
|
||||
|
||||
asm volatile ("lgdt %0" ::"m"(gdtr));
|
||||
|
||||
CPU_LTR_EXECUTE(HOST_GDT_RING0_CPU_TSS_SEL);
|
||||
}
|
389
hypervisor/arch/x86/guest/guest.c
Normal file
389
hypervisor/arch/x86/guest/guest.c
Normal file
@@ -0,0 +1,389 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <bsp_cfg.h>
|
||||
#include <bsp_extern.h>
|
||||
#include <acrn_hv_defs.h>
|
||||
#include <hv_debug.h>
|
||||
#include <multiboot.h>
|
||||
|
||||
#define BOOT_ARGS_LOAD_ADDR 0x24EFC000
|
||||
|
||||
#define ACRN_DBG_GUEST 6
|
||||
|
||||
/* for VM0 e820 */
|
||||
uint32_t e820_entries;
|
||||
struct e820_entry e820[E820_MAX_ENTRIES];
|
||||
struct e820_mem_params e820_mem;
|
||||
|
||||
inline bool
|
||||
is_vm0(struct vm *vm)
|
||||
{
|
||||
return (vm->attr.boot_idx & 0x7F) == 0;
|
||||
}
|
||||
|
||||
inline struct vcpu *vcpu_from_vid(struct vm *vm, int vcpu_id)
|
||||
{
|
||||
int i;
|
||||
struct vcpu *vcpu;
|
||||
|
||||
foreach_vcpu(i, vm, vcpu) {
|
||||
if (vcpu->vcpu_id == vcpu_id)
|
||||
return vcpu;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
inline struct vcpu *vcpu_from_pid(struct vm *vm, int pcpu_id)
|
||||
{
|
||||
int i;
|
||||
struct vcpu *vcpu;
|
||||
|
||||
foreach_vcpu(i, vm, vcpu) {
|
||||
if (vcpu->pcpu_id == pcpu_id)
|
||||
return vcpu;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
inline struct vcpu *get_primary_vcpu(struct vm *vm)
|
||||
{
|
||||
int i;
|
||||
struct vcpu *vcpu;
|
||||
|
||||
foreach_vcpu(i, vm, vcpu) {
|
||||
if (is_vcpu_bsp(vcpu))
|
||||
return vcpu;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
inline uint64_t vcpumask2pcpumask(struct vm *vm, uint64_t vdmask)
|
||||
{
|
||||
int vcpu_id;
|
||||
uint64_t dmask = 0;
|
||||
struct vcpu *vcpu;
|
||||
|
||||
while ((vcpu_id = bitmap_ffs(&vdmask)) >= 0) {
|
||||
bitmap_clr(vcpu_id, &vdmask);
|
||||
vcpu = vcpu_from_vid(vm, vcpu_id);
|
||||
ASSERT(vcpu, "vcpu_from_vid failed");
|
||||
bitmap_set(vcpu->pcpu_id, &dmask);
|
||||
}
|
||||
|
||||
return dmask;
|
||||
}
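A small illustration of vcpumask2pcpumask (editor's sketch, not part of this file; the helper name is hypothetical): with VM0's vcpus pinned as vcpu0 -> pcpu0 and vcpu1 -> pcpu2, as described in the mapping-table comment in guest/vcpu.c further below, a virtual destination mask of 0x3 translates to the physical mask 0x5.

/* illustrative only: translate a virtual destination mask to a physical one */
static uint64_t example_dest_mask(struct vm *vm)
{
    /* bits 0 and 1 select vcpu0 and vcpu1 of this vm */
    uint64_t vdmask = 0x3UL;

    /* with vcpu0->pcpu0 and vcpu1->pcpu2 this returns (1UL<<0) | (1UL<<2) */
    return vcpumask2pcpumask(vm, vdmask);
}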
|
||||
|
||||
inline bool vm_lapic_disabled(struct vm *vm)
|
||||
{
|
||||
int i;
|
||||
struct vcpu *vcpu;
|
||||
|
||||
foreach_vcpu(i, vm, vcpu) {
|
||||
if (vlapic_enabled(vcpu->arch_vcpu.vlapic))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int init_vm0_boot_info(struct vm *vm)
|
||||
{
|
||||
struct multiboot_module *mods = NULL;
|
||||
struct multiboot_info *mbi = NULL;
|
||||
|
||||
if (!is_vm0(vm)) {
|
||||
pr_err("just for vm0 to get info!");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (boot_regs[0] != MULTIBOOT_INFO_MAGIC) {
|
||||
ASSERT(0, "no multiboot info found");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mbi = (struct multiboot_info *)((uint64_t)boot_regs[1]);
|
||||
|
||||
dev_dbg(ACRN_DBG_GUEST, "Multiboot detected, flag=0x%x", mbi->mi_flags);
|
||||
if (!(mbi->mi_flags & MULTIBOOT_INFO_HAS_MODS)) {
|
||||
ASSERT(0, "no sos kernel info found");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
dev_dbg(ACRN_DBG_GUEST, "mod counts=%d\n", mbi->mi_mods_count);
|
||||
|
||||
/* mod[0] is for kernel&cmdline, other mod for ramdisk/firmware info*/
|
||||
mods = (struct multiboot_module *)(uint64_t)mbi->mi_mods_addr;
|
||||
|
||||
dev_dbg(ACRN_DBG_GUEST, "mod0 start=0x%x, end=0x%x",
|
||||
mods[0].mm_mod_start, mods[0].mm_mod_end);
|
||||
dev_dbg(ACRN_DBG_GUEST, "cmd addr=0x%x, str=%s", mods[0].mm_string,
|
||||
(char *) (uint64_t)mods[0].mm_string);
|
||||
|
||||
vm->sw.kernel_type = VM_LINUX_GUEST;
|
||||
vm->sw.kernel_info.kernel_src_addr =
|
||||
(void *)(uint64_t)mods[0].mm_mod_start;
|
||||
vm->sw.kernel_info.kernel_size =
|
||||
mods[0].mm_mod_end - mods[0].mm_mod_start;
|
||||
vm->sw.kernel_info.kernel_load_addr =
|
||||
(void *)(uint64_t)mods[0].mm_mod_start;
|
||||
|
||||
vm->sw.linux_info.bootargs_src_addr =
|
||||
(void *)(uint64_t)mods[0].mm_string;
|
||||
vm->sw.linux_info.bootargs_load_addr =
|
||||
(void *)BOOT_ARGS_LOAD_ADDR;
|
||||
vm->sw.linux_info.bootargs_size =
|
||||
strnlen_s((char *)(uint64_t) mods[0].mm_string, MEM_2K);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t gva2gpa(struct vm *vm, uint64_t cr3, uint64_t gva)
|
||||
{
|
||||
int level, index, shift;
|
||||
uint64_t *base, addr, entry, page_size;
|
||||
uint64_t gpa = 0;
|
||||
|
||||
addr = cr3;
|
||||
|
||||
for (level = 3; level >= 0; level--) {
|
||||
addr = addr & IA32E_REF_MASK;
|
||||
base = GPA2HVA(vm, addr);
|
||||
ASSERT(base != NULL, "invalid ptp base.");
|
||||
shift = level * 9 + 12;
|
||||
index = (gva >> shift) & 0x1FF;
|
||||
page_size = 1UL << shift;
|
||||
|
||||
entry = base[index];
|
||||
if (level > 0 && (entry & MMU_32BIT_PDE_PS) != 0)
|
||||
break;
|
||||
addr = entry;
|
||||
}
|
||||
|
||||
entry >>= shift; entry <<= (shift + 12); entry >>= 12;
|
||||
gpa = entry | (gva & (page_size - 1));
|
||||
|
||||
return gpa;
|
||||
}
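A short usage sketch of gva2gpa (editor's illustration, not part of this file; example_gva_to_hva is a hypothetical name): it translates a guest-virtual address through the current context's CR3 and then to a host-virtual address, the same pattern analyze_instruction in guest/instr_emul_wrapper.c below uses for the guest RIP.

/* illustrative only: translate a guest-virtual address of the current
 * vcpu context into a guest-physical address, then into an HVA
 */
static void *example_gva_to_hva(struct vcpu *vcpu, uint64_t gva)
{
    uint64_t cr3 = vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3;
    uint64_t gpa = gva2gpa(vcpu->vm, cr3, gva);

    /* HVA <-> HPA is currently a 1:1 mapping, so GPA2HVA is enough */
    return GPA2HVA(vcpu->vm, gpa);
}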
|
||||
|
||||
void init_e820(void)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
if (boot_regs[0] == MULTIBOOT_INFO_MAGIC) {
|
||||
struct multiboot_info *mbi =
|
||||
(struct multiboot_info *)((uint64_t)boot_regs[1]);
|
||||
pr_info("Multiboot info detected\n");
|
||||
if (mbi->mi_flags & 0x40) {
|
||||
struct multiboot_mmap *mmap =
|
||||
(struct multiboot_mmap *)
|
||||
((uint64_t)mbi->mi_mmap_addr);
|
||||
e820_entries = mbi->mi_mmap_length/
|
||||
sizeof(struct multiboot_mmap);
|
||||
if (e820_entries > E820_MAX_ENTRIES) {
|
||||
pr_err("Too many E820 entries %d\n",
|
||||
e820_entries);
|
||||
e820_entries = E820_MAX_ENTRIES;
|
||||
}
|
||||
dev_dbg(ACRN_DBG_GUEST,
|
||||
"mmap length 0x%x addr 0x%x entries %d\n",
|
||||
mbi->mi_mmap_length, mbi->mi_mmap_addr,
|
||||
e820_entries);
|
||||
for (i = 0; i < e820_entries; i++) {
|
||||
e820[i].baseaddr = mmap[i].baseaddr;
|
||||
e820[i].length = mmap[i].length;
|
||||
e820[i].type = mmap[i].type;
|
||||
|
||||
dev_dbg(ACRN_DBG_GUEST,
|
||||
"mmap table: %d type: 0x%x\n",
|
||||
i, mmap[i].type);
|
||||
dev_dbg(ACRN_DBG_GUEST,
|
||||
"Base: 0x%016llx length: 0x%016llx",
|
||||
mmap[i].baseaddr, mmap[i].length);
|
||||
}
|
||||
}
|
||||
} else
|
||||
ASSERT(0, "no multiboot info found");
|
||||
}
|
||||
|
||||
|
||||
void obtain_e820_mem_info(void)
|
||||
{
|
||||
unsigned int i;
|
||||
struct e820_entry *entry;
|
||||
|
||||
e820_mem.mem_bottom = UINT64_MAX;
|
||||
e820_mem.mem_top = 0x00;
|
||||
e820_mem.max_ram_blk_base = 0;
|
||||
e820_mem.max_ram_blk_size = 0;
|
||||
|
||||
for (i = 0; i < e820_entries; i++) {
|
||||
entry = &e820[i];
|
||||
if (e820_mem.mem_bottom > entry->baseaddr)
|
||||
e820_mem.mem_bottom = entry->baseaddr;
|
||||
|
||||
if (entry->baseaddr + entry->length
|
||||
> e820_mem.mem_top) {
|
||||
e820_mem.mem_top = entry->baseaddr
|
||||
+ entry->length;
|
||||
}
|
||||
|
||||
if (entry->baseaddr == UOS_DEFAULT_START_ADDR
|
||||
&& entry->type == E820_TYPE_RAM) {
|
||||
e820_mem.max_ram_blk_base =
|
||||
entry->baseaddr;
|
||||
e820_mem.max_ram_blk_size = entry->length;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void rebuild_vm0_e820(void)
|
||||
{
|
||||
unsigned int i;
|
||||
uint64_t entry_start;
|
||||
uint64_t entry_end;
|
||||
uint64_t hv_start = CONFIG_RAM_START;
|
||||
uint64_t hv_end = hv_start + CONFIG_RAM_SIZE;
|
||||
struct e820_entry *entry, new_entry = {0};
|
||||
|
||||
/* Hypervisor memory needs to be filtered out of the e820 table:
* the hypervisor itself plus other HV-reserved memory (e.g. for vGT)
*/
|
||||
for (i = 0; i < e820_entries; i++) {
|
||||
entry = &e820[i];
|
||||
entry_start = entry->baseaddr;
|
||||
entry_end = entry->baseaddr + entry->length;
|
||||
|
||||
/* No need to handle these cases */
|
||||
if (entry->type != E820_TYPE_RAM || entry_end <= hv_start
|
||||
|| entry_start >= hv_end) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* filter out hv mem and adjust the length of this entry */
|
||||
if (entry_start < hv_start && entry_end <= hv_end) {
|
||||
entry->length = hv_start - entry_start;
|
||||
continue;
|
||||
}
|
||||
/* filter out hv mem; a new entry needs to be created */
|
||||
if (entry_start < hv_start && entry_end > hv_end) {
|
||||
entry->length = hv_start - entry_start;
|
||||
new_entry.baseaddr = hv_end;
|
||||
new_entry.length = entry_end - hv_end;
|
||||
new_entry.type = E820_TYPE_RAM;
|
||||
continue;
|
||||
}
|
||||
/* This entry is within the range of hv mem
|
||||
* change to E820_TYPE_RESERVED
|
||||
*/
|
||||
if (entry_start >= hv_start && entry_end <= hv_end) {
|
||||
entry->type = E820_TYPE_RESERVED;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (entry_start >= hv_start && entry_start < hv_end
|
||||
&& entry_end > hv_end) {
|
||||
entry->baseaddr = hv_end;
|
||||
entry->length = entry_end - hv_end;
|
||||
continue;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (new_entry.length > 0) {
|
||||
e820_entries++;
|
||||
ASSERT(e820_entries <= E820_MAX_ENTRIES,
|
||||
"e820 entry overflow");
|
||||
entry = &e820[e820_entries - 1];
|
||||
entry->baseaddr = new_entry.baseaddr;
|
||||
entry->length = new_entry.length;
|
||||
entry->type = new_entry.type;
|
||||
}
|
||||
|
||||
}
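For clarity, a worked illustration of the splitting case above (editor's sketch with made-up addresses, not part of this file): when the hypervisor range falls strictly inside one RAM entry, the entry is trimmed to the head and a new RAM entry is appended for the tail.

/* illustrative only, hypothetical addresses: one RAM entry covering the
 * hypervisor on both sides is split into a head and a tail entry
 */
static void example_split(void)
{
    uint64_t entry_start = 0x00100000UL, entry_end = 0x80000000UL;
    uint64_t hv_start = 0x20000000UL, hv_end = 0x22000000UL;

    /* head keeps [entry_start, hv_start) -> length 0x1FF00000 */
    uint64_t head_len = hv_start - entry_start;
    /* tail becomes a new E820_TYPE_RAM entry [hv_end, entry_end) */
    uint64_t tail_len = entry_end - hv_end;

    (void)head_len;
    (void)tail_len;
}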
|
||||
int prepare_vm0_memmap_and_e820(struct vm *vm)
|
||||
{
|
||||
unsigned int i;
|
||||
uint32_t attr_wb = (MMU_MEM_ATTR_READ |
|
||||
MMU_MEM_ATTR_WRITE |
|
||||
MMU_MEM_ATTR_EXECUTE |
|
||||
MMU_MEM_ATTR_WB_CACHE);
|
||||
uint32_t attr_uc = (MMU_MEM_ATTR_READ |
|
||||
MMU_MEM_ATTR_WRITE |
|
||||
MMU_MEM_ATTR_EXECUTE |
|
||||
MMU_MEM_ATTR_UNCACHED);
|
||||
struct e820_entry *entry;
|
||||
|
||||
|
||||
ASSERT(is_vm0(vm), "This func only for vm0");
|
||||
|
||||
rebuild_vm0_e820();
|
||||
dev_dbg(ACRN_DBG_GUEST,
|
||||
"vm0: bottom memory - 0x%llx, top memory - 0x%llx\n",
|
||||
e820_mem.mem_bottom, e820_mem.mem_top);
|
||||
|
||||
/* create real ept map for all ranges with UC */
|
||||
ept_mmap(vm, e820_mem.mem_bottom, e820_mem.mem_bottom,
|
||||
(e820_mem.mem_top - e820_mem.mem_bottom),
|
||||
MAP_MMIO, attr_uc);
|
||||
|
||||
/* update ram entries to WB attr */
|
||||
for (i = 0; i < e820_entries; i++) {
|
||||
entry = &e820[i];
|
||||
if (entry->type == E820_TYPE_RAM)
|
||||
ept_mmap(vm, entry->baseaddr, entry->baseaddr,
|
||||
entry->length, MAP_MEM, attr_wb);
|
||||
}
|
||||
|
||||
|
||||
dev_dbg(ACRN_DBG_GUEST, "VM0 e820 layout:\n");
|
||||
for (i = 0; i < e820_entries; i++) {
|
||||
entry = &e820[i];
|
||||
dev_dbg(ACRN_DBG_GUEST,
|
||||
"e820 table: %d type: 0x%x", i, entry->type);
|
||||
dev_dbg(ACRN_DBG_GUEST,
|
||||
"BaseAddress: 0x%016llx length: 0x%016llx\n",
|
||||
entry->baseaddr, entry->length);
|
||||
}
|
||||
|
||||
/* Unmap the hypervisor itself for safety:
* an EPT violation will be raised if the SOS accesses HV memory
*/
|
||||
ept_mmap(vm, CONFIG_RAM_START, CONFIG_RAM_START,
|
||||
CONFIG_RAM_SIZE, MAP_UNMAP, 0);
|
||||
return 0;
|
||||
}
|
2137
hypervisor/arch/x86/guest/instr_emul.c
Normal file
2137
hypervisor/arch/x86/guest/instr_emul.c
Normal file
File diff suppressed because it is too large
Load Diff
95
hypervisor/arch/x86/guest/instr_emul.h
Normal file
95
hypervisor/arch/x86/guest/instr_emul.h
Normal file
@@ -0,0 +1,95 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* Copyright (c) 2017 Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMM_INSTRUCTION_EMUL_H_
|
||||
#define _VMM_INSTRUCTION_EMUL_H_
|
||||
|
||||
/*
|
||||
* Callback functions to read and write memory regions.
|
||||
*/
|
||||
typedef int (*mem_region_read_t)(struct vcpu *vcpu, uint64_t gpa,
|
||||
uint64_t *rval, int rsize, void *arg);
|
||||
|
||||
typedef int (*mem_region_write_t)(struct vcpu *vcpu, uint64_t gpa,
|
||||
uint64_t wval, int wsize, void *arg);
|
||||
|
||||
/*
|
||||
* Emulate the decoded 'vie' instruction.
|
||||
*
|
||||
* The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
|
||||
* containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
|
||||
* callback functions.
|
||||
*
|
||||
* 'void *vm' should be 'struct vm *' when called from kernel context and
|
||||
* 'struct vmctx *' when called from user context.
|
||||
* s
|
||||
*/
|
||||
int vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
|
||||
struct vm_guest_paging *paging, mem_region_read_t mrr,
|
||||
mem_region_write_t mrw, void *mrarg);
|
||||
|
||||
int vie_update_register(struct vcpu *vcpu, enum vm_reg_name reg,
|
||||
uint64_t val, int size);
|
||||
|
||||
/*
|
||||
* Returns 1 if an alignment check exception should be injected and 0 otherwise.
|
||||
*/
|
||||
int vie_alignment_check(int cpl, int operand_size, uint64_t cr0,
|
||||
uint64_t rflags, uint64_t gla);
|
||||
|
||||
/* Returns 1 if the 'gla' is not canonical and 0 otherwise. */
|
||||
int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla);
|
||||
|
||||
uint64_t vie_size2mask(int size);
|
||||
|
||||
int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
|
||||
struct seg_desc *desc, uint64_t off, int length, int addrsize, int prot,
|
||||
uint64_t *gla);
|
||||
|
||||
void vie_init(struct vie *vie, const char *inst_bytes, int inst_length);
|
||||
|
||||
/*
|
||||
* Decode the instruction fetched into 'vie' so it can be emulated.
|
||||
*
|
||||
* 'gla' is the guest linear address provided by the hardware assist
|
||||
* that caused the nested page table fault. It is used to verify that
|
||||
* the software instruction decoding is in agreement with the hardware.
|
||||
*
|
||||
* Some hardware assists do not provide the 'gla' to the hypervisor.
|
||||
* To skip the 'gla' verification for this or any other reason pass
|
||||
* in VIE_INVALID_GLA instead.
|
||||
*/
|
||||
#define VIE_INVALID_GLA (1UL << 63) /* a non-canonical address */
|
||||
int vmm_decode_instruction(struct vcpu *vcpu, uint64_t gla,
|
||||
enum vm_cpu_mode cpu_mode, int csd, struct vie *vie);
|
||||
|
||||
int emulate_instruction(struct vcpu *vcpu, struct mem_io *mmio);
|
||||
int analyze_instruction(struct vcpu *vcpu, struct mem_io *mmio);
|
||||
|
||||
#endif /* _VMM_INSTRUCTION_EMUL_H_ */
|
466
hypervisor/arch/x86/guest/instr_emul_wrapper.c
Normal file
466
hypervisor/arch/x86/guest/instr_emul_wrapper.c
Normal file
@@ -0,0 +1,466 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <bsp_extern.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
#include "instr_emul_wrapper.h"
|
||||
#include "instr_emul.h"
|
||||
|
||||
struct emul_cnx {
|
||||
struct vie vie;
|
||||
struct vm_guest_paging paging;
|
||||
struct vcpu *vcpu;
|
||||
struct mem_io *mmio;
|
||||
};
|
||||
|
||||
static DEFINE_CPU_DATA(struct emul_cnx, g_inst_ctxt);
|
||||
|
||||
static int
|
||||
encode_vmcs_seg_desc(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc);
|
||||
|
||||
static int32_t
|
||||
get_vmcs_field(int ident);
|
||||
|
||||
static bool
|
||||
is_segment_register(int reg);
|
||||
|
||||
static bool
|
||||
is_descriptor_table(int reg);
|
||||
|
||||
int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
|
||||
{
|
||||
struct run_context *cur_context;
|
||||
|
||||
if (!vcpu)
|
||||
return -EINVAL;
|
||||
if ((reg >= VM_REG_LAST) || (reg < VM_REG_GUEST_RAX))
|
||||
return -EINVAL;
|
||||
|
||||
if ((reg >= VM_REG_GUEST_RAX) && (reg <= VM_REG_GUEST_RDI)) {
|
||||
cur_context =
|
||||
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
|
||||
*retval = cur_context->guest_cpu_regs.longs[reg];
|
||||
} else if ((reg > VM_REG_GUEST_RDI) && (reg < VM_REG_LAST)) {
|
||||
int32_t field = get_vmcs_field(reg);
|
||||
|
||||
if (field != -1)
|
||||
*retval = exec_vmread(field);
|
||||
else
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
|
||||
{
|
||||
struct run_context *cur_context;
|
||||
|
||||
if (!vcpu)
|
||||
return -EINVAL;
|
||||
if ((reg >= VM_REG_LAST) || (reg < VM_REG_GUEST_RAX))
|
||||
return -EINVAL;
|
||||
|
||||
if ((reg >= VM_REG_GUEST_RAX) && (reg <= VM_REG_GUEST_RDI)) {
|
||||
cur_context =
|
||||
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
|
||||
cur_context->guest_cpu_regs.longs[reg] = val;
|
||||
} else if ((reg > VM_REG_GUEST_RDI) && (reg < VM_REG_LAST)) {
|
||||
int32_t field = get_vmcs_field(reg);
|
||||
|
||||
if (field != -1)
|
||||
exec_vmwrite(field, val);
|
||||
else
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vm_set_seg_desc(struct vcpu *vcpu, int seg, struct seg_desc *ret_desc)
|
||||
{
|
||||
int error;
|
||||
uint32_t base, limit, access;
|
||||
|
||||
if ((!vcpu) || (!ret_desc))
|
||||
return -EINVAL;
|
||||
|
||||
if (!is_segment_register(seg) && !is_descriptor_table(seg))
|
||||
return -EINVAL;
|
||||
|
||||
error = encode_vmcs_seg_desc(seg, &base, &limit, &access);
|
||||
if ((error != 0) || (access == 0xffffffff))
|
||||
return -EINVAL;
|
||||
|
||||
exec_vmwrite(base, ret_desc->base);
|
||||
exec_vmwrite(limit, ret_desc->limit);
|
||||
exec_vmwrite(access, ret_desc->access);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vm_get_seg_desc(struct vcpu *vcpu, int seg, struct seg_desc *desc)
|
||||
{
|
||||
int error;
|
||||
uint32_t base, limit, access;
|
||||
|
||||
if ((!vcpu) || (!desc))
|
||||
return -EINVAL;
|
||||
|
||||
if (!is_segment_register(seg) && !is_descriptor_table(seg))
|
||||
return -EINVAL;
|
||||
|
||||
error = encode_vmcs_seg_desc(seg, &base, &limit, &access);
|
||||
if ((error != 0) || (access == 0xffffffff))
|
||||
return -EINVAL;
|
||||
|
||||
desc->base = exec_vmread(base);
|
||||
desc->limit = exec_vmread(limit);
|
||||
desc->access = exec_vmread(access);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vm_restart_instruction(struct vcpu *vcpu)
|
||||
{
|
||||
if (!vcpu)
|
||||
return -EINVAL;
|
||||
|
||||
VCPU_RETAIN_RIP(vcpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool is_descriptor_table(int reg)
|
||||
{
|
||||
switch (reg) {
|
||||
case VM_REG_GUEST_IDTR:
|
||||
case VM_REG_GUEST_GDTR:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool is_segment_register(int reg)
|
||||
{
|
||||
switch (reg) {
|
||||
case VM_REG_GUEST_ES:
|
||||
case VM_REG_GUEST_CS:
|
||||
case VM_REG_GUEST_SS:
|
||||
case VM_REG_GUEST_DS:
|
||||
case VM_REG_GUEST_FS:
|
||||
case VM_REG_GUEST_GS:
|
||||
case VM_REG_GUEST_TR:
|
||||
case VM_REG_GUEST_LDTR:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static int encode_vmcs_seg_desc(int seg, uint32_t *base, uint32_t *lim,
|
||||
uint32_t *acc)
|
||||
{
|
||||
switch (seg) {
|
||||
case VM_REG_GUEST_ES:
|
||||
*base = VMX_GUEST_ES_BASE;
|
||||
*lim = VMX_GUEST_ES_LIMIT;
|
||||
*acc = VMX_GUEST_ES_ATTR;
|
||||
break;
|
||||
case VM_REG_GUEST_CS:
|
||||
*base = VMX_GUEST_CS_BASE;
|
||||
*lim = VMX_GUEST_CS_LIMIT;
|
||||
*acc = VMX_GUEST_CS_ATTR;
|
||||
break;
|
||||
case VM_REG_GUEST_SS:
|
||||
*base = VMX_GUEST_SS_BASE;
|
||||
*lim = VMX_GUEST_SS_LIMIT;
|
||||
*acc = VMX_GUEST_SS_ATTR;
|
||||
break;
|
||||
case VM_REG_GUEST_DS:
|
||||
*base = VMX_GUEST_DS_BASE;
|
||||
*lim = VMX_GUEST_DS_LIMIT;
|
||||
*acc = VMX_GUEST_DS_ATTR;
|
||||
break;
|
||||
case VM_REG_GUEST_FS:
|
||||
*base = VMX_GUEST_FS_BASE;
|
||||
*lim = VMX_GUEST_FS_LIMIT;
|
||||
*acc = VMX_GUEST_FS_ATTR;
|
||||
break;
|
||||
case VM_REG_GUEST_GS:
|
||||
*base = VMX_GUEST_GS_BASE;
|
||||
*lim = VMX_GUEST_GS_LIMIT;
|
||||
*acc = VMX_GUEST_GS_ATTR;
|
||||
break;
|
||||
case VM_REG_GUEST_TR:
|
||||
*base = VMX_GUEST_TR_BASE;
|
||||
*lim = VMX_GUEST_TR_LIMIT;
|
||||
*acc = VMX_GUEST_TR_ATTR;
|
||||
break;
|
||||
case VM_REG_GUEST_LDTR:
|
||||
*base = VMX_GUEST_LDTR_BASE;
|
||||
*lim = VMX_GUEST_LDTR_LIMIT;
|
||||
*acc = VMX_GUEST_LDTR_ATTR;
|
||||
break;
|
||||
case VM_REG_GUEST_IDTR:
|
||||
*base = VMX_GUEST_IDTR_BASE;
|
||||
*lim = VMX_GUEST_IDTR_LIMIT;
|
||||
*acc = 0xffffffff;
|
||||
break;
|
||||
case VM_REG_GUEST_GDTR:
|
||||
*base = VMX_GUEST_GDTR_BASE;
|
||||
*lim = VMX_GUEST_GDTR_LIMIT;
|
||||
*acc = 0xffffffff;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int32_t get_vmcs_field(int ident)
|
||||
{
|
||||
switch (ident) {
|
||||
case VM_REG_GUEST_CR0:
|
||||
return VMX_GUEST_CR0;
|
||||
case VM_REG_GUEST_CR3:
|
||||
return VMX_GUEST_CR3;
|
||||
case VM_REG_GUEST_CR4:
|
||||
return VMX_GUEST_CR4;
|
||||
case VM_REG_GUEST_DR7:
|
||||
return VMX_GUEST_DR7;
|
||||
case VM_REG_GUEST_RSP:
|
||||
return VMX_GUEST_RSP;
|
||||
case VM_REG_GUEST_RIP:
|
||||
return VMX_GUEST_RIP;
|
||||
case VM_REG_GUEST_RFLAGS:
|
||||
return VMX_GUEST_RFLAGS;
|
||||
case VM_REG_GUEST_ES:
|
||||
return VMX_GUEST_ES_SEL;
|
||||
case VM_REG_GUEST_CS:
|
||||
return VMX_GUEST_CS_SEL;
|
||||
case VM_REG_GUEST_SS:
|
||||
return VMX_GUEST_SS_SEL;
|
||||
case VM_REG_GUEST_DS:
|
||||
return VMX_GUEST_DS_SEL;
|
||||
case VM_REG_GUEST_FS:
|
||||
return VMX_GUEST_FS_SEL;
|
||||
case VM_REG_GUEST_GS:
|
||||
return VMX_GUEST_GS_SEL;
|
||||
case VM_REG_GUEST_TR:
|
||||
return VMX_GUEST_TR_SEL;
|
||||
case VM_REG_GUEST_LDTR:
|
||||
return VMX_GUEST_LDTR_SEL;
|
||||
case VM_REG_GUEST_EFER:
|
||||
return VMX_GUEST_IA32_EFER_FULL;
|
||||
case VM_REG_GUEST_PDPTE0:
|
||||
return VMX_GUEST_PDPTE0_FULL;
|
||||
case VM_REG_GUEST_PDPTE1:
|
||||
return VMX_GUEST_PDPTE1_FULL;
|
||||
case VM_REG_GUEST_PDPTE2:
|
||||
return VMX_GUEST_PDPTE2_FULL;
|
||||
case VM_REG_GUEST_PDPTE3:
|
||||
return VMX_GUEST_PDPTE3_FULL;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
static enum vm_cpu_mode get_vmx_cpu_mode(void)
|
||||
{
|
||||
uint32_t csar;
|
||||
|
||||
if (exec_vmread(VMX_GUEST_IA32_EFER_FULL) & EFER_LMA) {
|
||||
csar = exec_vmread(VMX_GUEST_CS_ATTR);
|
||||
if (csar & 0x2000)
|
||||
return CPU_MODE_64BIT; /* CS.L = 1 */
|
||||
else
|
||||
return CPU_MODE_COMPATIBILITY;
|
||||
} else if (exec_vmread(VMX_GUEST_CR0) & CR0_PE) {
|
||||
return CPU_MODE_PROTECTED;
|
||||
} else {
|
||||
return CPU_MODE_REAL;
|
||||
}
|
||||
}
|
||||
|
||||
static void get_guest_paging_info(struct vcpu *vcpu, struct emul_cnx *emul_cnx)
|
||||
{
|
||||
uint32_t cpl, csar;
|
||||
|
||||
ASSERT(emul_cnx != NULL && vcpu != NULL, "Error in input arguments");
|
||||
|
||||
csar = exec_vmread(VMX_GUEST_CS_ATTR);
|
||||
cpl = (csar >> 5) & 3;
|
||||
emul_cnx->paging.cr3 =
|
||||
vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3;
|
||||
emul_cnx->paging.cpl = cpl;
|
||||
emul_cnx->paging.cpu_mode = get_vmx_cpu_mode();
|
||||
emul_cnx->paging.paging_mode = PAGING_MODE_FLAT;/*maybe change later*/
|
||||
}
|
||||
|
||||
static int mmio_read(struct vcpu *vcpu, __unused uint64_t gpa, uint64_t *rval,
|
||||
__unused int size, __unused void *arg)
|
||||
{
|
||||
struct emul_cnx *emul_cnx;
|
||||
struct mem_io *mmio;
|
||||
|
||||
if (!vcpu)
|
||||
return -EINVAL;
|
||||
|
||||
emul_cnx = &per_cpu(g_inst_ctxt, vcpu->pcpu_id);
|
||||
mmio = emul_cnx->mmio;
|
||||
|
||||
ASSERT(mmio != NULL, "invalid mmio when reading");
|
||||
|
||||
*rval = mmio->value;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mmio_write(struct vcpu *vcpu, __unused uint64_t gpa, uint64_t wval,
|
||||
__unused int size, __unused void *arg)
|
||||
{
|
||||
struct emul_cnx *emul_cnx;
|
||||
struct mem_io *mmio;
|
||||
|
||||
if (!vcpu)
|
||||
return -EINVAL;
|
||||
|
||||
emul_cnx = &per_cpu(g_inst_ctxt, vcpu->pcpu_id);
|
||||
mmio = emul_cnx->mmio;
|
||||
|
||||
ASSERT(mmio != NULL, "invalid mmio when writing");
|
||||
|
||||
mmio->value = wval;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void vm_gva2gpa(struct vcpu *vcpu, uint64_t gva, uint64_t *gpa)
|
||||
{
|
||||
|
||||
ASSERT(gpa != NULL, "Error in input arguments");
|
||||
ASSERT(vcpu != NULL,
|
||||
"Invalid vcpu id when gva2gpa");
|
||||
|
||||
*gpa = gva2gpa(vcpu->vm,
|
||||
vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3, gva);
|
||||
}
|
||||
|
||||
int analyze_instruction(struct vcpu *vcpu, struct mem_io *mmio)
|
||||
{
|
||||
uint64_t guest_rip_gva, guest_rip_gpa;
|
||||
char *guest_rip_hva;
|
||||
struct emul_cnx *emul_cnx;
|
||||
uint32_t csar;
|
||||
int retval = 0;
|
||||
enum vm_cpu_mode cpu_mode;
|
||||
int i;
|
||||
|
||||
guest_rip_gva =
|
||||
vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].rip;
|
||||
|
||||
guest_rip_gpa = gva2gpa(vcpu->vm,
|
||||
vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3,
|
||||
guest_rip_gva);
|
||||
|
||||
guest_rip_hva = GPA2HVA(vcpu->vm, guest_rip_gpa);
|
||||
emul_cnx = &per_cpu(g_inst_ctxt, vcpu->pcpu_id);
|
||||
emul_cnx->mmio = mmio;
|
||||
emul_cnx->vcpu = vcpu;
|
||||
|
||||
/* for now, HVA <-> HPA is a 1:1 mapping, so using the HPA is OK */
|
||||
vie_init(&emul_cnx->vie, guest_rip_hva,
|
||||
vcpu->arch_vcpu.inst_len);
|
||||
|
||||
get_guest_paging_info(vcpu, emul_cnx);
|
||||
csar = exec_vmread(VMX_GUEST_CS_ATTR);
|
||||
cpu_mode = get_vmx_cpu_mode();
|
||||
|
||||
mmio->private_data = emul_cnx;
|
||||
|
||||
retval = vmm_decode_instruction(vcpu, guest_rip_gva,
|
||||
cpu_mode, SEG_DESC_DEF32(csar), &emul_cnx->vie);
|
||||
|
||||
mmio->access_size = emul_cnx->vie.opsize;
|
||||
|
||||
if (retval != 0) {
|
||||
/* dump the instruction when decoding failed */
|
||||
pr_err("decode following instruction failed @ 0x%016llx:",
|
||||
exec_vmread(VMX_GUEST_RIP));
|
||||
for (i = 0; i < emul_cnx->vie.num_valid; i++) {
|
||||
if (i >= VIE_INST_SIZE)
|
||||
break;
|
||||
|
||||
if (i == 0)
|
||||
pr_err("\n");
|
||||
pr_err("%d=%02hhx ",
|
||||
i, emul_cnx->vie.inst[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
int emulate_instruction(struct vcpu *vcpu, struct mem_io *mmio)
|
||||
{
|
||||
struct emul_cnx *emul_cnx = (struct emul_cnx *)(mmio->private_data);
|
||||
struct vm_guest_paging *paging = &emul_cnx->paging;
|
||||
int i, retval = 0;
|
||||
uint64_t gpa = mmio->paddr;
|
||||
mem_region_read_t mread = mmio_read;
|
||||
mem_region_write_t mwrite = mmio_write;
|
||||
|
||||
retval = vmm_emulate_instruction(vcpu, gpa,
|
||||
&emul_cnx->vie, paging, mread, mwrite, &retval);
|
||||
|
||||
if (retval != 0) {
|
||||
/* dump the instruction when emulation failed */
|
||||
pr_err("emulate following instruction failed @ 0x%016llx:",
|
||||
exec_vmread(VMX_GUEST_RIP));
|
||||
for (i = 0; i < emul_cnx->vie.num_valid; i++) {
|
||||
if (i >= VIE_INST_SIZE)
|
||||
break;
|
||||
|
||||
if (i == 0)
|
||||
pr_err("\n");
|
||||
|
||||
pr_err("%d=%02hhx ",
|
||||
i, emul_cnx->vie.inst[i]);
|
||||
}
|
||||
}
|
||||
return retval;
|
||||
}
|
203
hypervisor/arch/x86/guest/instr_emul_wrapper.h
Normal file
203
hypervisor/arch/x86/guest/instr_emul_wrapper.h
Normal file
@@ -0,0 +1,203 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* Copyright (c) 2017 Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <cpu.h>
|
||||
|
||||
struct vie_op {
|
||||
uint8_t op_byte; /* actual opcode byte */
|
||||
uint8_t op_type; /* type of operation (e.g. MOV) */
|
||||
uint16_t op_flags;
|
||||
};
|
||||
|
||||
#define VIE_INST_SIZE 15
|
||||
struct vie {
|
||||
uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */
|
||||
uint8_t num_valid; /* size of the instruction */
|
||||
uint8_t num_processed;
|
||||
|
||||
uint8_t addrsize:4, opsize:4; /* address and operand sizes */
|
||||
uint8_t rex_w:1, /* REX prefix */
|
||||
rex_r:1,
|
||||
rex_x:1,
|
||||
rex_b:1,
|
||||
rex_present:1,
|
||||
repz_present:1, /* REP/REPE/REPZ prefix */
|
||||
repnz_present:1, /* REPNE/REPNZ prefix */
|
||||
opsize_override:1, /* Operand size override */
|
||||
addrsize_override:1, /* Address size override */
|
||||
segment_override:1; /* Segment override */
|
||||
|
||||
uint8_t mod:2, /* ModRM byte */
|
||||
reg:4,
|
||||
rm:4;
|
||||
|
||||
uint8_t ss:2, /* SIB byte */
|
||||
index:4,
|
||||
base:4;
|
||||
|
||||
uint8_t disp_bytes;
|
||||
uint8_t imm_bytes;
|
||||
|
||||
uint8_t scale;
|
||||
int base_register; /* VM_REG_GUEST_xyz */
|
||||
int index_register; /* VM_REG_GUEST_xyz */
|
||||
int segment_register; /* VM_REG_GUEST_xyz */
|
||||
|
||||
int64_t displacement; /* optional addr displacement */
|
||||
int64_t immediate; /* optional immediate operand */
|
||||
|
||||
uint8_t decoded; /* set to 1 if successfully decoded */
|
||||
|
||||
struct vie_op op; /* opcode description */
|
||||
};
|
||||
|
||||
#define PSL_C 0x00000001 /* carry bit */
|
||||
#define PSL_PF 0x00000004 /* parity bit */
|
||||
#define PSL_AF 0x00000010 /* bcd carry bit */
|
||||
#define PSL_Z 0x00000040 /* zero bit */
|
||||
#define PSL_N 0x00000080 /* negative bit */
|
||||
#define PSL_T 0x00000100 /* trace enable bit */
|
||||
#define PSL_I 0x00000200 /* interrupt enable bit */
|
||||
#define PSL_D 0x00000400 /* string instruction direction bit */
|
||||
#define PSL_V 0x00000800 /* overflow bit */
|
||||
#define PSL_IOPL 0x00003000 /* i/o privilege level */
|
||||
#define PSL_NT 0x00004000 /* nested task bit */
|
||||
#define PSL_RF 0x00010000 /* resume flag bit */
|
||||
#define PSL_VM 0x00020000 /* virtual 8086 mode bit */
|
||||
#define PSL_AC 0x00040000 /* alignment checking */
|
||||
#define PSL_VIF 0x00080000 /* virtual interrupt enable */
|
||||
#define PSL_VIP 0x00100000 /* virtual interrupt pending */
|
||||
#define PSL_ID 0x00200000 /* identification bit */
|
||||
|
||||
/*
|
||||
* The 'access' field has the format specified in Table 21-2 of the Intel
|
||||
* Architecture Manual vol 3b.
|
||||
*
|
||||
* XXX The contents of the 'access' field are architecturally defined except
|
||||
* bit 16 - Segment Unusable.
|
||||
*/
|
||||
struct seg_desc {
|
||||
uint64_t base;
|
||||
uint32_t limit;
|
||||
uint32_t access;
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Protections are chosen from these bits, or-ed together
|
||||
*/
|
||||
#define PROT_NONE 0x00 /* no permissions */
|
||||
#define PROT_READ 0x01 /* pages can be read */
|
||||
#define PROT_WRITE 0x02 /* pages can be written */
|
||||
#define PROT_EXEC 0x04 /* pages can be executed */
|
||||
|
||||
#define SEG_DESC_TYPE(access) ((access) & 0x001f)
|
||||
#define SEG_DESC_DPL(access) (((access) >> 5) & 0x3)
|
||||
#define SEG_DESC_PRESENT(access) (((access) & 0x0080) ? 1 : 0)
|
||||
#define SEG_DESC_DEF32(access) (((access) & 0x4000) ? 1 : 0)
|
||||
#define SEG_DESC_GRANULARITY(access) (((access) & 0x8000) ? 1 : 0)
|
||||
#define SEG_DESC_UNUSABLE(access) (((access) & 0x10000) ? 1 : 0)
|
||||
|
||||
enum vm_cpu_mode {
|
||||
CPU_MODE_REAL,
|
||||
CPU_MODE_PROTECTED,
|
||||
CPU_MODE_COMPATIBILITY, /* IA-32E mode (CS.L = 0) */
|
||||
CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */
|
||||
};
|
||||
|
||||
enum vm_paging_mode {
|
||||
PAGING_MODE_FLAT,
|
||||
PAGING_MODE_32,
|
||||
PAGING_MODE_PAE,
|
||||
PAGING_MODE_64,
|
||||
};
|
||||
|
||||
struct vm_guest_paging {
|
||||
uint64_t cr3;
|
||||
int cpl;
|
||||
enum vm_cpu_mode cpu_mode;
|
||||
enum vm_paging_mode paging_mode;
|
||||
};
|
||||
|
||||
/*
|
||||
* Identifiers for architecturally defined registers.
|
||||
*/
|
||||
enum vm_reg_name {
|
||||
VM_REG_GUEST_RAX,
|
||||
VM_REG_GUEST_RBX,
|
||||
VM_REG_GUEST_RCX,
|
||||
VM_REG_GUEST_RDX,
|
||||
VM_REG_GUEST_RBP,
|
||||
VM_REG_GUEST_RSI,
|
||||
VM_REG_GUEST_R8,
|
||||
VM_REG_GUEST_R9,
|
||||
VM_REG_GUEST_R10,
|
||||
VM_REG_GUEST_R11,
|
||||
VM_REG_GUEST_R12,
|
||||
VM_REG_GUEST_R13,
|
||||
VM_REG_GUEST_R14,
|
||||
VM_REG_GUEST_R15,
|
||||
VM_REG_GUEST_RDI,
|
||||
VM_REG_GUEST_CR0,
|
||||
VM_REG_GUEST_CR3,
|
||||
VM_REG_GUEST_CR4,
|
||||
VM_REG_GUEST_DR7,
|
||||
VM_REG_GUEST_RSP,
|
||||
VM_REG_GUEST_RIP,
|
||||
VM_REG_GUEST_RFLAGS,
|
||||
VM_REG_GUEST_ES,
|
||||
VM_REG_GUEST_CS,
|
||||
VM_REG_GUEST_SS,
|
||||
VM_REG_GUEST_DS,
|
||||
VM_REG_GUEST_FS,
|
||||
VM_REG_GUEST_GS,
|
||||
VM_REG_GUEST_LDTR,
|
||||
VM_REG_GUEST_TR,
|
||||
VM_REG_GUEST_IDTR,
|
||||
VM_REG_GUEST_GDTR,
|
||||
VM_REG_GUEST_EFER,
|
||||
VM_REG_GUEST_CR2,
|
||||
VM_REG_GUEST_PDPTE0,
|
||||
VM_REG_GUEST_PDPTE1,
|
||||
VM_REG_GUEST_PDPTE2,
|
||||
VM_REG_GUEST_PDPTE3,
|
||||
VM_REG_GUEST_INTR_SHADOW,
|
||||
VM_REG_LAST
|
||||
};
|
||||
|
||||
typedef unsigned long u_long;
|
||||
|
||||
int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval);
|
||||
int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val);
|
||||
int vm_get_seg_desc(struct vcpu *vcpu, int reg,
|
||||
struct seg_desc *ret_desc);
|
||||
int vm_set_seg_desc(struct vcpu *vcpu, int reg,
|
||||
struct seg_desc *desc);
|
||||
int vm_restart_instruction(struct vcpu *vcpu);
|
||||
void vm_gva2gpa(struct vcpu *vcpu, uint64_t gla, uint64_t *gpa);
|
118
hypervisor/arch/x86/guest/time.h
Normal file
118
hypervisor/arch/x86/guest/time.h
Normal file
@@ -0,0 +1,118 @@
|
||||
/*-
|
||||
* Copyright (c) 1982, 1986, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
* Copyright (c) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)time.h 8.5 (Berkeley) 5/4/95
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _TIME_H_
|
||||
#define _TIME_H_
|
||||
|
||||
struct callout {
|
||||
void *c_arg; /* function argument */
|
||||
void (*c_func)(void *); /* function to call */
|
||||
short c_flags; /* User State */
|
||||
};
|
||||
|
||||
#define CALLOUT_ACTIVE 0x0002 /* callout is currently active */
|
||||
#define CALLOUT_PENDING 0x0004 /* callout is waiting for timeout */
|
||||
#define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE)
|
||||
#define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE)
|
||||
#define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING)
|
||||
|
||||
typedef int64_t time_t;
|
||||
typedef int64_t sbintime_t;
|
||||
|
||||
struct bintime {
|
||||
time_t sec;
|
||||
uint64_t frac;
|
||||
};
|
||||
|
||||
static inline void
|
||||
bintime_add(struct bintime *_bt, const struct bintime *_bt2)
|
||||
{
|
||||
uint64_t _u;
|
||||
|
||||
_u = _bt->frac;
|
||||
_bt->frac += _bt2->frac;
|
||||
if (_u > _bt->frac)
|
||||
_bt->sec++;
|
||||
_bt->sec += _bt2->sec;
|
||||
}
|
||||
|
||||
static inline void
|
||||
bintime_sub(struct bintime *_bt, const struct bintime *_bt2)
|
||||
{
|
||||
uint64_t _u;
|
||||
|
||||
_u = _bt->frac;
|
||||
_bt->frac -= _bt2->frac;
|
||||
if (_u < _bt->frac)
|
||||
_bt->sec--;
|
||||
_bt->sec -= _bt2->sec;
|
||||
}
|
||||
|
||||
static inline void
|
||||
bintime_mul(struct bintime *_bt, uint32_t _x)
|
||||
{
|
||||
uint64_t _p1, _p2;
|
||||
|
||||
_p1 = (_bt->frac & 0xffffffffull) * _x;
|
||||
_p2 = (_bt->frac >> 32) * _x + (_p1 >> 32);
|
||||
_bt->sec *= _x;
|
||||
_bt->sec += (_p2 >> 32);
|
||||
_bt->frac = (_p2 << 32) | (_p1 & 0xffffffffull);
|
||||
}
|
||||
|
||||
#define bintime_cmp(a, b, cmp) \
|
||||
(((a)->sec == (b)->sec) ? \
|
||||
((a)->frac cmp(b)->frac) : \
|
||||
((a)->sec cmp(b)->sec))
|
||||
|
||||
#define SBT_1S ((sbintime_t)1 << 32)
|
||||
#define SBT_1US (SBT_1S / 1000000)
|
||||
|
||||
#define BT2FREQ(bt) \
|
||||
(((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) / \
|
||||
((bt)->frac >> 1))
|
||||
|
||||
#define FREQ2BT(freq, bt) \
|
||||
{ \
|
||||
(bt)->sec = 0; \
|
||||
(bt)->frac = ((uint64_t)0x8000000000000000 / (freq)) << 1; \
|
||||
}
|
||||
|
||||
static inline sbintime_t
|
||||
bttosbt(const struct bintime _bt)
|
||||
{
|
||||
|
||||
return (((sbintime_t)_bt.sec << 32) + (_bt.frac >> 32));
|
||||
}
|
||||
|
||||
#endif /* !_TIME_H_ */
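A minimal sketch (editor's illustration, not part of this header; the helper name and the caller-supplied frequency are assumptions) of the intended fixed-point usage: convert a timer frequency into a per-tick bintime with FREQ2BT, scale it by an elapsed tick count, and return the result as 32.32 fixed-point seconds.

/* illustrative only: accumulate 'ticks' periods of a 'freq' Hz timer */
static sbintime_t example_ticks_to_sbt(uint32_t freq, uint32_t ticks)
{
    struct bintime per_tick, total = { 0, 0 };

    FREQ2BT(freq, &per_tick);      /* per_tick = 1/freq seconds */
    bintime_mul(&per_tick, ticks); /* scale by the elapsed tick count */
    bintime_add(&total, &per_tick);

    return bttosbt(total);         /* 32.32 fixed-point seconds */
}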
|
357
hypervisor/arch/x86/guest/vcpu.c
Normal file
357
hypervisor/arch/x86/guest/vcpu.c
Normal file
@@ -0,0 +1,357 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <schedule.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
vm_sw_loader_t vm_sw_loader;
|
||||
|
||||
/***********************************************************************
|
||||
* vcpu_id/pcpu_id mapping table:
|
||||
*
|
||||
* if
|
||||
* VM0_CPUS[2] = {0, 2} , VM1_CPUS[2] = {3, 1};
|
||||
* then
|
||||
* for physical CPU 0 : vcpu->pcpu_id = 0, vcpu->vcpu_id = 0, vmid = 0;
|
||||
* for physical CPU 2 : vcpu->pcpu_id = 2, vcpu->vcpu_id = 1, vmid = 0;
|
||||
* for physical CPU 3 : vcpu->pcpu_id = 3, vcpu->vcpu_id = 0, vmid = 1;
|
||||
* for physical CPU 1 : vcpu->pcpu_id = 1, vcpu->vcpu_id = 1, vmid = 1;
|
||||
*
|
||||
***********************************************************************/
|
||||
int create_vcpu(int cpu_id, struct vm *vm, struct vcpu **rtn_vcpu_handle)
|
||||
{
|
||||
struct vcpu *vcpu;
|
||||
|
||||
ASSERT(vm != NULL, "");
|
||||
ASSERT(rtn_vcpu_handle != NULL, "");
|
||||
|
||||
pr_info("Creating VCPU %d", cpu_id);
|
||||
|
||||
/* Allocate memory for VCPU */
|
||||
vcpu = calloc(1, sizeof(struct vcpu));
|
||||
ASSERT(vcpu != NULL, "");
|
||||
|
||||
/* Initialize the physical CPU ID for this VCPU */
|
||||
vcpu->pcpu_id = cpu_id;
|
||||
|
||||
/* Initialize the parent VM reference */
|
||||
vcpu->vm = vm;
|
||||
|
||||
/* Initialize the virtual ID for this VCPU */
|
||||
/* FIXME:
* We assume that vcpus are always destroyed in one
* shot (e.g. when the vm is destroyed). If we need to support
* destroying a specific vcpu on the fly, this vcpu_id assignment
* needs to be revised.
*/
|
||||
|
||||
/*
|
||||
* vcpu->vcpu_id = vm->hw.created_vcpus;
|
||||
* vm->hw.created_vcpus++;
|
||||
*/
|
||||
vcpu->vcpu_id = atomic_xadd_int(&vm->hw.created_vcpus, 1);
|
||||
/* vm->hw.vcpu_array[vcpu->vcpu_id] = vcpu; */
|
||||
atomic_store_rel_64(
|
||||
(unsigned long *)&vm->hw.vcpu_array[vcpu->vcpu_id],
|
||||
(unsigned long)vcpu);
|
||||
|
||||
ASSERT(vcpu->vcpu_id < vm->hw.num_vcpus,
|
||||
"Allocated vcpu_id is out of range!");
|
||||
|
||||
per_cpu(vcpu, cpu_id) = vcpu;
|
||||
|
||||
pr_info("PCPU%d is working as VM%d VCPU%d, Role: %s",
|
||||
vcpu->pcpu_id, vcpu->vm->attr.id, vcpu->vcpu_id,
|
||||
is_vcpu_bsp(vcpu) ? "PRIMARY" : "SECONDARY");
|
||||
|
||||
/* Is this VCPU a VM BSP, create page hierarchy for this VM */
|
||||
if (is_vcpu_bsp(vcpu)) {
|
||||
/* Set up temporary guest page tables */
|
||||
vm->arch_vm.guest_pml4 = create_guest_paging(vm);
|
||||
pr_info("VM *d VCPU %d CR3: 0x%016llx ",
|
||||
vm->attr.id, vcpu->vcpu_id, vm->arch_vm.guest_pml4);
|
||||
}
|
||||
|
||||
/* Allocate VMCS region for this VCPU */
|
||||
vcpu->arch_vcpu.vmcs = alloc_page();
|
||||
ASSERT(vcpu->arch_vcpu.vmcs != NULL, "");
|
||||
|
||||
/* Memset VMCS region for this VCPU */
|
||||
memset(vcpu->arch_vcpu.vmcs, 0, CPU_PAGE_SIZE);
|
||||
|
||||
/* Initialize exception field in VCPU context */
|
||||
vcpu->arch_vcpu.exception_info.exception = -1;
|
||||
|
||||
/* Initialize cur context */
|
||||
vcpu->arch_vcpu.cur_context = NORMAL_WORLD;
|
||||
|
||||
/* Create per vcpu vlapic */
|
||||
vlapic_create(vcpu);
|
||||
|
||||
/* Populate the return handle */
|
||||
*rtn_vcpu_handle = vcpu;
|
||||
|
||||
vcpu->launched = false;
|
||||
vcpu->paused_cnt = 0;
|
||||
vcpu->running = 0;
|
||||
vcpu->ioreq_pending = 0;
|
||||
vcpu->arch_vcpu.nr_sipi = 0;
|
||||
vcpu->pending_pre_work = 0;
|
||||
vcpu->state = VCPU_INIT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int start_vcpu(struct vcpu *vcpu)
|
||||
{
|
||||
uint64_t rip, instlen;
|
||||
struct run_context *cur_context =
|
||||
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
|
||||
int64_t status = 0;
|
||||
|
||||
ASSERT(vcpu != NULL, "Incorrect arguments");
|
||||
|
||||
/* If this VCPU is not already launched, launch it */
|
||||
if (!vcpu->launched) {
|
||||
pr_info("VM %d Starting VCPU %d",
|
||||
vcpu->vm->attr.id, vcpu->vcpu_id);
|
||||
|
||||
/* Set vcpu launched */
|
||||
vcpu->launched = true;
|
||||
|
||||
/* To avoid the recycled VMCS reusing stale RSB entries, set IBPB.
* NOTE: this should be done every time the VMCS is switched;
* currently there is no other place where a VMCS switch happens.
* Please add an IBPB write for future VMCS switch cases (like trusty).
*/
|
||||
if (ibrs_type == IBRS_RAW)
|
||||
msr_write(MSR_IA32_PRED_CMD, PRED_SET_IBPB);
|
||||
|
||||
/* Launch the VM */
|
||||
status = vmx_vmrun(cur_context, VM_LAUNCH, ibrs_type);
|
||||
|
||||
/* See if VM launched successfully */
|
||||
if (status == 0) {
|
||||
if (is_vcpu_bsp(vcpu)) {
|
||||
pr_info("VM %d VCPU %d successfully launched",
|
||||
vcpu->vm->attr.id, vcpu->vcpu_id);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* This VCPU was already launched, check if the last guest
|
||||
* instruction needs to be repeated and resume VCPU accordingly
|
||||
*/
|
||||
instlen = vcpu->arch_vcpu.inst_len;
|
||||
rip = cur_context->rip;
|
||||
exec_vmwrite(VMX_GUEST_RIP, ((rip + instlen) &
|
||||
0xFFFFFFFFFFFFFFFF));
|
||||
|
||||
/* Resume the VM */
|
||||
status = vmx_vmrun(cur_context, VM_RESUME, ibrs_type);
|
||||
}
|
||||
|
||||
/* Save guest CR3 register */
|
||||
cur_context->cr3 = exec_vmread(VMX_GUEST_CR3);
|
||||
|
||||
/* Obtain current VCPU instruction pointer and length */
|
||||
cur_context->rip = exec_vmread(VMX_GUEST_RIP);
|
||||
vcpu->arch_vcpu.inst_len = exec_vmread(VMX_EXIT_INSTR_LEN);
|
||||
|
||||
cur_context->rsp = exec_vmread(VMX_GUEST_RSP);
|
||||
cur_context->rflags = exec_vmread(VMX_GUEST_RFLAGS);
|
||||
|
||||
/* Obtain VM exit reason */
|
||||
vcpu->arch_vcpu.exit_reason = exec_vmread(VMX_EXIT_REASON);
|
||||
|
||||
if (status != 0) {
|
||||
/* refer to the Intel 64 and IA-32 SDM, Vol. 3, Section 24.9.1 */
|
||||
if (vcpu->arch_vcpu.exit_reason & VMX_VMENTRY_FAIL)
|
||||
pr_fatal("vmentry fail reason=%lx", vcpu->arch_vcpu.exit_reason);
|
||||
else
|
||||
pr_fatal("vmexit fail err_inst=%lx", exec_vmread(VMX_INSTR_ERROR));
|
||||
|
||||
ASSERT(status == 0, "vm fail");
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
int shutdown_vcpu(__unused struct vcpu *vcpu)
|
||||
{
|
||||
/* TODO : Implement VCPU shutdown sequence */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int destroy_vcpu(struct vcpu *vcpu)
|
||||
{
|
||||
ASSERT(vcpu != NULL, "Incorrect arguments");
|
||||
|
||||
/* vcpu->vm->hw.vcpu_array[vcpu->vcpu_id] = NULL; */
|
||||
atomic_store_rel_64(
|
||||
(unsigned long *)&vcpu->vm->hw.vcpu_array[vcpu->vcpu_id],
|
||||
(unsigned long)NULL);
|
||||
|
||||
atomic_subtract_int(&vcpu->vm->hw.created_vcpus, 1);
|
||||
|
||||
vlapic_free(vcpu);
|
||||
free(vcpu->arch_vcpu.vmcs);
|
||||
free(vcpu->guest_msrs);
|
||||
free_pcpu(vcpu->pcpu_id);
|
||||
free(vcpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* NOTE:
|
||||
* vcpu should be paused before calling this function.
|
||||
*/
|
||||
void reset_vcpu(struct vcpu *vcpu)
|
||||
{
|
||||
struct vlapic *vlapic;
|
||||
|
||||
pr_dbg("vcpu%d reset", vcpu->vcpu_id);
|
||||
ASSERT(vcpu->state != VCPU_RUNNING,
|
||||
"reset vcpu when it's running");
|
||||
|
||||
if (vcpu->state == VCPU_INIT)
|
||||
return;
|
||||
|
||||
vcpu->state = VCPU_INIT;
|
||||
|
||||
vcpu->launched = false;
|
||||
vcpu->paused_cnt = 0;
|
||||
vcpu->running = 0;
|
||||
vcpu->ioreq_pending = 0;
|
||||
vcpu->arch_vcpu.nr_sipi = 0;
|
||||
vcpu->pending_pre_work = 0;
|
||||
vlapic = vcpu->arch_vcpu.vlapic;
|
||||
vlapic_init(vlapic);
|
||||
}
|
||||
|
||||
void init_vcpu(struct vcpu *vcpu)
|
||||
{
|
||||
if (is_vcpu_bsp(vcpu))
|
||||
vcpu->arch_vcpu.cpu_mode = PAGE_PROTECTED_MODE;
|
||||
else
|
||||
vcpu->arch_vcpu.cpu_mode = REAL_MODE;
|
||||
/* init_vmcs is delayed to vcpu vmcs launch first time */
|
||||
}
|
||||
|
||||
void pause_vcpu(struct vcpu *vcpu, enum vcpu_state new_state)
|
||||
{
|
||||
int pcpu_id = get_cpu_id();
|
||||
|
||||
pr_dbg("vcpu%d paused, new state: %d",
|
||||
vcpu->vcpu_id, new_state);
|
||||
|
||||
vcpu->prev_state = vcpu->state;
|
||||
vcpu->state = new_state;
|
||||
|
||||
get_schedule_lock(pcpu_id);
|
||||
if (atomic_load_acq_32(&vcpu->running) == 1) {
|
||||
remove_vcpu_from_runqueue(vcpu);
|
||||
make_reschedule_request(vcpu);
|
||||
release_schedule_lock(pcpu_id);
|
||||
|
||||
if (vcpu->pcpu_id != pcpu_id) {
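/*
 * The vcpu is running on another pcpu: busy-wait until that pcpu
 * handles the reschedule request and clears vcpu->running.
 */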
|
||||
while (atomic_load_acq_32(&vcpu->running) == 1)
|
||||
__asm__ __volatile("pause" ::: "memory");
|
||||
}
|
||||
} else {
|
||||
remove_vcpu_from_runqueue(vcpu);
|
||||
release_schedule_lock(pcpu_id);
|
||||
}
|
||||
}
|
||||
|
||||
void resume_vcpu(struct vcpu *vcpu)
|
||||
{
|
||||
pr_dbg("vcpu%d resumed", vcpu->vcpu_id);
|
||||
|
||||
vcpu->state = vcpu->prev_state;
|
||||
|
||||
get_schedule_lock(vcpu->pcpu_id);
|
||||
if (vcpu->state == VCPU_RUNNING) {
|
||||
add_vcpu_to_runqueue(vcpu);
|
||||
make_reschedule_request(vcpu);
|
||||
}
|
||||
release_schedule_lock(vcpu->pcpu_id);
|
||||
}
|
||||
|
||||
void schedule_vcpu(struct vcpu *vcpu)
|
||||
{
|
||||
vcpu->state = VCPU_RUNNING;
|
||||
pr_dbg("vcpu%d scheduled", vcpu->vcpu_id);
|
||||
|
||||
get_schedule_lock(vcpu->pcpu_id);
|
||||
add_vcpu_to_runqueue(vcpu);
|
||||
make_reschedule_request(vcpu);
|
||||
release_schedule_lock(vcpu->pcpu_id);
|
||||
}
|
||||
|
||||
/* helper function for vcpu creation */
|
||||
int prepare_vcpu(struct vm *vm, int pcpu_id)
|
||||
{
|
||||
int ret = 0;
|
||||
struct vcpu *vcpu = NULL;
|
||||
|
||||
ret = create_vcpu(pcpu_id, vm, &vcpu);
|
||||
ASSERT(ret == 0, "vcpu create failed");
|
||||
|
||||
if (is_vcpu_bsp(vcpu)) {
|
||||
/* Load VM SW */
|
||||
if (!vm_sw_loader)
|
||||
vm_sw_loader = general_sw_loader;
|
||||
vm_sw_loader(vm, vcpu);
|
||||
vcpu->arch_vcpu.cpu_mode = PAGE_PROTECTED_MODE;
|
||||
} else {
|
||||
vcpu->arch_vcpu.cpu_mode = REAL_MODE;
|
||||
}
|
||||
|
||||
/* init_vmcs is delayed to vcpu vmcs launch first time */
|
||||
|
||||
/* initialize the vcpu tsc aux */
|
||||
vcpu->msr_tsc_aux_guest = vcpu->vcpu_id;
|
||||
|
||||
set_pcpu_used(pcpu_id);
|
||||
|
||||
INIT_LIST_HEAD(&vcpu->run_list);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void request_vcpu_pre_work(struct vcpu *vcpu, int pre_work_id)
|
||||
{
|
||||
bitmap_set(pre_work_id, &vcpu->pending_pre_work);
|
||||
}
|
662
hypervisor/arch/x86/guest/vioapic.c
Normal file
@@ -0,0 +1,662 @@
|
||||
/*-
|
||||
* Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
|
||||
* Copyright (c) 2013 Neel Natu <neel@freebsd.org>
|
||||
* Copyright (c) 2017 Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "vioapic: " fmt
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
#define IOREGSEL 0x00
|
||||
#define IOWIN 0x10
|
||||
#define IOEOI 0x40
|
||||
|
||||
#define REDIR_ENTRIES_HW 120 /* SOS: aligned with the native IOAPIC */
|
||||
#define REDIR_ENTRIES_UOS 24 /* UOS pins*/
|
||||
#define RTBL_RO_BITS ((uint64_t)(IOAPIC_RTE_REM_IRR | IOAPIC_RTE_DELIVS))
|
||||
|
||||
#define ACRN_DBG_IOAPIC 6
|
||||
|
||||
struct vioapic {
|
||||
struct vm *vm;
|
||||
spinlock_t mtx;
|
||||
uint32_t id;
|
||||
uint32_t ioregsel;
|
||||
struct {
|
||||
uint64_t reg;
|
||||
int acnt; /* sum of pin asserts (+1) and deasserts (-1) */
|
||||
} rtbl[REDIR_ENTRIES_HW];
|
||||
};
|
||||
|
||||
#define VIOAPIC_LOCK(vioapic) spinlock_obtain(&((vioapic)->mtx))
|
||||
#define VIOAPIC_UNLOCK(vioapic) spinlock_release(&((vioapic)->mtx))
|
||||
|
||||
static inline const char *pinstate_str(bool asserted)
|
||||
{
|
||||
return (asserted) ? "asserted" : "deasserted";
|
||||
}
|
||||
|
||||
struct vioapic *
|
||||
vm_ioapic(struct vm *vm)
|
||||
{
|
||||
return (struct vioapic *)vm->arch_vm.virt_ioapic;
|
||||
}
|
||||
|
||||
static void
|
||||
vioapic_send_intr(struct vioapic *vioapic, int pin)
|
||||
{
|
||||
int vector, delmode;
|
||||
uint32_t low, high, dest;
|
||||
bool level, phys;
|
||||
|
||||
if (pin < 0 || pin >= vioapic_pincount(vioapic->vm))
|
||||
pr_err("vioapic_send_intr: invalid pin number %d", pin);
|
||||
|
||||
low = vioapic->rtbl[pin].reg;
|
||||
high = vioapic->rtbl[pin].reg >> 32;
|
||||
|
||||
if ((low & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET) {
|
||||
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: masked", pin);
|
||||
return;
|
||||
}
|
||||
|
||||
phys = ((low & IOAPIC_RTE_DESTMOD) == IOAPIC_RTE_DESTPHY);
|
||||
delmode = low & IOAPIC_RTE_DELMOD;
|
||||
level = low & IOAPIC_RTE_TRGRLVL ? true : false;
|
||||
if (level)
|
||||
vioapic->rtbl[pin].reg |= IOAPIC_RTE_REM_IRR;
|
||||
|
||||
vector = low & IOAPIC_RTE_INTVEC;
|
||||
dest = high >> APIC_ID_SHIFT;
|
||||
vlapic_deliver_intr(vioapic->vm, level, dest, phys, delmode, vector);
|
||||
}
|
||||
|
||||
static void
|
||||
vioapic_set_pinstate(struct vioapic *vioapic, int pin, bool newstate)
|
||||
{
|
||||
int oldcnt, newcnt;
|
||||
bool needintr;
|
||||
|
||||
if (pin < 0 || pin >= vioapic_pincount(vioapic->vm))
|
||||
pr_err("vioapic_set_pinstate: invalid pin number %d", pin);
|
||||
|
||||
oldcnt = vioapic->rtbl[pin].acnt;
|
||||
if (newstate)
|
||||
vioapic->rtbl[pin].acnt++;
|
||||
else
|
||||
vioapic->rtbl[pin].acnt--;
|
||||
newcnt = vioapic->rtbl[pin].acnt;
|
||||
|
||||
if (newcnt < 0) {
|
||||
pr_err("ioapic pin%d: bad acnt %d", pin, newcnt);
|
||||
}
|
||||
|
||||
needintr = false;
|
||||
if (oldcnt == 0 && newcnt == 1) {
|
||||
needintr = true;
|
||||
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: asserted", pin);
|
||||
} else if (oldcnt == 1 && newcnt == 0) {
|
||||
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: deasserted", pin);
|
||||
} else {
|
||||
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: %s, ignored, acnt %d",
|
||||
pin, pinstate_str(newstate), newcnt);
|
||||
}
|
||||
|
||||
if (needintr)
|
||||
vioapic_send_intr(vioapic, pin);
|
||||
}
|
||||
|
||||
enum irqstate {
|
||||
IRQSTATE_ASSERT,
|
||||
IRQSTATE_DEASSERT,
|
||||
IRQSTATE_PULSE
|
||||
};
|
||||
|
||||
static int
|
||||
vioapic_set_irqstate(struct vm *vm, int irq, enum irqstate irqstate)
|
||||
{
|
||||
struct vioapic *vioapic;
|
||||
|
||||
if (irq < 0 || irq >= vioapic_pincount(vm))
|
||||
return -EINVAL;
|
||||
|
||||
vioapic = vm_ioapic(vm);
|
||||
|
||||
VIOAPIC_LOCK(vioapic);
|
||||
switch (irqstate) {
|
||||
case IRQSTATE_ASSERT:
|
||||
vioapic_set_pinstate(vioapic, irq, true);
|
||||
break;
|
||||
case IRQSTATE_DEASSERT:
|
||||
vioapic_set_pinstate(vioapic, irq, false);
|
||||
break;
|
||||
case IRQSTATE_PULSE:
|
||||
vioapic_set_pinstate(vioapic, irq, true);
|
||||
vioapic_set_pinstate(vioapic, irq, false);
|
||||
break;
|
||||
default:
|
||||
panic("vioapic_set_irqstate: invalid irqstate %d", irqstate);
|
||||
}
|
||||
VIOAPIC_UNLOCK(vioapic);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
vioapic_assert_irq(struct vm *vm, int irq)
|
||||
{
|
||||
return vioapic_set_irqstate(vm, irq, IRQSTATE_ASSERT);
|
||||
}
|
||||
|
||||
int
|
||||
vioapic_deassert_irq(struct vm *vm, int irq)
|
||||
{
|
||||
return vioapic_set_irqstate(vm, irq, IRQSTATE_DEASSERT);
|
||||
}
|
||||
|
||||
int
|
||||
vioapic_pulse_irq(struct vm *vm, int irq)
|
||||
{
|
||||
return vioapic_set_irqstate(vm, irq, IRQSTATE_PULSE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reset the vlapic's trigger-mode register to reflect the ioapic pin
|
||||
* configuration.
|
||||
*/
|
||||
void
|
||||
vioapic_update_tmr(struct vcpu *vcpu)
|
||||
{
|
||||
struct vioapic *vioapic;
|
||||
struct vlapic *vlapic;
|
||||
uint32_t low;
|
||||
int delmode, pin, vector;
|
||||
bool level;
|
||||
|
||||
vlapic = vcpu->arch_vcpu.vlapic;
|
||||
vioapic = vm_ioapic(vcpu->vm);
|
||||
|
||||
VIOAPIC_LOCK(vioapic);
|
||||
for (pin = 0; pin < vioapic_pincount(vioapic->vm); pin++) {
|
||||
low = vioapic->rtbl[pin].reg;
|
||||
|
||||
level = low & IOAPIC_RTE_TRGRLVL ? true : false;
|
||||
|
||||
/*
|
||||
* For a level-triggered 'pin' let the vlapic figure out if
|
||||
* an assertion on this 'pin' would result in an interrupt
|
||||
* being delivered to it. If yes, then it will modify the
|
||||
* TMR bit associated with this vector to level-triggered.
|
||||
*/
|
||||
delmode = low & IOAPIC_RTE_DELMOD;
|
||||
vector = low & IOAPIC_RTE_INTVEC;
|
||||
vlapic_set_tmr_one_vec(vlapic, delmode, vector, level);
|
||||
}
|
||||
vlapic_apicv_batch_set_tmr(vlapic);
|
||||
VIOAPIC_UNLOCK(vioapic);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
vioapic_read(struct vioapic *vioapic, uint32_t addr)
|
||||
{
|
||||
int regnum, pin, rshift;
|
||||
|
||||
regnum = addr & 0xff;
|
||||
switch (regnum) {
|
||||
case IOAPIC_ID:
|
||||
return vioapic->id;
|
||||
case IOAPIC_VER:
|
||||
return ((vioapic_pincount(vioapic->vm) - 1) << MAX_RTE_SHIFT)
|
||||
| 0x11;
|
||||
case IOAPIC_ARB:
|
||||
return vioapic->id;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* redirection table entries */
|
||||
if (regnum >= IOAPIC_REDTBL &&
|
||||
regnum < IOAPIC_REDTBL + vioapic_pincount(vioapic->vm) * 2) {
|
||||
pin = (regnum - IOAPIC_REDTBL) / 2;
|
||||
if ((regnum - IOAPIC_REDTBL) % 2)
|
||||
rshift = 32;
|
||||
else
|
||||
rshift = 0;
|
||||
|
||||
return vioapic->rtbl[pin].reg >> rshift;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* An IOAPIC of version 0x20 or later has an EOI register; the CPU can write
* a vector to it to clear the corresponding Remote IRR bits.
|
||||
*/
|
||||
static void
|
||||
vioapic_write_eoi(struct vioapic *vioapic, int32_t vector)
|
||||
{
|
||||
struct vm *vm = vioapic->vm;
|
||||
int pin;
|
||||
|
||||
if (vector < VECTOR_FOR_INTR_START || vector > NR_MAX_VECTOR)
|
||||
pr_err("vioapic_process_eoi: invalid vector %d", vector);
|
||||
|
||||
VIOAPIC_LOCK(vioapic);
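/*
 * For every level-triggered pin latched with this vector, clear the
 * Remote IRR bit; if the pin is still asserted, re-deliver the
 * interrupt, mirroring hardware level-trigger behavior.
 */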
|
||||
for (pin = 0; pin < vioapic_pincount(vm); pin++) {
|
||||
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0)
|
||||
continue;
|
||||
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) !=
|
||||
(uint64_t)vector)
|
||||
continue;
|
||||
|
||||
vioapic->rtbl[pin].reg &= ~IOAPIC_RTE_REM_IRR;
|
||||
if (vioapic->rtbl[pin].acnt > 0) {
|
||||
dev_dbg(ACRN_DBG_IOAPIC,
|
||||
"ioapic pin%d: asserted at eoi, acnt %d",
|
||||
pin, vioapic->rtbl[pin].acnt);
|
||||
vioapic_send_intr(vioapic, pin);
|
||||
}
|
||||
}
|
||||
VIOAPIC_UNLOCK(vioapic);
|
||||
}
|
||||
|
||||
static void
|
||||
vioapic_write(struct vioapic *vioapic, uint32_t addr, uint32_t data)
|
||||
{
|
||||
uint64_t data64, mask64;
|
||||
uint64_t last, new, changed;
|
||||
int regnum, pin, lshift;
|
||||
|
||||
regnum = addr & 0xff;
|
||||
switch (regnum) {
|
||||
case IOAPIC_ID:
|
||||
vioapic->id = data & APIC_ID_MASK;
|
||||
break;
|
||||
case IOAPIC_VER:
|
||||
case IOAPIC_ARB:
|
||||
/* readonly */
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* redirection table entries */
|
||||
if (regnum >= IOAPIC_REDTBL &&
|
||||
regnum < IOAPIC_REDTBL + vioapic_pincount(vioapic->vm) * 2) {
|
||||
pin = (regnum - IOAPIC_REDTBL) / 2;
|
||||
if ((regnum - IOAPIC_REDTBL) % 2)
|
||||
lshift = 32;
|
||||
else
|
||||
lshift = 0;
|
||||
|
||||
last = new = vioapic->rtbl[pin].reg;
|
||||
|
||||
data64 = (uint64_t)data << lshift;
|
||||
mask64 = (uint64_t)0xffffffff << lshift;
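/*
 * Replace only the dword selected by lshift; the read-only bits
 * (Remote IRR, Delivery Status) in RTBL_RO_BITS keep their previous
 * value and cannot be set by the guest.
 */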
|
||||
new &= ~mask64 | RTBL_RO_BITS;
|
||||
new |= data64 & ~RTBL_RO_BITS;
|
||||
|
||||
changed = last ^ new;
|
||||
/* pin0 from vpic mask/unmask */
|
||||
if (pin == 0 && (changed & IOAPIC_RTE_INTMASK)) {
|
||||
/* mask -> unmask */
|
||||
if ((last & IOAPIC_RTE_INTMASK) &&
|
||||
((new & IOAPIC_RTE_INTMASK) == 0)) {
|
||||
if ((vioapic->vm->vpic_wire_mode
|
||||
== VPIC_WIRE_NULL) ||
|
||||
(vioapic->vm->vpic_wire_mode
|
||||
== VPIC_WIRE_INTR)) {
|
||||
atomic_set_int(
|
||||
&vioapic->vm->vpic_wire_mode,
|
||||
VPIC_WIRE_IOAPIC);
|
||||
dev_dbg(ACRN_DBG_IOAPIC,
|
||||
"vpic wire mode -> IOAPIC");
|
||||
} else {
|
||||
pr_err("WARNING: invalid vpic wire mode change");
|
||||
return;
|
||||
}
|
||||
/* unmask -> mask */
|
||||
} else if (((last & IOAPIC_RTE_INTMASK) == 0) &&
|
||||
(new & IOAPIC_RTE_INTMASK)) {
|
||||
if (vioapic->vm->vpic_wire_mode
|
||||
== VPIC_WIRE_IOAPIC) {
|
||||
atomic_set_int(
|
||||
&vioapic->vm->vpic_wire_mode,
|
||||
VPIC_WIRE_INTR);
|
||||
dev_dbg(ACRN_DBG_IOAPIC,
|
||||
"vpic wire mode -> INTR");
|
||||
}
|
||||
}
|
||||
}
|
||||
vioapic->rtbl[pin].reg = new;
|
||||
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: redir table entry %#lx",
|
||||
pin, vioapic->rtbl[pin].reg);
|
||||
/*
|
||||
* If any fields in the redirection table entry (except mask
|
||||
* or polarity) have changed then rendezvous all the vcpus
|
||||
* to update their vlapic trigger-mode registers.
|
||||
*/
|
||||
if (changed & ~(IOAPIC_RTE_INTMASK | IOAPIC_RTE_INTPOL)) {
|
||||
int i;
|
||||
struct vcpu *vcpu;
|
||||
|
||||
dev_dbg(ACRN_DBG_IOAPIC,
|
||||
"ioapic pin%d: recalculate vlapic trigger-mode reg",
|
||||
pin);
|
||||
|
||||
VIOAPIC_UNLOCK(vioapic);
|
||||
|
||||
foreach_vcpu(i, vioapic->vm, vcpu) {
|
||||
vcpu_make_request(vcpu, ACRN_REQUEST_TMR_UPDATE);
|
||||
}
|
||||
VIOAPIC_LOCK(vioapic);
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate an interrupt if the following conditions are met:
|
||||
* - pin is not masked
|
||||
* - previous interrupt has been EOIed
|
||||
* - pin level is asserted
|
||||
*/
|
||||
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTMASK) ==
|
||||
IOAPIC_RTE_INTMCLR &&
|
||||
(vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0 &&
|
||||
(vioapic->rtbl[pin].acnt > 0)) {
|
||||
dev_dbg(ACRN_DBG_IOAPIC,
|
||||
"ioapic pin%d: asserted at rtbl write, acnt %d",
|
||||
pin, vioapic->rtbl[pin].acnt);
|
||||
vioapic_send_intr(vioapic, pin);
|
||||
}
|
||||
|
||||
/* remap for activation: interrupt mask -> unmask
* remap for deactivation: interrupt masked and vector set to 0
|
||||
*/
|
||||
data64 = vioapic->rtbl[pin].reg;
|
||||
if ((((data64 & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMCLR)
|
||||
&& ((last & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET))
|
||||
|| (((data64 & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET)
|
||||
&& ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) == 0))) {
|
||||
/* VM enable intr */
|
||||
struct ptdev_intx_info intx;
|
||||
|
||||
/* NOTE: only supports a maximum of 256 pins */
|
||||
intx.virt_pin = (uint8_t)pin;
|
||||
intx.vpin_src = PTDEV_VPIN_IOAPIC;
|
||||
ptdev_intx_pin_remap(vioapic->vm, &intx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
vioapic_mmio_rw(struct vioapic *vioapic, uint64_t gpa,
|
||||
uint64_t *data, int size, bool doread)
|
||||
{
|
||||
uint64_t offset;
|
||||
|
||||
offset = gpa - VIOAPIC_BASE;
|
||||
|
||||
/*
|
||||
* The IOAPIC specification allows 32-bit wide accesses to the
* IOREGSEL (offset 0) and IOWIN (offset 16) registers; the EOI
* register (offset 0x40) is accepted here as well.
*/
|
||||
if (size != 4 || (offset != IOREGSEL && offset != IOWIN &&
|
||||
offset != IOEOI)) {
|
||||
if (doread)
|
||||
*data = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
VIOAPIC_LOCK(vioapic);
|
||||
if (offset == IOREGSEL) {
|
||||
if (doread)
|
||||
*data = vioapic->ioregsel;
|
||||
else
|
||||
vioapic->ioregsel = *data;
|
||||
} else if (offset == IOEOI) {
|
||||
/* only need to handle write operation */
|
||||
if (!doread)
|
||||
vioapic_write_eoi(vioapic, *data);
|
||||
} else {
|
||||
if (doread) {
|
||||
*data = vioapic_read(vioapic, vioapic->ioregsel);
|
||||
} else {
|
||||
vioapic_write(vioapic, vioapic->ioregsel,
|
||||
*data);
|
||||
}
|
||||
}
|
||||
VIOAPIC_UNLOCK(vioapic);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
vioapic_mmio_read(void *vm, uint64_t gpa, uint64_t *rval,
|
||||
int size)
|
||||
{
|
||||
int error;
|
||||
struct vioapic *vioapic;
|
||||
|
||||
vioapic = vm_ioapic(vm);
|
||||
error = vioapic_mmio_rw(vioapic, gpa, rval, size, true);
|
||||
return error;
|
||||
}
|
||||
|
||||
int
|
||||
vioapic_mmio_write(void *vm, uint64_t gpa, uint64_t wval,
|
||||
int size)
|
||||
{
|
||||
int error;
|
||||
struct vioapic *vioapic;
|
||||
|
||||
vioapic = vm_ioapic(vm);
|
||||
error = vioapic_mmio_rw(vioapic, gpa, &wval, size, false);
|
||||
return error;
|
||||
}
|
||||
|
||||
void
|
||||
vioapic_process_eoi(struct vm *vm, int vector)
|
||||
{
|
||||
struct vioapic *vioapic;
|
||||
int pin;
|
||||
|
||||
if (vector < VECTOR_FOR_INTR_START || vector > NR_MAX_VECTOR)
|
||||
pr_err("vioapic_process_eoi: invalid vector %d", vector);
|
||||
|
||||
vioapic = vm_ioapic(vm);
|
||||
dev_dbg(ACRN_DBG_IOAPIC, "ioapic processing eoi for vector %d", vector);
|
||||
|
||||
/* notify device to ack if assigned pin */
|
||||
for (pin = 0; pin < vioapic_pincount(vm); pin++) {
|
||||
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0)
|
||||
continue;
|
||||
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) !=
|
||||
(uint64_t)vector)
|
||||
continue;
|
||||
ptdev_intx_ack(vm, pin, PTDEV_VPIN_IOAPIC);
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX keep track of the pins associated with this vector instead
|
||||
* of iterating on every single pin each time.
|
||||
*/
|
||||
VIOAPIC_LOCK(vioapic);
|
||||
for (pin = 0; pin < vioapic_pincount(vm); pin++) {
|
||||
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0)
|
||||
continue;
|
||||
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) !=
|
||||
(uint64_t)vector)
|
||||
continue;
|
||||
|
||||
vioapic->rtbl[pin].reg &= ~IOAPIC_RTE_REM_IRR;
|
||||
if (vioapic->rtbl[pin].acnt > 0) {
|
||||
dev_dbg(ACRN_DBG_IOAPIC,
|
||||
"ioapic pin%d: asserted at eoi, acnt %d",
|
||||
pin, vioapic->rtbl[pin].acnt);
|
||||
vioapic_send_intr(vioapic, pin);
|
||||
}
|
||||
}
|
||||
VIOAPIC_UNLOCK(vioapic);
|
||||
}
|
||||
|
||||
struct vioapic *
|
||||
vioapic_init(struct vm *vm)
|
||||
{
|
||||
int i;
|
||||
struct vioapic *vioapic;
|
||||
|
||||
vioapic = calloc(1, sizeof(struct vioapic));
|
||||
ASSERT(vioapic != NULL, "");
|
||||
|
||||
vioapic->vm = vm;
|
||||
spinlock_init(&vioapic->mtx);
|
||||
|
||||
/* Initialize all redirection entries to mask all interrupts */
|
||||
for (i = 0; i < vioapic_pincount(vioapic->vm); i++)
|
||||
vioapic->rtbl[i].reg = 0x0001000000010000UL;
|
||||
|
||||
register_mmio_emulation_handler(vm,
|
||||
vioapic_mmio_access_handler,
|
||||
(uint64_t)VIOAPIC_BASE,
|
||||
(uint64_t)VIOAPIC_BASE + VIOAPIC_SIZE,
|
||||
(void *) 0);
|
||||
|
||||
return vioapic;
|
||||
}
|
||||
|
||||
void
|
||||
vioapic_cleanup(struct vioapic *vioapic)
|
||||
{
|
||||
unregister_mmio_emulation_handler(vioapic->vm,
|
||||
(uint64_t)VIOAPIC_BASE,
|
||||
(uint64_t)VIOAPIC_BASE + VIOAPIC_SIZE);
|
||||
free(vioapic);
|
||||
}
|
||||
|
||||
int
|
||||
vioapic_pincount(struct vm *vm)
|
||||
{
|
||||
if (is_vm0(vm))
|
||||
return REDIR_ENTRIES_HW;
|
||||
else
|
||||
return REDIR_ENTRIES_UOS;
|
||||
}
|
||||
|
||||
int vioapic_mmio_access_handler(struct vcpu *vcpu, struct mem_io *mmio,
|
||||
void *handler_private_data)
|
||||
{
|
||||
struct vm *vm = vcpu->vm;
|
||||
uint64_t gpa = mmio->paddr;
|
||||
int ret = 0;
|
||||
|
||||
(void)handler_private_data;
|
||||
|
||||
/* Note all RW to IOAPIC are 32-Bit in size */
|
||||
ASSERT(mmio->access_size == 4,
|
||||
"All RW to LAPIC must be 32-bits in size");
|
||||
|
||||
if (mmio->read_write == HV_MEM_IO_READ) {
|
||||
ret = vioapic_mmio_read(vm,
|
||||
gpa,
|
||||
&mmio->value,
|
||||
mmio->access_size);
|
||||
mmio->mmio_status = MMIO_TRANS_VALID;
|
||||
|
||||
} else if (mmio->read_write == HV_MEM_IO_WRITE) {
|
||||
ret = vioapic_mmio_write(vm,
|
||||
gpa,
|
||||
mmio->value,
|
||||
mmio->access_size);
|
||||
|
||||
mmio->mmio_status = MMIO_TRANS_VALID;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool vioapic_get_rte(struct vm *vm, int pin, void *rte)
|
||||
{
|
||||
struct vioapic *vioapic;
|
||||
|
||||
vioapic = vm_ioapic(vm);
|
||||
if (vioapic && rte) {
|
||||
*(uint64_t *)rte = vioapic->rtbl[pin].reg;
|
||||
return true;
|
||||
} else
|
||||
return false;
|
||||
}
|
||||
|
||||
int get_vioapic_info(char *str, int str_max, int vmid)
|
||||
{
|
||||
int pin, len, size = str_max, vector, delmode;
|
||||
uint64_t rte;
|
||||
uint32_t low, high, dest;
|
||||
bool level, phys, remote_irr, mask;
|
||||
struct vm *vm = get_vm_from_vmid(vmid);
|
||||
|
||||
if (!vm) {
|
||||
len = snprintf(str, size,
|
||||
"\r\nvm is not exist for vmid %d", vmid);
|
||||
size -= len;
|
||||
str += len;
|
||||
goto END;
|
||||
}
|
||||
|
||||
len = snprintf(str, size,
|
||||
"\r\nPIN\tVEC\tDM\tDEST\tTM\tDELM\tIRR\tMASK");
|
||||
size -= len;
|
||||
str += len;
|
||||
|
||||
for (pin = 0 ; pin < vioapic_pincount(vm); pin++) {
|
||||
vioapic_get_rte(vm, pin, (void *)&rte);
|
||||
low = rte;
|
||||
high = rte >> 32;
|
||||
mask = ((low & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET);
|
||||
remote_irr = ((low & IOAPIC_RTE_REM_IRR) == IOAPIC_RTE_REM_IRR);
|
||||
phys = ((low & IOAPIC_RTE_DESTMOD) == IOAPIC_RTE_DESTPHY);
|
||||
delmode = low & IOAPIC_RTE_DELMOD;
|
||||
level = low & IOAPIC_RTE_TRGRLVL ? true : false;
|
||||
vector = low & IOAPIC_RTE_INTVEC;
|
||||
dest = high >> APIC_ID_SHIFT;
|
||||
|
||||
len = snprintf(str, size,
|
||||
"\r\n%d\t0x%X\t%s\t0x%X\t%s\t%d\t%d\t%d",
|
||||
pin, vector, phys ? "phys" : "logic",
|
||||
dest, level ? "level" : "edge",
|
||||
delmode >> 8, remote_irr, mask);
|
||||
size -= len;
|
||||
str += len;
|
||||
}
|
||||
END:
|
||||
snprintf(str, size, "\r\n");
|
||||
return 0;
|
||||
}
|
2398
hypervisor/arch/x86/guest/vlapic.c
Normal file
File diff suppressed because it is too large
Load Diff
153
hypervisor/arch/x86/guest/vlapic_priv.h
Normal file
@@ -0,0 +1,153 @@
|
||||
/*-
|
||||
* Copyright (c) 2013 Neel Natu <neel@freebsd.org>
|
||||
* Copyright (c) 2017 Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VLAPIC_PRIV_H_
|
||||
#define _VLAPIC_PRIV_H_
|
||||
|
||||
/*
|
||||
* APIC Register: Offset Description
|
||||
*/
|
||||
#define APIC_OFFSET_ID 0x20 /* Local APIC ID */
|
||||
#define APIC_OFFSET_VER 0x30 /* Local APIC Version */
|
||||
#define APIC_OFFSET_TPR 0x80 /* Task Priority Register */
|
||||
#define APIC_OFFSET_APR 0x90 /* Arbitration Priority */
|
||||
#define APIC_OFFSET_PPR 0xA0 /* Processor Priority Register */
|
||||
#define APIC_OFFSET_EOI 0xB0 /* EOI Register */
|
||||
#define APIC_OFFSET_RRR 0xC0 /* Remote read */
|
||||
#define APIC_OFFSET_LDR 0xD0 /* Logical Destination */
|
||||
#define APIC_OFFSET_DFR 0xE0 /* Destination Format Register */
|
||||
#define APIC_OFFSET_SVR 0xF0 /* Spurious Vector Register */
|
||||
#define APIC_OFFSET_ISR0 0x100 /* In Service Register */
|
||||
#define APIC_OFFSET_ISR1 0x110
|
||||
#define APIC_OFFSET_ISR2 0x120
|
||||
#define APIC_OFFSET_ISR3 0x130
|
||||
#define APIC_OFFSET_ISR4 0x140
|
||||
#define APIC_OFFSET_ISR5 0x150
|
||||
#define APIC_OFFSET_ISR6 0x160
|
||||
#define APIC_OFFSET_ISR7 0x170
|
||||
#define APIC_OFFSET_TMR0 0x180 /* Trigger Mode Register */
|
||||
#define APIC_OFFSET_TMR1 0x190
|
||||
#define APIC_OFFSET_TMR2 0x1A0
|
||||
#define APIC_OFFSET_TMR3 0x1B0
|
||||
#define APIC_OFFSET_TMR4 0x1C0
|
||||
#define APIC_OFFSET_TMR5 0x1D0
|
||||
#define APIC_OFFSET_TMR6 0x1E0
|
||||
#define APIC_OFFSET_TMR7 0x1F0
|
||||
#define APIC_OFFSET_IRR0 0x200 /* Interrupt Request Register */
|
||||
#define APIC_OFFSET_IRR1 0x210
|
||||
#define APIC_OFFSET_IRR2 0x220
|
||||
#define APIC_OFFSET_IRR3 0x230
|
||||
#define APIC_OFFSET_IRR4 0x240
|
||||
#define APIC_OFFSET_IRR5 0x250
|
||||
#define APIC_OFFSET_IRR6 0x260
|
||||
#define APIC_OFFSET_IRR7 0x270
|
||||
#define APIC_OFFSET_ESR 0x280 /* Error Status Register */
|
||||
#define APIC_OFFSET_CMCI_LVT 0x2F0 /* Local Vector Table (CMCI) */
|
||||
#define APIC_OFFSET_ICR_LOW 0x300 /* Interrupt Command Register */
|
||||
#define APIC_OFFSET_ICR_HI 0x310
|
||||
#define APIC_OFFSET_TIMER_LVT 0x320 /* Local Vector Table (Timer) */
|
||||
#define APIC_OFFSET_THERM_LVT 0x330 /* Local Vector Table (Thermal) */
|
||||
#define APIC_OFFSET_PERF_LVT 0x340 /* Local Vector Table (PMC) */
|
||||
#define APIC_OFFSET_LINT0_LVT 0x350 /* Local Vector Table (LINT0) */
|
||||
#define APIC_OFFSET_LINT1_LVT 0x360 /* Local Vector Table (LINT1) */
|
||||
#define APIC_OFFSET_ERROR_LVT 0x370 /* Local Vector Table (ERROR) */
|
||||
#define APIC_OFFSET_TIMER_ICR 0x380 /* Timer's Initial Count */
|
||||
#define APIC_OFFSET_TIMER_CCR 0x390 /* Timer's Current Count */
|
||||
#define APIC_OFFSET_TIMER_DCR 0x3E0 /* Timer's Divide Configuration */
|
||||
#define APIC_OFFSET_SELF_IPI 0x3F0 /* Self IPI register */
|
||||
|
||||
/*
|
||||
* 16 priority levels with at most one vector injected per level.
|
||||
*/
|
||||
#define ISRVEC_STK_SIZE (16 + 1)
|
||||
|
||||
#define VLAPIC_MAXLVT_INDEX APIC_LVT_CMCI
|
||||
|
||||
struct vlapic;
|
||||
|
||||
struct pir_desc {
|
||||
uint64_t pir[4];
|
||||
uint64_t pending;
|
||||
uint64_t unused[3];
|
||||
} __aligned(64);
|
||||
|
||||
struct vlapic_ops {
|
||||
int (*apicv_set_intr_ready)
|
||||
(struct vlapic *vlapic, int vector, bool level);
|
||||
int (*apicv_pending_intr)(struct vlapic *vlapic, int *vecptr);
|
||||
void (*apicv_intr_accepted)(struct vlapic *vlapic, int vector);
|
||||
void (*apicv_post_intr)(struct vlapic *vlapic, int hostcpu);
|
||||
void (*apicv_set_tmr)(struct vlapic *vlapic, int vector, bool level);
|
||||
void (*apicv_batch_set_tmr)(struct vlapic *vlapic);
|
||||
void (*enable_x2apic_mode)(struct vlapic *vlapic);
|
||||
};
|
||||
|
||||
struct vlapic {
|
||||
struct vm *vm;
|
||||
struct vcpu *vcpu;
|
||||
struct lapic *apic_page;
|
||||
struct pir_desc *pir_desc;
|
||||
struct vlapic_ops ops;
|
||||
|
||||
uint32_t esr_pending;
|
||||
int esr_firing;
|
||||
|
||||
struct callout callout; /* vlapic timer */
|
||||
struct bintime timer_fire_bt; /* callout expiry time */
|
||||
struct bintime timer_freq_bt; /* timer frequency */
|
||||
struct bintime timer_period_bt; /* timer period */
|
||||
long last_timer; /* the last timer id */
|
||||
|
||||
spinlock_t timer_mtx;
|
||||
|
||||
/*
|
||||
* The 'isrvec_stk' is a stack of vectors injected by the local apic.
|
||||
* A vector is popped from the stack when the processor does an EOI.
|
||||
* The vector on the top of the stack is used to compute the
|
||||
* Processor Priority in conjunction with the TPR.
|
||||
*/
|
||||
uint8_t isrvec_stk[ISRVEC_STK_SIZE];
|
||||
int isrvec_stk_top;
|
||||
|
||||
uint64_t msr_apicbase;
|
||||
|
||||
/*
|
||||
* Copies of some registers in the virtual APIC page. We do this for
|
||||
* a couple of different reasons:
|
||||
* - to be able to detect what changed (e.g. svr_last)
|
||||
* - to maintain a coherent snapshot of the register (e.g. lvt_last)
|
||||
*/
|
||||
uint32_t svr_last;
|
||||
uint32_t lvt_last[VLAPIC_MAXLVT_INDEX + 1];
|
||||
struct pir_desc pir;
|
||||
};
|
||||
|
||||
void vlapic_cleanup(struct vlapic *vlapic);
|
||||
|
||||
#endif /* _VLAPIC_PRIV_H_ */
|
324
hypervisor/arch/x86/guest/vm.c
Normal file
@@ -0,0 +1,324 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <bsp_extern.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
/* Local variables */
|
||||
|
||||
/* VMs list */
|
||||
struct list_head vm_list = {
|
||||
.next = &vm_list,
|
||||
.prev = &vm_list,
|
||||
};
|
||||
|
||||
/* Lock for VMs list */
|
||||
spinlock_t vm_list_lock = {
|
||||
.head = 0,
|
||||
.tail = 0
|
||||
};
|
||||
|
||||
/* used for vmid allocation; this means the maximum number of VMs is 64 */
|
||||
static unsigned long vmid_bitmap;
|
||||
|
||||
static void init_vm(struct vm_description *vm_desc,
|
||||
struct vm *vm_handle)
|
||||
{
|
||||
/* Populate VM attributes from VM description */
|
||||
vm_handle->hw.num_vcpus = vm_desc->vm_hw_num_cores;
|
||||
vm_handle->state_info.privilege = vm_desc->vm_state_info_privilege;
|
||||
vm_handle->state_info.boot_count = 0;
|
||||
}
|
||||
|
||||
/* return a pointer to the virtual machine structure associated with
|
||||
* this VM ID
|
||||
*/
|
||||
struct vm *get_vm_from_vmid(int vm_id)
|
||||
{
|
||||
struct vm *vm = NULL;
|
||||
struct list_head *pos;
|
||||
|
||||
spinlock_obtain(&vm_list_lock);
|
||||
list_for_each(pos, &vm_list) {
|
||||
vm = list_entry(pos, struct vm, list);
|
||||
if (vm->attr.id == vm_id) {
|
||||
spinlock_release(&vm_list_lock);
|
||||
return vm;
|
||||
}
|
||||
}
|
||||
spinlock_release(&vm_list_lock);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int create_vm(struct vm_description *vm_desc, struct vm **rtn_vm)
|
||||
{
|
||||
unsigned int id;
|
||||
struct vm *vm;
|
||||
int status = 0;
|
||||
|
||||
if ((vm_desc == NULL) || (rtn_vm == NULL))
|
||||
status = -EINVAL;
|
||||
|
||||
if (status == 0) {
|
||||
/* Allocate memory for virtual machine */
|
||||
vm = calloc(1, sizeof(struct vm));
|
||||
ASSERT(vm != NULL, "vm allocation failed");
|
||||
|
||||
/*
|
||||
* Map Virtual Machine to its VM Description
|
||||
*/
|
||||
init_vm(vm_desc, vm);
|
||||
|
||||
|
||||
/* Init mmio list */
|
||||
INIT_LIST_HEAD(&vm->mmio_list);
|
||||
|
||||
if (vm->hw.num_vcpus == 0)
|
||||
vm->hw.num_vcpus = phy_cpu_num;
|
||||
|
||||
vm->hw.vcpu_array =
|
||||
calloc(1, sizeof(struct vcpu *) * vm->hw.num_vcpus);
|
||||
ASSERT(vm->hw.vcpu_array != NULL,
|
||||
"vcpu_array allocation failed");
|
||||
|
||||
for (id = 0; id < sizeof(long) * 8; id++)
|
||||
if (bitmap_test_and_set(id, &vmid_bitmap) == 0)
|
||||
break;
|
||||
vm->attr.id = vm->attr.boot_idx = id;
|
||||
snprintf(&vm->attr.name[0], MAX_VM_NAME_LEN, "vm_%d",
|
||||
vm->attr.id);
|
||||
|
||||
atomic_store_rel_int(&vm->hw.created_vcpus, 0);
|
||||
|
||||
/* gpa_lowtop is used for system start-up */
|
||||
vm->hw.gpa_lowtop = 0;
|
||||
/* Only for SOS: Configure VM software information */
|
||||
/* For UOS: this VM software information is configured in the DM */
|
||||
if (is_vm0(vm)) {
|
||||
prepare_vm0_memmap_and_e820(vm);
|
||||
#ifndef CONFIG_EFI_STUB
|
||||
status = init_vm0_boot_info(vm);
|
||||
#endif
|
||||
} else {
|
||||
/* populate UOS vm fields according to vm_desc */
|
||||
vm->secure_world_enabled =
|
||||
vm_desc->secure_world_enabled;
|
||||
memcpy_s(&vm->GUID[0], sizeof(vm->GUID),
|
||||
&vm_desc->GUID[0],
|
||||
sizeof(vm_desc->GUID));
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&vm->list);
|
||||
spinlock_obtain(&vm_list_lock);
|
||||
list_add(&vm->list, &vm_list);
|
||||
spinlock_release(&vm_list_lock);
|
||||
|
||||
/* Ensure VM software information obtained */
|
||||
if (status == 0) {
|
||||
|
||||
/* Set up IO bit-mask such that VM exit occurs on
|
||||
* selected IO ranges
|
||||
*/
|
||||
setup_io_bitmap(vm);
|
||||
|
||||
/* Create virtual uart */
|
||||
if (is_vm0(vm))
|
||||
vm->vuart = vuart_init(vm);
|
||||
|
||||
vm->vpic = vpic_init(vm);
|
||||
|
||||
/* vpic wire_mode default is INTR */
|
||||
vm->vpic_wire_mode = VPIC_WIRE_INTR;
|
||||
|
||||
/* Allocate full emulated vIOAPIC instance */
|
||||
vm->arch_vm.virt_ioapic = vioapic_init(vm);
|
||||
|
||||
/* Populate return VM handle */
|
||||
*rtn_vm = vm;
|
||||
ptdev_vm_init(vm);
|
||||
vm->sw.req_buf = 0;
|
||||
|
||||
vm->state = VM_CREATED;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Return status to caller */
|
||||
return status;
|
||||
}
|
||||
|
||||
int shutdown_vm(struct vm *vm)
|
||||
{
|
||||
int i, status = 0;
|
||||
struct vcpu *vcpu = NULL;
|
||||
|
||||
if (vm == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
pause_vm(vm);
|
||||
|
||||
/* Only allow shutdown paused vm */
|
||||
if (vm->state != VM_PAUSED)
|
||||
return -EINVAL;
|
||||
|
||||
foreach_vcpu(i, vm, vcpu) {
|
||||
reset_vcpu(vcpu);
|
||||
destroy_vcpu(vcpu);
|
||||
}
|
||||
|
||||
spinlock_obtain(&vm_list_lock);
|
||||
list_del_init(&vm->list);
|
||||
spinlock_release(&vm_list_lock);
|
||||
|
||||
ptdev_vm_deinit(vm);
|
||||
|
||||
/* cleanup and free vioapic */
|
||||
vioapic_cleanup(vm->arch_vm.virt_ioapic);
|
||||
|
||||
/* Free EPT allocated resources assigned to VM */
|
||||
destroy_ept(vm);
|
||||
|
||||
/* Free MSR bitmap */
|
||||
free(vm->arch_vm.msr_bitmap);
|
||||
|
||||
/* TODO: De-initialize I/O Emulation */
|
||||
free_io_emulation_resource(vm);
|
||||
|
||||
/* Free iommu_domain */
|
||||
if (vm->iommu_domain)
|
||||
destroy_iommu_domain(vm->iommu_domain);
|
||||
|
||||
bitmap_clr(vm->attr.id, &vmid_bitmap);
|
||||
|
||||
if (vm->vpic)
|
||||
vpic_cleanup(vm);
|
||||
|
||||
free(vm->hw.vcpu_array);
|
||||
|
||||
/* TODO: De-Configure HV-SW */
|
||||
/* Deallocate VM */
|
||||
free(vm);
|
||||
|
||||
/* Return status to caller */
|
||||
return status;
|
||||
}
|
||||
|
||||
int start_vm(struct vm *vm)
|
||||
{
|
||||
struct vcpu *vcpu = NULL;
|
||||
|
||||
vm->state = VM_STARTED;
|
||||
|
||||
/* Only start BSP (vid = 0) and let BSP start other APs */
|
||||
vcpu = vcpu_from_vid(vm, 0);
|
||||
ASSERT(vcpu != NULL, "vm%d, vcpu0", vm->attr.id);
|
||||
schedule_vcpu(vcpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The DM only pauses a VM for shutdown/reboot. If the DM needs to pause
* a VM for other purposes, this API should be extended.
|
||||
*/
|
||||
int pause_vm(struct vm *vm)
|
||||
{
|
||||
int i;
|
||||
struct vcpu *vcpu = NULL;
|
||||
|
||||
if (vm->state == VM_PAUSED)
|
||||
return 0;
|
||||
|
||||
vm->state = VM_PAUSED;
|
||||
|
||||
foreach_vcpu(i, vm, vcpu)
|
||||
pause_vcpu(vcpu, VCPU_ZOMBIE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vm_resume(struct vm *vm)
|
||||
{
|
||||
int i;
|
||||
struct vcpu *vcpu = NULL;
|
||||
|
||||
foreach_vcpu(i, vm, vcpu)
|
||||
resume_vcpu(vcpu);
|
||||
|
||||
vm->state = VM_STARTED;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Eventually, we will remove the array and only maintain the vm0 descriptor */
|
||||
struct vm_description *get_vm_desc(int idx)
|
||||
{
|
||||
struct vm_description_array *vm_desc_array;
|
||||
|
||||
/* Obtain base of user defined VM description array data
|
||||
* structure
|
||||
*/
|
||||
vm_desc_array = (struct vm_description_array *)get_vm_desc_base();
|
||||
/* Obtain VM description array base */
|
||||
if (idx >= vm_desc_array->num_vm_desc)
|
||||
return NULL;
|
||||
else
|
||||
return &vm_desc_array->vm_desc_array[idx];
|
||||
}
|
||||
|
||||
/* Create vm/vcpu for vm0 */
|
||||
int prepare_vm0(void)
|
||||
{
|
||||
int i, ret;
|
||||
struct vm *vm = NULL;
|
||||
struct vm_description *vm_desc = NULL;
|
||||
|
||||
vm_desc = get_vm_desc(0);
|
||||
ASSERT(vm_desc, "get vm desc failed");
|
||||
ret = create_vm(vm_desc, &vm);
|
||||
ASSERT(ret == 0, "VM creation failed!");
|
||||
|
||||
prepare_vcpu(vm, vm_desc->vm_hw_logical_core_ids[0]);
|
||||
|
||||
/* Prepare the AP for vm0 */
|
||||
for (i = 1; i < vm_desc->vm_hw_num_cores; i++)
|
||||
prepare_vcpu(vm, vm_desc->vm_hw_logical_core_ids[i]);
|
||||
|
||||
/* start vm0 BSP automatically */
|
||||
start_vm(vm);
|
||||
|
||||
pr_fatal("Start VM0");
|
||||
|
||||
return 0;
|
||||
}
|
148
hypervisor/arch/x86/guest/vmcall.c
Normal file
@@ -0,0 +1,148 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
#include <acrn_hv_defs.h>
|
||||
#include <hypercall.h>
|
||||
|
||||
int vmcall_handler(struct vcpu *vcpu)
|
||||
{
|
||||
int64_t ret = 0;
|
||||
struct vm *vm = vcpu->vm;
|
||||
struct run_context *cur_context =
|
||||
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
|
||||
/* hypercall ID from guest*/
|
||||
uint64_t hypcall_id = cur_context->guest_cpu_regs.regs.r8;
|
||||
/* hypercall param1 from guest*/
|
||||
uint64_t param1 = cur_context->guest_cpu_regs.regs.rdi;
|
||||
/* hypercall param2 from guest*/
|
||||
uint64_t param2 = cur_context->guest_cpu_regs.regs.rsi;
|
||||
/* hypercall param3 from guest, reserved*/
|
||||
/* uint64_t param3 = cur_context->guest_cpu_regs.regs.rdx; */
|
||||
/* hypercall param4 from guest, reserved*/
|
||||
/* uint64_t param4 = cur_context->guest_cpu_regs.regs.rcx; */
|
||||
|
||||
/* Dispatch the hypercall handler */
|
||||
switch (hypcall_id) {
|
||||
case HC_GET_API_VERSION:
|
||||
ret = hcall_get_api_version(vm, param1);
|
||||
break;
|
||||
|
||||
case HC_CREATE_VM:
|
||||
ret = hcall_create_vm(vm, param1);
|
||||
break;
|
||||
|
||||
case HC_DESTROY_VM:
|
||||
ret = hcall_destroy_vm(param1);
|
||||
break;
|
||||
|
||||
case HC_START_VM:
|
||||
ret = hcall_resume_vm(param1);
|
||||
break;
|
||||
|
||||
case HC_PAUSE_VM:
|
||||
ret = hcall_pause_vm(param1);
|
||||
break;
|
||||
|
||||
case HC_CREATE_VCPU:
|
||||
ret = hcall_create_vcpu(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_ASSERT_IRQLINE:
|
||||
ret = hcall_assert_irqline(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_DEASSERT_IRQLINE:
|
||||
ret = hcall_deassert_irqline(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_PULSE_IRQLINE:
|
||||
ret = hcall_pulse_irqline(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_INJECT_MSI:
|
||||
ret = hcall_inject_msi(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_SET_IOREQ_BUFFER:
|
||||
ret = hcall_set_ioreq_buffer(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_NOTIFY_REQUEST_FINISH:
|
||||
ret = hcall_notify_req_finish(param1, param2);
|
||||
break;
|
||||
|
||||
case HC_VM_SET_MEMMAP:
|
||||
ret = hcall_set_vm_memmap(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_VM_PCI_MSIX_REMAP:
|
||||
ret = hcall_remap_pci_msix(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_VM_GPA2HPA:
|
||||
ret = hcall_gpa_to_hpa(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_ASSIGN_PTDEV:
|
||||
ret = hcall_assign_ptdev(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_DEASSIGN_PTDEV:
|
||||
ret = hcall_deassign_ptdev(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_SET_PTDEV_INTR_INFO:
|
||||
ret = hcall_set_ptdev_intr_info(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_RESET_PTDEV_INTR_INFO:
|
||||
ret = hcall_reset_ptdev_intr_info(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_SETUP_SBUF:
|
||||
ret = hcall_setup_sbuf(vm, param1);
|
||||
break;
|
||||
|
||||
default:
|
||||
pr_err("op %d: Invalid hypercall\n", hypcall_id);
|
||||
ret = -1;
|
||||
break;
|
||||
}
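/* The hypercall return value is passed back to the guest in RAX. */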
|
||||
|
||||
cur_context->guest_cpu_regs.regs.rax = ret;
|
||||
|
||||
TRACE_2L(TRC_VMEXIT_VMCALL, vm->attr.id, hypcall_id);
|
||||
|
||||
return 0;
|
||||
}
|
321
hypervisor/arch/x86/guest/vmsr.c
Normal file
@@ -0,0 +1,321 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
/* MSRs that need to be emulated; order entries in this array by frequency of access */
|
||||
static const uint32_t emulated_msrs[] = {
|
||||
MSR_IA32_TSC_DEADLINE, /* Enable TSC_DEADLINE VMEXIT */
|
||||
|
||||
/* following MSR not emulated now */
|
||||
/*
|
||||
* MSR_IA32_APIC_BASE,
|
||||
* MSR_IA32_SYSENTER_CS,
|
||||
* MSR_IA32_SYSENTER_ESP,
|
||||
* MSR_IA32_SYSENTER_EIP,
|
||||
* MSR_IA32_TSC_AUX,
|
||||
* MSR_IA32_TIME_STAMP_COUNTER,
|
||||
*/
|
||||
};
|
||||
|
||||
/* the indices match the emulated_msrs array */
|
||||
enum {
|
||||
IDX_TSC_DEADLINE,
|
||||
|
||||
IDX_MAX_MSR
|
||||
};
|
||||
|
||||
static void enable_msr_interception(uint8_t *bitmap, uint32_t msr)
|
||||
{
|
||||
uint8_t *read_map;
|
||||
uint8_t *write_map;
|
||||
uint8_t value;
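/*
 * VMX MSR-bitmap layout (one 4KB page): read bitmap for low MSRs at
 * offset 0, read bitmap for high MSRs at 1024, write bitmap for low
 * MSRs at 2048 and write bitmap for high MSRs at 3072; one bit per MSR.
 */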
|
||||
/* low MSR */
|
||||
if (msr <= 0x1FFF) {
|
||||
read_map = bitmap;
|
||||
write_map = bitmap + 2048;
|
||||
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
|
||||
read_map = bitmap + 1024;
|
||||
write_map = bitmap + 3072;
|
||||
} else {
|
||||
pr_err("Invalid MSR");
|
||||
return;
|
||||
}
|
||||
|
||||
msr &= 0x1FFF;
|
||||
value = read_map[(msr>>3)];
|
||||
value |= 1<<(msr%8);
|
||||
/* right now we trap for both r/w */
|
||||
read_map[(msr>>3)] = value;
|
||||
write_map[(msr>>3)] = value;
|
||||
}
|
||||
|
||||
/* Not used now; kept as an API for cases that may need it later */
|
||||
void disable_msr_interception(uint8_t *bitmap, uint32_t msr)
|
||||
{
|
||||
uint8_t *read_map;
|
||||
uint8_t *write_map;
|
||||
uint8_t value;
|
||||
/* low MSR */
|
||||
if (msr <= 0x1FFF) {
|
||||
read_map = bitmap;
|
||||
write_map = bitmap + 2048;
|
||||
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
|
||||
read_map = bitmap + 1024;
|
||||
write_map = bitmap + 3072;
|
||||
} else {
|
||||
pr_err("Invalid MSR");
|
||||
return;
|
||||
}
|
||||
|
||||
msr &= 0x1FFF;
|
||||
value = read_map[(msr>>3)];
|
||||
value &= ~(1<<(msr%8));
|
||||
/* right now we trap for both r/w */
|
||||
read_map[(msr>>3)] = value;
|
||||
write_map[(msr>>3)] = value;
|
||||
}
|
||||
|
||||
void init_msr_emulation(struct vcpu *vcpu)
|
||||
{
|
||||
uint32_t i = 0;
|
||||
uint32_t msrs_count = ARRAY_SIZE(emulated_msrs);
|
||||
void *msr_bitmap;
|
||||
uint64_t value64;
|
||||
|
||||
ASSERT(msrs_count == IDX_MAX_MSR,
|
||||
"MSR ID should be matched with emulated_msrs");
|
||||
|
||||
/* The MSR bitmap is allocated/initialized once and shared by all vcpus of the VM */
|
||||
if (is_vcpu_bsp(vcpu)) {
|
||||
|
||||
/* Allocate and initialize memory for MSR bitmap region*/
|
||||
vcpu->vm->arch_vm.msr_bitmap = alloc_page();
|
||||
ASSERT(vcpu->vm->arch_vm.msr_bitmap, "");
|
||||
memset(vcpu->vm->arch_vm.msr_bitmap, 0x0, CPU_PAGE_SIZE);
|
||||
|
||||
msr_bitmap = vcpu->vm->arch_vm.msr_bitmap;
|
||||
|
||||
for (i = 0; i < msrs_count; i++)
|
||||
enable_msr_interception(msr_bitmap, emulated_msrs[i]);
|
||||
|
||||
/* The MSRs below are protected from the guest OS; any access injects a #GP */
|
||||
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_CAP);
|
||||
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_DEF_TYPE);
|
||||
|
||||
for (i = MSR_IA32_MTRR_PHYSBASE_0;
|
||||
i <= MSR_IA32_MTRR_PHYSMASK_9; i++) {
|
||||
enable_msr_interception(msr_bitmap, i);
|
||||
}
|
||||
|
||||
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_FIX64K_00000);
|
||||
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_FIX16K_80000);
|
||||
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_FIX16K_A0000);
|
||||
|
||||
for (i = MSR_IA32_MTRR_FIX4K_C0000;
|
||||
i <= MSR_IA32_MTRR_FIX4K_F8000; i++) {
|
||||
enable_msr_interception(msr_bitmap, i);
|
||||
}
|
||||
}
|
||||
|
||||
/* Set up MSR bitmap - pg 2904 24.6.9 */
|
||||
value64 = (int64_t) vcpu->vm->arch_vm.msr_bitmap;
|
||||
exec_vmwrite64(VMX_MSR_BITMAP_FULL, value64);
|
||||
pr_dbg("VMX_MSR_BITMAP: 0x%016llx ", value64);
|
||||
|
||||
vcpu->guest_msrs = (uint64_t *)calloc(msrs_count, sizeof(uint64_t));
|
||||
|
||||
ASSERT(vcpu->guest_msrs != NULL, "");
|
||||
memset(vcpu->guest_msrs, 0, msrs_count * sizeof(uint64_t));
|
||||
}
|
||||
|
||||
int rdmsr_handler(struct vcpu *vcpu)
|
||||
{
|
||||
uint32_t msr;
|
||||
uint64_t v = 0;
|
||||
uint32_t id;
|
||||
int cur_context = vcpu->arch_vcpu.cur_context;
|
||||
|
||||
/* Read the MSR index from guest RCX */
|
||||
msr = vcpu->arch_vcpu.contexts[cur_context].guest_cpu_regs.regs.rcx;
|
||||
|
||||
/* Do the required processing for each msr case */
|
||||
switch (msr) {
|
||||
case MSR_IA32_TSC_DEADLINE:
|
||||
{
|
||||
v = vcpu->guest_msrs[IDX_TSC_DEADLINE];
|
||||
break;
|
||||
}
|
||||
|
||||
case MSR_IA32_MTRR_CAP:
|
||||
case MSR_IA32_MTRR_DEF_TYPE:
|
||||
case MSR_IA32_MTRR_PHYSBASE_0 ... MSR_IA32_MTRR_PHYSMASK_9:
|
||||
case MSR_IA32_MTRR_FIX64K_00000 ... MSR_IA32_MTRR_FIX4K_F8000:
|
||||
{
|
||||
vcpu_inject_gp(vcpu);
|
||||
break;
|
||||
}
|
||||
|
||||
/* The following MSRs are not emulated now; left for the future */
|
||||
case MSR_IA32_SYSENTER_CS:
|
||||
{
|
||||
v = exec_vmread(VMX_GUEST_IA32_SYSENTER_CS);
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_SYSENTER_ESP:
|
||||
{
|
||||
v = exec_vmread(VMX_GUEST_IA32_SYSENTER_ESP);
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_SYSENTER_EIP:
|
||||
{
|
||||
v = exec_vmread(VMX_GUEST_IA32_SYSENTER_EIP);
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_TSC_AUX:
|
||||
{
|
||||
v = vcpu->arch_vcpu.msr_tsc_aux;
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_TIME_STAMP_COUNTER:
|
||||
{
|
||||
/* Read the host TSC value */
|
||||
CPU_RDTSCP_EXECUTE(&v, &id);
|
||||
|
||||
/* Add the TSC_offset to host TSC and return the value */
|
||||
v += exec_vmread64(VMX_TSC_OFFSET_FULL);
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_APIC_BASE:
|
||||
{
|
||||
bool ret;
|
||||
/* Read APIC base */
|
||||
vlapic_rdmsr(vcpu, msr, &v, &ret);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
pr_warn("rdmsr: %lx should not come here!", msr);
|
||||
v = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Store the MSR contents in RAX and RDX */
|
||||
vcpu->arch_vcpu.contexts[cur_context].guest_cpu_regs.regs.rax =
|
||||
v & 0xffffffff;
|
||||
vcpu->arch_vcpu.contexts[cur_context].guest_cpu_regs.regs.rdx = v >> 32;
|
||||
|
||||
TRACE_2L(TRC_VMEXIT_RDMSR, msr, v);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int wrmsr_handler(struct vcpu *vcpu)
|
||||
{
|
||||
uint32_t msr;
|
||||
uint64_t v;
|
||||
struct run_context *cur_context =
|
||||
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
|
||||
|
||||
/* Read the MSR ID */
|
||||
msr = cur_context->guest_cpu_regs.regs.rcx;
|
||||
|
||||
/* Get the MSR contents */
|
||||
v = (((uint64_t) cur_context->guest_cpu_regs.regs.rdx) << 32) |
|
||||
((uint64_t) cur_context->guest_cpu_regs.regs.rax);
|
||||
|
||||
/* Do the required processing for each msr case */
|
||||
switch (msr) {
|
||||
case MSR_IA32_TSC_DEADLINE:
|
||||
{
|
||||
bool ret;
|
||||
/* Forward the TSC-deadline write to the vlapic */
|
||||
vlapic_wrmsr(vcpu, msr, v, &ret);
|
||||
vcpu->guest_msrs[IDX_TSC_DEADLINE] = v;
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_MTRR_CAP:
|
||||
case MSR_IA32_MTRR_DEF_TYPE:
|
||||
case MSR_IA32_MTRR_PHYSBASE_0 ... MSR_IA32_MTRR_PHYSMASK_9:
|
||||
case MSR_IA32_MTRR_FIX64K_00000 ... MSR_IA32_MTRR_FIX4K_F8000:
|
||||
{
|
||||
vcpu_inject_gp(vcpu);
|
||||
break;
|
||||
}
|
||||
|
||||
/* The following MSRs are not emulated now; left for the future */
|
||||
case MSR_IA32_SYSENTER_CS:
|
||||
{
|
||||
exec_vmwrite(VMX_GUEST_IA32_SYSENTER_CS, v);
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_SYSENTER_ESP:
|
||||
{
|
||||
exec_vmwrite(VMX_GUEST_IA32_SYSENTER_ESP, v);
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_SYSENTER_EIP:
|
||||
{
|
||||
exec_vmwrite(VMX_GUEST_IA32_SYSENTER_EIP, v);
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_GS_BASE:
|
||||
{
|
||||
exec_vmwrite(VMX_GUEST_GS_BASE, v);
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_TSC_AUX:
|
||||
{
|
||||
vcpu->arch_vcpu.msr_tsc_aux = v;
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_APIC_BASE:
|
||||
{
|
||||
bool ret;
|
||||
/* Write APIC base */
|
||||
vlapic_wrmsr(vcpu, msr, v, &ret);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
ASSERT(0, "wrmsr: %lx should not come here!", msr);
|
||||
msr_write(msr, v);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
TRACE_2L(TRC_VMEXIT_WRMSR, msr, v);
|
||||
|
||||
return 0;
|
||||
}
|
950
hypervisor/arch/x86/guest/vpic.c
Normal file
@@ -0,0 +1,950 @@
|
||||
/*-
|
||||
* Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
|
||||
* Copyright (c) 2017 Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "vpic: " fmt
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
#define VPIC_LOCK_INIT(vpic) spinlock_init(&((vpic)->lock))
|
||||
#define VPIC_LOCK(vpic) spinlock_obtain(&((vpic)->lock))
|
||||
#define VPIC_UNLOCK(vpic) spinlock_release(&((vpic)->lock))
|
||||
/* TODO: add spinlock_locked support? */
|
||||
/*#define VPIC_LOCKED(vpic) spinlock_locked(&((vpic)->lock))*/
|
||||
|
||||
#define vm_pic(vm) (vm->vpic)
|
||||
|
||||
#define true 1
|
||||
#define false 0
|
||||
|
||||
#define ACRN_DBG_PIC 6
|
||||
|
||||
enum irqstate {
|
||||
IRQSTATE_ASSERT,
|
||||
IRQSTATE_DEASSERT,
|
||||
IRQSTATE_PULSE
|
||||
};
|
||||
|
||||
struct pic {
|
||||
bool ready;
|
||||
int icw_num;
|
||||
int rd_cmd_reg;
|
||||
|
||||
bool aeoi;
|
||||
bool poll;
|
||||
bool rotate;
|
||||
bool sfn; /* special fully-nested mode */
|
||||
|
||||
int irq_base;
|
||||
uint8_t request; /* Interrupt Request Register (IRR) */
|
||||
uint8_t service; /* Interrupt Service Register (ISR) */
|
||||
uint8_t mask; /* Interrupt Mask Register (IMR) */
|
||||
uint8_t smm; /* special mask mode */
|
||||
|
||||
int acnt[8]; /* sum of pin asserts and deasserts */
|
||||
int lowprio; /* lowest priority irq */
|
||||
|
||||
bool intr_raised;
|
||||
uint8_t elc;
|
||||
};
|
||||
|
||||
struct vpic {
|
||||
struct vm *vm;
|
||||
spinlock_t lock;
|
||||
struct pic pic[2];
|
||||
};
|
||||
|
||||
/*
|
||||
* Loop over all the pins in priority order from highest to lowest.
|
||||
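 * The scan starts at (lowprio + 1) & 0x7 because the pin following the lowest-priority pin has the highest priority.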
*/
|
||||
#define PIC_PIN_FOREACH(pinvar, pic, tmpvar) \
|
||||
for (tmpvar = 0, pinvar = (pic->lowprio + 1) & 0x7; \
|
||||
tmpvar < 8; \
|
||||
tmpvar++, pinvar = (pinvar + 1) & 0x7)
|
||||
|
||||
static void vpic_set_pinstate(struct vpic *vpic, int pin, bool newstate);
|
||||
|
||||
static inline bool master_pic(struct vpic *vpic, struct pic *pic)
|
||||
{
|
||||
|
||||
if (pic == &vpic->pic[0])
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline int vpic_get_highest_isrpin(struct pic *pic)
|
||||
{
|
||||
int bit, pin;
|
||||
int i;
|
||||
|
||||
PIC_PIN_FOREACH(pin, pic, i) {
|
||||
bit = (1 << pin);
|
||||
|
||||
if (pic->service & bit) {
|
||||
/*
|
||||
* An IS bit that is masked by an IMR bit will not be
|
||||
* cleared by a non-specific EOI in Special Mask Mode.
|
||||
*/
|
||||
if (pic->smm && (pic->mask & bit) != 0)
|
||||
continue;
|
||||
else
|
||||
return pin;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static inline int vpic_get_highest_irrpin(struct pic *pic)
|
||||
{
|
||||
int serviced;
|
||||
int bit, pin, tmp;
|
||||
|
||||
/*
|
||||
* In 'Special Fully-Nested Mode' when an interrupt request from
|
||||
* a slave is in service, the slave is not locked out from the
|
||||
* master's priority logic.
|
||||
*/
|
||||
serviced = pic->service;
|
||||
if (pic->sfn)
|
||||
serviced &= ~(1 << 2);
|
||||
|
||||
/*
|
||||
* In 'Special Mask Mode', when a mask bit is set in OCW1 it inhibits
|
||||
* further interrupts at that level and enables interrupts from all
|
||||
* other levels that are not masked. In other words the ISR has no
|
||||
* bearing on the levels that can generate interrupts.
|
||||
*/
|
||||
if (pic->smm)
|
||||
serviced = 0;
|
||||
|
||||
PIC_PIN_FOREACH(pin, pic, tmp) {
|
||||
bit = 1 << pin;
|
||||
|
||||
/*
|
||||
* If there is already an interrupt in service at the same
|
||||
* or higher priority then bail.
|
||||
*/
|
||||
if ((serviced & bit) != 0)
|
||||
break;
|
||||
|
||||
/*
|
||||
* If an interrupt is asserted and not masked then return
|
||||
* the corresponding 'pin' to the caller.
|
||||
*/
|
||||
if ((pic->request & bit) != 0 && (pic->mask & bit) == 0)
|
||||
return pin;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void vpic_notify_intr(struct vpic *vpic)
|
||||
{
|
||||
struct pic *pic;
|
||||
int pin;
|
||||
|
||||
/*
|
||||
* First check the slave.
|
||||
*/
|
||||
pic = &vpic->pic[1];
|
||||
pin = vpic_get_highest_irrpin(pic);
|
||||
if (!pic->intr_raised && pin != -1) {
|
||||
dev_dbg(ACRN_DBG_PIC,
|
||||
"pic slave notify pin = %d (imr 0x%x irr 0x%x isr 0x%x)\n",
|
||||
pin, pic->mask, pic->request, pic->service);
|
||||
|
||||
/*
|
||||
* Cascade the request from the slave to the master.
|
||||
*/
|
||||
pic->intr_raised = true;
|
||||
vpic_set_pinstate(vpic, 2, true);
|
||||
vpic_set_pinstate(vpic, 2, false);
|
||||
} else {
|
||||
dev_dbg(ACRN_DBG_PIC,
|
||||
"pic slave no eligible interrupt (imr 0x%x irr 0x%x isr 0x%x)",
|
||||
pic->mask, pic->request, pic->service);
|
||||
}
|
||||
|
||||
/*
|
||||
* Then check the master.
|
||||
*/
|
||||
pic = &vpic->pic[0];
|
||||
pin = vpic_get_highest_irrpin(pic);
|
||||
if (!pic->intr_raised && pin != -1) {
|
||||
dev_dbg(ACRN_DBG_PIC,
|
||||
"pic master notify pin = %d (imr 0x%x irr 0x%x isr 0x%x)\n",
|
||||
pin, pic->mask, pic->request, pic->service);
|
||||
|
||||
/*
|
||||
* From Section 3.6.2, "Interrupt Modes", in the
|
||||
* MPtable Specification, Version 1.4
|
||||
*
|
||||
* PIC interrupts are routed to both the Local APIC
|
||||
* and the I/O APIC to support operation in 1 of 3
|
||||
* modes.
|
||||
*
|
||||
* 1. Legacy PIC Mode: the PIC effectively bypasses
|
||||
* all APIC components. In this mode the local APIC is
|
||||
* disabled and LINT0 is reconfigured as INTR to
|
||||
* deliver the PIC interrupt directly to the CPU.
|
||||
*
|
||||
* 2. Virtual Wire Mode: the APIC is treated as a
|
||||
* virtual wire which delivers interrupts from the PIC
|
||||
* to the CPU. In this mode LINT0 is programmed as
|
||||
* ExtINT to indicate that the PIC is the source of
|
||||
* the interrupt.
|
||||
*
|
||||
* 3. Virtual Wire Mode via I/O APIC: PIC interrupts are
|
||||
* fielded by the I/O APIC and delivered to the appropriate
|
||||
* CPU. In this mode the I/O APIC input 0 is programmed
|
||||
* as ExtINT to indicate that the PIC is the source of the
|
||||
* interrupt.
|
||||
*/
|
||||
pic->intr_raised = true;
|
||||
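/* With INTR wire mode the interrupt is injected into vcpu0 as an
 * ExtINT; otherwise it is delivered through the vLAPIC LINT0 and
 * vIOAPIC pin 0 (the virtual wire modes described above).
 */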
if (vpic->vm->vpic_wire_mode == VPIC_WIRE_INTR) {
|
||||
struct vcpu *vcpu = vcpu_from_vid(vpic->vm, 0);
|
||||
|
||||
ASSERT(vcpu != NULL, "vm%d, vcpu0", vpic->vm->attr.id);
|
||||
vcpu_inject_extint(vcpu);
|
||||
} else {
|
||||
vlapic_set_local_intr(vpic->vm, -1, APIC_LVT_LINT0);
|
||||
/* notify vioapic pin0 if it exists:
|
||||
* for vPIC + vIOAPIC mode, the vpic master output is connected
|
||||
* to vioapic pin0 (irq2)
|
||||
* per MP Specification Section 5.1
|
||||
*/
|
||||
vioapic_pulse_irq(vpic->vm, 0);
|
||||
}
|
||||
} else {
|
||||
dev_dbg(ACRN_DBG_PIC,
|
||||
"pic master no eligible interrupt (imr 0x%x irr 0x%x isr 0x%x)",
|
||||
pic->mask, pic->request, pic->service);
|
||||
}
|
||||
}
|
||||
|
||||
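/* ICW1 starts the initialization sequence: the PIC state is reset and
 * icw_num is armed so the following data-port writes are interpreted
 * as ICW2..ICW4.
 */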
static int vpic_icw1(__unused struct vpic *vpic, struct pic *pic, uint8_t val)
|
||||
{
|
||||
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw1 0x%x\n",
|
||||
vpic->vm, val);
|
||||
|
||||
pic->ready = false;
|
||||
|
||||
pic->icw_num = 1;
|
||||
pic->request = 0;
|
||||
pic->mask = 0;
|
||||
pic->lowprio = 7;
|
||||
pic->rd_cmd_reg = 0;
|
||||
pic->poll = 0;
|
||||
pic->smm = 0;
|
||||
|
||||
if ((val & ICW1_SNGL) != 0) {
|
||||
dev_dbg(ACRN_DBG_PIC, "vpic cascade mode required\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((val & ICW1_IC4) == 0) {
|
||||
dev_dbg(ACRN_DBG_PIC, "vpic icw4 required\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
pic->icw_num++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vpic_icw2(__unused struct vpic *vpic, struct pic *pic, uint8_t val)
|
||||
{
|
||||
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw2 0x%x\n",
|
||||
vpic->vm, val);
|
||||
|
||||
pic->irq_base = val & 0xf8;
|
||||
|
||||
pic->icw_num++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vpic_icw3(__unused struct vpic *vpic, struct pic *pic,
|
||||
__unused uint8_t val)
|
||||
{
|
||||
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw3 0x%x\n",
|
||||
vpic->vm, val);
|
||||
|
||||
pic->icw_num++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vpic_icw4(struct vpic *vpic, struct pic *pic, uint8_t val)
|
||||
{
|
||||
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw4 0x%x\n",
|
||||
vpic->vm, val);
|
||||
|
||||
if ((val & ICW4_8086) == 0) {
|
||||
dev_dbg(ACRN_DBG_PIC,
|
||||
"vpic microprocessor mode required\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((val & ICW4_AEOI) != 0)
|
||||
pic->aeoi = true;
|
||||
|
||||
if ((val & ICW4_SFNM) != 0) {
|
||||
if (master_pic(vpic, pic)) {
|
||||
pic->sfn = true;
|
||||
} else {
|
||||
dev_dbg(ACRN_DBG_PIC,
|
||||
"Ignoring special fully nested mode on slave pic: %#x",
|
||||
val);
|
||||
}
|
||||
}
|
||||
|
||||
pic->icw_num = 0;
|
||||
pic->ready = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool vpic_is_pin_mask(struct vpic *vpic, uint8_t virt_pin)
|
||||
{
|
||||
struct pic *pic;
|
||||
|
||||
if (virt_pin < 8)
|
||||
pic = &vpic->pic[0];
|
||||
else if (virt_pin < 16) {
|
||||
pic = &vpic->pic[1];
|
||||
virt_pin -= 8;
|
||||
} else
|
||||
return true;
|
||||
|
||||
if (pic->mask & (1 << virt_pin))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
static int vpic_ocw1(struct vpic *vpic, struct pic *pic, uint8_t val)
|
||||
{
|
||||
int pin, i, bit;
|
||||
uint8_t old = pic->mask;
|
||||
|
||||
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic ocw1 0x%x\n",
|
||||
vpic->vm, val);
|
||||
|
||||
pic->mask = val & 0xff;
|
||||
|
||||
/* query and setup if pin/irq is for passthrough device */
|
||||
PIC_PIN_FOREACH(pin, pic, i) {
|
||||
bit = (1 << pin);
|
||||
|
||||
/* remap on activation: the interrupt changes from masked to unmasked;
|
||||
* remap on deactivation is handled when the vIOAPIC takes it over
|
||||
*/
|
||||
if (((pic->mask & bit) == 0) && (old & bit)) {
|
||||
struct ptdev_intx_info intx;
|
||||
|
||||
/* master pic pin2 is connected to the slave pic,
|
||||
* not to a device, so no passthrough remap is needed
|
||||
*/
|
||||
if ((pin == 2) && master_pic(vpic, pic))
|
||||
continue;
|
||||
|
||||
intx.virt_pin = pin;
|
||||
intx.vpin_src = PTDEV_VPIN_PIC;
|
||||
if (!master_pic(vpic, pic))
|
||||
intx.virt_pin += 8;
|
||||
ptdev_intx_pin_remap(vpic->vm, &intx);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
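/* OCW2 carries the EOI and priority-rotation commands: SL selects a
 * specific in-service level, EOI clears the in-service bit, and R
 * rotates the lowest-priority pin.
 */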
static int vpic_ocw2(struct vpic *vpic, struct pic *pic, uint8_t val)
|
||||
{
|
||||
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic ocw2 0x%x\n",
|
||||
vpic->vm, val);
|
||||
|
||||
pic->rotate = ((val & OCW2_R) != 0);
|
||||
|
||||
if ((val & OCW2_EOI) != 0) {
|
||||
int isr_bit;
|
||||
|
||||
if ((val & OCW2_SL) != 0) {
|
||||
/* specific EOI */
|
||||
isr_bit = val & 0x7;
|
||||
} else {
|
||||
/* non-specific EOI */
|
||||
isr_bit = vpic_get_highest_isrpin(pic);
|
||||
}
|
||||
|
||||
if (isr_bit != -1) {
|
||||
pic->service &= ~(1 << isr_bit);
|
||||
|
||||
if (pic->rotate)
|
||||
pic->lowprio = isr_bit;
|
||||
}
|
||||
|
||||
/* if level ack PTDEV */
|
||||
if (pic->elc & (1 << (isr_bit & 0x7))) {
|
||||
ptdev_intx_ack(vpic->vm,
|
||||
master_pic(vpic, pic) ? isr_bit : isr_bit + 8,
|
||||
PTDEV_VPIN_PIC);
|
||||
}
|
||||
} else if ((val & OCW2_SL) != 0 && pic->rotate == true) {
|
||||
/* specific priority */
|
||||
pic->lowprio = val & 0x7;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vpic_ocw3(__unused struct vpic *vpic, struct pic *pic, uint8_t val)
|
||||
{
|
||||
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic ocw3 0x%x\n",
|
||||
vpic->vm, val);
|
||||
|
||||
if (val & OCW3_ESMM) {
|
||||
pic->smm = val & OCW3_SMM ? 1 : 0;
|
||||
dev_dbg(ACRN_DBG_PIC, "%s pic special mask mode %s\n",
|
||||
master_pic(vpic, pic) ? "master" : "slave",
|
||||
pic->smm ? "enabled" : "disabled");
|
||||
}
|
||||
|
||||
if (val & OCW3_RR) {
|
||||
/* read register command */
|
||||
pic->rd_cmd_reg = val & OCW3_RIS;
|
||||
|
||||
/* Polling mode */
|
||||
pic->poll = ((val & OCW3_P) != 0);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void vpic_set_pinstate(struct vpic *vpic, int pin, bool newstate)
|
||||
{
|
||||
struct pic *pic;
|
||||
int oldcnt, newcnt;
|
||||
bool level;
|
||||
|
||||
ASSERT(pin >= 0 && pin < 16,
|
||||
"vpic_set_pinstate: invalid pin number");
|
||||
|
||||
pic = &vpic->pic[pin >> 3];
|
||||
|
||||
oldcnt = pic->acnt[pin & 0x7];
|
||||
if (newstate)
|
||||
pic->acnt[pin & 0x7]++;
|
||||
else
|
||||
pic->acnt[pin & 0x7]--;
|
||||
newcnt = pic->acnt[pin & 0x7];
|
||||
|
||||
if (newcnt < 0) {
|
||||
pr_warn("pic pin%d: bad acnt %d\n", pin, newcnt);
|
||||
}
|
||||
|
||||
level = ((vpic->pic[pin >> 3].elc & (1 << (pin & 0x7))) != 0);
|
||||
|
||||
if ((oldcnt == 0 && newcnt == 1) || (newcnt > 0 && level == true)) {
|
||||
/* rising edge or level */
|
||||
dev_dbg(ACRN_DBG_PIC, "pic pin%d: asserted\n", pin);
|
||||
pic->request |= (1 << (pin & 0x7));
|
||||
} else if (oldcnt == 1 && newcnt == 0) {
|
||||
/* falling edge */
|
||||
dev_dbg(ACRN_DBG_PIC, "pic pin%d: deasserted\n", pin);
|
||||
if (level)
|
||||
pic->request &= ~(1 << (pin & 0x7));
|
||||
} else {
|
||||
dev_dbg(ACRN_DBG_PIC,
|
||||
"pic pin%d: %s, ignored, acnt %d\n",
|
||||
pin, newstate ? "asserted" : "deasserted", newcnt);
|
||||
}
|
||||
|
||||
vpic_notify_intr(vpic);
|
||||
}
|
||||
|
||||
static int vpic_set_irqstate(struct vm *vm, int irq, enum irqstate irqstate)
|
||||
{
|
||||
struct vpic *vpic;
|
||||
struct pic *pic;
|
||||
|
||||
if (irq < 0 || irq > 15)
|
||||
return -EINVAL;
|
||||
|
||||
vpic = vm_pic(vm);
|
||||
pic = &vpic->pic[irq >> 3];
|
||||
|
||||
if (pic->ready == false)
|
||||
return 0;
|
||||
|
||||
VPIC_LOCK(vpic);
|
||||
switch (irqstate) {
|
||||
case IRQSTATE_ASSERT:
|
||||
vpic_set_pinstate(vpic, irq, true);
|
||||
break;
|
||||
case IRQSTATE_DEASSERT:
|
||||
vpic_set_pinstate(vpic, irq, false);
|
||||
break;
|
||||
case IRQSTATE_PULSE:
|
||||
vpic_set_pinstate(vpic, irq, true);
|
||||
vpic_set_pinstate(vpic, irq, false);
|
||||
break;
|
||||
default:
|
||||
ASSERT(0, "vpic_set_irqstate: invalid irqstate");
|
||||
}
|
||||
VPIC_UNLOCK(vpic);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* hypervisor interface: assert/deassert/pulse irq */
|
||||
int vpic_assert_irq(struct vm *vm, int irq)
|
||||
{
|
||||
return vpic_set_irqstate(vm, irq, IRQSTATE_ASSERT);
|
||||
}
|
||||
|
||||
int vpic_deassert_irq(struct vm *vm, int irq)
|
||||
{
|
||||
return vpic_set_irqstate(vm, irq, IRQSTATE_DEASSERT);
|
||||
}
|
||||
|
||||
int vpic_pulse_irq(struct vm *vm, int irq)
|
||||
{
|
||||
return vpic_set_irqstate(vm, irq, IRQSTATE_PULSE);
|
||||
}
|
||||
|
||||
int vpic_set_irq_trigger(struct vm *vm, int irq, enum vpic_trigger trigger)
|
||||
{
|
||||
struct vpic *vpic;
|
||||
|
||||
if (irq < 0 || irq > 15)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* See comment in vpic_elc_handler. These IRQs must be
|
||||
* edge triggered.
|
||||
*/
|
||||
if (trigger == LEVEL_TRIGGER) {
|
||||
switch (irq) {
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 8:
|
||||
case 13:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
vpic = vm_pic(vm);
|
||||
|
||||
VPIC_LOCK(vpic);
|
||||
|
||||
if (trigger == LEVEL_TRIGGER)
|
||||
vpic->pic[irq >> 3].elc |= 1 << (irq & 0x7);
|
||||
else
|
||||
vpic->pic[irq >> 3].elc &= ~(1 << (irq & 0x7));
|
||||
|
||||
VPIC_UNLOCK(vpic);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vpic_get_irq_trigger(struct vm *vm, int irq, enum vpic_trigger *trigger)
|
||||
{
|
||||
struct vpic *vpic;
|
||||
|
||||
if (irq < 0 || irq > 15)
|
||||
return -EINVAL;
|
||||
|
||||
vpic = vm_pic(vm);
|
||||
if (!vpic)
|
||||
return -EINVAL;
|
||||
|
||||
if (vpic->pic[irq>>3].elc & (1 << (irq & 0x7)))
|
||||
*trigger = LEVEL_TRIGGER;
|
||||
else
|
||||
*trigger = EDGE_TRIGGER;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void vpic_pending_intr(struct vm *vm, int *vecptr)
|
||||
{
|
||||
struct vpic *vpic;
|
||||
struct pic *pic;
|
||||
int pin;
|
||||
|
||||
vpic = vm_pic(vm);
|
||||
|
||||
pic = &vpic->pic[0];
|
||||
|
||||
VPIC_LOCK(vpic);
|
||||
|
||||
pin = vpic_get_highest_irrpin(pic);
|
||||
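/* Pin 2 on the master is the cascade input, so the real source is a pin on the slave. */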
if (pin == 2) {
|
||||
pic = &vpic->pic[1];
|
||||
pin = vpic_get_highest_irrpin(pic);
|
||||
}
|
||||
|
||||
/*
|
||||
* If there are no pins active at this moment then return -1 so the
|
||||
* caller treats it as no pending interrupt.
|
||||
*/
|
||||
if (pin == -1) {
|
||||
*vecptr = -1;
|
||||
VPIC_UNLOCK(vpic);
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT(pin >= 0 && pin <= 7, "invalid pin");
|
||||
*vecptr = pic->irq_base + pin;
|
||||
|
||||
dev_dbg(ACRN_DBG_PIC, "Got pending vector 0x%x\n", *vecptr);
|
||||
|
||||
VPIC_UNLOCK(vpic);
|
||||
}
|
||||
|
||||
static void vpic_pin_accepted(struct pic *pic, int pin)
|
||||
{
|
||||
pic->intr_raised = false;
|
||||
|
||||
if ((pic->elc & (1 << pin)) == 0) {
|
||||
/* clear the request bit only for edge-triggered pins */
|
||||
pic->request &= ~(1 << pin);
|
||||
}
|
||||
|
||||
if (pic->aeoi == true) {
|
||||
if (pic->rotate == true)
|
||||
pic->lowprio = pin;
|
||||
} else {
|
||||
pic->service |= (1 << pin);
|
||||
}
|
||||
}
|
||||
|
||||
void vpic_intr_accepted(struct vm *vm, int vector)
|
||||
{
|
||||
struct vpic *vpic;
|
||||
int pin;
|
||||
|
||||
vpic = vm_pic(vm);
|
||||
|
||||
VPIC_LOCK(vpic);
|
||||
|
||||
pin = vector & 0x7;
|
||||
|
||||
if ((vector & ~0x7) == vpic->pic[1].irq_base) {
|
||||
vpic_pin_accepted(&vpic->pic[1], pin);
|
||||
/*
|
||||
* If this vector originated from the slave,
|
||||
* accept the cascaded interrupt too.
|
||||
*/
|
||||
vpic_pin_accepted(&vpic->pic[0], 2);
|
||||
} else {
|
||||
vpic_pin_accepted(&vpic->pic[0], pin);
|
||||
}
|
||||
|
||||
vpic_notify_intr(vpic);
|
||||
|
||||
VPIC_UNLOCK(vpic);
|
||||
}
|
||||
|
||||
static int vpic_read(struct vpic *vpic, struct pic *pic,
|
||||
int port, uint32_t *eax)
|
||||
{
|
||||
int pin;
|
||||
|
||||
VPIC_LOCK(vpic);
|
||||
|
||||
if (pic->poll) {
|
||||
pic->poll = 0;
|
||||
pin = vpic_get_highest_irrpin(pic);
|
||||
if (pin >= 0) {
|
||||
vpic_pin_accepted(pic, pin);
|
||||
*eax = 0x80 | pin;
|
||||
} else {
|
||||
*eax = 0;
|
||||
}
|
||||
} else {
|
||||
if (port & ICU_IMR_OFFSET) {
|
||||
/* read interrupt mask register */
|
||||
*eax = pic->mask;
|
||||
} else {
|
||||
if (pic->rd_cmd_reg == OCW3_RIS) {
|
||||
/* read interrupt service register */
|
||||
*eax = pic->service;
|
||||
} else {
|
||||
/* read interrupt request register */
|
||||
*eax = pic->request;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
VPIC_UNLOCK(vpic);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vpic_write(struct vpic *vpic, struct pic *pic,
|
||||
int port, uint32_t *eax)
|
||||
{
|
||||
int error;
|
||||
uint8_t val;
|
||||
|
||||
error = 0;
|
||||
val = *eax;
|
||||
|
||||
VPIC_LOCK(vpic);
|
||||
|
||||
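/* Data port (base + 1): ICW2..ICW4 while the init sequence is in
 * progress, otherwise OCW1 (the interrupt mask). Command port: bit 4
 * set means ICW1; once the PIC is ready, bit 3 selects OCW3 over OCW2.
 */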
if (port & ICU_IMR_OFFSET) {
|
||||
switch (pic->icw_num) {
|
||||
case 2:
|
||||
error = vpic_icw2(vpic, pic, val);
|
||||
break;
|
||||
case 3:
|
||||
error = vpic_icw3(vpic, pic, val);
|
||||
break;
|
||||
case 4:
|
||||
error = vpic_icw4(vpic, pic, val);
|
||||
break;
|
||||
default:
|
||||
error = vpic_ocw1(vpic, pic, val);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (val & (1 << 4))
|
||||
error = vpic_icw1(vpic, pic, val);
|
||||
|
||||
if (pic->ready) {
|
||||
if (val & (1 << 3))
|
||||
error = vpic_ocw3(vpic, pic, val);
|
||||
else
|
||||
error = vpic_ocw2(vpic, pic, val);
|
||||
}
|
||||
}
|
||||
|
||||
if (pic->ready)
|
||||
vpic_notify_intr(vpic);
|
||||
|
||||
VPIC_UNLOCK(vpic);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static int vpic_master_handler(struct vm *vm, bool in, int port, int bytes,
|
||||
uint32_t *eax)
|
||||
{
|
||||
struct vpic *vpic;
|
||||
struct pic *pic;
|
||||
|
||||
vpic = vm_pic(vm);
|
||||
pic = &vpic->pic[0];
|
||||
|
||||
if (bytes != 1)
|
||||
return -1;
|
||||
|
||||
if (in)
|
||||
return vpic_read(vpic, pic, port, eax);
|
||||
|
||||
return vpic_write(vpic, pic, port, eax);
|
||||
}
|
||||
|
||||
static uint32_t vpic_master_io_read(__unused struct vm_io_handler *hdlr,
|
||||
struct vm *vm, ioport_t addr, size_t width)
|
||||
{
|
||||
uint32_t val = 0;
|
||||
|
||||
if (vpic_master_handler(vm, true, (int)addr, (int)width, &val) < 0)
|
||||
pr_err("pic master read port 0x%x width=%d failed\n",
|
||||
addr, width);
|
||||
return val;
|
||||
}
|
||||
|
||||
static void vpic_master_io_write(__unused struct vm_io_handler *hdlr,
|
||||
struct vm *vm, ioport_t addr, size_t width, uint32_t v)
|
||||
{
|
||||
uint32_t val = v;
|
||||
|
||||
if (vpic_master_handler(vm, false, (int)addr, (int)width, &val) < 0)
|
||||
pr_err("%s: write port 0x%x width=%d value 0x%x failed\n",
|
||||
__func__, addr, width, val);
|
||||
}
|
||||
|
||||
static int vpic_slave_handler(struct vm *vm, bool in, int port, int bytes,
|
||||
uint32_t *eax)
|
||||
{
|
||||
struct vpic *vpic;
|
||||
struct pic *pic;
|
||||
|
||||
vpic = vm_pic(vm);
|
||||
pic = &vpic->pic[1];
|
||||
|
||||
if (bytes != 1)
|
||||
return -1;
|
||||
|
||||
if (in)
|
||||
return vpic_read(vpic, pic, port, eax);
|
||||
|
||||
return vpic_write(vpic, pic, port, eax);
|
||||
}
|
||||
|
||||
static uint32_t vpic_slave_io_read(__unused struct vm_io_handler *hdlr,
|
||||
struct vm *vm, ioport_t addr, size_t width)
|
||||
{
|
||||
uint32_t val = 0;
|
||||
|
||||
if (vpic_slave_handler(vm, true, (int)addr, (int)width, &val) < 0)
|
||||
pr_err("pic slave read port 0x%x width=%d failed\n",
|
||||
addr, width);
|
||||
return val;
|
||||
}
|
||||
|
||||
static void vpic_slave_io_write(__unused struct vm_io_handler *hdlr,
|
||||
struct vm *vm, ioport_t addr, size_t width, uint32_t v)
|
||||
{
|
||||
uint32_t val = v;
|
||||
|
||||
if (vpic_slave_handler(vm, false, (int)addr, (int)width, &val) < 0)
|
||||
pr_err("%s: write port 0x%x width=%d value 0x%x failed\n",
|
||||
__func__, addr, width, val);
|
||||
}
|
||||
|
||||
static int vpic_elc_handler(struct vm *vm, bool in, int port, int bytes,
|
||||
uint32_t *eax)
|
||||
{
|
||||
struct vpic *vpic;
|
||||
bool is_master;
|
||||
|
||||
vpic = vm_pic(vm);
|
||||
is_master = (port == IO_ELCR1);
|
||||
|
||||
if (bytes != 1)
|
||||
return -1;
|
||||
|
||||
VPIC_LOCK(vpic);
|
||||
|
||||
if (in) {
|
||||
if (is_master)
|
||||
*eax = vpic->pic[0].elc;
|
||||
else
|
||||
*eax = vpic->pic[1].elc;
|
||||
} else {
|
||||
/*
|
||||
* For the master PIC the cascade channel (IRQ2), the
|
||||
* heart beat timer (IRQ0), and the keyboard
|
||||
* controller (IRQ1) cannot be programmed for level
|
||||
* mode.
|
||||
*
|
||||
* For the slave PIC the real time clock (IRQ8) and
|
||||
* the floating point error interrupt (IRQ13) cannot
|
||||
* be programmed for level mode.
|
||||
*/
|
||||
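/* 0xf8 keeps IRQ0-IRQ2 edge-triggered on the master; 0xde keeps
 * IRQ8 (bit 0) and IRQ13 (bit 5) edge-triggered on the slave.
 */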
if (is_master)
|
||||
vpic->pic[0].elc = (*eax & 0xf8);
|
||||
else
|
||||
vpic->pic[1].elc = (*eax & 0xde);
|
||||
}
|
||||
|
||||
VPIC_UNLOCK(vpic);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static uint32_t vpic_elc_io_read(__unused struct vm_io_handler *hdlr,
|
||||
struct vm *vm, ioport_t addr, size_t width)
|
||||
{
|
||||
uint32_t val = 0;
|
||||
|
||||
if (vpic_elc_handler(vm, true, (int)addr, (int)width, &val) < 0)
|
||||
pr_err("pic elc read port 0x%x width=%d failed", addr, width);
|
||||
return val;
|
||||
}
|
||||
|
||||
static void vpic_elc_io_write(__unused struct vm_io_handler *hdlr,
|
||||
struct vm *vm, ioport_t addr, size_t width, uint32_t v)
|
||||
{
|
||||
uint32_t val = v;
|
||||
|
||||
if (vpic_elc_handler(vm, false, (int)addr, (int)width, &val) < 0)
|
||||
pr_err("%s: write port 0x%x width=%d value 0x%x failed\n",
|
||||
__func__, addr, width, val);
|
||||
}
|
||||
|
||||
void vpic_register_io_handler(struct vm *vm)
|
||||
{
|
||||
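/* Standard PC ports: 0x20-0x21 master PIC, 0xa0-0xa1 slave PIC,
 * 0x4d0-0x4d1 edge/level control registers.
 */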
struct vm_io_range master_range = {
|
||||
.flags = IO_ATTR_RW,
|
||||
.base = 0x20,
|
||||
.len = 2
|
||||
};
|
||||
struct vm_io_range slave_range = {
|
||||
.flags = IO_ATTR_RW,
|
||||
.base = 0xa0,
|
||||
.len = 2
|
||||
};
|
||||
struct vm_io_range elcr_range = {
|
||||
.flags = IO_ATTR_RW,
|
||||
.base = 0x4d0,
|
||||
.len = 2
|
||||
};
|
||||
|
||||
register_io_emulation_handler(vm, &master_range,
|
||||
&vpic_master_io_read, &vpic_master_io_write);
|
||||
register_io_emulation_handler(vm, &slave_range,
|
||||
&vpic_slave_io_read, &vpic_slave_io_write);
|
||||
register_io_emulation_handler(vm, &elcr_range,
|
||||
&vpic_elc_io_read, &vpic_elc_io_write);
|
||||
}
|
||||
|
||||
void *vpic_init(struct vm *vm)
|
||||
{
|
||||
struct vpic *vpic;
|
||||
|
||||
vpic_register_io_handler(vm);
|
||||
|
||||
vpic = malloc(sizeof(struct vpic));
|
||||
ASSERT(vpic != NULL, "");
|
||||
vpic->vm = vm;
|
||||
vpic->pic[0].mask = 0xff;
|
||||
vpic->pic[1].mask = 0xff;
|
||||
|
||||
VPIC_LOCK_INIT(vpic);
|
||||
|
||||
return vpic;
|
||||
}
|
||||
|
||||
void vpic_cleanup(struct vm *vm)
|
||||
{
|
||||
if (vm->vpic) {
|
||||
free(vm->vpic);
|
||||
vm->vpic = NULL;
|
||||
}
|
||||
}
|
441
hypervisor/arch/x86/idt.S
Normal file
@@ -0,0 +1,441 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <gdt.h>
|
||||
#include <idt.h>
|
||||
|
||||
.altmacro
|
||||
|
||||
.global HOST_IDT
|
||||
.global HOST_IDTR
|
||||
|
||||
.section .data
|
||||
.align 8
|
||||
.long 0
|
||||
.short 0
|
||||
HOST_IDTR:
|
||||
.short HOST_IDT_SIZE - 1
|
||||
.quad HOST_IDT
|
||||
|
||||
/*
|
||||
* We'll rearrange and fix up the descriptors at runtime
|
||||
*/
|
||||
.macro interrupt_descriptor entry, dpl=0, ist=0
|
||||
.long HOST_GDT_RING0_CODE_SEL << 16
|
||||
.long 0x00008e00 + (dpl << 13) + ist
|
||||
.quad entry
|
||||
.endm
|
||||
|
||||
.macro trap_descriptor entry, dpl=0, ist=0
|
||||
.long HOST_GDT_RING0_CODE_SEL << 16
|
||||
.long 0x00008f00 + (dpl << 13) + ist
|
||||
.quad entry
|
||||
.endm
|
||||
|
||||
|
||||
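/* Two-level macro: with .altmacro in effect, %vector evaluates the
 * counter to its numeric value so it can be pasted into the handler
 * symbol name.
 */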
.macro _external_interrupt_descriptor vector
|
||||
__external_interrupt_descriptor %vector
|
||||
.endm
|
||||
|
||||
|
||||
.macro __external_interrupt_descriptor vector
|
||||
interrupt_descriptor external_interrupt_\vector
|
||||
.endm
|
||||
|
||||
#define MACHINE_CHECK_IST (0x1)
|
||||
#define DOUBLE_FAULT_IST (0x2)
|
||||
#define STACK_FAULT_IST (0x3)
|
||||
|
||||
/*
|
||||
* We'll use interrupt gates. Change to trap or task only as needed.
|
||||
*/
|
||||
.section .rodata
|
||||
.align 16
|
||||
HOST_IDT:
|
||||
interrupt_descriptor excp_divide_error
|
||||
interrupt_descriptor excp_debug, 3
|
||||
interrupt_descriptor excp_nmi
|
||||
interrupt_descriptor excp_breakpoint, 3
|
||||
interrupt_descriptor excp_overflow, 3
|
||||
interrupt_descriptor excp_bounds_check
|
||||
interrupt_descriptor excp_illegal_opcode
|
||||
interrupt_descriptor excp_device_not_available
|
||||
interrupt_descriptor excp_double_fault, 0, DOUBLE_FAULT_IST
|
||||
interrupt_descriptor excp_rsvd_09
|
||||
interrupt_descriptor excp_invalid_tss
|
||||
interrupt_descriptor excp_segment_not_present
|
||||
interrupt_descriptor excp_stack_fault, 0, STACK_FAULT_IST
|
||||
interrupt_descriptor excp_general_protection
|
||||
interrupt_descriptor excp_page_fault
|
||||
interrupt_descriptor excp_rsvd_0f
|
||||
interrupt_descriptor excp_float_error
|
||||
interrupt_descriptor excp_alignment_check
|
||||
interrupt_descriptor expt_machine_check, 0, MACHINE_CHECK_IST
|
||||
interrupt_descriptor excp_simd_fp_error
|
||||
interrupt_descriptor excp_virtualization
|
||||
interrupt_descriptor excp_rsvd_21
|
||||
interrupt_descriptor excp_rsvd_22
|
||||
interrupt_descriptor excp_rsvd_23
|
||||
interrupt_descriptor excp_rsvd_24
|
||||
interrupt_descriptor excp_rsvd_25
|
||||
interrupt_descriptor excp_rsvd_26
|
||||
interrupt_descriptor excp_rsvd_27
|
||||
interrupt_descriptor excp_rsvd_28
|
||||
interrupt_descriptor excp_rsvd_29
|
||||
interrupt_descriptor excp_rsvd_30
|
||||
interrupt_descriptor excp_rsvd_31
|
||||
|
||||
vector = 0x20
|
||||
.rept (0x100 - 0x20)
|
||||
_external_interrupt_descriptor vector
|
||||
vector = vector + 1
|
||||
.endr
|
||||
|
||||
.section .text
|
||||
.align 16
|
||||
excp_divide_error:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x00
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_debug:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x01
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_nmi:
|
||||
|
||||
|
||||
|
||||
|
||||
.align 8
|
||||
excp_breakpoint:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x03
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_overflow:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x04
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_bounds_check:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x05
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_illegal_opcode:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x06
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_device_not_available:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x07
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_double_fault:
|
||||
pushq $0x08
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_invalid_tss:
|
||||
pushq $0x0A
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_segment_not_present:
|
||||
pushq $0x0B
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_stack_fault:
|
||||
pushq $0x0C
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_general_protection:
|
||||
pushq $0x0D
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_page_fault:
|
||||
pushq $0x0E
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_float_error:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x10
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_alignment_check:
|
||||
pushq $0x11
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
expt_machine_check:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x12
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_simd_fp_error:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x13
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_virtualization:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x14
|
||||
jmp excp_save_frame
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Macros for rsvd vectors. Vectors 0x09, 0x0F, 0x15 through 0x1F
|
||||
*/
|
||||
.macro _rsvd_vector vector
|
||||
__rsvd_vector %vector
|
||||
.endm
|
||||
|
||||
.macro __rsvd_vector vector
|
||||
.align 8
|
||||
excp_rsvd_\vector\():
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $\vector
|
||||
jmp excp_rsvd
|
||||
.endm
|
||||
|
||||
.align 8
|
||||
excp_rsvd_09:
|
||||
_rsvd_vector 0x09
|
||||
|
||||
.align 8
|
||||
excp_rsvd_0f:
|
||||
_rsvd_vector 0x0f
|
||||
|
||||
vector = 0x15
|
||||
.rept (0x20 - 0x15)
|
||||
_rsvd_vector vector
|
||||
vector = vector + 1
|
||||
.endr
|
||||
|
||||
|
||||
|
||||
/*
|
||||
 * Macros for external interrupts. Vectors 0x20 through 0xFF
|
||||
*/
|
||||
.macro _external_interrupt vector
|
||||
__external_interrupt %vector
|
||||
.endm
|
||||
|
||||
.macro __external_interrupt vector
|
||||
.align 8
|
||||
external_interrupt_\vector\():
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $\vector
|
||||
jmp external_interrupt_save_frame
|
||||
.endm
|
||||
|
||||
vector = 0x20
|
||||
.rept (0x100 - 0x20)
|
||||
_external_interrupt vector
|
||||
vector = vector + 1
|
||||
.endr
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Common entry point for defined exceptions
|
||||
*/
|
||||
.align 8
|
||||
excp_save_frame:
|
||||
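/* The vector and (pseudo) error code are already on the stack; save the
 * remaining general purpose registers and hand the frame to
 * dispatch_exception through %rdi.
 */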
pushq %r11
|
||||
pushq %r10
|
||||
pushq %r9
|
||||
pushq %r8
|
||||
pushq %rdi
|
||||
pushq %rsi
|
||||
pushq %rdx
|
||||
pushq %rcx
|
||||
pushq %rax
|
||||
pushq %rbp
|
||||
pushq %rbx
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r13
|
||||
pushq %r12
|
||||
|
||||
/* Put current stack pointer into 1st param register (rdi) */
|
||||
movq %rsp, %rdi
|
||||
|
||||
call dispatch_exception
|
||||
|
||||
popq %r12
|
||||
popq %r13
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbx
|
||||
popq %rbp
|
||||
popq %rax
|
||||
popq %rcx
|
||||
popq %rdx
|
||||
popq %rsi
|
||||
popq %rdi
|
||||
popq %r8
|
||||
popq %r9
|
||||
popq %r10
|
||||
popq %r11
|
||||
|
||||
/* Skip vector and error code*/
|
||||
add $16, %rsp
|
||||
|
||||
iretq
|
||||
|
||||
|
||||
/*
|
||||
* Common entry point for reserved exceptions.
|
||||
* These should never execute.
|
||||
* We put a handler on them anyway to highlight the unexpected.
|
||||
*/
|
||||
.align 8
|
||||
excp_rsvd:
|
||||
pushq %r11
|
||||
pushq %r10
|
||||
pushq %r9
|
||||
pushq %r8
|
||||
pushq %rdi
|
||||
pushq %rsi
|
||||
pushq %rdx
|
||||
pushq %rcx
|
||||
pushq %rax
|
||||
|
||||
|
||||
pushq %rbp
|
||||
pushq %rbx
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r13
|
||||
pushq %r12
|
||||
|
||||
/* Put current stack pointer into 1st param register (rdi) */
|
||||
movq %rsp, %rdi
|
||||
|
||||
call dispatch_exception
|
||||
|
||||
popq %r12
|
||||
popq %r13
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbx
|
||||
popq %rbp
|
||||
|
||||
popq %rax
|
||||
popq %rcx
|
||||
popq %rdx
|
||||
popq %rsi
|
||||
popq %rdi
|
||||
popq %r8
|
||||
popq %r9
|
||||
popq %r10
|
||||
popq %r11
|
||||
|
||||
/* Skip vector and error code*/
|
||||
add $16, %rsp
|
||||
|
||||
iretq
|
||||
|
||||
|
||||
/*
|
||||
* Common entry point for defined interrupts.
|
||||
* Vectors 0x20 through 0xFF
|
||||
*/
|
||||
.align 8
|
||||
external_interrupt_save_frame:
|
||||
pushq %r11
|
||||
pushq %r10
|
||||
pushq %r9
|
||||
pushq %r8
|
||||
pushq %rdi
|
||||
pushq %rsi
|
||||
pushq %rdx
|
||||
pushq %rcx
|
||||
pushq %rax
|
||||
|
||||
|
||||
pushq %rbp
|
||||
pushq %rbx
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r13
|
||||
pushq %r12
|
||||
|
||||
/* Put current stack pointer into 1st param register (rdi) */
|
||||
movq %rsp, %rdi
|
||||
|
||||
call dispatch_interrupt
|
||||
|
||||
/*
|
||||
* We skip the softirq path on interrupt IRET, since right now all IRQs
|
||||
* are for the guest and softirqs can be executed in the hv_main() loop
|
||||
*/
|
||||
|
||||
popq %r12
|
||||
popq %r13
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbx
|
||||
popq %rbp
|
||||
|
||||
popq %rax
|
||||
popq %rcx
|
||||
popq %rdx
|
||||
popq %rsi
|
||||
popq %rdi
|
||||
popq %r8
|
||||
popq %r9
|
||||
popq %r10
|
||||
popq %r11
|
||||
|
||||
/* Skip vector and error code*/
|
||||
add $16, %rsp
|
||||
|
||||
iretq
|
||||
|
431
hypervisor/arch/x86/interrupt.c
Normal file
@@ -0,0 +1,431 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
#define EXCEPTION_ERROR_CODE_VALID 8
|
||||
#define INTERRPUT_QUEUE_BUFF_SIZE 255
|
||||
|
||||
#define ACRN_DBG_INTR 6
|
||||
|
||||
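/* Exceptions 8 (#DF), 10-14 (#TS, #NP, #SS, #GP, #PF) and 17 (#AC)
 * deliver a hardware error code, hence the EXCEPTION_ERROR_CODE_VALID
 * flag on those entries.
 */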
static const uint16_t exception_type[] = {
|
||||
[0] = VMX_INT_TYPE_HW_EXP,
|
||||
[1] = VMX_INT_TYPE_HW_EXP,
|
||||
[2] = VMX_INT_TYPE_HW_EXP,
|
||||
[3] = VMX_INT_TYPE_HW_EXP,
|
||||
[4] = VMX_INT_TYPE_HW_EXP,
|
||||
[5] = VMX_INT_TYPE_HW_EXP,
|
||||
[6] = VMX_INT_TYPE_HW_EXP,
|
||||
[7] = VMX_INT_TYPE_HW_EXP,
|
||||
[8] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID,
|
||||
[9] = VMX_INT_TYPE_HW_EXP,
|
||||
[10] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID,
|
||||
[11] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID,
|
||||
[12] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID,
|
||||
[13] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID,
|
||||
[14] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID,
|
||||
[15] = VMX_INT_TYPE_HW_EXP,
|
||||
[16] = VMX_INT_TYPE_HW_EXP,
|
||||
[17] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID,
|
||||
[18] = VMX_INT_TYPE_HW_EXP,
|
||||
[19] = VMX_INT_TYPE_HW_EXP,
|
||||
[20] = VMX_INT_TYPE_HW_EXP,
|
||||
[21] = VMX_INT_TYPE_HW_EXP,
|
||||
[22] = VMX_INT_TYPE_HW_EXP,
|
||||
[23] = VMX_INT_TYPE_HW_EXP,
|
||||
[24] = VMX_INT_TYPE_HW_EXP,
|
||||
[25] = VMX_INT_TYPE_HW_EXP,
|
||||
[26] = VMX_INT_TYPE_HW_EXP,
|
||||
[27] = VMX_INT_TYPE_HW_EXP,
|
||||
[28] = VMX_INT_TYPE_HW_EXP,
|
||||
[29] = VMX_INT_TYPE_HW_EXP,
|
||||
[30] = VMX_INT_TYPE_HW_EXP,
|
||||
[31] = VMX_INT_TYPE_HW_EXP
|
||||
};
|
||||
|
||||
static int is_guest_irq_enabled(struct vcpu *vcpu)
|
||||
{
|
||||
struct run_context *cur_context =
|
||||
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
|
||||
uint32_t guest_rflags, guest_state;
|
||||
int status = false;
|
||||
|
||||
/* Read the RFLAGS of the guest */
|
||||
guest_rflags = cur_context->rflags;
|
||||
/* Check the RFLAGS[IF] bit first */
|
||||
if (guest_rflags & HV_ARCH_VCPU_RFLAGS_IF) {
|
||||
/* Interrupts are allowed */
|
||||
/* Check for temporarily disabled interrupts */
|
||||
guest_state = exec_vmread(VMX_GUEST_INTERRUPTIBILITY_INFO);
|
||||
|
||||
if ((guest_state & (HV_ARCH_VCPU_BLOCKED_BY_STI |
|
||||
HV_ARCH_VCPU_BLOCKED_BY_MOVSS)) == 0) {
|
||||
status = true;
|
||||
}
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
static bool vcpu_pending_request(struct vcpu *vcpu)
|
||||
{
|
||||
struct vlapic *vlapic;
|
||||
int vector = 0;
|
||||
int ret = 0;
|
||||
|
||||
/* Query vLapic to get vector to inject */
|
||||
vlapic = vcpu->arch_vcpu.vlapic;
|
||||
ret = vlapic_pending_intr(vlapic, &vector);
|
||||
|
||||
/* we need to check and raise request if we have pending event
|
||||
* in LAPIC IRR
|
||||
*/
|
||||
if (ret != 0) {
|
||||
/* we have pending IRR */
|
||||
vcpu_make_request(vcpu, ACRN_REQUEST_EVENT);
|
||||
}
|
||||
|
||||
return vcpu->arch_vcpu.pending_intr != 0;
|
||||
}
|
||||
|
||||
int vcpu_make_request(struct vcpu *vcpu, int eventid)
|
||||
{
|
||||
bitmap_set(eventid, &vcpu->arch_vcpu.pending_intr);
|
||||
/*
|
||||
* if current hostcpu is not the target vcpu's hostcpu, we need
|
||||
* to invoke IPI to wake up target vcpu
|
||||
*
|
||||
* TODO: Here we just compare with cpuid, since cpuid currently is
|
||||
* global under the pCPU / vCPU 1:1 mapping. If vcpu scheduling is
|
||||
* enabled later, this needs to change to determine whether the target
|
||||
* vcpu is in VMX non-root or root mode
|
||||
*/
|
||||
if ((int)get_cpu_id() != vcpu->pcpu_id)
|
||||
send_single_ipi(vcpu->pcpu_id, VECTOR_NOTIFY_VCPU);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vcpu_do_pending_event(struct vcpu *vcpu)
|
||||
{
|
||||
struct vlapic *vlapic = vcpu->arch_vcpu.vlapic;
|
||||
int vector = 0;
|
||||
int ret = 0;
|
||||
|
||||
if (is_apicv_enabled()) {
|
||||
apicv_inject_pir(vlapic);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Query vLapic to get vector to inject */
|
||||
ret = vlapic_pending_intr(vlapic, &vector);
|
||||
|
||||
/*
|
||||
* From the Intel SDM, Volume 3, 6.3.2 Section "Maskable
|
||||
* Hardware Interrupts":
|
||||
* - maskable interrupt vectors [16,255] can be delivered
|
||||
* through the local APIC.
|
||||
*/
|
||||
if (ret == 0)
|
||||
return -1;
|
||||
|
||||
if (!(vector >= 16 && vector <= 255)) {
|
||||
dev_dbg(ACRN_DBG_INTR, "invalid vector %d from local APIC",
|
||||
vector);
|
||||
return -1;
|
||||
}
|
||||
|
||||
exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD, VMX_INT_INFO_VALID |
|
||||
(vector & 0xFF));
|
||||
|
||||
vlapic_intr_accepted(vlapic, vector);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vcpu_do_pending_extint(struct vcpu *vcpu)
|
||||
{
|
||||
struct vm *vm;
|
||||
struct vcpu *primary;
|
||||
int vector;
|
||||
|
||||
vm = vcpu->vm;
|
||||
|
||||
/* check if there is valid interrupt from vPIC, if yes just inject it */
|
||||
/* PIC only connect with primary CPU */
|
||||
primary = get_primary_vcpu(vm);
|
||||
if (vm->vpic && vcpu == primary) {
|
||||
|
||||
vpic_pending_intr(vcpu->vm, &vector);
|
||||
if (vector > 0) {
|
||||
dev_dbg(ACRN_DBG_INTR, "VPIC: to inject PIC vector %d\n",
|
||||
vector & 0xFF);
|
||||
exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD,
|
||||
VMX_INT_INFO_VALID |
|
||||
(vector & 0xFF));
|
||||
vpic_intr_accepted(vcpu->vm, vector);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vcpu_do_pending_gp(__unused struct vcpu *vcpu)
|
||||
{
|
||||
/* GP vector = 13 */
|
||||
exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD,
|
||||
VMX_INT_INFO_VALID | 13);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* please keep this for interrupt debug:
|
||||
* 1. Timer alive or not
|
||||
* 2. native LAPIC interrupt pending/EOI status
|
||||
* 3. CPU stuck or not
|
||||
*/
|
||||
void dump_lapic(void)
|
||||
{
|
||||
dev_dbg(ACRN_DBG_INTR,
|
||||
"LAPIC: TIME %08x, init=0x%x cur=0x%x ISR=0x%x IRR=0x%x",
|
||||
mmio_read_long(0xFEE00000 + LAPIC_LVT_TIMER_REGISTER),
|
||||
mmio_read_long(0xFEE00000 + LAPIC_INITIAL_COUNT_REGISTER),
|
||||
mmio_read_long(0xFEE00000 + LAPIC_CURRENT_COUNT_REGISTER),
|
||||
mmio_read_long(0xFEE00000 + LAPIC_IN_SERVICE_REGISTER_7),
|
||||
mmio_read_long(0xFEE00000 + LAPIC_INT_REQUEST_REGISTER_7));
|
||||
}
|
||||
|
||||
int vcpu_inject_extint(struct vcpu *vcpu)
|
||||
{
|
||||
return vcpu_make_request(vcpu, ACRN_REQUEST_EXTINT);
|
||||
}
|
||||
|
||||
int vcpu_inject_nmi(struct vcpu *vcpu)
|
||||
{
|
||||
return vcpu_make_request(vcpu, ACRN_REQUEST_NMI);
|
||||
}
|
||||
|
||||
int vcpu_inject_gp(struct vcpu *vcpu)
|
||||
{
|
||||
return vcpu_make_request(vcpu, ACRN_REQUEST_GP);
|
||||
}
|
||||
|
||||
int interrupt_win_exiting_handler(struct vcpu *vcpu)
|
||||
{
|
||||
int value32;
|
||||
|
||||
TRACE_2L(TRC_VMEXIT_INTERRUPT_WINDOW, 0, 0);
|
||||
|
||||
if (!vcpu)
|
||||
return -1;
|
||||
|
||||
if (vcpu_pending_request(vcpu)) {
|
||||
/* Do nothing
|
||||
* acrn_do_intr_process will continue for this vcpu
|
||||
*/
|
||||
} else {
|
||||
/* No interrupts to inject.
|
||||
* Disable the interrupt window exiting
|
||||
*/
|
||||
vcpu->arch_vcpu.irq_window_enabled = 0;
|
||||
value32 = exec_vmread(VMX_PROC_VM_EXEC_CONTROLS);
|
||||
value32 &= ~(VMX_PROCBASED_CTLS_IRQ_WIN);
|
||||
exec_vmwrite(VMX_PROC_VM_EXEC_CONTROLS, value32);
|
||||
}
|
||||
|
||||
VCPU_RETAIN_RIP(vcpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int external_interrupt_handler(struct vcpu *vcpu)
|
||||
{
|
||||
int vector = exec_vmread(VMX_EXIT_INT_INFO) & 0xFF;
|
||||
struct intr_ctx ctx;
|
||||
|
||||
ctx.vector = vector;
|
||||
/* do not RETAIN RIP for spurious interrupt */
|
||||
if (dispatch_interrupt(&ctx) == 0)
|
||||
VCPU_RETAIN_RIP(vcpu);
|
||||
|
||||
TRACE_2L(TRC_VMEXIT_EXTERNAL_INTERRUPT, vector, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
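/* Injection order below: a previously failed injection is retried
 * first, then exception, NMI, ExtINT, vLAPIC event and finally #GP;
 * if something is still pending afterwards, interrupt-window exiting
 * is enabled so injection is retried on the next VM exit.
 */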
int acrn_do_intr_process(struct vcpu *vcpu)
|
||||
{
|
||||
int ret = 0;
|
||||
int vector;
|
||||
int tmp;
|
||||
bool intr_pending = false;
|
||||
uint64_t *pending_intr_bits = &vcpu->arch_vcpu.pending_intr;
|
||||
|
||||
if (bitmap_test_and_clear(ACRN_REQUEST_TLB_FLUSH, pending_intr_bits))
|
||||
mmu_invept(vcpu);
|
||||
|
||||
if (bitmap_test_and_clear(ACRN_REQUEST_TMR_UPDATE, pending_intr_bits))
|
||||
vioapic_update_tmr(vcpu);
|
||||
|
||||
/* handling pending vector injection:
|
||||
* the previous injection can fail for many reasons, so re-inject it here
|
||||
*/
|
||||
if (vcpu->arch_vcpu.exit_interrupt_info & VMX_INT_INFO_VALID) {
|
||||
exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD,
|
||||
vcpu->arch_vcpu.exit_interrupt_info);
|
||||
goto INTR_WIN;
|
||||
}
|
||||
|
||||
/* handling exception request */
|
||||
vector = vcpu->arch_vcpu.exception_info.exception;
|
||||
|
||||
/* If there is a valid exception, inject exception to guest */
|
||||
if (vector >= 0) {
|
||||
if (exception_type[vector] &
|
||||
EXCEPTION_ERROR_CODE_VALID) {
|
||||
exec_vmwrite(VMX_ENTRY_EXCEPTION_EC,
|
||||
vcpu->arch_vcpu.exception_info.error);
|
||||
}
|
||||
|
||||
exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD,
|
||||
VMX_INT_INFO_VALID |
|
||||
((exception_type[vector] & 15) << 8)
|
||||
| (vector & 0xFF));
|
||||
|
||||
vcpu->arch_vcpu.exception_info.exception = -1;
|
||||
|
||||
goto INTR_WIN;
|
||||
}
|
||||
|
||||
/* Process pending interrupt requests */
|
||||
/* TODO: check the NMI interrupt window before injecting */
|
||||
if (bitmap_test_and_clear(ACRN_REQUEST_NMI, pending_intr_bits)) {
|
||||
/* Inject NMI vector = 2 */
|
||||
exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD,
|
||||
VMX_INT_INFO_VALID | (VMX_INT_TYPE_NMI << 8) | 2);
|
||||
|
||||
/* Intel SDM 10.8.1
|
||||
* NMI, SMI, INIT, ExtINT, or SIPI directly deliver to CPU
|
||||
* do not need EOI to LAPIC
|
||||
* However, ExtINT need EOI to PIC
|
||||
*/
|
||||
goto INTR_WIN;
|
||||
}
|
||||
|
||||
/* Guest interruptable or not */
|
||||
if (!is_guest_irq_enabled(vcpu)) {
|
||||
/* interrupt window unavailable */
|
||||
goto INTR_WIN;
|
||||
}
|
||||
|
||||
/* Inject external interrupt first */
|
||||
if (bitmap_test_and_clear(ACRN_REQUEST_EXTINT, pending_intr_bits)) {
|
||||
/* has pending external interrupts */
|
||||
ret = vcpu_do_pending_extint(vcpu);
|
||||
goto INTR_WIN;
|
||||
}
|
||||
|
||||
/* Inject vLAPIC vectors */
|
||||
if (bitmap_test_and_clear(ACRN_REQUEST_EVENT, pending_intr_bits)) {
|
||||
/* has pending vLAPIC interrupts */
|
||||
ret = vcpu_do_pending_event(vcpu);
|
||||
goto INTR_WIN;
|
||||
}
|
||||
|
||||
/* Inject GP event */
|
||||
if (bitmap_test_and_clear(ACRN_REQUEST_GP, pending_intr_bits)) {
|
||||
/* has pending GP interrupts */
|
||||
ret = vcpu_do_pending_gp(vcpu);
|
||||
goto INTR_WIN;
|
||||
}
|
||||
|
||||
INTR_WIN:
|
||||
/* check if we have new interrupt pending for next VMExit */
|
||||
intr_pending = vcpu_pending_request(vcpu);
|
||||
|
||||
/* Enable interrupt window exiting if pending */
|
||||
if (intr_pending && vcpu->arch_vcpu.irq_window_enabled == 0) {
|
||||
vcpu->arch_vcpu.irq_window_enabled = 1;
|
||||
tmp = exec_vmread(VMX_PROC_VM_EXEC_CONTROLS);
|
||||
tmp |= (VMX_PROCBASED_CTLS_IRQ_WIN);
|
||||
exec_vmwrite(VMX_PROC_VM_EXEC_CONTROLS, tmp);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int exception_handler(struct vcpu *vcpu)
|
||||
{
|
||||
uint32_t intinfo, int_err_code;
|
||||
uint32_t exception_vector;
|
||||
uint32_t cpl;
|
||||
int status = 0;
|
||||
|
||||
if (vcpu == NULL) {
|
||||
TRACE_4I(TRC_VMEXIT_EXCEPTION_OR_NMI, 0, 0, 0, 0);
|
||||
status = -EINVAL;
|
||||
}
|
||||
|
||||
if (status != 0)
|
||||
return status;
|
||||
|
||||
pr_dbg(" Handling guest exception");
|
||||
|
||||
/* Obtain VM-Exit information field pg 2912 */
|
||||
intinfo = exec_vmread(VMX_EXIT_INT_INFO);
|
||||
exception_vector = intinfo & 0xFF;
|
||||
/* Check if the exception caused by the guest is a HW exception. If the
|
||||
* exit occurred due to a HW exception, obtain the error code to be
|
||||
* conveyed to the guest via the stack
|
||||
*/
|
||||
if (intinfo & VMX_INT_INFO_ERR_CODE_VALID) {
|
||||
int_err_code = exec_vmread(VMX_EXIT_INT_EC);
|
||||
|
||||
/* get current privilege level and fault address */
|
||||
cpl = exec_vmread(VMX_GUEST_CS_ATTR);
|
||||
cpl = (cpl >> 5) & 3;
|
||||
|
||||
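/* Force bit 2 of the error code (the user/supervisor bit of a
 * page-fault error code) to reflect the guest privilege level.
 */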
if (cpl < 3)
|
||||
int_err_code &= ~4;
|
||||
else
|
||||
int_err_code |= 4;
|
||||
} else {
|
||||
int_err_code = 0;
|
||||
}
|
||||
|
||||
/* Handle all other exceptions */
|
||||
VCPU_RETAIN_RIP(vcpu);
|
||||
vcpu->arch_vcpu.exception_info.exception = exception_vector;
|
||||
vcpu->arch_vcpu.exception_info.error = int_err_code;
|
||||
|
||||
TRACE_4I(TRC_VMEXIT_EXCEPTION_OR_NMI,
|
||||
exception_vector, int_err_code, 2, 0);
|
||||
|
||||
return status;
|
||||
}
|
418
hypervisor/arch/x86/intr_lapic.c
Normal file
@@ -0,0 +1,418 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <bsp_extern.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
/* Rate range 1 to 1000 or 1uSec to 1mSec */
|
||||
#define APIC_TIMER_MAX 0xffffffff
|
||||
#define HYPE_PERIOD_MAX 1000
|
||||
#define APIC_DIVIDE_BY_ONE 0x0b
|
||||
#define PIT_TARGET 0x3FFF
|
||||
|
||||
/* xAPIC/x2APIC Interrupt Command Register (ICR) structure */
|
||||
union apic_icr {
|
||||
uint64_t value;
|
||||
struct {
|
||||
uint32_t lo_32;
|
||||
uint32_t hi_32;
|
||||
} value_32;
|
||||
struct {
|
||||
uint64_t vector:8;
|
||||
uint64_t delivery_mode:3;
|
||||
uint64_t destination_mode:1;
|
||||
uint64_t delivery_status:1;
|
||||
uint64_t rsvd_1:1;
|
||||
uint64_t level:1;
|
||||
uint64_t trigger_mode:1;
|
||||
uint64_t rsvd_2:2;
|
||||
uint64_t shorthand:2;
|
||||
uint64_t rsvd_3:12;
|
||||
uint64_t rsvd_4:32;
|
||||
} bits;
|
||||
struct {
|
||||
uint64_t rsvd_1:32;
|
||||
uint64_t rsvd_2:24;
|
||||
uint64_t dest_field:8;
|
||||
} x_bits;
|
||||
struct {
|
||||
uint64_t rsvd_1:32;
|
||||
uint64_t dest_field:32;
|
||||
} x2_bits;
|
||||
};
|
||||
|
||||
/* xAPIC/x2APIC Local Vector Table (LVT) register structure */
|
||||
union apic_lvt {
|
||||
uint32_t value;
|
||||
union {
|
||||
struct {
|
||||
uint32_t vector:8;
|
||||
uint32_t rsvd_1:4;
|
||||
uint32_t delivery_status:1;
|
||||
uint32_t rsvd_2:3;
|
||||
uint32_t mask:1;
|
||||
uint32_t mode:2;
|
||||
uint32_t rsvd_3:13;
|
||||
} timer;
|
||||
struct {
|
||||
uint32_t vector:8;
|
||||
uint32_t delivery_mode:3;
|
||||
uint32_t rsvd_1:1;
|
||||
uint32_t delivery_status:1;
|
||||
uint32_t rsvd_2:3;
|
||||
uint32_t mask:1;
|
||||
uint32_t rsvd_3:15;
|
||||
} cmci;
|
||||
struct {
|
||||
uint32_t vector:8;
|
||||
uint32_t delivery_mode:3;
|
||||
uint32_t rsvd_1:1;
|
||||
uint32_t delivery_status:1;
|
||||
uint32_t polarity:1;
|
||||
uint32_t remote_irr:1;
|
||||
uint32_t trigger_mode:1;
|
||||
uint32_t mask:1;
|
||||
uint32_t rsvd_2:15;
|
||||
} lint;
|
||||
struct {
|
||||
uint32_t vector:8;
|
||||
uint32_t rsvd_1:4;
|
||||
uint32_t delivery_status:1;
|
||||
uint32_t rsvd_2:3;
|
||||
uint32_t mask:1;
|
||||
uint32_t rsvd_3:15;
|
||||
} error;
|
||||
struct {
|
||||
uint32_t vector:8;
|
||||
uint32_t delivery_mode:3;
|
||||
uint32_t rsvd_1:1;
|
||||
uint32_t delivery_status:1;
|
||||
uint32_t rsvd_2:3;
|
||||
uint32_t mask:1;
|
||||
uint32_t rsvd_3:15;
|
||||
} pmc;
|
||||
struct {
|
||||
uint32_t vector:8;
|
||||
uint32_t delivery_mode:3;
|
||||
uint32_t rsvd_1:1;
|
||||
uint32_t delivery_status:1;
|
||||
uint32_t rsvd_2:3;
|
||||
uint32_t mask:1;
|
||||
uint32_t rsvd_3:15;
|
||||
} thermal;
|
||||
struct {
|
||||
uint32_t vector:8;
|
||||
uint32_t rsvd_1:4;
|
||||
uint32_t delivery_status:1;
|
||||
uint32_t rsvd_2:3;
|
||||
uint32_t mask:1;
|
||||
uint32_t rsvd_3:15;
|
||||
} common;
|
||||
} bits;
|
||||
};
|
||||
|
||||
union lapic_base_msr {
	uint64_t value;
	struct {
		uint64_t rsvd_1:8;
		uint64_t bsp:1;
		uint64_t rsvd_2:1;
		uint64_t x2APIC_enable:1;
		uint64_t xAPIC_enable:1;
		uint64_t lapic_paddr:24;
		uint64_t rsvd_3:28;
	} fields;
};

struct lapic_info {
	int init_status;
	struct {
		paddr_t paddr;
		vaddr_t vaddr;
	} xapic;
};

static struct lapic_info lapic_info;

static uint32_t read_lapic_reg32(uint32_t offset)
{
	ASSERT((offset >= 0x020) && (offset <= 0x3FF), "");
	return mmio_read_long(lapic_info.xapic.vaddr + offset);
}

static void write_lapic_reg32(uint32_t offset, uint32_t value)
{
	ASSERT((offset >= 0x020) && (offset <= 0x3FF), "");
	mmio_write_long(value, lapic_info.xapic.vaddr + offset);
}

static void clear_lapic_isr(void)
{
	uint64_t isr_reg = LAPIC_IN_SERVICE_REGISTER_0;

	/* This is an Intel recommended procedure and assures that the
	 * processor does not get hung up due to already set "in-service"
	 * interrupts left over from the boot loader environment. This
	 * actually occurs in real life, therefore we will ensure all the
	 * in-service bits are clear.
	 */
	do {
		if (read_lapic_reg32(isr_reg)) {
			write_lapic_reg32(LAPIC_EOI_REGISTER, 0);
			continue;
		}
		isr_reg += 0x10;
	} while (isr_reg <= LAPIC_IN_SERVICE_REGISTER_7);
}

static void map_lapic(void)
{
	/* At some point we may need to translate this paddr to a vaddr.
	 * 1:1 mapping for now.
	 */
	lapic_info.xapic.vaddr = lapic_info.xapic.paddr;
}

int early_init_lapic(void)
{
	union lapic_base_msr lapic_base_msr;

	/* Get local APIC base address */
	lapic_base_msr.value = msr_read(MSR_IA32_APIC_BASE);

	/* Initialize globals only 1 time */
	if (lapic_info.init_status == false) {
		/* Get Local APIC physical address. */
		lapic_info.xapic.paddr = LAPIC_BASE;

		/* Map in the local xAPIC */
		map_lapic();

		lapic_info.init_status = true;
	}

	/* Check if xAPIC mode enabled */
	if (lapic_base_msr.fields.xAPIC_enable == 0) {
		/* Ensure in xAPIC mode */
		lapic_base_msr.fields.xAPIC_enable = 1;
		lapic_base_msr.fields.x2APIC_enable = 0;
		msr_write(MSR_IA32_APIC_BASE, lapic_base_msr.value);
	} else {
		/* Check if x2apic is disabled */
		ASSERT(lapic_base_msr.fields.x2APIC_enable == 0,
			"Disable X2APIC in BIOS");
	}

	return 0;
}

int init_lapic(uint32_t cpu_id)
{
	/* Set the Logical Destination Register */
	write_lapic_reg32(LAPIC_LOGICAL_DESTINATION_REGISTER,
		(1 << cpu_id) << 24);

	/* Set the Destination Format Register */
	write_lapic_reg32(LAPIC_DESTINATION_FORMAT_REGISTER, 0xf << 28);

	/* Mask all LAPIC LVT entries before enabling the local APIC */
	write_lapic_reg32(LAPIC_LVT_CMCI_REGISTER, LAPIC_LVT_MASK);
	write_lapic_reg32(LAPIC_LVT_TIMER_REGISTER, LAPIC_LVT_MASK);
	write_lapic_reg32(LAPIC_LVT_THERMAL_SENSOR_REGISTER, LAPIC_LVT_MASK);
	write_lapic_reg32(LAPIC_LVT_PMC_REGISTER, LAPIC_LVT_MASK);
	write_lapic_reg32(LAPIC_LVT_LINT0_REGISTER, LAPIC_LVT_MASK);
	write_lapic_reg32(LAPIC_LVT_LINT1_REGISTER, LAPIC_LVT_MASK);
	write_lapic_reg32(LAPIC_LVT_ERROR_REGISTER, LAPIC_LVT_MASK);

	/* Enable Local APIC */
	/* TODO: add spurious-interrupt handler */
	write_lapic_reg32(LAPIC_SPURIOUS_VECTOR_REGISTER,
		LAPIC_SVR_APIC_ENABLE_MASK | LAPIC_SVR_VECTOR);

	/* Ensure there are no ISR bits set. */
	clear_lapic_isr();

	return 0;
}

int send_lapic_eoi(void)
{
	write_lapic_reg32(LAPIC_EOI_REGISTER, 0);
	return 0;
}

static void wait_for_delivery(void)
{
	union apic_icr tmp;

	do {
		tmp.value_32.lo_32 =
			read_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0);
	} while (tmp.bits.delivery_status);
}

uint32_t get_cur_lapic_id(void)
{
	uint32_t lapic_id;

	lapic_id = read_lapic_reg32(LAPIC_ID_REGISTER);
	lapic_id = (lapic_id >> 24);

	return lapic_id;
}

int
send_startup_ipi(enum intr_cpu_startup_shorthand cpu_startup_shorthand,
	uint32_t cpu_startup_dest, paddr_t cpu_startup_start_address)
{
	union apic_icr icr;
	uint8_t shorthand;
	int status = 0;
	uint32_t eax, ebx, ecx, edx;
	uint32_t family;

	if (cpu_startup_shorthand >= INTR_CPU_STARTUP_UNKNOWN)
		status = -EINVAL;

	ASSERT(status == 0, "Incorrect arguments");

	icr.value = 0;
	icr.bits.destination_mode = INTR_LAPIC_ICR_PHYSICAL;

	if (cpu_startup_shorthand == INTR_CPU_STARTUP_USE_DEST) {
		shorthand = INTR_LAPIC_ICR_USE_DEST_ARRAY;
		icr.x_bits.dest_field = per_cpu(lapic_id, cpu_startup_dest);
	} else { /* Use destination shorthand */
		shorthand = INTR_LAPIC_ICR_ALL_EX_SELF;
		icr.value_32.hi_32 = 0;
	}

	/*
	 * family calculation from SDM Vol. 2A
	 * CPUID with INPUT EAX=01h: Returns Model, Family, Stepping Information
	 */
	cpuid(CPUID_FEATURES, &eax, &ebx, &ecx, &edx);
	family = (eax >> 8) & 0xff;
	if (family == 0xF)
		family += (eax >> 20) & 0xff;

	/* Assert INIT IPI */
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_1, icr.value_32.hi_32);
	icr.bits.shorthand = shorthand;
	icr.bits.delivery_mode = INTR_LAPIC_ICR_INIT;
	icr.bits.level = INTR_LAPIC_ICR_ASSERT;
	icr.bits.trigger_mode = INTR_LAPIC_ICR_LEVEL;
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0, icr.value_32.lo_32);
	wait_for_delivery();

	/* Give 10ms for INIT sequence to complete for old processors.
	 * Modern processors (family == 6) don't need to wait here.
	 */
	if (family != 6)
		mdelay(10);

	/* De-assert INIT IPI */
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_1, icr.value_32.hi_32);
	icr.bits.level = INTR_LAPIC_ICR_DEASSERT;
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0, icr.value_32.lo_32);
	wait_for_delivery();

	/* Send Start IPI with page number of secondary reset code */
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_1, icr.value_32.hi_32);
	icr.value_32.lo_32 = 0;
	icr.bits.shorthand = shorthand;
	icr.bits.delivery_mode = INTR_LAPIC_ICR_STARTUP;
	icr.bits.vector = ((paddr_t) cpu_startup_start_address) >> 12;
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0, icr.value_32.lo_32);
	wait_for_delivery();

	if (family == 6) /* 10us is enough for modern processors */
		udelay(10);
	else /* 200us for old processors */
		udelay(200);

	/* Send another start IPI as per the Intel Arch specification */
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_1, icr.value_32.hi_32);
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0, icr.value_32.lo_32);
	wait_for_delivery();

	return status;
}

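/*
 * Hypothetical caller sketch (not part of the original file): a BSP-side
 * bring-up loop would typically start one AP at a time, passing the
 * physical address of the secondary startup trampoline; ap_id and
 * startup_paddr are placeholder names here.
 *
 *	send_startup_ipi(INTR_CPU_STARTUP_USE_DEST, ap_id, startup_paddr);
 *
 * Any other valid shorthand value falls through to the all-excluding-self
 * shorthand handled above.
 */
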
void send_single_ipi(uint32_t pcpu_id, uint32_t vector)
{
	uint32_t dest_lapic_id, hi_32, lo_32;

	/* Get the lapic ID of the destination processor. */
	dest_lapic_id = per_cpu(lapic_id, pcpu_id);

	/* Set the target processor. */
	hi_32 = dest_lapic_id << 24;

	/* Set the vector ID. */
	lo_32 = vector;

	/* Set the destination field to the target processor. */
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_1, hi_32);

	/* Write the vector ID to ICR. */
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0, lo_32);

	wait_for_delivery();
}

int send_shorthand_ipi(uint8_t vector,
	enum intr_lapic_icr_shorthand shorthand,
	enum intr_lapic_icr_delivery_mode delivery_mode)
{
	union apic_icr icr;
	int status = 0;

	if ((shorthand < INTR_LAPIC_ICR_SELF)
		|| (shorthand > INTR_LAPIC_ICR_ALL_EX_SELF)
		|| (delivery_mode > INTR_LAPIC_ICR_NMI))
		status = -EINVAL;

	ASSERT(status == 0, "Incorrect arguments");

	icr.value = 0;
	icr.bits.shorthand = shorthand;
	icr.bits.delivery_mode = delivery_mode;
	icr.bits.vector = vector;
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_1, icr.value_32.hi_32);
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0, icr.value_32.lo_32);
	wait_for_delivery();

	return status;
}
57
hypervisor/arch/x86/intr_main.c
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <hv_debug.h>

int interrupt_init(uint32_t cpu_id)
{
	struct host_idt_descriptor *idtd = &HOST_IDTR;
	int status;

	set_idt(idtd);

	status = init_lapic(cpu_id);
	ASSERT(status == 0, "lapic init failed");
	if (status != 0)
		return -ENODEV;

	status = init_default_irqs(cpu_id);
	ASSERT(status == 0, "irqs init failed");
	if (status != 0)
		return -ENODEV;

	CPU_IRQ_ENABLE();

	return status;
}
292
hypervisor/arch/x86/io.c
Normal file
@@ -0,0 +1,292 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
#include <hypercall.h>
|
||||
|
||||
int dm_emulate_pio_post(struct vcpu *vcpu)
|
||||
{
|
||||
int cur = vcpu->vcpu_id;
|
||||
int cur_context = vcpu->arch_vcpu.cur_context;
|
||||
struct vhm_request_buffer *req_buf =
|
||||
(void *)HPA2HVA(vcpu->vm->sw.req_buf);
|
||||
uint32_t mask =
|
||||
0xFFFFFFFFul >> (32 - 8 * vcpu->req.reqs.pio_request.size);
|
||||
uint64_t *rax;
|
||||
|
||||
	ASSERT(cur_context == 0, "pio emulation only happens in normal world");
|
||||
|
||||
rax = &vcpu->arch_vcpu.contexts[cur_context].guest_cpu_regs.regs.rax;
|
||||
vcpu->req.reqs.pio_request.value =
|
||||
req_buf->req_queue[cur].reqs.pio_request.value;
|
||||
|
||||
	/* VHM emulation data already copied to req; mark the slot free now */
|
||||
req_buf->req_queue[cur].valid = false;
|
||||
|
||||
if (req_buf->req_queue[cur].processed != REQ_STATE_SUCCESS)
|
||||
return -1;
|
||||
|
||||
if (vcpu->req.reqs.pio_request.direction == REQUEST_READ)
|
||||
*rax = ((*rax) & ~mask) |
|
||||
(vcpu->req.reqs.pio_request.value & mask);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void dm_emulate_pio_pre(struct vcpu *vcpu, uint64_t exit_qual,
|
||||
uint32_t sz, uint64_t req_value)
|
||||
{
|
||||
vcpu->req.type = REQ_PORTIO;
|
||||
if (VM_EXIT_IO_INSTRUCTION_ACCESS_DIRECTION(exit_qual))
|
||||
vcpu->req.reqs.pio_request.direction = REQUEST_READ;
|
||||
else
|
||||
vcpu->req.reqs.pio_request.direction = REQUEST_WRITE;
|
||||
|
||||
vcpu->req.reqs.pio_request.address =
|
||||
VM_EXIT_IO_INSTRUCTION_PORT_NUMBER(exit_qual);
|
||||
vcpu->req.reqs.pio_request.size = sz;
|
||||
vcpu->req.reqs.pio_request.value = req_value;
|
||||
}
|
||||
|
||||
int io_instr_handler(struct vcpu *vcpu)
|
||||
{
|
||||
uint32_t sz;
|
||||
uint32_t mask;
|
||||
uint32_t port;
|
||||
int8_t direction;
|
||||
struct vm_io_handler *handler;
|
||||
uint64_t exit_qual;
|
||||
struct vm *vm = vcpu->vm;
|
||||
int cur_context_idx = vcpu->arch_vcpu.cur_context;
|
||||
struct run_context *cur_context;
|
||||
int status = -EINVAL;
|
||||
|
||||
ASSERT(cur_context_idx == 0,
|
||||
"pio emulation only happen in normal wrold");
|
||||
|
||||
cur_context = &vcpu->arch_vcpu.contexts[cur_context_idx];
|
||||
exit_qual = vcpu->arch_vcpu.exit_qualification;
|
||||
|
||||
sz = VM_EXIT_IO_INSTRUCTION_SIZE(exit_qual) + 1;
|
||||
port = VM_EXIT_IO_INSTRUCTION_PORT_NUMBER(exit_qual);
|
||||
direction = VM_EXIT_IO_INSTRUCTION_ACCESS_DIRECTION(exit_qual);
|
||||
mask = 0xfffffffful >> (32 - 8 * sz);
|
||||
|
||||
memset(&vcpu->req, 0, sizeof(struct vhm_request));
|
||||
|
||||
TRACE_4I(TRC_VMEXIT_IO_INSTRUCTION, port, direction, sz,
|
||||
cur_context_idx);
|
||||
|
||||
for (handler = vm->arch_vm.io_handler;
|
||||
handler; handler = handler->next) {
|
||||
|
||||
if ((port >= handler->desc.addr + handler->desc.len) ||
|
||||
(port + sz <= handler->desc.addr))
|
||||
continue;
|
||||
|
||||
		/* Dom0 does not require IO emulation */
|
||||
if (is_vm0(vm))
|
||||
status = 0;
|
||||
|
||||
if (direction == 0) {
|
||||
if (handler->desc.io_write == NULL)
|
||||
continue;
|
||||
|
||||
handler->desc.io_write(handler, vm, port, sz,
|
||||
cur_context->guest_cpu_regs.regs.rax);
|
||||
|
||||
pr_dbg("IO write on port %04x, data %08x", port,
|
||||
cur_context->guest_cpu_regs.regs.rax & mask);
|
||||
|
||||
status = 0;
|
||||
break;
|
||||
} else if (handler->desc.io_read) {
|
||||
uint32_t data = handler->desc.io_read(handler, vm,
|
||||
port, sz);
|
||||
|
||||
cur_context->guest_cpu_regs.regs.rax &= ~mask;
|
||||
cur_context->guest_cpu_regs.regs.rax |= data & mask;
|
||||
|
||||
pr_dbg("IO read on port %04x, data %08x", port, data);
|
||||
|
||||
status = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Go for VHM */
|
||||
if (status != 0) {
|
||||
uint64_t *rax = &cur_context->guest_cpu_regs.regs.rax;
|
||||
|
||||
dm_emulate_pio_pre(vcpu, exit_qual, sz, *rax);
|
||||
status = acrn_insert_request_wait(vcpu, &vcpu->req);
|
||||
}
|
||||
|
||||
if (status != 0) {
|
||||
pr_fatal("IO %s access to port 0x%04x, size=%u",
|
||||
direction ? "read" : "write", port, sz);
|
||||
|
||||
}
|
||||
|
||||
/* Catch any problems */
|
||||
ASSERT(status == 0, "Invalid IO access");
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static void register_io_handler(struct vm *vm, struct vm_io_handler *hdlr)
|
||||
{
|
||||
if (vm->arch_vm.io_handler)
|
||||
hdlr->next = vm->arch_vm.io_handler;
|
||||
|
||||
vm->arch_vm.io_handler = hdlr;
|
||||
}
|
||||
|
||||
static void empty_io_handler_list(struct vm *vm)
|
||||
{
|
||||
struct vm_io_handler *handler = vm->arch_vm.io_handler;
|
||||
struct vm_io_handler *tmp;
|
||||
|
||||
while (handler) {
|
||||
tmp = handler;
|
||||
handler = tmp->next;
|
||||
free(tmp);
|
||||
}
|
||||
vm->arch_vm.io_handler = NULL;
|
||||
}
|
||||
|
||||
void free_io_emulation_resource(struct vm *vm)
|
||||
{
|
||||
empty_io_handler_list(vm);
|
||||
|
||||
/* Free I/O emulation bitmaps */
|
||||
free(vm->arch_vm.iobitmap[0]);
|
||||
free(vm->arch_vm.iobitmap[1]);
|
||||
}
|
||||
|
||||
static void deny_guest_io_access(struct vm *vm, uint32_t address, uint32_t nbytes)
|
||||
{
|
||||
uint32_t *b;
|
||||
uint32_t i;
|
||||
uint32_t a;
|
||||
|
||||
for (i = 0; i < nbytes; i++) {
|
||||
b = vm->arch_vm.iobitmap[0];
|
||||
if (address & 0x8000)
|
||||
b = vm->arch_vm.iobitmap[1];
|
||||
a = address & 0x7fff;
|
||||
b[a >> 5] |= (1 << (a & 0x1f));
|
||||
address++;
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
default_io_read(__unused struct vm_io_handler *hdlr, __unused struct vm *vm,
|
||||
ioport_t address, size_t width)
|
||||
{
|
||||
uint32_t v = io_read(address, width);
|
||||
return v;
|
||||
}
|
||||
|
||||
static void default_io_write(__unused struct vm_io_handler *hdlr,
|
||||
__unused struct vm *vm, ioport_t addr,
|
||||
size_t width, uint32_t v)
|
||||
{
|
||||
io_write(v, addr, width);
|
||||
}
|
||||
|
||||
static struct vm_io_handler *create_io_handler(uint32_t port, uint32_t len,
|
||||
io_read_fn_t io_read_fn_ptr,
|
||||
io_write_fn_t io_write_fn_ptr)
|
||||
{
|
||||
|
||||
struct vm_io_handler *handler;
|
||||
|
||||
handler = calloc(1, sizeof(struct vm_io_handler));
|
||||
|
||||
if (handler != NULL) {
|
||||
handler->desc.addr = port;
|
||||
handler->desc.len = len;
|
||||
handler->desc.io_read = io_read_fn_ptr;
|
||||
handler->desc.io_write = io_write_fn_ptr;
|
||||
} else {
|
||||
pr_err("Error: out of memory");
|
||||
}
|
||||
|
||||
return handler;
|
||||
}
|
||||
|
||||
void setup_io_bitmap(struct vm *vm)
|
||||
{
|
||||
/* Allocate VM architecture state and IO bitmaps A and B */
|
||||
vm->arch_vm.iobitmap[0] = alloc_page();
|
||||
vm->arch_vm.iobitmap[1] = alloc_page();
|
||||
|
||||
ASSERT(vm->arch_vm.iobitmap[0] && vm->arch_vm.iobitmap[1], "");
|
||||
|
||||
if (is_vm0(vm)) {
|
||||
memset(vm->arch_vm.iobitmap[0], 0x00, CPU_PAGE_SIZE);
|
||||
memset(vm->arch_vm.iobitmap[1], 0x00, CPU_PAGE_SIZE);
|
||||
} else {
|
||||
/* block all IO port access from Guest */
|
||||
memset(vm->arch_vm.iobitmap[0], 0xFF, CPU_PAGE_SIZE);
|
||||
memset(vm->arch_vm.iobitmap[1], 0xFF, CPU_PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
void register_io_emulation_handler(struct vm *vm, struct vm_io_range *range,
|
||||
io_read_fn_t io_read_fn_ptr,
|
||||
io_write_fn_t io_write_fn_ptr)
|
||||
{
|
||||
struct vm_io_handler *handler = NULL;
|
||||
io_read_fn_t io_read_fn = &default_io_read;
|
||||
io_write_fn_t io_write_fn = &default_io_write;
|
||||
|
||||
if (range->flags == IO_ATTR_RW && io_read_fn_ptr && io_write_fn_ptr) {
|
||||
io_read_fn = io_read_fn_ptr;
|
||||
io_write_fn = io_write_fn_ptr;
|
||||
} else if (range->flags == IO_ATTR_R) {
|
||||
if (io_read_fn_ptr)
|
||||
io_read_fn = io_read_fn_ptr;
|
||||
io_write_fn = NULL;
|
||||
}
|
||||
|
||||
if (is_vm0(vm))
|
||||
deny_guest_io_access(vm, range->base, range->len);
|
||||
|
||||
handler = create_io_handler(range->base,
|
||||
range->len, io_read_fn, io_write_fn);
|
||||
|
||||
register_io_handler(vm, handler);
|
||||
}
|
439
hypervisor/arch/x86/ioapic.c
Normal file
@@ -0,0 +1,439 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
/* Register offsets */
|
||||
#define IOAPIC_REGSEL_OFFSET 0
|
||||
#define IOAPIC_WINSWL_OFFSET 0x10
|
||||
|
||||
/* IOAPIC Redirection Table (RTE) Entry structure */
|
||||
struct ioapic_rte {
|
||||
uint32_t lo_32;
|
||||
uint32_t hi_32;
|
||||
} ioapic_rte;
|
||||
|
||||
struct gsi_table {
|
||||
uint8_t ioapic_id;
|
||||
uint8_t pin;
|
||||
uint64_t addr;
|
||||
};
|
||||
static struct gsi_table gsi_table[NR_MAX_GSI];
|
||||
static int nr_gsi;
|
||||
static spinlock_t ioapic_lock;
|
||||
|
||||
/*
|
||||
 * the irq-to-ioapic-pin mapping should be extracted from the ACPI MADT table;
 * it is hardcoded here for now
|
||||
*/
|
||||
uint16_t legacy_irq_to_pin[NR_LEGACY_IRQ] = {
|
||||
2, /* IRQ0*/
|
||||
1, /* IRQ1*/
|
||||
0, /* IRQ2 connected to Pin0 (ExtInt source of PIC) if existing */
|
||||
3, /* IRQ3*/
|
||||
4, /* IRQ4*/
|
||||
5, /* IRQ5*/
|
||||
6, /* IRQ6*/
|
||||
7, /* IRQ7*/
|
||||
8, /* IRQ8*/
|
||||
9 | IOAPIC_RTE_TRGRLVL, /* IRQ9*/
|
||||
10, /* IRQ10*/
|
||||
11, /* IRQ11*/
|
||||
12, /* IRQ12*/
|
||||
13, /* IRQ13*/
|
||||
14, /* IRQ14*/
|
||||
15, /* IRQ15*/
|
||||
};
|
||||
|
||||
static uint64_t map_ioapic(
|
||||
uint64_t ioapic_paddr)
|
||||
{
|
||||
/* At some point we may need to translate this paddr to a vaddr.
|
||||
* 1:1 mapping for now.
|
||||
*/
|
||||
return (vaddr_t) ioapic_paddr;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
ioapic_read_reg32(const uint64_t ioapic_base, const uint8_t offset)
|
||||
{
|
||||
uint32_t v;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
spinlock_irqsave_obtain(&ioapic_lock);
|
||||
|
||||
/* Write IOREGSEL */
|
||||
*(uint32_t *)(ioapic_base) = offset;
|
||||
/* Read IOWIN */
|
||||
v = *(uint32_t *)(ioapic_base + IOAPIC_WINSWL_OFFSET);
|
||||
|
||||
spinlock_irqrestore_release(&ioapic_lock);
|
||||
return v;
|
||||
}
|
||||
|
||||
static inline void
|
||||
ioapic_write_reg32(const uint64_t ioapic_base,
|
||||
const uint8_t offset, const uint32_t value)
|
||||
{
|
||||
spinlock_rflags;
|
||||
|
||||
spinlock_irqsave_obtain(&ioapic_lock);
|
||||
|
||||
/* Write IOREGSEL */
|
||||
*(uint32_t *)(ioapic_base) = offset;
|
||||
/* Write IOWIN */
|
||||
*(uint32_t *)(ioapic_base + IOAPIC_WINSWL_OFFSET) = value;
|
||||
|
||||
spinlock_irqrestore_release(&ioapic_lock);
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
get_ioapic_base(int apic_id)
|
||||
{
|
||||
uint64_t addr = -1UL;
|
||||
|
||||
	/* should extract the next ioapic base from the ACPI MADT table */
|
||||
if (apic_id == 0)
|
||||
addr = DEFAULT_IO_APIC_BASE;
|
||||
else if (apic_id == 1)
|
||||
addr = 0xfec3f000;
|
||||
else if (apic_id == 2)
|
||||
addr = 0xfec7f000;
|
||||
else
|
||||
ASSERT(apic_id <= 2, "ACPI MADT table missing");
|
||||
return addr;
|
||||
}
|
||||
|
||||
|
||||
static inline void
|
||||
ioapic_get_rte_entry(uint64_t ioapic_addr,
|
||||
int pin, struct ioapic_rte *rte)
|
||||
{
|
||||
rte->lo_32 = ioapic_read_reg32(ioapic_addr, pin*2 + 0x10);
|
||||
rte->hi_32 = ioapic_read_reg32(ioapic_addr, pin*2 + 0x11);
|
||||
}
|
||||
|
||||
static inline void
|
||||
ioapic_set_rte_entry(uint64_t ioapic_addr,
|
||||
int pin, struct ioapic_rte *rte)
|
||||
{
|
||||
ioapic_write_reg32(ioapic_addr, pin*2 + 0x10, rte->lo_32);
|
||||
ioapic_write_reg32(ioapic_addr, pin*2 + 0x11, rte->hi_32);
|
||||
}
|
||||
|
||||
static inline struct ioapic_rte
|
||||
create_rte_for_legacy_irq(int irq, int vr)
|
||||
{
|
||||
struct ioapic_rte rte = {0, 0};
|
||||
|
||||
	/* Legacy IRQs 0-15 are set up masked by default. Their routing is
	 * actually defined in either the MPTable or the ACPI MADT table;
	 * until the HV can parse ACPI tables we use a common hardcoded mapping.
|
||||
*/
|
||||
|
||||
rte.lo_32 |= IOAPIC_RTE_INTMSET;
|
||||
rte.lo_32 |= (legacy_irq_to_pin[irq] & IOAPIC_RTE_TRGRLVL);
|
||||
rte.lo_32 |= DEFAULT_DEST_MODE;
|
||||
rte.lo_32 |= DEFAULT_DELIVERY_MODE;
|
||||
rte.lo_32 |= (IOAPIC_RTE_INTVEC & vr);
|
||||
|
||||
/* FIXME: Fixed to active Low? */
|
||||
rte.lo_32 |= IOAPIC_RTE_INTALO;
|
||||
|
||||
/* Dest field: legacy irq fixed to CPU0 */
|
||||
rte.hi_32 |= 1 << 24;
|
||||
|
||||
return rte;
|
||||
}
|
||||
|
||||
static inline struct ioapic_rte
|
||||
create_rte_for_gsi_irq(int irq, int vr)
|
||||
{
|
||||
struct ioapic_rte rte = {0, 0};
|
||||
|
||||
if (irq < NR_LEGACY_IRQ)
|
||||
return create_rte_for_legacy_irq(irq, vr);
|
||||
|
||||
/* irq default masked, level trig */
|
||||
rte.lo_32 |= IOAPIC_RTE_INTMSET;
|
||||
rte.lo_32 |= IOAPIC_RTE_TRGRLVL;
|
||||
rte.lo_32 |= DEFAULT_DEST_MODE;
|
||||
rte.lo_32 |= DEFAULT_DELIVERY_MODE;
|
||||
rte.lo_32 |= (IOAPIC_RTE_INTVEC & vr);
|
||||
|
||||
/* FIXME: Fixed to active Low? */
|
||||
rte.lo_32 |= IOAPIC_RTE_INTALO;
|
||||
|
||||
/* Dest field */
|
||||
rte.hi_32 |= ALL_CPUS_MASK << 24;
|
||||
|
||||
return rte;
|
||||
}
|
||||
|
||||
static void ioapic_set_routing(int gsi, int vr)
|
||||
{
|
||||
uint64_t addr;
|
||||
struct ioapic_rte rte;
|
||||
|
||||
addr = gsi_table[gsi].addr;
|
||||
rte = create_rte_for_gsi_irq(gsi, vr);
|
||||
ioapic_set_rte_entry(addr, gsi_table[gsi].pin, &rte);
|
||||
|
||||
if (rte.lo_32 & IOAPIC_RTE_TRGRMOD)
|
||||
update_irq_handler(gsi, handle_level_interrupt_common);
|
||||
else
|
||||
update_irq_handler(gsi, common_handler_edge);
|
||||
|
||||
dev_dbg(ACRN_DBG_IRQ, "GSI: irq:%d pin:%d rte:%x",
|
||||
gsi, gsi_table[gsi].pin,
|
||||
rte.lo_32);
|
||||
}
|
||||
|
||||
void ioapic_get_rte(int irq, uint64_t *rte)
|
||||
{
|
||||
uint64_t addr;
|
||||
struct ioapic_rte _rte;
|
||||
|
||||
if (!irq_is_gsi(irq))
|
||||
return;
|
||||
|
||||
addr = gsi_table[irq].addr;
|
||||
ioapic_get_rte_entry(addr, gsi_table[irq].pin, &_rte);
|
||||
|
||||
*rte = _rte.hi_32;
|
||||
*rte = *rte << 32 | _rte.lo_32;
|
||||
}
|
||||
|
||||
void ioapic_set_rte(int irq, uint64_t raw_rte)
|
||||
{
|
||||
uint64_t addr;
|
||||
struct ioapic_rte rte;
|
||||
|
||||
if (!irq_is_gsi(irq))
|
||||
return;
|
||||
|
||||
addr = gsi_table[irq].addr;
|
||||
rte.lo_32 = raw_rte;
|
||||
rte.hi_32 = raw_rte >> 32;
|
||||
ioapic_set_rte_entry(addr, gsi_table[irq].pin, &rte);
|
||||
|
||||
dev_dbg(ACRN_DBG_IRQ, "GSI: irq:%d pin:%d rte:%x",
|
||||
irq, gsi_table[irq].pin,
|
||||
rte.lo_32);
|
||||
}
|
||||
|
||||
int irq_gsi_num(void)
|
||||
{
|
||||
return nr_gsi;
|
||||
}
|
||||
|
||||
bool irq_is_gsi(int irq)
|
||||
{
|
||||
return irq < nr_gsi;
|
||||
}
|
||||
|
||||
int irq_to_pin(int irq)
|
||||
{
|
||||
if (irq_is_gsi(irq))
|
||||
return gsi_table[irq].pin;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
int pin_to_irq(int pin)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (pin < 0)
|
||||
return IRQ_INVALID;
|
||||
|
||||
for (i = 0; i < nr_gsi; i++) {
|
||||
if (gsi_table[i].pin == (uint8_t) pin)
|
||||
return i;
|
||||
}
|
||||
return IRQ_INVALID;
|
||||
}
|
||||
|
||||
void
|
||||
irq_gsi_mask_unmask(int irq, bool mask)
|
||||
{
|
||||
uint64_t addr = gsi_table[irq].addr;
|
||||
int pin = gsi_table[irq].pin;
|
||||
struct ioapic_rte rte;
|
||||
|
||||
if (!irq_is_gsi(irq))
|
||||
return;
|
||||
|
||||
ioapic_get_rte_entry(addr, pin, &rte);
|
||||
if (mask)
|
||||
rte.lo_32 |= IOAPIC_RTE_INTMSET;
|
||||
else
|
||||
rte.lo_32 &= ~IOAPIC_RTE_INTMASK;
|
||||
ioapic_set_rte_entry(addr, pin, &rte);
|
||||
dev_dbg(ACRN_DBG_PTIRQ, "update: irq:%d pin:%d rte:%x",
|
||||
irq, pin, rte.lo_32);
|
||||
}
|
||||
|
||||
void setup_ioapic_irq(void)
|
||||
{
|
||||
int ioapic_id;
|
||||
int gsi;
|
||||
int vr;
|
||||
|
||||
spinlock_init(&ioapic_lock);
|
||||
|
||||
for (ioapic_id = 0, gsi = 0; ioapic_id < NR_IOAPICS; ioapic_id++) {
|
||||
int pin;
|
||||
int max_pins;
|
||||
int version;
|
||||
uint64_t addr;
|
||||
|
||||
addr = map_ioapic(get_ioapic_base(ioapic_id));
|
||||
version = ioapic_read_reg32(addr, IOAPIC_VER);
|
||||
max_pins = (version & IOAPIC_MAX_RTE_MASK) >> MAX_RTE_SHIFT;
|
||||
dev_dbg(ACRN_DBG_IRQ, "IOAPIC version: %x", version);
|
||||
ASSERT(max_pins > NR_LEGACY_IRQ,
|
||||
"Legacy IRQ num > total GSI");
|
||||
|
||||
for (pin = 0; pin < max_pins; pin++) {
|
||||
gsi_table[gsi].ioapic_id = ioapic_id;
|
||||
gsi_table[gsi].addr = addr;
|
||||
|
||||
if (gsi < NR_LEGACY_IRQ)
|
||||
gsi_table[gsi].pin =
|
||||
legacy_irq_to_pin[gsi] & 0xff;
|
||||
else
|
||||
gsi_table[gsi].pin = pin;
|
||||
|
||||
			/* pin the irq before using it */
|
||||
if (irq_mark_used(gsi) < 0) {
|
||||
pr_err("failed to alloc IRQ[%d]", gsi);
|
||||
gsi++;
|
||||
continue;
|
||||
}
|
||||
|
||||
			/* assign a vector for this GSI;
			 * for legacy irqs the vector is reserved and never freed
|
||||
*/
|
||||
if (gsi < NR_LEGACY_IRQ) {
|
||||
vr = irq_desc_alloc_vector(gsi, false);
|
||||
if (vr < 0) {
|
||||
pr_err("failed to alloc VR");
|
||||
gsi++;
|
||||
continue;
|
||||
}
|
||||
} else
|
||||
vr = 0; /* not to allocate VR right now */
|
||||
|
||||
ioapic_set_routing(gsi, vr);
|
||||
gsi++;
|
||||
}
|
||||
}
|
||||
|
||||
/* system max gsi numbers */
|
||||
nr_gsi = gsi;
|
||||
ASSERT(nr_gsi < NR_MAX_GSI, "GSI table overflow");
|
||||
}
|
||||
|
||||
void dump_ioapic(void)
|
||||
{
|
||||
int irq;
|
||||
|
||||
for (irq = 0; irq < nr_gsi; irq++) {
|
||||
uint64_t addr = gsi_table[irq].addr;
|
||||
int pin = gsi_table[irq].pin;
|
||||
struct ioapic_rte rte;
|
||||
|
||||
ioapic_get_rte_entry(addr, pin, &rte);
|
||||
dev_dbg(ACRN_DBG_IRQ, "DUMP: irq:%d pin:%d rte:%x",
|
||||
irq, pin, rte.lo_32);
|
||||
}
|
||||
}
|
||||
|
||||
void get_rte_info(struct ioapic_rte *rte, bool *mask, bool *irr,
|
||||
bool *phys, int *delmode, bool *level, int *vector, uint32_t *dest)
|
||||
{
|
||||
*mask = ((rte->lo_32 & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET);
|
||||
*irr = ((rte->lo_32 & IOAPIC_RTE_REM_IRR) == IOAPIC_RTE_REM_IRR);
|
||||
*phys = ((rte->lo_32 & IOAPIC_RTE_DESTMOD) == IOAPIC_RTE_DESTPHY);
|
||||
*delmode = rte->lo_32 & IOAPIC_RTE_DELMOD;
|
||||
*level = rte->lo_32 & IOAPIC_RTE_TRGRLVL ? true : false;
|
||||
*vector = rte->lo_32 & IOAPIC_RTE_INTVEC;
|
||||
*dest = rte->hi_32 >> APIC_ID_SHIFT;
|
||||
}
|
||||
|
||||
int get_ioapic_info(char *str, int str_max_len)
|
||||
{
|
||||
int irq, len, size = str_max_len;
|
||||
|
||||
len = snprintf(str, size,
|
||||
"\r\nIRQ\tPIN\tRTE.HI32\tRTE.LO32\tVEC\tDST\tDM\tTM\tDELM\tIRR\tMASK");
|
||||
size -= len;
|
||||
str += len;
|
||||
|
||||
for (irq = 0; irq < nr_gsi; irq++) {
|
||||
uint64_t addr = gsi_table[irq].addr;
|
||||
int pin = gsi_table[irq].pin;
|
||||
struct ioapic_rte rte;
|
||||
|
||||
bool irr, phys, level, mask;
|
||||
int delmode, vector;
|
||||
uint32_t dest;
|
||||
|
||||
ioapic_get_rte_entry(addr, pin, &rte);
|
||||
|
||||
get_rte_info(&rte, &mask, &irr, &phys, &delmode, &level,
|
||||
&vector, &dest);
|
||||
|
||||
len = snprintf(str, size, "\r\n%03d\t%03d\t0x%08X\t0x%08X\t",
|
||||
irq, pin, rte.hi_32, rte.lo_32);
|
||||
size -= len;
|
||||
str += len;
|
||||
|
||||
len = snprintf(str, size, "0x%02X\t0x%02X\t%s\t%s\t%d\t%d\t%d",
|
||||
vector, dest, phys ? "phys" : "logic",
|
||||
level ? "level" : "edge", delmode >> 8, irr, mask);
|
||||
size -= len;
|
||||
str += len;
|
||||
|
||||
if (size < 2) {
|
||||
pr_err("\r\nsmall buffer for ioapic dump");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
snprintf(str, size, "\r\n");
|
||||
return 0;
|
||||
}
|
761
hypervisor/arch/x86/irq.c
Normal file
@@ -0,0 +1,761 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
static spinlock_t exception_spinlock = { .head = 0, .tail = 0, };
|
||||
|
||||
struct irq_request_info {
|
||||
/* vector set to 0xE0 ~ 0xFF for pri_register_handler
|
||||
* and set to -1 for normal_register_handler
|
||||
*/
|
||||
int vector;
|
||||
dev_handler_t func;
|
||||
void *dev_data;
|
||||
bool share;
|
||||
bool lowpri;
|
||||
char *name;
|
||||
};
|
||||
|
||||
/* any field change in below required irq_lock protection with irqsave */
|
||||
struct irq_desc {
|
||||
int irq; /* index to irq_desc_base */
|
||||
	enum irq_state used; /* this irq has been assigned to a device */
|
||||
enum irq_desc_state state; /* irq_desc status */
|
||||
int vector; /* assigned vector */
|
||||
void *handler_data; /* irq_handler private data */
|
||||
int (*irq_handler)(struct irq_desc *irq_desc, void *handler_data);
|
||||
struct dev_handler_node *dev_list;
|
||||
spinlock_t irq_lock;
|
||||
	uint64_t *irq_cnt; /* per-CPU counts of how often this irq fired */
|
||||
uint64_t irq_lost_cnt;
|
||||
};
|
||||
|
||||
static struct irq_desc *irq_desc_base;
|
||||
static int vector_to_irq[NR_MAX_VECTOR + 1];
|
||||
|
||||
static DEFINE_CPU_DATA(uint64_t[NR_MAX_IRQS], irq_count);
|
||||
static DEFINE_CPU_DATA(uint64_t, spurious);
|
||||
|
||||
spurious_handler_t spurious_handler;
|
||||
|
||||
static void init_irq_desc(void)
|
||||
{
|
||||
int i, page_num = 0;
|
||||
int desc_size = NR_MAX_IRQS * sizeof(struct irq_desc);
|
||||
|
||||
page_num = (desc_size + CPU_PAGE_SIZE-1) >> CPU_PAGE_SHIFT;
|
||||
|
||||
irq_desc_base = alloc_pages(page_num);
|
||||
|
||||
ASSERT(irq_desc_base, "page alloc failed!");
|
||||
memset(irq_desc_base, 0, page_num * CPU_PAGE_SIZE);
|
||||
|
||||
for (i = 0; i < NR_MAX_IRQS; i++) {
|
||||
irq_desc_base[i].irq = i;
|
||||
irq_desc_base[i].vector = VECTOR_INVALID;
|
||||
spinlock_init(&irq_desc_base[i].irq_lock);
|
||||
}
|
||||
|
||||
for (i = 0; i <= NR_MAX_VECTOR; i++)
|
||||
vector_to_irq[i] = IRQ_INVALID;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* alloc vector 0x20-0xDF for irq
|
||||
* lowpri: 0x20-0x7F
|
||||
* highpri: 0x80-0xDF
|
||||
*/
|
||||
static int find_available_vector(bool lowpri)
|
||||
{
|
||||
int i, start, end;
|
||||
|
||||
if (lowpri) {
|
||||
start = VECTOR_FOR_NOR_LOWPRI_START;
|
||||
end = VECTOR_FOR_NOR_LOWPRI_END;
|
||||
} else {
|
||||
start = VECTOR_FOR_NOR_HIGHPRI_START;
|
||||
end = VECTOR_FOR_NOR_HIGHPRI_END;
|
||||
}
|
||||
|
||||
/* TODO: vector lock required */
|
||||
for (i = start; i < end; i++) {
|
||||
if (vector_to_irq[i] == IRQ_INVALID)
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* check and set irq to be assigned
|
||||
* return: -1 if irq already assigned otherwise return irq
|
||||
*/
|
||||
int irq_mark_used(int irq)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
if (irq < 0)
|
||||
return -1;
|
||||
|
||||
desc = irq_desc_base + irq;
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
if (desc->used == IRQ_NOT_ASSIGNED)
|
||||
desc->used = IRQ_ASSIGNED_NOSHARE;
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
return irq;
|
||||
}
|
||||
|
||||
/*
|
||||
 * find an available irq in the system and mark it assigned
 * return: the irq, or -1 if none was found
|
||||
*/
|
||||
static int alloc_irq(void)
|
||||
{
|
||||
int i;
|
||||
struct irq_desc *desc;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
for (i = irq_gsi_num(); i < NR_MAX_IRQS; i++) {
|
||||
desc = irq_desc_base + i;
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
if (desc->used == IRQ_NOT_ASSIGNED) {
|
||||
desc->used = IRQ_ASSIGNED_NOSHARE;
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
break;
|
||||
}
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
}
|
||||
return (i == NR_MAX_IRQS) ? -1:i;
|
||||
}
|
||||
|
||||
/* need irq_lock protection before use */
|
||||
static void _irq_desc_set_vector(int irq, int vr)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
|
||||
desc = irq_desc_base + irq;
|
||||
vector_to_irq[vr] = irq;
|
||||
desc->vector = vr;
|
||||
}
|
||||
|
||||
/* lock version of set vector */
|
||||
static void irq_desc_set_vector(int irq, int vr)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
desc = irq_desc_base + irq;
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
vector_to_irq[vr] = irq;
|
||||
desc->vector = vr;
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
}
|
||||
|
||||
/* used with holding irq_lock outside */
|
||||
static void _irq_desc_free_vector(int irq)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
int vr;
|
||||
|
||||
if (irq > NR_MAX_IRQS || irq < 0)
|
||||
return;
|
||||
|
||||
desc = irq_desc_base + irq;
|
||||
|
||||
vr = desc->vector;
|
||||
desc->used = IRQ_NOT_ASSIGNED;
|
||||
desc->state = IRQ_DESC_PENDING;
|
||||
desc->vector = VECTOR_INVALID;
|
||||
|
||||
vr &= NR_MAX_VECTOR;
|
||||
if (vector_to_irq[vr] == irq)
|
||||
vector_to_irq[vr] = IRQ_INVALID;
|
||||
}
|
||||
|
||||
static void disable_pic_irq(void)
|
||||
{
|
||||
io_write_byte(0xff, 0xA1);
|
||||
io_write_byte(0xff, 0x21);
|
||||
}
|
||||
|
||||
static bool
|
||||
irq_desc_append_dev(struct irq_desc *desc, void *node, bool share)
|
||||
{
|
||||
struct dev_handler_node *dev_list;
|
||||
bool added = true;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
dev_list = desc->dev_list;
|
||||
|
||||
/* assign if first node */
|
||||
if (dev_list == NULL) {
|
||||
desc->dev_list = node;
|
||||
desc->used = (share)?IRQ_ASSIGNED_SHARED:IRQ_ASSIGNED_NOSHARE;
|
||||
|
||||
		/* Only a GSI can be level-triggered, and it was already
		 * initialized during ioapic setup.
		 * The caller can later update it with update_irq_handler()
|
||||
*/
|
||||
if (!desc->irq_handler)
|
||||
desc->irq_handler = common_handler_edge;
|
||||
} else if (!share || desc->used == IRQ_ASSIGNED_NOSHARE) {
|
||||
/* dev node added failed */
|
||||
added = false;
|
||||
} else {
|
||||
/* dev_list point to last valid node */
|
||||
while (dev_list->next)
|
||||
dev_list = dev_list->next;
|
||||
/* add node */
|
||||
dev_list->next = node;
|
||||
}
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
|
||||
return added;
|
||||
}
|
||||
|
||||
static struct dev_handler_node*
|
||||
common_register_handler(int irq,
|
||||
struct irq_request_info *info)
|
||||
{
|
||||
struct dev_handler_node *node = NULL;
|
||||
struct irq_desc *desc;
|
||||
bool added = false;
|
||||
|
||||
	/* ======================================================
	 * This is the low level ISR handler registering function.
	 *
	 * case: irq = -1
	 *   The caller does not know which irq to use and wants the system
	 *   to allocate an available irq for it. These irqs are in the
	 *   range nr_gsi ~ NR_MAX_IRQS. An irq will be allocated and a
	 *   vector will be assigned to this irq automatically.
	 *
	 * case: irq >= 0 and irq < nr_gsi
	 *   The caller wants to add a device ISR handler onto an ioapic pin.
	 *   Two kinds of devices: legacy devices and PCI devices with INTx.
	 *   A vector will be assigned automatically.
	 *
	 * case: irq with a special type (not from IOAPIC/MSI)
	 *   These irq values are pre-defined for Timer, IPI, Spurious, etc.
	 *   Their vectors are pre-defined as well.
	 *
	 * return value: pinned irq and assigned vector for this irq.
	 * The caller can use this irq to enable/disable/mask/unmask the
	 * interrupt, and if this irq is for:
	 *   GSI legacy: nothing to do, legacy irqs are already initialized
	 *   GSI other: need to program PCI INTx to match this irq pin
	 *   MSI: the caller needs to program the vector into the PCI device
	 * =====================================================
	 */
|
||||
ASSERT(info != NULL, "Invalid param");
|
||||
|
||||
	/* HV selects an irq for the device if irq < 0;
	 * this vector/irq matches the ACPI DSDT or PCI INTx/MSI
|
||||
*/
|
||||
if (irq < 0)
|
||||
irq = alloc_irq();
|
||||
else
|
||||
irq = irq_mark_used(irq);
|
||||
|
||||
if (irq < 0) {
|
||||
pr_err("failed to assign IRQ");
|
||||
goto OUT;
|
||||
}
|
||||
|
||||
node = calloc(1, sizeof(struct dev_handler_node));
|
||||
if (node == NULL) {
|
||||
pr_err("failed to alloc node");
|
||||
irq_desc_try_free_vector(irq);
|
||||
goto OUT;
|
||||
}
|
||||
|
||||
desc = irq_desc_base + irq;
|
||||
added = irq_desc_append_dev(desc, node, info->share);
|
||||
if (!added) {
|
||||
free(node);
|
||||
node = NULL;
|
||||
pr_err("failed to add node to non-shared irq");
|
||||
}
|
||||
OUT:
|
||||
if (added) {
|
||||
/* it is safe to call irq_desc_alloc_vector multiple times*/
|
||||
if (info->vector >= VECTOR_FOR_PRI_START &&
|
||||
info->vector <= VECTOR_FOR_PRI_END)
|
||||
irq_desc_set_vector(irq, info->vector);
|
||||
else if (info->vector < 0)
|
||||
irq_desc_alloc_vector(irq, info->lowpri);
|
||||
else {
|
||||
pr_err("the input vector is not correct");
|
||||
free(node);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
node->dev_handler = info->func;
|
||||
node->dev_data = info->dev_data;
|
||||
node->desc = desc;
|
||||
|
||||
/* we are okay using strcpy_s here even with spinlock
|
||||
* since no #PG in HV right now
|
||||
*/
|
||||
strcpy_s(node->name, 32, info->name);
|
||||
dev_dbg(ACRN_DBG_IRQ, "[%s] %s irq%d vr:0x%x",
|
||||
__func__, node->name, irq, desc->vector);
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
/* it is safe to call irq_desc_alloc_vector multiple times*/
|
||||
int irq_desc_alloc_vector(int irq, bool lowpri)
|
||||
{
|
||||
int vr = -1;
|
||||
struct irq_desc *desc;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
/* irq should be always available at this time */
|
||||
if (irq > NR_MAX_IRQS || irq < 0)
|
||||
return false;
|
||||
|
||||
desc = irq_desc_base + irq;
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
if (desc->vector != VECTOR_INVALID) {
|
||||
/* already allocated a vector */
|
||||
goto OUT;
|
||||
}
|
||||
|
||||
	/* FLAT mode: an irq is connected to the same vector on every cpu */
|
||||
vr = find_available_vector(lowpri);
|
||||
if (vr < 0) {
|
||||
pr_err("no vector found for irq[%d]", irq);
|
||||
goto OUT;
|
||||
}
|
||||
_irq_desc_set_vector(irq, vr);
|
||||
OUT:
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
return vr;
|
||||
}
|
||||
|
||||
void irq_desc_try_free_vector(int irq)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
/* legacy irq's vector is reserved and should not be freed */
|
||||
if (irq > NR_MAX_IRQS || irq < NR_LEGACY_IRQ)
|
||||
return;
|
||||
|
||||
desc = irq_desc_base + irq;
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
if (desc->dev_list == NULL)
|
||||
_irq_desc_free_vector(irq);
|
||||
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
|
||||
}
|
||||
|
||||
int irq_to_vector(int irq)
|
||||
{
|
||||
if (irq < NR_MAX_IRQS)
|
||||
return irq_desc_base[irq].vector;
|
||||
else
|
||||
return VECTOR_INVALID;
|
||||
}
|
||||
|
||||
int dev_to_irq(struct dev_handler_node *node)
|
||||
{
|
||||
return node->desc->irq;
|
||||
}
|
||||
|
||||
int dev_to_vector(struct dev_handler_node *node)
|
||||
{
|
||||
return node->desc->vector;
|
||||
}
|
||||
|
||||
int init_default_irqs(unsigned int cpu_id)
|
||||
{
|
||||
if (cpu_id > 0)
|
||||
return 0;
|
||||
|
||||
init_irq_desc();
|
||||
|
||||
/* we use ioapic only, disable legacy PIC */
|
||||
disable_pic_irq();
|
||||
setup_ioapic_irq();
|
||||
init_softirq();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void dispatch_exception(struct intr_ctx *ctx)
|
||||
{
|
||||
unsigned int cpu_id = get_cpu_id();
|
||||
|
||||
/* Obtain lock to ensure exception dump doesn't get corrupted */
|
||||
spinlock_obtain(&exception_spinlock);
|
||||
|
||||
dump_exception(ctx, cpu_id);
|
||||
|
||||
/* Release lock to let other CPUs handle exception */
|
||||
spinlock_release(&exception_spinlock);
|
||||
|
||||
/* Halt the CPU */
|
||||
cpu_halt(cpu_id);
|
||||
}
|
||||
|
||||
int handle_spurious_interrupt(int vector)
|
||||
{
|
||||
send_lapic_eoi();
|
||||
|
||||
get_cpu_var(spurious)++;
|
||||
|
||||
pr_warn("Spurious vector: 0x%x.", vector);
|
||||
|
||||
if (spurious_handler)
|
||||
return spurious_handler(vector);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* do_IRQ() */
|
||||
int dispatch_interrupt(struct intr_ctx *ctx)
|
||||
{
|
||||
int vr = ctx->vector;
|
||||
int irq = vector_to_irq[vr];
|
||||
struct irq_desc *desc;
|
||||
|
||||
if (irq == IRQ_INVALID)
|
||||
goto ERR;
|
||||
|
||||
desc = irq_desc_base + irq;
|
||||
per_cpu(irq_count, get_cpu_id())[irq]++;
|
||||
|
||||
if (vr != desc->vector)
|
||||
goto ERR;
|
||||
|
||||
if (desc->used == IRQ_NOT_ASSIGNED || !desc->irq_handler) {
|
||||
/* mask irq if possible */
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
desc->irq_handler(desc, desc->handler_data);
|
||||
return 0;
|
||||
ERR:
|
||||
return handle_spurious_interrupt(vr);
|
||||
}
|
||||
|
||||
int handle_level_interrupt_common(struct irq_desc *desc,
|
||||
__unused void *handler_data)
|
||||
{
|
||||
struct dev_handler_node *dev = desc->dev_list;
|
||||
spinlock_rflags;
|
||||
|
||||
/*
|
||||
	 * give another core a chance: return without taking irq_lock
	 * and record the irq_lost count here
|
||||
*/
|
||||
if (desc->state != IRQ_DESC_PENDING) {
|
||||
send_lapic_eoi();
|
||||
desc->irq_lost_cnt++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
desc->state = IRQ_DESC_IN_PROCESS;
|
||||
|
||||
	/* mask ioapic pin */
|
||||
if (irq_is_gsi(desc->irq))
|
||||
GSI_MASK_IRQ(desc->irq);
|
||||
|
||||
/* Send EOI to LAPIC/IOAPIC IRR */
|
||||
send_lapic_eoi();
|
||||
|
||||
while (dev) {
|
||||
if (dev->dev_handler)
|
||||
dev->dev_handler(desc->irq, dev->dev_data);
|
||||
dev = dev->next;
|
||||
}
|
||||
|
||||
if (irq_is_gsi(desc->irq))
|
||||
GSI_UNMASK_IRQ(desc->irq);
|
||||
|
||||
desc->state = IRQ_DESC_PENDING;
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int common_handler_edge(struct irq_desc *desc, __unused void *handler_data)
|
||||
{
|
||||
struct dev_handler_node *dev = desc->dev_list;
|
||||
spinlock_rflags;
|
||||
|
||||
/*
|
||||
	 * give another core a chance: return without taking irq_lock
	 * and record the irq_lost count here
|
||||
*/
|
||||
if (desc->state != IRQ_DESC_PENDING) {
|
||||
send_lapic_eoi();
|
||||
desc->irq_lost_cnt++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
desc->state = IRQ_DESC_IN_PROCESS;
|
||||
|
||||
/* Send EOI to LAPIC/IOAPIC IRR */
|
||||
send_lapic_eoi();
|
||||
|
||||
while (dev) {
|
||||
if (dev->dev_handler)
|
||||
dev->dev_handler(desc->irq, dev->dev_data);
|
||||
dev = dev->next;
|
||||
}
|
||||
|
||||
desc->state = IRQ_DESC_PENDING;
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int common_dev_handler_level(struct irq_desc *desc, __unused void *handler_data)
|
||||
{
|
||||
struct dev_handler_node *dev = desc->dev_list;
|
||||
spinlock_rflags;
|
||||
|
||||
/*
|
||||
	 * give another core a chance: return without taking irq_lock
	 * and record the irq_lost count here
|
||||
*/
|
||||
if (desc->state != IRQ_DESC_PENDING) {
|
||||
send_lapic_eoi();
|
||||
desc->irq_lost_cnt++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
desc->state = IRQ_DESC_IN_PROCESS;
|
||||
|
||||
	/* mask ioapic pin */
|
||||
if (irq_is_gsi(desc->irq))
|
||||
GSI_MASK_IRQ(desc->irq);
|
||||
|
||||
/* Send EOI to LAPIC/IOAPIC IRR */
|
||||
send_lapic_eoi();
|
||||
|
||||
while (dev) {
|
||||
if (dev->dev_handler)
|
||||
dev->dev_handler(desc->irq, dev->dev_data);
|
||||
dev = dev->next;
|
||||
}
|
||||
|
||||
desc->state = IRQ_DESC_PENDING;
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
|
||||
	/* we do not unmask the irq until the guest EOIs the vector */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* no desc->irq_lock for quick handling local interrupt like lapic timer */
|
||||
int quick_handler_nolock(struct irq_desc *desc, __unused void *handler_data)
|
||||
{
|
||||
struct dev_handler_node *dev = desc->dev_list;
|
||||
|
||||
/* Send EOI to LAPIC/IOAPIC IRR */
|
||||
send_lapic_eoi();
|
||||
|
||||
while (dev) {
|
||||
if (dev->dev_handler)
|
||||
dev->dev_handler(desc->irq, dev->dev_data);
|
||||
dev = dev->next;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void update_irq_handler(int irq, irq_handler_t func)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
if (irq >= NR_MAX_IRQS)
|
||||
return;
|
||||
|
||||
desc = irq_desc_base + irq;
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
desc->irq_handler = func;
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
}
|
||||
|
||||
void unregister_handler_common(struct dev_handler_node *node)
|
||||
{
|
||||
struct dev_handler_node *head;
|
||||
struct irq_desc *desc;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
if (node == NULL)
|
||||
return;
|
||||
|
||||
dev_dbg(ACRN_DBG_IRQ, "[%s] %s irq%d vr:0x%x",
|
||||
__func__, node->name,
|
||||
dev_to_irq(node),
|
||||
dev_to_vector(node));
|
||||
|
||||
desc = node->desc;
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
|
||||
head = desc->dev_list;
|
||||
if (head == node) {
|
||||
desc->dev_list = NULL;
|
||||
goto UNLOCK_EXIT;
|
||||
}
|
||||
|
||||
while (head->next) {
|
||||
if (head->next == node)
|
||||
break;
|
||||
head = head->next;
|
||||
}
|
||||
|
||||
head->next = node->next;
|
||||
|
||||
UNLOCK_EXIT:
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
irq_desc_try_free_vector(desc->irq);
|
||||
free(node);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate IRQ with Vector from 0x20 ~ 0xDF
|
||||
*/
|
||||
struct dev_handler_node*
|
||||
normal_register_handler(int irq,
|
||||
dev_handler_t func,
|
||||
void *dev_data,
|
||||
bool share,
|
||||
bool lowpri,
|
||||
const char *name)
|
||||
{
|
||||
struct irq_request_info info;
|
||||
|
||||
info.vector = -1;
|
||||
info.lowpri = lowpri;
|
||||
info.func = func;
|
||||
info.dev_data = dev_data;
|
||||
info.share = share;
|
||||
info.name = (char *)name;
|
||||
|
||||
return common_register_handler(irq, &info);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate IRQ with vector from 0xE0 ~ 0xFF
|
||||
 * Allocate an IRQ and install the ISR on that specific cpu.
 * Users can install the same irq/isr on different CPUs by calling this
 * function multiple times
|
||||
*/
|
||||
struct dev_handler_node*
|
||||
pri_register_handler(int irq,
|
||||
int vector,
|
||||
dev_handler_t func,
|
||||
void *dev_data,
|
||||
const char *name)
|
||||
{
|
||||
struct irq_request_info info;
|
||||
|
||||
if (vector < VECTOR_FOR_PRI_START || vector > VECTOR_FOR_PRI_END)
|
||||
return NULL;
|
||||
|
||||
info.vector = vector;
|
||||
info.lowpri = false;
|
||||
info.func = func;
|
||||
info.dev_data = dev_data;
|
||||
info.share = true;
|
||||
info.name = (char *)name;
|
||||
|
||||
return common_register_handler(irq, &info);
|
||||
}
|
||||
|
||||
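/*
 * Hypothetical usage sketch (not part of the original file): a device
 * driver could hook a shared, low-priority handler onto a GSI with
 * normal_register_handler(); my_dev_isr, my_dev_data and the GSI number
 * are placeholders, and dev_handler_t is assumed to take the irq number
 * and the registered dev_data, as the dispatch code above suggests.
 *
 *	struct dev_handler_node *node =
 *		normal_register_handler(20, my_dev_isr, my_dev_data,
 *					true, true, "my_dev");
 *	if (node == NULL)
 *		pr_err("failed to register my_dev handler");
 *	...
 *	unregister_handler_common(node);
 */
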
int get_cpu_interrupt_info(char *str, int str_max)
|
||||
{
|
||||
int irq, vector, pcpu_id, len, size = str_max;
|
||||
struct irq_desc *desc;
|
||||
|
||||
len = snprintf(str, size, "\r\nIRQ\tVECTOR");
|
||||
size -= len;
|
||||
str += len;
|
||||
for (pcpu_id = 0; pcpu_id < phy_cpu_num; pcpu_id++) {
|
||||
len = snprintf(str, size, "\tCPU%d", pcpu_id);
|
||||
size -= len;
|
||||
str += len;
|
||||
}
|
||||
len = snprintf(str, size, "\tLOST\tSHARE");
|
||||
size -= len;
|
||||
str += len;
|
||||
|
||||
for (irq = 0; irq < NR_MAX_IRQS; irq++) {
|
||||
desc = irq_desc_base + irq;
|
||||
vector = irq_to_vector(irq);
|
||||
if (desc->used != IRQ_NOT_ASSIGNED &&
|
||||
vector != VECTOR_INVALID) {
|
||||
len = snprintf(str, size, "\r\n%d\t0x%X", irq, vector);
|
||||
size -= len;
|
||||
str += len;
|
||||
for (pcpu_id = 0; pcpu_id < phy_cpu_num; pcpu_id++) {
|
||||
len = snprintf(str, size, "\t%d",
per_cpu(irq_count, pcpu_id)[irq]);
|
||||
size -= len;
|
||||
str += len;
|
||||
}
|
||||
len = snprintf(str, size, "\t%d\t%s",
|
||||
desc->irq_lost_cnt,
|
||||
desc->used == IRQ_ASSIGNED_SHARED ?
|
||||
"shared" : "no-shared");
|
||||
size -= len;
|
||||
str += len;
|
||||
}
|
||||
}
|
||||
snprintf(str, size, "\r\n");
|
||||
return 0;
|
||||
}
|
932
hypervisor/arch/x86/mmu.c
Normal file
@@ -0,0 +1,932 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* Copyright (c) 2017 Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <bsp_extern.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
static void *mmu_pml4_addr;
|
||||
|
||||
enum mem_map_request_type {
|
||||
PAGING_REQUEST_TYPE_MAP = 0, /* Creates a new mapping. */
|
||||
PAGING_REQUEST_TYPE_UNMAP = 1, /* Removes a pre-existing entry */
|
||||
PAGING_REQUEST_TYPE_MODIFY = 2,
|
||||
/* Modifies a pre-existing entry's attributes. */
|
||||
PAGING_REQUEST_TYPE_UNKNOWN,
|
||||
};
|
||||
|
||||
struct mm_capability {
|
||||
/* EPT and MMU 1-GByte page supported flag */
|
||||
bool ept_1gb_page_supported;
|
||||
bool invept_supported;
|
||||
bool invept_single_context_supported;
|
||||
bool invept_global_context_supported;
|
||||
bool invvpid_supported;
|
||||
bool invvpid_single_context_supported;
|
||||
bool invvpid_global_context_supported;
|
||||
bool mmu_1gb_page_supported;
|
||||
};
|
||||
static struct mm_capability mm_caps;
|
||||
|
||||
#define INVEPT_TYPE_SINGLE_CONTEXT 1UL
|
||||
#define INVEPT_TYPE_ALL_CONTEXTS 2UL
|
||||
#define INVEPT_SET_ERROR_CODE \
|
||||
" jnc 1f\n" \
|
||||
" mov $1, %0\n" /* CF: error = 1 */ \
|
||||
" jmp 3f\n" \
|
||||
"1: jnz 2f\n" \
|
||||
" mov $2, %0\n" /* ZF: error = 2 */ \
|
||||
" jmp 3f\n" \
|
||||
"2: mov $0, %0\n" \
|
||||
"3:"
|
||||
|
||||
struct invept_desc {
|
||||
uint64_t eptp;
|
||||
uint64_t _res;
|
||||
};
|
||||
|
||||
static inline void _invept(uint64_t type, struct invept_desc desc)
|
||||
{
|
||||
int error = 0;
|
||||
|
||||
asm volatile ("invept %1, %2\n"
|
||||
INVEPT_SET_ERROR_CODE
|
||||
: "=r" (error)
|
||||
: "m" (desc), "r" (type)
|
||||
: "memory");
|
||||
|
||||
ASSERT(error == 0, "invept error");
|
||||
}
|
||||
|
||||
static void check_mmu_capability(void)
|
||||
{
|
||||
uint64_t val;
|
||||
uint32_t eax, ebx, ecx, edx;
|
||||
|
||||
memset(&mm_caps, 0, sizeof(struct mm_capability));
|
||||
|
||||
/* Read the MSR register of EPT and VPID Capability - SDM A.10 */
|
||||
val = msr_read(MSR_IA32_VMX_EPT_VPID_CAP);
|
||||
mm_caps.ept_1gb_page_supported = (val & MSR_VMX_EPT_VPID_CAP_1GB)
|
||||
? (true) : (false);
|
||||
mm_caps.invept_supported =
|
||||
(val & MSR_VMX_INVEPT) ? (true) : (false);
|
||||
mm_caps.invept_single_context_supported =
|
||||
(val & MSR_VMX_INVEPT_SINGLE_CONTEXT) ? (true) : (false);
|
||||
mm_caps.invept_global_context_supported =
|
||||
(val & MSR_VMX_INVEPT_GLOBAL_CONTEXT) ? (true) : (false);
|
||||
mm_caps.invvpid_supported =
|
||||
(val & MSR_VMX_INVVPID) ? (true) : (false);
|
||||
mm_caps.invvpid_single_context_supported =
|
||||
(val & MSR_VMX_INVVPID_SINGLE_CONTEXT) ? (true) : (false);
|
||||
mm_caps.invvpid_global_context_supported =
|
||||
(val & MSR_VMX_INVVPID_GLOBAL_CONTEXT) ? (true) : (false);
|
||||
|
||||
/* Read CPUID to check if PAGE1GB is supported
|
||||
* SDM 4.1.4 If CPUID.80000001H:EDX.Page1GB[bit26]=1,
|
||||
* 1-GByte pages are supported with 4-level paging
|
||||
*/
|
||||
cpuid(CPUID_EXTEND_FUNCTION_1, &eax, &ebx, &ecx, &edx);
|
||||
mm_caps.mmu_1gb_page_supported = (edx & CPUID_EDX_PAGE1GB) ?
|
||||
(true) : (false);
|
||||
}
|
||||
|
||||
static inline bool check_invept_single_support(void)
|
||||
{
|
||||
return mm_caps.invept_supported &&
|
||||
mm_caps.invept_single_context_supported;
|
||||
}
|
||||
|
||||
static inline bool check_invept_global_support(void)
|
||||
{
|
||||
return mm_caps.invept_supported &&
|
||||
mm_caps.invept_global_context_supported;
|
||||
}
|
||||
|
||||
void mmu_invept(struct vcpu *vcpu)
|
||||
{
|
||||
struct invept_desc desc = {0};
|
||||
|
||||
if (check_invept_single_support()) {
|
||||
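/* EPTP format: bits 2:0 = EPT memory type (6 = write-back),
 * bits 5:3 = EPT page-walk length minus 1 (3 => 4 levels).
 */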
desc.eptp = (uint64_t) vcpu->vm->arch_vm.ept | (3 << 3) | 6;
|
||||
_invept(INVEPT_TYPE_SINGLE_CONTEXT, desc);
|
||||
} else if (check_invept_global_support())
|
||||
_invept(INVEPT_TYPE_ALL_CONTEXTS, desc);
|
||||
}
|
||||
|
||||
static bool check_mmu_1gb_support(struct map_params *map_params)
|
||||
{
|
||||
bool status = false;
|
||||
|
||||
if (map_params->page_table_type == PT_EPT)
|
||||
status = mm_caps.ept_1gb_page_supported;
|
||||
else
|
||||
status = mm_caps.mmu_1gb_page_supported;
|
||||
return status;
|
||||
}
|
||||
|
||||
static uint32_t map_mem_region(void *vaddr, void *paddr,
|
||||
void *table_base, uint64_t attr, uint32_t table_level,
|
||||
int ept_entry, enum mem_map_request_type request_type)
|
||||
{
|
||||
uint64_t table_entry;
|
||||
uint64_t table_present;
|
||||
uint32_t table_offset;
|
||||
uint32_t mapped_size;
|
||||
|
||||
if (table_base == NULL || table_level >= IA32E_UNKNOWN
|
||||
|| request_type >= PAGING_REQUEST_TYPE_UNKNOWN) {
|
||||
/* Shouldn't go here */
|
||||
ASSERT(false, "Incorrect Arguments. Failed to map region");
|
||||
}
|
||||
|
||||
/* switch based on level of table */
|
||||
switch (table_level) {
|
||||
case IA32E_PDPT:
|
||||
|
||||
/* Get offset to the entry in the PDPT for this address */
|
||||
table_offset = IA32E_PDPTE_INDEX_CALC(vaddr);
|
||||
|
||||
/* PS bit must be set for these entries to be mapped */
|
||||
attr |= IA32E_PDPTE_PS_BIT;
|
||||
|
||||
/* Set mapped size to 1 GB */
|
||||
mapped_size = MEM_1G;
|
||||
|
||||
break;
|
||||
|
||||
case IA32E_PD:
|
||||
|
||||
/* Get offset to the entry in the PD for this address */
|
||||
table_offset = IA32E_PDE_INDEX_CALC(vaddr);
|
||||
|
||||
/* PS bit must be set for these entries to be mapped */
|
||||
attr |= IA32E_PDE_PS_BIT;
|
||||
|
||||
/* Set mapped size to 2 MB */
|
||||
mapped_size = MEM_2M;
|
||||
|
||||
break;
|
||||
|
||||
case IA32E_PT:
|
||||
|
||||
/* Get offset to the entry in the PT for this address */
|
||||
table_offset = IA32E_PTE_INDEX_CALC(vaddr);
|
||||
|
||||
/* NOTE: No PS bit in page table entries */
|
||||
|
||||
/* Set mapped size to 4 KB */
|
||||
mapped_size = MEM_4K;
|
||||
|
||||
/* If not an EPT entry, see if the PAT bit is set for the PDPT entry
|
||||
*/
|
||||
if ((!ept_entry) && (attr & IA32E_PDPTE_PAT_BIT)) {
|
||||
/* The PAT bit is set; Clear it and set the page table
|
||||
* PAT bit instead
|
||||
*/
|
||||
attr &= (uint64_t) (~((uint64_t) IA32E_PDPTE_PAT_BIT));
|
||||
attr |= IA32E_PTE_PAT_BIT;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case IA32E_PML4:
|
||||
default:
|
||||
|
||||
/* Set mapping size to 0 - can't map memory in PML4 */
|
||||
mapped_size = 0;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
/* Check to see if mapping should occur */
|
||||
if (mapped_size != 0) {
|
||||
/* Get current table entry */
|
||||
uint64_t tmp = MEM_READ64(table_base + table_offset);
|
||||
|
||||
/* Check if EPT entry */
|
||||
if (ept_entry) {
|
||||
/* Use read/write/execute bits to determine presence of
|
||||
* entry
|
||||
*/
|
||||
table_present = (IA32E_EPT_R_BIT |
|
||||
IA32E_EPT_W_BIT | IA32E_EPT_X_BIT);
|
||||
} else {
|
||||
/* Use the P bit to determine if an entry is present */
|
||||
table_present = IA32E_COMM_P_BIT;
|
||||
}
|
||||
|
||||
switch (request_type) {
|
||||
case PAGING_REQUEST_TYPE_MAP:
|
||||
{
|
||||
/* No need to confirm that the current table entry
 * isn't already present;
 * this supports map --> remap.
 */
|
||||
table_entry = (ept_entry
|
||||
? attr
|
||||
: (attr | IA32E_COMM_P_BIT));
|
||||
|
||||
table_entry |= (uint64_t)paddr;
|
||||
|
||||
/* Write the table entry to map this memory */
|
||||
MEM_WRITE64(table_base + table_offset, table_entry);
|
||||
break;
|
||||
}
|
||||
case PAGING_REQUEST_TYPE_UNMAP:
|
||||
{
|
||||
if (tmp & table_present) {
|
||||
/* Table is present.
|
||||
* Write the table entry to map this memory
|
||||
*/
|
||||
MEM_WRITE64(table_base + table_offset, 0);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PAGING_REQUEST_TYPE_MODIFY:
|
||||
{
|
||||
/* Allow mapping or modification as requested. */
|
||||
table_entry = (ept_entry
|
||||
? attr : (attr | IA32E_COMM_P_BIT));
|
||||
|
||||
table_entry |= (uint64_t) paddr;
|
||||
|
||||
/* Write the table entry to map this memory */
|
||||
MEM_WRITE64(table_base + table_offset, table_entry);
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ASSERT("Bad memory map request type" == 0, "");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return mapped size to caller */
|
||||
return mapped_size;
|
||||
}
|
||||
|
||||
static uint32_t fetch_page_table_offset(void *addr, uint32_t table_level)
|
||||
{
|
||||
uint32_t table_offset;
|
||||
|
||||
/* Switch based on level of table */
|
||||
switch (table_level) {
|
||||
case IA32E_PML4:
|
||||
|
||||
/* Get offset to the entry in the PML4
|
||||
* for this address
|
||||
*/
|
||||
table_offset = IA32E_PML4E_INDEX_CALC(addr);
|
||||
break;
|
||||
|
||||
case IA32E_PDPT:
|
||||
|
||||
/* Get offset to the entry in the PDPT
|
||||
* for this address
|
||||
*/
|
||||
table_offset = IA32E_PDPTE_INDEX_CALC(addr);
|
||||
break;
|
||||
|
||||
case IA32E_PD:
|
||||
|
||||
/* Get offset to the entry in the PD
|
||||
* for this address
|
||||
*/
|
||||
table_offset = IA32E_PDE_INDEX_CALC(addr);
|
||||
break;
|
||||
|
||||
case IA32E_PT:
|
||||
table_offset = IA32E_PTE_INDEX_CALC(addr);
|
||||
break;
|
||||
|
||||
default:
|
||||
pr_err("Wrong page table level = 0x%lx", table_level);
|
||||
ASSERT(false, "Wrong page table level");
|
||||
break;
|
||||
}
|
||||
|
||||
return table_offset;
|
||||
}
|
||||
|
||||
static inline uint32_t check_page_table_present(struct map_params *map_params,
|
||||
uint64_t table_entry)
|
||||
{
|
||||
if (map_params->page_table_type == PT_EPT) {
|
||||
table_entry &= (IA32E_EPT_R_BIT | IA32E_EPT_W_BIT |
|
||||
IA32E_EPT_X_BIT);
|
||||
} else {
|
||||
table_entry &= (IA32E_COMM_P_BIT);
|
||||
}
|
||||
|
||||
return (table_entry) ? PT_PRESENT : PT_NOT_PRESENT;
|
||||
}
|
||||
|
||||
static uint64_t get_table_entry(struct map_params *map_params, void *addr,
|
||||
void *table_base, uint32_t table_level)
|
||||
{
|
||||
uint32_t table_offset;
|
||||
uint64_t table_entry;
|
||||
int status = 0;
|
||||
|
||||
if (table_base == NULL
|
||||
|| table_level >= IA32E_UNKNOWN
|
||||
|| map_params == NULL) {
|
||||
status = -EINVAL;
|
||||
}
|
||||
ASSERT(status == 0, "Incorrect Arguments");
|
||||
|
||||
table_offset = fetch_page_table_offset(addr, table_level);
|
||||
|
||||
/* Read the table entry */
|
||||
table_entry = MEM_READ64(table_base + table_offset);
|
||||
|
||||
/* Return the next table in the walk */
|
||||
return table_entry;
|
||||
}
|
||||
|
||||
static void *walk_paging_struct(void *addr, void *table_base,
|
||||
uint32_t table_level, struct map_params *map_params)
|
||||
{
|
||||
uint32_t table_offset;
|
||||
uint64_t table_entry;
|
||||
uint64_t table_present;
|
||||
/* If table_level == IA32E_PT, just return the same address;
 * we can't walk down any further.
 */
|
||||
void *sub_table_addr = ((table_level == IA32E_PT) ? table_base:NULL);
|
||||
int status = 0;
|
||||
|
||||
if (table_base == NULL || table_level >= IA32E_UNKNOWN
|
||||
|| map_params == NULL) {
|
||||
status = -EINVAL;
|
||||
}
|
||||
ASSERT(status == 0, "Incorrect Arguments");
|
||||
|
||||
table_offset = fetch_page_table_offset(addr, table_level);
|
||||
|
||||
/* See if we can skip the rest */
|
||||
if (sub_table_addr != table_base) {
|
||||
/* Read the table entry */
|
||||
table_entry = MEM_READ64(table_base + table_offset);
|
||||
|
||||
/* Check if EPT entry being created */
|
||||
if (map_params->page_table_type == PT_EPT) {
|
||||
/* Set table present bits to any of the
|
||||
* read/write/execute bits
|
||||
*/
|
||||
table_present = (IA32E_EPT_R_BIT | IA32E_EPT_W_BIT |
|
||||
IA32E_EPT_X_BIT);
|
||||
} else {
|
||||
/* Set table present bits to the P bit or the R/W bit */
|
||||
table_present = (IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT);
|
||||
}
|
||||
|
||||
/* Determine if a valid entry exists */
|
||||
if ((table_entry & table_present) == 0) {
|
||||
/* No entry present - need to allocate a new table */
|
||||
sub_table_addr =
|
||||
alloc_paging_struct();
|
||||
/* Check to ensure memory available for this structure*/
|
||||
if (sub_table_addr == 0) {
|
||||
/* Error: Unable to find table memory necessary
|
||||
* to map memory
|
||||
*/
|
||||
ASSERT(false,
"Failed to find table memory "
"for mapping memory");
|
||||
|
||||
return sub_table_addr;
|
||||
}
|
||||
|
||||
/* Write entry to current table to reference the new
|
||||
* sub-table
|
||||
*/
|
||||
MEM_WRITE64(table_base + table_offset,
|
||||
(uint64_t) sub_table_addr | table_present);
|
||||
} else {
|
||||
/* Get address of the sub-table */
|
||||
sub_table_addr = (void *)(table_entry & IA32E_REF_MASK);
|
||||
}
|
||||
}
|
||||
|
||||
/* Return the next table in the walk */
|
||||
return sub_table_addr;
|
||||
}
|
||||
|
||||
void *get_paging_pml4(void)
|
||||
{
|
||||
/* Return address to caller */
|
||||
return mmu_pml4_addr;
|
||||
}
|
||||
|
||||
void enable_paging(void *pml4_base_addr)
|
||||
{
|
||||
CPU_CR_WRITE(cr3, (unsigned long)pml4_base_addr);
|
||||
}
|
||||
|
||||
void init_paging(void)
|
||||
{
|
||||
struct map_params map_params;
|
||||
struct e820_entry *entry;
|
||||
uint32_t i;
|
||||
int attr_wb = (MMU_MEM_ATTR_READ |
|
||||
MMU_MEM_ATTR_WRITE |
|
||||
MMU_MEM_ATTR_EXECUTE |
|
||||
MMU_MEM_ATTR_WB_CACHE);
|
||||
int attr_uc = (MMU_MEM_ATTR_READ |
|
||||
MMU_MEM_ATTR_WRITE |
|
||||
MMU_MEM_ATTR_EXECUTE |
|
||||
MMU_MEM_ATTR_UNCACHED);
|
||||
|
||||
pr_dbg("HV MMU Initialization");
|
||||
|
||||
check_mmu_capability();
|
||||
|
||||
/* Allocate memory for Hypervisor PML4 table */
|
||||
mmu_pml4_addr = alloc_paging_struct();
|
||||
|
||||
init_e820();
|
||||
obtain_e820_mem_info();
|
||||
|
||||
/* Loop through all memory regions in the e820 table */
|
||||
map_params.page_table_type = PT_HOST;
|
||||
map_params.pml4_base = mmu_pml4_addr;
|
||||
|
||||
/* Map all memory regions to UC attribute */
|
||||
map_mem(&map_params, (void *)e820_mem.mem_bottom,
|
||||
(void *)e820_mem.mem_bottom,
|
||||
(e820_mem.mem_top - e820_mem.mem_bottom),
|
||||
attr_uc);
|
||||
|
||||
/* Modify WB attribute for E820_TYPE_RAM */
|
||||
for (i = 0, entry = &e820[0];
|
||||
i < e820_entries;
|
||||
i++, entry = &e820[i]) {
|
||||
if (entry->type == E820_TYPE_RAM) {
|
||||
modify_mem(&map_params, (void *)entry->baseaddr,
|
||||
(void *)entry->baseaddr,
|
||||
entry->length, attr_wb);
|
||||
}
|
||||
}
|
||||
|
||||
pr_dbg("Enabling MMU ");
|
||||
|
||||
/* Enable paging */
|
||||
enable_paging(mmu_pml4_addr);
|
||||
}
|
||||
|
||||
void *alloc_paging_struct(void)
|
||||
{
|
||||
void *ptr = NULL;
|
||||
|
||||
/* Allocate a page from Hypervisor heap */
|
||||
ptr = alloc_page();
|
||||
|
||||
ASSERT(ptr, "page alloc failed!");
|
||||
memset(ptr, 0, CPU_PAGE_SIZE);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
uint64_t config_page_table_attr(struct map_params *map_params, uint32_t flags)
|
||||
{
|
||||
int ept_entry = map_params->page_table_type;
|
||||
uint64_t attr = 0;
|
||||
|
||||
/* Convert generic memory flags to architecture specific attributes */
|
||||
/* Check if read access */
|
||||
if (flags & MMU_MEM_ATTR_READ) {
|
||||
/* Configure for read access */
|
||||
attr |=
|
||||
(ept_entry ? IA32E_EPT_R_BIT : MMU_MEM_ATTR_BIT_READ_WRITE);
|
||||
}
|
||||
|
||||
/* Check for write access */
|
||||
if (flags & MMU_MEM_ATTR_WRITE) {
|
||||
/* Configure for write access */
|
||||
attr |=
|
||||
(ept_entry ? IA32E_EPT_W_BIT : MMU_MEM_ATTR_BIT_READ_WRITE);
|
||||
}
|
||||
|
||||
/* Check for execute access */
|
||||
if (flags & MMU_MEM_ATTR_EXECUTE) {
|
||||
/* Configure for execute (EPT only) */
|
||||
attr |= (ept_entry ? IA32E_EPT_X_BIT : 0);
|
||||
}
|
||||
|
||||
/* EPT & VT-d share the same page tables; set the SNP bit
 * to force snooping of PCIe devices if the page
 * is cacheable.
 */
|
||||
if ((flags & MMU_MEM_ATTR_UNCACHED) != MMU_MEM_ATTR_UNCACHED
|
||||
&& ept_entry == PT_EPT) {
|
||||
attr |= IA32E_EPT_SNOOP_CTRL;
|
||||
}
|
||||
|
||||
/* Check for cache / memory types */
|
||||
if (flags & MMU_MEM_ATTR_WB_CACHE) {
|
||||
/* Configure for write back cache */
|
||||
attr |=
|
||||
(ept_entry ? IA32E_EPT_WB : MMU_MEM_ATTR_TYPE_CACHED_WB);
|
||||
} else if (flags & MMU_MEM_ATTR_WT_CACHE) {
|
||||
/* Configure for write through cache */
|
||||
attr |=
|
||||
(ept_entry ? IA32E_EPT_WT : MMU_MEM_ATTR_TYPE_CACHED_WT);
|
||||
} else if (flags & MMU_MEM_ATTR_UNCACHED) {
|
||||
/* Configure for uncached */
|
||||
attr |=
|
||||
(ept_entry ? IA32E_EPT_UNCACHED : MMU_MEM_ATTR_TYPE_UNCACHED);
|
||||
} else if (flags & MMU_MEM_ATTR_WC) {
|
||||
/* Configure for write combining */
|
||||
attr |=
|
||||
(ept_entry ? IA32E_EPT_WC : MMU_MEM_ATTR_TYPE_WRITE_COMBINED);
|
||||
} else {
|
||||
/* Configure for write protected */
|
||||
attr |=
|
||||
(ept_entry ? IA32E_EPT_WP : MMU_MEM_ATTR_TYPE_WRITE_PROTECTED);
|
||||
}
|
||||
return attr;
|
||||
|
||||
}
|
||||
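/*
 * Illustrative sketch (an assumption, not part of the original file): for
 * ordinary write-back RAM the attribute produced above combines as follows.
 */
static inline uint64_t example_wb_ram_attr(struct map_params *map_params)
{
	/* read + write + execute, write-back cacheable */
	uint32_t flags = MMU_MEM_ATTR_READ | MMU_MEM_ATTR_WRITE |
			MMU_MEM_ATTR_EXECUTE | MMU_MEM_ATTR_WB_CACHE;

	/* For PT_EPT this yields R|W|X, the WB memory type and the snoop
	 * control bit; for PT_HOST it yields the R/W bit and the WB type
	 * (the present bit is added later in map_mem_region()).
	 */
	return config_page_table_attr(map_params, flags);
}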
|
||||
void obtain_last_page_table_entry(struct map_params *map_params,
|
||||
struct entry_params *entry, void *addr, bool direct)
|
||||
{
|
||||
uint64_t table_entry;
|
||||
uint32_t table_present = 0;
|
||||
/* Obtain the PML4 address */
|
||||
void *table_addr = direct ? (map_params->pml4_base)
|
||||
: (map_params->pml4_inverted);
|
||||
|
||||
/* Obtain page table entry from PML4 table*/
|
||||
table_entry = get_table_entry(map_params, addr,
|
||||
table_addr, IA32E_PML4);
|
||||
table_present = check_page_table_present(map_params, table_entry);
|
||||
if (table_present == PT_NOT_PRESENT) {
|
||||
/* PML4E not present, return PML4 base address */
|
||||
entry->entry_level = IA32E_PML4;
|
||||
entry->entry_base = (uint64_t)table_addr;
|
||||
entry->entry_present = PT_NOT_PRESENT;
|
||||
entry->page_size = check_mmu_1gb_support(map_params) ?
|
||||
(PAGE_SIZE_1G) : (PAGE_SIZE_2M);
|
||||
entry->entry_off = fetch_page_table_offset(addr, IA32E_PML4);
|
||||
entry->entry_val = table_entry;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Obtain page table entry from PDPT table*/
|
||||
table_addr = (void *)(table_entry & IA32E_REF_MASK);
|
||||
table_entry = get_table_entry(map_params, addr,
|
||||
table_addr, IA32E_PDPT);
|
||||
table_present = check_page_table_present(map_params, table_entry);
|
||||
if (table_present == PT_NOT_PRESENT) {
|
||||
/* PDPTE not present, return PDPT base address */
|
||||
entry->entry_level = IA32E_PDPT;
|
||||
entry->entry_base = (uint64_t)table_addr;
|
||||
entry->entry_present = PT_NOT_PRESENT;
|
||||
entry->page_size = check_mmu_1gb_support(map_params) ?
|
||||
(PAGE_SIZE_1G) : (PAGE_SIZE_2M);
|
||||
entry->entry_off = fetch_page_table_offset(addr, IA32E_PDPT);
|
||||
entry->entry_val = table_entry;
|
||||
return;
|
||||
}
|
||||
if (table_entry & IA32E_PDPTE_PS_BIT) {
|
||||
/* 1GB page size, return the base addr of the pg entry*/
|
||||
entry->entry_level = IA32E_PDPT;
|
||||
entry->entry_base = (uint64_t)table_addr;
|
||||
entry->page_size = check_mmu_1gb_support(map_params) ?
|
||||
(PAGE_SIZE_1G) : (PAGE_SIZE_2M);
|
||||
entry->entry_present = PT_PRESENT;
|
||||
entry->entry_off = fetch_page_table_offset(addr, IA32E_PDPT);
|
||||
entry->entry_val = table_entry;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Obtain page table entry from PD table*/
|
||||
table_addr = (void *)(table_entry&IA32E_REF_MASK);
|
||||
table_entry = get_table_entry(map_params, addr,
|
||||
table_addr, IA32E_PD);
|
||||
table_present = check_page_table_present(map_params, table_entry);
|
||||
if (table_present == PT_NOT_PRESENT) {
|
||||
/* PDE not present, return PDE base address */
|
||||
entry->entry_level = IA32E_PD;
|
||||
entry->entry_base = (uint64_t)table_addr;
|
||||
entry->entry_present = PT_NOT_PRESENT;
|
||||
entry->page_size = PAGE_SIZE_2M;
|
||||
entry->entry_off = fetch_page_table_offset(addr, IA32E_PD);
|
||||
entry->entry_val = table_entry;
|
||||
return;
|
||||
|
||||
}
|
||||
if (table_entry & IA32E_PDE_PS_BIT) {
|
||||
/* 2MB page size, return the base addr of the pg entry*/
|
||||
entry->entry_level = IA32E_PD;
|
||||
entry->entry_base = (uint64_t)table_addr;
|
||||
entry->entry_present = PT_PRESENT;
|
||||
entry->page_size = PAGE_SIZE_2M;
|
||||
entry->entry_off = fetch_page_table_offset(addr, IA32E_PD);
|
||||
entry->entry_val = table_entry;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Obtain page table entry from PT table*/
|
||||
table_addr = (void *)(table_entry&IA32E_REF_MASK);
|
||||
table_entry = get_table_entry(map_params, addr,
|
||||
table_addr, IA32E_PT);
|
||||
table_present = check_page_table_present(map_params, table_entry);
|
||||
entry->entry_present = ((table_present == PT_PRESENT)
|
||||
? (PT_PRESENT):(PT_NOT_PRESENT));
|
||||
entry->entry_level = IA32E_PT;
|
||||
entry->entry_base = (uint64_t)table_addr;
|
||||
entry->page_size = PAGE_SIZE_4K;
|
||||
entry->entry_off = fetch_page_table_offset(addr, IA32E_PT);
|
||||
entry->entry_val = table_entry;
|
||||
}
|
||||
|
||||
static uint64_t update_page_table_entry(struct map_params *map_params,
|
||||
void *paddr, void *vaddr, uint64_t size, uint64_t attr,
|
||||
enum mem_map_request_type request_type, bool direct)
|
||||
{
|
||||
uint64_t remaining_size = size;
|
||||
uint32_t adjustment_size;
|
||||
int ept_entry = map_params->page_table_type;
|
||||
/* Obtain the PML4 address */
|
||||
void *table_addr = direct ? (map_params->pml4_base)
|
||||
: (map_params->pml4_inverted);
|
||||
|
||||
/* Walk from the PML4 table to the PDPT table */
|
||||
table_addr = walk_paging_struct(vaddr, table_addr, IA32E_PML4,
|
||||
map_params);
|
||||
|
||||
if ((remaining_size >= MEM_1G)
|
||||
&& (MEM_ALIGNED_CHECK(vaddr, MEM_1G))
|
||||
&& (MEM_ALIGNED_CHECK(paddr, MEM_1G))
|
||||
&& check_mmu_1gb_support(map_params)) {
|
||||
/* Map this 1 GByte memory region */
|
||||
adjustment_size = map_mem_region(vaddr, paddr,
|
||||
table_addr, attr, IA32E_PDPT,
|
||||
ept_entry, request_type);
|
||||
} else if ((remaining_size >= MEM_2M)
|
||||
&& (MEM_ALIGNED_CHECK(vaddr, MEM_2M))
|
||||
&& (MEM_ALIGNED_CHECK(paddr, MEM_2M))) {
|
||||
/* Walk from the PDPT table to the PD table */
|
||||
table_addr = walk_paging_struct(vaddr, table_addr,
|
||||
IA32E_PDPT, map_params);
|
||||
/* Map this 2 MByte memory region */
|
||||
adjustment_size = map_mem_region(vaddr, paddr,
|
||||
table_addr, attr, IA32E_PD, ept_entry,
|
||||
request_type);
|
||||
} else {
|
||||
/* Walk from the PDPT table to the PD table */
|
||||
table_addr = walk_paging_struct(vaddr,
|
||||
table_addr, IA32E_PDPT, map_params);
|
||||
/* Walk from the PD table to the page table */
|
||||
table_addr = walk_paging_struct(vaddr,
|
||||
table_addr, IA32E_PD, map_params);
|
||||
/* Map this 4 KByte memory region */
|
||||
adjustment_size = map_mem_region(vaddr, paddr,
|
||||
table_addr, attr, IA32E_PT,
|
||||
ept_entry, request_type);
|
||||
}
|
||||
|
||||
return adjustment_size;
|
||||
|
||||
}
|
||||
|
||||
static uint64_t break_page_table(struct map_params *map_params, void *paddr,
|
||||
void *vaddr, uint64_t page_size, bool direct)
|
||||
{
|
||||
uint32_t i = 0;
|
||||
uint64_t pa;
|
||||
uint64_t attr = 0x00;
|
||||
uint64_t next_page_size = 0x00;
|
||||
void *sub_tab_addr = NULL;
|
||||
struct entry_params entry;
|
||||
|
||||
switch (page_size) {
|
||||
/* Breaking 1GB page to 2MB page*/
|
||||
case PAGE_SIZE_1G:
|
||||
next_page_size = PAGE_SIZE_2M;
|
||||
attr |= IA32E_PDE_PS_BIT;
|
||||
pr_info("%s, Breaking 1GB -->2MB vaddr=0x%llx",
|
||||
__func__, vaddr);
|
||||
break;
|
||||
|
||||
/* Breaking 2MB page to 4KB page*/
|
||||
case PAGE_SIZE_2M:
|
||||
next_page_size = PAGE_SIZE_4K;
|
||||
pr_info("%s, Breaking 2MB -->4KB vaddr=0x%llx",
|
||||
__func__, vaddr);
|
||||
break;
|
||||
|
||||
/* 4KB page, No action*/
|
||||
case PAGE_SIZE_4K:
|
||||
default:
|
||||
next_page_size = PAGE_SIZE_4K;
|
||||
pr_info("%s, Breaking 4KB no action vaddr=0x%llx",
|
||||
__func__, vaddr);
|
||||
break;
|
||||
}
|
||||
|
||||
if (page_size != next_page_size) {
|
||||
obtain_last_page_table_entry(map_params, &entry, vaddr, direct);
|
||||
|
||||
/* Need to allocate a new sub-table to hold the broken-down entries */
|
||||
sub_tab_addr = alloc_paging_struct();
|
||||
/* Check to ensure memory available for this structure */
|
||||
if (sub_tab_addr == 0) {
|
||||
/* Error:
|
||||
* Unable to find table memory necessary to map memory
|
||||
*/
|
||||
pr_err("Fail to find table memory for map memory");
|
||||
ASSERT(sub_tab_addr == 0, "");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The physical address may not be aligned to the
 * current page size; obtain the starting physical address
 * aligned down to the current page size.
 */
|
||||
pa = ((((uint64_t)paddr) / page_size) * page_size);
|
||||
if (map_params->page_table_type == PT_EPT) {
|
||||
/* Keep original attribute(here &0x3f)
|
||||
* bit 0(R) bit1(W) bit2(X) bit3~5 MT
|
||||
*/
|
||||
attr |= (entry.entry_val & 0x3f);
|
||||
} else {
|
||||
/* Keep original attribute(here &0x7f) */
|
||||
attr |= (entry.entry_val & 0x7f);
|
||||
}
|
||||
/* write all entries and keep original attr*/
|
||||
for (i = 0; i < IA32E_NUM_ENTRIES; i++) {
|
||||
MEM_WRITE64(sub_tab_addr + (i * IA32E_COMM_ENTRY_SIZE),
|
||||
(attr | (pa + (i * next_page_size))));
|
||||
}
|
||||
if (map_params->page_table_type == PT_EPT) {
|
||||
/* Write the table entry to map this memory,
|
||||
* SDM chapter28 figure 28-1
|
||||
* bit 0(R) bit1(W) bit2(X) bit3~5 MUST be reserved
|
||||
* (here &0x07)
|
||||
*/
|
||||
MEM_WRITE64(entry.entry_base + entry.entry_off,
|
||||
((entry.entry_val & 0x07) |
|
||||
((uint64_t)sub_tab_addr)));
|
||||
} else {
|
||||
/* Write the table entry to map this memory,
|
||||
* SDM chapter4 figure 4-11
|
||||
* bit0(P) bit1(RW) bit2(U/S) bit3(PWT) bit4(PCD)
|
||||
* bit5(A) bit6(D or Ignore)
|
||||
*/
|
||||
MEM_WRITE64(entry.entry_base + entry.entry_off,
|
||||
((entry.entry_val & 0x7f) |
|
||||
((uint64_t)sub_tab_addr)));
|
||||
}
|
||||
}
|
||||
|
||||
return next_page_size;
|
||||
}
|
||||
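/*
 * Worked example (illustrative): to change the attributes of a single 4 KB
 * page that currently sits inside a present 2 MB mapping, modify_paging()
 * below notices that the request is smaller than entry.page_size and calls
 * break_page_table(). That allocates a page table, fills all
 * IA32E_NUM_ENTRIES entries with 4 KB mappings inheriting the original
 * attributes, rewrites the PD entry to point at the new table, and returns
 * PAGE_SIZE_4K so the loop can retry and touch only the requested page.
 */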
|
||||
static void modify_paging(struct map_params *map_params, void *paddr,
|
||||
void *vaddr, uint64_t size, uint32_t flags,
|
||||
enum mem_map_request_type request_type, bool direct)
|
||||
{
|
||||
int64_t remaining_size;
|
||||
uint64_t adjust_size;
|
||||
uint64_t attr;
|
||||
int status = 0;
|
||||
struct entry_params entry;
|
||||
uint64_t page_size;
|
||||
uint64_t vaddr_end = ((uint64_t)vaddr) + size;
|
||||
|
||||
/* if the address is not PAGE aligned, will drop
|
||||
* the unaligned part
|
||||
*/
|
||||
paddr = (void *)ROUND_PAGE_UP((uint64_t)paddr);
|
||||
vaddr = (void *)ROUND_PAGE_UP((uint64_t)vaddr);
|
||||
vaddr_end = ROUND_PAGE_DOWN(vaddr_end);
|
||||
remaining_size = vaddr_end - (uint64_t)vaddr;
|
||||
|
||||
if ((request_type >= PAGING_REQUEST_TYPE_UNKNOWN)
|
||||
|| (map_params == NULL)) {
|
||||
pr_err("%s: vaddr=0x%llx size=0x%llx req_type=0x%lx",
|
||||
__func__, vaddr, size, request_type);
|
||||
status = -EINVAL;
|
||||
}
|
||||
ASSERT(status == 0, "Incorrect Arguments");
|
||||
|
||||
attr = config_page_table_attr(map_params, flags);
|
||||
/* Loop until the entire block of memory is appropriately
|
||||
* MAP/UNMAP/MODIFY
|
||||
*/
|
||||
while (remaining_size > 0) {
|
||||
obtain_last_page_table_entry(map_params, &entry, vaddr, direct);
|
||||
/* filter the unmap request, no action in this case*/
|
||||
page_size = entry.page_size;
|
||||
if ((request_type == PAGING_REQUEST_TYPE_UNMAP)
|
||||
&& (entry.entry_present == PT_NOT_PRESENT)) {
|
||||
adjust_size =
|
||||
page_size - ((uint64_t)(vaddr) % page_size);
|
||||
vaddr += adjust_size;
|
||||
paddr += adjust_size;
|
||||
remaining_size -= adjust_size;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* if the address is NOT aligned of current page size,
|
||||
* or required memory size < page size
|
||||
* need to break page firstly
|
||||
*/
|
||||
if (entry.entry_present == PT_PRESENT) {
|
||||
/* May need to break the page recursively in this case,
 * e.g. 1GB -> 2MB -> 4KB
 */
|
||||
while ((uint64_t)remaining_size < page_size
|
||||
|| (!MEM_ALIGNED_CHECK(vaddr, page_size))
|
||||
|| (!MEM_ALIGNED_CHECK(paddr, page_size))) {
|
||||
/* The breaking function return the page size
|
||||
* of next level page table
|
||||
*/
|
||||
page_size = break_page_table(map_params,
|
||||
paddr, vaddr, page_size, direct);
|
||||
}
|
||||
} else {
|
||||
page_size = ((uint64_t)remaining_size < page_size)
|
||||
? ((uint64_t)remaining_size) : (page_size);
|
||||
}
|
||||
/* The function return the memory size that one entry can map */
|
||||
adjust_size = update_page_table_entry(map_params, paddr, vaddr,
|
||||
page_size, attr, request_type, direct);
|
||||
vaddr += adjust_size;
|
||||
paddr += adjust_size;
|
||||
remaining_size -= adjust_size;
|
||||
}
|
||||
}
|
||||
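/*
 * Worked example (illustrative, assuming 1 GB pages are supported): mapping
 * 0x40201000 bytes (1 GB + 2 MB + 4 KB) at 1 GB-aligned vaddr/paddr takes
 * three passes through the loop above. update_page_table_entry() maps 1 GB
 * at the PDPT level, then 2 MB at the PD level, then 4 KB at the PT level,
 * because each call returns the size it consumed and the loop advances
 * vaddr/paddr until remaining_size reaches zero.
 */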
|
||||
void map_mem(struct map_params *map_params, void *paddr, void *vaddr,
|
||||
uint64_t size, uint32_t flags)
|
||||
{
|
||||
/* used for MMU and EPT*/
|
||||
modify_paging(map_params, paddr, vaddr, size, flags,
|
||||
PAGING_REQUEST_TYPE_MAP, true);
|
||||
/* only for EPT */
|
||||
if (map_params->page_table_type == PT_EPT) {
|
||||
modify_paging(map_params, vaddr, paddr, size, flags,
|
||||
PAGING_REQUEST_TYPE_MAP, false);
|
||||
}
|
||||
}
|
||||
|
||||
void unmap_mem(struct map_params *map_params, void *paddr, void *vaddr,
|
||||
uint64_t size, uint32_t flags)
|
||||
{
|
||||
/* used for MMU and EPT */
|
||||
modify_paging(map_params, paddr, vaddr, size, flags,
|
||||
PAGING_REQUEST_TYPE_UNMAP, true);
|
||||
/* only for EPT */
|
||||
if (map_params->page_table_type == PT_EPT) {
|
||||
modify_paging(map_params, vaddr, paddr, size, flags,
|
||||
PAGING_REQUEST_TYPE_UNMAP, false);
|
||||
}
|
||||
}
|
||||
|
||||
void modify_mem(struct map_params *map_params, void *paddr, void *vaddr,
|
||||
uint64_t size, uint32_t flags)
|
||||
{
|
||||
/* used for MMU and EPT*/
|
||||
modify_paging(map_params, paddr, vaddr, size, flags,
|
||||
PAGING_REQUEST_TYPE_MODIFY, true);
|
||||
/* only for EPT */
|
||||
if (map_params->page_table_type == PT_EPT) {
|
||||
modify_paging(map_params, vaddr, paddr, size, flags,
|
||||
PAGING_REQUEST_TYPE_MODIFY, false);
|
||||
}
|
||||
}
|
98
hypervisor/arch/x86/notify.c
Normal file
@@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
#include <irq.h>
|
||||
|
||||
static struct dev_handler_node *notification_node;
|
||||
|
||||
/* run in interrupt context */
|
||||
static int kick_notification(__unused int irq, __unused void *data)
|
||||
{
|
||||
/* The notification vector does not require handling here; it's just used
 * to kick the target cpu out of non-root mode.
 */
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int request_notification_irq(dev_handler_t func, void *data,
|
||||
const char *name)
|
||||
{
|
||||
int irq = -1; /* system allocate */
|
||||
struct dev_handler_node *node = NULL;
|
||||
|
||||
if (notification_node != NULL) {
|
||||
pr_info("%s, Notification vector already allocated on this CPU",
|
||||
__func__);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
/* all cpu register the same notification vector */
|
||||
node = pri_register_handler(irq, VECTOR_NOTIFY_VCPU, func, data, name);
|
||||
if (node == NULL) {
|
||||
pr_err("Failed to add notify isr");
|
||||
return -1;
|
||||
}
|
||||
update_irq_handler(dev_to_irq(node), quick_handler_nolock);
|
||||
notification_node = node;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void setup_notification(void)
|
||||
{
|
||||
int cpu;
|
||||
char name[32] = {0};
|
||||
|
||||
cpu = get_cpu_id();
|
||||
if (cpu > 0)
|
||||
return;
|
||||
|
||||
/* support IPI notification; the BSP registers once and all CPUs share the same notification vector */
|
||||
snprintf(name, 32, "NOTIFY_ISR%d", cpu);
|
||||
if (request_notification_irq(kick_notification, NULL, name) < 0) {
|
||||
pr_err("Failed to setup notification");
|
||||
return;
|
||||
}
|
||||
|
||||
dev_dbg(ACRN_DBG_PTIRQ, "NOTIFY: irq[%d] setup vector %x",
|
||||
dev_to_irq(notification_node),
|
||||
dev_to_vector(notification_node));
|
||||
}
|
||||
|
||||
void cleanup_notification(void)
|
||||
{
|
||||
if (notification_node)
|
||||
unregister_handler_common(notification_node);
|
||||
notification_node = NULL;
|
||||
}
|
117
hypervisor/arch/x86/softirq.c
Normal file
@@ -0,0 +1,117 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
|
||||
static DEFINE_CPU_DATA(uint64_t, softirq_pending);
|
||||
|
||||
void disable_softirq(int cpu_id)
|
||||
{
|
||||
bitmap_clr(SOFTIRQ_ATOMIC, &per_cpu(softirq_pending, cpu_id));
|
||||
}
|
||||
|
||||
void enable_softirq(int cpu_id)
|
||||
{
|
||||
bitmap_set(SOFTIRQ_ATOMIC, &per_cpu(softirq_pending, cpu_id));
|
||||
}
|
||||
|
||||
void init_softirq(void)
|
||||
{
|
||||
int cpu_id;
|
||||
|
||||
for (cpu_id = 0; cpu_id < phy_cpu_num; cpu_id++) {
|
||||
per_cpu(softirq_pending, cpu_id) = 0;
|
||||
bitmap_set(SOFTIRQ_ATOMIC, &per_cpu(softirq_pending, cpu_id));
|
||||
}
|
||||
}
|
||||
|
||||
void raise_softirq(int softirq_id)
|
||||
{
|
||||
int cpu_id = get_cpu_id();
|
||||
uint64_t *bitmap = &per_cpu(softirq_pending, cpu_id);
|
||||
|
||||
if (cpu_id >= phy_cpu_num)
|
||||
return;
|
||||
|
||||
bitmap_set(softirq_id, bitmap);
|
||||
}
|
||||
|
||||
void exec_softirq(void)
|
||||
{
|
||||
int cpu_id = get_cpu_id();
|
||||
uint64_t *bitmap = &per_cpu(softirq_pending, cpu_id);
|
||||
|
||||
uint64_t rflag;
|
||||
int softirq_id;
|
||||
|
||||
if (cpu_id >= phy_cpu_num)
|
||||
return;
|
||||
|
||||
/* Disable softirq handling:
 * SOFTIRQ_ATOMIC bit = 0 means a softirq pass is already in progress
 */
|
||||
if (!bitmap_test_and_clear(SOFTIRQ_ATOMIC, bitmap))
|
||||
return;
|
||||
|
||||
if (((*bitmap) & SOFTIRQ_MASK) == 0UL)
|
||||
goto ENABLE_AND_EXIT;
|
||||
|
||||
/* only handle softirqs when interrupts are enabled (RFLAGS.IF set) */
|
||||
CPU_RFLAGS_SAVE(&rflag);
|
||||
if (!(rflag & (1<<9)))
|
||||
goto ENABLE_AND_EXIT;
|
||||
|
||||
while (1) {
|
||||
softirq_id = bitmap_ffs(bitmap);
|
||||
if ((softirq_id < 0) || (softirq_id >= SOFTIRQ_MAX))
|
||||
break;
|
||||
|
||||
bitmap_clr(softirq_id, bitmap);
|
||||
|
||||
switch (softirq_id) {
|
||||
case SOFTIRQ_TIMER:
|
||||
timer_softirq(cpu_id);
|
||||
break;
|
||||
case SOFTIRQ_DEV_ASSIGN:
|
||||
ptdev_softirq(cpu_id);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
ENABLE_AND_EXIT:
|
||||
enable_softirq(cpu_id);
|
||||
}
|
||||
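/*
 * Usage sketch (the calling context is an assumption, not part of this
 * file): an interrupt handler defers work by raising a softirq, and the
 * interrupt exit path later drains it.
 */
static int example_isr(__unused int irq, __unused void *data)
{
	raise_softirq(SOFTIRQ_TIMER);	/* defer the real work */
	return 0;
}

static void example_irq_exit(void)
{
	/* runs timer_softirq()/ptdev_softirq() for any pending bits */
	exec_softirq();
}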
|
561
hypervisor/arch/x86/timer.c
Normal file
@@ -0,0 +1,561 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
#define MAX_TIMER_ACTIONS 32
|
||||
|
||||
struct timer_statistics {
|
||||
struct {
|
||||
uint64_t pickup_id;
|
||||
uint64_t pickup_time;
|
||||
uint64_t pickup_deadline;
|
||||
uint64_t added_id;
|
||||
uint64_t added_time;
|
||||
uint64_t added_deadline;
|
||||
} last;
|
||||
uint64_t total_pickup_cnt;
|
||||
uint64_t total_added_cnt;
|
||||
uint64_t irq_cnt;
|
||||
long pending_cnt;
|
||||
};
|
||||
|
||||
struct timer {
|
||||
timer_handle_t func; /* callback if time reached */
|
||||
uint64_t priv_data; /* func private data */
|
||||
uint64_t deadline; /* tsc deadline to interrupt */
|
||||
long handle; /* unique handle for user */
|
||||
int cpu_id; /* armed on which CPU */
|
||||
int id; /* timer ID, used by release */
|
||||
struct list_head node; /* link all timers */
|
||||
};
|
||||
|
||||
struct per_cpu_timers {
|
||||
struct timer *timers_pool; /* timer pool used for allocation */
|
||||
uint64_t free_bitmap;
|
||||
struct list_head timer_list; /* runtime list of active timers */
|
||||
spinlock_t lock;
|
||||
int cpu_id;
|
||||
struct timer_statistics stat;
|
||||
};
|
||||
|
||||
static DEFINE_CPU_DATA(struct per_cpu_timers, cpu_timers);
|
||||
|
||||
#define TIMER_IRQ (NR_MAX_IRQS - 1)
|
||||
|
||||
DEFINE_CPU_DATA(struct dev_handler_node *, timer_node);
|
||||
|
||||
static struct timer*
|
||||
find_expired_timer(struct per_cpu_timers *cpu_timer, uint64_t tsc_now);
|
||||
|
||||
static struct timer *alloc_timer(int cpu_id)
|
||||
{
|
||||
int idx;
|
||||
struct per_cpu_timers *cpu_timer;
|
||||
struct timer *timer;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
cpu_timer = &per_cpu(cpu_timers, cpu_id);
|
||||
spinlock_irqsave_obtain(&cpu_timer->lock);
|
||||
idx = bitmap_ffs(&cpu_timer->free_bitmap);
|
||||
if (idx < 0) {
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bitmap_clr(idx, &cpu_timer->free_bitmap);
|
||||
cpu_timer->stat.total_added_cnt++;
|
||||
cpu_timer->stat.pending_cnt++;
|
||||
|
||||
/* assign unique handle and never duplicate */
|
||||
timer = cpu_timer->timers_pool + idx;
|
||||
timer->handle = cpu_timer->stat.total_added_cnt;
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
|
||||
ASSERT((timer->cpu_id == cpu_id),
"timer cpu_id did not match");
|
||||
return timer;
|
||||
}
|
||||
|
||||
static void release_timer(struct timer *timer)
|
||||
{
|
||||
struct per_cpu_timers *cpu_timer;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
cpu_timer = &per_cpu(cpu_timers, timer->cpu_id);
|
||||
timer->priv_data = 0;
|
||||
timer->func = NULL;
|
||||
timer->deadline = 0;
|
||||
spinlock_irqsave_obtain(&cpu_timer->lock);
|
||||
bitmap_set(timer->id, &cpu_timer->free_bitmap);
|
||||
cpu_timer->stat.pending_cnt--;
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
}
|
||||
|
||||
static int get_target_cpu(void)
|
||||
{
|
||||
/* we should search idle CPU to balance timer service */
|
||||
return get_cpu_id();
|
||||
}
|
||||
|
||||
static struct timer*
|
||||
find_expired_timer(struct per_cpu_timers *cpu_timer, uint64_t tsc_now)
|
||||
{
|
||||
struct timer *timer;
|
||||
struct list_head *pos;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
spinlock_irqsave_obtain(&cpu_timer->lock);
|
||||
list_for_each(pos, &cpu_timer->timer_list) {
|
||||
timer = list_entry(pos, struct timer, node);
|
||||
if (timer->deadline <= tsc_now)
|
||||
goto UNLOCK;
|
||||
}
|
||||
timer = NULL;
|
||||
UNLOCK:
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
return timer;
|
||||
}
|
||||
|
||||
/* caller must hold cpu_timer->lock */
|
||||
static struct timer*
|
||||
_search_nearest_timer(struct per_cpu_timers *cpu_timer)
|
||||
{
|
||||
struct timer *timer;
|
||||
struct timer *target = NULL;
|
||||
struct list_head *pos;
|
||||
|
||||
list_for_each(pos, &cpu_timer->timer_list) {
|
||||
timer = list_entry(pos, struct timer, node);
|
||||
if (target == NULL)
|
||||
target = timer;
|
||||
else if (timer->deadline < target->deadline)
|
||||
target = timer;
|
||||
}
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
/* caller must hold cpu_timer->lock */
|
||||
static struct timer*
|
||||
_search_timer_by_handle(struct per_cpu_timers *cpu_timer, long handle)
|
||||
{
|
||||
struct timer *timer;
|
||||
struct list_head *pos;
|
||||
|
||||
list_for_each(pos, &cpu_timer->timer_list) {
|
||||
timer = list_entry(pos, struct timer, node);
|
||||
if (timer->handle == handle)
|
||||
goto FOUND;
|
||||
}
|
||||
timer = NULL;
|
||||
FOUND:
|
||||
return timer;
|
||||
}
|
||||
|
||||
static void
|
||||
run_timer(struct per_cpu_timers *cpu_timer, struct timer *timer)
|
||||
{
|
||||
spinlock_rflags;
|
||||
|
||||
/* remove from list first */
|
||||
spinlock_irqsave_obtain(&cpu_timer->lock);
|
||||
list_del(&timer->node);
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
|
||||
/* deadline = 0 means stop timer, we should skip */
|
||||
if (timer->func && timer->deadline != 0UL)
|
||||
timer->func(timer->priv_data);
|
||||
|
||||
cpu_timer->stat.last.pickup_id = timer->id;
|
||||
cpu_timer->stat.last.pickup_deadline = timer->deadline;
|
||||
cpu_timer->stat.last.pickup_time = rdtsc();
|
||||
cpu_timer->stat.total_pickup_cnt++;
|
||||
|
||||
TRACE_4I(TRACE_TIMER_ACTION_PCKUP, timer->id, timer->deadline,
|
||||
timer->deadline >> 32, cpu_timer->stat.total_pickup_cnt);
|
||||
}
|
||||
|
||||
/* run in interrupt context */
|
||||
static int tsc_deadline_handler(__unused int irq, __unused void *data)
|
||||
{
|
||||
raise_softirq(SOFTIRQ_TIMER);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void schedule_next_timer(int cpu)
|
||||
{
|
||||
struct timer *timer;
|
||||
struct per_cpu_timers *cpu_timer = &per_cpu(cpu_timers, cpu);
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
spinlock_irqsave_obtain(&cpu_timer->lock);
|
||||
timer = _search_nearest_timer(cpu_timer);
|
||||
if (timer) {
|
||||
/* it is okay to program an already-expired time */
|
||||
msr_write(MSR_IA32_TSC_DEADLINE, timer->deadline);
|
||||
}
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
}
|
||||
|
||||
int request_timer_irq(int cpu, dev_handler_t func, void *data, const char *name)
|
||||
{
|
||||
struct dev_handler_node *node = NULL;
|
||||
|
||||
if (cpu >= phy_cpu_num)
|
||||
return -1;
|
||||
|
||||
if (per_cpu(timer_node, cpu)) {
|
||||
pr_err("CPU%d timer isr already added", cpu);
|
||||
unregister_handler_common(per_cpu(timer_node, cpu));
|
||||
}
|
||||
|
||||
node = pri_register_handler(TIMER_IRQ, VECTOR_TIMER, func, data, name);
|
||||
if (node != NULL) {
|
||||
per_cpu(timer_node, cpu) = node;
|
||||
update_irq_handler(TIMER_IRQ, quick_handler_nolock);
|
||||
} else {
|
||||
pr_err("Failed to add timer isr");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*TODO: init in separate cpu */
|
||||
static void init_timer_pool(void)
|
||||
{
|
||||
int i, j;
|
||||
struct per_cpu_timers *cpu_timer;
|
||||
struct timer *timers_pool;
|
||||
|
||||
/* Make sure we only initialize once */
|
||||
if (get_cpu_id() > 0)
|
||||
return;
|
||||
|
||||
for (i = 0; i < phy_cpu_num; i++) {
|
||||
cpu_timer = &per_cpu(cpu_timers, i);
|
||||
cpu_timer->cpu_id = i;
|
||||
timers_pool =
|
||||
calloc(MAX_TIMER_ACTIONS, sizeof(struct timer));
|
||||
ASSERT(timers_pool, "Create timers pool failed");
|
||||
|
||||
cpu_timer->timers_pool = timers_pool;
|
||||
cpu_timer->free_bitmap = (1UL<<MAX_TIMER_ACTIONS)-1;
|
||||
|
||||
INIT_LIST_HEAD(&cpu_timer->timer_list);
|
||||
spinlock_init(&cpu_timer->lock);
|
||||
for (j = 0; j < MAX_TIMER_ACTIONS; j++) {
|
||||
timers_pool[j].id = j;
|
||||
timers_pool[j].cpu_id = i;
|
||||
timers_pool[j].priv_data = 0;
|
||||
timers_pool[j].func = NULL;
|
||||
timers_pool[j].deadline = 0;
|
||||
timers_pool[j].handle = -1UL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void init_tsc_deadline_timer(void)
|
||||
{
|
||||
uint32_t val;
|
||||
|
||||
val = VECTOR_TIMER;
|
||||
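/* Bit 18 of the LVT timer register selects TSC-deadline mode; leaving
 * bit 16 clear keeps the timer interrupt unmasked.
 */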
val |= 0x40000; /* TSC deadline and unmask */
|
||||
mmio_write_long(val, LAPIC_BASE + LAPIC_LVT_TIMER_REGISTER);
|
||||
asm volatile("mfence" : : : "memory");
|
||||
/* disarm timer */
|
||||
msr_write(MSR_IA32_TSC_DEADLINE, 0UL);
|
||||
}
|
||||
|
||||
void timer_init(void)
|
||||
{
|
||||
char name[32] = {0};
|
||||
int cpu = get_cpu_id();
|
||||
|
||||
snprintf(name, 32, "timer_tick[%d]", cpu);
|
||||
if (request_timer_irq(cpu, tsc_deadline_handler, NULL, name) < 0) {
|
||||
pr_err("Timer setup failed");
|
||||
return;
|
||||
}
|
||||
|
||||
init_tsc_deadline_timer();
|
||||
init_timer_pool();
|
||||
}
|
||||
|
||||
void timer_cleanup(void)
|
||||
{
|
||||
int cpu = get_cpu_id();
|
||||
|
||||
if (per_cpu(timer_node, cpu))
|
||||
unregister_handler_common(per_cpu(timer_node, cpu));
|
||||
|
||||
per_cpu(timer_node, cpu) = NULL;
|
||||
}
|
||||
|
||||
int timer_softirq(int cpu_id)
|
||||
{
|
||||
struct per_cpu_timers *cpu_timer;
|
||||
struct timer *timer;
|
||||
int max = MAX_TIMER_ACTIONS;
|
||||
|
||||
/* handle passed timer */
|
||||
cpu_timer = &per_cpu(cpu_timers, cpu_id);
|
||||
cpu_timer->stat.irq_cnt++;
|
||||
|
||||
/* This makes sure we are not blocked by delays inside func():
 * force an exit from the irq handler after servicing >31 timers.
 * Callers often add_timer() inside timer->func(); if there were a delay
 * inside func(), we would loop here forever because the newly added timer
 * has already expired due to the previous func()'s delay.
 */
|
||||
timer = find_expired_timer(cpu_timer, rdtsc());
|
||||
while (timer && --max > 0) {
|
||||
run_timer(cpu_timer, timer);
|
||||
/* put back to timer pool */
|
||||
release_timer(timer);
|
||||
/* search next one */
|
||||
timer = find_expired_timer(cpu_timer, rdtsc());
|
||||
}
|
||||
|
||||
/* update nearest timer */
|
||||
schedule_next_timer(cpu_id);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * add_timer accepts an already-passed deadline, but not 0.
 * return: a unique handle that can be used to find this timer again.
 * The handle becomes invalid after the timer expires.
 */
|
||||
long add_timer(timer_handle_t func, uint64_t data, uint64_t deadline)
|
||||
{
|
||||
struct timer *timer;
|
||||
struct per_cpu_timers *cpu_timer;
|
||||
int cpu_id = get_target_cpu();
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
if (deadline == 0 || func == NULL)
|
||||
return -1;
|
||||
|
||||
/* possibly in interrupt context; avoid memory allocation here */
|
||||
timer = alloc_timer(cpu_id);
|
||||
if (timer == NULL)
|
||||
return -1;
|
||||
|
||||
timer->func = func;
|
||||
timer->priv_data = data;
|
||||
timer->deadline = deadline;
|
||||
timer->cpu_id = get_target_cpu();
|
||||
|
||||
cpu_timer = &per_cpu(cpu_timers, timer->cpu_id);
|
||||
|
||||
/* We need irqsave here even softirq enabled to protect timer_list */
|
||||
spinlock_irqsave_obtain(&cpu_timer->lock);
|
||||
list_add_tail(&timer->node, &cpu_timer->timer_list);
|
||||
cpu_timer->stat.last.added_id = timer->id;
|
||||
cpu_timer->stat.last.added_time = rdtsc();
|
||||
cpu_timer->stat.last.added_deadline = timer->deadline;
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
TRACE_4I(TRACE_TIMER_ACTION_ADDED, timer->id, timer->deadline,
|
||||
timer->deadline >> 32, cpu_timer->stat.total_added_cnt);
|
||||
|
||||
schedule_next_timer(cpu_id);
|
||||
return timer->handle;
|
||||
}
|
||||
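/*
 * Usage sketch (illustrative): the callback prototype is assumed to match
 * timer_handle_t, and the deadline offset is an arbitrary example value.
 */
static int example_timer_cb(uint64_t data)
{
	pr_info("timer fired, data=0x%llx", data);
	return 0;
}

static void example_arm_timer(void)
{
	long handle = add_timer(example_timer_cb, 0UL,
			rdtsc() + 1000000UL);	/* deadline in TSC cycles */

	if (handle < 0)
		pr_err("failed to allocate a timer");
	else if (!cancel_timer(handle, get_cpu_id()))
		pr_info("timer already expired or not found");
}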
|
||||
/*
 * update_timer updates an existing timer; if not found, a new timer is added.
 */
|
||||
long
|
||||
update_timer(long handle, timer_handle_t func, uint64_t data,
|
||||
uint64_t deadline)
|
||||
{
|
||||
struct timer *timer;
|
||||
struct per_cpu_timers *cpu_timer;
|
||||
int cpu_id = get_target_cpu();
|
||||
|
||||
spinlock_rflags;
|
||||
bool ret = false;
|
||||
|
||||
if (deadline == 0)
|
||||
return -1;
|
||||
|
||||
cpu_timer = &per_cpu(cpu_timers, cpu_id);
|
||||
spinlock_irqsave_obtain(&cpu_timer->lock);
|
||||
timer = _search_timer_by_handle(cpu_timer, handle);
|
||||
if (timer) {
|
||||
/* update deadline and re-sort */
|
||||
timer->deadline = deadline;
|
||||
timer->func = func;
|
||||
timer->priv_data = data;
|
||||
TRACE_4I(TRACE_TIMER_ACTION_UPDAT, timer->id,
|
||||
timer->deadline, timer->deadline >> 32,
|
||||
cpu_timer->stat.total_added_cnt);
|
||||
ret = true;
|
||||
}
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
|
||||
if (ret)
|
||||
schedule_next_timer(cpu_id);
|
||||
else {
|
||||
/* if the update failed, add a new timer and return its handle */
|
||||
/* TODO: the correct behavior should be return failure here */
|
||||
handle = add_timer(func, data, deadline);
|
||||
}
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
/* NOTE: cpu_id referred to physical cpu id here */
|
||||
bool cancel_timer(long handle, int cpu_id)
|
||||
{
|
||||
struct timer *timer;
|
||||
struct per_cpu_timers *cpu_timer;
|
||||
|
||||
spinlock_rflags;
|
||||
bool ret = false;
|
||||
|
||||
cpu_timer = &per_cpu(cpu_timers, cpu_id);
|
||||
spinlock_irqsave_obtain(&cpu_timer->lock);
|
||||
timer = _search_timer_by_handle(cpu_timer, handle);
|
||||
if (timer) {
|
||||
/* NOTE: we cannot directly release the timer here.
 * Instead we set the deadline to expired and clear func.
 * This timer will be reclaimed at the next timer softirq.
 */
|
||||
timer->deadline = 0;
|
||||
timer->func = NULL;
|
||||
ret = true;
|
||||
}
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void dump_timer_pool_info(int cpu_id)
|
||||
{
|
||||
struct per_cpu_timers *cpu_timer =
|
||||
&per_cpu(cpu_timers, cpu_id);
|
||||
struct list_head *pos;
|
||||
int cn = 0;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
if (cpu_id >= phy_cpu_num)
|
||||
return;
|
||||
|
||||
pr_info("Timer%d statistics: Pending: %d\n\t"
|
||||
"total_pickup: %lld total_added: %lld total_irq: %lld",
|
||||
cpu_id,
|
||||
cpu_timer->stat.pending_cnt,
|
||||
cpu_timer->stat.total_pickup_cnt,
|
||||
cpu_timer->stat.total_added_cnt,
|
||||
cpu_timer->stat.irq_cnt);
|
||||
|
||||
pr_info("LAST pickup[%d] time: 0x%llx deadline: 0x%llx",
|
||||
cpu_timer->stat.last.pickup_id,
|
||||
cpu_timer->stat.last.pickup_time,
|
||||
cpu_timer->stat.last.pickup_deadline);
|
||||
|
||||
pr_info("LAST added[%d] time: 0x%llx deadline: 0x%llx",
|
||||
cpu_timer->stat.last.added_id,
|
||||
cpu_timer->stat.last.added_time,
|
||||
cpu_timer->stat.last.added_deadline);
|
||||
|
||||
spinlock_irqsave_obtain(&cpu_timer->lock);
|
||||
list_for_each(pos, &cpu_timer->timer_list) {
|
||||
cn++;
|
||||
pr_info("-->pending: %d trigger: 0x%llx", cn,
|
||||
list_entry(pos, struct timer, node)->deadline);
|
||||
}
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
}
|
||||
|
||||
void check_tsc(void)
|
||||
{
|
||||
uint64_t temp64;
|
||||
|
||||
/* Ensure time-stamp timer is turned on for each CPU */
|
||||
CPU_CR_READ(cr4, &temp64);
|
||||
CPU_CR_WRITE(cr4, (temp64 & ~CR4_TSD));
|
||||
}

uint64_t tsc_cycles_in_period(uint16_t timer_period_in_us)
{
	uint16_t initial_pit;
	uint16_t current_pit;
	uint32_t current_tsc;
#define PIT_TARGET 0x3FFF

	if (timer_period_in_us < 1000)
		pr_warn("Bad timer_period_in_us: %d\n",
			timer_period_in_us);

	/* Assume the 8254 delivers 18.2 ticks per second when 16 bits fully
	 * wrap. This is about 1.193MHz or a clock period of 0.8384uSec
	 */
	initial_pit = (uint16_t)(timer_period_in_us*1193000UL/1000000);
	initial_pit += PIT_TARGET;

	/* Port 0x43 ==> Control word write; Data 0x30 ==> Select Counter 0,
	 * Read/Write least significant byte first, mode 0, 16 bits.
	 */

	io_write_byte(0x30, 0x43);
	io_write_byte(initial_pit & 0x00ff, 0x40);	/* Write LSB */
	io_write_byte(initial_pit >> 8, 0x40);		/* Write MSB */

	current_tsc = rdtsc();

	do {
		/* Port 0x43 ==> Control word write; 0x00 ==> Select
		 * Counter 0, Counter Latch Command, Mode 0; 16 bits
		 */
		io_write_byte(0x00, 0x43);

		current_pit = io_read_byte(0x40);	/* Read LSB */
		current_pit |= io_read_byte(0x40) << 8;	/* Read MSB */
		/* Let the counter count down to PIT_TARGET */
	} while (current_pit > PIT_TARGET);

	current_tsc = rdtsc() - current_tsc;

	return (uint64_t) current_tsc;
}
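
/*
 * Illustrative sketch, not part of the original source file: one plausible
 * way to turn the PIT-referenced measurement above into a TSC frequency
 * estimate.  The calibrate_tsc_khz() name and the 20ms sampling window are
 * assumptions, not the hypervisor's actual calibration policy.
 */
static uint64_t calibrate_tsc_khz(void)
{
	/* TSC cycles elapsed while the 8254 counts down for ~20000us */
	uint64_t cycles = tsc_cycles_in_period(20000);

	/* cycles per 20ms, scaled to cycles per second, then to kHz */
	return (cycles * 50) / 1000;
}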
494
hypervisor/arch/x86/vmexit.c
Normal file
@@ -0,0 +1,494 @@
/*
 * Copyright (C) 2018 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <hypervisor.h>
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <hv_debug.h>

static int rdtscp_handler(struct vcpu *vcpu);
static int unhandled_vmexit_handler(struct vcpu *vcpu);
static int rdtsc_handler(struct vcpu *vcpu);

/* VM Dispatch table for Exit condition handling */
static const struct vm_exit_dispatch dispatch_table[] = {
	[VMX_EXIT_REASON_EXCEPTION_OR_NMI] = {
		.handler = exception_handler},
	[VMX_EXIT_REASON_EXTERNAL_INTERRUPT] = {
		.handler = external_interrupt_handler},
	[VMX_EXIT_REASON_TRIPLE_FAULT] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_INIT_SIGNAL] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_STARTUP_IPI] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_IO_SMI] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_OTHER_SMI] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_INTERRUPT_WINDOW] = {
		.handler = interrupt_win_exiting_handler},
	[VMX_EXIT_REASON_NMI_WINDOW] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_TASK_SWITCH] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_CPUID] = {
		.handler = cpuid_handler},
	[VMX_EXIT_REASON_GETSEC] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_HLT] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_INVD] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_INVLPG] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_RDPMC] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_RDTSC] = {
		.handler = rdtsc_handler},
	[VMX_EXIT_REASON_RSM] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_VMCALL] = {
		.handler = vmcall_handler},
	[VMX_EXIT_REASON_VMCLEAR] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_VMLAUNCH] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_VMPTRLD] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_VMPTRST] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_VMREAD] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_VMRESUME] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_VMWRITE] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_VMXOFF] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_VMXON] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_CR_ACCESS] = {
		.handler = cr_access_handler,
		.need_exit_qualification = 1},
	[VMX_EXIT_REASON_DR_ACCESS] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_IO_INSTRUCTION] = {
		.handler = io_instr_handler,
		.need_exit_qualification = 1},
	[VMX_EXIT_REASON_RDMSR] = {
		.handler = rdmsr_handler},
	[VMX_EXIT_REASON_WRMSR] = {
		.handler = wrmsr_handler},
	[VMX_EXIT_REASON_ENTRY_FAILURE_INVALID_GUEST_STATE] = {
		.handler = unhandled_vmexit_handler,
		.need_exit_qualification = 1},
	[VMX_EXIT_REASON_ENTRY_FAILURE_MSR_LOADING] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_MWAIT] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_MONITOR_TRAP] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_MONITOR] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_PAUSE] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_ENTRY_FAILURE_MACHINE_CHECK] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_TPR_BELOW_THRESHOLD] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_APIC_ACCESS] = {
		.handler = apicv_access_exit_handler},
	[VMX_EXIT_REASON_VIRTUALIZED_EOI] = {
		.handler = apicv_virtualized_eoi_exit_handler},
	[VMX_EXIT_REASON_GDTR_IDTR_ACCESS] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_LDTR_TR_ACCESS] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_EPT_VIOLATION] = {
		.handler = ept_violation_handler,
		.need_exit_qualification = 1},
	[VMX_EXIT_REASON_EPT_MISCONFIGURATION] = {
		.handler = ept_misconfig_handler,
		.need_exit_qualification = 1},
	[VMX_EXIT_REASON_INVEPT] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_RDTSCP] = {
		.handler = rdtscp_handler},
	[VMX_EXIT_REASON_VMX_PREEMPTION_TIMER_EXPIRED] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_INVVPID] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_WBINVD] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_XSETBV] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_APIC_WRITE] = {
		.handler = apicv_write_exit_handler}
};

struct vm_exit_dispatch *vmexit_handler(struct vcpu *vcpu)
{
	struct vm_exit_dispatch *dispatch = HV_NULL;
	uint16_t basic_exit_reason;

	/* Obtain interrupt info */
	vcpu->arch_vcpu.exit_interrupt_info =
		exec_vmread(VMX_IDT_VEC_INFO_FIELD);

	/* Calculate basic exit reason (low 16-bits) */
	basic_exit_reason = vcpu->arch_vcpu.exit_reason & 0xFFFF;

	/* Log details for exit */
	pr_dbg("Exit Reason: 0x%016llx ", vcpu->arch_vcpu.exit_reason);

	/* Ensure exit reason is within dispatch table */
	if (basic_exit_reason < ARRAY_SIZE(dispatch_table)) {
		/* Calculate dispatch table entry */
		dispatch = (struct vm_exit_dispatch *)
			(dispatch_table + basic_exit_reason);

		/* See if an exit qualification is necessary for this exit
		 * handler
		 */
		if (dispatch->need_exit_qualification) {
			/* Get exit qualification */
			vcpu->arch_vcpu.exit_qualification =
				exec_vmread(VMX_EXIT_QUALIFICATION);
		}
	}

	/* Update current vcpu in VM that caused vm exit */
	vcpu->vm->current_vcpu = vcpu;

	/* Return pointer to exit dispatch entry */
	return dispatch;
}
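
/*
 * Illustrative sketch, not part of the original source file: how a vcpu run
 * loop might consume the dispatch entry returned above.  The wrapper name
 * hv_handle_vmexit() and its error handling are assumptions; only
 * vmexit_handler() and the .handler callback come from this file.
 */
static int hv_handle_vmexit(struct vcpu *vcpu)
{
	struct vm_exit_dispatch *dispatch = vmexit_handler(vcpu);

	if (dispatch == HV_NULL || dispatch->handler == HV_NULL)
		return -EINVAL;

	/* the exit qualification, if needed, was already cached above */
	return dispatch->handler(vcpu);
}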

static int unhandled_vmexit_handler(struct vcpu *vcpu)
{
	pr_fatal("Error: Unhandled VM exit condition from guest at 0x%016llx ",
		exec_vmread(VMX_GUEST_RIP));

	pr_fatal("Exit Reason: 0x%016llx ", vcpu->arch_vcpu.exit_reason);

	pr_err("Exit qualification: 0x%016llx ",
		exec_vmread(VMX_EXIT_QUALIFICATION));

	/* while(1); */

	TRACE_2L(TRC_VMEXIT_UNHANDLED, vcpu->arch_vcpu.exit_reason, 0);

	return 0;
}

static int write_cr0(struct vcpu *vcpu, uint64_t value)
{
	uint32_t value32;
	uint64_t value64;

	pr_dbg("VMM: Guest trying to write 0x%08x to CR0", value);

	/* Read host mask value */
	value64 = exec_vmread(VMX_CR0_MASK);

	/* Clear all bits being written by guest that are owned by host */
	value &= ~value64;

	/* Update CR0 in guest state */
	vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr0 |= value;
	exec_vmwrite(VMX_GUEST_CR0,
		vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr0);
	pr_dbg("VMM: Guest allowed to write 0x%08x to CR0",
		vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr0);

	/* If guest is trying to transition vcpu from unpaged real mode to page
	 * protected mode make necessary changes to VMCS structure to reflect
	 * transition from real mode to paged-protected mode
	 */
	if (!is_vcpu_bsp(vcpu) &&
		(vcpu->arch_vcpu.cpu_mode == REAL_MODE) &&
		(value & CR0_PG) && (value & CR0_PE)) {
		/* Enable protected mode */
		value32 = exec_vmread(VMX_ENTRY_CONTROLS);
		value32 |= (VMX_ENTRY_CTLS_IA32E_MODE |
			VMX_ENTRY_CTLS_LOAD_PAT |
			VMX_ENTRY_CTLS_LOAD_EFER);
		exec_vmwrite(VMX_ENTRY_CONTROLS, value32);
		pr_dbg("VMX_ENTRY_CONTROLS: 0x%x ", value32);

		/* Disable unrestricted mode */
		value32 = exec_vmread(VMX_PROC_VM_EXEC_CONTROLS2);
		value32 |= (VMX_PROCBASED_CTLS2_EPT |
			VMX_PROCBASED_CTLS2_RDTSCP);
		exec_vmwrite(VMX_PROC_VM_EXEC_CONTROLS2, value32);
		pr_dbg("VMX_PROC_VM_EXEC_CONTROLS2: 0x%x ", value32);

		/* Set up EFER */
		value64 = exec_vmread64(VMX_GUEST_IA32_EFER_FULL);
		value64 |= (MSR_IA32_EFER_SCE_BIT |
			MSR_IA32_EFER_LME_BIT |
			MSR_IA32_EFER_LMA_BIT | MSR_IA32_EFER_NXE_BIT);
		exec_vmwrite64(VMX_GUEST_IA32_EFER_FULL, value64);
		pr_dbg("VMX_GUEST_IA32_EFER: 0x%016llx ", value64);
	}

	return 0;
}
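
/*
 * Illustrative sketch, not part of the original source file: the masking
 * rule applied by write_cr0() above (and write_cr4() below), factored out
 * as a pure helper.  Guest-written bits that fall inside the host-owned
 * mask are dropped; everything else is OR-ed into the current shadow value.
 * The helper name is an assumption.
 */
static inline uint64_t apply_guest_cr_write(uint64_t cur_cr,
		uint64_t host_owned_mask, uint64_t guest_value)
{
	/* keep only guest-owned bits of the write, preserve current bits */
	return cur_cr | (guest_value & ~host_owned_mask);
}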

static int write_cr3(struct vcpu *vcpu, uint64_t value)
{
	/* Write to guest's CR3 */
	vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3 = value;

	/* Commit new value to VMCS */
	exec_vmwrite(VMX_GUEST_CR3,
		vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3);

	return 0;
}

static int write_cr4(struct vcpu *vcpu, uint64_t value)
{
	uint64_t temp64;

	pr_dbg("VMM: Guest trying to write 0x%08x to CR4", value);

	/* Read host mask value */
	temp64 = exec_vmread(VMX_CR4_MASK);

	/* Clear all bits being written by guest that are owned by host */
	value &= ~temp64;

	/* Write updated CR4 (bitwise OR of allowed guest bits and CR4 host
	 * value)
	 */
	vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr4 |= value;
	exec_vmwrite(VMX_GUEST_CR4,
		vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr4);
	pr_dbg("VMM: Guest allowed to write 0x%08x to CR4",
		vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr4);

	return 0;
}

static int read_cr3(struct vcpu *vcpu, uint64_t *value)
{
	*value = vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3;

	pr_dbg("VMM: reading 0x%08x from CR3", *value);

	return 0;
}

int cpuid_handler(struct vcpu *vcpu)
{
	struct run_context *cur_context =
		&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];

	emulate_cpuid(vcpu, (uint32_t)cur_context->guest_cpu_regs.regs.rax,
		(uint32_t *)&cur_context->guest_cpu_regs.regs.rax,
		(uint32_t *)&cur_context->guest_cpu_regs.regs.rbx,
		(uint32_t *)&cur_context->guest_cpu_regs.regs.rcx,
		(uint32_t *)&cur_context->guest_cpu_regs.regs.rdx);

	TRACE_2L(TRC_VMEXIT_CPUID, vcpu->vcpu_id, 0);

	return 0;
}
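
/*
 * Illustrative sketch, not part of the original source file: a raw CPUID
 * wrapper of the kind a pass-through path inside emulate_cpuid() might use
 * for leaves that need no filtering.  The wrapper name is an assumption;
 * the real emulate_cpuid() applies per-VM adjustments that are not shown.
 */
static inline void raw_cpuid(uint32_t leaf, uint32_t subleaf, uint32_t *eax,
		uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
	asm volatile ("cpuid"
		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
		: "a" (leaf), "c" (subleaf));
}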

int cr_access_handler(struct vcpu *vcpu)
{
	uint64_t *regptr;
	struct run_context *cur_context =
		&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
	static const int reg_trans_tab[] = {
		[0] = VMX_MACHINE_T_GUEST_RAX_INDEX,
		[1] = VMX_MACHINE_T_GUEST_RCX_INDEX,
		[2] = VMX_MACHINE_T_GUEST_RDX_INDEX,
		[3] = VMX_MACHINE_T_GUEST_RBX_INDEX,
		[4] = 0xFF, /* for sp reg, should not be used, just for init */
		[5] = VMX_MACHINE_T_GUEST_RBP_INDEX,
		[6] = VMX_MACHINE_T_GUEST_RSI_INDEX,
		[7] = VMX_MACHINE_T_GUEST_RDI_INDEX,
		[8] = VMX_MACHINE_T_GUEST_R8_INDEX,
		[9] = VMX_MACHINE_T_GUEST_R9_INDEX,
		[10] = VMX_MACHINE_T_GUEST_R10_INDEX,
		[11] = VMX_MACHINE_T_GUEST_R11_INDEX,
		[12] = VMX_MACHINE_T_GUEST_R12_INDEX,
		[13] = VMX_MACHINE_T_GUEST_R13_INDEX,
		[14] = VMX_MACHINE_T_GUEST_R14_INDEX,
		[15] = VMX_MACHINE_T_GUEST_R15_INDEX
	};
	int idx = VM_EXIT_CR_ACCESS_REG_IDX(vcpu->arch_vcpu.exit_qualification);

	ASSERT(idx != 4, "index should not be 4 (target SP)");
	regptr = cur_context->guest_cpu_regs.longs + reg_trans_tab[idx];

	switch ((VM_EXIT_CR_ACCESS_ACCESS_TYPE
		 (vcpu->arch_vcpu.exit_qualification) << 4) |
		VM_EXIT_CR_ACCESS_CR_NUM(vcpu->arch_vcpu.exit_qualification)) {
	case 0x00:
		/* mov to cr0 */
		write_cr0(vcpu, *regptr);
		break;

	case 0x03:
		/* mov to cr3 */
		write_cr3(vcpu, *regptr);
		break;

	case 0x04:
		/* mov to cr4 */
		write_cr4(vcpu, *regptr);
		break;

	case 0x13:
		/* mov from cr3 */
		read_cr3(vcpu, regptr);
		break;
#if 0
	case 0x14:
		/* mov from cr4 (this should not happen) */
	case 0x10:
		/* mov from cr0 (this should not happen) */
#endif
	case 0x08:
		/* mov to cr8 */
		vlapic_set_cr8(vcpu->arch_vcpu.vlapic, *regptr);
		break;
	case 0x18:
		/* mov from cr8 */
		*regptr = vlapic_get_cr8(vcpu->arch_vcpu.vlapic);
		break;
	default:
		panic("Unhandled CR access");
		return -EINVAL;
	}

	TRACE_2L(TRC_VMEXIT_CR_ACCESS,
		VM_EXIT_CR_ACCESS_ACCESS_TYPE
			(vcpu->arch_vcpu.exit_qualification),
		VM_EXIT_CR_ACCESS_CR_NUM
			(vcpu->arch_vcpu.exit_qualification));

	return 0;
}
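
/*
 * Illustrative sketch, not part of the original source file: the CR-access
 * exit-qualification layout from the Intel SDM, which is what the
 * VM_EXIT_CR_ACCESS_* macros above extract.  A qualification with access
 * type 0 (MOV to CR) on CR8 produces the switch key 0x08 handled above,
 * while access type 1 (MOV from CR) on CR8 produces 0x18.  The helper name
 * is an assumption.
 */
static inline void decode_cr_access_qual(uint64_t qual, int *cr_num,
		int *access_type, int *gpr_idx)
{
	*cr_num = (int)(qual & 0xfUL);		 /* bits 3:0 - control register number */
	*access_type = (int)((qual >> 4) & 0x3UL); /* bits 5:4 - 0=mov to, 1=mov from */
	*gpr_idx = (int)((qual >> 8) & 0xfUL);	 /* bits 11:8 - GPR operand index */
}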

#if 0
/*
 * VMX_PROCBASED_CTLS_INVLPG is not enabled in the VM-execution
 * controls, therefore we don't need its handler.
 *
 * INVLPG: this instruction invalidates translation lookaside buffer entries.
 */
int invlpg_handler(__unused struct vcpu *vcpu)
{
	pr_fatal("INVLPG executed");

	return 0;
}

/*
 * The XSETBV instruction sets XCR0, which tells the processor for which
 * components state can be saved on a context switch using XSAVE.
 *
 * We don't handle this right now because we are on a platform that does not
 * support the XSAVE/XRSTORE feature, as reflected by CPUID.
 *
 * To make sure this never gets called until we support it, we can prevent
 * the guest from reading this bit in the CPUID VMEXIT.
 *
 * Linux checks this in CPUID: cpufeature.h: #define cpu_has_xsave
 */
static int xsetbv_instr_handler(__unused struct vcpu *vcpu)
{
	ASSERT("Not Supported" == 0, "XSETBV executed");

	return 0;
}
#endif

static int rdtsc_handler(struct vcpu *vcpu)
{
	uint64_t host_tsc, guest_tsc, tsc_offset;
	uint32_t id;
	struct run_context *cur_context =
		&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];

	/* Read the host TSC value */
	CPU_RDTSCP_EXECUTE(&host_tsc, &id);

	/* Get the guest TSC offset value from VMCS */
	tsc_offset =
		exec_vmread64(VMX_TSC_OFFSET_FULL);

	/* Update the guest TSC value by following:
	 * TSC_guest = TSC_host + TSC_guest_Offset
	 */
	guest_tsc = host_tsc + tsc_offset;

	/* Return the TSC_guest in rax:rdx */
	cur_context->guest_cpu_regs.regs.rax = (uint32_t) guest_tsc;
	cur_context->guest_cpu_regs.regs.rdx = (uint32_t) (guest_tsc >> 32);

	TRACE_2L(TRC_VMEXIT_RDTSC, host_tsc, tsc_offset);

	return 0;
}

static int rdtscp_handler(struct vcpu *vcpu)
{
	uint64_t host_tsc, guest_tsc, tsc_offset;
	uint32_t id;
	struct run_context *cur_context =
		&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];

	/* Read the host TSC value */
	CPU_RDTSCP_EXECUTE(&host_tsc, &id);

	/* Get the guest TSC offset value from VMCS */
	tsc_offset =
		exec_vmread64(VMX_TSC_OFFSET_FULL);

	/* Update the guest TSC value by following:
	 * TSC_guest = TSC_host + TSC_guest_Offset
	 */
	guest_tsc = host_tsc + tsc_offset;

	/* Return the TSC_guest in rax:rdx and IA32_TSC_AUX in rcx */
	cur_context->guest_cpu_regs.regs.rax = (uint32_t) guest_tsc;
	cur_context->guest_cpu_regs.regs.rdx = (uint32_t) (guest_tsc >> 32);
	cur_context->guest_cpu_regs.regs.rcx = vcpu->arch_vcpu.msr_tsc_aux;

	TRACE_2L(TRC_VMEXIT_RDTSCP, guest_tsc, vcpu->arch_vcpu.msr_tsc_aux);

	return 0;
}
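
/*
 * Illustrative sketch, not part of the original source file: the inverse of
 * the calculation used by rdtsc_handler()/rdtscp_handler() above.  If the
 * hypervisor wanted the guest to observe a particular TSC value, the VMCS
 * TSC offset to program would simply be the difference.  The helper name is
 * an assumption.
 */
static inline uint64_t tsc_offset_for_guest(uint64_t desired_guest_tsc,
		uint64_t current_host_tsc)
{
	/* guest_tsc = host_tsc + offset  =>  offset = guest_tsc - host_tsc */
	return desired_guest_tsc - current_host_tsc;
}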
1346
hypervisor/arch/x86/vmx.c
Normal file
File diff suppressed because it is too large
Load Diff
245
hypervisor/arch/x86/vmx_asm.S
Normal file
@@ -0,0 +1,245 @@
/*
 * Copyright (C) 2018 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <vmx.h>
#include <msr.h>
#include <guest.h>
#include <vcpu.h>
#include <cpu.h>
#include <types.h>

	.text

/* int vmx_vmrun(struct run_context *context, int launch, int ibrs_type) */
	.code64
	.align 8
	.global vmx_vmrun
vmx_vmrun:

	/* Save all host GPRs that must be preserved across function calls
	   per System V ABI */
	push %rdx
	push %rbx
	push %rbp
	push %r12
	push %r13
	push %r14
	push %r15

	/* Save RDI on top of host stack for easy access to VCPU pointer
	   on return from guest context */
	push %rdi

	/* rdx = ibrs_type */
	/* if ibrs_type != IBRS_NONE, means IBRS feature is supported,
	 * restore MSR SPEC_CTRL to guest
	 */
	cmp $IBRS_NONE,%rdx
	je next

	movl $MSR_IA32_SPEC_CTRL,%ecx
	mov VMX_MACHINE_T_GUEST_SPEC_CTRL_OFFSET(%rdi),%rax
	movl $0,%edx
	wrmsr

next:

	/* Load VMCS_HOST_RSP_FIELD field value */
	mov $VMX_HOST_RSP,%rdx

	/* Write the current stack pointer to the VMCS_HOST_RSP_FIELD */
	vmwrite %rsp,%rdx

	/* Error occurred - handle error */
	jbe vm_eval_error

	/* Compare the launch flag to see if launching (1) or resuming (0) */
	cmp $VM_LAUNCH, %rsi

	mov VMX_MACHINE_T_GUEST_CR2_OFFSET(%rdi),%rax
	mov %rax,%cr2

	mov VMX_MACHINE_T_GUEST_RAX_OFFSET(%rdi),%rax
	mov VMX_MACHINE_T_GUEST_RBX_OFFSET(%rdi),%rbx
	mov VMX_MACHINE_T_GUEST_RCX_OFFSET(%rdi),%rcx
	mov VMX_MACHINE_T_GUEST_RDX_OFFSET(%rdi),%rdx
	mov VMX_MACHINE_T_GUEST_RBP_OFFSET(%rdi),%rbp
	mov VMX_MACHINE_T_GUEST_RSI_OFFSET(%rdi),%rsi
	mov VMX_MACHINE_T_GUEST_R8_OFFSET(%rdi),%r8
	mov VMX_MACHINE_T_GUEST_R9_OFFSET(%rdi),%r9
	mov VMX_MACHINE_T_GUEST_R10_OFFSET(%rdi),%r10
	mov VMX_MACHINE_T_GUEST_R11_OFFSET(%rdi),%r11
	mov VMX_MACHINE_T_GUEST_R12_OFFSET(%rdi),%r12
	mov VMX_MACHINE_T_GUEST_R13_OFFSET(%rdi),%r13
	mov VMX_MACHINE_T_GUEST_R14_OFFSET(%rdi),%r14
	mov VMX_MACHINE_T_GUEST_R15_OFFSET(%rdi),%r15

	mov VMX_MACHINE_T_GUEST_RDI_OFFSET(%rdi),%rdi

	/* Execute appropriate VMX instruction */
	je vm_launch

	/* Execute a VM resume */
	vmresume

vm_launch:

	/* Execute a VM launch */
	vmlaunch

	.global vm_exit
vm_exit:

	/* Get VCPU data structure pointer from top of host stack and
	   save guest RDI in its place */
	xchg 0(%rsp),%rdi

	/* Save current GPRs to guest state area */
	mov %rax,VMX_MACHINE_T_GUEST_RAX_OFFSET(%rdi)

	mov %cr2,%rax
	mov %rax,VMX_MACHINE_T_GUEST_CR2_OFFSET(%rdi)

	mov %rbx,VMX_MACHINE_T_GUEST_RBX_OFFSET(%rdi)
	mov %rcx,VMX_MACHINE_T_GUEST_RCX_OFFSET(%rdi)
	mov %rdx,VMX_MACHINE_T_GUEST_RDX_OFFSET(%rdi)
	mov %rbp,VMX_MACHINE_T_GUEST_RBP_OFFSET(%rdi)
	mov %rsi,VMX_MACHINE_T_GUEST_RSI_OFFSET(%rdi)
	mov %r8,VMX_MACHINE_T_GUEST_R8_OFFSET(%rdi)
	mov %r9,VMX_MACHINE_T_GUEST_R9_OFFSET(%rdi)
	mov %r10,VMX_MACHINE_T_GUEST_R10_OFFSET(%rdi)
	mov %r11,VMX_MACHINE_T_GUEST_R11_OFFSET(%rdi)
	mov %r12,VMX_MACHINE_T_GUEST_R12_OFFSET(%rdi)
	mov %r13,VMX_MACHINE_T_GUEST_R13_OFFSET(%rdi)
	mov %r14,VMX_MACHINE_T_GUEST_R14_OFFSET(%rdi)
	mov %r15,VMX_MACHINE_T_GUEST_R15_OFFSET(%rdi)

	/* Load guest RDI off host stack and into RDX */
	mov 0(%rsp),%rdx

	/* Save guest RDI to guest state area */
	mov %rdx,VMX_MACHINE_T_GUEST_RDI_OFFSET(%rdi)

	/* Save RDI to RSI for later SPEC_CTRL save */
	mov %rdi,%rsi

vm_eval_error:

	/* Restore host GPR System V required registers */
	pop %rdi
	pop %r15
	pop %r14
	pop %r13
	pop %r12
	pop %rbp
	pop %rbx
	pop %rdx


	/* Check vm fail, refer to 64-ia32 spec section 26.2 in volume#3 */
	mov $VM_FAIL,%rax
	jc vm_return
	jz vm_return

	/* Clear host registers to prevent speculative use */
	xor %rcx,%rcx
	xor %r8,%r8
	xor %r9,%r9
	xor %r10,%r10
	xor %r11,%r11

	/* rdx = ibrs_type */
	/* IBRS_NONE: no ibrs setting, just flush rsb
	 * IBRS_RAW: set IBRS then flush rsb
	 * IBRS_OPT: set STIBP & IBPB then flush rsb
	 */
	cmp $IBRS_NONE,%rdx
	je stuff_rsb

	cmp $IBRS_OPT,%rdx
	je ibrs_opt

	/* Save guest MSR SPEC_CTRL, low 32 bit is enough */
	movl $MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	mov %rax,VMX_MACHINE_T_GUEST_SPEC_CTRL_OFFSET(%rsi)
	movl $SPEC_ENABLE_IBRS,%eax
	movl $0,%edx
	wrmsr

	jmp stuff_rsb

ibrs_opt:

	movl $MSR_IA32_PRED_CMD,%ecx
	movl $PRED_SET_IBPB,%eax
	movl $0,%edx
	wrmsr

	/* Save guest MSR SPEC_CTRL, low 32 bit is enough */
	movl $MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	mov %rax,VMX_MACHINE_T_GUEST_SPEC_CTRL_OFFSET(%rsi)
	movl $SPEC_ENABLE_STIBP,%eax
	movl $0,%edx
	wrmsr

/* Stuff the RSB with 32 CALLs. Make sure no "ret" is executed before this
 * RSB stuffing; take care if code is ever inserted ahead of this point in a
 * future update.
 */
stuff_rsb:

	/* stuff 32 RSB, rax = 32/2 */
	mov $16,%rax
	.align 16
3:
	call 4f
33:
	pause
	jmp 33b
	.align 16
4:
	call 5f
44:
	pause
	jmp 44b
	.align 16
5:	dec %rax
	jnz 3b
	/* stuff 32 RSB, rsp += 8*32 */
	add $(8*32),%rsp

	mov $VM_SUCCESS,%rax

vm_return:
	/* Return to caller */
	ret
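
/*
 * Illustrative sketch, not part of the original vmx_asm.S: how the C side
 * lines up with the assembly above.  Per the System V AMD64 ABI the three
 * arguments arrive in %rdi (run_context pointer), %rsi (launch flag, 1 to
 * launch / 0 to resume) and %rdx (ibrs_type), which is exactly how the code
 * above consumes them.  The run_vcpu_once() wrapper and the "launched"
 * bookkeeping field are assumptions.
 */
int vmx_vmrun(struct run_context *context, int launch, int ibrs_type);

static int run_vcpu_once(struct vcpu *vcpu, int ibrs_type)
{
	struct run_context *ctx =
		&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
	int launch = vcpu->launched ? 0 : VM_LAUNCH;
	int status = vmx_vmrun(ctx, launch, ibrs_type);

	if (status == VM_SUCCESS)
		vcpu->launched = 1;

	return status;
}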
1162
hypervisor/arch/x86/vtd.c
Normal file
File diff suppressed because it is too large
Load Diff