initial import

internal commit: 14ac2bc2299032fa6714d1fefa7cf0987b3e3085

Signed-off-by: Eddie Dong <eddie.dong@intel.com>
Author: Eddie Dong
Date: 2018-03-07 20:57:14 +08:00
Committed-by: Jack Ren
Commit: f4cd4338fd
156 changed files with 41265 additions and 0 deletions

arch/x86/guest/guest.c (new file, 389 lines)

@@ -0,0 +1,389 @@
/*
* Copyright (C) 2018 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <hypervisor.h>
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <bsp_cfg.h>
#include <bsp_extern.h>
#include <acrn_hv_defs.h>
#include <hv_debug.h>
#include <multiboot.h>
#define BOOT_ARGS_LOAD_ADDR 0x24EFC000
#define ACRN_DBG_GUEST 6
/* for VM0 e820 */
uint32_t e820_entries;
struct e820_entry e820[E820_MAX_ENTRIES];
struct e820_mem_params e820_mem;
inline bool
is_vm0(struct vm *vm)
{
return (vm->attr.boot_idx & 0x7F) == 0;
}
inline struct vcpu *vcpu_from_vid(struct vm *vm, int vcpu_id)
{
int i;
struct vcpu *vcpu;
foreach_vcpu(i, vm, vcpu) {
if (vcpu->vcpu_id == vcpu_id)
return vcpu;
}
return NULL;
}
inline struct vcpu *vcpu_from_pid(struct vm *vm, int pcpu_id)
{
int i;
struct vcpu *vcpu;
foreach_vcpu(i, vm, vcpu) {
if (vcpu->pcpu_id == pcpu_id)
return vcpu;
}
return NULL;
}
inline struct vcpu *get_primary_vcpu(struct vm *vm)
{
int i;
struct vcpu *vcpu;
foreach_vcpu(i, vm, vcpu) {
if (is_vcpu_bsp(vcpu))
return vcpu;
}
return NULL;
}
inline uint64_t vcpumask2pcpumask(struct vm *vm, uint64_t vdmask)
{
int vcpu_id;
uint64_t dmask = 0;
struct vcpu *vcpu;
while ((vcpu_id = bitmap_ffs(&vdmask)) >= 0) {
bitmap_clr(vcpu_id, &vdmask);
vcpu = vcpu_from_vid(vm, vcpu_id);
ASSERT(vcpu, "vcpu_from_vid failed");
bitmap_set(vcpu->pcpu_id, &dmask);
}
return dmask;
}
inline bool vm_lapic_disabled(struct vm *vm)
{
int i;
struct vcpu *vcpu;
foreach_vcpu(i, vm, vcpu) {
if (vlapic_enabled(vcpu->arch_vcpu.vlapic))
return false;
}
return true;
}
int init_vm0_boot_info(struct vm *vm)
{
struct multiboot_module *mods = NULL;
struct multiboot_info *mbi = NULL;
if (!is_vm0(vm)) {
pr_err("just for vm0 to get info!");
return -EINVAL;
}
if (boot_regs[0] != MULTIBOOT_INFO_MAGIC) {
ASSERT(0, "no multiboot info found");
return -EINVAL;
}
mbi = (struct multiboot_info *)((uint64_t)boot_regs[1]);
dev_dbg(ACRN_DBG_GUEST, "Multiboot detected, flag=0x%x", mbi->mi_flags);
if (!(mbi->mi_flags & MULTIBOOT_INFO_HAS_MODS)) {
ASSERT(0, "no sos kernel info found");
return -EINVAL;
}
dev_dbg(ACRN_DBG_GUEST, "mod counts=%d\n", mbi->mi_mods_count);
/* mod[0] holds the kernel & cmdline; other mods hold ramdisk/firmware info */
mods = (struct multiboot_module *)(uint64_t)mbi->mi_mods_addr;
dev_dbg(ACRN_DBG_GUEST, "mod0 start=0x%x, end=0x%x",
mods[0].mm_mod_start, mods[0].mm_mod_end);
dev_dbg(ACRN_DBG_GUEST, "cmd addr=0x%x, str=%s", mods[0].mm_string,
(char *) (uint64_t)mods[0].mm_string);
vm->sw.kernel_type = VM_LINUX_GUEST;
vm->sw.kernel_info.kernel_src_addr =
(void *)(uint64_t)mods[0].mm_mod_start;
vm->sw.kernel_info.kernel_size =
mods[0].mm_mod_end - mods[0].mm_mod_start;
vm->sw.kernel_info.kernel_load_addr =
(void *)(uint64_t)mods[0].mm_mod_start;
vm->sw.linux_info.bootargs_src_addr =
(void *)(uint64_t)mods[0].mm_string;
vm->sw.linux_info.bootargs_load_addr =
(void *)BOOT_ARGS_LOAD_ADDR;
vm->sw.linux_info.bootargs_size =
strnlen_s((char *)(uint64_t) mods[0].mm_string, MEM_2K);
return 0;
}
uint64_t gva2gpa(struct vm *vm, uint64_t cr3, uint64_t gva)
{
int level, index, shift;
uint64_t *base, addr, entry, page_size;
uint64_t gpa = 0;
addr = cr3;
for (level = 3; level >= 0; level--) {
addr = addr & IA32E_REF_MASK;
base = GPA2HVA(vm, addr);
ASSERT(base != NULL, "invalid ptp base.");
shift = level * 9 + 12;
index = (gva >> shift) & 0x1FF;
page_size = 1UL << shift;
entry = base[index];
if (level > 0 && (entry & MMU_32BIT_PDE_PS) != 0)
break;
addr = entry;
}
entry >>= shift; entry <<= (shift + 12); entry >>= 12;
gpa = entry | (gva & (page_size - 1));
return gpa;
}
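/* Illustrative sketch (not from the original source): the walk above derives
* each table index from a 9-bit slice of the GVA (shift = level * 9 + 12).
* This standalone program, using a hypothetical GVA, shows that arithmetic.
*/
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t gva = 0x00007f1234567890UL;  /* hypothetical guest virtual address */
    int level;

    for (level = 3; level >= 0; level--) {
        int shift = level * 9 + 12;                 /* 39, 30, 21, 12 */
        int index = (int)((gva >> shift) & 0x1FF);  /* 9-bit table index */

        printf("level %d: shift %2d index %3d\n", level, shift, index);
    }
    printf("page offset: 0x%llx\n", (unsigned long long)(gva & 0xFFF));
    return 0;
}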
void init_e820(void)
{
unsigned int i;
if (boot_regs[0] == MULTIBOOT_INFO_MAGIC) {
struct multiboot_info *mbi =
(struct multiboot_info *)((uint64_t)boot_regs[1]);
pr_info("Multiboot info detected\n");
if (mbi->mi_flags & 0x40) {
struct multiboot_mmap *mmap =
(struct multiboot_mmap *)
((uint64_t)mbi->mi_mmap_addr);
e820_entries = mbi->mi_mmap_length/
sizeof(struct multiboot_mmap);
if (e820_entries > E820_MAX_ENTRIES) {
pr_err("Too many E820 entries %d\n",
e820_entries);
e820_entries = E820_MAX_ENTRIES;
}
dev_dbg(ACRN_DBG_GUEST,
"mmap length 0x%x addr 0x%x entries %d\n",
mbi->mi_mmap_length, mbi->mi_mmap_addr,
e820_entries);
for (i = 0; i < e820_entries; i++) {
e820[i].baseaddr = mmap[i].baseaddr;
e820[i].length = mmap[i].length;
e820[i].type = mmap[i].type;
dev_dbg(ACRN_DBG_GUEST,
"mmap table: %d type: 0x%x\n",
i, mmap[i].type);
dev_dbg(ACRN_DBG_GUEST,
"Base: 0x%016llx length: 0x%016llx",
mmap[i].baseaddr, mmap[i].length);
}
}
} else
ASSERT(0, "no multiboot info found");
}
void obtain_e820_mem_info(void)
{
unsigned int i;
struct e820_entry *entry;
e820_mem.mem_bottom = UINT64_MAX;
e820_mem.mem_top = 0x00;
e820_mem.max_ram_blk_base = 0;
e820_mem.max_ram_blk_size = 0;
for (i = 0; i < e820_entries; i++) {
entry = &e820[i];
if (e820_mem.mem_bottom > entry->baseaddr)
e820_mem.mem_bottom = entry->baseaddr;
if (entry->baseaddr + entry->length
> e820_mem.mem_top) {
e820_mem.mem_top = entry->baseaddr
+ entry->length;
}
if (entry->baseaddr == UOS_DEFAULT_START_ADDR
&& entry->type == E820_TYPE_RAM) {
e820_mem.max_ram_blk_base =
entry->baseaddr;
e820_mem.max_ram_blk_size = entry->length;
}
}
}
static void rebuild_vm0_e820(void)
{
unsigned int i;
uint64_t entry_start;
uint64_t entry_end;
uint64_t hv_start = CONFIG_RAM_START;
uint64_t hv_end = hv_start + CONFIG_RAM_SIZE;
struct e820_entry *entry, new_entry = {0};
/* Hypervisor memory needs to be filtered out of the e820 table:
* the hypervisor itself plus other HV-reserved memory (e.g. for vGT).
* An illustrative sketch of the splitting cases follows this function.
*/
for (i = 0; i < e820_entries; i++) {
entry = &e820[i];
entry_start = entry->baseaddr;
entry_end = entry->baseaddr + entry->length;
/* No need to handle these cases */
if (entry->type != E820_TYPE_RAM || entry_end <= hv_start
|| entry_start >= hv_end) {
continue;
}
/* filter out hv mem and adjust length of this entry*/
if (entry_start < hv_start && entry_end <= hv_end) {
entry->length = hv_start - entry_start;
continue;
}
/* filter out hv mem and need to create a new entry*/
if (entry_start < hv_start && entry_end > hv_end) {
entry->length = hv_start - entry_start;
new_entry.baseaddr = hv_end;
new_entry.length = entry_end - hv_end;
new_entry.type = E820_TYPE_RAM;
continue;
}
/* This entry is within the range of hv mem
* change to E820_TYPE_RESERVED
*/
if (entry_start >= hv_start && entry_end <= hv_end) {
entry->type = E820_TYPE_RESERVED;
continue;
}
if (entry_start >= hv_start && entry_start < hv_end
&& entry_end > hv_end) {
entry->baseaddr = hv_end;
entry->length = entry_end - hv_end;
continue;
}
}
if (new_entry.length > 0) {
e820_entries++;
ASSERT(e820_entries <= E820_MAX_ENTRIES,
"e820 entry overflow");
entry = &e820[e820_entries - 1];
entry->baseaddr = new_entry.baseaddr;
entry->length = new_entry.length;
entry->type = new_entry.type;
}
}
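/* Illustrative sketch (not from the original source): the "entry straddles
* both ends of the hypervisor range" case above, shown with hypothetical
* addresses. The original entry keeps the memory below hv_start and a new
* entry is created for the memory above hv_end.
*/
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t ram_start = 0x0UL, ram_end = 0x80000000UL;      /* 0 - 2G RAM */
    uint64_t hv_start = 0x20000000UL, hv_end = 0x30000000UL; /* 512M - 768M */

    if (ram_start < hv_start && ram_end > hv_end) {
        printf("entry 0: base 0x%llx len 0x%llx (below hypervisor)\n",
               (unsigned long long)ram_start,
               (unsigned long long)(hv_start - ram_start));
        printf("entry 1: base 0x%llx len 0x%llx (above hypervisor)\n",
               (unsigned long long)hv_end,
               (unsigned long long)(ram_end - hv_end));
    }
    return 0;
}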
int prepare_vm0_memmap_and_e820(struct vm *vm)
{
unsigned int i;
uint32_t attr_wb = (MMU_MEM_ATTR_READ |
MMU_MEM_ATTR_WRITE |
MMU_MEM_ATTR_EXECUTE |
MMU_MEM_ATTR_WB_CACHE);
uint32_t attr_uc = (MMU_MEM_ATTR_READ |
MMU_MEM_ATTR_WRITE |
MMU_MEM_ATTR_EXECUTE |
MMU_MEM_ATTR_UNCACHED);
struct e820_entry *entry;
ASSERT(is_vm0(vm), "This func only for vm0");
rebuild_vm0_e820();
dev_dbg(ACRN_DBG_GUEST,
"vm0: bottom memory - 0x%llx, top memory - 0x%llx\n",
e820_mem.mem_bottom, e820_mem.mem_top);
/* create real ept map for all ranges with UC */
ept_mmap(vm, e820_mem.mem_bottom, e820_mem.mem_bottom,
(e820_mem.mem_top - e820_mem.mem_bottom),
MAP_MMIO, attr_uc);
/* update ram entries to WB attr */
for (i = 0; i < e820_entries; i++) {
entry = &e820[i];
if (entry->type == E820_TYPE_RAM)
ept_mmap(vm, entry->baseaddr, entry->baseaddr,
entry->length, MAP_MEM, attr_wb);
}
dev_dbg(ACRN_DBG_GUEST, "VM0 e820 layout:\n");
for (i = 0; i < e820_entries; i++) {
entry = &e820[i];
dev_dbg(ACRN_DBG_GUEST,
"e820 table: %d type: 0x%x", i, entry->type);
dev_dbg(ACRN_DBG_GUEST,
"BaseAddress: 0x%016llx length: 0x%016llx\n",
entry->baseaddr, entry->length);
}
/* Unmap the hypervisor itself for safety; any SOS access to
* hypervisor memory will then cause an EPT violation.
*/
ept_mmap(vm, CONFIG_RAM_START, CONFIG_RAM_START,
CONFIG_RAM_SIZE, MAP_UNMAP, 0);
return 0;
}

arch/x86/guest/instr_emul.c (new file, 2137 lines; diff suppressed because it is too large)

@@ -0,0 +1,95 @@
/*-
* Copyright (c) 2012 NetApp, Inc.
* Copyright (c) 2017 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VMM_INSTRUCTION_EMUL_H_
#define _VMM_INSTRUCTION_EMUL_H_
/*
* Callback functions to read and write memory regions.
*/
typedef int (*mem_region_read_t)(struct vcpu *vcpu, uint64_t gpa,
uint64_t *rval, int rsize, void *arg);
typedef int (*mem_region_write_t)(struct vcpu *vcpu, uint64_t gpa,
uint64_t wval, int wsize, void *arg);
/*
* Emulate the decoded 'vie' instruction.
*
* The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
* containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
* callback functions.
*
* 'void *vm' should be 'struct vm *' when called from kernel context and
* 'struct vmctx *' when called from user context.
*/
int vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
struct vm_guest_paging *paging, mem_region_read_t mrr,
mem_region_write_t mrw, void *mrarg);
int vie_update_register(struct vcpu *vcpu, enum vm_reg_name reg,
uint64_t val, int size);
/*
* Returns 1 if an alignment check exception should be injected and 0 otherwise.
*/
int vie_alignment_check(int cpl, int operand_size, uint64_t cr0,
uint64_t rflags, uint64_t gla);
/* Returns 1 if the 'gla' is not canonical and 0 otherwise. */
int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla);
uint64_t vie_size2mask(int size);
int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
struct seg_desc *desc, uint64_t off, int length, int addrsize, int prot,
uint64_t *gla);
void vie_init(struct vie *vie, const char *inst_bytes, int inst_length);
/*
* Decode the instruction fetched into 'vie' so it can be emulated.
*
* 'gla' is the guest linear address provided by the hardware assist
* that caused the nested page table fault. It is used to verify that
* the software instruction decoding is in agreement with the hardware.
*
* Some hardware assists do not provide the 'gla' to the hypervisor.
* To skip the 'gla' verification for this or any other reason pass
* in VIE_INVALID_GLA instead.
*/
#define VIE_INVALID_GLA (1UL << 63) /* a non-canonical address */
int vmm_decode_instruction(struct vcpu *vcpu, uint64_t gla,
enum vm_cpu_mode cpu_mode, int csd, struct vie *vie);
int emulate_instruction(struct vcpu *vcpu, struct mem_io *mmio);
int analyze_instruction(struct vcpu *vcpu, struct mem_io *mmio);
#endif /* _VMM_INSTRUCTION_EMUL_H_ */
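/* Hedged usage sketch (not part of this header): how a caller might drive the
* decode/emulate API declared above. my_mmio_read, my_mmio_write and
* handle_mmio_fault are hypothetical names; 'vie' is assumed to have been
* filled with the fetched instruction bytes via vie_init() beforehand.
*/
static int my_mmio_read(struct vcpu *vcpu, uint64_t gpa, uint64_t *rval,
        int rsize, void *arg)
{
    *rval = 0;  /* a real callback would return the device register value */
    return 0;
}

static int my_mmio_write(struct vcpu *vcpu, uint64_t gpa, uint64_t wval,
        int wsize, void *arg)
{
    return 0;   /* a real callback would latch wval into the device model */
}

static int handle_mmio_fault(struct vcpu *vcpu, uint64_t gpa, uint64_t gla,
        enum vm_cpu_mode cpu_mode, int csd, struct vie *vie,
        struct vm_guest_paging *paging, void *ctx)
{
    /* pass VIE_INVALID_GLA as 'gla' if the hardware did not report one */
    if (vmm_decode_instruction(vcpu, gla, cpu_mode, csd, vie) != 0)
        return -1;

    return vmm_emulate_instruction(vcpu, gpa, vie, paging,
            my_mmio_read, my_mmio_write, ctx);
}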


@@ -0,0 +1,466 @@
/*
* Copyright (C) 2018 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <hypervisor.h>
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <bsp_extern.h>
#include <hv_debug.h>
#include "instr_emul_wrapper.h"
#include "instr_emul.h"
struct emul_cnx {
struct vie vie;
struct vm_guest_paging paging;
struct vcpu *vcpu;
struct mem_io *mmio;
};
static DEFINE_CPU_DATA(struct emul_cnx, g_inst_ctxt);
static int
encode_vmcs_seg_desc(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc);
static int32_t
get_vmcs_field(int ident);
static bool
is_segment_register(int reg);
static bool
is_descriptor_table(int reg);
int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{
struct run_context *cur_context;
if (!vcpu)
return -EINVAL;
if ((reg >= VM_REG_LAST) || (reg < VM_REG_GUEST_RAX))
return -EINVAL;
if ((reg >= VM_REG_GUEST_RAX) && (reg <= VM_REG_GUEST_RDI)) {
cur_context =
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
*retval = cur_context->guest_cpu_regs.longs[reg];
} else if ((reg > VM_REG_GUEST_RDI) && (reg < VM_REG_LAST)) {
int32_t field = get_vmcs_field(reg);
if (field != -1)
*retval = exec_vmread(field);
else
return -EINVAL;
}
return 0;
}
int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
struct run_context *cur_context;
if (!vcpu)
return -EINVAL;
if ((reg >= VM_REG_LAST) || (reg < VM_REG_GUEST_RAX))
return -EINVAL;
if ((reg >= VM_REG_GUEST_RAX) && (reg <= VM_REG_GUEST_RDI)) {
cur_context =
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
cur_context->guest_cpu_regs.longs[reg] = val;
} else if ((reg > VM_REG_GUEST_RDI) && (reg < VM_REG_LAST)) {
int32_t field = get_vmcs_field(reg);
if (field != -1)
exec_vmwrite(field, val);
else
return -EINVAL;
}
return 0;
}
int vm_set_seg_desc(struct vcpu *vcpu, int seg, struct seg_desc *ret_desc)
{
int error;
uint32_t base, limit, access;
if ((!vcpu) || (!ret_desc))
return -EINVAL;
if (!is_segment_register(seg) && !is_descriptor_table(seg))
return -EINVAL;
error = encode_vmcs_seg_desc(seg, &base, &limit, &access);
if ((error != 0) || (access == 0xffffffff))
return -EINVAL;
exec_vmwrite(base, ret_desc->base);
exec_vmwrite(limit, ret_desc->limit);
exec_vmwrite(access, ret_desc->access);
return 0;
}
int vm_get_seg_desc(struct vcpu *vcpu, int seg, struct seg_desc *desc)
{
int error;
uint32_t base, limit, access;
if ((!vcpu) || (!desc))
return -EINVAL;
if (!is_segment_register(seg) && !is_descriptor_table(seg))
return -EINVAL;
error = encode_vmcs_seg_desc(seg, &base, &limit, &access);
if ((error != 0) || (access == 0xffffffff))
return -EINVAL;
desc->base = exec_vmread(base);
desc->limit = exec_vmread(limit);
desc->access = exec_vmread(access);
return 0;
}
int vm_restart_instruction(struct vcpu *vcpu)
{
if (!vcpu)
return -EINVAL;
VCPU_RETAIN_RIP(vcpu);
return 0;
}
static bool is_descriptor_table(int reg)
{
switch (reg) {
case VM_REG_GUEST_IDTR:
case VM_REG_GUEST_GDTR:
return true;
default:
return false;
}
}
static bool is_segment_register(int reg)
{
switch (reg) {
case VM_REG_GUEST_ES:
case VM_REG_GUEST_CS:
case VM_REG_GUEST_SS:
case VM_REG_GUEST_DS:
case VM_REG_GUEST_FS:
case VM_REG_GUEST_GS:
case VM_REG_GUEST_TR:
case VM_REG_GUEST_LDTR:
return true;
default:
return false;
}
}
static int encode_vmcs_seg_desc(int seg, uint32_t *base, uint32_t *lim,
uint32_t *acc)
{
switch (seg) {
case VM_REG_GUEST_ES:
*base = VMX_GUEST_ES_BASE;
*lim = VMX_GUEST_ES_LIMIT;
*acc = VMX_GUEST_ES_ATTR;
break;
case VM_REG_GUEST_CS:
*base = VMX_GUEST_CS_BASE;
*lim = VMX_GUEST_CS_LIMIT;
*acc = VMX_GUEST_CS_ATTR;
break;
case VM_REG_GUEST_SS:
*base = VMX_GUEST_SS_BASE;
*lim = VMX_GUEST_SS_LIMIT;
*acc = VMX_GUEST_SS_ATTR;
break;
case VM_REG_GUEST_DS:
*base = VMX_GUEST_DS_BASE;
*lim = VMX_GUEST_DS_LIMIT;
*acc = VMX_GUEST_DS_ATTR;
break;
case VM_REG_GUEST_FS:
*base = VMX_GUEST_FS_BASE;
*lim = VMX_GUEST_FS_LIMIT;
*acc = VMX_GUEST_FS_ATTR;
break;
case VM_REG_GUEST_GS:
*base = VMX_GUEST_GS_BASE;
*lim = VMX_GUEST_GS_LIMIT;
*acc = VMX_GUEST_GS_ATTR;
break;
case VM_REG_GUEST_TR:
*base = VMX_GUEST_TR_BASE;
*lim = VMX_GUEST_TR_LIMIT;
*acc = VMX_GUEST_TR_ATTR;
break;
case VM_REG_GUEST_LDTR:
*base = VMX_GUEST_LDTR_BASE;
*lim = VMX_GUEST_LDTR_LIMIT;
*acc = VMX_GUEST_LDTR_ATTR;
break;
case VM_REG_GUEST_IDTR:
*base = VMX_GUEST_IDTR_BASE;
*lim = VMX_GUEST_IDTR_LIMIT;
*acc = 0xffffffff;
break;
case VM_REG_GUEST_GDTR:
*base = VMX_GUEST_GDTR_BASE;
*lim = VMX_GUEST_GDTR_LIMIT;
*acc = 0xffffffff;
break;
default:
return -EINVAL;
}
return 0;
}
static int32_t get_vmcs_field(int ident)
{
switch (ident) {
case VM_REG_GUEST_CR0:
return VMX_GUEST_CR0;
case VM_REG_GUEST_CR3:
return VMX_GUEST_CR3;
case VM_REG_GUEST_CR4:
return VMX_GUEST_CR4;
case VM_REG_GUEST_DR7:
return VMX_GUEST_DR7;
case VM_REG_GUEST_RSP:
return VMX_GUEST_RSP;
case VM_REG_GUEST_RIP:
return VMX_GUEST_RIP;
case VM_REG_GUEST_RFLAGS:
return VMX_GUEST_RFLAGS;
case VM_REG_GUEST_ES:
return VMX_GUEST_ES_SEL;
case VM_REG_GUEST_CS:
return VMX_GUEST_CS_SEL;
case VM_REG_GUEST_SS:
return VMX_GUEST_SS_SEL;
case VM_REG_GUEST_DS:
return VMX_GUEST_DS_SEL;
case VM_REG_GUEST_FS:
return VMX_GUEST_FS_SEL;
case VM_REG_GUEST_GS:
return VMX_GUEST_GS_SEL;
case VM_REG_GUEST_TR:
return VMX_GUEST_TR_SEL;
case VM_REG_GUEST_LDTR:
return VMX_GUEST_LDTR_SEL;
case VM_REG_GUEST_EFER:
return VMX_GUEST_IA32_EFER_FULL;
case VM_REG_GUEST_PDPTE0:
return VMX_GUEST_PDPTE0_FULL;
case VM_REG_GUEST_PDPTE1:
return VMX_GUEST_PDPTE1_FULL;
case VM_REG_GUEST_PDPTE2:
return VMX_GUEST_PDPTE2_FULL;
case VM_REG_GUEST_PDPTE3:
return VMX_GUEST_PDPTE3_FULL;
default:
return -1;
}
}
static enum vm_cpu_mode get_vmx_cpu_mode(void)
{
uint32_t csar;
if (exec_vmread(VMX_GUEST_IA32_EFER_FULL) & EFER_LMA) {
csar = exec_vmread(VMX_GUEST_CS_ATTR);
if (csar & 0x2000)
return CPU_MODE_64BIT; /* CS.L = 1 */
else
return CPU_MODE_COMPATIBILITY;
} else if (exec_vmread(VMX_GUEST_CR0) & CR0_PE) {
return CPU_MODE_PROTECTED;
} else {
return CPU_MODE_REAL;
}
}
static void get_guest_paging_info(struct vcpu *vcpu, struct emul_cnx *emul_cnx)
{
uint32_t cpl, csar;
ASSERT(emul_cnx != NULL && vcpu != NULL, "Error in input arguments");
csar = exec_vmread(VMX_GUEST_CS_ATTR);
cpl = (csar >> 5) & 3;
emul_cnx->paging.cr3 =
vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3;
emul_cnx->paging.cpl = cpl;
emul_cnx->paging.cpu_mode = get_vmx_cpu_mode();
emul_cnx->paging.paging_mode = PAGING_MODE_FLAT;/*maybe change later*/
}
static int mmio_read(struct vcpu *vcpu, __unused uint64_t gpa, uint64_t *rval,
__unused int size, __unused void *arg)
{
struct emul_cnx *emul_cnx;
struct mem_io *mmio;
if (!vcpu)
return -EINVAL;
emul_cnx = &per_cpu(g_inst_ctxt, vcpu->pcpu_id);
mmio = emul_cnx->mmio;
ASSERT(mmio != NULL, "invalid mmio when reading");
*rval = mmio->value;
return 0;
}
static int mmio_write(struct vcpu *vcpu, __unused uint64_t gpa, uint64_t wval,
__unused int size, __unused void *arg)
{
struct emul_cnx *emul_cnx;
struct mem_io *mmio;
if (!vcpu)
return -EINVAL;
emul_cnx = &per_cpu(g_inst_ctxt, vcpu->pcpu_id);
mmio = emul_cnx->mmio;
ASSERT(mmio != NULL, "invalid mmio when writing");
mmio->value = wval;
return 0;
}
void vm_gva2gpa(struct vcpu *vcpu, uint64_t gva, uint64_t *gpa)
{
ASSERT(gpa != NULL, "Error in input arguments");
ASSERT(vcpu != NULL,
"Invalid vcpu id when gva2gpa");
*gpa = gva2gpa(vcpu->vm,
vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3, gva);
}
int analyze_instruction(struct vcpu *vcpu, struct mem_io *mmio)
{
uint64_t guest_rip_gva, guest_rip_gpa;
char *guest_rip_hva;
struct emul_cnx *emul_cnx;
uint32_t csar;
int retval = 0;
enum vm_cpu_mode cpu_mode;
int i;
guest_rip_gva =
vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].rip;
guest_rip_gpa = gva2gpa(vcpu->vm,
vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3,
guest_rip_gva);
guest_rip_hva = GPA2HVA(vcpu->vm, guest_rip_gpa);
emul_cnx = &per_cpu(g_inst_ctxt, vcpu->pcpu_id);
emul_cnx->mmio = mmio;
emul_cnx->vcpu = vcpu;
/* For now, HVA <-> HPA is a 1:1 mapping, so using the HPA is OK */
vie_init(&emul_cnx->vie, guest_rip_hva,
vcpu->arch_vcpu.inst_len);
get_guest_paging_info(vcpu, emul_cnx);
csar = exec_vmread(VMX_GUEST_CS_ATTR);
cpu_mode = get_vmx_cpu_mode();
mmio->private_data = emul_cnx;
retval = vmm_decode_instruction(vcpu, guest_rip_gva,
cpu_mode, SEG_DESC_DEF32(csar), &emul_cnx->vie);
mmio->access_size = emul_cnx->vie.opsize;
if (retval != 0) {
/* dump the instruction when decoding failed */
pr_err("decode following instruction failed @ 0x%016llx:",
exec_vmread(VMX_GUEST_RIP));
for (i = 0; i < emul_cnx->vie.num_valid; i++) {
if (i >= VIE_INST_SIZE)
break;
if (i == 0)
pr_err("\n");
pr_err("%d=%02hhx ",
i, emul_cnx->vie.inst[i]);
}
}
return retval;
}
int emulate_instruction(struct vcpu *vcpu, struct mem_io *mmio)
{
struct emul_cnx *emul_cnx = (struct emul_cnx *)(mmio->private_data);
struct vm_guest_paging *paging = &emul_cnx->paging;
int i, retval = 0;
uint64_t gpa = mmio->paddr;
mem_region_read_t mread = mmio_read;
mem_region_write_t mwrite = mmio_write;
retval = vmm_emulate_instruction(vcpu, gpa,
&emul_cnx->vie, paging, mread, mwrite, &retval);
if (retval != 0) {
/* dump the instruction when emulation failed */
pr_err("emulate following instruction failed @ 0x%016llx:",
exec_vmread(VMX_GUEST_RIP));
for (i = 0; i < emul_cnx->vie.num_valid; i++) {
if (i >= VIE_INST_SIZE)
break;
if (i == 0)
pr_err("\n");
pr_err("%d=%02hhx ",
i, emul_cnx->vie.inst[i]);
}
}
return retval;
}


@@ -0,0 +1,203 @@
/*-
* Copyright (c) 2012 NetApp, Inc.
* Copyright (c) 2017 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <cpu.h>
struct vie_op {
uint8_t op_byte; /* actual opcode byte */
uint8_t op_type; /* type of operation (e.g. MOV) */
uint16_t op_flags;
};
#define VIE_INST_SIZE 15
struct vie {
uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */
uint8_t num_valid; /* size of the instruction */
uint8_t num_processed;
uint8_t addrsize:4, opsize:4; /* address and operand sizes */
uint8_t rex_w:1, /* REX prefix */
rex_r:1,
rex_x:1,
rex_b:1,
rex_present:1,
repz_present:1, /* REP/REPE/REPZ prefix */
repnz_present:1, /* REPNE/REPNZ prefix */
opsize_override:1, /* Operand size override */
addrsize_override:1, /* Address size override */
segment_override:1; /* Segment override */
uint8_t mod:2, /* ModRM byte */
reg:4,
rm:4;
uint8_t ss:2, /* SIB byte */
index:4,
base:4;
uint8_t disp_bytes;
uint8_t imm_bytes;
uint8_t scale;
int base_register; /* VM_REG_GUEST_xyz */
int index_register; /* VM_REG_GUEST_xyz */
int segment_register; /* VM_REG_GUEST_xyz */
int64_t displacement; /* optional addr displacement */
int64_t immediate; /* optional immediate operand */
uint8_t decoded; /* set to 1 if successfully decoded */
struct vie_op op; /* opcode description */
};
#define PSL_C 0x00000001 /* carry bit */
#define PSL_PF 0x00000004 /* parity bit */
#define PSL_AF 0x00000010 /* bcd carry bit */
#define PSL_Z 0x00000040 /* zero bit */
#define PSL_N 0x00000080 /* negative bit */
#define PSL_T 0x00000100 /* trace enable bit */
#define PSL_I 0x00000200 /* interrupt enable bit */
#define PSL_D 0x00000400 /* string instruction direction bit */
#define PSL_V 0x00000800 /* overflow bit */
#define PSL_IOPL 0x00003000 /* i/o privilege level */
#define PSL_NT 0x00004000 /* nested task bit */
#define PSL_RF 0x00010000 /* resume flag bit */
#define PSL_VM 0x00020000 /* virtual 8086 mode bit */
#define PSL_AC 0x00040000 /* alignment checking */
#define PSL_VIF 0x00080000 /* virtual interrupt enable */
#define PSL_VIP 0x00100000 /* virtual interrupt pending */
#define PSL_ID 0x00200000 /* identification bit */
/*
* The 'access' field has the format specified in Table 21-2 of the Intel
* Architecture Manual vol 3b.
*
* XXX The contents of the 'access' field are architecturally defined except
* bit 16 - Segment Unusable.
*/
struct seg_desc {
uint64_t base;
uint32_t limit;
uint32_t access;
};
/*
* Protections are chosen from these bits, or-ed together
*/
#define PROT_NONE 0x00 /* no permissions */
#define PROT_READ 0x01 /* pages can be read */
#define PROT_WRITE 0x02 /* pages can be written */
#define PROT_EXEC 0x04 /* pages can be executed */
#define SEG_DESC_TYPE(access) ((access) & 0x001f)
#define SEG_DESC_DPL(access) (((access) >> 5) & 0x3)
#define SEG_DESC_PRESENT(access) (((access) & 0x0080) ? 1 : 0)
#define SEG_DESC_DEF32(access) (((access) & 0x4000) ? 1 : 0)
#define SEG_DESC_GRANULARITY(access) (((access) & 0x8000) ? 1 : 0)
#define SEG_DESC_UNUSABLE(access) (((access) & 0x10000) ? 1 : 0)
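/* Illustrative example (hypothetical value): a flat 64-bit code segment whose
* VMCS access field reads 0xa09b decodes with the macros above as:
*   SEG_DESC_TYPE(0xa09b)        = 0x1b  (execute/read, accessed, S = 1)
*   SEG_DESC_DPL(0xa09b)         = 0     (ring 0)
*   SEG_DESC_PRESENT(0xa09b)     = 1
*   SEG_DESC_DEF32(0xa09b)       = 0     (D must be 0 when CS.L = 1)
*   SEG_DESC_GRANULARITY(0xa09b) = 1
*   SEG_DESC_UNUSABLE(0xa09b)    = 0
*/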
enum vm_cpu_mode {
CPU_MODE_REAL,
CPU_MODE_PROTECTED,
CPU_MODE_COMPATIBILITY, /* IA-32E mode (CS.L = 0) */
CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */
};
enum vm_paging_mode {
PAGING_MODE_FLAT,
PAGING_MODE_32,
PAGING_MODE_PAE,
PAGING_MODE_64,
};
struct vm_guest_paging {
uint64_t cr3;
int cpl;
enum vm_cpu_mode cpu_mode;
enum vm_paging_mode paging_mode;
};
/*
* Identifiers for architecturally defined registers.
*/
enum vm_reg_name {
VM_REG_GUEST_RAX,
VM_REG_GUEST_RBX,
VM_REG_GUEST_RCX,
VM_REG_GUEST_RDX,
VM_REG_GUEST_RBP,
VM_REG_GUEST_RSI,
VM_REG_GUEST_R8,
VM_REG_GUEST_R9,
VM_REG_GUEST_R10,
VM_REG_GUEST_R11,
VM_REG_GUEST_R12,
VM_REG_GUEST_R13,
VM_REG_GUEST_R14,
VM_REG_GUEST_R15,
VM_REG_GUEST_RDI,
VM_REG_GUEST_CR0,
VM_REG_GUEST_CR3,
VM_REG_GUEST_CR4,
VM_REG_GUEST_DR7,
VM_REG_GUEST_RSP,
VM_REG_GUEST_RIP,
VM_REG_GUEST_RFLAGS,
VM_REG_GUEST_ES,
VM_REG_GUEST_CS,
VM_REG_GUEST_SS,
VM_REG_GUEST_DS,
VM_REG_GUEST_FS,
VM_REG_GUEST_GS,
VM_REG_GUEST_LDTR,
VM_REG_GUEST_TR,
VM_REG_GUEST_IDTR,
VM_REG_GUEST_GDTR,
VM_REG_GUEST_EFER,
VM_REG_GUEST_CR2,
VM_REG_GUEST_PDPTE0,
VM_REG_GUEST_PDPTE1,
VM_REG_GUEST_PDPTE2,
VM_REG_GUEST_PDPTE3,
VM_REG_GUEST_INTR_SHADOW,
VM_REG_LAST
};
typedef unsigned long u_long;
int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval);
int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val);
int vm_get_seg_desc(struct vcpu *vcpu, int reg,
struct seg_desc *ret_desc);
int vm_set_seg_desc(struct vcpu *vcpu, int reg,
struct seg_desc *desc);
int vm_restart_instruction(struct vcpu *vcpu);
void vm_gva2gpa(struct vcpu *vcpu, uint64_t gla, uint64_t *gpa);

arch/x86/guest/time.h (new file, 118 lines)

@@ -0,0 +1,118 @@
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
* Copyright (c) 2018 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)time.h 8.5 (Berkeley) 5/4/95
* $FreeBSD$
*/
#ifndef _TIME_H_
#define _TIME_H_
struct callout {
void *c_arg; /* function argument */
void (*c_func)(void *); /* function to call */
short c_flags; /* User State */
};
#define CALLOUT_ACTIVE 0x0002 /* callout is currently active */
#define CALLOUT_PENDING 0x0004 /* callout is waiting for timeout */
#define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE)
#define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE)
#define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING)
typedef int64_t time_t;
typedef int64_t sbintime_t;
struct bintime {
time_t sec;
uint64_t frac;
};
static inline void
bintime_add(struct bintime *_bt, const struct bintime *_bt2)
{
uint64_t _u;
_u = _bt->frac;
_bt->frac += _bt2->frac;
if (_u > _bt->frac)
_bt->sec++;
_bt->sec += _bt2->sec;
}
static inline void
bintime_sub(struct bintime *_bt, const struct bintime *_bt2)
{
uint64_t _u;
_u = _bt->frac;
_bt->frac -= _bt2->frac;
if (_u < _bt->frac)
_bt->sec--;
_bt->sec -= _bt2->sec;
}
static inline void
bintime_mul(struct bintime *_bt, uint32_t _x)
{
uint64_t _p1, _p2;
_p1 = (_bt->frac & 0xffffffffull) * _x;
_p2 = (_bt->frac >> 32) * _x + (_p1 >> 32);
_bt->sec *= _x;
_bt->sec += (_p2 >> 32);
_bt->frac = (_p2 << 32) | (_p1 & 0xffffffffull);
}
#define bintime_cmp(a, b, cmp) \
(((a)->sec == (b)->sec) ? \
((a)->frac cmp(b)->frac) : \
((a)->sec cmp(b)->sec))
#define SBT_1S ((sbintime_t)1 << 32)
#define SBT_1US (SBT_1S / 1000000)
#define BT2FREQ(bt) \
(((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) / \
((bt)->frac >> 1))
#define FREQ2BT(freq, bt) \
{ \
(bt)->sec = 0; \
(bt)->frac = ((uint64_t)0x8000000000000000 / (freq)) << 1; \
}
static inline sbintime_t
bttosbt(const struct bintime _bt)
{
return (((sbintime_t)_bt.sec << 32) + (_bt.frac >> 32));
}
#endif /* !_TIME_H_ */
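/* Illustrative sketch (not part of this header): struct bintime is a 64.64
* fixed-point value, so FREQ2BT computes one period as roughly 2^64 / freq
* (two shifts avoid overflowing the 2^63 constant) and BT2FREQ inverts it
* with rounding. This standalone program round-trips a hypothetical 1 kHz
* frequency through the same arithmetic.
*/
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t freq = 1000;   /* hypothetical 1 kHz clock */
    /* FREQ2BT: frac = (2^63 / freq) << 1, approximately 2^64 / freq */
    uint64_t frac = ((uint64_t)0x8000000000000000ULL / freq) << 1;
    /* BT2FREQ: (2^63 + frac/4) / (frac/2), i.e. rounded 2^64 / frac */
    uint64_t back = ((uint64_t)0x8000000000000000ULL + (frac >> 2)) /
            (frac >> 1);

    printf("frac = 0x%016llx, freq back = %llu Hz\n",
           (unsigned long long)frac, (unsigned long long)back);
    return 0;
}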

arch/x86/guest/vcpu.c (new file, 357 lines)

@@ -0,0 +1,357 @@
/*
* Copyright (C) 2018 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <hypervisor.h>
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <schedule.h>
#include <hv_debug.h>
vm_sw_loader_t vm_sw_loader;
/***********************************************************************
* vcpu_id/pcpu_id mapping table:
*
* if
* VM0_CPUS[2] = {0, 2} , VM1_CPUS[2] = {3, 1};
* then
* for physical CPU 0 : vcpu->pcpu_id = 0, vcpu->vcpu_id = 0, vmid = 0;
* for physical CPU 2 : vcpu->pcpu_id = 2, vcpu->vcpu_id = 1, vmid = 0;
* for physical CPU 3 : vcpu->pcpu_id = 3, vcpu->vcpu_id = 0, vmid = 1;
* for physical CPU 1 : vcpu->pcpu_id = 1, vcpu->vcpu_id = 1, vmid = 1;
*
***********************************************************************/
int create_vcpu(int cpu_id, struct vm *vm, struct vcpu **rtn_vcpu_handle)
{
struct vcpu *vcpu;
ASSERT(vm != NULL, "");
ASSERT(rtn_vcpu_handle != NULL, "");
pr_info("Creating VCPU %d", cpu_id);
/* Allocate memory for VCPU */
vcpu = calloc(1, sizeof(struct vcpu));
ASSERT(vcpu != NULL, "");
/* Initialize the physical CPU ID for this VCPU */
vcpu->pcpu_id = cpu_id;
/* Initialize the parent VM reference */
vcpu->vm = vm;
/* Initialize the virtual ID for this VCPU */
/* FIXME:
* We assume that vcpus are always destroyed in one shot
* (e.g. when the vm is destroyed). If we need to support
* destroying a specific vcpu on the fly, this vcpu_id
* assignment needs revision.
*/
/*
* vcpu->vcpu_id = vm->hw.created_vcpus;
* vm->hw.created_vcpus++;
*/
vcpu->vcpu_id = atomic_xadd_int(&vm->hw.created_vcpus, 1);
/* vm->hw.vcpu_array[vcpu->vcpu_id] = vcpu; */
atomic_store_rel_64(
(unsigned long *)&vm->hw.vcpu_array[vcpu->vcpu_id],
(unsigned long)vcpu);
ASSERT(vcpu->vcpu_id < vm->hw.num_vcpus,
"Allocated vcpu_id is out of range!");
per_cpu(vcpu, cpu_id) = vcpu;
pr_info("PCPU%d is working as VM%d VCPU%d, Role: %s",
vcpu->pcpu_id, vcpu->vm->attr.id, vcpu->vcpu_id,
is_vcpu_bsp(vcpu) ? "PRIMARY" : "SECONDARY");
/* If this VCPU is the VM's BSP, create the page hierarchy for this VM */
if (is_vcpu_bsp(vcpu)) {
/* Set up temporary guest page tables */
vm->arch_vm.guest_pml4 = create_guest_paging(vm);
pr_info("VM *d VCPU %d CR3: 0x%016llx ",
vm->attr.id, vcpu->vcpu_id, vm->arch_vm.guest_pml4);
}
/* Allocate VMCS region for this VCPU */
vcpu->arch_vcpu.vmcs = alloc_page();
ASSERT(vcpu->arch_vcpu.vmcs != NULL, "");
/* Memset VMCS region for this VCPU */
memset(vcpu->arch_vcpu.vmcs, 0, CPU_PAGE_SIZE);
/* Initialize exception field in VCPU context */
vcpu->arch_vcpu.exception_info.exception = -1;
/* Initialize cur context */
vcpu->arch_vcpu.cur_context = NORMAL_WORLD;
/* Create per vcpu vlapic */
vlapic_create(vcpu);
/* Populate the return handle */
*rtn_vcpu_handle = vcpu;
vcpu->launched = false;
vcpu->paused_cnt = 0;
vcpu->running = 0;
vcpu->ioreq_pending = 0;
vcpu->arch_vcpu.nr_sipi = 0;
vcpu->pending_pre_work = 0;
vcpu->state = VCPU_INIT;
return 0;
}
int start_vcpu(struct vcpu *vcpu)
{
uint64_t rip, instlen;
struct run_context *cur_context =
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
int64_t status = 0;
ASSERT(vcpu != NULL, "Incorrect arguments");
/* If this VCPU is not already launched, launch it */
if (!vcpu->launched) {
pr_info("VM %d Starting VCPU %d",
vcpu->vm->attr.id, vcpu->vcpu_id);
/* Set vcpu launched */
vcpu->launched = true;
/* To avoid a recycled VMCS reusing stale RSB entries, set IBPB.
* NOTE: this should be done every time the VMCS is switched;
* currently there is no other place that switches the VMCS.
* Please set IBPB for future VMCS-switch cases (like trusty).
*/
if (ibrs_type == IBRS_RAW)
msr_write(MSR_IA32_PRED_CMD, PRED_SET_IBPB);
/* Launch the VM */
status = vmx_vmrun(cur_context, VM_LAUNCH, ibrs_type);
/* See if VM launched successfully */
if (status == 0) {
if (is_vcpu_bsp(vcpu)) {
pr_info("VM %d VCPU %d successfully launched",
vcpu->vm->attr.id, vcpu->vcpu_id);
}
}
} else {
/* This VCPU was already launched, check if the last guest
* instruction needs to be repeated and resume VCPU accordingly
*/
instlen = vcpu->arch_vcpu.inst_len;
rip = cur_context->rip;
exec_vmwrite(VMX_GUEST_RIP, ((rip + instlen) &
0xFFFFFFFFFFFFFFFF));
/* Resume the VM */
status = vmx_vmrun(cur_context, VM_RESUME, ibrs_type);
}
/* Save guest CR3 register */
cur_context->cr3 = exec_vmread(VMX_GUEST_CR3);
/* Obtain current VCPU instruction pointer and length */
cur_context->rip = exec_vmread(VMX_GUEST_RIP);
vcpu->arch_vcpu.inst_len = exec_vmread(VMX_EXIT_INSTR_LEN);
cur_context->rsp = exec_vmread(VMX_GUEST_RSP);
cur_context->rflags = exec_vmread(VMX_GUEST_RFLAGS);
/* Obtain VM exit reason */
vcpu->arch_vcpu.exit_reason = exec_vmread(VMX_EXIT_REASON);
if (status != 0) {
/* refer to 64-ia32 spec section 24.9.1 volume#3 */
if (vcpu->arch_vcpu.exit_reason & VMX_VMENTRY_FAIL)
pr_fatal("vmentry fail reason=%lx", vcpu->arch_vcpu.exit_reason);
else
pr_fatal("vmexit fail err_inst=%lx", exec_vmread(VMX_INSTR_ERROR));
ASSERT(status == 0, "vm fail");
}
return status;
}
int shutdown_vcpu(__unused struct vcpu *vcpu)
{
/* TODO : Implement VCPU shutdown sequence */
return 0;
}
int destroy_vcpu(struct vcpu *vcpu)
{
ASSERT(vcpu != NULL, "Incorrect arguments");
/* vcpu->vm->hw.vcpu_array[vcpu->vcpu_id] = NULL; */
atomic_store_rel_64(
(unsigned long *)&vcpu->vm->hw.vcpu_array[vcpu->vcpu_id],
(unsigned long)NULL);
atomic_subtract_int(&vcpu->vm->hw.created_vcpus, 1);
vlapic_free(vcpu);
free(vcpu->arch_vcpu.vmcs);
free(vcpu->guest_msrs);
free_pcpu(vcpu->pcpu_id);
free(vcpu);
return 0;
}
/* NOTE:
* vcpu should be paused before calling this function.
*/
void reset_vcpu(struct vcpu *vcpu)
{
struct vlapic *vlapic;
pr_dbg("vcpu%d reset", vcpu->vcpu_id);
ASSERT(vcpu->state != VCPU_RUNNING,
"reset vcpu when it's running");
if (vcpu->state == VCPU_INIT)
return;
vcpu->state = VCPU_INIT;
vcpu->launched = false;
vcpu->paused_cnt = 0;
vcpu->running = 0;
vcpu->ioreq_pending = 0;
vcpu->arch_vcpu.nr_sipi = 0;
vcpu->pending_pre_work = 0;
vlapic = vcpu->arch_vcpu.vlapic;
vlapic_init(vlapic);
}
void init_vcpu(struct vcpu *vcpu)
{
if (is_vcpu_bsp(vcpu))
vcpu->arch_vcpu.cpu_mode = PAGE_PROTECTED_MODE;
else
vcpu->arch_vcpu.cpu_mode = REAL_MODE;
/* init_vmcs is deferred until the vcpu's VMCS is launched for the first time */
}
void pause_vcpu(struct vcpu *vcpu, enum vcpu_state new_state)
{
int pcpu_id = get_cpu_id();
pr_dbg("vcpu%d paused, new state: %d",
vcpu->vcpu_id, new_state);
vcpu->prev_state = vcpu->state;
vcpu->state = new_state;
get_schedule_lock(pcpu_id);
if (atomic_load_acq_32(&vcpu->running) == 1) {
remove_vcpu_from_runqueue(vcpu);
make_reschedule_request(vcpu);
release_schedule_lock(pcpu_id);
if (vcpu->pcpu_id != pcpu_id) {
while (atomic_load_acq_32(&vcpu->running) == 1)
__asm__ __volatile("pause" ::: "memory");
}
} else {
remove_vcpu_from_runqueue(vcpu);
release_schedule_lock(pcpu_id);
}
}
void resume_vcpu(struct vcpu *vcpu)
{
pr_dbg("vcpu%d resumed", vcpu->vcpu_id);
vcpu->state = vcpu->prev_state;
get_schedule_lock(vcpu->pcpu_id);
if (vcpu->state == VCPU_RUNNING) {
add_vcpu_to_runqueue(vcpu);
make_reschedule_request(vcpu);
}
release_schedule_lock(vcpu->pcpu_id);
}
void schedule_vcpu(struct vcpu *vcpu)
{
vcpu->state = VCPU_RUNNING;
pr_dbg("vcpu%d scheduled", vcpu->vcpu_id);
get_schedule_lock(vcpu->pcpu_id);
add_vcpu_to_runqueue(vcpu);
make_reschedule_request(vcpu);
release_schedule_lock(vcpu->pcpu_id);
}
/* helper function for vcpu creation */
int prepare_vcpu(struct vm *vm, int pcpu_id)
{
int ret = 0;
struct vcpu *vcpu = NULL;
ret = create_vcpu(pcpu_id, vm, &vcpu);
ASSERT(ret == 0, "vcpu create failed");
if (is_vcpu_bsp(vcpu)) {
/* Load VM SW */
if (!vm_sw_loader)
vm_sw_loader = general_sw_loader;
vm_sw_loader(vm, vcpu);
vcpu->arch_vcpu.cpu_mode = PAGE_PROTECTED_MODE;
} else {
vcpu->arch_vcpu.cpu_mode = REAL_MODE;
}
/* init_vmcs is deferred until the vcpu's VMCS is launched for the first time */
/* initialize the vcpu tsc aux */
vcpu->msr_tsc_aux_guest = vcpu->vcpu_id;
set_pcpu_used(pcpu_id);
INIT_LIST_HEAD(&vcpu->run_list);
return ret;
}
void request_vcpu_pre_work(struct vcpu *vcpu, int pre_work_id)
{
bitmap_set(pre_work_id, &vcpu->pending_pre_work);
}

arch/x86/guest/vioapic.c (new file, 662 lines)

@@ -0,0 +1,662 @@
/*-
* Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* Copyright (c) 2013 Neel Natu <neel@freebsd.org>
* Copyright (c) 2017 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#define pr_fmt(fmt) "vioapic: " fmt
#include <hypervisor.h>
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <hv_debug.h>
#define IOREGSEL 0x00
#define IOWIN 0x10
#define IOEOI 0x40
#define REDIR_ENTRIES_HW 120 /* SOS align with native ioapic */
#define REDIR_ENTRIES_UOS 24 /* UOS pins*/
#define RTBL_RO_BITS ((uint64_t)(IOAPIC_RTE_REM_IRR | IOAPIC_RTE_DELIVS))
#define ACRN_DBG_IOAPIC 6
struct vioapic {
struct vm *vm;
spinlock_t mtx;
uint32_t id;
uint32_t ioregsel;
struct {
uint64_t reg;
int acnt; /* sum of pin asserts (+1) and deasserts (-1) */
} rtbl[REDIR_ENTRIES_HW];
};
#define VIOAPIC_LOCK(vioapic) spinlock_obtain(&((vioapic)->mtx))
#define VIOAPIC_UNLOCK(vioapic) spinlock_release(&((vioapic)->mtx))
static inline const char *pinstate_str(bool asserted)
{
return (asserted) ? "asserted" : "deasserted";
}
struct vioapic *
vm_ioapic(struct vm *vm)
{
return (struct vioapic *)vm->arch_vm.virt_ioapic;
}
static void
vioapic_send_intr(struct vioapic *vioapic, int pin)
{
int vector, delmode;
uint32_t low, high, dest;
bool level, phys;
if (pin < 0 || pin >= vioapic_pincount(vioapic->vm))
pr_err("vioapic_send_intr: invalid pin number %d", pin);
low = vioapic->rtbl[pin].reg;
high = vioapic->rtbl[pin].reg >> 32;
if ((low & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET) {
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: masked", pin);
return;
}
phys = ((low & IOAPIC_RTE_DESTMOD) == IOAPIC_RTE_DESTPHY);
delmode = low & IOAPIC_RTE_DELMOD;
level = low & IOAPIC_RTE_TRGRLVL ? true : false;
if (level)
vioapic->rtbl[pin].reg |= IOAPIC_RTE_REM_IRR;
vector = low & IOAPIC_RTE_INTVEC;
dest = high >> APIC_ID_SHIFT;
vlapic_deliver_intr(vioapic->vm, level, dest, phys, delmode, vector);
}
static void
vioapic_set_pinstate(struct vioapic *vioapic, int pin, bool newstate)
{
int oldcnt, newcnt;
bool needintr;
if (pin < 0 || pin >= vioapic_pincount(vioapic->vm))
pr_err("vioapic_set_pinstate: invalid pin number %d", pin);
oldcnt = vioapic->rtbl[pin].acnt;
if (newstate)
vioapic->rtbl[pin].acnt++;
else
vioapic->rtbl[pin].acnt--;
newcnt = vioapic->rtbl[pin].acnt;
if (newcnt < 0) {
pr_err("ioapic pin%d: bad acnt %d", pin, newcnt);
}
needintr = false;
if (oldcnt == 0 && newcnt == 1) {
needintr = true;
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: asserted", pin);
} else if (oldcnt == 1 && newcnt == 0) {
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: deasserted", pin);
} else {
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: %s, ignored, acnt %d",
pin, pinstate_str(newstate), newcnt);
}
if (needintr)
vioapic_send_intr(vioapic, pin);
}
enum irqstate {
IRQSTATE_ASSERT,
IRQSTATE_DEASSERT,
IRQSTATE_PULSE
};
static int
vioapic_set_irqstate(struct vm *vm, int irq, enum irqstate irqstate)
{
struct vioapic *vioapic;
if (irq < 0 || irq >= vioapic_pincount(vm))
return -EINVAL;
vioapic = vm_ioapic(vm);
VIOAPIC_LOCK(vioapic);
switch (irqstate) {
case IRQSTATE_ASSERT:
vioapic_set_pinstate(vioapic, irq, true);
break;
case IRQSTATE_DEASSERT:
vioapic_set_pinstate(vioapic, irq, false);
break;
case IRQSTATE_PULSE:
vioapic_set_pinstate(vioapic, irq, true);
vioapic_set_pinstate(vioapic, irq, false);
break;
default:
panic("vioapic_set_irqstate: invalid irqstate %d", irqstate);
}
VIOAPIC_UNLOCK(vioapic);
return 0;
}
int
vioapic_assert_irq(struct vm *vm, int irq)
{
return vioapic_set_irqstate(vm, irq, IRQSTATE_ASSERT);
}
int
vioapic_deassert_irq(struct vm *vm, int irq)
{
return vioapic_set_irqstate(vm, irq, IRQSTATE_DEASSERT);
}
int
vioapic_pulse_irq(struct vm *vm, int irq)
{
return vioapic_set_irqstate(vm, irq, IRQSTATE_PULSE);
}
/*
* Reset the vlapic's trigger-mode register to reflect the ioapic pin
* configuration.
*/
void
vioapic_update_tmr(struct vcpu *vcpu)
{
struct vioapic *vioapic;
struct vlapic *vlapic;
uint32_t low;
int delmode, pin, vector;
bool level;
vlapic = vcpu->arch_vcpu.vlapic;
vioapic = vm_ioapic(vcpu->vm);
VIOAPIC_LOCK(vioapic);
for (pin = 0; pin < vioapic_pincount(vioapic->vm); pin++) {
low = vioapic->rtbl[pin].reg;
level = low & IOAPIC_RTE_TRGRLVL ? true : false;
/*
* For a level-triggered 'pin' let the vlapic figure out if
* an assertion on this 'pin' would result in an interrupt
* being delivered to it. If yes, then it will modify the
* TMR bit associated with this vector to level-triggered.
*/
delmode = low & IOAPIC_RTE_DELMOD;
vector = low & IOAPIC_RTE_INTVEC;
vlapic_set_tmr_one_vec(vlapic, delmode, vector, level);
}
vlapic_apicv_batch_set_tmr(vlapic);
VIOAPIC_UNLOCK(vioapic);
}
static uint32_t
vioapic_read(struct vioapic *vioapic, uint32_t addr)
{
int regnum, pin, rshift;
regnum = addr & 0xff;
switch (regnum) {
case IOAPIC_ID:
return vioapic->id;
case IOAPIC_VER:
return ((vioapic_pincount(vioapic->vm) - 1) << MAX_RTE_SHIFT)
| 0x11;
case IOAPIC_ARB:
return vioapic->id;
default:
break;
}
/* redirection table entries */
if (regnum >= IOAPIC_REDTBL &&
regnum < IOAPIC_REDTBL + vioapic_pincount(vioapic->vm) * 2) {
pin = (regnum - IOAPIC_REDTBL) / 2;
if ((regnum - IOAPIC_REDTBL) % 2)
rshift = 32;
else
rshift = 0;
return vioapic->rtbl[pin].reg >> rshift;
}
return 0;
}
/*
* Version 0x20+ IOAPICs have an EOI register; the CPU can write a vector
* to it to clear the related IRR bit.
*/
static void
vioapic_write_eoi(struct vioapic *vioapic, int32_t vector)
{
struct vm *vm = vioapic->vm;
int pin;
if (vector < VECTOR_FOR_INTR_START || vector > NR_MAX_VECTOR)
pr_err("vioapic_process_eoi: invalid vector %d", vector);
VIOAPIC_LOCK(vioapic);
for (pin = 0; pin < vioapic_pincount(vm); pin++) {
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0)
continue;
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) !=
(uint64_t)vector)
continue;
vioapic->rtbl[pin].reg &= ~IOAPIC_RTE_REM_IRR;
if (vioapic->rtbl[pin].acnt > 0) {
dev_dbg(ACRN_DBG_IOAPIC,
"ioapic pin%d: asserted at eoi, acnt %d",
pin, vioapic->rtbl[pin].acnt);
vioapic_send_intr(vioapic, pin);
}
}
VIOAPIC_UNLOCK(vioapic);
}
static void
vioapic_write(struct vioapic *vioapic, uint32_t addr, uint32_t data)
{
uint64_t data64, mask64;
uint64_t last, new, changed;
int regnum, pin, lshift;
regnum = addr & 0xff;
switch (regnum) {
case IOAPIC_ID:
vioapic->id = data & APIC_ID_MASK;
break;
case IOAPIC_VER:
case IOAPIC_ARB:
/* readonly */
break;
default:
break;
}
/* redirection table entries */
if (regnum >= IOAPIC_REDTBL &&
regnum < IOAPIC_REDTBL + vioapic_pincount(vioapic->vm) * 2) {
pin = (regnum - IOAPIC_REDTBL) / 2;
if ((regnum - IOAPIC_REDTBL) % 2)
lshift = 32;
else
lshift = 0;
last = new = vioapic->rtbl[pin].reg;
data64 = (uint64_t)data << lshift;
mask64 = (uint64_t)0xffffffff << lshift;
new &= ~mask64 | RTBL_RO_BITS;
new |= data64 & ~RTBL_RO_BITS;
changed = last ^ new;
/* pin0 from vpic mask/unmask */
if (pin == 0 && (changed & IOAPIC_RTE_INTMASK)) {
/* mask -> unmask */
if ((last & IOAPIC_RTE_INTMASK) &&
((new & IOAPIC_RTE_INTMASK) == 0)) {
if ((vioapic->vm->vpic_wire_mode
== VPIC_WIRE_NULL) ||
(vioapic->vm->vpic_wire_mode
== VPIC_WIRE_INTR)) {
atomic_set_int(
&vioapic->vm->vpic_wire_mode,
VPIC_WIRE_IOAPIC);
dev_dbg(ACRN_DBG_IOAPIC,
"vpic wire mode -> IOAPIC");
} else {
pr_err("WARNING: invalid vpic wire mode change");
return;
}
/* unmask -> mask */
} else if (((last & IOAPIC_RTE_INTMASK) == 0) &&
(new & IOAPIC_RTE_INTMASK)) {
if (vioapic->vm->vpic_wire_mode
== VPIC_WIRE_IOAPIC) {
atomic_set_int(
&vioapic->vm->vpic_wire_mode,
VPIC_WIRE_INTR);
dev_dbg(ACRN_DBG_IOAPIC,
"vpic wire mode -> INTR");
}
}
}
vioapic->rtbl[pin].reg = new;
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: redir table entry %#lx",
pin, vioapic->rtbl[pin].reg);
/*
* If any fields in the redirection table entry (except mask
* or polarity) have changed then rendezvous all the vcpus
* to update their vlapic trigger-mode registers.
*/
if (changed & ~(IOAPIC_RTE_INTMASK | IOAPIC_RTE_INTPOL)) {
int i;
struct vcpu *vcpu;
dev_dbg(ACRN_DBG_IOAPIC,
"ioapic pin%d: recalculate vlapic trigger-mode reg",
pin);
VIOAPIC_UNLOCK(vioapic);
foreach_vcpu(i, vioapic->vm, vcpu) {
vcpu_make_request(vcpu, ACRN_REQUEST_TMR_UPDATE);
}
VIOAPIC_LOCK(vioapic);
}
/*
* Generate an interrupt if the following conditions are met:
* - pin is not masked
* - previous interrupt has been EOIed
* - pin level is asserted
*/
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTMASK) ==
IOAPIC_RTE_INTMCLR &&
(vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0 &&
(vioapic->rtbl[pin].acnt > 0)) {
dev_dbg(ACRN_DBG_IOAPIC,
"ioapic pin%d: asserted at rtbl write, acnt %d",
pin, vioapic->rtbl[pin].acnt);
vioapic_send_intr(vioapic, pin);
}
/* remap for activate: interrupt mask -> unmask
* remap for deactivate: interrupt masked and vector set to 0
*/
data64 = vioapic->rtbl[pin].reg;
if ((((data64 & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMCLR)
&& ((last & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET))
|| (((data64 & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET)
&& ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) == 0))) {
/* VM enable intr */
struct ptdev_intx_info intx;
/* NOTE: only support max 256 pin */
intx.virt_pin = (uint8_t)pin;
intx.vpin_src = PTDEV_VPIN_IOAPIC;
ptdev_intx_pin_remap(vioapic->vm, &intx);
}
}
}
static int
vioapic_mmio_rw(struct vioapic *vioapic, uint64_t gpa,
uint64_t *data, int size, bool doread)
{
uint64_t offset;
offset = gpa - VIOAPIC_BASE;
/*
* The IOAPIC specification allows 32-bit wide accesses to the
* IOREGSEL (offset 0) and IOWIN (offset 16) registers.
*/
if (size != 4 || (offset != IOREGSEL && offset != IOWIN &&
offset != IOEOI)) {
if (doread)
*data = 0;
return 0;
}
VIOAPIC_LOCK(vioapic);
if (offset == IOREGSEL) {
if (doread)
*data = vioapic->ioregsel;
else
vioapic->ioregsel = *data;
} else if (offset == IOEOI) {
/* only need to handle write operation */
if (!doread)
vioapic_write_eoi(vioapic, *data);
} else {
if (doread) {
*data = vioapic_read(vioapic, vioapic->ioregsel);
} else {
vioapic_write(vioapic, vioapic->ioregsel,
*data);
}
}
VIOAPIC_UNLOCK(vioapic);
return 0;
}
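/* Illustrative, standalone model (not from the original source) of the
* IOREGSEL/IOWIN indirection that vioapic_mmio_rw() emulates: the guest
* first writes a register index to IOREGSEL, then reads or writes the
* selected register through IOWIN. Values here are hypothetical.
*/
#include <stdint.h>
#include <stdio.h>

static uint32_t ioregsel;   /* currently selected register index */
static uint32_t regs[0x40]; /* toy register file */

static uint32_t win_read(void)
{
    return regs[ioregsel & 0x3f];
}

static void win_write(uint32_t val)
{
    regs[ioregsel & 0x3f] = val;
}

int main(void)
{
    int pin = 2;

    /* program the low dword of pin 2's RTE (register index 0x10 + 2 * pin) */
    ioregsel = 0x10 + pin * 2;
    win_write(0x0000a030);  /* hypothetical RTE: level-triggered, vector 0x30 */

    /* read it back through the same two-step window */
    ioregsel = 0x10 + pin * 2;
    printf("RTE[%d] low = 0x%08x\n", pin, win_read());
    return 0;
}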
int
vioapic_mmio_read(void *vm, uint64_t gpa, uint64_t *rval,
int size)
{
int error;
struct vioapic *vioapic;
vioapic = vm_ioapic(vm);
error = vioapic_mmio_rw(vioapic, gpa, rval, size, true);
return error;
}
int
vioapic_mmio_write(void *vm, uint64_t gpa, uint64_t wval,
int size)
{
int error;
struct vioapic *vioapic;
vioapic = vm_ioapic(vm);
error = vioapic_mmio_rw(vioapic, gpa, &wval, size, false);
return error;
}
void
vioapic_process_eoi(struct vm *vm, int vector)
{
struct vioapic *vioapic;
int pin;
if (vector < VECTOR_FOR_INTR_START || vector > NR_MAX_VECTOR)
pr_err("vioapic_process_eoi: invalid vector %d", vector);
vioapic = vm_ioapic(vm);
dev_dbg(ACRN_DBG_IOAPIC, "ioapic processing eoi for vector %d", vector);
/* notify device to ack if assigned pin */
for (pin = 0; pin < vioapic_pincount(vm); pin++) {
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0)
continue;
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) !=
(uint64_t)vector)
continue;
ptdev_intx_ack(vm, pin, PTDEV_VPIN_IOAPIC);
}
/*
* XXX keep track of the pins associated with this vector instead
* of iterating on every single pin each time.
*/
VIOAPIC_LOCK(vioapic);
for (pin = 0; pin < vioapic_pincount(vm); pin++) {
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0)
continue;
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) !=
(uint64_t)vector)
continue;
vioapic->rtbl[pin].reg &= ~IOAPIC_RTE_REM_IRR;
if (vioapic->rtbl[pin].acnt > 0) {
dev_dbg(ACRN_DBG_IOAPIC,
"ioapic pin%d: asserted at eoi, acnt %d",
pin, vioapic->rtbl[pin].acnt);
vioapic_send_intr(vioapic, pin);
}
}
VIOAPIC_UNLOCK(vioapic);
}
struct vioapic *
vioapic_init(struct vm *vm)
{
int i;
struct vioapic *vioapic;
vioapic = calloc(1, sizeof(struct vioapic));
ASSERT(vioapic != NULL, "");
vioapic->vm = vm;
spinlock_init(&vioapic->mtx);
/* Initialize all redirection entries to mask all interrupts */
for (i = 0; i < vioapic_pincount(vioapic->vm); i++)
vioapic->rtbl[i].reg = 0x0001000000010000UL;
register_mmio_emulation_handler(vm,
vioapic_mmio_access_handler,
(uint64_t)VIOAPIC_BASE,
(uint64_t)VIOAPIC_BASE + VIOAPIC_SIZE,
(void *) 0);
return vioapic;
}
void
vioapic_cleanup(struct vioapic *vioapic)
{
unregister_mmio_emulation_handler(vioapic->vm,
(uint64_t)VIOAPIC_BASE,
(uint64_t)VIOAPIC_BASE + VIOAPIC_SIZE);
free(vioapic);
}
int
vioapic_pincount(struct vm *vm)
{
if (is_vm0(vm))
return REDIR_ENTRIES_HW;
else
return REDIR_ENTRIES_UOS;
}
int vioapic_mmio_access_handler(struct vcpu *vcpu, struct mem_io *mmio,
void *handler_private_data)
{
struct vm *vm = vcpu->vm;
uint64_t gpa = mmio->paddr;
int ret = 0;
(void)handler_private_data;
/* Note: all accesses to the IOAPIC are 32 bits in size */
ASSERT(mmio->access_size == 4,
"All RW to IOAPIC must be 32-bits in size");
if (mmio->read_write == HV_MEM_IO_READ) {
ret = vioapic_mmio_read(vm,
gpa,
&mmio->value,
mmio->access_size);
mmio->mmio_status = MMIO_TRANS_VALID;
} else if (mmio->read_write == HV_MEM_IO_WRITE) {
ret = vioapic_mmio_write(vm,
gpa,
mmio->value,
mmio->access_size);
mmio->mmio_status = MMIO_TRANS_VALID;
}
return ret;
}
bool vioapic_get_rte(struct vm *vm, int pin, void *rte)
{
struct vioapic *vioapic;
vioapic = vm_ioapic(vm);
if (vioapic && rte) {
*(uint64_t *)rte = vioapic->rtbl[pin].reg;
return true;
} else
return false;
}
int get_vioapic_info(char *str, int str_max, int vmid)
{
int pin, len, size = str_max, vector, delmode;
uint64_t rte;
uint32_t low, high, dest;
bool level, phys, remote_irr, mask;
struct vm *vm = get_vm_from_vmid(vmid);
if (!vm) {
len = snprintf(str, size,
"\r\nvm is not exist for vmid %d", vmid);
size -= len;
str += len;
goto END;
}
len = snprintf(str, size,
"\r\nPIN\tVEC\tDM\tDEST\tTM\tDELM\tIRR\tMASK");
size -= len;
str += len;
for (pin = 0 ; pin < vioapic_pincount(vm); pin++) {
vioapic_get_rte(vm, pin, (void *)&rte);
low = rte;
high = rte >> 32;
mask = ((low & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET);
remote_irr = ((low & IOAPIC_RTE_REM_IRR) == IOAPIC_RTE_REM_IRR);
phys = ((low & IOAPIC_RTE_DESTMOD) == IOAPIC_RTE_DESTPHY);
delmode = low & IOAPIC_RTE_DELMOD;
level = low & IOAPIC_RTE_TRGRLVL ? true : false;
vector = low & IOAPIC_RTE_INTVEC;
dest = high >> APIC_ID_SHIFT;
len = snprintf(str, size,
"\r\n%d\t0x%X\t%s\t0x%X\t%s\t%d\t%d\t%d",
pin, vector, phys ? "phys" : "logic",
dest, level ? "level" : "edge",
delmode >> 8, remote_irr, mask);
size -= len;
str += len;
}
END:
snprintf(str, size, "\r\n");
return 0;
}

2398
arch/x86/guest/vlapic.c Normal file

File diff suppressed because it is too large

153
arch/x86/guest/vlapic_priv.h Normal file

@@ -0,0 +1,153 @@
/*-
* Copyright (c) 2013 Neel Natu <neel@freebsd.org>
* Copyright (c) 2017 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VLAPIC_PRIV_H_
#define _VLAPIC_PRIV_H_
/*
* APIC Register: Offset Description
*/
#define APIC_OFFSET_ID 0x20 /* Local APIC ID */
#define APIC_OFFSET_VER 0x30 /* Local APIC Version */
#define APIC_OFFSET_TPR 0x80 /* Task Priority Register */
#define APIC_OFFSET_APR 0x90 /* Arbitration Priority */
#define APIC_OFFSET_PPR 0xA0 /* Processor Priority Register */
#define APIC_OFFSET_EOI 0xB0 /* EOI Register */
#define APIC_OFFSET_RRR 0xC0 /* Remote read */
#define APIC_OFFSET_LDR 0xD0 /* Logical Destination */
#define APIC_OFFSET_DFR 0xE0 /* Destination Format Register */
#define APIC_OFFSET_SVR 0xF0 /* Spurious Vector Register */
#define APIC_OFFSET_ISR0 0x100 /* In Service Register */
#define APIC_OFFSET_ISR1 0x110
#define APIC_OFFSET_ISR2 0x120
#define APIC_OFFSET_ISR3 0x130
#define APIC_OFFSET_ISR4 0x140
#define APIC_OFFSET_ISR5 0x150
#define APIC_OFFSET_ISR6 0x160
#define APIC_OFFSET_ISR7 0x170
#define APIC_OFFSET_TMR0 0x180 /* Trigger Mode Register */
#define APIC_OFFSET_TMR1 0x190
#define APIC_OFFSET_TMR2 0x1A0
#define APIC_OFFSET_TMR3 0x1B0
#define APIC_OFFSET_TMR4 0x1C0
#define APIC_OFFSET_TMR5 0x1D0
#define APIC_OFFSET_TMR6 0x1E0
#define APIC_OFFSET_TMR7 0x1F0
#define APIC_OFFSET_IRR0 0x200 /* Interrupt Request Register */
#define APIC_OFFSET_IRR1 0x210
#define APIC_OFFSET_IRR2 0x220
#define APIC_OFFSET_IRR3 0x230
#define APIC_OFFSET_IRR4 0x240
#define APIC_OFFSET_IRR5 0x250
#define APIC_OFFSET_IRR6 0x260
#define APIC_OFFSET_IRR7 0x270
#define APIC_OFFSET_ESR 0x280 /* Error Status Register */
#define APIC_OFFSET_CMCI_LVT 0x2F0 /* Local Vector Table (CMCI) */
#define APIC_OFFSET_ICR_LOW 0x300 /* Interrupt Command Register */
#define APIC_OFFSET_ICR_HI 0x310
#define APIC_OFFSET_TIMER_LVT 0x320 /* Local Vector Table (Timer) */
#define APIC_OFFSET_THERM_LVT 0x330 /* Local Vector Table (Thermal) */
#define APIC_OFFSET_PERF_LVT 0x340 /* Local Vector Table (PMC) */
#define APIC_OFFSET_LINT0_LVT 0x350 /* Local Vector Table (LINT0) */
#define APIC_OFFSET_LINT1_LVT 0x360 /* Local Vector Table (LINT1) */
#define APIC_OFFSET_ERROR_LVT 0x370 /* Local Vector Table (ERROR) */
#define APIC_OFFSET_TIMER_ICR 0x380 /* Timer's Initial Count */
#define APIC_OFFSET_TIMER_CCR 0x390 /* Timer's Current Count */
#define APIC_OFFSET_TIMER_DCR 0x3E0 /* Timer's Divide Configuration */
#define APIC_OFFSET_SELF_IPI 0x3F0 /* Self IPI register */
/*
* 16 priority levels with at most one vector injected per level.
*/
#define ISRVEC_STK_SIZE (16 + 1)
#define VLAPIC_MAXLVT_INDEX APIC_LVT_CMCI
struct vlapic;
struct pir_desc {
uint64_t pir[4];
uint64_t pending;
uint64_t unused[3];
} __aligned(64);
struct vlapic_ops {
int (*apicv_set_intr_ready)
(struct vlapic *vlapic, int vector, bool level);
int (*apicv_pending_intr)(struct vlapic *vlapic, int *vecptr);
void (*apicv_intr_accepted)(struct vlapic *vlapic, int vector);
void (*apicv_post_intr)(struct vlapic *vlapic, int hostcpu);
void (*apicv_set_tmr)(struct vlapic *vlapic, int vector, bool level);
void (*apicv_batch_set_tmr)(struct vlapic *vlapic);
void (*enable_x2apic_mode)(struct vlapic *vlapic);
};
struct vlapic {
struct vm *vm;
struct vcpu *vcpu;
struct lapic *apic_page;
struct pir_desc *pir_desc;
struct vlapic_ops ops;
uint32_t esr_pending;
int esr_firing;
struct callout callout; /* vlapic timer */
struct bintime timer_fire_bt; /* callout expiry time */
struct bintime timer_freq_bt; /* timer frequency */
struct bintime timer_period_bt; /* timer period */
long last_timer; /* the last timer id */
spinlock_t timer_mtx;
/*
* The 'isrvec_stk' is a stack of vectors injected by the local apic.
* A vector is popped from the stack when the processor does an EOI.
* The vector on the top of the stack is used to compute the
* Processor Priority in conjunction with the TPR.
*/
uint8_t isrvec_stk[ISRVEC_STK_SIZE];
int isrvec_stk_top;
uint64_t msr_apicbase;
/*
* Copies of some registers in the virtual APIC page. We do this for
* a couple of different reasons:
* - to be able to detect what changed (e.g. svr_last)
* - to maintain a coherent snapshot of the register (e.g. lvt_last)
*/
uint32_t svr_last;
uint32_t lvt_last[VLAPIC_MAXLVT_INDEX + 1];
struct pir_desc pir;
};
void vlapic_cleanup(struct vlapic *vlapic);
#endif /* _VLAPIC_PRIV_H_ */
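The isrvec_stk comment above says the top-of-stack in-service vector is combined with the TPR to derive the processor priority. As a rough, standalone illustration of that rule (following the SDM's PPR definition; this helper is not part of the header):

#include <stdint.h>

/* Illustrative only: the processor priority is the TPR when its class
 * (bits 7:4) is at least that of the highest in-service vector (ISRV);
 * otherwise it is the ISRV class with a zero sub-class. */
static uint8_t example_compute_ppr(uint8_t tpr, uint8_t isrv)
{
        if ((tpr & 0xf0) >= (isrv & 0xf0))
                return tpr;
        else
                return isrv & 0xf0;
}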

324
arch/x86/guest/vm.c Normal file

@@ -0,0 +1,324 @@
/*
* Copyright (C) 2018 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <bsp_extern.h>
#include <hv_debug.h>
/* Local variables */
/* VMs list */
struct list_head vm_list = {
.next = &vm_list,
.prev = &vm_list,
};
/* Lock for VMs list */
spinlock_t vm_list_lock = {
.head = 0,
.tail = 0
};
/* used for vmid allocation; this limits the maximum number of VMs to 64 */
static unsigned long vmid_bitmap;
static void init_vm(struct vm_description *vm_desc,
struct vm *vm_handle)
{
/* Populate VM attributes from VM description */
vm_handle->hw.num_vcpus = vm_desc->vm_hw_num_cores;
vm_handle->state_info.privilege = vm_desc->vm_state_info_privilege;
vm_handle->state_info.boot_count = 0;
}
/* return a pointer to the virtual machine structure associated with
* this VM ID
*/
struct vm *get_vm_from_vmid(int vm_id)
{
struct vm *vm = NULL;
struct list_head *pos;
spinlock_obtain(&vm_list_lock);
list_for_each(pos, &vm_list) {
vm = list_entry(pos, struct vm, list);
if (vm->attr.id == vm_id) {
spinlock_release(&vm_list_lock);
return vm;
}
}
spinlock_release(&vm_list_lock);
return NULL;
}
int create_vm(struct vm_description *vm_desc, struct vm **rtn_vm)
{
unsigned int id;
struct vm *vm;
int status = 0;
if ((vm_desc == NULL) || (rtn_vm == NULL))
status = -EINVAL;
if (status == 0) {
/* Allocate memory for virtual machine */
vm = calloc(1, sizeof(struct vm));
ASSERT(vm != NULL, "vm allocation failed");
/*
* Map Virtual Machine to its VM Description
*/
init_vm(vm_desc, vm);
/* Init mmio list */
INIT_LIST_HEAD(&vm->mmio_list);
if (vm->hw.num_vcpus == 0)
vm->hw.num_vcpus = phy_cpu_num;
vm->hw.vcpu_array =
calloc(1, sizeof(struct vcpu *) * vm->hw.num_vcpus);
ASSERT(vm->hw.vcpu_array != NULL,
"vcpu_array allocation failed");
for (id = 0; id < sizeof(long) * 8; id++)
if (bitmap_test_and_set(id, &vmid_bitmap) == 0)
break;
vm->attr.id = vm->attr.boot_idx = id;
snprintf(&vm->attr.name[0], MAX_VM_NAME_LEN, "vm_%d",
vm->attr.id);
atomic_store_rel_int(&vm->hw.created_vcpus, 0);
/* gpa_lowtop is used for system startup */
vm->hw.gpa_lowtop = 0;
/* Only for SOS: configure VM software information here */
/* For UOS: this VM software information is configured in the DM */
if (is_vm0(vm)) {
prepare_vm0_memmap_and_e820(vm);
#ifndef CONFIG_EFI_STUB
status = init_vm0_boot_info(vm);
#endif
} else {
/* populate UOS vm fields according to vm_desc */
vm->secure_world_enabled =
vm_desc->secure_world_enabled;
memcpy_s(&vm->GUID[0], sizeof(vm->GUID),
&vm_desc->GUID[0],
sizeof(vm_desc->GUID));
}
INIT_LIST_HEAD(&vm->list);
spinlock_obtain(&vm_list_lock);
list_add(&vm->list, &vm_list);
spinlock_release(&vm_list_lock);
/* Ensure VM software information obtained */
if (status == 0) {
/* Set up IO bit-mask such that VM exit occurs on
* selected IO ranges
*/
setup_io_bitmap(vm);
/* Create virtual uart */
if (is_vm0(vm))
vm->vuart = vuart_init(vm);
vm->vpic = vpic_init(vm);
/* vpic wire_mode default is INTR */
vm->vpic_wire_mode = VPIC_WIRE_INTR;
/* Allocate full emulated vIOAPIC instance */
vm->arch_vm.virt_ioapic = vioapic_init(vm);
/* Populate return VM handle */
*rtn_vm = vm;
ptdev_vm_init(vm);
vm->sw.req_buf = 0;
vm->state = VM_CREATED;
}
}
/* Return status to caller */
return status;
}
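create_vm() above hands out VM ids by finding the first free bit in vmid_bitmap with bitmap_test_and_set(). A freestanding, non-atomic sketch of that first-free-slot idea (illustrative only; a real allocator would need the atomic test-and-set used above):

#include <stdint.h>

/* Illustrative only: scan a 64-bit bitmap for the first clear bit,
 * mark it used and return its index, or -1 if all ids are taken. */
static int example_alloc_id(uint64_t *bitmap)
{
        int id;

        for (id = 0; id < 64; id++) {
                if ((*bitmap & (1ULL << id)) == 0) {
                        *bitmap |= 1ULL << id;
                        return id;
                }
        }
        return -1;
}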
int shutdown_vm(struct vm *vm)
{
int i, status = 0;
struct vcpu *vcpu = NULL;
if (vm == NULL)
return -EINVAL;
pause_vm(vm);
/* Only a paused VM can be shut down */
if (vm->state != VM_PAUSED)
return -EINVAL;
foreach_vcpu(i, vm, vcpu) {
reset_vcpu(vcpu);
destroy_vcpu(vcpu);
}
spinlock_obtain(&vm_list_lock);
list_del_init(&vm->list);
spinlock_release(&vm_list_lock);
ptdev_vm_deinit(vm);
/* cleanup and free vioapic */
vioapic_cleanup(vm->arch_vm.virt_ioapic);
/* Free EPT allocated resources assigned to VM */
destroy_ept(vm);
/* Free MSR bitmap */
free(vm->arch_vm.msr_bitmap);
/* TODO: De-initialize I/O Emulation */
free_io_emulation_resource(vm);
/* Free iommu_domain */
if (vm->iommu_domain)
destroy_iommu_domain(vm->iommu_domain);
bitmap_clr(vm->attr.id, &vmid_bitmap);
if (vm->vpic)
vpic_cleanup(vm);
free(vm->hw.vcpu_array);
/* TODO: De-Configure HV-SW */
/* Deallocate VM */
free(vm);
/* Return status to caller */
return status;
}
int start_vm(struct vm *vm)
{
struct vcpu *vcpu = NULL;
vm->state = VM_STARTED;
/* Only start BSP (vid = 0) and let BSP start other APs */
vcpu = vcpu_from_vid(vm, 0);
ASSERT(vcpu != NULL, "vm%d, vcpu0", vm->attr.id);
schedule_vcpu(vcpu);
return 0;
}
/*
* DM only pause vm for shutdown/reboot. If we need to
* extend the pause vm for DM, this API should be extended.
*/
int pause_vm(struct vm *vm)
{
int i;
struct vcpu *vcpu = NULL;
if (vm->state == VM_PAUSED)
return 0;
vm->state = VM_PAUSED;
foreach_vcpu(i, vm, vcpu)
pause_vcpu(vcpu, VCPU_ZOMBIE);
return 0;
}
int vm_resume(struct vm *vm)
{
int i;
struct vcpu *vcpu = NULL;
foreach_vcpu(i, vm, vcpu)
resume_vcpu(vcpu);
vm->state = VM_STARTED;
return 0;
}
/* Eventually this array will be removed and only the vm0 description kept */
struct vm_description *get_vm_desc(int idx)
{
struct vm_description_array *vm_desc_array;
/* Obtain base of user defined VM description array data
* structure
*/
vm_desc_array = (struct vm_description_array *)get_vm_desc_base();
/* Obtain VM description array base */
if (idx >= vm_desc_array->num_vm_desc)
return NULL;
else
return &vm_desc_array->vm_desc_array[idx];
}
/* Create vm/vcpu for vm0 */
int prepare_vm0(void)
{
int i, ret;
struct vm *vm = NULL;
struct vm_description *vm_desc = NULL;
vm_desc = get_vm_desc(0);
ASSERT(vm_desc, "get vm desc failed");
ret = create_vm(vm_desc, &vm);
ASSERT(ret == 0, "VM creation failed!");
prepare_vcpu(vm, vm_desc->vm_hw_logical_core_ids[0]);
/* Prepare the AP for vm0 */
for (i = 1; i < vm_desc->vm_hw_num_cores; i++)
prepare_vcpu(vm, vm_desc->vm_hw_logical_core_ids[i]);
/* start vm0 BSP automatically */
start_vm(vm);
pr_fatal("Start VM0");
return 0;
}

148
arch/x86/guest/vmcall.c Normal file

@@ -0,0 +1,148 @@
/*
* Copyright (C) 2018 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <hv_debug.h>
#include <acrn_hv_defs.h>
#include <hypercall.h>
int vmcall_handler(struct vcpu *vcpu)
{
int64_t ret = 0;
struct vm *vm = vcpu->vm;
struct run_context *cur_context =
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
/* hypercall ID from guest*/
uint64_t hypcall_id = cur_context->guest_cpu_regs.regs.r8;
/* hypercall param1 from guest*/
uint64_t param1 = cur_context->guest_cpu_regs.regs.rdi;
/* hypercall param2 from guest*/
uint64_t param2 = cur_context->guest_cpu_regs.regs.rsi;
/* hypercall param3 from guest, reserved*/
/* uint64_t param3 = cur_context->guest_cpu_regs.regs.rdx; */
/* hypercall param4 from guest, reserved*/
/* uint64_t param4 = cur_context->guest_cpu_regs.regs.rcx; */
/* Dispatch the hypercall handler */
switch (hypcall_id) {
case HC_GET_API_VERSION:
ret = hcall_get_api_version(vm, param1);
break;
case HC_CREATE_VM:
ret = hcall_create_vm(vm, param1);
break;
case HC_DESTROY_VM:
ret = hcall_destroy_vm(param1);
break;
case HC_START_VM:
ret = hcall_resume_vm(param1);
break;
case HC_PAUSE_VM:
ret = hcall_pause_vm(param1);
break;
case HC_CREATE_VCPU:
ret = hcall_create_vcpu(vm, param1, param2);
break;
case HC_ASSERT_IRQLINE:
ret = hcall_assert_irqline(vm, param1, param2);
break;
case HC_DEASSERT_IRQLINE:
ret = hcall_deassert_irqline(vm, param1, param2);
break;
case HC_PULSE_IRQLINE:
ret = hcall_pulse_irqline(vm, param1, param2);
break;
case HC_INJECT_MSI:
ret = hcall_inject_msi(vm, param1, param2);
break;
case HC_SET_IOREQ_BUFFER:
ret = hcall_set_ioreq_buffer(vm, param1, param2);
break;
case HC_NOTIFY_REQUEST_FINISH:
ret = hcall_notify_req_finish(param1, param2);
break;
case HC_VM_SET_MEMMAP:
ret = hcall_set_vm_memmap(vm, param1, param2);
break;
case HC_VM_PCI_MSIX_REMAP:
ret = hcall_remap_pci_msix(vm, param1, param2);
break;
case HC_VM_GPA2HPA:
ret = hcall_gpa_to_hpa(vm, param1, param2);
break;
case HC_ASSIGN_PTDEV:
ret = hcall_assign_ptdev(vm, param1, param2);
break;
case HC_DEASSIGN_PTDEV:
ret = hcall_deassign_ptdev(vm, param1, param2);
break;
case HC_SET_PTDEV_INTR_INFO:
ret = hcall_set_ptdev_intr_info(vm, param1, param2);
break;
case HC_RESET_PTDEV_INTR_INFO:
ret = hcall_reset_ptdev_intr_info(vm, param1, param2);
break;
case HC_SETUP_SBUF:
ret = hcall_setup_sbuf(vm, param1);
break;
default:
pr_err("op %d: Invalid hypercall\n", hypcall_id);
ret = -1;
break;
}
cur_context->guest_cpu_regs.regs.rax = ret;
TRACE_2L(TRC_VMEXIT_VMCALL, vm->attr.id, hypcall_id);
return 0;
}
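vmcall_handler() above reads the hypercall id from R8, the first two parameters from RDI/RSI, and writes the result back to RAX. A guest-side sketch of that calling convention might look like the following (GCC inline assembly; the wrapper name is hypothetical and the HC_* id values come from acrn_hv_defs.h):

#include <stdint.h>

/* Illustrative guest-side wrapper matching the register convention used
 * by vmcall_handler(): id in r8, params in rdi/rsi, result in rax.
 * Only meaningful when executed inside a guest of this hypervisor. */
static inline int64_t example_hypercall2(uint64_t id, uint64_t p1, uint64_t p2)
{
        int64_t ret;

        asm volatile ("movq %1, %%r8\n\t"
                      "vmcall"
                      : "=a" (ret)
                      : "r" (id), "D" (p1), "S" (p2)
                      : "r8", "memory");
        return ret;
}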

321
arch/x86/guest/vmsr.c Normal file

@@ -0,0 +1,321 @@
/*
* Copyright (C) 2018 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <hv_debug.h>
/* MSRs that need to be emulated; order entries in this array by frequency of access */
static const uint32_t emulated_msrs[] = {
MSR_IA32_TSC_DEADLINE, /* Enable TSC_DEADLINE VMEXIT */
/* following MSR not emulated now */
/*
* MSR_IA32_APIC_BASE,
* MSR_IA32_SYSENTER_CS,
* MSR_IA32_SYSENTER_ESP,
* MSR_IA32_SYSENTER_EIP,
* MSR_IA32_TSC_AUX,
* MSR_IA32_TIME_STAMP_COUNTER,
*/
};
/* these indices must match the emulated_msrs array */
enum {
IDX_TSC_DEADLINE,
IDX_MAX_MSR
};
static void enable_msr_interception(uint8_t *bitmap, uint32_t msr)
{
uint8_t *read_map;
uint8_t *write_map;
uint8_t value;
/* low MSR */
if (msr <= 0x1FFF) {
read_map = bitmap;
write_map = bitmap + 2048;
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
read_map = bitmap + 1024;
write_map = bitmap + 3072;
} else {
pr_err("Invalid MSR");
return;
}
msr &= 0x1FFF;
value = read_map[(msr>>3)];
value |= 1<<(msr%8);
/* right now we trap for both r/w */
read_map[(msr>>3)] = value;
write_map[(msr>>3)] = value;
}
/* Not used now; kept as an API for cases that may need it later */
void disable_msr_interception(uint8_t *bitmap, uint32_t msr)
{
uint8_t *read_map;
uint8_t *write_map;
uint8_t value;
/* low MSR */
if (msr <= 0x1FFF) {
read_map = bitmap;
write_map = bitmap + 2048;
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
read_map = bitmap + 1024;
write_map = bitmap + 3072;
} else {
pr_err("Invalid MSR");
return;
}
msr &= 0x1FFF;
value = read_map[(msr>>3)];
value &= ~(1<<(msr%8));
/* right now we trap for both r/w */
read_map[(msr>>3)] = value;
write_map[(msr>>3)] = value;
}
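Both helpers above treat the 4 KB MSR bitmap as four 1 KB regions: read-low at offset 0, read-high at 1024, write-low at 2048, write-high at 3072, with one bit per MSR. A small standalone sketch of that byte/bit arithmetic (the function name is made up; it computes the read-bitmap position, the write-bitmap one is the same plus 2048):

#include <stdint.h>
#include <stdbool.h>

/* Illustrative only: compute the read-bitmap byte offset and bit position
 * for an MSR, mirroring enable/disable_msr_interception(). Returns false
 * for MSRs outside the two architectural ranges. */
static bool example_msr_bitmap_pos(uint32_t msr, uint32_t *byte, uint8_t *bit)
{
        uint32_t base;

        if (msr <= 0x1fffU)
                base = 0;       /* low MSRs: bytes 0..1023 */
        else if (msr >= 0xc0000000U && msr <= 0xc0001fffU)
                base = 1024;    /* high MSRs: bytes 1024..2047 */
        else
                return false;

        msr &= 0x1fffU;
        *byte = base + (msr >> 3);
        *bit = (uint8_t)(msr & 7U);
        return true;
}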
void init_msr_emulation(struct vcpu *vcpu)
{
uint32_t i = 0;
uint32_t msrs_count = ARRAY_SIZE(emulated_msrs);
void *msr_bitmap;
uint64_t value64;
ASSERT(msrs_count == IDX_MAX_MSR,
"MSR ID should be matched with emulated_msrs");
/* The MSR bitmap is allocated/initialized once and shared by all vCPUs of the VM */
if (is_vcpu_bsp(vcpu)) {
/* Allocate and initialize memory for MSR bitmap region*/
vcpu->vm->arch_vm.msr_bitmap = alloc_page();
ASSERT(vcpu->vm->arch_vm.msr_bitmap, "");
memset(vcpu->vm->arch_vm.msr_bitmap, 0x0, CPU_PAGE_SIZE);
msr_bitmap = vcpu->vm->arch_vm.msr_bitmap;
for (i = 0; i < msrs_count; i++)
enable_msr_interception(msr_bitmap, emulated_msrs[i]);
/* The MSRs below are protected from the guest OS; accesses inject #GP */
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_CAP);
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_DEF_TYPE);
for (i = MSR_IA32_MTRR_PHYSBASE_0;
i <= MSR_IA32_MTRR_PHYSMASK_9; i++) {
enable_msr_interception(msr_bitmap, i);
}
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_FIX64K_00000);
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_FIX16K_80000);
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_FIX16K_A0000);
for (i = MSR_IA32_MTRR_FIX4K_C0000;
i <= MSR_IA32_MTRR_FIX4K_F8000; i++) {
enable_msr_interception(msr_bitmap, i);
}
}
/* Set up MSR bitmap - SDM Vol 3, Section 24.6.9 */
value64 = (uint64_t) vcpu->vm->arch_vm.msr_bitmap;
exec_vmwrite64(VMX_MSR_BITMAP_FULL, value64);
pr_dbg("VMX_MSR_BITMAP: 0x%016llx ", value64);
vcpu->guest_msrs = (uint64_t *)calloc(msrs_count, sizeof(uint64_t));
ASSERT(vcpu->guest_msrs != NULL, "");
memset(vcpu->guest_msrs, 0, msrs_count * sizeof(uint64_t));
}
int rdmsr_handler(struct vcpu *vcpu)
{
uint32_t msr;
uint64_t v = 0;
uint32_t id;
int cur_context = vcpu->arch_vcpu.cur_context;
/* Read the msr value */
msr = vcpu->arch_vcpu.contexts[cur_context].guest_cpu_regs.regs.rcx;
/* Do the required processing for each msr case */
switch (msr) {
case MSR_IA32_TSC_DEADLINE:
{
v = vcpu->guest_msrs[IDX_TSC_DEADLINE];
break;
}
case MSR_IA32_MTRR_CAP:
case MSR_IA32_MTRR_DEF_TYPE:
case MSR_IA32_MTRR_PHYSBASE_0 ... MSR_IA32_MTRR_PHYSMASK_9:
case MSR_IA32_MTRR_FIX64K_00000 ... MSR_IA32_MTRR_FIX4K_F8000:
{
vcpu_inject_gp(vcpu);
break;
}
/* The following MSRs are not emulated yet; left for the future */
case MSR_IA32_SYSENTER_CS:
{
v = exec_vmread(VMX_GUEST_IA32_SYSENTER_CS);
break;
}
case MSR_IA32_SYSENTER_ESP:
{
v = exec_vmread(VMX_GUEST_IA32_SYSENTER_ESP);
break;
}
case MSR_IA32_SYSENTER_EIP:
{
v = exec_vmread(VMX_GUEST_IA32_SYSENTER_EIP);
break;
}
case MSR_IA32_TSC_AUX:
{
v = vcpu->arch_vcpu.msr_tsc_aux;
break;
}
case MSR_IA32_TIME_STAMP_COUNTER:
{
/* Read the host TSC value */
CPU_RDTSCP_EXECUTE(&v, &id);
/* Add the TSC_offset to host TSC and return the value */
v += exec_vmread64(VMX_TSC_OFFSET_FULL);
break;
}
case MSR_IA32_APIC_BASE:
{
bool ret;
/* Read APIC base */
vlapic_rdmsr(vcpu, msr, &v, &ret);
break;
}
default:
{
pr_warn("rdmsr: %lx should not come here!", msr);
v = 0;
break;
}
}
/* Store the MSR contents in RAX and RDX */
vcpu->arch_vcpu.contexts[cur_context].guest_cpu_regs.regs.rax =
v & 0xffffffff;
vcpu->arch_vcpu.contexts[cur_context].guest_cpu_regs.regs.rdx = v >> 32;
TRACE_2L(TRC_VMEXIT_RDMSR, msr, v);
return 0;
}
int wrmsr_handler(struct vcpu *vcpu)
{
uint32_t msr;
uint64_t v;
struct run_context *cur_context =
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
/* Read the MSR ID */
msr = cur_context->guest_cpu_regs.regs.rcx;
/* Get the MSR contents */
v = (((uint64_t) cur_context->guest_cpu_regs.regs.rdx) << 32) |
((uint64_t) cur_context->guest_cpu_regs.regs.rax);
/* Do the required processing for each msr case */
switch (msr) {
case MSR_IA32_TSC_DEADLINE:
{
bool ret;
/* Write the TSC deadline MSR via the vlapic */
vlapic_wrmsr(vcpu, msr, v, &ret);
vcpu->guest_msrs[IDX_TSC_DEADLINE] = v;
break;
}
case MSR_IA32_MTRR_CAP:
case MSR_IA32_MTRR_DEF_TYPE:
case MSR_IA32_MTRR_PHYSBASE_0 ... MSR_IA32_MTRR_PHYSMASK_9:
case MSR_IA32_MTRR_FIX64K_00000 ... MSR_IA32_MTRR_FIX4K_F8000:
{
vcpu_inject_gp(vcpu);
break;
}
/* The following MSRs are not emulated yet; left for the future */
case MSR_IA32_SYSENTER_CS:
{
exec_vmwrite(VMX_GUEST_IA32_SYSENTER_CS, v);
break;
}
case MSR_IA32_SYSENTER_ESP:
{
exec_vmwrite(VMX_GUEST_IA32_SYSENTER_ESP, v);
break;
}
case MSR_IA32_SYSENTER_EIP:
{
exec_vmwrite(VMX_GUEST_IA32_SYSENTER_EIP, v);
break;
}
case MSR_IA32_GS_BASE:
{
exec_vmwrite(VMX_GUEST_GS_BASE, v);
break;
}
case MSR_IA32_TSC_AUX:
{
vcpu->arch_vcpu.msr_tsc_aux = v;
break;
}
case MSR_IA32_APIC_BASE:
{
bool ret;
/* Write APIC base */
vlapic_wrmsr(vcpu, msr, v, &ret);
break;
}
default:
{
ASSERT(0, "wrmsr: %lx should not come here!", msr);
msr_write(msr, v);
break;
}
}
TRACE_2L(TRC_VMEXIT_WRMSR, msr, v);
return 0;
}
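Both handlers above shuttle the 64-bit MSR value through the guest's EDX:EAX pair, just as the RDMSR/WRMSR instructions do. A trivial standalone illustration of that packing:

#include <stdint.h>

/* Illustrative only: the EDX:EAX packing used by the rdmsr/wrmsr paths. */
static uint64_t example_edx_eax_to_u64(uint32_t edx, uint32_t eax)
{
        return ((uint64_t)edx << 32) | eax;
}

static void example_u64_to_edx_eax(uint64_t v, uint32_t *edx, uint32_t *eax)
{
        *eax = (uint32_t)v;
        *edx = (uint32_t)(v >> 32);
}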

950
arch/x86/guest/vpic.c Normal file

@@ -0,0 +1,950 @@
/*-
* Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* Copyright (c) 2017 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#define pr_fmt(fmt) "vpic: " fmt
#include <hypervisor.h>
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <hv_debug.h>
#define VPIC_LOCK_INIT(vpic) spinlock_init(&((vpic)->lock))
#define VPIC_LOCK(vpic) spinlock_obtain(&((vpic)->lock))
#define VPIC_UNLOCK(vpic) spinlock_release(&((vpic)->lock))
/* TODO: add spinlock_locked support? */
/*#define VPIC_LOCKED(vpic) spinlock_locked(&((vpic)->lock))*/
#define vm_pic(vm) (vm->vpic)
#define true 1
#define false 0
#define ACRN_DBG_PIC 6
enum irqstate {
IRQSTATE_ASSERT,
IRQSTATE_DEASSERT,
IRQSTATE_PULSE
};
struct pic {
bool ready;
int icw_num;
int rd_cmd_reg;
bool aeoi;
bool poll;
bool rotate;
bool sfn; /* special fully-nested mode */
int irq_base;
uint8_t request; /* Interrupt Request Register (IRR) */
uint8_t service; /* In-Service Register (ISR) */
uint8_t mask; /* Interrupt Mask Register (IMR) */
uint8_t smm; /* special mask mode */
int acnt[8]; /* net count of pin asserts minus deasserts */
int lowprio; /* lowest priority irq */
bool intr_raised;
uint8_t elc;
};
struct vpic {
struct vm *vm;
spinlock_t lock;
struct pic pic[2];
};
/*
* Loop over all the pins in priority order from highest to lowest.
*/
#define PIC_PIN_FOREACH(pinvar, pic, tmpvar) \
for (tmpvar = 0, pinvar = (pic->lowprio + 1) & 0x7; \
tmpvar < 8; \
tmpvar++, pinvar = (pinvar + 1) & 0x7)
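PIC_PIN_FOREACH starts scanning at the pin just after lowprio and wraps modulo 8, which is how the priority rotation set by OCW2 takes effect. A standalone illustration of the visiting order (the function name is made up):

#include <stdio.h>

/* Illustrative only: print the order PIC_PIN_FOREACH visits pins for a
 * given lowest-priority pin, e.g. lowprio 7 -> 0 1 2 3 4 5 6 7 and
 * lowprio 2 -> 3 4 5 6 7 0 1 2. */
static void example_show_scan_order(int lowprio)
{
        int pin, i;

        for (i = 0, pin = (lowprio + 1) & 0x7; i < 8;
             i++, pin = (pin + 1) & 0x7)
                printf("%d ", pin);
        printf("\n");
}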
static void vpic_set_pinstate(struct vpic *vpic, int pin, bool newstate);
static inline bool master_pic(struct vpic *vpic, struct pic *pic)
{
if (pic == &vpic->pic[0])
return true;
else
return false;
}
static inline int vpic_get_highest_isrpin(struct pic *pic)
{
int bit, pin;
int i;
PIC_PIN_FOREACH(pin, pic, i) {
bit = (1 << pin);
if (pic->service & bit) {
/*
* An IS bit that is masked by an IMR bit will not be
* cleared by a non-specific EOI in Special Mask Mode.
*/
if (pic->smm && (pic->mask & bit) != 0)
continue;
else
return pin;
}
}
return -1;
}
static inline int vpic_get_highest_irrpin(struct pic *pic)
{
int serviced;
int bit, pin, tmp;
/*
* In 'Special Fully-Nested Mode' when an interrupt request from
* a slave is in service, the slave is not locked out from the
* master's priority logic.
*/
serviced = pic->service;
if (pic->sfn)
serviced &= ~(1 << 2);
/*
* In 'Special Mask Mode', when a mask bit is set in OCW1 it inhibits
* further interrupts at that level and enables interrupts from all
* other levels that are not masked. In other words the ISR has no
* bearing on the levels that can generate interrupts.
*/
if (pic->smm)
serviced = 0;
PIC_PIN_FOREACH(pin, pic, tmp) {
bit = 1 << pin;
/*
* If there is already an interrupt in service at the same
* or higher priority then bail.
*/
if ((serviced & bit) != 0)
break;
/*
* If an interrupt is asserted and not masked then return
* the corresponding 'pin' to the caller.
*/
if ((pic->request & bit) != 0 && (pic->mask & bit) == 0)
return pin;
}
return -1;
}
static void vpic_notify_intr(struct vpic *vpic)
{
struct pic *pic;
int pin;
/*
* First check the slave.
*/
pic = &vpic->pic[1];
pin = vpic_get_highest_irrpin(pic);
if (!pic->intr_raised && pin != -1) {
dev_dbg(ACRN_DBG_PIC,
"pic slave notify pin = %d (imr 0x%x irr 0x%x isr 0x%x)\n",
pin, pic->mask, pic->request, pic->service);
/*
* Cascade the request from the slave to the master.
*/
pic->intr_raised = true;
vpic_set_pinstate(vpic, 2, true);
vpic_set_pinstate(vpic, 2, false);
} else {
dev_dbg(ACRN_DBG_PIC,
"pic slave no eligible interrupt (imr 0x%x irr 0x%x isr 0x%x)",
pic->mask, pic->request, pic->service);
}
/*
* Then check the master.
*/
pic = &vpic->pic[0];
pin = vpic_get_highest_irrpin(pic);
if (!pic->intr_raised && pin != -1) {
dev_dbg(ACRN_DBG_PIC,
"pic master notify pin = %d (imr 0x%x irr 0x%x isr 0x%x)\n",
pin, pic->mask, pic->request, pic->service);
/*
* From Section 3.6.2, "Interrupt Modes", in the
* MPtable Specification, Version 1.4
*
* PIC interrupts are routed to both the Local APIC
* and the I/O APIC to support operation in 1 of 3
* modes.
*
* 1. Legacy PIC Mode: the PIC effectively bypasses
* all APIC components. In this mode the local APIC is
* disabled and LINT0 is reconfigured as INTR to
* deliver the PIC interrupt directly to the CPU.
*
* 2. Virtual Wire Mode: the APIC is treated as a
* virtual wire which delivers interrupts from the PIC
* to the CPU. In this mode LINT0 is programmed as
* ExtINT to indicate that the PIC is the source of
* the interrupt.
*
* 3. Virtual Wire Mode via I/O APIC: PIC interrupts are
* fielded by the I/O APIC and delivered to the appropriate
* CPU. In this mode the I/O APIC input 0 is programmed
* as ExtINT to indicate that the PIC is the source of the
* interrupt.
*/
pic->intr_raised = true;
if (vpic->vm->vpic_wire_mode == VPIC_WIRE_INTR) {
struct vcpu *vcpu = vcpu_from_vid(vpic->vm, 0);
ASSERT(vcpu != NULL, "vm%d, vcpu0", vpic->vm->attr.id);
vcpu_inject_extint(vcpu);
} else {
vlapic_set_local_intr(vpic->vm, -1, APIC_LVT_LINT0);
/* notify vioapic pin0 if it exists:
 * in vPIC + vIOAPIC mode, the vPIC master output is connected
 * to vioapic pin0 (irq2)
 * (see MP Spec section 5.1)
*/
vioapic_pulse_irq(vpic->vm, 0);
}
} else {
dev_dbg(ACRN_DBG_PIC,
"pic master no eligible interrupt (imr 0x%x irr 0x%x isr 0x%x)",
pic->mask, pic->request, pic->service);
}
}
static int vpic_icw1(__unused struct vpic *vpic, struct pic *pic, uint8_t val)
{
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw1 0x%x\n",
vpic->vm, val);
pic->ready = false;
pic->icw_num = 1;
pic->request = 0;
pic->mask = 0;
pic->lowprio = 7;
pic->rd_cmd_reg = 0;
pic->poll = 0;
pic->smm = 0;
if ((val & ICW1_SNGL) != 0) {
dev_dbg(ACRN_DBG_PIC, "vpic cascade mode required\n");
return -1;
}
if ((val & ICW1_IC4) == 0) {
dev_dbg(ACRN_DBG_PIC, "vpic icw4 required\n");
return -1;
}
pic->icw_num++;
return 0;
}
static int vpic_icw2(__unused struct vpic *vpic, struct pic *pic, uint8_t val)
{
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw2 0x%x\n",
vpic->vm, val);
pic->irq_base = val & 0xf8;
pic->icw_num++;
return 0;
}
static int vpic_icw3(__unused struct vpic *vpic, struct pic *pic,
__unused uint8_t val)
{
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw3 0x%x\n",
vpic->vm, val);
pic->icw_num++;
return 0;
}
static int vpic_icw4(struct vpic *vpic, struct pic *pic, uint8_t val)
{
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw4 0x%x\n",
vpic->vm, val);
if ((val & ICW4_8086) == 0) {
dev_dbg(ACRN_DBG_PIC,
"vpic microprocessor mode required\n");
return -1;
}
if ((val & ICW4_AEOI) != 0)
pic->aeoi = true;
if ((val & ICW4_SFNM) != 0) {
if (master_pic(vpic, pic)) {
pic->sfn = true;
} else {
dev_dbg(ACRN_DBG_PIC,
"Ignoring special fully nested mode on slave pic: %#x",
val);
}
}
pic->icw_num = 0;
pic->ready = true;
return 0;
}
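vpic_icw1() through vpic_icw4() above parse the standard 8259A initialization sequence: ICW1 to the command port with the init bit and IC4 set, then ICW2 (vector base), ICW3 (cascade wiring) and ICW4 (8086 mode) to the data port. A guest-side sketch of programming the master PIC that way (ports per the ranges registered in vpic_register_io_handler() below; the outb helper here is a hypothetical stand-in for the usual port-I/O wrapper):

#include <stdint.h>

/* Hypothetical port-I/O helper for the sketch below. */
static inline void example_outb(uint16_t port, uint8_t val)
{
        asm volatile ("outb %0, %1" : : "a" (val), "Nd" (port));
}

/* Illustrative 8259A master init matching the ICW1-ICW4 sequence parsed
 * above: ICW1 (init + IC4), ICW2 vector base 0x20, ICW3 slave on IRQ2,
 * ICW4 8086 mode, then OCW1 to mask all pins. */
static void example_init_master_pic(void)
{
        example_outb(0x20, 0x11);       /* ICW1: init, IC4 required */
        example_outb(0x21, 0x20);       /* ICW2: vector base */
        example_outb(0x21, 0x04);       /* ICW3: slave wired to IRQ2 */
        example_outb(0x21, 0x01);       /* ICW4: 8086 mode */
        example_outb(0x21, 0xff);       /* OCW1: mask everything */
}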
bool vpic_is_pin_mask(struct vpic *vpic, uint8_t virt_pin)
{
struct pic *pic;
if (virt_pin < 8)
pic = &vpic->pic[0];
else if (virt_pin < 16) {
pic = &vpic->pic[1];
virt_pin -= 8;
} else
return true;
if (pic->mask & (1 << virt_pin))
return true;
else
return false;
}
static int vpic_ocw1(struct vpic *vpic, struct pic *pic, uint8_t val)
{
int pin, i, bit;
uint8_t old = pic->mask;
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic ocw1 0x%x\n",
vpic->vm, val);
pic->mask = val & 0xff;
/* query and setup if pin/irq is for passthrough device */
PIC_PIN_FOREACH(pin, pic, i) {
bit = (1 << pin);
/* remap for activation: interrupt mask -> unmask
 * remap for deactivation: handled when the vIOAPIC takes it over
 */
if (((pic->mask & bit) == 0) && (old & bit)) {
struct ptdev_intx_info intx;
/* master PIC pin2 is connected to the slave PIC,
 * not to a device, so no passthrough remap is needed
*/
if ((pin == 2) && master_pic(vpic, pic))
continue;
intx.virt_pin = pin;
intx.vpin_src = PTDEV_VPIN_PIC;
if (!master_pic(vpic, pic))
intx.virt_pin += 8;
ptdev_intx_pin_remap(vpic->vm, &intx);
}
}
return 0;
}
static int vpic_ocw2(struct vpic *vpic, struct pic *pic, uint8_t val)
{
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic ocw2 0x%x\n",
vpic->vm, val);
pic->rotate = ((val & OCW2_R) != 0);
if ((val & OCW2_EOI) != 0) {
int isr_bit;
if ((val & OCW2_SL) != 0) {
/* specific EOI */
isr_bit = val & 0x7;
} else {
/* non-specific EOI */
isr_bit = vpic_get_highest_isrpin(pic);
}
if (isr_bit != -1) {
pic->service &= ~(1 << isr_bit);
if (pic->rotate)
pic->lowprio = isr_bit;
}
/* if level triggered, ack the PTDEV */
if (isr_bit != -1 && (pic->elc & (1 << isr_bit))) {
ptdev_intx_ack(vpic->vm,
master_pic(vpic, pic) ? isr_bit : isr_bit + 8,
PTDEV_VPIN_PIC);
}
} else if ((val & OCW2_SL) != 0 && pic->rotate == true) {
/* specific priority */
pic->lowprio = val & 0x7;
}
return 0;
}
static int vpic_ocw3(__unused struct vpic *vpic, struct pic *pic, uint8_t val)
{
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic ocw3 0x%x\n",
vpic->vm, val);
if (val & OCW3_ESMM) {
pic->smm = val & OCW3_SMM ? 1 : 0;
dev_dbg(ACRN_DBG_PIC, "%s pic special mask mode %s\n",
master_pic(vpic, pic) ? "master" : "slave",
pic->smm ? "enabled" : "disabled");
}
if (val & OCW3_RR) {
/* read register command */
pic->rd_cmd_reg = val & OCW3_RIS;
/* Polling mode */
pic->poll = ((val & OCW3_P) != 0);
}
return 0;
}
static void vpic_set_pinstate(struct vpic *vpic, int pin, bool newstate)
{
struct pic *pic;
int oldcnt, newcnt;
bool level;
ASSERT(pin >= 0 && pin < 16,
"vpic_set_pinstate: invalid pin number");
pic = &vpic->pic[pin >> 3];
oldcnt = pic->acnt[pin & 0x7];
if (newstate)
pic->acnt[pin & 0x7]++;
else
pic->acnt[pin & 0x7]--;
newcnt = pic->acnt[pin & 0x7];
if (newcnt < 0) {
pr_warn("pic pin%d: bad acnt %d\n", pin, newcnt);
}
level = ((vpic->pic[pin >> 3].elc & (1 << (pin & 0x7))) != 0);
if ((oldcnt == 0 && newcnt == 1) || (newcnt > 0 && level == true)) {
/* rising edge or level */
dev_dbg(ACRN_DBG_PIC, "pic pin%d: asserted\n", pin);
pic->request |= (1 << (pin & 0x7));
} else if (oldcnt == 1 && newcnt == 0) {
/* falling edge */
dev_dbg(ACRN_DBG_PIC, "pic pin%d: deasserted\n", pin);
if (level)
pic->request &= ~(1 << (pin & 0x7));
} else {
dev_dbg(ACRN_DBG_PIC,
"pic pin%d: %s, ignored, acnt %d\n",
pin, newstate ? "asserted" : "deasserted", newcnt);
}
vpic_notify_intr(vpic);
}
static int vpic_set_irqstate(struct vm *vm, int irq, enum irqstate irqstate)
{
struct vpic *vpic;
struct pic *pic;
if (irq < 0 || irq > 15)
return -EINVAL;
vpic = vm_pic(vm);
pic = &vpic->pic[irq >> 3];
if (pic->ready == false)
return 0;
VPIC_LOCK(vpic);
switch (irqstate) {
case IRQSTATE_ASSERT:
vpic_set_pinstate(vpic, irq, true);
break;
case IRQSTATE_DEASSERT:
vpic_set_pinstate(vpic, irq, false);
break;
case IRQSTATE_PULSE:
vpic_set_pinstate(vpic, irq, true);
vpic_set_pinstate(vpic, irq, false);
break;
default:
ASSERT(0, "vpic_set_irqstate: invalid irqstate");
}
VPIC_UNLOCK(vpic);
return 0;
}
/* hypervisor interface: assert/deassert/pulse irq */
int vpic_assert_irq(struct vm *vm, int irq)
{
return vpic_set_irqstate(vm, irq, IRQSTATE_ASSERT);
}
int vpic_deassert_irq(struct vm *vm, int irq)
{
return vpic_set_irqstate(vm, irq, IRQSTATE_DEASSERT);
}
int vpic_pulse_irq(struct vm *vm, int irq)
{
return vpic_set_irqstate(vm, irq, IRQSTATE_PULSE);
}
int vpic_set_irq_trigger(struct vm *vm, int irq, enum vpic_trigger trigger)
{
struct vpic *vpic;
if (irq < 0 || irq > 15)
return -EINVAL;
/*
* See comment in vpic_elc_handler. These IRQs must be
* edge triggered.
*/
if (trigger == LEVEL_TRIGGER) {
switch (irq) {
case 0:
case 1:
case 2:
case 8:
case 13:
return -EINVAL;
}
}
vpic = vm_pic(vm);
VPIC_LOCK(vpic);
if (trigger == LEVEL_TRIGGER)
vpic->pic[irq >> 3].elc |= 1 << (irq & 0x7);
else
vpic->pic[irq >> 3].elc &= ~(1 << (irq & 0x7));
VPIC_UNLOCK(vpic);
return 0;
}
int vpic_get_irq_trigger(struct vm *vm, int irq, enum vpic_trigger *trigger)
{
struct vpic *vpic;
if (irq < 0 || irq > 15)
return -EINVAL;
vpic = vm_pic(vm);
if (!vpic)
return -EINVAL;
if (vpic->pic[irq>>3].elc & (1 << (irq & 0x7)))
*trigger = LEVEL_TRIGGER;
else
*trigger = EDGE_TRIGGER;
return 0;
}
void vpic_pending_intr(struct vm *vm, int *vecptr)
{
struct vpic *vpic;
struct pic *pic;
int pin;
vpic = vm_pic(vm);
pic = &vpic->pic[0];
VPIC_LOCK(vpic);
pin = vpic_get_highest_irrpin(pic);
if (pin == 2) {
pic = &vpic->pic[1];
pin = vpic_get_highest_irrpin(pic);
}
/*
* If there are no pins active at this moment then return the spurious
* interrupt vector instead.
*/
if (pin == -1) {
*vecptr = -1;
VPIC_UNLOCK(vpic);
return;
}
ASSERT(pin >= 0 && pin <= 7, "invalid pin");
*vecptr = pic->irq_base + pin;
dev_dbg(ACRN_DBG_PIC, "Got pending vector 0x%x\n", *vecptr);
VPIC_UNLOCK(vpic);
}
static void vpic_pin_accepted(struct pic *pic, int pin)
{
pic->intr_raised = false;
if ((pic->elc & (1 << pin)) == 0) {
/* only clear the request bit in edge trigger mode */
pic->request &= ~(1 << pin);
}
if (pic->aeoi == true) {
if (pic->rotate == true)
pic->lowprio = pin;
} else {
pic->service |= (1 << pin);
}
}
void vpic_intr_accepted(struct vm *vm, int vector)
{
struct vpic *vpic;
int pin;
vpic = vm_pic(vm);
VPIC_LOCK(vpic);
pin = vector & 0x7;
if ((vector & ~0x7) == vpic->pic[1].irq_base) {
vpic_pin_accepted(&vpic->pic[1], pin);
/*
* If this vector originated from the slave,
* accept the cascaded interrupt too.
*/
vpic_pin_accepted(&vpic->pic[0], 2);
} else {
vpic_pin_accepted(&vpic->pic[0], pin);
}
vpic_notify_intr(vpic);
VPIC_UNLOCK(vpic);
}
static int vpic_read(struct vpic *vpic, struct pic *pic,
int port, uint32_t *eax)
{
int pin;
VPIC_LOCK(vpic);
if (pic->poll) {
pic->poll = 0;
pin = vpic_get_highest_irrpin(pic);
if (pin >= 0) {
vpic_pin_accepted(pic, pin);
*eax = 0x80 | pin;
} else {
*eax = 0;
}
} else {
if (port & ICU_IMR_OFFSET) {
/* read interrupt mask register */
*eax = pic->mask;
} else {
if (pic->rd_cmd_reg == OCW3_RIS) {
/* read interrupt service register */
*eax = pic->service;
} else {
/* read interrupt request register */
*eax = pic->request;
}
}
}
VPIC_UNLOCK(vpic);
return 0;
}
static int vpic_write(struct vpic *vpic, struct pic *pic,
int port, uint32_t *eax)
{
int error;
uint8_t val;
error = 0;
val = *eax;
VPIC_LOCK(vpic);
if (port & ICU_IMR_OFFSET) {
switch (pic->icw_num) {
case 2:
error = vpic_icw2(vpic, pic, val);
break;
case 3:
error = vpic_icw3(vpic, pic, val);
break;
case 4:
error = vpic_icw4(vpic, pic, val);
break;
default:
error = vpic_ocw1(vpic, pic, val);
break;
}
} else {
if (val & (1 << 4))
error = vpic_icw1(vpic, pic, val);
if (pic->ready) {
if (val & (1 << 3))
error = vpic_ocw3(vpic, pic, val);
else
error = vpic_ocw2(vpic, pic, val);
}
}
if (pic->ready)
vpic_notify_intr(vpic);
VPIC_UNLOCK(vpic);
return error;
}
static int vpic_master_handler(struct vm *vm, bool in, int port, int bytes,
uint32_t *eax)
{
struct vpic *vpic;
struct pic *pic;
vpic = vm_pic(vm);
pic = &vpic->pic[0];
if (bytes != 1)
return -1;
if (in)
return vpic_read(vpic, pic, port, eax);
return vpic_write(vpic, pic, port, eax);
}
static uint32_t vpic_master_io_read(__unused struct vm_io_handler *hdlr,
struct vm *vm, ioport_t addr, size_t width)
{
uint32_t val = 0;
if (vpic_master_handler(vm, true, (int)addr, (int)width, &val) < 0)
pr_err("pic master read port 0x%x width=%d failed\n",
addr, width);
return val;
}
static void vpic_master_io_write(__unused struct vm_io_handler *hdlr,
struct vm *vm, ioport_t addr, size_t width, uint32_t v)
{
uint32_t val = v;
if (vpic_master_handler(vm, false, (int)addr, (int)width, &val) < 0)
pr_err("%s: write port 0x%x width=%d value 0x%x failed\n",
__func__, addr, width, val);
}
static int vpic_slave_handler(struct vm *vm, bool in, int port, int bytes,
uint32_t *eax)
{
struct vpic *vpic;
struct pic *pic;
vpic = vm_pic(vm);
pic = &vpic->pic[1];
if (bytes != 1)
return -1;
if (in)
return vpic_read(vpic, pic, port, eax);
return vpic_write(vpic, pic, port, eax);
}
static uint32_t vpic_slave_io_read(__unused struct vm_io_handler *hdlr,
struct vm *vm, ioport_t addr, size_t width)
{
uint32_t val = 0;
if (vpic_slave_handler(vm, true, (int)addr, (int)width, &val) < 0)
pr_err("pic slave read port 0x%x width=%d failed\n",
addr, width);
return val;
}
static void vpic_slave_io_write(__unused struct vm_io_handler *hdlr,
struct vm *vm, ioport_t addr, size_t width, uint32_t v)
{
uint32_t val = v;
if (vpic_slave_handler(vm, false, (int)addr, (int)width, &val) < 0)
pr_err("%s: write port 0x%x width=%d value 0x%x failed\n",
__func__, addr, width, val);
}
static int vpic_elc_handler(struct vm *vm, bool in, int port, int bytes,
uint32_t *eax)
{
struct vpic *vpic;
bool is_master;
vpic = vm_pic(vm);
is_master = (port == IO_ELCR1);
if (bytes != 1)
return -1;
VPIC_LOCK(vpic);
if (in) {
if (is_master)
*eax = vpic->pic[0].elc;
else
*eax = vpic->pic[1].elc;
} else {
/*
* For the master PIC the cascade channel (IRQ2), the
* heart beat timer (IRQ0), and the keyboard
* controller (IRQ1) cannot be programmed for level
* mode.
*
* For the slave PIC the real time clock (IRQ8) and
* the floating point error interrupt (IRQ13) cannot
* be programmed for level mode.
*/
if (is_master)
vpic->pic[0].elc = (*eax & 0xf8);
else
vpic->pic[1].elc = (*eax & 0xde);
}
VPIC_UNLOCK(vpic);
return 0;
}
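The 0xf8 and 0xde masks in vpic_elc_handler() above encode the comment: level mode can never be set for IRQ0/1/2 on the master or for IRQ8/IRQ13 on the slave. A quick standalone check of that encoding:

#include <assert.h>
#include <stdint.h>

/* Illustrative only: verify which ELCR bits the write masks force to stay
 * edge triggered (bit n of the slave register corresponds to IRQ 8+n). */
static void example_check_elcr_masks(void)
{
        uint8_t master_mask = 0xf8, slave_mask = 0xde;

        assert((master_mask & (1 << 0)) == 0);  /* IRQ0: timer */
        assert((master_mask & (1 << 1)) == 0);  /* IRQ1: keyboard */
        assert((master_mask & (1 << 2)) == 0);  /* IRQ2: cascade */
        assert((slave_mask & (1 << 0)) == 0);   /* IRQ8: RTC */
        assert((slave_mask & (1 << 5)) == 0);   /* IRQ13: FPU error */
}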
static uint32_t vpic_elc_io_read(__unused struct vm_io_handler *hdlr,
struct vm *vm, ioport_t addr, size_t width)
{
uint32_t val = 0;
if (vpic_elc_handler(vm, true, (int)addr, (int)width, &val) < 0)
pr_err("pic elc read port 0x%x width=%d failed", addr, width);
return val;
}
static void vpic_elc_io_write(__unused struct vm_io_handler *hdlr,
struct vm *vm, ioport_t addr, size_t width, uint32_t v)
{
uint32_t val = v;
if (vpic_elc_handler(vm, false, (int)addr, (int)width, &val) < 0)
pr_err("%s: write port 0x%x width=%d value 0x%x failed\n",
__func__, addr, width, val);
}
void vpic_register_io_handler(struct vm *vm)
{
struct vm_io_range master_range = {
.flags = IO_ATTR_RW,
.base = 0x20,
.len = 2
};
struct vm_io_range slave_range = {
.flags = IO_ATTR_RW,
.base = 0xa0,
.len = 2
};
struct vm_io_range elcr_range = {
.flags = IO_ATTR_RW,
.base = 0x4d0,
.len = 2
};
register_io_emulation_handler(vm, &master_range,
&vpic_master_io_read, &vpic_master_io_write);
register_io_emulation_handler(vm, &slave_range,
&vpic_slave_io_read, &vpic_slave_io_write);
register_io_emulation_handler(vm, &elcr_range,
&vpic_elc_io_read, &vpic_elc_io_write);
}
void *vpic_init(struct vm *vm)
{
struct vpic *vpic;
vpic_register_io_handler(vm);
vpic = malloc(sizeof(struct vpic));
ASSERT(vpic != NULL, "vpic allocation failed");
vpic->vm = vm;
vpic->pic[0].mask = 0xff;
vpic->pic[1].mask = 0xff;
VPIC_LOCK_INIT(vpic);
return vpic;
}
void vpic_cleanup(struct vm *vm)
{
if (vm->vpic) {
free(vm->vpic);
vm->vpic = NULL;
}
}