initial import

internal commit: 14ac2bc2299032fa6714d1fefa7cf0987b3e3085

Signed-off-by: Eddie Dong <eddie.dong@intel.com>
Author: Eddie Dong
Date: 2018-03-07 20:57:14 +08:00
Committed-by: Jack Ren
Commit: f4cd4338fd
156 changed files with 41265 additions and 0 deletions

arch/x86/guest/guest.c (new file, 389 lines)

@@ -0,0 +1,389 @@
/*
* Copyright (C) 2018 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <hypervisor.h>
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <bsp_cfg.h>
#include <bsp_extern.h>
#include <acrn_hv_defs.h>
#include <hv_debug.h>
#include <multiboot.h>
#define BOOT_ARGS_LOAD_ADDR 0x24EFC000
#define ACRN_DBG_GUEST 6
/* for VM0 e820 */
uint32_t e820_entries;
struct e820_entry e820[E820_MAX_ENTRIES];
struct e820_mem_params e820_mem;
inline bool
is_vm0(struct vm *vm)
{
return (vm->attr.boot_idx & 0x7F) == 0;
}
inline struct vcpu *vcpu_from_vid(struct vm *vm, int vcpu_id)
{
int i;
struct vcpu *vcpu;
foreach_vcpu(i, vm, vcpu) {
if (vcpu->vcpu_id == vcpu_id)
return vcpu;
}
return NULL;
}
inline struct vcpu *vcpu_from_pid(struct vm *vm, int pcpu_id)
{
int i;
struct vcpu *vcpu;
foreach_vcpu(i, vm, vcpu) {
if (vcpu->pcpu_id == pcpu_id)
return vcpu;
}
return NULL;
}
inline struct vcpu *get_primary_vcpu(struct vm *vm)
{
int i;
struct vcpu *vcpu;
foreach_vcpu(i, vm, vcpu) {
if (is_vcpu_bsp(vcpu))
return vcpu;
}
return NULL;
}
inline uint64_t vcpumask2pcpumask(struct vm *vm, uint64_t vdmask)
{
int vcpu_id;
uint64_t dmask = 0;
struct vcpu *vcpu;
while ((vcpu_id = bitmap_ffs(&vdmask)) >= 0) {
bitmap_clr(vcpu_id, &vdmask);
vcpu = vcpu_from_vid(vm, vcpu_id);
ASSERT(vcpu, "vcpu_from_vid failed");
bitmap_set(vcpu->pcpu_id, &dmask);
}
return dmask;
}
inline bool vm_lapic_disabled(struct vm *vm)
{
int i;
struct vcpu *vcpu;
foreach_vcpu(i, vm, vcpu) {
if (vlapic_enabled(vcpu->arch_vcpu.vlapic))
return false;
}
return true;
}
int init_vm0_boot_info(struct vm *vm)
{
struct multiboot_module *mods = NULL;
struct multiboot_info *mbi = NULL;
if (!is_vm0(vm)) {
pr_err("just for vm0 to get info!");
return -EINVAL;
}
if (boot_regs[0] != MULTIBOOT_INFO_MAGIC) {
ASSERT(0, "no multiboot info found");
return -EINVAL;
}
mbi = (struct multiboot_info *)((uint64_t)boot_regs[1]);
dev_dbg(ACRN_DBG_GUEST, "Multiboot detected, flag=0x%x", mbi->mi_flags);
if (!(mbi->mi_flags & MULTIBOOT_INFO_HAS_MODS)) {
ASSERT(0, "no sos kernel info found");
return -EINVAL;
}
dev_dbg(ACRN_DBG_GUEST, "mod counts=%d\n", mbi->mi_mods_count);
/* mod[0] holds the kernel & cmdline; other mods hold ramdisk/firmware info */
mods = (struct multiboot_module *)(uint64_t)mbi->mi_mods_addr;
dev_dbg(ACRN_DBG_GUEST, "mod0 start=0x%x, end=0x%x",
mods[0].mm_mod_start, mods[0].mm_mod_end);
dev_dbg(ACRN_DBG_GUEST, "cmd addr=0x%x, str=%s", mods[0].mm_string,
(char *) (uint64_t)mods[0].mm_string);
vm->sw.kernel_type = VM_LINUX_GUEST;
vm->sw.kernel_info.kernel_src_addr =
(void *)(uint64_t)mods[0].mm_mod_start;
vm->sw.kernel_info.kernel_size =
mods[0].mm_mod_end - mods[0].mm_mod_start;
vm->sw.kernel_info.kernel_load_addr =
(void *)(uint64_t)mods[0].mm_mod_start;
vm->sw.linux_info.bootargs_src_addr =
(void *)(uint64_t)mods[0].mm_string;
vm->sw.linux_info.bootargs_load_addr =
(void *)BOOT_ARGS_LOAD_ADDR;
vm->sw.linux_info.bootargs_size =
strnlen_s((char *)(uint64_t) mods[0].mm_string, MEM_2K);
return 0;
}
uint64_t gva2gpa(struct vm *vm, uint64_t cr3, uint64_t gva)
{
int level, index, shift;
uint64_t *base, addr, entry, page_size;
uint64_t gpa = 0;
addr = cr3;
for (level = 3; level >= 0; level--) {
addr = addr & IA32E_REF_MASK;
base = GPA2HVA(vm, addr);
ASSERT(base != NULL, "invalid ptp base.");
shift = level * 9 + 12;
index = (gva >> shift) & 0x1FF;
page_size = 1UL << shift;
entry = base[index];
if (level > 0 && (entry & MMU_32BIT_PDE_PS) != 0)
break;
addr = entry;
}
entry >>= shift; entry <<= (shift + 12); entry >>= 12;
gpa = entry | (gva & (page_size - 1));
return gpa;
}
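/* Illustrative sketch (not from the original source): the walk above derives
* each table index from a 9-bit slice of the GVA (shift = level * 9 + 12).
* This standalone program, using a hypothetical GVA, shows that arithmetic.
*/
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t gva = 0x00007f1234567890UL;  /* hypothetical guest virtual address */
    int level;

    for (level = 3; level >= 0; level--) {
        int shift = level * 9 + 12;                 /* 39, 30, 21, 12 */
        int index = (int)((gva >> shift) & 0x1FF);  /* 9-bit table index */

        printf("level %d: shift %2d index %3d\n", level, shift, index);
    }
    printf("page offset: 0x%llx\n", (unsigned long long)(gva & 0xFFF));
    return 0;
}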
void init_e820(void)
{
unsigned int i;
if (boot_regs[0] == MULTIBOOT_INFO_MAGIC) {
struct multiboot_info *mbi =
(struct multiboot_info *)((uint64_t)boot_regs[1]);
pr_info("Multiboot info detected\n");
if (mbi->mi_flags & 0x40) {
struct multiboot_mmap *mmap =
(struct multiboot_mmap *)
((uint64_t)mbi->mi_mmap_addr);
e820_entries = mbi->mi_mmap_length/
sizeof(struct multiboot_mmap);
if (e820_entries > E820_MAX_ENTRIES) {
pr_err("Too many E820 entries %d\n",
e820_entries);
e820_entries = E820_MAX_ENTRIES;
}
dev_dbg(ACRN_DBG_GUEST,
"mmap length 0x%x addr 0x%x entries %d\n",
mbi->mi_mmap_length, mbi->mi_mmap_addr,
e820_entries);
for (i = 0; i < e820_entries; i++) {
e820[i].baseaddr = mmap[i].baseaddr;
e820[i].length = mmap[i].length;
e820[i].type = mmap[i].type;
dev_dbg(ACRN_DBG_GUEST,
"mmap table: %d type: 0x%x\n",
i, mmap[i].type);
dev_dbg(ACRN_DBG_GUEST,
"Base: 0x%016llx length: 0x%016llx",
mmap[i].baseaddr, mmap[i].length);
}
}
} else
ASSERT(0, "no multiboot info found");
}
void obtain_e820_mem_info(void)
{
unsigned int i;
struct e820_entry *entry;
e820_mem.mem_bottom = UINT64_MAX;
e820_mem.mem_top = 0x00;
e820_mem.max_ram_blk_base = 0;
e820_mem.max_ram_blk_size = 0;
for (i = 0; i < e820_entries; i++) {
entry = &e820[i];
if (e820_mem.mem_bottom > entry->baseaddr)
e820_mem.mem_bottom = entry->baseaddr;
if (entry->baseaddr + entry->length
> e820_mem.mem_top) {
e820_mem.mem_top = entry->baseaddr
+ entry->length;
}
if (entry->baseaddr == UOS_DEFAULT_START_ADDR
&& entry->type == E820_TYPE_RAM) {
e820_mem.max_ram_blk_base =
entry->baseaddr;
e820_mem.max_ram_blk_size = entry->length;
}
}
}
static void rebuild_vm0_e820(void)
{
unsigned int i;
uint64_t entry_start;
uint64_t entry_end;
uint64_t hv_start = CONFIG_RAM_START;
uint64_t hv_end = hv_start + CONFIG_RAM_SIZE;
struct e820_entry *entry, new_entry = {0};
/* Hypervisor memory needs to be filtered out of the e820 table:
* the hypervisor itself plus other HV-reserved memory (e.g. for vGT).
* An illustrative sketch of the splitting cases follows this function.
*/
for (i = 0; i < e820_entries; i++) {
entry = &e820[i];
entry_start = entry->baseaddr;
entry_end = entry->baseaddr + entry->length;
/* No need to handle these cases */
if (entry->type != E820_TYPE_RAM || entry_end <= hv_start
|| entry_start >= hv_end) {
continue;
}
/* filter out hv mem and adjust length of this entry*/
if (entry_start < hv_start && entry_end <= hv_end) {
entry->length = hv_start - entry_start;
continue;
}
/* filter out hv mem and need to create a new entry*/
if (entry_start < hv_start && entry_end > hv_end) {
entry->length = hv_start - entry_start;
new_entry.baseaddr = hv_end;
new_entry.length = entry_end - hv_end;
new_entry.type = E820_TYPE_RAM;
continue;
}
/* This entry is within the range of hv mem
* change to E820_TYPE_RESERVED
*/
if (entry_start >= hv_start && entry_end <= hv_end) {
entry->type = E820_TYPE_RESERVED;
continue;
}
if (entry_start >= hv_start && entry_start < hv_end
&& entry_end > hv_end) {
entry->baseaddr = hv_end;
entry->length = entry_end - hv_end;
continue;
}
}
if (new_entry.length > 0) {
e820_entries++;
ASSERT(e820_entries <= E820_MAX_ENTRIES,
"e820 entry overflow");
entry = &e820[e820_entries - 1];
entry->baseaddr = new_entry.baseaddr;
entry->length = new_entry.length;
entry->type = new_entry.type;
}
}
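/* Illustrative sketch (not from the original source): the "entry straddles
* both ends of the hypervisor range" case above, shown with hypothetical
* addresses. The original entry keeps the memory below hv_start and a new
* entry is created for the memory above hv_end.
*/
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t ram_start = 0x0UL, ram_end = 0x80000000UL;      /* 0 - 2G RAM */
    uint64_t hv_start = 0x20000000UL, hv_end = 0x30000000UL; /* 512M - 768M */

    if (ram_start < hv_start && ram_end > hv_end) {
        printf("entry 0: base 0x%llx len 0x%llx (below hypervisor)\n",
               (unsigned long long)ram_start,
               (unsigned long long)(hv_start - ram_start));
        printf("entry 1: base 0x%llx len 0x%llx (above hypervisor)\n",
               (unsigned long long)hv_end,
               (unsigned long long)(ram_end - hv_end));
    }
    return 0;
}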
int prepare_vm0_memmap_and_e820(struct vm *vm)
{
unsigned int i;
uint32_t attr_wb = (MMU_MEM_ATTR_READ |
MMU_MEM_ATTR_WRITE |
MMU_MEM_ATTR_EXECUTE |
MMU_MEM_ATTR_WB_CACHE);
uint32_t attr_uc = (MMU_MEM_ATTR_READ |
MMU_MEM_ATTR_WRITE |
MMU_MEM_ATTR_EXECUTE |
MMU_MEM_ATTR_UNCACHED);
struct e820_entry *entry;
ASSERT(is_vm0(vm), "This func only for vm0");
rebuild_vm0_e820();
dev_dbg(ACRN_DBG_GUEST,
"vm0: bottom memory - 0x%llx, top memory - 0x%llx\n",
e820_mem.mem_bottom, e820_mem.mem_top);
/* create real ept map for all ranges with UC */
ept_mmap(vm, e820_mem.mem_bottom, e820_mem.mem_bottom,
(e820_mem.mem_top - e820_mem.mem_bottom),
MAP_MMIO, attr_uc);
/* update ram entries to WB attr */
for (i = 0; i < e820_entries; i++) {
entry = &e820[i];
if (entry->type == E820_TYPE_RAM)
ept_mmap(vm, entry->baseaddr, entry->baseaddr,
entry->length, MAP_MEM, attr_wb);
}
dev_dbg(ACRN_DBG_GUEST, "VM0 e820 layout:\n");
for (i = 0; i < e820_entries; i++) {
entry = &e820[i];
dev_dbg(ACRN_DBG_GUEST,
"e820 table: %d type: 0x%x", i, entry->type);
dev_dbg(ACRN_DBG_GUEST,
"BaseAddress: 0x%016llx length: 0x%016llx\n",
entry->baseaddr, entry->length);
}
/* Unmap the hypervisor itself for safety; any SOS access to
* hypervisor memory will then cause an EPT violation.
*/
ept_mmap(vm, CONFIG_RAM_START, CONFIG_RAM_START,
CONFIG_RAM_SIZE, MAP_UNMAP, 0);
return 0;
}

arch/x86/guest/instr_emul.c (new file, 2137 lines; diff suppressed because it is too large)

@@ -0,0 +1,95 @@
/*-
* Copyright (c) 2012 NetApp, Inc.
* Copyright (c) 2017 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VMM_INSTRUCTION_EMUL_H_
#define _VMM_INSTRUCTION_EMUL_H_
/*
* Callback functions to read and write memory regions.
*/
typedef int (*mem_region_read_t)(struct vcpu *vcpu, uint64_t gpa,
uint64_t *rval, int rsize, void *arg);
typedef int (*mem_region_write_t)(struct vcpu *vcpu, uint64_t gpa,
uint64_t wval, int wsize, void *arg);
/*
* Emulate the decoded 'vie' instruction.
*
* The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
* containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
* callback functions.
*
* 'void *vm' should be 'struct vm *' when called from kernel context and
* 'struct vmctx *' when called from user context.
*/
int vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
struct vm_guest_paging *paging, mem_region_read_t mrr,
mem_region_write_t mrw, void *mrarg);
int vie_update_register(struct vcpu *vcpu, enum vm_reg_name reg,
uint64_t val, int size);
/*
* Returns 1 if an alignment check exception should be injected and 0 otherwise.
*/
int vie_alignment_check(int cpl, int operand_size, uint64_t cr0,
uint64_t rflags, uint64_t gla);
/* Returns 1 if the 'gla' is not canonical and 0 otherwise. */
int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla);
uint64_t vie_size2mask(int size);
int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
struct seg_desc *desc, uint64_t off, int length, int addrsize, int prot,
uint64_t *gla);
void vie_init(struct vie *vie, const char *inst_bytes, int inst_length);
/*
* Decode the instruction fetched into 'vie' so it can be emulated.
*
* 'gla' is the guest linear address provided by the hardware assist
* that caused the nested page table fault. It is used to verify that
* the software instruction decoding is in agreement with the hardware.
*
* Some hardware assists do not provide the 'gla' to the hypervisor.
* To skip the 'gla' verification for this or any other reason pass
* in VIE_INVALID_GLA instead.
*/
#define VIE_INVALID_GLA (1UL << 63) /* a non-canonical address */
int vmm_decode_instruction(struct vcpu *vcpu, uint64_t gla,
enum vm_cpu_mode cpu_mode, int csd, struct vie *vie);
int emulate_instruction(struct vcpu *vcpu, struct mem_io *mmio);
int analyze_instruction(struct vcpu *vcpu, struct mem_io *mmio);
#endif /* _VMM_INSTRUCTION_EMUL_H_ */
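/* Hedged usage sketch (not part of this header): how a caller might drive the
* decode/emulate API declared above. my_mmio_read, my_mmio_write and
* handle_mmio_fault are hypothetical names; 'vie' is assumed to have been
* filled with the fetched instruction bytes via vie_init() beforehand.
*/
static int my_mmio_read(struct vcpu *vcpu, uint64_t gpa, uint64_t *rval,
        int rsize, void *arg)
{
    *rval = 0;  /* a real callback would return the device register value */
    return 0;
}

static int my_mmio_write(struct vcpu *vcpu, uint64_t gpa, uint64_t wval,
        int wsize, void *arg)
{
    return 0;   /* a real callback would latch wval into the device model */
}

static int handle_mmio_fault(struct vcpu *vcpu, uint64_t gpa, uint64_t gla,
        enum vm_cpu_mode cpu_mode, int csd, struct vie *vie,
        struct vm_guest_paging *paging, void *ctx)
{
    /* pass VIE_INVALID_GLA as 'gla' if the hardware did not report one */
    if (vmm_decode_instruction(vcpu, gla, cpu_mode, csd, vie) != 0)
        return -1;

    return vmm_emulate_instruction(vcpu, gpa, vie, paging,
            my_mmio_read, my_mmio_write, ctx);
}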


@@ -0,0 +1,466 @@
/*
* Copyright (C) 2018 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <hypervisor.h>
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <bsp_extern.h>
#include <hv_debug.h>
#include "instr_emul_wrapper.h"
#include "instr_emul.h"
struct emul_cnx {
struct vie vie;
struct vm_guest_paging paging;
struct vcpu *vcpu;
struct mem_io *mmio;
};
static DEFINE_CPU_DATA(struct emul_cnx, g_inst_ctxt);
static int
encode_vmcs_seg_desc(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc);
static int32_t
get_vmcs_field(int ident);
static bool
is_segment_register(int reg);
static bool
is_descriptor_table(int reg);
int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{
struct run_context *cur_context;
if (!vcpu)
return -EINVAL;
if ((reg >= VM_REG_LAST) || (reg < VM_REG_GUEST_RAX))
return -EINVAL;
if ((reg >= VM_REG_GUEST_RAX) && (reg <= VM_REG_GUEST_RDI)) {
cur_context =
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
*retval = cur_context->guest_cpu_regs.longs[reg];
} else if ((reg > VM_REG_GUEST_RDI) && (reg < VM_REG_LAST)) {
int32_t field = get_vmcs_field(reg);
if (field != -1)
*retval = exec_vmread(field);
else
return -EINVAL;
}
return 0;
}
int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
struct run_context *cur_context;
if (!vcpu)
return -EINVAL;
if ((reg >= VM_REG_LAST) || (reg < VM_REG_GUEST_RAX))
return -EINVAL;
if ((reg >= VM_REG_GUEST_RAX) && (reg <= VM_REG_GUEST_RDI)) {
cur_context =
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
cur_context->guest_cpu_regs.longs[reg] = val;
} else if ((reg > VM_REG_GUEST_RDI) && (reg < VM_REG_LAST)) {
int32_t field = get_vmcs_field(reg);
if (field != -1)
exec_vmwrite(field, val);
else
return -EINVAL;
}
return 0;
}
int vm_set_seg_desc(struct vcpu *vcpu, int seg, struct seg_desc *ret_desc)
{
int error;
uint32_t base, limit, access;
if ((!vcpu) || (!ret_desc))
return -EINVAL;
if (!is_segment_register(seg) && !is_descriptor_table(seg))
return -EINVAL;
error = encode_vmcs_seg_desc(seg, &base, &limit, &access);
if ((error != 0) || (access == 0xffffffff))
return -EINVAL;
exec_vmwrite(base, ret_desc->base);
exec_vmwrite(limit, ret_desc->limit);
exec_vmwrite(access, ret_desc->access);
return 0;
}
int vm_get_seg_desc(struct vcpu *vcpu, int seg, struct seg_desc *desc)
{
int error;
uint32_t base, limit, access;
if ((!vcpu) || (!desc))
return -EINVAL;
if (!is_segment_register(seg) && !is_descriptor_table(seg))
return -EINVAL;
error = encode_vmcs_seg_desc(seg, &base, &limit, &access);
if ((error != 0) || (access == 0xffffffff))
return -EINVAL;
desc->base = exec_vmread(base);
desc->limit = exec_vmread(limit);
desc->access = exec_vmread(access);
return 0;
}
int vm_restart_instruction(struct vcpu *vcpu)
{
if (!vcpu)
return -EINVAL;
VCPU_RETAIN_RIP(vcpu);
return 0;
}
static bool is_descriptor_table(int reg)
{
switch (reg) {
case VM_REG_GUEST_IDTR:
case VM_REG_GUEST_GDTR:
return true;
default:
return false;
}
}
static bool is_segment_register(int reg)
{
switch (reg) {
case VM_REG_GUEST_ES:
case VM_REG_GUEST_CS:
case VM_REG_GUEST_SS:
case VM_REG_GUEST_DS:
case VM_REG_GUEST_FS:
case VM_REG_GUEST_GS:
case VM_REG_GUEST_TR:
case VM_REG_GUEST_LDTR:
return true;
default:
return false;
}
}
static int encode_vmcs_seg_desc(int seg, uint32_t *base, uint32_t *lim,
uint32_t *acc)
{
switch (seg) {
case VM_REG_GUEST_ES:
*base = VMX_GUEST_ES_BASE;
*lim = VMX_GUEST_ES_LIMIT;
*acc = VMX_GUEST_ES_ATTR;
break;
case VM_REG_GUEST_CS:
*base = VMX_GUEST_CS_BASE;
*lim = VMX_GUEST_CS_LIMIT;
*acc = VMX_GUEST_CS_ATTR;
break;
case VM_REG_GUEST_SS:
*base = VMX_GUEST_SS_BASE;
*lim = VMX_GUEST_SS_LIMIT;
*acc = VMX_GUEST_SS_ATTR;
break;
case VM_REG_GUEST_DS:
*base = VMX_GUEST_DS_BASE;
*lim = VMX_GUEST_DS_LIMIT;
*acc = VMX_GUEST_DS_ATTR;
break;
case VM_REG_GUEST_FS:
*base = VMX_GUEST_FS_BASE;
*lim = VMX_GUEST_FS_LIMIT;
*acc = VMX_GUEST_FS_ATTR;
break;
case VM_REG_GUEST_GS:
*base = VMX_GUEST_GS_BASE;
*lim = VMX_GUEST_GS_LIMIT;
*acc = VMX_GUEST_GS_ATTR;
break;
case VM_REG_GUEST_TR:
*base = VMX_GUEST_TR_BASE;
*lim = VMX_GUEST_TR_LIMIT;
*acc = VMX_GUEST_TR_ATTR;
break;
case VM_REG_GUEST_LDTR:
*base = VMX_GUEST_LDTR_BASE;
*lim = VMX_GUEST_LDTR_LIMIT;
*acc = VMX_GUEST_LDTR_ATTR;
break;
case VM_REG_GUEST_IDTR:
*base = VMX_GUEST_IDTR_BASE;
*lim = VMX_GUEST_IDTR_LIMIT;
*acc = 0xffffffff;
break;
case VM_REG_GUEST_GDTR:
*base = VMX_GUEST_GDTR_BASE;
*lim = VMX_GUEST_GDTR_LIMIT;
*acc = 0xffffffff;
break;
default:
return -EINVAL;
}
return 0;
}
static int32_t get_vmcs_field(int ident)
{
switch (ident) {
case VM_REG_GUEST_CR0:
return VMX_GUEST_CR0;
case VM_REG_GUEST_CR3:
return VMX_GUEST_CR3;
case VM_REG_GUEST_CR4:
return VMX_GUEST_CR4;
case VM_REG_GUEST_DR7:
return VMX_GUEST_DR7;
case VM_REG_GUEST_RSP:
return VMX_GUEST_RSP;
case VM_REG_GUEST_RIP:
return VMX_GUEST_RIP;
case VM_REG_GUEST_RFLAGS:
return VMX_GUEST_RFLAGS;
case VM_REG_GUEST_ES:
return VMX_GUEST_ES_SEL;
case VM_REG_GUEST_CS:
return VMX_GUEST_CS_SEL;
case VM_REG_GUEST_SS:
return VMX_GUEST_SS_SEL;
case VM_REG_GUEST_DS:
return VMX_GUEST_DS_SEL;
case VM_REG_GUEST_FS:
return VMX_GUEST_FS_SEL;
case VM_REG_GUEST_GS:
return VMX_GUEST_GS_SEL;
case VM_REG_GUEST_TR:
return VMX_GUEST_TR_SEL;
case VM_REG_GUEST_LDTR:
return VMX_GUEST_LDTR_SEL;
case VM_REG_GUEST_EFER:
return VMX_GUEST_IA32_EFER_FULL;
case VM_REG_GUEST_PDPTE0:
return VMX_GUEST_PDPTE0_FULL;
case VM_REG_GUEST_PDPTE1:
return VMX_GUEST_PDPTE1_FULL;
case VM_REG_GUEST_PDPTE2:
return VMX_GUEST_PDPTE2_FULL;
case VM_REG_GUEST_PDPTE3:
return VMX_GUEST_PDPTE3_FULL;
default:
return -1;
}
}
static enum vm_cpu_mode get_vmx_cpu_mode(void)
{
uint32_t csar;
if (exec_vmread(VMX_GUEST_IA32_EFER_FULL) & EFER_LMA) {
csar = exec_vmread(VMX_GUEST_CS_ATTR);
if (csar & 0x2000)
return CPU_MODE_64BIT; /* CS.L = 1 */
else
return CPU_MODE_COMPATIBILITY;
} else if (exec_vmread(VMX_GUEST_CR0) & CR0_PE) {
return CPU_MODE_PROTECTED;
} else {
return CPU_MODE_REAL;
}
}
static void get_guest_paging_info(struct vcpu *vcpu, struct emul_cnx *emul_cnx)
{
uint32_t cpl, csar;
ASSERT(emul_cnx != NULL && vcpu != NULL, "Error in input arguments");
csar = exec_vmread(VMX_GUEST_CS_ATTR);
cpl = (csar >> 5) & 3;
emul_cnx->paging.cr3 =
vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3;
emul_cnx->paging.cpl = cpl;
emul_cnx->paging.cpu_mode = get_vmx_cpu_mode();
emul_cnx->paging.paging_mode = PAGING_MODE_FLAT;/*maybe change later*/
}
static int mmio_read(struct vcpu *vcpu, __unused uint64_t gpa, uint64_t *rval,
__unused int size, __unused void *arg)
{
struct emul_cnx *emul_cnx;
struct mem_io *mmio;
if (!vcpu)
return -EINVAL;
emul_cnx = &per_cpu(g_inst_ctxt, vcpu->pcpu_id);
mmio = emul_cnx->mmio;
ASSERT(mmio != NULL, "invalid mmio when reading");
*rval = mmio->value;
return 0;
}
static int mmio_write(struct vcpu *vcpu, __unused uint64_t gpa, uint64_t wval,
__unused int size, __unused void *arg)
{
struct emul_cnx *emul_cnx;
struct mem_io *mmio;
if (!vcpu)
return -EINVAL;
emul_cnx = &per_cpu(g_inst_ctxt, vcpu->pcpu_id);
mmio = emul_cnx->mmio;
ASSERT(mmio != NULL, "invalid mmio when writing");
mmio->value = wval;
return 0;
}
void vm_gva2gpa(struct vcpu *vcpu, uint64_t gva, uint64_t *gpa)
{
ASSERT(gpa != NULL, "Error in input arguments");
ASSERT(vcpu != NULL,
"Invalid vcpu id when gva2gpa");
*gpa = gva2gpa(vcpu->vm,
vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3, gva);
}
int analyze_instruction(struct vcpu *vcpu, struct mem_io *mmio)
{
uint64_t guest_rip_gva, guest_rip_gpa;
char *guest_rip_hva;
struct emul_cnx *emul_cnx;
uint32_t csar;
int retval = 0;
enum vm_cpu_mode cpu_mode;
int i;
guest_rip_gva =
vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].rip;
guest_rip_gpa = gva2gpa(vcpu->vm,
vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3,
guest_rip_gva);
guest_rip_hva = GPA2HVA(vcpu->vm, guest_rip_gpa);
emul_cnx = &per_cpu(g_inst_ctxt, vcpu->pcpu_id);
emul_cnx->mmio = mmio;
emul_cnx->vcpu = vcpu;
/* For now, HVA <-> HPA is a 1:1 mapping, so using the HPA is OK */
vie_init(&emul_cnx->vie, guest_rip_hva,
vcpu->arch_vcpu.inst_len);
get_guest_paging_info(vcpu, emul_cnx);
csar = exec_vmread(VMX_GUEST_CS_ATTR);
cpu_mode = get_vmx_cpu_mode();
mmio->private_data = emul_cnx;
retval = vmm_decode_instruction(vcpu, guest_rip_gva,
cpu_mode, SEG_DESC_DEF32(csar), &emul_cnx->vie);
mmio->access_size = emul_cnx->vie.opsize;
if (retval != 0) {
/* dump the instruction when decoding failed */
pr_err("decode following instruction failed @ 0x%016llx:",
exec_vmread(VMX_GUEST_RIP));
for (i = 0; i < emul_cnx->vie.num_valid; i++) {
if (i >= VIE_INST_SIZE)
break;
if (i == 0)
pr_err("\n");
pr_err("%d=%02hhx ",
i, emul_cnx->vie.inst[i]);
}
}
return retval;
}
int emulate_instruction(struct vcpu *vcpu, struct mem_io *mmio)
{
struct emul_cnx *emul_cnx = (struct emul_cnx *)(mmio->private_data);
struct vm_guest_paging *paging = &emul_cnx->paging;
int i, retval = 0;
uint64_t gpa = mmio->paddr;
mem_region_read_t mread = mmio_read;
mem_region_write_t mwrite = mmio_write;
retval = vmm_emulate_instruction(vcpu, gpa,
&emul_cnx->vie, paging, mread, mwrite, &retval);
if (retval != 0) {
/* dump the instruction when emulation failed */
pr_err("emulate following instruction failed @ 0x%016llx:",
exec_vmread(VMX_GUEST_RIP));
for (i = 0; i < emul_cnx->vie.num_valid; i++) {
if (i >= VIE_INST_SIZE)
break;
if (i == 0)
pr_err("\n");
pr_err("%d=%02hhx ",
i, emul_cnx->vie.inst[i]);
}
}
return retval;
}


@@ -0,0 +1,203 @@
/*-
* Copyright (c) 2012 NetApp, Inc.
* Copyright (c) 2017 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <cpu.h>
struct vie_op {
uint8_t op_byte; /* actual opcode byte */
uint8_t op_type; /* type of operation (e.g. MOV) */
uint16_t op_flags;
};
#define VIE_INST_SIZE 15
struct vie {
uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */
uint8_t num_valid; /* size of the instruction */
uint8_t num_processed;
uint8_t addrsize:4, opsize:4; /* address and operand sizes */
uint8_t rex_w:1, /* REX prefix */
rex_r:1,
rex_x:1,
rex_b:1,
rex_present:1,
repz_present:1, /* REP/REPE/REPZ prefix */
repnz_present:1, /* REPNE/REPNZ prefix */
opsize_override:1, /* Operand size override */
addrsize_override:1, /* Address size override */
segment_override:1; /* Segment override */
uint8_t mod:2, /* ModRM byte */
reg:4,
rm:4;
uint8_t ss:2, /* SIB byte */
index:4,
base:4;
uint8_t disp_bytes;
uint8_t imm_bytes;
uint8_t scale;
int base_register; /* VM_REG_GUEST_xyz */
int index_register; /* VM_REG_GUEST_xyz */
int segment_register; /* VM_REG_GUEST_xyz */
int64_t displacement; /* optional addr displacement */
int64_t immediate; /* optional immediate operand */
uint8_t decoded; /* set to 1 if successfully decoded */
struct vie_op op; /* opcode description */
};
#define PSL_C 0x00000001 /* carry bit */
#define PSL_PF 0x00000004 /* parity bit */
#define PSL_AF 0x00000010 /* bcd carry bit */
#define PSL_Z 0x00000040 /* zero bit */
#define PSL_N 0x00000080 /* negative bit */
#define PSL_T 0x00000100 /* trace enable bit */
#define PSL_I 0x00000200 /* interrupt enable bit */
#define PSL_D 0x00000400 /* string instruction direction bit */
#define PSL_V 0x00000800 /* overflow bit */
#define PSL_IOPL 0x00003000 /* i/o privilege level */
#define PSL_NT 0x00004000 /* nested task bit */
#define PSL_RF 0x00010000 /* resume flag bit */
#define PSL_VM 0x00020000 /* virtual 8086 mode bit */
#define PSL_AC 0x00040000 /* alignment checking */
#define PSL_VIF 0x00080000 /* virtual interrupt enable */
#define PSL_VIP 0x00100000 /* virtual interrupt pending */
#define PSL_ID 0x00200000 /* identification bit */
/*
* The 'access' field has the format specified in Table 21-2 of the Intel
* Architecture Manual vol 3b.
*
* XXX The contents of the 'access' field are architecturally defined except
* bit 16 - Segment Unusable.
*/
struct seg_desc {
uint64_t base;
uint32_t limit;
uint32_t access;
};
/*
* Protections are chosen from these bits, or-ed together
*/
#define PROT_NONE 0x00 /* no permissions */
#define PROT_READ 0x01 /* pages can be read */
#define PROT_WRITE 0x02 /* pages can be written */
#define PROT_EXEC 0x04 /* pages can be executed */
#define SEG_DESC_TYPE(access) ((access) & 0x001f)
#define SEG_DESC_DPL(access) (((access) >> 5) & 0x3)
#define SEG_DESC_PRESENT(access) (((access) & 0x0080) ? 1 : 0)
#define SEG_DESC_DEF32(access) (((access) & 0x4000) ? 1 : 0)
#define SEG_DESC_GRANULARITY(access) (((access) & 0x8000) ? 1 : 0)
#define SEG_DESC_UNUSABLE(access) (((access) & 0x10000) ? 1 : 0)
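/* Illustrative example (hypothetical value): a flat 64-bit code segment whose
* VMCS access field reads 0xa09b decodes with the macros above as:
*   SEG_DESC_TYPE(0xa09b)        = 0x1b  (execute/read, accessed, S = 1)
*   SEG_DESC_DPL(0xa09b)         = 0     (ring 0)
*   SEG_DESC_PRESENT(0xa09b)     = 1
*   SEG_DESC_DEF32(0xa09b)       = 0     (D must be 0 when CS.L = 1)
*   SEG_DESC_GRANULARITY(0xa09b) = 1
*   SEG_DESC_UNUSABLE(0xa09b)    = 0
*/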
enum vm_cpu_mode {
CPU_MODE_REAL,
CPU_MODE_PROTECTED,
CPU_MODE_COMPATIBILITY, /* IA-32E mode (CS.L = 0) */
CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */
};
enum vm_paging_mode {
PAGING_MODE_FLAT,
PAGING_MODE_32,
PAGING_MODE_PAE,
PAGING_MODE_64,
};
struct vm_guest_paging {
uint64_t cr3;
int cpl;
enum vm_cpu_mode cpu_mode;
enum vm_paging_mode paging_mode;
};
/*
* Identifiers for architecturally defined registers.
*/
enum vm_reg_name {
VM_REG_GUEST_RAX,
VM_REG_GUEST_RBX,
VM_REG_GUEST_RCX,
VM_REG_GUEST_RDX,
VM_REG_GUEST_RBP,
VM_REG_GUEST_RSI,
VM_REG_GUEST_R8,
VM_REG_GUEST_R9,
VM_REG_GUEST_R10,
VM_REG_GUEST_R11,
VM_REG_GUEST_R12,
VM_REG_GUEST_R13,
VM_REG_GUEST_R14,
VM_REG_GUEST_R15,
VM_REG_GUEST_RDI,
VM_REG_GUEST_CR0,
VM_REG_GUEST_CR3,
VM_REG_GUEST_CR4,
VM_REG_GUEST_DR7,
VM_REG_GUEST_RSP,
VM_REG_GUEST_RIP,
VM_REG_GUEST_RFLAGS,
VM_REG_GUEST_ES,
VM_REG_GUEST_CS,
VM_REG_GUEST_SS,
VM_REG_GUEST_DS,
VM_REG_GUEST_FS,
VM_REG_GUEST_GS,
VM_REG_GUEST_LDTR,
VM_REG_GUEST_TR,
VM_REG_GUEST_IDTR,
VM_REG_GUEST_GDTR,
VM_REG_GUEST_EFER,
VM_REG_GUEST_CR2,
VM_REG_GUEST_PDPTE0,
VM_REG_GUEST_PDPTE1,
VM_REG_GUEST_PDPTE2,
VM_REG_GUEST_PDPTE3,
VM_REG_GUEST_INTR_SHADOW,
VM_REG_LAST
};
typedef unsigned long u_long;
int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval);
int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val);
int vm_get_seg_desc(struct vcpu *vcpu, int reg,
struct seg_desc *ret_desc);
int vm_set_seg_desc(struct vcpu *vcpu, int reg,
struct seg_desc *desc);
int vm_restart_instruction(struct vcpu *vcpu);
void vm_gva2gpa(struct vcpu *vcpu, uint64_t gla, uint64_t *gpa);

arch/x86/guest/time.h (new file, 118 lines)

@@ -0,0 +1,118 @@
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
* Copyright (c) 2018 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)time.h 8.5 (Berkeley) 5/4/95
* $FreeBSD$
*/
#ifndef _TIME_H_
#define _TIME_H_
struct callout {
void *c_arg; /* function argument */
void (*c_func)(void *); /* function to call */
short c_flags; /* User State */
};
#define CALLOUT_ACTIVE 0x0002 /* callout is currently active */
#define CALLOUT_PENDING 0x0004 /* callout is waiting for timeout */
#define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE)
#define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE)
#define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING)
typedef int64_t time_t;
typedef int64_t sbintime_t;
struct bintime {
time_t sec;
uint64_t frac;
};
static inline void
bintime_add(struct bintime *_bt, const struct bintime *_bt2)
{
uint64_t _u;
_u = _bt->frac;
_bt->frac += _bt2->frac;
if (_u > _bt->frac)
_bt->sec++;
_bt->sec += _bt2->sec;
}
static inline void
bintime_sub(struct bintime *_bt, const struct bintime *_bt2)
{
uint64_t _u;
_u = _bt->frac;
_bt->frac -= _bt2->frac;
if (_u < _bt->frac)
_bt->sec--;
_bt->sec -= _bt2->sec;
}
static inline void
bintime_mul(struct bintime *_bt, uint32_t _x)
{
uint64_t _p1, _p2;
_p1 = (_bt->frac & 0xffffffffull) * _x;
_p2 = (_bt->frac >> 32) * _x + (_p1 >> 32);
_bt->sec *= _x;
_bt->sec += (_p2 >> 32);
_bt->frac = (_p2 << 32) | (_p1 & 0xffffffffull);
}
#define bintime_cmp(a, b, cmp) \
(((a)->sec == (b)->sec) ? \
((a)->frac cmp(b)->frac) : \
((a)->sec cmp(b)->sec))
#define SBT_1S ((sbintime_t)1 << 32)
#define SBT_1US (SBT_1S / 1000000)
#define BT2FREQ(bt) \
(((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) / \
((bt)->frac >> 1))
#define FREQ2BT(freq, bt) \
{ \
(bt)->sec = 0; \
(bt)->frac = ((uint64_t)0x8000000000000000 / (freq)) << 1; \
}
static inline sbintime_t
bttosbt(const struct bintime _bt)
{
return (((sbintime_t)_bt.sec << 32) + (_bt.frac >> 32));
}
#endif /* !_TIME_H_ */
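/* Illustrative sketch (not part of this header): struct bintime is a 64.64
* fixed-point value, so FREQ2BT computes one period as roughly 2^64 / freq
* (two shifts avoid overflowing the 2^63 constant) and BT2FREQ inverts it
* with rounding. This standalone program round-trips a hypothetical 1 kHz
* frequency through the same arithmetic.
*/
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t freq = 1000;   /* hypothetical 1 kHz clock */
    /* FREQ2BT: frac = (2^63 / freq) << 1, approximately 2^64 / freq */
    uint64_t frac = ((uint64_t)0x8000000000000000ULL / freq) << 1;
    /* BT2FREQ: (2^63 + frac/4) / (frac/2), i.e. rounded 2^64 / frac */
    uint64_t back = ((uint64_t)0x8000000000000000ULL + (frac >> 2)) /
            (frac >> 1);

    printf("frac = 0x%016llx, freq back = %llu Hz\n",
           (unsigned long long)frac, (unsigned long long)back);
    return 0;
}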

arch/x86/guest/vcpu.c (new file, 357 lines)

@@ -0,0 +1,357 @@
/*
* Copyright (C) 2018 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <hypervisor.h>
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <schedule.h>
#include <hv_debug.h>
vm_sw_loader_t vm_sw_loader;
/***********************************************************************
* vcpu_id/pcpu_id mapping table:
*
* if
* VM0_CPUS[2] = {0, 2} , VM1_CPUS[2] = {3, 1};
* then
* for physical CPU 0 : vcpu->pcpu_id = 0, vcpu->vcpu_id = 0, vmid = 0;
* for physical CPU 2 : vcpu->pcpu_id = 2, vcpu->vcpu_id = 1, vmid = 0;
* for physical CPU 3 : vcpu->pcpu_id = 3, vcpu->vcpu_id = 0, vmid = 1;
* for physical CPU 1 : vcpu->pcpu_id = 1, vcpu->vcpu_id = 1, vmid = 1;
*
***********************************************************************/
int create_vcpu(int cpu_id, struct vm *vm, struct vcpu **rtn_vcpu_handle)
{
struct vcpu *vcpu;
ASSERT(vm != NULL, "");
ASSERT(rtn_vcpu_handle != NULL, "");
pr_info("Creating VCPU %d", cpu_id);
/* Allocate memory for VCPU */
vcpu = calloc(1, sizeof(struct vcpu));
ASSERT(vcpu != NULL, "");
/* Initialize the physical CPU ID for this VCPU */
vcpu->pcpu_id = cpu_id;
/* Initialize the parent VM reference */
vcpu->vm = vm;
/* Initialize the virtual ID for this VCPU */
/* FIXME:
* We assume that vcpus are always destroyed in one shot
* (e.g. when the vm is destroyed). If we need to support
* destroying a specific vcpu on the fly, this vcpu_id
* assignment needs revision.
*/
/*
* vcpu->vcpu_id = vm->hw.created_vcpus;
* vm->hw.created_vcpus++;
*/
vcpu->vcpu_id = atomic_xadd_int(&vm->hw.created_vcpus, 1);
/* vm->hw.vcpu_array[vcpu->vcpu_id] = vcpu; */
atomic_store_rel_64(
(unsigned long *)&vm->hw.vcpu_array[vcpu->vcpu_id],
(unsigned long)vcpu);
ASSERT(vcpu->vcpu_id < vm->hw.num_vcpus,
"Allocated vcpu_id is out of range!");
per_cpu(vcpu, cpu_id) = vcpu;
pr_info("PCPU%d is working as VM%d VCPU%d, Role: %s",
vcpu->pcpu_id, vcpu->vm->attr.id, vcpu->vcpu_id,
is_vcpu_bsp(vcpu) ? "PRIMARY" : "SECONDARY");
/* If this VCPU is the VM's BSP, create the page hierarchy for this VM */
if (is_vcpu_bsp(vcpu)) {
/* Set up temporary guest page tables */
vm->arch_vm.guest_pml4 = create_guest_paging(vm);
pr_info("VM *d VCPU %d CR3: 0x%016llx ",
vm->attr.id, vcpu->vcpu_id, vm->arch_vm.guest_pml4);
}
/* Allocate VMCS region for this VCPU */
vcpu->arch_vcpu.vmcs = alloc_page();
ASSERT(vcpu->arch_vcpu.vmcs != NULL, "");
/* Memset VMCS region for this VCPU */
memset(vcpu->arch_vcpu.vmcs, 0, CPU_PAGE_SIZE);
/* Initialize exception field in VCPU context */
vcpu->arch_vcpu.exception_info.exception = -1;
/* Initialize cur context */
vcpu->arch_vcpu.cur_context = NORMAL_WORLD;
/* Create per vcpu vlapic */
vlapic_create(vcpu);
/* Populate the return handle */
*rtn_vcpu_handle = vcpu;
vcpu->launched = false;
vcpu->paused_cnt = 0;
vcpu->running = 0;
vcpu->ioreq_pending = 0;
vcpu->arch_vcpu.nr_sipi = 0;
vcpu->pending_pre_work = 0;
vcpu->state = VCPU_INIT;
return 0;
}
int start_vcpu(struct vcpu *vcpu)
{
uint64_t rip, instlen;
struct run_context *cur_context =
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
int64_t status = 0;
ASSERT(vcpu != NULL, "Incorrect arguments");
/* If this VCPU is not already launched, launch it */
if (!vcpu->launched) {
pr_info("VM %d Starting VCPU %d",
vcpu->vm->attr.id, vcpu->vcpu_id);
/* Set vcpu launched */
vcpu->launched = true;
/* To avoid a recycled VMCS reusing stale RSB entries, set IBPB.
* NOTE: this should be done every time the VMCS is switched;
* currently there is no other place that switches the VMCS.
* Please set IBPB for future VMCS-switch cases (like trusty).
*/
if (ibrs_type == IBRS_RAW)
msr_write(MSR_IA32_PRED_CMD, PRED_SET_IBPB);
/* Launch the VM */
status = vmx_vmrun(cur_context, VM_LAUNCH, ibrs_type);
/* See if VM launched successfully */
if (status == 0) {
if (is_vcpu_bsp(vcpu)) {
pr_info("VM %d VCPU %d successfully launched",
vcpu->vm->attr.id, vcpu->vcpu_id);
}
}
} else {
/* This VCPU was already launched, check if the last guest
* instruction needs to be repeated and resume VCPU accordingly
*/
instlen = vcpu->arch_vcpu.inst_len;
rip = cur_context->rip;
exec_vmwrite(VMX_GUEST_RIP, ((rip + instlen) &
0xFFFFFFFFFFFFFFFF));
/* Resume the VM */
status = vmx_vmrun(cur_context, VM_RESUME, ibrs_type);
}
/* Save guest CR3 register */
cur_context->cr3 = exec_vmread(VMX_GUEST_CR3);
/* Obtain current VCPU instruction pointer and length */
cur_context->rip = exec_vmread(VMX_GUEST_RIP);
vcpu->arch_vcpu.inst_len = exec_vmread(VMX_EXIT_INSTR_LEN);
cur_context->rsp = exec_vmread(VMX_GUEST_RSP);
cur_context->rflags = exec_vmread(VMX_GUEST_RFLAGS);
/* Obtain VM exit reason */
vcpu->arch_vcpu.exit_reason = exec_vmread(VMX_EXIT_REASON);
if (status != 0) {
/* refer to 64-ia32 spec section 24.9.1 volume#3 */
if (vcpu->arch_vcpu.exit_reason & VMX_VMENTRY_FAIL)
pr_fatal("vmentry fail reason=%lx", vcpu->arch_vcpu.exit_reason);
else
pr_fatal("vmexit fail err_inst=%lx", exec_vmread(VMX_INSTR_ERROR));
ASSERT(status == 0, "vm fail");
}
return status;
}
int shutdown_vcpu(__unused struct vcpu *vcpu)
{
/* TODO : Implement VCPU shutdown sequence */
return 0;
}
int destroy_vcpu(struct vcpu *vcpu)
{
ASSERT(vcpu != NULL, "Incorrect arguments");
/* vcpu->vm->hw.vcpu_array[vcpu->vcpu_id] = NULL; */
atomic_store_rel_64(
(unsigned long *)&vcpu->vm->hw.vcpu_array[vcpu->vcpu_id],
(unsigned long)NULL);
atomic_subtract_int(&vcpu->vm->hw.created_vcpus, 1);
vlapic_free(vcpu);
free(vcpu->arch_vcpu.vmcs);
free(vcpu->guest_msrs);
free_pcpu(vcpu->pcpu_id);
free(vcpu);
return 0;
}
/* NOTE:
* vcpu should be paused before calling this function.
*/
void reset_vcpu(struct vcpu *vcpu)
{
struct vlapic *vlapic;
pr_dbg("vcpu%d reset", vcpu->vcpu_id);
ASSERT(vcpu->state != VCPU_RUNNING,
"reset vcpu when it's running");
if (vcpu->state == VCPU_INIT)
return;
vcpu->state = VCPU_INIT;
vcpu->launched = false;
vcpu->paused_cnt = 0;
vcpu->running = 0;
vcpu->ioreq_pending = 0;
vcpu->arch_vcpu.nr_sipi = 0;
vcpu->pending_pre_work = 0;
vlapic = vcpu->arch_vcpu.vlapic;
vlapic_init(vlapic);
}
void init_vcpu(struct vcpu *vcpu)
{
if (is_vcpu_bsp(vcpu))
vcpu->arch_vcpu.cpu_mode = PAGE_PROTECTED_MODE;
else
vcpu->arch_vcpu.cpu_mode = REAL_MODE;
/* init_vmcs is deferred until the vcpu's VMCS is launched for the first time */
}
void pause_vcpu(struct vcpu *vcpu, enum vcpu_state new_state)
{
int pcpu_id = get_cpu_id();
pr_dbg("vcpu%d paused, new state: %d",
vcpu->vcpu_id, new_state);
vcpu->prev_state = vcpu->state;
vcpu->state = new_state;
get_schedule_lock(pcpu_id);
if (atomic_load_acq_32(&vcpu->running) == 1) {
remove_vcpu_from_runqueue(vcpu);
make_reschedule_request(vcpu);
release_schedule_lock(pcpu_id);
if (vcpu->pcpu_id != pcpu_id) {
while (atomic_load_acq_32(&vcpu->running) == 1)
__asm__ __volatile("pause" ::: "memory");
}
} else {
remove_vcpu_from_runqueue(vcpu);
release_schedule_lock(pcpu_id);
}
}
void resume_vcpu(struct vcpu *vcpu)
{
pr_dbg("vcpu%d resumed", vcpu->vcpu_id);
vcpu->state = vcpu->prev_state;
get_schedule_lock(vcpu->pcpu_id);
if (vcpu->state == VCPU_RUNNING) {
add_vcpu_to_runqueue(vcpu);
make_reschedule_request(vcpu);
}
release_schedule_lock(vcpu->pcpu_id);
}
void schedule_vcpu(struct vcpu *vcpu)
{
vcpu->state = VCPU_RUNNING;
pr_dbg("vcpu%d scheduled", vcpu->vcpu_id);
get_schedule_lock(vcpu->pcpu_id);
add_vcpu_to_runqueue(vcpu);
make_reschedule_request(vcpu);
release_schedule_lock(vcpu->pcpu_id);
}
/* helper function for vcpu creation */
int prepare_vcpu(struct vm *vm, int pcpu_id)
{
int ret = 0;
struct vcpu *vcpu = NULL;
ret = create_vcpu(pcpu_id, vm, &vcpu);
ASSERT(ret == 0, "vcpu create failed");
if (is_vcpu_bsp(vcpu)) {
/* Load VM SW */
if (!vm_sw_loader)
vm_sw_loader = general_sw_loader;
vm_sw_loader(vm, vcpu);
vcpu->arch_vcpu.cpu_mode = PAGE_PROTECTED_MODE;
} else {
vcpu->arch_vcpu.cpu_mode = REAL_MODE;
}
/* init_vmcs is deferred until the vcpu's VMCS is launched for the first time */
/* initialize the vcpu tsc aux */
vcpu->msr_tsc_aux_guest = vcpu->vcpu_id;
set_pcpu_used(pcpu_id);
INIT_LIST_HEAD(&vcpu->run_list);
return ret;
}
void request_vcpu_pre_work(struct vcpu *vcpu, int pre_work_id)
{
bitmap_set(pre_work_id, &vcpu->pending_pre_work);
}

arch/x86/guest/vioapic.c (new file, 662 lines)

@@ -0,0 +1,662 @@
/*-
* Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* Copyright (c) 2013 Neel Natu <neel@freebsd.org>
* Copyright (c) 2017 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#define pr_fmt(fmt) "vioapic: " fmt
#include <hypervisor.h>
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <hv_debug.h>
#define IOREGSEL 0x00
#define IOWIN 0x10
#define IOEOI 0x40
#define REDIR_ENTRIES_HW 120 /* SOS align with native ioapic */
#define REDIR_ENTRIES_UOS 24 /* UOS pins*/
#define RTBL_RO_BITS ((uint64_t)(IOAPIC_RTE_REM_IRR | IOAPIC_RTE_DELIVS))
#define ACRN_DBG_IOAPIC 6
struct vioapic {
struct vm *vm;
spinlock_t mtx;
uint32_t id;
uint32_t ioregsel;
struct {
uint64_t reg;
int acnt; /* sum of pin asserts (+1) and deasserts (-1) */
} rtbl[REDIR_ENTRIES_HW];
};
#define VIOAPIC_LOCK(vioapic) spinlock_obtain(&((vioapic)->mtx))
#define VIOAPIC_UNLOCK(vioapic) spinlock_release(&((vioapic)->mtx))
static inline const char *pinstate_str(bool asserted)
{
return (asserted) ? "asserted" : "deasserted";
}
struct vioapic *
vm_ioapic(struct vm *vm)
{
return (struct vioapic *)vm->arch_vm.virt_ioapic;
}
static void
vioapic_send_intr(struct vioapic *vioapic, int pin)
{
int vector, delmode;
uint32_t low, high, dest;
bool level, phys;
if (pin < 0 || pin >= vioapic_pincount(vioapic->vm))
pr_err("vioapic_send_intr: invalid pin number %d", pin);
low = vioapic->rtbl[pin].reg;
high = vioapic->rtbl[pin].reg >> 32;
if ((low & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET) {
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: masked", pin);
return;
}
phys = ((low & IOAPIC_RTE_DESTMOD) == IOAPIC_RTE_DESTPHY);
delmode = low & IOAPIC_RTE_DELMOD;
level = low & IOAPIC_RTE_TRGRLVL ? true : false;
if (level)
vioapic->rtbl[pin].reg |= IOAPIC_RTE_REM_IRR;
vector = low & IOAPIC_RTE_INTVEC;
dest = high >> APIC_ID_SHIFT;
vlapic_deliver_intr(vioapic->vm, level, dest, phys, delmode, vector);
}
static void
vioapic_set_pinstate(struct vioapic *vioapic, int pin, bool newstate)
{
int oldcnt, newcnt;
bool needintr;
if (pin < 0 || pin >= vioapic_pincount(vioapic->vm))
pr_err("vioapic_set_pinstate: invalid pin number %d", pin);
oldcnt = vioapic->rtbl[pin].acnt;
if (newstate)
vioapic->rtbl[pin].acnt++;
else
vioapic->rtbl[pin].acnt--;
newcnt = vioapic->rtbl[pin].acnt;
if (newcnt < 0) {
pr_err("ioapic pin%d: bad acnt %d", pin, newcnt);
}
needintr = false;
if (oldcnt == 0 && newcnt == 1) {
needintr = true;
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: asserted", pin);
} else if (oldcnt == 1 && newcnt == 0) {
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: deasserted", pin);
} else {
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: %s, ignored, acnt %d",
pin, pinstate_str(newstate), newcnt);
}
if (needintr)
vioapic_send_intr(vioapic, pin);
}
enum irqstate {
IRQSTATE_ASSERT,
IRQSTATE_DEASSERT,
IRQSTATE_PULSE
};
static int
vioapic_set_irqstate(struct vm *vm, int irq, enum irqstate irqstate)
{
struct vioapic *vioapic;
if (irq < 0 || irq >= vioapic_pincount(vm))
return -EINVAL;
vioapic = vm_ioapic(vm);
VIOAPIC_LOCK(vioapic);
switch (irqstate) {
case IRQSTATE_ASSERT:
vioapic_set_pinstate(vioapic, irq, true);
break;
case IRQSTATE_DEASSERT:
vioapic_set_pinstate(vioapic, irq, false);
break;
case IRQSTATE_PULSE:
vioapic_set_pinstate(vioapic, irq, true);
vioapic_set_pinstate(vioapic, irq, false);
break;
default:
panic("vioapic_set_irqstate: invalid irqstate %d", irqstate);
}
VIOAPIC_UNLOCK(vioapic);
return 0;
}
int
vioapic_assert_irq(struct vm *vm, int irq)
{
return vioapic_set_irqstate(vm, irq, IRQSTATE_ASSERT);
}
int
vioapic_deassert_irq(struct vm *vm, int irq)
{
return vioapic_set_irqstate(vm, irq, IRQSTATE_DEASSERT);
}
int
vioapic_pulse_irq(struct vm *vm, int irq)
{
return vioapic_set_irqstate(vm, irq, IRQSTATE_PULSE);
}
/*
* Reset the vlapic's trigger-mode register to reflect the ioapic pin
* configuration.
*/
void
vioapic_update_tmr(struct vcpu *vcpu)
{
struct vioapic *vioapic;
struct vlapic *vlapic;
uint32_t low;
int delmode, pin, vector;
bool level;
vlapic = vcpu->arch_vcpu.vlapic;
vioapic = vm_ioapic(vcpu->vm);
VIOAPIC_LOCK(vioapic);
for (pin = 0; pin < vioapic_pincount(vioapic->vm); pin++) {
low = vioapic->rtbl[pin].reg;
level = low & IOAPIC_RTE_TRGRLVL ? true : false;
/*
* For a level-triggered 'pin' let the vlapic figure out if
* an assertion on this 'pin' would result in an interrupt
* being delivered to it. If yes, then it will modify the
* TMR bit associated with this vector to level-triggered.
*/
delmode = low & IOAPIC_RTE_DELMOD;
vector = low & IOAPIC_RTE_INTVEC;
vlapic_set_tmr_one_vec(vlapic, delmode, vector, level);
}
vlapic_apicv_batch_set_tmr(vlapic);
VIOAPIC_UNLOCK(vioapic);
}
static uint32_t
vioapic_read(struct vioapic *vioapic, uint32_t addr)
{
int regnum, pin, rshift;
regnum = addr & 0xff;
switch (regnum) {
case IOAPIC_ID:
return vioapic->id;
case IOAPIC_VER:
return ((vioapic_pincount(vioapic->vm) - 1) << MAX_RTE_SHIFT)
| 0x11;
case IOAPIC_ARB:
return vioapic->id;
default:
break;
}
/* redirection table entries */
if (regnum >= IOAPIC_REDTBL &&
regnum < IOAPIC_REDTBL + vioapic_pincount(vioapic->vm) * 2) {
pin = (regnum - IOAPIC_REDTBL) / 2;
if ((regnum - IOAPIC_REDTBL) % 2)
rshift = 32;
else
rshift = 0;
return vioapic->rtbl[pin].reg >> rshift;
}
return 0;
}
/*
* Version 0x20+ IOAPICs have an EOI register; the CPU can write a vector
* to it to clear the related IRR bit.
*/
static void
vioapic_write_eoi(struct vioapic *vioapic, int32_t vector)
{
struct vm *vm = vioapic->vm;
int pin;
if (vector < VECTOR_FOR_INTR_START || vector > NR_MAX_VECTOR)
pr_err("vioapic_process_eoi: invalid vector %d", vector);
VIOAPIC_LOCK(vioapic);
for (pin = 0; pin < vioapic_pincount(vm); pin++) {
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0)
continue;
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) !=
(uint64_t)vector)
continue;
vioapic->rtbl[pin].reg &= ~IOAPIC_RTE_REM_IRR;
if (vioapic->rtbl[pin].acnt > 0) {
dev_dbg(ACRN_DBG_IOAPIC,
"ioapic pin%d: asserted at eoi, acnt %d",
pin, vioapic->rtbl[pin].acnt);
vioapic_send_intr(vioapic, pin);
}
}
VIOAPIC_UNLOCK(vioapic);
}
static void
vioapic_write(struct vioapic *vioapic, uint32_t addr, uint32_t data)
{
uint64_t data64, mask64;
uint64_t last, new, changed;
int regnum, pin, lshift;
regnum = addr & 0xff;
switch (regnum) {
case IOAPIC_ID:
vioapic->id = data & APIC_ID_MASK;
break;
case IOAPIC_VER:
case IOAPIC_ARB:
/* readonly */
break;
default:
break;
}
/* redirection table entries */
if (regnum >= IOAPIC_REDTBL &&
regnum < IOAPIC_REDTBL + vioapic_pincount(vioapic->vm) * 2) {
pin = (regnum - IOAPIC_REDTBL) / 2;
if ((regnum - IOAPIC_REDTBL) % 2)
lshift = 32;
else
lshift = 0;
last = new = vioapic->rtbl[pin].reg;
data64 = (uint64_t)data << lshift;
mask64 = (uint64_t)0xffffffff << lshift;
new &= ~mask64 | RTBL_RO_BITS;
new |= data64 & ~RTBL_RO_BITS;
changed = last ^ new;
/* pin0 from vpic mask/unmask */
if (pin == 0 && (changed & IOAPIC_RTE_INTMASK)) {
/* mask -> unmask */
if ((last & IOAPIC_RTE_INTMASK) &&
((new & IOAPIC_RTE_INTMASK) == 0)) {
if ((vioapic->vm->vpic_wire_mode
== VPIC_WIRE_NULL) ||
(vioapic->vm->vpic_wire_mode
== VPIC_WIRE_INTR)) {
atomic_set_int(
&vioapic->vm->vpic_wire_mode,
VPIC_WIRE_IOAPIC);
dev_dbg(ACRN_DBG_IOAPIC,
"vpic wire mode -> IOAPIC");
} else {
pr_err("WARNING: invalid vpic wire mode change");
return;
}
/* unmask -> mask */
} else if (((last & IOAPIC_RTE_INTMASK) == 0) &&
(new & IOAPIC_RTE_INTMASK)) {
if (vioapic->vm->vpic_wire_mode
== VPIC_WIRE_IOAPIC) {
atomic_set_int(
&vioapic->vm->vpic_wire_mode,
VPIC_WIRE_INTR);
dev_dbg(ACRN_DBG_IOAPIC,
"vpic wire mode -> INTR");
}
}
}
vioapic->rtbl[pin].reg = new;
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: redir table entry %#lx",
pin, vioapic->rtbl[pin].reg);
/*
* If any fields in the redirection table entry (except mask
* or polarity) have changed then rendezvous all the vcpus
* to update their vlapic trigger-mode registers.
*/
if (changed & ~(IOAPIC_RTE_INTMASK | IOAPIC_RTE_INTPOL)) {
int i;
struct vcpu *vcpu;
dev_dbg(ACRN_DBG_IOAPIC,
"ioapic pin%d: recalculate vlapic trigger-mode reg",
pin);
VIOAPIC_UNLOCK(vioapic);
foreach_vcpu(i, vioapic->vm, vcpu) {
vcpu_make_request(vcpu, ACRN_REQUEST_TMR_UPDATE);
}
VIOAPIC_LOCK(vioapic);
}
/*
* Generate an interrupt if the following conditions are met:
* - pin is not masked
* - previous interrupt has been EOIed
* - pin level is asserted
*/
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTMASK) ==
IOAPIC_RTE_INTMCLR &&
(vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0 &&
(vioapic->rtbl[pin].acnt > 0)) {
dev_dbg(ACRN_DBG_IOAPIC,
"ioapic pin%d: asserted at rtbl write, acnt %d",
pin, vioapic->rtbl[pin].acnt);
vioapic_send_intr(vioapic, pin);
}
/* remap for activate: interrupt mask -> unmask
* remap for deactivate: interrupt masked and vector set to 0
*/
data64 = vioapic->rtbl[pin].reg;
if ((((data64 & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMCLR)
&& ((last & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET))
|| (((data64 & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET)
&& ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) == 0))) {
/* VM enable intr */
struct ptdev_intx_info intx;
/* NOTE: only support max 256 pin */
intx.virt_pin = (uint8_t)pin;
intx.vpin_src = PTDEV_VPIN_IOAPIC;
ptdev_intx_pin_remap(vioapic->vm, &intx);
}
}
}
static int
vioapic_mmio_rw(struct vioapic *vioapic, uint64_t gpa,
uint64_t *data, int size, bool doread)
{
uint64_t offset;
offset = gpa - VIOAPIC_BASE;
/*
* The IOAPIC specification allows 32-bit wide accesses to the
* IOREGSEL (offset 0) and IOWIN (offset 16) registers.
*/
if (size != 4 || (offset != IOREGSEL && offset != IOWIN &&
offset != IOEOI)) {
if (doread)
*data = 0;
return 0;
}
VIOAPIC_LOCK(vioapic);
if (offset == IOREGSEL) {
if (doread)
*data = vioapic->ioregsel;
else
vioapic->ioregsel = *data;
} else if (offset == IOEOI) {
/* only need to handle write operation */
if (!doread)
vioapic_write_eoi(vioapic, *data);
} else {
if (doread) {
*data = vioapic_read(vioapic, vioapic->ioregsel);
} else {
vioapic_write(vioapic, vioapic->ioregsel,
*data);
}
}
VIOAPIC_UNLOCK(vioapic);
return 0;
}
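/* Illustrative, standalone model (not from the original source) of the
* IOREGSEL/IOWIN indirection that vioapic_mmio_rw() emulates: the guest
* first writes a register index to IOREGSEL, then reads or writes the
* selected register through IOWIN. Values here are hypothetical.
*/
#include <stdint.h>
#include <stdio.h>

static uint32_t ioregsel;   /* currently selected register index */
static uint32_t regs[0x40]; /* toy register file */

static uint32_t win_read(void)
{
    return regs[ioregsel & 0x3f];
}

static void win_write(uint32_t val)
{
    regs[ioregsel & 0x3f] = val;
}

int main(void)
{
    int pin = 2;

    /* program the low dword of pin 2's RTE (register index 0x10 + 2 * pin) */
    ioregsel = 0x10 + pin * 2;
    win_write(0x0000a030);  /* hypothetical RTE: level-triggered, vector 0x30 */

    /* read it back through the same two-step window */
    ioregsel = 0x10 + pin * 2;
    printf("RTE[%d] low = 0x%08x\n", pin, win_read());
    return 0;
}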
int
vioapic_mmio_read(void *vm, uint64_t gpa, uint64_t *rval,
int size)
{
int error;
struct vioapic *vioapic;
vioapic = vm_ioapic(vm);
error = vioapic_mmio_rw(vioapic, gpa, rval, size, true);
return error;
}
int
vioapic_mmio_write(void *vm, uint64_t gpa, uint64_t wval,
int size)
{
int error;
struct vioapic *vioapic;
vioapic = vm_ioapic(vm);
error = vioapic_mmio_rw(vioapic, gpa, &wval, size, false);
return error;
}
void
vioapic_process_eoi(struct vm *vm, int vector)
{
struct vioapic *vioapic;
int pin;
if (vector < VECTOR_FOR_INTR_START || vector > NR_MAX_VECTOR)
pr_err("vioapic_process_eoi: invalid vector %d", vector);
vioapic = vm_ioapic(vm);
dev_dbg(ACRN_DBG_IOAPIC, "ioapic processing eoi for vector %d", vector);
/* notify device to ack if assigned pin */
for (pin = 0; pin < vioapic_pincount(vm); pin++) {
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0)
continue;
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) !=
(uint64_t)vector)
continue;
ptdev_intx_ack(vm, pin, PTDEV_VPIN_IOAPIC);
}
/*
* XXX keep track of the pins associated with this vector instead
* of iterating on every single pin each time.
*/
VIOAPIC_LOCK(vioapic);
for (pin = 0; pin < vioapic_pincount(vm); pin++) {
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0)
continue;
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) !=
(uint64_t)vector)
continue;
vioapic->rtbl[pin].reg &= ~IOAPIC_RTE_REM_IRR;
if (vioapic->rtbl[pin].acnt > 0) {
dev_dbg(ACRN_DBG_IOAPIC,
"ioapic pin%d: asserted at eoi, acnt %d",
pin, vioapic->rtbl[pin].acnt);
vioapic_send_intr(vioapic, pin);
}
}
VIOAPIC_UNLOCK(vioapic);
}
struct vioapic *
vioapic_init(struct vm *vm)
{
int i;
struct vioapic *vioapic;
vioapic = calloc(1, sizeof(struct vioapic));
ASSERT(vioapic != NULL, "");
vioapic->vm = vm;
spinlock_init(&vioapic->mtx);
/* Initialize all redirection entries to mask all interrupts */
for (i = 0; i < vioapic_pincount(vioapic->vm); i++)
vioapic->rtbl[i].reg = 0x0001000000010000UL;
register_mmio_emulation_handler(vm,
vioapic_mmio_access_handler,
(uint64_t)VIOAPIC_BASE,
(uint64_t)VIOAPIC_BASE + VIOAPIC_SIZE,
(void *) 0);
return vioapic;
}
void
vioapic_cleanup(struct vioapic *vioapic)
{
unregister_mmio_emulation_handler(vioapic->vm,
(uint64_t)VIOAPIC_BASE,
(uint64_t)VIOAPIC_BASE + VIOAPIC_SIZE);
free(vioapic);
}
int
vioapic_pincount(struct vm *vm)
{
if (is_vm0(vm))
return REDIR_ENTRIES_HW;
else
return REDIR_ENTRIES_UOS;
}
int vioapic_mmio_access_handler(struct vcpu *vcpu, struct mem_io *mmio,
void *handler_private_data)
{
struct vm *vm = vcpu->vm;
uint64_t gpa = mmio->paddr;
int ret = 0;
(void)handler_private_data;
/* Note: all accesses to the IOAPIC are 32 bits in size */
ASSERT(mmio->access_size == 4,
"All RW to IOAPIC must be 32-bits in size");
if (mmio->read_write == HV_MEM_IO_READ) {
ret = vioapic_mmio_read(vm,
gpa,
&mmio->value,
mmio->access_size);
mmio->mmio_status = MMIO_TRANS_VALID;
} else if (mmio->read_write == HV_MEM_IO_WRITE) {
ret = vioapic_mmio_write(vm,
gpa,
mmio->value,
mmio->access_size);
mmio->mmio_status = MMIO_TRANS_VALID;
}
return ret;
}
bool vioapic_get_rte(struct vm *vm, int pin, void *rte)
{
struct vioapic *vioapic;
vioapic = vm_ioapic(vm);
if (vioapic && rte) {
*(uint64_t *)rte = vioapic->rtbl[pin].reg;
return true;
} else
return false;
}
int get_vioapic_info(char *str, int str_max, int vmid)
{
int pin, len, size = str_max, vector, delmode;
uint64_t rte;
uint32_t low, high, dest;
bool level, phys, remote_irr, mask;
struct vm *vm = get_vm_from_vmid(vmid);
if (!vm) {
len = snprintf(str, size,
"\r\nvm is not exist for vmid %d", vmid);
size -= len;
str += len;
goto END;
}
len = snprintf(str, size,
"\r\nPIN\tVEC\tDM\tDEST\tTM\tDELM\tIRR\tMASK");
size -= len;
str += len;
for (pin = 0 ; pin < vioapic_pincount(vm); pin++) {
vioapic_get_rte(vm, pin, (void *)&rte);
low = rte;
high = rte >> 32;
mask = ((low & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET);
remote_irr = ((low & IOAPIC_RTE_REM_IRR) == IOAPIC_RTE_REM_IRR);
phys = ((low & IOAPIC_RTE_DESTMOD) == IOAPIC_RTE_DESTPHY);
delmode = low & IOAPIC_RTE_DELMOD;
level = low & IOAPIC_RTE_TRGRLVL ? true : false;
vector = low & IOAPIC_RTE_INTVEC;
dest = high >> APIC_ID_SHIFT;
len = snprintf(str, size,
"\r\n%d\t0x%X\t%s\t0x%X\t%s\t%d\t%d\t%d",
pin, vector, phys ? "phys" : "logic",
dest, level ? "level" : "edge",
delmode >> 8, remote_irr, mask);
size -= len;
str += len;
}
END:
snprintf(str, size, "\r\n");
return 0;
}

2398
arch/x86/guest/vlapic.c Normal file

File diff suppressed because it is too large

153
arch/x86/guest/vlapic_priv.h Normal file

@@ -0,0 +1,153 @@
/*-
* Copyright (c) 2013 Neel Natu <neel@freebsd.org>
* Copyright (c) 2017 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VLAPIC_PRIV_H_
#define _VLAPIC_PRIV_H_
/*
* APIC Register: Offset Description
*/
#define APIC_OFFSET_ID 0x20 /* Local APIC ID */
#define APIC_OFFSET_VER 0x30 /* Local APIC Version */
#define APIC_OFFSET_TPR 0x80 /* Task Priority Register */
#define APIC_OFFSET_APR 0x90 /* Arbitration Priority */
#define APIC_OFFSET_PPR 0xA0 /* Processor Priority Register */
#define APIC_OFFSET_EOI 0xB0 /* EOI Register */
#define APIC_OFFSET_RRR 0xC0 /* Remote read */
#define APIC_OFFSET_LDR 0xD0 /* Logical Destination */
#define APIC_OFFSET_DFR 0xE0 /* Destination Format Register */
#define APIC_OFFSET_SVR 0xF0 /* Spurious Vector Register */
#define APIC_OFFSET_ISR0 0x100 /* In Service Register */
#define APIC_OFFSET_ISR1 0x110
#define APIC_OFFSET_ISR2 0x120
#define APIC_OFFSET_ISR3 0x130
#define APIC_OFFSET_ISR4 0x140
#define APIC_OFFSET_ISR5 0x150
#define APIC_OFFSET_ISR6 0x160
#define APIC_OFFSET_ISR7 0x170
#define APIC_OFFSET_TMR0 0x180 /* Trigger Mode Register */
#define APIC_OFFSET_TMR1 0x190
#define APIC_OFFSET_TMR2 0x1A0
#define APIC_OFFSET_TMR3 0x1B0
#define APIC_OFFSET_TMR4 0x1C0
#define APIC_OFFSET_TMR5 0x1D0
#define APIC_OFFSET_TMR6 0x1E0
#define APIC_OFFSET_TMR7 0x1F0
#define APIC_OFFSET_IRR0 0x200 /* Interrupt Request Register */
#define APIC_OFFSET_IRR1 0x210
#define APIC_OFFSET_IRR2 0x220
#define APIC_OFFSET_IRR3 0x230
#define APIC_OFFSET_IRR4 0x240
#define APIC_OFFSET_IRR5 0x250
#define APIC_OFFSET_IRR6 0x260
#define APIC_OFFSET_IRR7 0x270
#define APIC_OFFSET_ESR 0x280 /* Error Status Register */
#define APIC_OFFSET_CMCI_LVT 0x2F0 /* Local Vector Table (CMCI) */
#define APIC_OFFSET_ICR_LOW 0x300 /* Interrupt Command Register */
#define APIC_OFFSET_ICR_HI 0x310
#define APIC_OFFSET_TIMER_LVT 0x320 /* Local Vector Table (Timer) */
#define APIC_OFFSET_THERM_LVT 0x330 /* Local Vector Table (Thermal) */
#define APIC_OFFSET_PERF_LVT 0x340 /* Local Vector Table (PMC) */
#define APIC_OFFSET_LINT0_LVT 0x350 /* Local Vector Table (LINT0) */
#define APIC_OFFSET_LINT1_LVT 0x360 /* Local Vector Table (LINT1) */
#define APIC_OFFSET_ERROR_LVT 0x370 /* Local Vector Table (ERROR) */
#define APIC_OFFSET_TIMER_ICR 0x380 /* Timer's Initial Count */
#define APIC_OFFSET_TIMER_CCR 0x390 /* Timer's Current Count */
#define APIC_OFFSET_TIMER_DCR 0x3E0 /* Timer's Divide Configuration */
#define APIC_OFFSET_SELF_IPI 0x3F0 /* Self IPI register */
/*
* 16 priority levels with at most one vector injected per level.
*/
#define ISRVEC_STK_SIZE (16 + 1)
#define VLAPIC_MAXLVT_INDEX APIC_LVT_CMCI
struct vlapic;
struct pir_desc {
uint64_t pir[4];
uint64_t pending;
uint64_t unused[3];
} __aligned(64);
struct vlapic_ops {
int (*apicv_set_intr_ready)
(struct vlapic *vlapic, int vector, bool level);
int (*apicv_pending_intr)(struct vlapic *vlapic, int *vecptr);
void (*apicv_intr_accepted)(struct vlapic *vlapic, int vector);
void (*apicv_post_intr)(struct vlapic *vlapic, int hostcpu);
void (*apicv_set_tmr)(struct vlapic *vlapic, int vector, bool level);
void (*apicv_batch_set_tmr)(struct vlapic *vlapic);
void (*enable_x2apic_mode)(struct vlapic *vlapic);
};
struct vlapic {
struct vm *vm;
struct vcpu *vcpu;
struct lapic *apic_page;
struct pir_desc *pir_desc;
struct vlapic_ops ops;
uint32_t esr_pending;
int esr_firing;
struct callout callout; /* vlapic timer */
struct bintime timer_fire_bt; /* callout expiry time */
struct bintime timer_freq_bt; /* timer frequency */
struct bintime timer_period_bt; /* timer period */
long last_timer; /* the last timer id */
spinlock_t timer_mtx;
/*
* The 'isrvec_stk' is a stack of vectors injected by the local apic.
* A vector is popped from the stack when the processor does an EOI.
* The vector on the top of the stack is used to compute the
* Processor Priority in conjunction with the TPR.
*/
uint8_t isrvec_stk[ISRVEC_STK_SIZE];
int isrvec_stk_top;
uint64_t msr_apicbase;
/*
* Copies of some registers in the virtual APIC page. We do this for
* a couple of different reasons:
* - to be able to detect what changed (e.g. svr_last)
* - to maintain a coherent snapshot of the register (e.g. lvt_last)
*/
uint32_t svr_last;
uint32_t lvt_last[VLAPIC_MAXLVT_INDEX + 1];
struct pir_desc pir;
};
void vlapic_cleanup(struct vlapic *vlapic);
#endif /* _VLAPIC_PRIV_H_ */
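The isrvec_stk comment above says the top-of-stack in-service vector is combined with the TPR to derive the processor priority. As a rough, standalone illustration of that rule (following the SDM's PPR definition; this helper is not part of the header):

#include <stdint.h>

/* Illustrative only: the processor priority is the TPR when its class
 * (bits 7:4) is at least that of the highest in-service vector (ISRV);
 * otherwise it is the ISRV class with a zero sub-class. */
static uint8_t example_compute_ppr(uint8_t tpr, uint8_t isrv)
{
        if ((tpr & 0xf0) >= (isrv & 0xf0))
                return tpr;
        else
                return isrv & 0xf0;
}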

324
arch/x86/guest/vm.c Normal file

@@ -0,0 +1,324 @@
/*
* Copyright (C) 2018 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <bsp_extern.h>
#include <hv_debug.h>
/* Local variables */
/* VMs list */
struct list_head vm_list = {
.next = &vm_list,
.prev = &vm_list,
};
/* Lock for VMs list */
spinlock_t vm_list_lock = {
.head = 0,
.tail = 0
};
/* used for vmid allocation; this limits the maximum number of VMs to 64 */
static unsigned long vmid_bitmap;
static void init_vm(struct vm_description *vm_desc,
struct vm *vm_handle)
{
/* Populate VM attributes from VM description */
vm_handle->hw.num_vcpus = vm_desc->vm_hw_num_cores;
vm_handle->state_info.privilege = vm_desc->vm_state_info_privilege;
vm_handle->state_info.boot_count = 0;
}
/* return a pointer to the virtual machine structure associated with
* this VM ID
*/
struct vm *get_vm_from_vmid(int vm_id)
{
struct vm *vm = NULL;
struct list_head *pos;
spinlock_obtain(&vm_list_lock);
list_for_each(pos, &vm_list) {
vm = list_entry(pos, struct vm, list);
if (vm->attr.id == vm_id) {
spinlock_release(&vm_list_lock);
return vm;
}
}
spinlock_release(&vm_list_lock);
return NULL;
}
int create_vm(struct vm_description *vm_desc, struct vm **rtn_vm)
{
unsigned int id;
struct vm *vm;
int status = 0;
if ((vm_desc == NULL) || (rtn_vm == NULL))
status = -EINVAL;
if (status == 0) {
/* Allocate memory for virtual machine */
vm = calloc(1, sizeof(struct vm));
ASSERT(vm != NULL, "vm allocation failed");
/*
* Map Virtual Machine to its VM Description
*/
init_vm(vm_desc, vm);
/* Init mmio list */
INIT_LIST_HEAD(&vm->mmio_list);
if (vm->hw.num_vcpus == 0)
vm->hw.num_vcpus = phy_cpu_num;
vm->hw.vcpu_array =
calloc(1, sizeof(struct vcpu *) * vm->hw.num_vcpus);
ASSERT(vm->hw.vcpu_array != NULL,
"vcpu_array allocation failed");
for (id = 0; id < sizeof(long) * 8; id++)
if (bitmap_test_and_set(id, &vmid_bitmap) == 0)
break;
vm->attr.id = vm->attr.boot_idx = id;
snprintf(&vm->attr.name[0], MAX_VM_NAME_LEN, "vm_%d",
vm->attr.id);
atomic_store_rel_int(&vm->hw.created_vcpus, 0);
/* gpa_lowtop is used for system startup */
vm->hw.gpa_lowtop = 0;
/* Only for SOS: configure VM software information here */
/* For UOS: this VM software information is configured in the DM */
if (is_vm0(vm)) {
prepare_vm0_memmap_and_e820(vm);
#ifndef CONFIG_EFI_STUB
status = init_vm0_boot_info(vm);
#endif
} else {
/* populate UOS vm fields according to vm_desc */
vm->secure_world_enabled =
vm_desc->secure_world_enabled;
memcpy_s(&vm->GUID[0], sizeof(vm->GUID),
&vm_desc->GUID[0],
sizeof(vm_desc->GUID));
}
INIT_LIST_HEAD(&vm->list);
spinlock_obtain(&vm_list_lock);
list_add(&vm->list, &vm_list);
spinlock_release(&vm_list_lock);
/* Ensure VM software information obtained */
if (status == 0) {
/* Set up IO bit-mask such that VM exit occurs on
* selected IO ranges
*/
setup_io_bitmap(vm);
/* Create virtual uart */
if (is_vm0(vm))
vm->vuart = vuart_init(vm);
vm->vpic = vpic_init(vm);
/* vpic wire_mode default is INTR */
vm->vpic_wire_mode = VPIC_WIRE_INTR;
/* Allocate full emulated vIOAPIC instance */
vm->arch_vm.virt_ioapic = vioapic_init(vm);
/* Populate return VM handle */
*rtn_vm = vm;
ptdev_vm_init(vm);
vm->sw.req_buf = 0;
vm->state = VM_CREATED;
}
}
/* Return status to caller */
return status;
}
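create_vm() above hands out VM ids by finding the first free bit in vmid_bitmap with bitmap_test_and_set(). A freestanding, non-atomic sketch of that first-free-slot idea (illustrative only; a real allocator would need the atomic test-and-set used above):

#include <stdint.h>

/* Illustrative only: scan a 64-bit bitmap for the first clear bit,
 * mark it used and return its index, or -1 if all ids are taken. */
static int example_alloc_id(uint64_t *bitmap)
{
        int id;

        for (id = 0; id < 64; id++) {
                if ((*bitmap & (1ULL << id)) == 0) {
                        *bitmap |= 1ULL << id;
                        return id;
                }
        }
        return -1;
}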
int shutdown_vm(struct vm *vm)
{
int i, status = 0;
struct vcpu *vcpu = NULL;
if (vm == NULL)
return -EINVAL;
pause_vm(vm);
/* Only a paused VM can be shut down */
if (vm->state != VM_PAUSED)
return -EINVAL;
foreach_vcpu(i, vm, vcpu) {
reset_vcpu(vcpu);
destroy_vcpu(vcpu);
}
spinlock_obtain(&vm_list_lock);
list_del_init(&vm->list);
spinlock_release(&vm_list_lock);
ptdev_vm_deinit(vm);
/* cleanup and free vioapic */
vioapic_cleanup(vm->arch_vm.virt_ioapic);
/* Free EPT allocated resources assigned to VM */
destroy_ept(vm);
/* Free MSR bitmap */
free(vm->arch_vm.msr_bitmap);
/* TODO: De-initialize I/O Emulation */
free_io_emulation_resource(vm);
/* Free iommu_domain */
if (vm->iommu_domain)
destroy_iommu_domain(vm->iommu_domain);
bitmap_clr(vm->attr.id, &vmid_bitmap);
if (vm->vpic)
vpic_cleanup(vm);
free(vm->hw.vcpu_array);
/* TODO: De-Configure HV-SW */
/* Deallocate VM */
free(vm);
/* Return status to caller */
return status;
}
int start_vm(struct vm *vm)
{
struct vcpu *vcpu = NULL;
vm->state = VM_STARTED;
/* Only start BSP (vid = 0) and let BSP start other APs */
vcpu = vcpu_from_vid(vm, 0);
ASSERT(vcpu != NULL, "vm%d, vcpu0", vm->attr.id);
schedule_vcpu(vcpu);
return 0;
}
/*
* DM only pause vm for shutdown/reboot. If we need to
* extend the pause vm for DM, this API should be extended.
*/
int pause_vm(struct vm *vm)
{
int i;
struct vcpu *vcpu = NULL;
if (vm->state == VM_PAUSED)
return 0;
vm->state = VM_PAUSED;
foreach_vcpu(i, vm, vcpu)
pause_vcpu(vcpu, VCPU_ZOMBIE);
return 0;
}
int vm_resume(struct vm *vm)
{
int i;
struct vcpu *vcpu = NULL;
foreach_vcpu(i, vm, vcpu)
resume_vcpu(vcpu);
vm->state = VM_STARTED;
return 0;
}
/* Eventually this array will be removed and only the vm0 description kept */
struct vm_description *get_vm_desc(int idx)
{
struct vm_description_array *vm_desc_array;
/* Obtain base of user defined VM description array data
* structure
*/
vm_desc_array = (struct vm_description_array *)get_vm_desc_base();
/* Obtain VM description array base */
if (idx >= vm_desc_array->num_vm_desc)
return NULL;
else
return &vm_desc_array->vm_desc_array[idx];
}
/* Create vm/vcpu for vm0 */
int prepare_vm0(void)
{
int i, ret;
struct vm *vm = NULL;
struct vm_description *vm_desc = NULL;
vm_desc = get_vm_desc(0);
ASSERT(vm_desc, "get vm desc failed");
ret = create_vm(vm_desc, &vm);
ASSERT(ret == 0, "VM creation failed!");
prepare_vcpu(vm, vm_desc->vm_hw_logical_core_ids[0]);
/* Prepare the AP for vm0 */
for (i = 1; i < vm_desc->vm_hw_num_cores; i++)
prepare_vcpu(vm, vm_desc->vm_hw_logical_core_ids[i]);
/* start vm0 BSP automatically */
start_vm(vm);
pr_fatal("Start VM0");
return 0;
}

148
arch/x86/guest/vmcall.c Normal file

@@ -0,0 +1,148 @@
/*
* Copyright (C) 2018 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <hv_debug.h>
#include <acrn_hv_defs.h>
#include <hypercall.h>
int vmcall_handler(struct vcpu *vcpu)
{
int64_t ret = 0;
struct vm *vm = vcpu->vm;
struct run_context *cur_context =
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
/* hypercall ID from guest*/
uint64_t hypcall_id = cur_context->guest_cpu_regs.regs.r8;
/* hypercall param1 from guest*/
uint64_t param1 = cur_context->guest_cpu_regs.regs.rdi;
/* hypercall param2 from guest*/
uint64_t param2 = cur_context->guest_cpu_regs.regs.rsi;
/* hypercall param3 from guest, reserved*/
/* uint64_t param3 = cur_context->guest_cpu_regs.regs.rdx; */
/* hypercall param4 from guest, reserved*/
/* uint64_t param4 = cur_context->guest_cpu_regs.regs.rcx; */
/* Dispatch the hypercall handler */
switch (hypcall_id) {
case HC_GET_API_VERSION:
ret = hcall_get_api_version(vm, param1);
break;
case HC_CREATE_VM:
ret = hcall_create_vm(vm, param1);
break;
case HC_DESTROY_VM:
ret = hcall_destroy_vm(param1);
break;
case HC_START_VM:
ret = hcall_resume_vm(param1);
break;
case HC_PAUSE_VM:
ret = hcall_pause_vm(param1);
break;
case HC_CREATE_VCPU:
ret = hcall_create_vcpu(vm, param1, param2);
break;
case HC_ASSERT_IRQLINE:
ret = hcall_assert_irqline(vm, param1, param2);
break;
case HC_DEASSERT_IRQLINE:
ret = hcall_deassert_irqline(vm, param1, param2);
break;
case HC_PULSE_IRQLINE:
ret = hcall_pulse_irqline(vm, param1, param2);
break;
case HC_INJECT_MSI:
ret = hcall_inject_msi(vm, param1, param2);
break;
case HC_SET_IOREQ_BUFFER:
ret = hcall_set_ioreq_buffer(vm, param1, param2);
break;
case HC_NOTIFY_REQUEST_FINISH:
ret = hcall_notify_req_finish(param1, param2);
break;
case HC_VM_SET_MEMMAP:
ret = hcall_set_vm_memmap(vm, param1, param2);
break;
case HC_VM_PCI_MSIX_REMAP:
ret = hcall_remap_pci_msix(vm, param1, param2);
break;
case HC_VM_GPA2HPA:
ret = hcall_gpa_to_hpa(vm, param1, param2);
break;
case HC_ASSIGN_PTDEV:
ret = hcall_assign_ptdev(vm, param1, param2);
break;
case HC_DEASSIGN_PTDEV:
ret = hcall_deassign_ptdev(vm, param1, param2);
break;
case HC_SET_PTDEV_INTR_INFO:
ret = hcall_set_ptdev_intr_info(vm, param1, param2);
break;
case HC_RESET_PTDEV_INTR_INFO:
ret = hcall_reset_ptdev_intr_info(vm, param1, param2);
break;
case HC_SETUP_SBUF:
ret = hcall_setup_sbuf(vm, param1);
break;
default:
pr_err("op %d: Invalid hypercall\n", hypcall_id);
ret = -1;
break;
}
cur_context->guest_cpu_regs.regs.rax = ret;
TRACE_2L(TRC_VMEXIT_VMCALL, vm->attr.id, hypcall_id);
return 0;
}
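vmcall_handler() above reads the hypercall id from R8, the first two parameters from RDI/RSI, and writes the result back to RAX. A guest-side sketch of that calling convention might look like the following (GCC inline assembly; the wrapper name is hypothetical and the HC_* id values come from acrn_hv_defs.h):

#include <stdint.h>

/* Illustrative guest-side wrapper matching the register convention used
 * by vmcall_handler(): id in r8, params in rdi/rsi, result in rax.
 * Only meaningful when executed inside a guest of this hypervisor. */
static inline int64_t example_hypercall2(uint64_t id, uint64_t p1, uint64_t p2)
{
        int64_t ret;

        asm volatile ("movq %1, %%r8\n\t"
                      "vmcall"
                      : "=a" (ret)
                      : "r" (id), "D" (p1), "S" (p2)
                      : "r8", "memory");
        return ret;
}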

321
arch/x86/guest/vmsr.c Normal file

@@ -0,0 +1,321 @@
/*
* Copyright (C) 2018 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <hv_debug.h>
/* MSRs that need to be emulated; order entries in this array by frequency of access */
static const uint32_t emulated_msrs[] = {
MSR_IA32_TSC_DEADLINE, /* Enable TSC_DEADLINE VMEXIT */
/* following MSR not emulated now */
/*
* MSR_IA32_APIC_BASE,
* MSR_IA32_SYSENTER_CS,
* MSR_IA32_SYSENTER_ESP,
* MSR_IA32_SYSENTER_EIP,
* MSR_IA32_TSC_AUX,
* MSR_IA32_TIME_STAMP_COUNTER,
*/
};
/* these indices must match the emulated_msrs array */
enum {
IDX_TSC_DEADLINE,
IDX_MAX_MSR
};
static void enable_msr_interception(uint8_t *bitmap, uint32_t msr)
{
uint8_t *read_map;
uint8_t *write_map;
uint8_t value;
/* low MSR */
if (msr <= 0x1FFF) {
read_map = bitmap;
write_map = bitmap + 2048;
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
read_map = bitmap + 1024;
write_map = bitmap + 3072;
} else {
pr_err("Invalid MSR");
return;
}
msr &= 0x1FFF;
value = read_map[(msr>>3)];
value |= 1<<(msr%8);
/* right now we trap for both r/w */
read_map[(msr>>3)] = value;
write_map[(msr>>3)] = value;
}
/* Not used now; kept as an API for cases that may need it later */
void disable_msr_interception(uint8_t *bitmap, uint32_t msr)
{
uint8_t *read_map;
uint8_t *write_map;
uint8_t value;
/* low MSR */
if (msr <= 0x1FFF) {
read_map = bitmap;
write_map = bitmap + 2048;
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
read_map = bitmap + 1024;
write_map = bitmap + 3072;
} else {
pr_err("Invalid MSR");
return;
}
msr &= 0x1FFF;
value = read_map[(msr>>3)];
value &= ~(1<<(msr%8));
/* right now we trap for both r/w */
read_map[(msr>>3)] = value;
write_map[(msr>>3)] = value;
}
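Both helpers above treat the 4 KB MSR bitmap as four 1 KB regions: read-low at offset 0, read-high at 1024, write-low at 2048, write-high at 3072, with one bit per MSR. A small standalone sketch of that byte/bit arithmetic (the function name is made up; it computes the read-bitmap position, the write-bitmap one is the same plus 2048):

#include <stdint.h>
#include <stdbool.h>

/* Illustrative only: compute the read-bitmap byte offset and bit position
 * for an MSR, mirroring enable/disable_msr_interception(). Returns false
 * for MSRs outside the two architectural ranges. */
static bool example_msr_bitmap_pos(uint32_t msr, uint32_t *byte, uint8_t *bit)
{
        uint32_t base;

        if (msr <= 0x1fffU)
                base = 0;       /* low MSRs: bytes 0..1023 */
        else if (msr >= 0xc0000000U && msr <= 0xc0001fffU)
                base = 1024;    /* high MSRs: bytes 1024..2047 */
        else
                return false;

        msr &= 0x1fffU;
        *byte = base + (msr >> 3);
        *bit = (uint8_t)(msr & 7U);
        return true;
}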
void init_msr_emulation(struct vcpu *vcpu)
{
uint32_t i = 0;
uint32_t msrs_count = ARRAY_SIZE(emulated_msrs);
void *msr_bitmap;
uint64_t value64;
ASSERT(msrs_count == IDX_MAX_MSR,
"MSR ID should be matched with emulated_msrs");
/* The MSR bitmap is allocated/initialized once and shared by all vCPUs of the VM */
if (is_vcpu_bsp(vcpu)) {
/* Allocate and initialize memory for MSR bitmap region*/
vcpu->vm->arch_vm.msr_bitmap = alloc_page();
ASSERT(vcpu->vm->arch_vm.msr_bitmap, "");
memset(vcpu->vm->arch_vm.msr_bitmap, 0x0, CPU_PAGE_SIZE);
msr_bitmap = vcpu->vm->arch_vm.msr_bitmap;
for (i = 0; i < msrs_count; i++)
enable_msr_interception(msr_bitmap, emulated_msrs[i]);
/* The MSRs below are protected from the guest OS; accesses inject #GP */
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_CAP);
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_DEF_TYPE);
for (i = MSR_IA32_MTRR_PHYSBASE_0;
i <= MSR_IA32_MTRR_PHYSMASK_9; i++) {
enable_msr_interception(msr_bitmap, i);
}
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_FIX64K_00000);
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_FIX16K_80000);
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_FIX16K_A0000);
for (i = MSR_IA32_MTRR_FIX4K_C0000;
i <= MSR_IA32_MTRR_FIX4K_F8000; i++) {
enable_msr_interception(msr_bitmap, i);
}
}
/* Set up MSR bitmap - SDM Vol 3, Section 24.6.9 */
value64 = (uint64_t) vcpu->vm->arch_vm.msr_bitmap;
exec_vmwrite64(VMX_MSR_BITMAP_FULL, value64);
pr_dbg("VMX_MSR_BITMAP: 0x%016llx ", value64);
vcpu->guest_msrs = (uint64_t *)calloc(msrs_count, sizeof(uint64_t));
ASSERT(vcpu->guest_msrs != NULL, "");
memset(vcpu->guest_msrs, 0, msrs_count * sizeof(uint64_t));
}
int rdmsr_handler(struct vcpu *vcpu)
{
uint32_t msr;
uint64_t v = 0;
uint32_t id;
int cur_context = vcpu->arch_vcpu.cur_context;
/* Read the msr value */
msr = vcpu->arch_vcpu.contexts[cur_context].guest_cpu_regs.regs.rcx;
/* Do the required processing for each msr case */
switch (msr) {
case MSR_IA32_TSC_DEADLINE:
{
v = vcpu->guest_msrs[IDX_TSC_DEADLINE];
break;
}
case MSR_IA32_MTRR_CAP:
case MSR_IA32_MTRR_DEF_TYPE:
case MSR_IA32_MTRR_PHYSBASE_0 ... MSR_IA32_MTRR_PHYSMASK_9:
case MSR_IA32_MTRR_FIX64K_00000 ... MSR_IA32_MTRR_FIX4K_F8000:
{
vcpu_inject_gp(vcpu);
break;
}
/* The following MSRs are not emulated yet; left for the future */
case MSR_IA32_SYSENTER_CS:
{
v = exec_vmread(VMX_GUEST_IA32_SYSENTER_CS);
break;
}
case MSR_IA32_SYSENTER_ESP:
{
v = exec_vmread(VMX_GUEST_IA32_SYSENTER_ESP);
break;
}
case MSR_IA32_SYSENTER_EIP:
{
v = exec_vmread(VMX_GUEST_IA32_SYSENTER_EIP);
break;
}
case MSR_IA32_TSC_AUX:
{
v = vcpu->arch_vcpu.msr_tsc_aux;
break;
}
case MSR_IA32_TIME_STAMP_COUNTER:
{
/* Read the host TSC value */
CPU_RDTSCP_EXECUTE(&v, &id);
/* Add the TSC_offset to host TSC and return the value */
v += exec_vmread64(VMX_TSC_OFFSET_FULL);
break;
}
case MSR_IA32_APIC_BASE:
{
bool ret;
/* Read APIC base */
vlapic_rdmsr(vcpu, msr, &v, &ret);
break;
}
default:
{
pr_warn("rdmsr: %lx should not come here!", msr);
v = 0;
break;
}
}
/* Store the MSR contents in RAX and RDX */
vcpu->arch_vcpu.contexts[cur_context].guest_cpu_regs.regs.rax =
v & 0xffffffff;
vcpu->arch_vcpu.contexts[cur_context].guest_cpu_regs.regs.rdx = v >> 32;
TRACE_2L(TRC_VMEXIT_RDMSR, msr, v);
return 0;
}
int wrmsr_handler(struct vcpu *vcpu)
{
uint32_t msr;
uint64_t v;
struct run_context *cur_context =
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
/* Read the MSR ID */
msr = cur_context->guest_cpu_regs.regs.rcx;
/* Get the MSR contents */
v = (((uint64_t) cur_context->guest_cpu_regs.regs.rdx) << 32) |
((uint64_t) cur_context->guest_cpu_regs.regs.rax);
/* Do the required processing for each msr case */
switch (msr) {
case MSR_IA32_TSC_DEADLINE:
{
bool ret;
/* Write the TSC deadline MSR via the vlapic */
vlapic_wrmsr(vcpu, msr, v, &ret);
vcpu->guest_msrs[IDX_TSC_DEADLINE] = v;
break;
}
case MSR_IA32_MTRR_CAP:
case MSR_IA32_MTRR_DEF_TYPE:
case MSR_IA32_MTRR_PHYSBASE_0 ... MSR_IA32_MTRR_PHYSMASK_9:
case MSR_IA32_MTRR_FIX64K_00000 ... MSR_IA32_MTRR_FIX4K_F8000:
{
vcpu_inject_gp(vcpu);
break;
}
/* The following MSRs are not emulated yet; left for the future */
case MSR_IA32_SYSENTER_CS:
{
exec_vmwrite(VMX_GUEST_IA32_SYSENTER_CS, v);
break;
}
case MSR_IA32_SYSENTER_ESP:
{
exec_vmwrite(VMX_GUEST_IA32_SYSENTER_ESP, v);
break;
}
case MSR_IA32_SYSENTER_EIP:
{
exec_vmwrite(VMX_GUEST_IA32_SYSENTER_EIP, v);
break;
}
case MSR_IA32_GS_BASE:
{
exec_vmwrite(VMX_GUEST_GS_BASE, v);
break;
}
case MSR_IA32_TSC_AUX:
{
vcpu->arch_vcpu.msr_tsc_aux = v;
break;
}
case MSR_IA32_APIC_BASE:
{
bool ret;
/* Write APIC base */
vlapic_wrmsr(vcpu, msr, v, &ret);
break;
}
default:
{
ASSERT(0, "wrmsr: %lx should not come here!", msr);
msr_write(msr, v);
break;
}
}
TRACE_2L(TRC_VMEXIT_WRMSR, msr, v);
return 0;
}
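Both handlers above shuttle the 64-bit MSR value through the guest's EDX:EAX pair, just as the RDMSR/WRMSR instructions do. A trivial standalone illustration of that packing:

#include <stdint.h>

/* Illustrative only: the EDX:EAX packing used by the rdmsr/wrmsr paths. */
static uint64_t example_edx_eax_to_u64(uint32_t edx, uint32_t eax)
{
        return ((uint64_t)edx << 32) | eax;
}

static void example_u64_to_edx_eax(uint64_t v, uint32_t *edx, uint32_t *eax)
{
        *eax = (uint32_t)v;
        *edx = (uint32_t)(v >> 32);
}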

950
arch/x86/guest/vpic.c Normal file

@@ -0,0 +1,950 @@
/*-
* Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* Copyright (c) 2017 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#define pr_fmt(fmt) "vpic: " fmt
#include <hypervisor.h>
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <hv_debug.h>
#define VPIC_LOCK_INIT(vpic) spinlock_init(&((vpic)->lock))
#define VPIC_LOCK(vpic) spinlock_obtain(&((vpic)->lock))
#define VPIC_UNLOCK(vpic) spinlock_release(&((vpic)->lock))
/* TODO: add spinlock_locked support? */
/*#define VPIC_LOCKED(vpic) spinlock_locked(&((vpic)->lock))*/
#define vm_pic(vm) (vm->vpic)
#define true 1
#define false 0
#define ACRN_DBG_PIC 6
enum irqstate {
IRQSTATE_ASSERT,
IRQSTATE_DEASSERT,
IRQSTATE_PULSE
};
struct pic {
bool ready;
int icw_num;
int rd_cmd_reg;
bool aeoi;
bool poll;
bool rotate;
bool sfn; /* special fully-nested mode */
int irq_base;
uint8_t request; /* Interrupt Request Register (IRR) */
uint8_t service; /* In-Service Register (ISR) */
uint8_t mask; /* Interrupt Mask Register (IMR) */
uint8_t smm; /* special mask mode */
int acnt[8]; /* net count of pin asserts minus deasserts */
int lowprio; /* lowest priority irq */
bool intr_raised;
uint8_t elc;
};
struct vpic {
struct vm *vm;
spinlock_t lock;
struct pic pic[2];
};
/*
* Loop over all the pins in priority order from highest to lowest.
*/
#define PIC_PIN_FOREACH(pinvar, pic, tmpvar) \
for (tmpvar = 0, pinvar = (pic->lowprio + 1) & 0x7; \
tmpvar < 8; \
tmpvar++, pinvar = (pinvar + 1) & 0x7)
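PIC_PIN_FOREACH starts scanning at the pin just after lowprio and wraps modulo 8, which is how the priority rotation set by OCW2 takes effect. A standalone illustration of the visiting order (the function name is made up):

#include <stdio.h>

/* Illustrative only: print the order PIC_PIN_FOREACH visits pins for a
 * given lowest-priority pin, e.g. lowprio 7 -> 0 1 2 3 4 5 6 7 and
 * lowprio 2 -> 3 4 5 6 7 0 1 2. */
static void example_show_scan_order(int lowprio)
{
        int pin, i;

        for (i = 0, pin = (lowprio + 1) & 0x7; i < 8;
             i++, pin = (pin + 1) & 0x7)
                printf("%d ", pin);
        printf("\n");
}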
static void vpic_set_pinstate(struct vpic *vpic, int pin, bool newstate);
static inline bool master_pic(struct vpic *vpic, struct pic *pic)
{
if (pic == &vpic->pic[0])
return true;
else
return false;
}
static inline int vpic_get_highest_isrpin(struct pic *pic)
{
int bit, pin;
int i;
PIC_PIN_FOREACH(pin, pic, i) {
bit = (1 << pin);
if (pic->service & bit) {
/*
* An IS bit that is masked by an IMR bit will not be
* cleared by a non-specific EOI in Special Mask Mode.
*/
if (pic->smm && (pic->mask & bit) != 0)
continue;
else
return pin;
}
}
return -1;
}
static inline int vpic_get_highest_irrpin(struct pic *pic)
{
int serviced;
int bit, pin, tmp;
/*
* In 'Special Fully-Nested Mode' when an interrupt request from
* a slave is in service, the slave is not locked out from the
* master's priority logic.
*/
serviced = pic->service;
if (pic->sfn)
serviced &= ~(1 << 2);
/*
* In 'Special Mask Mode', when a mask bit is set in OCW1 it inhibits
* further interrupts at that level and enables interrupts from all
* other levels that are not masked. In other words the ISR has no
* bearing on the levels that can generate interrupts.
*/
if (pic->smm)
serviced = 0;
PIC_PIN_FOREACH(pin, pic, tmp) {
bit = 1 << pin;
/*
* If there is already an interrupt in service at the same
* or higher priority then bail.
*/
if ((serviced & bit) != 0)
break;
/*
* If an interrupt is asserted and not masked then return
* the corresponding 'pin' to the caller.
*/
if ((pic->request & bit) != 0 && (pic->mask & bit) == 0)
return pin;
}
return -1;
}
static void vpic_notify_intr(struct vpic *vpic)
{
struct pic *pic;
int pin;
/*
* First check the slave.
*/
pic = &vpic->pic[1];
pin = vpic_get_highest_irrpin(pic);
if (!pic->intr_raised && pin != -1) {
dev_dbg(ACRN_DBG_PIC,
"pic slave notify pin = %d (imr 0x%x irr 0x%x isr 0x%x)\n",
pin, pic->mask, pic->request, pic->service);
/*
* Cascade the request from the slave to the master.
*/
pic->intr_raised = true;
vpic_set_pinstate(vpic, 2, true);
vpic_set_pinstate(vpic, 2, false);
} else {
dev_dbg(ACRN_DBG_PIC,
"pic slave no eligible interrupt (imr 0x%x irr 0x%x isr 0x%x)",
pic->mask, pic->request, pic->service);
}
/*
* Then check the master.
*/
pic = &vpic->pic[0];
pin = vpic_get_highest_irrpin(pic);
if (!pic->intr_raised && pin != -1) {
dev_dbg(ACRN_DBG_PIC,
"pic master notify pin = %d (imr 0x%x irr 0x%x isr 0x%x)\n",
pin, pic->mask, pic->request, pic->service);
/*
* From Section 3.6.2, "Interrupt Modes", in the
* MPtable Specification, Version 1.4
*
* PIC interrupts are routed to both the Local APIC
* and the I/O APIC to support operation in 1 of 3
* modes.
*
* 1. Legacy PIC Mode: the PIC effectively bypasses
* all APIC components. In this mode the local APIC is
* disabled and LINT0 is reconfigured as INTR to
* deliver the PIC interrupt directly to the CPU.
*
* 2. Virtual Wire Mode: the APIC is treated as a
* virtual wire which delivers interrupts from the PIC
* to the CPU. In this mode LINT0 is programmed as
* ExtINT to indicate that the PIC is the source of
* the interrupt.
*
* 3. Virtual Wire Mode via I/O APIC: PIC interrupts are
* fielded by the I/O APIC and delivered to the appropriate
* CPU. In this mode the I/O APIC input 0 is programmed
* as ExtINT to indicate that the PIC is the source of the
* interrupt.
*/
pic->intr_raised = true;
if (vpic->vm->vpic_wire_mode == VPIC_WIRE_INTR) {
struct vcpu *vcpu = vcpu_from_vid(vpic->vm, 0);
ASSERT(vcpu != NULL, "vm%d, vcpu0", vpic->vm->attr.id);
vcpu_inject_extint(vcpu);
} else {
vlapic_set_local_intr(vpic->vm, -1, APIC_LVT_LINT0);
/* notify vioapic pin0 if it exists:
 * in vPIC + vIOAPIC mode, the vPIC master output is connected
 * to vioapic pin0 (irq2)
 * (see MP Spec section 5.1)
*/
vioapic_pulse_irq(vpic->vm, 0);
}
} else {
dev_dbg(ACRN_DBG_PIC,
"pic master no eligible interrupt (imr 0x%x irr 0x%x isr 0x%x)",
pic->mask, pic->request, pic->service);
}
}
static int vpic_icw1(__unused struct vpic *vpic, struct pic *pic, uint8_t val)
{
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw1 0x%x\n",
vpic->vm, val);
pic->ready = false;
pic->icw_num = 1;
pic->request = 0;
pic->mask = 0;
pic->lowprio = 7;
pic->rd_cmd_reg = 0;
pic->poll = 0;
pic->smm = 0;
if ((val & ICW1_SNGL) != 0) {
dev_dbg(ACRN_DBG_PIC, "vpic cascade mode required\n");
return -1;
}
if ((val & ICW1_IC4) == 0) {
dev_dbg(ACRN_DBG_PIC, "vpic icw4 required\n");
return -1;
}
pic->icw_num++;
return 0;
}
static int vpic_icw2(__unused struct vpic *vpic, struct pic *pic, uint8_t val)
{
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw2 0x%x\n",
vpic->vm, val);
pic->irq_base = val & 0xf8;
pic->icw_num++;
return 0;
}
static int vpic_icw3(__unused struct vpic *vpic, struct pic *pic,
__unused uint8_t val)
{
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw3 0x%x\n",
vpic->vm, val);
pic->icw_num++;
return 0;
}
static int vpic_icw4(struct vpic *vpic, struct pic *pic, uint8_t val)
{
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw4 0x%x\n",
vpic->vm, val);
if ((val & ICW4_8086) == 0) {
dev_dbg(ACRN_DBG_PIC,
"vpic microprocessor mode required\n");
return -1;
}
if ((val & ICW4_AEOI) != 0)
pic->aeoi = true;
if ((val & ICW4_SFNM) != 0) {
if (master_pic(vpic, pic)) {
pic->sfn = true;
} else {
dev_dbg(ACRN_DBG_PIC,
"Ignoring special fully nested mode on slave pic: %#x",
val);
}
}
pic->icw_num = 0;
pic->ready = true;
return 0;
}
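vpic_icw1() through vpic_icw4() above parse the standard 8259A initialization sequence: ICW1 to the command port with the init bit and IC4 set, then ICW2 (vector base), ICW3 (cascade wiring) and ICW4 (8086 mode) to the data port. A guest-side sketch of programming the master PIC that way (ports per the ranges registered in vpic_register_io_handler() below; the outb helper here is a hypothetical stand-in for the usual port-I/O wrapper):

#include <stdint.h>

/* Hypothetical port-I/O helper for the sketch below. */
static inline void example_outb(uint16_t port, uint8_t val)
{
        asm volatile ("outb %0, %1" : : "a" (val), "Nd" (port));
}

/* Illustrative 8259A master init matching the ICW1-ICW4 sequence parsed
 * above: ICW1 (init + IC4), ICW2 vector base 0x20, ICW3 slave on IRQ2,
 * ICW4 8086 mode, then OCW1 to mask all pins. */
static void example_init_master_pic(void)
{
        example_outb(0x20, 0x11);       /* ICW1: init, IC4 required */
        example_outb(0x21, 0x20);       /* ICW2: vector base */
        example_outb(0x21, 0x04);       /* ICW3: slave wired to IRQ2 */
        example_outb(0x21, 0x01);       /* ICW4: 8086 mode */
        example_outb(0x21, 0xff);       /* OCW1: mask everything */
}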
bool vpic_is_pin_mask(struct vpic *vpic, uint8_t virt_pin)
{
struct pic *pic;
if (virt_pin < 8)
pic = &vpic->pic[0];
else if (virt_pin < 16) {
pic = &vpic->pic[1];
virt_pin -= 8;
} else
return true;
if (pic->mask & (1 << virt_pin))
return true;
else
return false;
}
static int vpic_ocw1(struct vpic *vpic, struct pic *pic, uint8_t val)
{
int pin, i, bit;
uint8_t old = pic->mask;
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic ocw1 0x%x\n",
vpic->vm, val);
pic->mask = val & 0xff;
/* query and setup if pin/irq is for passthrough device */
PIC_PIN_FOREACH(pin, pic, i) {
bit = (1 << pin);
/* remap for activation: interrupt mask -> unmask
 * remap for deactivation: handled when the vIOAPIC takes it over
 */
if (((pic->mask & bit) == 0) && (old & bit)) {
struct ptdev_intx_info intx;
/* master PIC pin2 is connected to the slave PIC,
 * not to a device, so no passthrough remap is needed
*/
if ((pin == 2) && master_pic(vpic, pic))
continue;
intx.virt_pin = pin;
intx.vpin_src = PTDEV_VPIN_PIC;
if (!master_pic(vpic, pic))
intx.virt_pin += 8;
ptdev_intx_pin_remap(vpic->vm, &intx);
}
}
return 0;
}
static int vpic_ocw2(struct vpic *vpic, struct pic *pic, uint8_t val)
{
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic ocw2 0x%x\n",
vpic->vm, val);
pic->rotate = ((val & OCW2_R) != 0);
if ((val & OCW2_EOI) != 0) {
int isr_bit;
if ((val & OCW2_SL) != 0) {
/* specific EOI */
isr_bit = val & 0x7;
} else {
/* non-specific EOI */
isr_bit = vpic_get_highest_isrpin(pic);
}
if (isr_bit != -1) {
pic->service &= ~(1 << isr_bit);
if (pic->rotate)
pic->lowprio = isr_bit;
}
/* if level triggered, ack the PTDEV */
if (isr_bit != -1 && (pic->elc & (1 << isr_bit))) {
ptdev_intx_ack(vpic->vm,
master_pic(vpic, pic) ? isr_bit : isr_bit + 8,
PTDEV_VPIN_PIC);
}
} else if ((val & OCW2_SL) != 0 && pic->rotate == true) {
/* specific priority */
pic->lowprio = val & 0x7;
}
return 0;
}
static int vpic_ocw3(__unused struct vpic *vpic, struct pic *pic, uint8_t val)
{
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic ocw3 0x%x\n",
vpic->vm, val);
if (val & OCW3_ESMM) {
pic->smm = val & OCW3_SMM ? 1 : 0;
dev_dbg(ACRN_DBG_PIC, "%s pic special mask mode %s\n",
master_pic(vpic, pic) ? "master" : "slave",
pic->smm ? "enabled" : "disabled");
}
if (val & OCW3_RR) {
/* read register command */
pic->rd_cmd_reg = val & OCW3_RIS;
/* Polling mode */
pic->poll = ((val & OCW3_P) != 0);
}
return 0;
}
static void vpic_set_pinstate(struct vpic *vpic, int pin, bool newstate)
{
struct pic *pic;
int oldcnt, newcnt;
bool level;
ASSERT(pin >= 0 && pin < 16,
"vpic_set_pinstate: invalid pin number");
pic = &vpic->pic[pin >> 3];
oldcnt = pic->acnt[pin & 0x7];
if (newstate)
pic->acnt[pin & 0x7]++;
else
pic->acnt[pin & 0x7]--;
newcnt = pic->acnt[pin & 0x7];
if (newcnt < 0) {
pr_warn("pic pin%d: bad acnt %d\n", pin, newcnt);
}
level = ((vpic->pic[pin >> 3].elc & (1 << (pin & 0x7))) != 0);
if ((oldcnt == 0 && newcnt == 1) || (newcnt > 0 && level == true)) {
/* rising edge or level */
dev_dbg(ACRN_DBG_PIC, "pic pin%d: asserted\n", pin);
pic->request |= (1 << (pin & 0x7));
} else if (oldcnt == 1 && newcnt == 0) {
/* falling edge */
dev_dbg(ACRN_DBG_PIC, "pic pin%d: deasserted\n", pin);
if (level)
pic->request &= ~(1 << (pin & 0x7));
} else {
dev_dbg(ACRN_DBG_PIC,
"pic pin%d: %s, ignored, acnt %d\n",
pin, newstate ? "asserted" : "deasserted", newcnt);
}
vpic_notify_intr(vpic);
}
static int vpic_set_irqstate(struct vm *vm, int irq, enum irqstate irqstate)
{
struct vpic *vpic;
struct pic *pic;
if (irq < 0 || irq > 15)
return -EINVAL;
vpic = vm_pic(vm);
pic = &vpic->pic[irq >> 3];
if (pic->ready == false)
return 0;
VPIC_LOCK(vpic);
switch (irqstate) {
case IRQSTATE_ASSERT:
vpic_set_pinstate(vpic, irq, true);
break;
case IRQSTATE_DEASSERT:
vpic_set_pinstate(vpic, irq, false);
break;
case IRQSTATE_PULSE:
vpic_set_pinstate(vpic, irq, true);
vpic_set_pinstate(vpic, irq, false);
break;
default:
ASSERT(0, "vpic_set_irqstate: invalid irqstate");
}
VPIC_UNLOCK(vpic);
return 0;
}
/* hypervisor interface: assert/deassert/pulse irq */
int vpic_assert_irq(struct vm *vm, int irq)
{
return vpic_set_irqstate(vm, irq, IRQSTATE_ASSERT);
}
int vpic_deassert_irq(struct vm *vm, int irq)
{
return vpic_set_irqstate(vm, irq, IRQSTATE_DEASSERT);
}
int vpic_pulse_irq(struct vm *vm, int irq)
{
return vpic_set_irqstate(vm, irq, IRQSTATE_PULSE);
}
int vpic_set_irq_trigger(struct vm *vm, int irq, enum vpic_trigger trigger)
{
struct vpic *vpic;
if (irq < 0 || irq > 15)
return -EINVAL;
/*
* See comment in vpic_elc_handler. These IRQs must be
* edge triggered.
*/
if (trigger == LEVEL_TRIGGER) {
switch (irq) {
case 0:
case 1:
case 2:
case 8:
case 13:
return -EINVAL;
}
}
vpic = vm_pic(vm);
VPIC_LOCK(vpic);
if (trigger == LEVEL_TRIGGER)
vpic->pic[irq >> 3].elc |= 1 << (irq & 0x7);
else
vpic->pic[irq >> 3].elc &= ~(1 << (irq & 0x7));
VPIC_UNLOCK(vpic);
return 0;
}
int vpic_get_irq_trigger(struct vm *vm, int irq, enum vpic_trigger *trigger)
{
struct vpic *vpic;
if (irq < 0 || irq > 15)
return -EINVAL;
vpic = vm_pic(vm);
if (!vpic)
return -EINVAL;
if (vpic->pic[irq>>3].elc & (1 << (irq & 0x7)))
*trigger = LEVEL_TRIGGER;
else
*trigger = EDGE_TRIGGER;
return 0;
}
void vpic_pending_intr(struct vm *vm, int *vecptr)
{
struct vpic *vpic;
struct pic *pic;
int pin;
vpic = vm_pic(vm);
pic = &vpic->pic[0];
VPIC_LOCK(vpic);
pin = vpic_get_highest_irrpin(pic);
if (pin == 2) {
pic = &vpic->pic[1];
pin = vpic_get_highest_irrpin(pic);
}
/*
* If there are no pins active at this moment then return the spurious
* interrupt vector instead.
*/
if (pin == -1) {
*vecptr = -1;
VPIC_UNLOCK(vpic);
return;
}
ASSERT(pin >= 0 && pin <= 7, "invalid pin");
*vecptr = pic->irq_base + pin;
dev_dbg(ACRN_DBG_PIC, "Got pending vector 0x%x\n", *vecptr);
VPIC_UNLOCK(vpic);
}
static void vpic_pin_accepted(struct pic *pic, int pin)
{
pic->intr_raised = false;
if ((pic->elc & (1 << pin)) == 0) {
/* only clear the request bit in edge trigger mode */
pic->request &= ~(1 << pin);
}
if (pic->aeoi == true) {
if (pic->rotate == true)
pic->lowprio = pin;
} else {
pic->service |= (1 << pin);
}
}
void vpic_intr_accepted(struct vm *vm, int vector)
{
struct vpic *vpic;
int pin;
vpic = vm_pic(vm);
VPIC_LOCK(vpic);
pin = vector & 0x7;
if ((vector & ~0x7) == vpic->pic[1].irq_base) {
vpic_pin_accepted(&vpic->pic[1], pin);
/*
* If this vector originated from the slave,
* accept the cascaded interrupt too.
*/
vpic_pin_accepted(&vpic->pic[0], 2);
} else {
vpic_pin_accepted(&vpic->pic[0], pin);
}
vpic_notify_intr(vpic);
VPIC_UNLOCK(vpic);
}
static int vpic_read(struct vpic *vpic, struct pic *pic,
int port, uint32_t *eax)
{
int pin;
VPIC_LOCK(vpic);
if (pic->poll) {
pic->poll = 0;
pin = vpic_get_highest_irrpin(pic);
if (pin >= 0) {
vpic_pin_accepted(pic, pin);
*eax = 0x80 | pin;
} else {
*eax = 0;
}
} else {
if (port & ICU_IMR_OFFSET) {
/* read interrupt mask register */
*eax = pic->mask;
} else {
if (pic->rd_cmd_reg == OCW3_RIS) {
/* read interrupt service register */
*eax = pic->service;
} else {
/* read interrupt request register */
*eax = pic->request;
}
}
}
VPIC_UNLOCK(vpic);
return 0;
}
static int vpic_write(struct vpic *vpic, struct pic *pic,
int port, uint32_t *eax)
{
int error;
uint8_t val;
error = 0;
val = *eax;
VPIC_LOCK(vpic);
if (port & ICU_IMR_OFFSET) {
switch (pic->icw_num) {
case 2:
error = vpic_icw2(vpic, pic, val);
break;
case 3:
error = vpic_icw3(vpic, pic, val);
break;
case 4:
error = vpic_icw4(vpic, pic, val);
break;
default:
error = vpic_ocw1(vpic, pic, val);
break;
}
} else {
if (val & (1 << 4))
error = vpic_icw1(vpic, pic, val);
if (pic->ready) {
if (val & (1 << 3))
error = vpic_ocw3(vpic, pic, val);
else
error = vpic_ocw2(vpic, pic, val);
}
}
if (pic->ready)
vpic_notify_intr(vpic);
VPIC_UNLOCK(vpic);
return error;
}
static int vpic_master_handler(struct vm *vm, bool in, int port, int bytes,
uint32_t *eax)
{
struct vpic *vpic;
struct pic *pic;
vpic = vm_pic(vm);
pic = &vpic->pic[0];
if (bytes != 1)
return -1;
if (in)
return vpic_read(vpic, pic, port, eax);
return vpic_write(vpic, pic, port, eax);
}
static uint32_t vpic_master_io_read(__unused struct vm_io_handler *hdlr,
struct vm *vm, ioport_t addr, size_t width)
{
uint32_t val = 0;
if (vpic_master_handler(vm, true, (int)addr, (int)width, &val) < 0)
pr_err("pic master read port 0x%x width=%d failed\n",
addr, width);
return val;
}
static void vpic_master_io_write(__unused struct vm_io_handler *hdlr,
struct vm *vm, ioport_t addr, size_t width, uint32_t v)
{
uint32_t val = v;
if (vpic_master_handler(vm, false, (int)addr, (int)width, &val) < 0)
pr_err("%s: write port 0x%x width=%d value 0x%x failed\n",
__func__, addr, width, val);
}
static int vpic_slave_handler(struct vm *vm, bool in, int port, int bytes,
uint32_t *eax)
{
struct vpic *vpic;
struct pic *pic;
vpic = vm_pic(vm);
pic = &vpic->pic[1];
if (bytes != 1)
return -1;
if (in)
return vpic_read(vpic, pic, port, eax);
return vpic_write(vpic, pic, port, eax);
}
static uint32_t vpic_slave_io_read(__unused struct vm_io_handler *hdlr,
struct vm *vm, ioport_t addr, size_t width)
{
uint32_t val = 0;
if (vpic_slave_handler(vm, true, (int)addr, (int)width, &val) < 0)
pr_err("pic slave read port 0x%x width=%d failed\n",
addr, width);
return val;
}
static void vpic_slave_io_write(__unused struct vm_io_handler *hdlr,
struct vm *vm, ioport_t addr, size_t width, uint32_t v)
{
uint32_t val = v;
if (vpic_slave_handler(vm, false, (int)addr, (int)width, &val) < 0)
pr_err("%s: write port 0x%x width=%d value 0x%x failed\n",
__func__, addr, width, val);
}
static int vpic_elc_handler(struct vm *vm, bool in, int port, int bytes,
uint32_t *eax)
{
struct vpic *vpic;
bool is_master;
vpic = vm_pic(vm);
is_master = (port == IO_ELCR1);
if (bytes != 1)
return -1;
VPIC_LOCK(vpic);
if (in) {
if (is_master)
*eax = vpic->pic[0].elc;
else
*eax = vpic->pic[1].elc;
} else {
/*
* For the master PIC the cascade channel (IRQ2), the
* heart beat timer (IRQ0), and the keyboard
* controller (IRQ1) cannot be programmed for level
* mode.
*
* For the slave PIC the real time clock (IRQ8) and
* the floating point error interrupt (IRQ13) cannot
* be programmed for level mode.
*/
if (is_master)
vpic->pic[0].elc = (*eax & 0xf8);
else
vpic->pic[1].elc = (*eax & 0xde);
}
VPIC_UNLOCK(vpic);
return 0;
}
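The 0xf8 and 0xde masks in vpic_elc_handler() above encode the comment: level mode can never be set for IRQ0/1/2 on the master or for IRQ8/IRQ13 on the slave. A quick standalone check of that encoding:

#include <assert.h>
#include <stdint.h>

/* Illustrative only: verify which ELCR bits the write masks force to stay
 * edge triggered (bit n of the slave register corresponds to IRQ 8+n). */
static void example_check_elcr_masks(void)
{
        uint8_t master_mask = 0xf8, slave_mask = 0xde;

        assert((master_mask & (1 << 0)) == 0);  /* IRQ0: timer */
        assert((master_mask & (1 << 1)) == 0);  /* IRQ1: keyboard */
        assert((master_mask & (1 << 2)) == 0);  /* IRQ2: cascade */
        assert((slave_mask & (1 << 0)) == 0);   /* IRQ8: RTC */
        assert((slave_mask & (1 << 5)) == 0);   /* IRQ13: FPU error */
}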
static uint32_t vpic_elc_io_read(__unused struct vm_io_handler *hdlr,
struct vm *vm, ioport_t addr, size_t width)
{
uint32_t val = 0;
if (vpic_elc_handler(vm, true, (int)addr, (int)width, &val) < 0)
pr_err("pic elc read port 0x%x width=%d failed", addr, width);
return val;
}
static void vpic_elc_io_write(__unused struct vm_io_handler *hdlr,
struct vm *vm, ioport_t addr, size_t width, uint32_t v)
{
uint32_t val = v;
if (vpic_elc_handler(vm, false, (int)addr, (int)width, &val) < 0)
pr_err("%s: write port 0x%x width=%d value 0x%x failed\n",
__func__, addr, width, val);
}
void vpic_register_io_handler(struct vm *vm)
{
struct vm_io_range master_range = {
.flags = IO_ATTR_RW,
.base = 0x20,
.len = 2
};
struct vm_io_range slave_range = {
.flags = IO_ATTR_RW,
.base = 0xa0,
.len = 2
};
struct vm_io_range elcr_range = {
.flags = IO_ATTR_RW,
.base = 0x4d0,
.len = 2
};
register_io_emulation_handler(vm, &master_range,
&vpic_master_io_read, &vpic_master_io_write);
register_io_emulation_handler(vm, &slave_range,
&vpic_slave_io_read, &vpic_slave_io_write);
register_io_emulation_handler(vm, &elcr_range,
&vpic_elc_io_read, &vpic_elc_io_write);
}
void *vpic_init(struct vm *vm)
{
struct vpic *vpic;
vpic_register_io_handler(vm);
vpic = malloc(sizeof(struct vpic));
ASSERT(vpic != NULL, "vpic allocation failed");
vpic->vm = vm;
vpic->pic[0].mask = 0xff;
vpic->pic[1].mask = 0xff;
VPIC_LOCK_INIT(vpic);
return vpic;
}
void vpic_cleanup(struct vm *vm)
{
if (vm->vpic) {
free(vm->vpic);
vm->vpic = NULL;
}
}