diff --git a/hypervisor/Documentation/Trusty.txt b/hypervisor/Documentation/Trusty.txt new file mode 100644 index 000000000..8e2aeea4f --- /dev/null +++ b/hypervisor/Documentation/Trusty.txt @@ -0,0 +1,157 @@
+                Trusty on ACRN
+Overview
+Trusty Architecture
+Trusty specific Hypercalls
+Trusty Boot flow
+EPT Hierarchy
+
+********
+Overview
+********
+Trusty is a set of software components supporting a Trusted Execution Environment (TEE).
+Trusty consists of:
+  1. An operating system (the Trusty OS) that runs on a processor intended to provide a TEE
+  2. Drivers for the Android kernel (Linux) to facilitate communication with applications
+     running under the Trusty OS
+  3. A set of libraries for Android/Linux systems software to facilitate communication with
+     trusted applications executed within the Trusty OS using the kernel drivers
+
+LK (Little Kernel) is a tiny operating system suited for small embedded devices, bootloaders,
+and other environments where OS primitives like threads, mutexes, and timers are needed, but
+there's a desire to keep things small and lightweight. LK has been chosen as the Trusty OS kernel.
+
+
+*******************
+Trusty Architecture
+*******************
+                 +---------------------------+
+                 |VMn                        |
+                 |   ......                  |
++------------+ +---------------------------+ |
+|VM0         | |VM1                        | |
+|            | | +--------+   +--------+   | |
+|            | | |        |   |        |   | |
+|    SOS     | | | Normal |   | Secure |   | |
+|            | | |  World |   |  World |   |-+
+|            | | |        |   |        |   |
+|            | | +--------+   +--------+   |
++------------+ +---------------------------+
++-------------------------------------------+
+|              ACRN Hypervisor              |
++-------------------------------------------+
++-------------------------------------------+
+|                     HW                    |
++-------------------------------------------+
+
+Note: In the architecture above, the Trusty OS runs in the Secure World.
+
+
+**************************
+Trusty specific Hypercalls
+**************************
+1. HC_LAUNCH_TRUSTY
+    ->This hypercall is used by the UOSloader to request that ACRN launch Trusty.
+    ->The Trusty memory region range and entry point must be specified.
+    ->The hypervisor needs to save the current vCPU contexts (Normal World).
+2. HC_WORLD_SWITCH
+    ->Simulate the ARM SMC (Secure Monitor Call) instruction to do the world switch.
+    ->The hypervisor needs to save the current world's vCPU contexts and load the next
+      world's vCPU contexts.
+    ->Update rdi, rsi, rdx and rbx to the next world's vCPU context values.
+
+API
+---
+1. hcall_launch_trusty(vm_t *vm);
+2. hcall_world_switch(vm_t *vm);
+
+
+****************
+Trusty Boot flow
+****************
+Per design, the UOSloader triggers the boot of Trusty, so the boot flow is:
+    UOSloader --> ACRN --> Trusty --> ACRN --> UOSloader
+
+Detail:
+1. UOSloader
+    1.1 load and verify trusty image from virtual disk.
+    1.2 allocate runtime memory for trusty.
+    1.3 do ELF relocation of trusty image and get entry address.
+    1.4 call HC_LAUNCH_TRUSTY with trusty memory base and entry address.
+2. ACRN (HC_LAUNCH_TRUSTY)
+    2.1 save World context for Normal World.
+    2.2 init World context for Secure World (RIP, RSP, EPT, etc.).
+    2.3 resume to Secure World.
+3. Trusty
+    3.1 booting up
+    3.2 call HC_WORLD_SWITCH to switch back to Normal World once boot has completed.
+4. ACRN (HC_WORLD_SWITCH)
+    4.1 save World context for the World which caused this vmexit (Secure World)
+    4.2 restore World context for the next World (Normal World (UOSloader))
+    4.3 resume to the next World (UOSloader)
+5. UOSloader
+    5.1 continue to boot.
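+
+Example (illustrative only):
+The sketch below shows how a UOS loader might issue HC_LAUNCH_TRUSTY in step 1.4 above.
+The hypercall ID value, the parameter block layout and the register convention used here
+are assumptions made for illustration; the real ABI is defined by the hypervisor's public
+hypercall headers and its vmcall dispatch code.
+
+    #include <stdint.h>
+
+    /* Hypothetical hypercall ID and parameter block -- for illustration only. */
+    #define HC_LAUNCH_TRUSTY_ID    0x80000001UL
+
+    struct trusty_boot_param {
+        uint64_t base_gpa;   /* runtime memory allocated for Trusty (step 1.2) */
+        uint64_t mem_size;   /* size of that region, e.g. 16M by default       */
+        uint64_t entry_gpa;  /* entry address after ELF relocation (step 1.3)  */
+    };
+
+    static inline int64_t launch_trusty(struct trusty_boot_param *param)
+    {
+        int64_t ret;
+
+        /* Assumed convention: hypercall ID in RDI, parameter GPA in RSI,
+         * return value in RAX. Consult the hypercall headers for the real ABI.
+         */
+        asm volatile ("vmcall"
+                      : "=a" (ret)
+                      : "D" (HC_LAUNCH_TRUSTY_ID), "S" ((uint64_t)param)
+                      : "memory");
+        return ret;  /* control returns here after step 4 (HC_WORLD_SWITCH) */
+    }
+
+HC_WORLD_SWITCH would be triggered with a similar vmcall-based stub once the boot flow
+above reaches steps 3.2 and 4.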
+ + +************* +EPT Hierarchy +************* +Per Trusty design, Trusty can access Normal World's memory, but Normal World cannot +access Secure World's memory. Hence it means Secure World EPTP page table hierarchy +must contain normal world GPA address space, while Trusty world's GPA address space +must be removed from the Normal world EPTP page table hierarchy. + +Design: +Put Secure World's GPA to very high position: 511G-512G. The PML4/PDPT for Trusty +World are separated from Normal World. PD/PT for low memory (<511G) are shared in +both Trusty World's EPT and Normal World's EPT. PD/PT for high memory (>=511G) are +valid for Trusty World's EPT only. + +Benefit: +This design will benefit the EPT changes of Normal World. There are requirement to +modify Normal World's EPT during runtime such as memory increasing, attribute +change, etc. If such behavior happened, only PD and PT for Normal World need to +be updated. + +ABSTRACT EPT hierarchy for 2 Worlds: +==================================================================== ================================================== +: Normal World : : Secure World : +: PML4 : : PML4 : +: +--------+ : : +--------+ : +: | | : : | | : +: | | : : PD | | : +: | | : : +-------+ | | : +: | | : : | | | | : +: | 0-512G |--+ : : | | +--| 0-512G | : +:EPTP -->+--------+ | : : | | | +--------+<-- EPTP : +: | PDPT : : | | PDPT | : +: | +--------+ : : | | +--------+ | : +: | | >=511G |---> Not present : : +-------+<--| >=511G | | : +: | |________| : : |________| | : +: | | | : : | | | : +: | | <511G |->+<----------------------------:--------:--------------| <511G | | : +: | | | | : : | | | : +: +-->+--------+ | PD PT : : +--------+<-+ : +: | ... ... : ================================================== +: | +-------+ +-------+ : +: | +-------+| +-------+| : +: | | || | || : +: | | || | || : +: | | PDE |--+ | || : +: | | || | | || : +: | | |+ | | |+ : +: +-->+-------+ +-->+-------+ : +: : +==================================================================== + +API +---- +/* +Create Secure World EPT hierarchy, construct new PML4/PDPT, reuse PD/PT parse from +vm->arch_vm->ept + +Parameters: + vm: VM with 2 Worlds + gpa: original gpa allocated from vSBL + size: LK size(16M by default) + rebase_offset: rebase the gpa to offset xxx(511G_OFFSET) +*/ +int create_secure_world_ept(vm_t *vm, uint64_t gpa, uint64_t size, uint64_t rebase_offset) diff --git a/hypervisor/MAINTAINERS b/hypervisor/MAINTAINERS new file mode 100644 index 000000000..c5f7055fc --- /dev/null +++ b/hypervisor/MAINTAINERS @@ -0,0 +1,35 @@ +ACRN Hypervisor Maintainers +=========================== + +This file provides information about the primary maintainers for +ACRN Hypervisor Maintainers. + +In general, you should not privately email the maintainer. You should +email the acrn-dev list, but you can also Cc the maintainer. + +Descriptions of section entries: + + L: Mailing list that is relevant to this area (default is acrn-dev) + Patches and questions should be sent to the email list. + M: Cc address for patches and questions (ie, the package maintainer) + W: Web-page with status/info + T: SCM tree type and location. Type is one of: git, svn. + S: Status, one of the following: + Supported: Someone is actually paid to look after this. + Maintained: Someone actually looks after it. + Odd Fixes: It has a maintainer but they don't have time to do + much other than throw the odd patch in. See below. + Orphan: No current maintainer [but maybe you could take the + role as you write your new code]. 
+ Obsolete: Old code. Something tagged obsolete generally means + it has been replaced by a better system and you + should be using that. + +Maintainers List +---------------- +W: N/A +S: Supported +L: https://lists.projectacrn.org/g/acrn-dev +T: git - https://github.com/projectacrn/acrn-hypervisor.git +M: Eddie Dong +M: Jason Chen diff --git a/hypervisor/Makefile b/hypervisor/Makefile new file mode 100644 index 000000000..9d2e3b072 --- /dev/null +++ b/hypervisor/Makefile @@ -0,0 +1,223 @@ +# +# ACRN Hypervisor +# + + +MAJOR_VERSION=0 +MINOR_VERSION=1 + +RELEASE ?= 0 + +GCC_MAJOR=$(shell echo __GNUC__ | $(CC) -E -x c - | tail -n 1) +GCC_MINOR=$(shell echo __GNUC_MINOR__ | $(CC) -E -x c - | tail -n 1) + +#enable stack overflow check +STACK_PROTECTOR := 1 + +BASEDIR := $(shell pwd) +PLATFORM ?= sbl +HV_OBJDIR ?= $(CURDIR)/build +HV_FILE := acrn + +CFLAGS += -Wall -W +CFLAGS += -ffunction-sections -fdata-sections +CFLAGS += -fshort-wchar -ffreestanding +CFLAGS += -m64 +CFLAGS += -mno-red-zone +CFLAGS += -static -nostdinc -nostdlib -fno-common + +ifdef STACK_PROTECTOR +ifeq (true, $(shell [ $(GCC_MAJOR) -gt 4 ] && echo true)) +CFLAGS += -fstack-protector-strong +else +ifeq (true, $(shell [ $(GCC_MAJOR) -eq 4 ] && [ $(GCC_MINOR) -ge 9 ] && echo true)) +CFLAGS += -fstack-protector-strong +else +CFLAGS += -fstack-protector +endif +endif +CFLAGS += -DSTACK_PROTECTOR +endif + +ASFLAGS += -m64 -nostdinc -nostdlib + +LDFLAGS += -Wl,--gc-sections -static -nostartfiles -nostdlib +LDFLAGS += -Wl,-n,-z,max-page-size=0x1000 +LDFLAGS += -Wl,-z,noexecstack + +ARCH_CFLAGS += -gdwarf-2 -O0 +ARCH_ASFLAGS += -gdwarf-2 -DASSEMBLER=1 +ARCH_ARFLAGS += +ARCH_LDFLAGS += + +ARCH_LDSCRIPT = $(HV_OBJDIR)/link_ram.ld +ARCH_LDSCRIPT_IN = bsp/ld/link_ram.ld.in + +INCLUDE_PATH += include +INCLUDE_PATH += include/lib +INCLUDE_PATH += include/common +INCLUDE_PATH += include/arch/x86 +INCLUDE_PATH += include/arch/x86/guest +INCLUDE_PATH += include/debug +INCLUDE_PATH += include/public +INCLUDE_PATH += include/common +INCLUDE_PATH += bsp/include +INCLUDE_PATH += bsp/$(PLATFORM)/include/bsp +INCLUDE_PATH += boot/include + +CC = gcc +AS = as +AR = ar +LD = gcc +POSTLD = objcopy + +D_SRCS += debug/dump.c +D_SRCS += debug/logmsg.c +D_SRCS += debug/shell_internal.c +D_SRCS += debug/shell_public.c +D_SRCS += debug/vuart.c +D_SRCS += debug/serial.c +D_SRCS += debug/uart16550.c +D_SRCS += debug/console.c +D_SRCS += debug/sbuf.c +C_SRCS += debug/printf.c +D_SRCS += boot/acpi.c +C_SRCS += boot/dmar_parse.c +C_SRCS += arch/x86/ioapic.c +C_SRCS += arch/x86/intr_lapic.c +S_SRCS += arch/x86/cpu_secondary.S +C_SRCS += arch/x86/cpu.c +C_SRCS += arch/x86/softirq.c +C_SRCS += arch/x86/cpuid.c +C_SRCS += arch/x86/mmu.c +C_SRCS += arch/x86/notify.c +C_SRCS += arch/x86/intr_main.c +C_SRCS += arch/x86/vtd.c +C_SRCS += arch/x86/gdt.c +S_SRCS += arch/x86/cpu_primary.S +S_SRCS += arch/x86/idt.S +C_SRCS += arch/x86/irq.c +C_SRCS += arch/x86/timer.c +C_SRCS += arch/x86/ept.c +S_SRCS += arch/x86/vmx_asm.S +C_SRCS += arch/x86/io.c +C_SRCS += arch/x86/interrupt.c +C_SRCS += arch/x86/vmexit.c +C_SRCS += arch/x86/vmx.c +C_SRCS += arch/x86/assign.c +C_SRCS += arch/x86/guest/vcpu.c +C_SRCS += arch/x86/guest/vm.c +C_SRCS += arch/x86/guest/instr_emul_wrapper.c +C_SRCS += arch/x86/guest/vlapic.c +C_SRCS += arch/x86/guest/guest.c +C_SRCS += arch/x86/guest/vmcall.c +C_SRCS += arch/x86/guest/vpic.c +C_SRCS += arch/x86/guest/vmsr.c +C_SRCS += arch/x86/guest/vioapic.c +C_SRCS += arch/x86/guest/instr_emul.c +C_SRCS += lib/spinlock.c +C_SRCS += lib/udelay.c +C_SRCS += 
lib/strnlen.c +C_SRCS += lib/memchr.c +C_SRCS += lib/stdlib.c +C_SRCS += lib/memcpy.c +C_SRCS += lib/strtol.c +C_SRCS += lib/mdelay.c +C_SRCS += lib/div.c +C_SRCS += lib/strchr.c +C_SRCS += lib/strcpy.c +C_SRCS += lib/memset.c +C_SRCS += lib/mem_mgt.c +C_SRCS += lib/strncpy.c +C_SRCS += lib/crypto/tinycrypt/hmac.c +C_SRCS += lib/crypto/tinycrypt/sha256.c +C_SRCS += lib/crypto/hkdf.c +C_SRCS += common/hv_main.c +C_SRCS += common/hypercall.c +C_SRCS += common/schedule.c +C_SRCS += common/vm_load.c + +ifdef STACK_PROTECTOR +C_SRCS += common/stack_protector.c +endif + +C_SRCS += bsp/$(PLATFORM)/vm_description.c +C_SRCS += bsp/$(PLATFORM)/$(PLATFORM).c + +ifeq ($(PLATFORM),uefi) +C_SRCS += bsp/$(PLATFORM)/cmdline.c +endif + +C_OBJS := $(patsubst %.c,$(HV_OBJDIR)/%.o,$(C_SRCS)) +ifeq ($(RELEASE),0) +C_OBJS += $(patsubst %.c,$(HV_OBJDIR)/%.o,$(D_SRCS)) +CFLAGS += -DHV_DEBUG +endif +S_OBJS := $(patsubst %.S,$(HV_OBJDIR)/%.o,$(S_SRCS)) + +DISTCLEAN_OBJS := $(shell find $(BASEDIR) -name '*.o') +VERSION := bsp/$(PLATFORM)/include/bsp/version.h + +.PHONY: all +all: $(VERSION) $(HV_OBJDIR)/$(HV_FILE).32.out $(HV_OBJDIR)/$(HV_FILE).bin + rm -f $(VERSION) + +ifeq ($(PLATFORM), uefi) +all: efi +.PHONY: efi +efi: $(HV_OBJDIR)/$(HV_FILE).bin + echo "building hypervisor as EFI executable..." + make -C bsp/uefi/efi HV_OBJDIR=$(HV_OBJDIR) RELEASE=$(RELEASE) + +install: efi + make -C bsp/uefi/efi HV_OBJDIR=$(HV_OBJDIR) RELEASE=$(RELEASE) install +endif + +$(HV_OBJDIR)/$(HV_FILE).32.out: $(HV_OBJDIR)/$(HV_FILE).out + $(POSTLD) -S --section-alignment=0x1000 -O elf32-i386 $< $@ + +$(HV_OBJDIR)/$(HV_FILE).bin: $(HV_OBJDIR)/$(HV_FILE).out + $(POSTLD) -O binary $< $(HV_OBJDIR)/$(HV_FILE).bin + +$(HV_OBJDIR)/$(HV_FILE).out: $(C_OBJS) $(S_OBJS) + $(CC) -E -x c $(patsubst %, -I%, $(INCLUDE_PATH)) $(ARCH_LDSCRIPT_IN) | grep -v '^#' > $(ARCH_LDSCRIPT) + $(LD) -Wl,-Map=$(HV_OBJDIR)/$(HV_FILE).map -o $@ $(LDFLAGS) $(ARCH_LDFLAGS) -T$(ARCH_LDSCRIPT) $^ + +.PHONY: clean +clean: + rm -f $(C_OBJS) + rm -f $(S_OBJS) + rm -f $(VERSION) + rm -rf $(HV_OBJDIR) + +.PHONY: distclean +distclean: + rm -f $(DISTCLEAN_OBJS) + rm -f $(C_OBJS) + rm -f $(S_OBJS) + rm -f $(VERSION) + rm -rf $(HV_OBJDIR) + rm -f tags TAGS cscope.files cscope.in.out cscope.out cscope.po.out GTAGS GPATH GRTAGS GSYMS + +PHONY: (VERSION) +$(VERSION): + touch $(VERSION) + @COMMIT=`git rev-parse --verify --short HEAD 2>/dev/null`;\ + DIRTY=`git diff-index --name-only HEAD`;\ + if [ -n "$$DIRTY" ];then PATCH="$$COMMIT-dirty";else PATCH="$$COMMIT";fi;\ + TIME=`date "+%Y%m%d"`;\ + cat license_header > $(VERSION);\ + echo "#define HV_MAJOR_VERSION $(MAJOR_VERSION)" >> $(VERSION);\ + echo "#define HV_MINOR_VERSION $(MINOR_VERSION)" >> $(VERSION);\ + echo "#define HV_BUILD_VERSION "\""$$PATCH"\""" >> $(VERSION);\ + echo "#define HV_BUILD_TIME "\""$$TIME"\""" >> $(VERSION);\ + echo "#define HV_BUILD_USER "\""$(USER)"\""" >> $(VERSION) + +$(HV_OBJDIR)/%.o: %.c + [ ! -e $@ ] && mkdir -p $(dir $@); \ + $(CC) $(patsubst %, -I%, $(INCLUDE_PATH)) -I. -c $(CFLAGS) $(ARCH_CFLAGS) $< -o $@ + +$(HV_OBJDIR)/%.o: %.S + [ ! -e $@ ] && mkdir -p $(dir $@); \ + $(CC) $(patsubst %, -I%, $(INCLUDE_PATH)) -I. 
$(ASFLAGS) $(ARCH_ASFLAGS) -c $< -o $@
+
diff --git a/hypervisor/README.rst b/hypervisor/README.rst new file mode 100644 index 000000000..0948e4ecb --- /dev/null +++ b/hypervisor/README.rst @@ -0,0 +1,25 @@
+Embedded-Hypervisor
+###################
+
+This open source embedded hypervisor defines a software architecture for
+running multiple software subsystems managed securely on a consolidated
+system (by means of a virtual machine manager), and defines a reference
+framework Device Model implementation for device emulation.
+
+This embedded hypervisor is a type-1 reference hypervisor, running
+directly on the system hardware. It can be used for building software
+defined cockpit (SDC) or In-Vehicle Experience (IVE) solutions running
+on Intel Architecture Apollo Lake platforms. As a reference
+implementation, it provides the basis for embedded hypervisor vendors to
+build solutions with an open source reference I/O mediation solution,
+and provides auto makers a reference software stack for SDC usage.
+
+This embedded hypervisor is able to support both Linux* and Android* as
+a Guest OS, managed by the hypervisor, where applications can run.
+
+This embedded hypervisor is a partitioning hypervisor reference stack,
+also suitable for non-automotive IoT & embedded device solutions. It
+will address the gap that currently exists between datacenter
+hypervisors, hard partitioning hypervisors, and select industrial
+applications. Extending the scope of this open source embedded
+hypervisor relies on the involvement of community developers like you!
diff --git a/hypervisor/arch/x86/assign.c b/hypervisor/arch/x86/assign.c new file mode 100644 index 000000000..a6b9f2b00 --- /dev/null +++ b/hypervisor/arch/x86/assign.c @@ -0,0 +1,1015 @@
+/*
+ * Copyright (C) 2018 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */ + +#include +#include +#include +#include +#include + +#define ACTIVE_FLAG 0x1 /* any non zero should be okay */ +#define INTX_ID_IOAPIC (0 << 23) +#define INTX_ID_PIC (1 << 23) + +/* SOFTIRQ_DEV_ASSIGN list for all CPUs */ +static struct list_head softirq_dev_entry_list; + +/* + * entry could both be in ptdev_list and softirq_dev_entry_list. + * When release entry, we need make sure entry deleted from both + * lists. We have to require two locks and the lock sequence is: + * vm->ptdev_lock + * softirq_dev_lock + */ +static spinlock_t softirq_dev_lock; + +static inline uint32_t +entry_id_from_msix(uint16_t vbdf, int8_t index) +{ + uint32_t id = index; + + id = vbdf | (id << 16) | (PTDEV_INTR_MSI << 24); + return id; +} + +static inline uint32_t +entry_id_from_intx(uint8_t vpin, enum ptdev_vpin_source vpin_src) +{ + uint32_t id = vpin; + uint32_t src_id = + vpin_src == PTDEV_VPIN_IOAPIC ? INTX_ID_IOAPIC : INTX_ID_PIC; + + id = id | src_id | (PTDEV_INTR_INTX << 24); + return id; +} + +static inline uint32_t +entry_id(struct ptdev_remapping_info *entry) +{ + uint32_t id; + + if (entry->type == PTDEV_INTR_INTX) + id = entry_id_from_intx(entry->intx.virt_pin, + entry->intx.vpin_src); + else + id = entry_id_from_msix(entry->virt_bdf, + entry->msi.msix_entry_index); + + return id; +} + +static inline bool +entry_is_active(struct ptdev_remapping_info *entry) +{ + return atomic_load_acq_int(&entry->active) == ACTIVE_FLAG; +} + +/* require ptdev_lock protect */ +static inline struct ptdev_remapping_info * +_get_remapping_entry(struct vm *vm, uint32_t id) +{ + struct ptdev_remapping_info *entry; + struct list_head *pos; + + list_for_each(pos, &vm->ptdev_list) { + entry = list_entry(pos, struct ptdev_remapping_info, + entry_node); + if (entry_id(entry) == id) + return entry; + } + + return NULL; +} + +static inline struct ptdev_remapping_info * +get_remapping_entry(struct vm *vm, uint32_t id) +{ + struct ptdev_remapping_info *entry; + + spinlock_obtain(&vm->ptdev_lock); + entry = _get_remapping_entry(vm, id); + spinlock_release(&vm->ptdev_lock); + return entry; +} + +static void ptdev_enqueue_softirq(struct ptdev_remapping_info *entry) +{ + spinlock_rflags; + /* enqueue request in order, SOFTIRQ_DEV_ASSIGN will pickup */ + spinlock_irqsave_obtain(&softirq_dev_lock); + + /* avoid adding recursively */ + list_del(&entry->softirq_node); + /* TODO: assert if entry already in list */ + list_add_tail(&entry->softirq_node, + &softirq_dev_entry_list); + spinlock_irqrestore_release(&softirq_dev_lock); + raise_softirq(SOFTIRQ_DEV_ASSIGN); +} + +static struct ptdev_remapping_info* +ptdev_dequeue_softirq(void) +{ + struct ptdev_remapping_info *entry = NULL; + + spinlock_rflags; + spinlock_irqsave_obtain(&softirq_dev_lock); + + if (!list_empty(&softirq_dev_entry_list)) { + entry = get_first_item(&softirq_dev_entry_list, + struct ptdev_remapping_info, softirq_node); + list_del_init(&entry->softirq_node); + } + + spinlock_irqrestore_release(&softirq_dev_lock); + return entry; +} + +/* interrupt context */ +static int ptdev_interrupt_handler(__unused int irq, void *data) +{ + struct ptdev_remapping_info *entry = + (struct ptdev_remapping_info *) data; + + ptdev_enqueue_softirq(entry); + return 0; +} + +static void +ptdev_update_irq_handler(struct vm *vm, struct ptdev_remapping_info *entry) +{ + int phys_irq = dev_to_irq(entry->node); + + if (entry->type == PTDEV_INTR_MSI) { + /* all other MSI and normal maskable */ + update_irq_handler(phys_irq, common_handler_edge); + } + /* update irq handler for IOAPIC */ + 
if ((entry->type == PTDEV_INTR_INTX) + && (entry->intx.vpin_src == PTDEV_VPIN_IOAPIC)) { + uint64_t rte; + bool trigger_lvl = false; + + /* VPIN_IOAPIC src means we have vioapic enabled */ + vioapic_get_rte(vm, entry->intx.virt_pin, &rte); + if ((rte & IOAPIC_RTE_TRGRMOD) == IOAPIC_RTE_TRGRLVL) + trigger_lvl = true; + + if (trigger_lvl) + update_irq_handler(phys_irq, common_dev_handler_level); + else + update_irq_handler(phys_irq, common_handler_edge); + } + /* update irq handler for PIC */ + if ((entry->type == PTDEV_INTR_INTX) && (phys_irq < NR_LEGACY_IRQ) + && (entry->intx.vpin_src == PTDEV_VPIN_PIC)) { + enum vpic_trigger trigger; + + /* VPIN_PIC src means we have vpic enabled */ + vpic_get_irq_trigger(vm, entry->intx.virt_pin, &trigger); + if (trigger == LEVEL_TRIGGER) + update_irq_handler(phys_irq, common_dev_handler_level); + else + update_irq_handler(phys_irq, common_handler_edge); + } +} + +/* require ptdev_lock protect */ +static struct ptdev_remapping_info * +alloc_entry(struct vm *vm, enum ptdev_intr_type type) +{ + struct ptdev_remapping_info *entry; + + /* allocate */ + entry = calloc(1, sizeof(*entry)); + ASSERT(entry, "alloc memory failed"); + entry->type = type; + entry->vm = vm; + atomic_clear_int(&entry->active, ACTIVE_FLAG); + list_add(&entry->entry_node, &vm->ptdev_list); + + return entry; +} + +/* require ptdev_lock protect */ +static void +release_entry(struct ptdev_remapping_info *entry) +{ + spinlock_rflags; + + /* remove entry from ptdev_list */ + list_del_init(&entry->entry_node); + + /* + * remove entry from softirq list.the ptdev_lock + * is required before calling release_entry. + */ + spinlock_irqsave_obtain(&softirq_dev_lock); + list_del_init(&entry->softirq_node); + spinlock_irqrestore_release(&softirq_dev_lock); + + free(entry); +} + +/* require ptdev_lock protect */ +static void +release_all_entry(struct vm *vm) +{ + struct ptdev_remapping_info *entry; + struct list_head *pos, *tmp; + + list_for_each_safe(pos, tmp, &vm->ptdev_list) { + entry = list_entry(pos, struct ptdev_remapping_info, + entry_node); + release_entry(entry); + } +} + +/* active intr with irq registering */ +static struct ptdev_remapping_info * +ptdev_activate_entry(struct ptdev_remapping_info *entry, int phys_irq, + bool lowpri) +{ + struct dev_handler_node *node; + + /* register and allocate host vector/irq */ + node = normal_register_handler(phys_irq, ptdev_interrupt_handler, + (void *)entry, true, lowpri, "dev assign"); + + ASSERT(node != NULL, "dev register failed"); + entry->node = node; + + atomic_set_int(&entry->active, ACTIVE_FLAG); + return entry; +} + +static void +ptdev_deactivate_entry(struct ptdev_remapping_info *entry) +{ + spinlock_rflags; + + atomic_clear_int(&entry->active, ACTIVE_FLAG); + + unregister_handler_common(entry->node); + entry->node = NULL; + + /* remove from softirq list if added */ + spinlock_irqsave_obtain(&softirq_dev_lock); + list_del_init(&entry->softirq_node); + spinlock_irqrestore_release(&softirq_dev_lock); +} + +static void check_deactive_pic_intx(struct vm *vm, uint8_t phys_pin) +{ + struct ptdev_remapping_info *entry; + struct list_head *pos; + + if (phys_pin >= NR_LEGACY_IRQ) + return; + + spinlock_obtain(&vm->ptdev_lock); + list_for_each(pos, &vm->ptdev_list) { + entry = list_entry(pos, struct ptdev_remapping_info, + entry_node); + if (entry->type == PTDEV_INTR_INTX && + entry->intx.vpin_src == PTDEV_VPIN_PIC && + entry->intx.phys_pin == phys_pin && + entry_is_active(entry)) { + GSI_MASK_IRQ(pin_to_irq(phys_pin)); + 
ptdev_deactivate_entry(entry); + dev_dbg(ACRN_DBG_IRQ, + "IOAPIC pin=%d pirq=%d deassigned ", + phys_pin, pin_to_irq(phys_pin)); + dev_dbg(ACRN_DBG_IRQ, "from vm%d vPIC vpin=%d", + entry->vm->attr.id, entry->intx.virt_pin); + } + } + spinlock_release(&vm->ptdev_lock); +} + +static bool ptdev_native_owned_intx(struct vm *vm, struct ptdev_intx_info *info) +{ + /* vm0 pin 4 (uart) is owned by hypervisor */ + if (is_vm0(vm) && info->virt_pin == 4) + return true; + else + return false; +} + +static void ptdev_build_native_msi(struct vm *vm, struct ptdev_msi_info *info, + int vector) +{ + uint64_t vdmask, pdmask; + uint32_t dest, delmode; + bool phys; + + /* native destination cpu mask */ + dest = (info->vmsi_addr >> 12) & 0xff; + phys = ((info->vmsi_addr & + (MSI_ADDR_RH | MSI_ADDR_LOG)) != + (MSI_ADDR_RH | MSI_ADDR_LOG)); + calcvdest(vm, &vdmask, dest, phys); + pdmask = vcpumask2pcpumask(vm, vdmask); + + /* native delivery mode */ + delmode = info->vmsi_data & APIC_DELMODE_MASK; + if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) + delmode = APIC_DELMODE_LOWPRIO; + + /* update native delivery mode & vector */ + info->pmsi_data = info->vmsi_data; + info->pmsi_data &= ~0x7FF; + info->pmsi_data |= delmode | vector; + + /* update native dest mode & dest field */ + info->pmsi_addr = info->vmsi_addr; + info->pmsi_addr &= ~0xFF00C; + info->pmsi_addr |= pdmask << 12 | + MSI_ADDR_RH | MSI_ADDR_LOG; + + dev_dbg(ACRN_DBG_IRQ, "MSI addr:data = 0x%x:%x(V) -> 0x%x:%x(P)", + info->vmsi_addr, info->vmsi_data, + info->pmsi_addr, info->pmsi_data); +} + +static uint64_t ptdev_build_native_rte(struct vm *vm, struct ptdev_intx_info *info, + int vector) +{ + uint64_t rte; + + if (info->vpin_src == PTDEV_VPIN_IOAPIC) { + uint64_t vdmask, pdmask; + uint32_t dest, low, high, delmode; + bool phys; + + vioapic_get_rte(vm, info->virt_pin, &rte); + low = rte; + high = rte >> 32; + + /* native destination cpu mask */ + phys = ((low & IOAPIC_RTE_DESTMOD) == IOAPIC_RTE_DESTPHY); + dest = high >> APIC_ID_SHIFT; + calcvdest(vm, &vdmask, dest, phys); + pdmask = vcpumask2pcpumask(vm, vdmask); + + /* native delivery mode */ + delmode = low & IOAPIC_RTE_DELMOD; + if ((delmode != IOAPIC_RTE_DELFIXED) && + (delmode != IOAPIC_RTE_DELLOPRI)) + delmode = IOAPIC_RTE_DELLOPRI; + + /* update native delivery mode, dest mode(logical) & vector */ + low &= ~(IOAPIC_RTE_DESTMOD | + IOAPIC_RTE_DELMOD | IOAPIC_RTE_INTVEC); + low |= IOAPIC_RTE_DESTLOG | delmode | vector; + + /* update native dest field */ + high &= ~IOAPIC_RTE_DEST; + high |= pdmask << 24; + + dev_dbg(ACRN_DBG_IRQ, "IOAPIC RTE = 0x%x:%x(V) -> 0x%x:%x(P)", + rte >> 32, (uint32_t)rte, high, low); + + rte = high; + rte = rte << 32 | low; + } else { + enum vpic_trigger trigger; + int phys_irq = pin_to_irq(info->phys_pin); + uint64_t native_rte; + + /* just update trigger mode */ + ioapic_get_rte(phys_irq, &native_rte); + rte = native_rte; + rte &= ~IOAPIC_RTE_TRGRMOD; + vpic_get_irq_trigger(vm, info->virt_pin, &trigger); + if (trigger == LEVEL_TRIGGER) + rte |= IOAPIC_RTE_TRGRLVL; + + dev_dbg(ACRN_DBG_IRQ, "IOAPIC RTE = 0x%x:%x(P) -> 0x%x:%x(P)", + native_rte >> 32, (uint32_t)native_rte, + rte >> 32, (uint32_t)rte); + } + + return rte; +} + +static struct ptdev_remapping_info * +add_msix_remapping(struct vm *vm, uint16_t virt_bdf, uint16_t phys_bdf, + int msix_entry_index) +{ + struct ptdev_remapping_info *entry; + + spinlock_obtain(&vm->ptdev_lock); + entry = _get_remapping_entry(vm, + entry_id_from_msix(virt_bdf, msix_entry_index)); + if (!entry) { + entry = 
alloc_entry(vm, PTDEV_INTR_MSI); + entry->virt_bdf = virt_bdf; + entry->phys_bdf = phys_bdf; + entry->msi.msix_entry_index = msix_entry_index; + } + spinlock_release(&vm->ptdev_lock); + + dev_dbg(ACRN_DBG_IRQ, + "VM%d MSIX add vector mapping vbdf%x:pbdf%x idx=%d", + entry->vm->attr.id, virt_bdf, phys_bdf, msix_entry_index); + return entry; +} + +/* deactive & remove mapping entry of msix_entry_index for virt_bdf */ +static void +remove_msix_remapping(struct vm *vm, uint16_t virt_bdf, int msix_entry_index) +{ + struct ptdev_remapping_info *entry; + + spinlock_obtain(&vm->ptdev_lock); + entry = _get_remapping_entry(vm, + entry_id_from_msix(virt_bdf, msix_entry_index)); + if (!entry) + goto END; + + if (entry_is_active(entry)) + /*TODO: disable MSIX device when HV can in future */ + ptdev_deactivate_entry(entry); + + dev_dbg(ACRN_DBG_IRQ, + "VM%d MSIX remove vector mapping vbdf-pbdf:0x%x-0x%x idx=%d", + entry->vm->attr.id, + virt_bdf, entry->phys_bdf, msix_entry_index); + + release_entry(entry); + +END: + spinlock_release(&vm->ptdev_lock); + +} + +/* install virt to phys pin mapping if not existing */ +static struct ptdev_remapping_info * +add_intx_remapping(struct vm *vm, uint8_t virt_pin, uint8_t phys_pin, bool pic_pin) +{ + struct ptdev_remapping_info *entry; + enum ptdev_vpin_source vpin_src = + pic_pin ? PTDEV_VPIN_PIC : PTDEV_VPIN_IOAPIC; + + spinlock_obtain(&vm->ptdev_lock); + entry = _get_remapping_entry(vm, + entry_id_from_intx(virt_pin, vpin_src)); + if (!entry) { + entry = alloc_entry(vm, PTDEV_INTR_INTX); + entry->intx.vpin_src = vpin_src; + } + + /* update existing */ + entry->intx.virt_pin = virt_pin; + entry->intx.phys_pin = phys_pin; + spinlock_release(&vm->ptdev_lock); + + dev_dbg(ACRN_DBG_IRQ, + "VM%d INTX add pin mapping vpin%d:ppin%d", + entry->vm->attr.id, virt_pin, phys_pin); + + return entry; +} + +/* deactive & remove mapping entry of virt_pin */ +void remove_intx_remapping(struct vm *vm, uint8_t virt_pin, bool pic_pin) +{ + int phys_irq, phys_pin; + struct ptdev_remapping_info *entry; + enum ptdev_vpin_source vpin_src = + pic_pin ? PTDEV_VPIN_PIC : PTDEV_VPIN_IOAPIC; + + spinlock_obtain(&vm->ptdev_lock); + entry = _get_remapping_entry(vm, + entry_id_from_intx(virt_pin, vpin_src)); + if (!entry) + goto END; + + if (entry_is_active(entry)) { + phys_pin = entry->intx.phys_pin; + phys_irq = pin_to_irq(phys_pin); + if (!irq_is_gsi(phys_irq)) + goto END; + + /* disable interrupt */ + GSI_MASK_IRQ(phys_irq); + + ptdev_deactivate_entry(entry); + dev_dbg(ACRN_DBG_IRQ, + "deactive %s intx entry:ppin=%d, pirq=%d ", + pic_pin ? 
"PIC" : "IOAPIC", phys_pin, phys_irq); + dev_dbg(ACRN_DBG_IRQ, "from vm%d vpin=%d\n", + entry->vm->attr.id, virt_pin); + } + + release_entry(entry); + +END: + spinlock_release(&vm->ptdev_lock); +} + +static void ptdev_intr_handle_irq(struct vm *vm, + struct ptdev_remapping_info *entry) +{ + switch (entry->intx.vpin_src) { + case PTDEV_VPIN_IOAPIC: + { + uint64_t rte; + bool trigger_lvl = false; + + /* VPIN_IOAPIC src means we have vioapic enabled */ + vioapic_get_rte(vm, entry->intx.virt_pin, &rte); + if ((rte & IOAPIC_RTE_TRGRMOD) == IOAPIC_RTE_TRGRLVL) + trigger_lvl = true; + + if (trigger_lvl) + vioapic_assert_irq(vm, entry->intx.virt_pin); + else + vioapic_pulse_irq(vm, entry->intx.virt_pin); + + dev_dbg(ACRN_DBG_PTIRQ, + "dev-assign: irq=0x%x assert vr: 0x%x vRTE=0x%x", + dev_to_irq(entry->node), + irq_to_vector(dev_to_irq(entry->node)), rte); + break; + } + case PTDEV_VPIN_PIC: + { + enum vpic_trigger trigger; + + /* VPIN_PIC src means we have vpic enabled */ + vpic_get_irq_trigger(vm, entry->intx.virt_pin, &trigger); + if (trigger == LEVEL_TRIGGER) + vpic_assert_irq(vm, entry->intx.virt_pin); + else + vpic_pulse_irq(vm, entry->intx.virt_pin); + break; + } + default: + break; + } +} + +void ptdev_intx_ack(struct vm *vm, int virt_pin, + enum ptdev_vpin_source vpin_src) +{ + int phys_irq; + struct ptdev_remapping_info *entry; + int phys_pin; + + entry = get_remapping_entry(vm, entry_id_from_intx(virt_pin, vpin_src)); + if (!entry) + return; + + ASSERT(vpin_src == entry->intx.vpin_src, "PIN source not match"); + phys_pin = entry->intx.phys_pin; + phys_irq = pin_to_irq(phys_pin); + if (!irq_is_gsi(phys_irq)) + return; + + /* NOTE: only Level trigger will process EOI/ACK and if we got here + * means we have this vioapic or vpic or both enabled + */ + switch (entry->intx.vpin_src) { + case PTDEV_VPIN_IOAPIC: + vioapic_deassert_irq(vm, virt_pin); + break; + case PTDEV_VPIN_PIC: + vpic_deassert_irq(vm, virt_pin); + default: + break; + } + + dev_dbg(ACRN_DBG_PTIRQ, "dev-assign: irq=0x%x acked vr: 0x%x", + phys_irq, irq_to_vector(phys_irq)); + GSI_UNMASK_IRQ(phys_irq); +} + +/* Main entry for PCI device assignment with MSI and MSI-X + * MSI can up to 8 vectors and MSI-X can up to 1024 Vectors + * We use msix_entry_index to indicate coming vectors + * msix_entry_index = 0 means first vector + * user must provide bdf and msix_entry_index + */ +int ptdev_msix_remap(struct vm *vm, uint16_t virt_bdf, + struct ptdev_msi_info *info) +{ + struct ptdev_remapping_info *entry; + bool lowpri = !is_vm0(vm); + + entry = get_remapping_entry(vm, + entry_id_from_msix(virt_bdf, info->msix_entry_index)); + if (!entry) { + /* VM0 we add mapping dynamically */ + if (is_vm0(vm)) { + entry = add_msix_remapping(vm, virt_bdf, virt_bdf, + info->msix_entry_index); + } else { + pr_err("dev-assign: msi entry not exist"); + return -1; + } + } + + /* handle destroy case */ + if (entry_is_active(entry) && info->vmsi_data == 0) { + info->pmsi_data = 0; + ptdev_deactivate_entry(entry); + goto END; + } + + if (!entry_is_active(entry)) { + /* update msi source and active entry */ + ptdev_activate_entry(entry, -1, lowpri); + } + + /* build native config MSI, update to info->pmsi_xxx */ + ptdev_build_native_msi(vm, info, + dev_to_vector(entry->node)); + entry->msi = *info; + entry->msi.virt_vector = info->vmsi_data & 0xFF; + entry->msi.phys_vector = dev_to_vector(entry->node); + + /* update irq handler according to info in guest */ + ptdev_update_irq_handler(vm, entry); + + dev_dbg(ACRN_DBG_IRQ, + "PCI %x:%x.%x MSI VR[%d] 
0x%x->0x%x assigned to vm%d", + (entry->virt_bdf >> 8) & 0xFF, + (entry->virt_bdf >> 3) & 0x1F, + (entry->virt_bdf) & 0x7, entry->msi.msix_entry_index, + entry->msi.virt_vector, entry->msi.phys_vector, + entry->vm->attr.id); +END: + return 0; +} + +static bool vpin_masked(struct vm *vm, uint8_t virt_pin, + enum ptdev_vpin_source vpin_src) +{ + if (vpin_src == PTDEV_VPIN_IOAPIC) { + uint64_t rte; + + vioapic_get_rte(vm, virt_pin, &rte); + if ((rte & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET) + return true; + else + return false; + } else + return vpic_is_pin_mask(vm->vpic, virt_pin); +} + +static void update_active_native_ioapic(struct vm *vm, struct ptdev_intx_info *info, + struct ptdev_remapping_info *entry, int phys_irq) +{ + uint64_t rte; + + /* disable interrupt */ + GSI_MASK_IRQ(phys_irq); + + /* build native IOAPIC RTE, update to info->rte */ + rte = ptdev_build_native_rte(vm, info, + dev_to_vector(entry->node)); + + /* set rte entry */ + GSI_SET_RTE(phys_irq, rte | IOAPIC_RTE_INTMSET); + + /* update irq handler according to info in guest */ + ptdev_update_irq_handler(vm, entry); + + /* enable interrupt */ + GSI_UNMASK_IRQ(phys_irq); +} + +/* Main entry for PCI/Legacy device assignment with INTx */ +int ptdev_intx_pin_remap(struct vm *vm, struct ptdev_intx_info *info) +{ + struct ptdev_remapping_info *entry; + uint64_t rte; + int phys_irq; + int phys_pin; + bool lowpri = !is_vm0(vm); + + /* + * virt pin could come from vpic master, vpic slave or vioapic + * while phys pin is always means for native IOAPIC + * Device Model should tell us the mapping information + */ + + /* no fix for native owned intx */ + if (ptdev_native_owned_intx(vm, info)) + goto END; + + /* query if we have virt to phys mapping */ + entry = get_remapping_entry(vm, entry_id_from_intx(info->virt_pin, + info->vpin_src)); + if (!entry) { + /* allocate entry during first unmask */ + if (vpin_masked(vm, info->virt_pin, info->vpin_src)) + goto END; + if (is_vm0(vm)) { + bool pic_pin = (info->vpin_src == PTDEV_VPIN_PIC); + + info->phys_pin = info->virt_pin; + /* fix vPIC pin to correct native IOAPIC pin */ + if (pic_pin) + info->phys_pin = + legacy_irq_to_pin[info->virt_pin]; + + entry = add_intx_remapping(vm, info->virt_pin, + info->phys_pin, pic_pin); + } else + goto END; + } + + /* phys_pin from native IOAPIC */ + phys_pin = entry->intx.phys_pin; + phys_irq = pin_to_irq(phys_pin); + if (!irq_is_gsi(phys_irq)) + goto END; + + if (entry_is_active(entry) + && (info->vpin_src == PTDEV_VPIN_IOAPIC)) { + /* SOS/UOS vIOAPIC pin will be dynamically free/assign + * while vPIC pin is static assigned + */ + vioapic_get_rte(vm, info->virt_pin, &rte); + if (((uint32_t)rte) == 0x10000) { + /* disable interrupt */ + GSI_MASK_IRQ(phys_irq); + ptdev_deactivate_entry(entry); + dev_dbg(ACRN_DBG_IRQ, + "IOAPIC pin=%d pirq=%d deassigned ", + phys_pin, phys_irq); + dev_dbg(ACRN_DBG_IRQ, "from vm%d vIOAPIC vpin=%d", + entry->vm->attr.id, info->virt_pin); + goto END; + } else { + /*update rte*/ + update_active_native_ioapic(vm, info, entry, phys_irq); + } + } else if (entry_is_active(entry) + && info->vpin_src == PTDEV_VPIN_PIC) { + /* only update here + * deactive vPIC entry when IOAPIC take it over + */ + update_active_native_ioapic(vm, info, entry, phys_irq); + } else { + /* + * both vIOAPIC & vPIC take native IOAPIC path + * vIOAPIC: build native RTE according vIOAPIC configuration + * vPIC: keep native RTE configuration in setup_ioapic_irq() + */ + if (info->vpin_src == PTDEV_VPIN_IOAPIC) + check_deactive_pic_intx(vm, phys_pin); + + /* 
active entry */ + ptdev_activate_entry(entry, phys_irq, lowpri); + + update_active_native_ioapic(vm, info, entry, phys_irq); + + dev_dbg(ACRN_DBG_IRQ, + "IOAPIC pin=%d pirq=%d assigned to vm%d %s vpin=%d", + phys_pin, phys_irq, entry->vm->attr.id, + info->vpin_src == PTDEV_VPIN_PIC ? "vPIC" : "vIOAPIC", + info->virt_pin); + } +END: + return 0; +} + +void ptdev_softirq(__unused int cpu) +{ + while (1) { + struct ptdev_remapping_info *entry = ptdev_dequeue_softirq(); + struct vm *vm; + + if (!entry) + break; + + /* skip any inactive entry */ + if (!entry_is_active(entry)) { + /* service next item */ + continue; + } + + /* TBD: need valid vm */ + vm = entry->vm; + + /* handle real request */ + if (entry->type == PTDEV_INTR_INTX) + ptdev_intr_handle_irq(vm, entry); + else { + /* TODO: msi destmode check required */ + vlapic_intr_msi(vm, entry->msi.vmsi_addr, + entry->msi.vmsi_data); + dev_dbg(ACRN_DBG_PTIRQ, + "dev-assign: irq=0x%x MSI VR: 0x%x-0x%x", + dev_to_irq(entry->node), + entry->msi.virt_vector, + irq_to_vector(dev_to_irq(entry->node))); + dev_dbg(ACRN_DBG_PTIRQ, + " vmsi_addr: 0x%x vmsi_data: 0x%x", + entry->msi.vmsi_addr, entry->msi.vmsi_data); + } + } +} + +void ptdev_init(void) +{ + if (get_cpu_id() > 0) + return; + + INIT_LIST_HEAD(&softirq_dev_entry_list); + spinlock_init(&softirq_dev_lock); +} + +void ptdev_vm_init(struct vm *vm) +{ + INIT_LIST_HEAD(&vm->ptdev_list); + spinlock_init(&vm->ptdev_lock); +} + +void ptdev_vm_deinit(struct vm *vm) +{ + /* VM already down */ + spinlock_obtain(&vm->ptdev_lock); + release_all_entry(vm); + spinlock_release(&vm->ptdev_lock); +} + +void ptdev_add_intx_remapping(struct vm *vm, + __unused uint16_t virt_bdf, __unused uint16_t phys_bdf, + uint8_t virt_pin, uint8_t phys_pin, bool pic_pin) +{ + if (vm == NULL) { + pr_err("ptdev_add_intx_remapping fails!\n"); + return; + } + + add_intx_remapping(vm, virt_pin, phys_pin, pic_pin); +} + +void ptdev_remove_intx_remapping(struct vm *vm, uint8_t virt_pin, bool pic_pin) +{ + if (vm == NULL) { + pr_err("ptdev_remove_intr_remapping fails!\n"); + return; + } + + remove_intx_remapping(vm, virt_pin, pic_pin); +} + +void ptdev_add_msix_remapping(struct vm *vm, uint16_t virt_bdf, uint16_t phys_bdf, + int vector_count) +{ + int i; + + for (i = 0; i < vector_count; i++) + add_msix_remapping(vm, virt_bdf, phys_bdf, i); +} + +void ptdev_remove_msix_remapping(struct vm *vm, uint16_t virt_bdf, int vector_count) +{ + int i; + + if (vm == NULL) { + pr_err("ptdev_remove_msix_remapping fails!\n"); + return; + } + + for (i = 0; i < vector_count; i++) + remove_msix_remapping(vm, virt_bdf, i); +} + +static void get_entry_info(struct ptdev_remapping_info *entry, char *type, + int *irq, int *vector, uint64_t *dest, bool *lvl_tm, + int *pin, int *vpin, int *bdf, int *vbdf) +{ + if (entry_is_active(entry)) { + if (entry->type == PTDEV_INTR_MSI) { + strcpy_s(type, 16, "MSI"); + *dest = (entry->msi.pmsi_addr & 0xFF000) >> 12; + if (entry->msi.pmsi_data & APIC_TRIGMOD_LEVEL) + *lvl_tm = true; + else + *lvl_tm = false; + *pin = -1; + *vpin = -1; + *bdf = entry->phys_bdf; + *vbdf = entry->virt_bdf; + } else { + int phys_irq = pin_to_irq(entry->intx.phys_pin); + uint64_t rte = 0; + + if (entry->intx.vpin_src == PTDEV_VPIN_IOAPIC) + strcpy_s(type, 16, "IOAPIC"); + else + strcpy_s(type, 16, "PIC"); + ioapic_get_rte(phys_irq, &rte); + *dest = ((rte >> 32) & IOAPIC_RTE_DEST) >> 24; + if (rte & IOAPIC_RTE_TRGRLVL) + *lvl_tm = true; + else + *lvl_tm = false; + *pin = entry->intx.phys_pin; + *vpin = entry->intx.virt_pin; + *bdf = 0; + *vbdf 
= 0; + } + *irq = dev_to_irq(entry->node); + *vector = dev_to_vector(entry->node); + } else { + strcpy_s(type, 16, "NONE"); + *irq = -1; + *vector = 0; + *dest = 0; + *lvl_tm = 0; + *pin = -1; + *vpin = -1; + *bdf = 0; + *vbdf = 0; + } +} + +int get_ptdev_info(char *str, int str_max) +{ + struct ptdev_remapping_info *entry; + int len, size = str_max, irq, vector; + char type[16]; + uint64_t dest; + bool lvl_tm; + int pin, vpin, bdf, vbdf; + struct list_head *pos = NULL, *vm_pos; + struct vm *vm; + + len = snprintf(str, size, + "\r\nVM\tTYPE\tIRQ\tVEC\tDEST\tTM\tPIN\tVPIN\tBDF\tVBDF"); + size -= len; + str += len; + + spinlock_obtain(&vm_list_lock); + list_for_each(vm_pos, &vm_list) { + vm = list_entry(pos, struct vm, list); + spinlock_obtain(&vm->ptdev_lock); + list_for_each(pos, &vm->ptdev_list) { + entry = list_entry(pos, struct ptdev_remapping_info, + entry_node); + if (entry_is_active(entry)) { + get_entry_info(entry, type, &irq, &vector, + &dest, &lvl_tm, &pin, &vpin, + &bdf, &vbdf); + len = snprintf(str, size, + "\r\n%d\t%s\t%d\t0x%X\t0x%X", + entry->vm->attr.id, type, + irq, vector, dest); + size -= len; + str += len; + + len = snprintf(str, size, + "\t%s\t%d\t%d\t%x:%x.%x\t%x:%x.%x", + entry_is_active(entry) ? + (lvl_tm ? "level" : "edge") : "none", + pin, vpin, + (bdf & 0xff00) >> 8, + (bdf & 0xf8) >> 3, bdf & 0x7, + (vbdf & 0xff00) >> 8, + (vbdf & 0xf8) >> 3, vbdf & 0x7); + size -= len; + str += len; + } + } + spinlock_release(&vm->ptdev_lock); + } + spinlock_release(&vm_list_lock); + + snprintf(str, size, "\r\n"); + return 0; +} diff --git a/hypervisor/arch/x86/cpu.c b/hypervisor/arch/x86/cpu.c new file mode 100644 index 000000000..5f0c80200 --- /dev/null +++ b/hypervisor/arch/x86/cpu.c @@ -0,0 +1,650 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_EFI_STUB +extern uint32_t efi_physical_available_ap_bitmap; +#endif + +uint64_t tsc_clock_freq = 1000000000; + +spinlock_t cpu_secondary_spinlock = { + .head = 0, + .tail = 0 +}; + +spinlock_t up_count_spinlock = { + .head = 0, + .tail = 0 +}; + +void *per_cpu_data_base_ptr; +int phy_cpu_num; +unsigned long pcpu_sync = 0; +uint32_t up_count = 0; + +DEFINE_CPU_DATA(uint8_t[STACK_SIZE], stack) __aligned(16); +DEFINE_CPU_DATA(uint8_t, lapic_id); +DEFINE_CPU_DATA(void *, vcpu); +DEFINE_CPU_DATA(int, state); + +/* TODO: add more capability per requirement */ +struct cpu_capability { + bool tsc_adjust_supported; + bool ibrs_ibpb_supported; + bool stibp_supported; + bool apicv_supported; + bool monitor_supported; +}; +static struct cpu_capability cpu_caps; + +static void apicv_cap_detect(void); +static void cpu_set_logical_id(uint32_t logical_id); +static void print_hv_banner(void); +bool check_monitor_support(void); +int cpu_find_logical_id(uint32_t lapic_id); +#ifndef CONFIG_EFI_STUB +static void start_cpus(); +#endif +static void pcpu_sync_sleep(unsigned long *sync, int mask_bit); +int ibrs_type; +static void check_cpu_capability(void) +{ + uint32_t eax, ebx, ecx, edx; + + memset(&cpu_caps, 0, sizeof(struct cpu_capability)); + + cpuid(CPUID_EXTEND_FEATURE, &eax, &ebx, &ecx, &edx); + + cpu_caps.tsc_adjust_supported = (ebx & CPUID_EBX_TSC_ADJ) ? + (true) : (false); + cpu_caps.ibrs_ibpb_supported = (edx & CPUID_EDX_IBRS_IBPB) ? + (true) : (false); + cpu_caps.stibp_supported = (edx & CPUID_EDX_STIBP) ? + (true) : (false); + + /* For speculation defence. + * The default way is to set IBRS at vmexit and then do IBPB at vcpu + * context switch(ibrs_type == IBRS_RAW). + * Now provide an optimized way (ibrs_type == IBRS_OPT) which set + * STIBP and do IBPB at vmexit,since having STIBP always set has less + * impact than having IBRS always set. Also since IBPB is already done + * at vmexit, it is no necessary to do so at vcpu context switch then. + */ + ibrs_type = IBRS_NONE; + + /* Currently for APL, if we enabled retpoline, then IBRS should not + * take effect + * TODO: add IA32_ARCH_CAPABILITIES[1] check, if this bit is set, IBRS + * should be set all the time instead of relying on retpoline + */ +#ifndef CONFIG_RETPOLINE + if (cpu_caps.ibrs_ibpb_supported) { + ibrs_type = IBRS_RAW; + if (cpu_caps.stibp_supported) + ibrs_type = IBRS_OPT; + } +#endif +} + +bool check_tsc_adjust_support(void) +{ + return cpu_caps.tsc_adjust_supported; +} + +bool check_ibrs_ibpb_support(void) +{ + return cpu_caps.ibrs_ibpb_supported; +} + +bool check_stibp_support(void) +{ + return cpu_caps.stibp_supported; +} + +static void alloc_phy_cpu_data(int pcpu_num) +{ + phy_cpu_num = pcpu_num; + + per_cpu_data_base_ptr = calloc(1, PER_CPU_DATA_SIZE * pcpu_num); + ASSERT(per_cpu_data_base_ptr != NULL, ""); +} + +int __attribute__((weak)) parse_madt(uint8_t *lapic_id_base) +{ + static const uint32_t lapic_id[] = {0, 2, 4, 6}; + uint32_t i; + + for (i = 0; i < ARRAY_SIZE(lapic_id); i++) + *lapic_id_base++ = lapic_id[i]; + + return ARRAY_SIZE(lapic_id); +} + +static int init_phy_cpu_storage(void) +{ + int i, pcpu_num = 0; + int bsp_cpu_id; + uint8_t bsp_lapic_id = 0; + uint8_t *lapic_id_base; + + /* + * allocate memory to save all lapic_id detected in parse_mdt. + * We allocate 4K size which could save 4K CPUs lapic_id info. 
+ */ + lapic_id_base = alloc_page(CPU_PAGE_SIZE); + ASSERT(lapic_id_base != NULL, "fail to alloc page"); + + pcpu_num = parse_madt(lapic_id_base); + alloc_phy_cpu_data(pcpu_num); + + for (i = 0; i < pcpu_num; i++) { + per_cpu(lapic_id, i) = *lapic_id_base++; +#ifdef CONFIG_EFI_STUB + efi_physical_available_ap_bitmap |= 1 << per_cpu(lapic_id, i); +#endif + } + + /* free memory after lapic_id are saved in per_cpu data */ + free(lapic_id_base); + + bsp_lapic_id = get_cur_lapic_id(); + +#ifdef CONFIG_EFI_STUB + efi_physical_available_ap_bitmap &= ~(1 << bsp_lapic_id); +#endif + + bsp_cpu_id = cpu_find_logical_id(bsp_lapic_id); + ASSERT(bsp_cpu_id >= 0, "fail to get phy cpu id"); + + return bsp_cpu_id; +} + +static void cpu_set_current_state(uint32_t logical_id, int state) +{ + spinlock_obtain(&up_count_spinlock); + + /* Check if state is initializing */ + if (state == CPU_STATE_INITIALIZING) { + /* Increment CPU up count */ + up_count++; + + /* Save this CPU's logical ID to the TSC AUX MSR */ + cpu_set_logical_id(logical_id); + } + + /* Set state for the specified CPU */ + per_cpu(state, logical_id) = state; + + spinlock_release(&up_count_spinlock); +} + +#ifdef STACK_PROTECTOR +struct stack_canary { + /* Gcc generates extra code, using [fs:40] to access canary */ + uint8_t reserved[40]; + uint64_t canary; +}; + +static DEFINE_CPU_DATA(struct stack_canary, stack_canary); + +static uint64_t get_random_value(void) +{ + uint64_t random = 0; + + asm volatile ("1: rdrand %%rax\n" + "jnc 1b\n" + "mov %%rax, %0\n" + : "=r"(random) :: ); + return random; +} + +static void set_fs_base(void) +{ + struct stack_canary *psc = &get_cpu_var(stack_canary); + + psc->canary = get_random_value(); + msr_write(MSR_IA32_FS_BASE, (uint64_t)psc); +} +#endif + +void bsp_boot_init(void) +{ +#ifdef HV_DEBUG + uint64_t start_tsc = rdtsc(); +#endif + + /* Clear BSS */ + memset(_ld_bss_start, 0, _ld_bss_end - _ld_bss_start); + + /* Build time sanity checks to make sure hard-coded offset + * is matching the actual offset! 
+ */ + STATIC_ASSERT(offsetof(struct cpu_regs, rax) == + VMX_MACHINE_T_GUEST_RAX_OFFSET); + STATIC_ASSERT(offsetof(struct cpu_regs, rbx) == + VMX_MACHINE_T_GUEST_RBX_OFFSET); + STATIC_ASSERT(offsetof(struct cpu_regs, rcx) == + VMX_MACHINE_T_GUEST_RCX_OFFSET); + STATIC_ASSERT(offsetof(struct cpu_regs, rdx) == + VMX_MACHINE_T_GUEST_RDX_OFFSET); + STATIC_ASSERT(offsetof(struct cpu_regs, rbp) == + VMX_MACHINE_T_GUEST_RBP_OFFSET); + STATIC_ASSERT(offsetof(struct cpu_regs, rsi) == + VMX_MACHINE_T_GUEST_RSI_OFFSET); + STATIC_ASSERT(offsetof(struct cpu_regs, rdi) == + VMX_MACHINE_T_GUEST_RDI_OFFSET); + STATIC_ASSERT(offsetof(struct cpu_regs, r8) == + VMX_MACHINE_T_GUEST_R8_OFFSET); + STATIC_ASSERT(offsetof(struct cpu_regs, r9) == + VMX_MACHINE_T_GUEST_R9_OFFSET); + STATIC_ASSERT(offsetof(struct cpu_regs, r10) == + VMX_MACHINE_T_GUEST_R10_OFFSET); + STATIC_ASSERT(offsetof(struct cpu_regs, r11) == + VMX_MACHINE_T_GUEST_R11_OFFSET); + STATIC_ASSERT(offsetof(struct cpu_regs, r12) == + VMX_MACHINE_T_GUEST_R12_OFFSET); + STATIC_ASSERT(offsetof(struct cpu_regs, r13) == + VMX_MACHINE_T_GUEST_R13_OFFSET); + STATIC_ASSERT(offsetof(struct cpu_regs, r14) == + VMX_MACHINE_T_GUEST_R14_OFFSET); + STATIC_ASSERT(offsetof(struct cpu_regs, r15) == + VMX_MACHINE_T_GUEST_R15_OFFSET); + STATIC_ASSERT(offsetof(struct run_context, cr2) == + VMX_MACHINE_T_GUEST_CR2_OFFSET); + STATIC_ASSERT(offsetof(struct run_context, ia32_spec_ctrl) == + VMX_MACHINE_T_GUEST_SPEC_CTRL_OFFSET); + + /* Initialize the hypervisor paging */ + init_paging(); + + early_init_lapic(); + + init_phy_cpu_storage(); + + load_gdtr_and_tr(); + + /* Switch to run-time stack */ + CPU_SP_WRITE(&get_cpu_var(stack)[STACK_SIZE - 1]); + +#ifdef STACK_PROTECTOR + set_fs_base(); +#endif + + check_cpu_capability(); + + apicv_cap_detect(); + + /* Set state for this CPU to initializing */ + cpu_set_current_state(CPU_BOOT_ID, CPU_STATE_INITIALIZING); + + /* Perform any necessary BSP initialization */ + init_bsp(); + + /* Initialize Serial */ + serial_init(); + + /* Initialize console */ + console_init(); + + /* Print Hypervisor Banner */ + print_hv_banner(); + + /* Make sure rdtsc is enabled */ + check_tsc(); + + /* Calculate TSC Frequency */ + tsc_clock_freq = tsc_cycles_in_period(1000) / 1000 * 1000000; + + /* Enable logging */ + init_logmsg(LOG_BUF_SIZE, + LOG_DESTINATION); + +#ifdef HV_DEBUG + /* Log first messages */ + printf("HV version %d.%d-%s-%s build by %s, start time %lluus\r\n", + HV_MAJOR_VERSION, HV_MINOR_VERSION, HV_BUILD_TIME, + HV_BUILD_VERSION, HV_BUILD_USER, + TICKS_TO_US(start_tsc)); +#endif + pr_dbg("Core %d is up", CPU_BOOT_ID); + + /* Warn for security feature not ready */ + if (!check_ibrs_ibpb_support() && !check_stibp_support()) { + pr_fatal("SECURITY WARNING!!!!!!"); + pr_fatal("Please apply the latest CPU uCode patch!"); + } + + /* Initialize the shell */ + shell_init(); + + /* Initialize interrupts */ + interrupt_init(CPU_BOOT_ID); + + timer_init(); + setup_notification(); + ptdev_init(); + + init_scheduler(); + +#ifndef CONFIG_EFI_STUB + /* Start all secondary cores */ + start_cpus(); + + /* Trigger event to allow secondary CPUs to continue */ + bitmap_set(0, &pcpu_sync); +#else + memcpy_s(_ld_cpu_secondary_reset_start, + (unsigned long)&_ld_cpu_secondary_reset_size, + _ld_cpu_secondary_reset_load, + (unsigned long)&_ld_cpu_secondary_reset_size); +#endif + + ASSERT(get_cpu_id() == CPU_BOOT_ID, ""); + + init_iommu(); + + console_setup_timer(); + + /* Start initializing the VM for this CPU */ + hv_main(CPU_BOOT_ID); + + /* Control should 
not come here */ + cpu_halt(CPU_BOOT_ID); +} + +void cpu_secondary_init(void) +{ + /* NOTE: Use of local / stack variables in this function is problematic + * since the stack is switched in the middle of the function. For this + * reason, the logical id is only temporarily stored in a static + * variable, but this will be over-written once subsequent CPUs + * start-up. Once the spin-lock is released, the cpu_logical_id_get() + * API is used to obtain the logical ID + */ + + /* Switch this CPU to use the same page tables set-up by the + * primary/boot CPU + */ + enable_paging(get_paging_pml4()); + early_init_lapic(); + + /* Find the logical ID of this CPU given the LAPIC ID + * temp_logical_id = + * cpu_find_logical_id(get_cur_lapic_id()); + */ + cpu_find_logical_id(get_cur_lapic_id()); + + /* Set state for this CPU to initializing */ + cpu_set_current_state(cpu_find_logical_id + (get_cur_lapic_id()), + CPU_STATE_INITIALIZING); + + /* Switch to run-time stack */ + CPU_SP_WRITE(&get_cpu_var(stack)[STACK_SIZE - 1]); + +#ifdef STACK_PROTECTOR + set_fs_base(); +#endif + + load_gdtr_and_tr(); + + /* Make sure rdtsc is enabled */ + check_tsc(); + + pr_dbg("Core %d is up", get_cpu_id()); + + /* Release secondary boot spin-lock to allow one of the next CPU(s) to + * perform this common initialization + */ + spinlock_release(&cpu_secondary_spinlock); + + /* Initialize secondary processor interrupts. */ + interrupt_init(get_cpu_id()); + + timer_init(); + + /* Wait for boot processor to signal all secondary cores to continue */ + pcpu_sync_sleep(&pcpu_sync, 0); + +#ifdef CONFIG_EFI_STUB + bitmap_clr(0, &pcpu_sync); +#endif + + hv_main(get_cpu_id()); + + /* Control will only come here for secondary CPUs not configured for + * use or if an error occurs in hv_main + */ + cpu_halt(get_cpu_id()); +} + +int cpu_find_logical_id(uint32_t lapic_id) +{ + int i; + + for (i = 0; i < phy_cpu_num; i++) { + if (per_cpu(lapic_id, i) == lapic_id) + return i; + } + + return -1; +} + +#ifndef CONFIG_EFI_STUB +/* + * Start all secondary CPUs. + */ +static void start_cpus() +{ + uint32_t timeout; + uint32_t expected_up; + + /*Copy segment for AP initialization code below 1MB */ + memcpy_s(_ld_cpu_secondary_reset_start, + (unsigned long)&_ld_cpu_secondary_reset_size, + _ld_cpu_secondary_reset_load, + (unsigned long)&_ld_cpu_secondary_reset_size); + + /* Set flag showing number of CPUs expected to be up to all + * cpus + */ + expected_up = phy_cpu_num; + + /* Broadcast IPIs to all other CPUs */ + send_startup_ipi(INTR_CPU_STARTUP_ALL_EX_SELF, + -1U, ((paddr_t) cpu_secondary_reset)); + + /* Wait until global count is equal to expected CPU up count or + * configured time-out has expired + */ + timeout = CPU_UP_TIMEOUT * 1000; + while ((up_count != expected_up) && (timeout != 0)) { + /* Delay 10us */ + udelay(10); + + /* Decrement timeout value */ + timeout -= 10; + } + + /* Check to see if all expected CPUs are actually up */ + if (up_count != expected_up) { + /* Print error */ + pr_fatal("Secondary CPUs failed to come up"); + + /* Error condition - loop endlessly for now */ + do { + } while (1); + } +} +#endif + +void cpu_halt(uint32_t logical_id) +{ + /* For debug purposes, using a stack variable in the while loop enables + * us to modify the value using a JTAG probe and resume if needed. 
+ */ + int halt = 1; + + /* Set state to show CPU is halted */ + cpu_set_current_state(logical_id, CPU_STATE_HALTED); + + /* Halt the CPU */ + do { + asm volatile ("hlt"); + } while (halt); +} + +static void cpu_set_logical_id(uint32_t logical_id) +{ + /* Write TSC AUX register */ + msr_write(MSR_IA32_TSC_AUX, (uint64_t) logical_id); +} + +static void print_hv_banner(void) +{ + char *boot_msg = "ACRN Hypervisor\n\r"; + + /* Print the boot message */ + printf(boot_msg); +} + +static void pcpu_sync_sleep(unsigned long *sync, int mask_bit) +{ + int wake_sync = (1 << mask_bit); + + if (check_monitor_support()) { + /* Wait for the event to be set using monitor/mwait */ + asm volatile ("1: cmpl %%ebx,(%%eax)\n" + " je 2f\n" + " monitor\n" + " mwait\n" + " jmp 1b\n" + "2:\n" + : + : "a" (sync), "d"(0), "c"(0), + "b"(wake_sync) + : "cc"); + } else { + /* Wait for the event to be set using pause */ + asm volatile ("1: cmpl %%ebx,(%%eax)\n" + " je 2f\n" + " pause\n" + " jmp 1b\n" + "2:\n" + : + : "a" (sync), "d"(0), "c"(0), + "b"(wake_sync) + : "cc"); + } +} + +/*check allowed ONEs setting in vmx control*/ +static bool is_ctrl_setting_allowed(uint64_t msr_val, uint32_t ctrl) +{ + /* + * Intel SDM Appendix A.3 + * - bitX in ctrl can be set 1 + * only if bit 32+X in msr_val is 1 + */ + return ((((uint32_t)(msr_val >> 32)) & ctrl) == ctrl); +} + +static void apicv_cap_detect(void) +{ + uint64_t val64; + uint32_t ctrl; + bool result; + + ctrl = VMX_PROCBASED_CTLS_TPR_SHADOW; + val64 = msr_read(MSR_IA32_VMX_PROCBASED_CTLS); + + result = is_ctrl_setting_allowed(val64, ctrl); + if (result) { + ctrl = VMX_PROCBASED_CTLS2_VAPIC | + VMX_PROCBASED_CTLS2_VAPIC_REGS | + VMX_PROCBASED_CTLS2_VIRQ; + + val64 = msr_read(MSR_IA32_VMX_PROCBASED_CTLS2); + result = is_ctrl_setting_allowed(val64, ctrl); + } + + cpu_caps.apicv_supported = result; +} + +bool is_apicv_enabled(void) +{ + return cpu_caps.apicv_supported; +} + +static void monitor_cap_detect(void) +{ + uint32_t eax, ebx, ecx, edx; + uint32_t family; + uint32_t model; + + /* Run CPUID to determine if MONITOR support available */ + cpuid(CPUID_FEATURES, &eax, &ebx, &ecx, &edx); + + /* See if MONITOR feature bit is set in ECX */ + if (ecx & CPUID_ECX_MONITOR) + cpu_caps.monitor_supported = true; + + /* don't use monitor for CPU (family: 0x6 model: 0x5c) + * in hypervisor, but still expose it to the guests and + * let them handle it correctly + */ + family = (eax >> 8) & 0xff; + if (family == 0xF) + family += (eax >> 20) & 0xff; + + model = (eax >> 4) & 0xf; + if (family >= 0x06) + model += ((eax >> 16) & 0xf) << 4; + + if (cpu_caps.monitor_supported && + (family == 0x06) && + (model == 0x5c)) { + cpu_caps.monitor_supported = false; + } +} + +bool check_monitor_support(void) +{ + return cpu_caps.monitor_supported; +} diff --git a/hypervisor/arch/x86/cpu_primary.S b/hypervisor/arch/x86/cpu_primary.S new file mode 100644 index 000000000..0c54e2e6d --- /dev/null +++ b/hypervisor/arch/x86/cpu_primary.S @@ -0,0 +1,228 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +/* MULTIBOOT HEADER */ +#define MULTIBOOT_HEADER_MAGIC 0x1badb002 +#define MULTIBOOT_HEADER_FLAGS 0x00000002 /*flags bit 1 : enable mem_*, mmap_**/ + + .section multiboot_header, "a" + + .align 4 + + /* header magic */ + .long MULTIBOOT_HEADER_MAGIC + /* header flags - flags bit 6 : enable mmap_* */ + .long MULTIBOOT_HEADER_FLAGS + /* header checksum = -(magic + flags) */ + .long -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS) + + .section entry, "ax" + + .align 8 + .code32 + + .global cpu_primary_start_32 +cpu_primary_start_32: + /* Disable interrupts */ + cli + + /* Clear direction flag */ + cld + + /* save eax and ebx */ + movl %eax, %esp + movl %ebx, %ebp + + /* detect whether it is in long mode */ + movl $MSR_IA32_EFER, %ecx + rdmsr + test $MSR_IA32_EFER_LMA_BIT, %eax + + /* jump to 64bit entry if it is already in long mode */ + jne cpu_primary_start_64 + + /* save the MULTBOOT magic number & MBI */ + movl %esp, (boot_regs) + movl %ebp, (boot_regs+4) + + /* Disable paging */ + mov %cr0, %ebx + andl $~CR0_PG, %ebx + mov %ebx, %cr0 + + /* Set DE, PAE, MCE and OS support bits in CR4 */ + movl $(CR4_DE | CR4_PAE | CR4_MCE | CR4_OSFXSR | CR4_OSXMMEXCPT), %eax + mov %eax, %cr4 + + /* Set CR3 to PML4 table address */ + movl $cpu_boot32_page_tables_start, %edi + mov %edi, %cr3 + + /* Set LME bit in EFER */ + movl $MSR_IA32_EFER, %ecx + rdmsr + orl $MSR_IA32_EFER_LME_BIT, %eax + wrmsr + + /* Enable paging, protection, numeric error and co-processor + monitoring in CR0 to enter long mode */ + mov %cr0, %ebx + orl $(CR0_PG | CR0_PE | CR0_MP | CR0_NE), %ebx + mov %ebx, %cr0 + + /* Load temportary GDT pointer value */ + mov $cpu_primary32_gdt_ptr, %ebx + lgdt (%ebx) + + /* Perform a long jump based to start executing in 64-bit mode */ + ljmp $HOST_GDT_RING0_CODE_SEL, $primary_start_long_mode + + .code64 + .org 0x200 + .global cpu_primary_start_64 +cpu_primary_start_64: + /* save the MULTBOOT magic number & MBI */ + movl %edi, (boot_regs) + movl %esi, (boot_regs+4) +#ifdef CONFIG_EFI_STUB + movl %edx, (boot_regs+8) +#endif + +primary_start_long_mode: + + /* Fix up the IDT desciptors */ + movl $HOST_IDT, %edx + movl $HOST_IDT_ENTRIES, %ecx +.LFixUpIDT_Entries: + xorl %eax, %eax + xchgl %eax, 12(%edx) /* Set rsvd bits to 0; eax 
now has + high 32 of entry point */ + xchgl %eax, 8(%edx) /* Set bits 63..32 of entry point; + eax now has low 32 of entry point */ + movw %ax, (%edx) /* Set bits 0-15 of procedure entry + point */ + shr $16, %eax + movw %ax, 6(%edx) /* Set bits 16-31 of entry point */ + addl $X64_IDT_DESC_SIZE,%edx + loop .LFixUpIDT_Entries + + /* Load IDT */ + mov $HOST_IDTR, %rcx + lidtq (%rcx) + + /* Load temportary GDT pointer value */ + mov $cpu_primary32_gdt_ptr, %ebx + lgdt (%ebx) + + /* Replace CS with the correct value should we need it */ + mov $HOST_GDT_RING0_CODE_SEL, %bx + mov %bx, jcs + movabsq $jmpbuf, %rax + rex.w ljmp *(%rax) +.data +jmpbuf: .quad after +jcs: .word 0 +.text +after: + + /* Initialize temporary stack pointer */ + movq $_ld_bss_end, %rsp + add $CPU_PAGE_SIZE,%rsp + and $(~(CPU_STACK_ALIGN - 1)),%rsp + + // load all selector registers with appropriate values + xor %edx, %edx + lldt %dx + movl $HOST_GDT_RING0_DATA_SEL,%eax + mov %eax,%ss // Was 32bit POC Stack + mov %eax,%ds // Was 32bit POC Data + mov %eax,%es // Was 32bit POC Data + mov %edx,%fs // Was 32bit POC Data + mov %edx,%gs // Was 32bit POC CLS + + /* Push sp magic to top of stack for call trace */ + pushq $SP_BOTTOM_MAGIC + /* continue with chipset level initialization */ + call bsp_boot_init + +loop: + jmp loop + + .align 4 + .global boot_regs +boot_regs: + .long 0x00000000 + .long 0x00000000 +#ifdef CONFIG_EFI_STUB + .long 0x00000000 +#endif + + /* GDT table */ + .align 4 +cpu_primary32_gdt: + .quad 0x0000000000000000 + .quad 0x00af9b000000ffff + .quad 0x00cf93000000ffff +cpu_primary32_gdt_end: + +/* GDT pointer */ + .align 2 +cpu_primary32_gdt_ptr: + .short (cpu_primary32_gdt_end - cpu_primary32_gdt) - 1 + .quad cpu_primary32_gdt + +/* PML4, PDPT, and PD tables initialized to map first 4 GBytes of memory */ + .align CPU_PAGE_SIZE + .global cpu_boot32_page_tables_start +cpu_boot32_page_tables_start: + .quad cpu_primary32_pdpt_addr + (IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT) + .align CPU_PAGE_SIZE +cpu_primary32_pdpt_addr: + address = 0 + .rept 4 + .quad cpu_primary32_pdt_addr + address + \ + (IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT) + address = address + CPU_PAGE_SIZE + .endr + .align CPU_PAGE_SIZE +cpu_primary32_pdt_addr: + address = 0 + .rept 2048 + .quad address + (IA32E_PDPTE_PS_BIT | IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT) + address = address + 0x200000 + .endr + diff --git a/hypervisor/arch/x86/cpu_secondary.S b/hypervisor/arch/x86/cpu_secondary.S new file mode 100644 index 000000000..acb23bc5d --- /dev/null +++ b/hypervisor/arch/x86/cpu_secondary.S @@ -0,0 +1,197 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + + + .extern cpu_secondary_init + .extern cpu_logical_id + .extern _ld_bss_end + .extern HOST_GDTR + + .section .cpu_secondary_reset,"ax" + + .align 4 + .code16 + .global cpu_secondary_reset +cpu_secondary_reset: + + /* Disable local interrupts */ + + cli + + /* Set DE, PAE, MCE and OS support bits in CR4 */ + + movl $(CR4_DE | CR4_PAE | CR4_MCE | CR4_OSFXSR | CR4_OSXMMEXCPT), %eax + mov %eax, %cr4 + + /* Set CR3 to PML4 table address */ + + movl $CPU_Boot_Page_Tables_Start, %edi + mov %edi, %cr3 + + /* Set LME bit in EFER */ + + movl $MSR_IA32_EFER, %ecx + rdmsr + orl $MSR_IA32_EFER_LME_BIT, %eax + wrmsr + + /* Enable paging, protection, numeric error and co-processor + monitoring in CR0 to enter long mode */ + + mov %cr0, %ebx + orl $(CR0_PG | CR0_PE | CR0_MP | CR0_NE), %ebx + mov %ebx, %cr0 + + /* Load temportary GDT pointer value */ + + mov $cpu_secondary_gdt_ptr, %ebx + lgdt (%ebx) + + /* Perform a long jump based to start executing in 64-bit mode */ + + data32 ljmp $HOST_GDT_RING0_CODE_SEL, $cpu_secondary_long_mode + + .code64 +cpu_secondary_long_mode: + + /* Set up all other data segment registers */ + + movl $HOST_GDT_RING0_DATA_SEL, %eax + mov %eax, %ss + mov %eax, %ds + mov %eax, %es + mov %eax, %fs + mov %eax, %gs + + /* Obtain secondary CPU spin-lock to serialize + booting of secondary cores for a bit */ + + spinlock_obtain(cpu_secondary_spinlock) + + /* Initialize temporary stack pointer + NOTE: Using the PML4 memory (PDPT address is top of memory + for the PML4 page) for the temporary stack + as we are only using the very first entry in + this page and the stack is growing down from + the top of this page. This stack is only + used for a VERY short period of time, so + this reuse of PML4 memory should be acceptable. 
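+ Concretely, cpu_secondary_pdpt_addr is the label of the page that
+ immediately follows the PML4 page, so loading it into RSP points the
+ stack at the end of the PML4 page. With only the first 8-byte entry
+ of that page in use, nearly the whole page (assuming the usual
+ 4 KiB CPU_PAGE_SIZE) is available for the short call path into
+ cpu_secondary_init.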
*/ + + movq $cpu_secondary_pdpt_addr, %rsp + + /* Push sp magic to top of stack for call trace */ + pushq $SP_BOTTOM_MAGIC + + /* Jump to C entry for the AP */ + + call cpu_secondary_init + +cpu_secondary_error: + + /* Error condition trap */ + + jmp cpu_secondary_error + +/* GDT table */ + .align 4 +cpu_secondary_gdt: + .quad 0x0000000000000000 + .quad 0x00af9b000000ffff + .quad 0x00cf93000000ffff +cpu_secondary_gdt_end: + +/* GDT pointer */ + .align 2 +cpu_secondary_gdt_ptr: + .short (cpu_secondary_gdt_end - cpu_secondary_gdt) - 1 + .quad cpu_secondary_gdt + +/* PML4, PDPT, and PD tables initialized to map first 4 GBytes of memory */ + + .align CPU_PAGE_SIZE + .global CPU_Boot_Page_Tables_Start +CPU_Boot_Page_Tables_Start: + .quad cpu_secondary_pdpt_addr + (IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT) + .align CPU_PAGE_SIZE +cpu_secondary_pdpt_addr: + address = 0 + .rept 4 + .quad cpu_secondary_pdt_addr + address + \ + (IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT) + address = address + CPU_PAGE_SIZE + .endr + .align CPU_PAGE_SIZE +cpu_secondary_pdt_addr: + address = 0 + .rept 2048 + .quad address + (IA32E_PDPTE_PS_BIT | IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT) + address = address + 0x200000 + .endr + + +/******************************************************************* + * GUEST initial 4G page table + * + * guest starts with long mode, HV needs to prepare Guest identity + * mapped page table. + * + * guest page tables covers 4G size, with 2M page size. + * + * HV copy this page table (6 pages) to guest address + * CPU_Boot_Page_Tables_Start_VM before executing guest instruction. + * + ******************************************************************/ + .align CPU_PAGE_SIZE + .global CPU_Boot_Page_Tables_Start_VM +CPU_Boot_Page_Tables_Start_VM: + .quad vm_cpu_pdpt_addr + (IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT) + .align CPU_PAGE_SIZE +vm_cpu_pdpt_addr: + address = 0 + .rept 4 + .quad vm_cpu_pdt_addr + address + (IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT) + address = address + CPU_PAGE_SIZE + .endr + .align CPU_PAGE_SIZE +vm_cpu_pdt_addr: + address = 0 + .rept 2048 + .quad address + (IA32E_PDPTE_PS_BIT | IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT) + address = address + 0x200000 + .endr + + .end diff --git a/hypervisor/arch/x86/cpuid.c b/hypervisor/arch/x86/cpuid.c new file mode 100644 index 000000000..7b1e6e73c --- /dev/null +++ b/hypervisor/arch/x86/cpuid.c @@ -0,0 +1,195 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +void emulate_cpuid(struct vcpu *vcpu, uint32_t src_op, uint32_t *eax_ptr, + uint32_t *ebx_ptr, uint32_t *ecx_ptr, uint32_t *edx_ptr) +{ + uint32_t apicid = vlapic_get_id(vcpu->arch_vcpu.vlapic); + static const char sig[12] = "ACRNACRNACRN"; + const uint32_t *sigptr = (const uint32_t *)sig; + uint32_t count = *ecx_ptr; + + if ((src_op != 0x40000000) && (src_op != 0x40000010)) + cpuid_count(src_op, count, eax_ptr, ebx_ptr, ecx_ptr, edx_ptr); + + switch (src_op) { + /* Virtualize cpuid 0x01 */ + case 0x01: + /* Patching initial APIC ID */ + *ebx_ptr &= ~APIC_ID_MASK; + *ebx_ptr |= (apicid & APIC_ID_MASK); + + /* mask mtrr */ + *edx_ptr &= ~CPUID_EDX_MTRR; + + /* Patching X2APIC, X2APIC mode is disabled by default. */ + if (x2apic_enabled) + *ecx_ptr |= CPUID_ECX_x2APIC; + else + *ecx_ptr &= ~CPUID_ECX_x2APIC; + + /* mask pcid */ + *ecx_ptr &= ~CPUID_ECX_PCID; + + /*mask vmx to guest os */ + *ecx_ptr &= ~CPUID_ECX_VMX; + + break; + + /* Virtualize cpuid 0x07 */ + case 0x07: + /* mask invpcid */ + *ebx_ptr &= ~CPUID_EBX_INVPCID; + + break; + + case 0x0a: + /* not support pmu */ + *eax_ptr &= ~0xff; + break; + + /* Virtualize cpuid 0x0b */ + case 0x0b: + /* Patching X2APIC */ + if (!x2apic_enabled) { + *eax_ptr = 0; + *ebx_ptr = 0; + *ecx_ptr = 0; + *edx_ptr = 0; + } + break; + + /* + * Leaf 0x40000000 + * This leaf returns the CPUID leaf range supported by the + * hypervisor and the hypervisor vendor signature. + * + * EAX: The maximum input value for CPUID supported by the + * hypervisor. + * EBX, ECX, EDX: Hypervisor vendor ID signature. + */ + case 0x40000000: + *eax_ptr = 0x40000010; + *ebx_ptr = sigptr[0]; + *ecx_ptr = sigptr[1]; + *edx_ptr = sigptr[2]; + break; + + /* + * Leaf 0x40000010 - Timing Information. + * This leaf returns the current TSC frequency and + * current Bus frequency in kHz. + * + * EAX: (Virtual) TSC frequency in kHz. + * TSC frequency is calculated from PIT in ACRN + * EBX: (Virtual) Bus (local apic timer) frequency in kHz. + * Bus (local apic timer) frequency is hardcoded as + * (128 * 1024 * 1024) in ACRN + * ECX, EDX: RESERVED (reserved fields are set to zero). 
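+ *
+ * Worked example (hypothetical numbers): on a platform whose
+ * calibrated TSC runs at 2.1 GHz, a guest issuing CPUID with
+ * EAX=0x40000010 reads back EAX = 2100000 (kHz) and
+ * EBX = (128 * 1024 * 1024) / 1000 = 134217 (kHz).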
+ */ + case 0x40000010: + *eax_ptr = (uint32_t)(tsc_clock_freq / 1000); + *ebx_ptr = (128 * 1024 * 1024) / 1000; + *ecx_ptr = 0; + *edx_ptr = 0; + break; + + default: + break; + } +} + +static DEFINE_CPU_DATA(struct cpuid_cache_entry[CPUID_EXTEND_FEATURE_CACHE_MAX], + cpuid_cache); + +static inline struct cpuid_cache_entry *find_cpuid_cache_entry(uint32_t op, + uint32_t count) +{ + int pcpu_id = get_cpu_id(); + enum cpuid_cache_idx idx = CPUID_EXTEND_FEATURE_CACHE_MAX; + + if ((count != 0)) + return NULL; + + switch (op) { + case CPUID_VENDORSTRING: + idx = CPUID_VENDORSTRING_CACHE_IDX; + break; + + case CPUID_FEATURES: + idx = CPUID_FEATURES_CACHE_IDX; + break; + + case CPUID_EXTEND_FEATURE: + idx = CPUID_EXTEND_FEATURE_CACHE_IDX; + break; + + default: + break; + } + + if (idx == CPUID_EXTEND_FEATURE_CACHE_MAX) + return NULL; + + return &per_cpu(cpuid_cache, pcpu_id)[idx]; +} + +inline void cpuid_count(uint32_t op, uint32_t count, + uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) +{ + struct cpuid_cache_entry *entry; + + entry = find_cpuid_cache_entry(op, count); + + if (entry == NULL) { + native_cpuid_count(op, count, a, b, c, d); + } else if (entry->inited) { + *a = entry->a; + *b = entry->b; + *c = entry->c; + *d = entry->d; + } else { + native_cpuid_count(op, count, a, b, c, d); + + entry->a = *a; + entry->b = *b; + entry->c = *c; + entry->d = *d; + + entry->inited = 1; + } +} + diff --git a/hypervisor/arch/x86/ept.c b/hypervisor/arch/x86/ept.c new file mode 100644 index 000000000..a56525d24 --- /dev/null +++ b/hypervisor/arch/x86/ept.c @@ -0,0 +1,569 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "guest/instr_emul_wrapper.h" +#include "guest/instr_emul.h" + +#define ACRN_DBG_EPT 6 + +void *create_guest_paging(struct vm *vm) +{ + void *hva_dest; + void *hva_src; + + /* copy guest identity mapped 4G page table to guest */ + hva_dest = GPA2HVA(vm, + (uint64_t)CPU_Boot_Page_Tables_Start_VM); + hva_src = (void *)(_ld_cpu_secondary_reset_load + + (CPU_Boot_Page_Tables_Start_VM + - _ld_cpu_secondary_reset_start)); + /* 2MB page size, need to copy 6 pages */ + memcpy_s(hva_dest, 6 * CPU_PAGE_SIZE, hva_src, 6 * CPU_PAGE_SIZE); + return (void *)CPU_Boot_Page_Tables_Start_VM; +} + +static void *find_next_table(uint32_t table_offset, + void *table_base) +{ + uint64_t table_entry; + uint64_t table_present; + void *sub_table_addr = 0; + + /* Read the table entry */ + table_entry = MEM_READ64(table_base + + (table_offset * IA32E_COMM_ENTRY_SIZE)); + + /* If bit 7 is set, entry is not a subtable. */ + if ((table_entry & IA32E_PDPTE_PS_BIT) + || (table_entry & IA32E_PDE_PS_BIT)) + return sub_table_addr; + + /* Set table present bits to any of the read/write/execute bits */ + table_present = (IA32E_EPT_R_BIT | IA32E_EPT_W_BIT | IA32E_EPT_X_BIT); + + /* Determine if a valid entry exists */ + if ((table_entry & table_present) == 0) { + /* No entry present */ + return sub_table_addr; + } + + /* Get address of the sub-table */ + sub_table_addr = (void *)(table_entry & IA32E_REF_MASK); + + /* Return the next table in the walk */ + return sub_table_addr; +} + + +void free_ept_mem(void *pml4_addr) +{ + void *pdpt_addr; + void *pde_addr; + void *pte_addr; + uint32_t pml4_index; + uint32_t pdpt_index; + uint32_t pde_index; + + for (pml4_index = 0; pml4_index < IA32E_NUM_ENTRIES; pml4_index++) { + /* Walk from the PML4 table to the PDPT table */ + pdpt_addr = find_next_table(pml4_index, pml4_addr); + if (pdpt_addr == NULL) + continue; + + for (pdpt_index = 0; pdpt_index < IA32E_NUM_ENTRIES; + pdpt_index++) { + /* Walk from the PDPT table to the PD table */ + pde_addr = find_next_table(pdpt_index, pdpt_addr); + + if (pde_addr == NULL) + continue; + + for (pde_index = 0; pde_index < IA32E_NUM_ENTRIES; + pde_index++) { + /* Walk from the PD table to the page table */ + pte_addr = find_next_table(pde_index, + pde_addr); + + /* Free page table entry table */ + if (pte_addr) + free(pte_addr); + } + /* Free page directory entry table */ + if (pde_addr) + free(pde_addr); + } + free(pdpt_addr); + } + free(pml4_addr); +} + +void destroy_ept(struct vm *vm) +{ + free_ept_mem(vm->arch_vm.ept); + free_ept_mem(vm->arch_vm.m2p); +} + +uint64_t gpa2hpa_check(struct vm *vm, uint64_t gpa, + uint64_t size, int *found, bool assert) +{ + uint64_t hpa = 0; + int _found = 0; + struct entry_params entry; + struct map_params map_params; + + map_params.page_table_type = PT_EPT; + map_params.pml4_base = vm->arch_vm.ept; + map_params.pml4_inverted = vm->arch_vm.m2p; + obtain_last_page_table_entry(&map_params, &entry, + (void *)gpa, true); + if (entry.entry_present == PT_PRESENT + /* if cross several pages, now not handle it, + * only print error info + */ + && ((gpa % entry.page_size) + size) <= entry.page_size) { + _found = 1; + hpa = ((entry.entry_val & (~(entry.page_size - 1))) + | (gpa & (entry.page_size - 1))); + } + + if (found != NULL) + *found = _found; + + if (_found == 0 && assert) { + pr_err("VM %d GPA2HPA: failed for gpa 0x%llx", + vm->attr.boot_idx, gpa); + ASSERT(_found != 0, "GPA2HPA not found"); + } + + pr_dbg("GPA2HPA: 
0x%llx->0x%llx", gpa, hpa); + + return hpa; +} + +uint64_t gpa2hpa(struct vm *vm, uint64_t gpa) +{ + return gpa2hpa_check(vm, gpa, 0, NULL, true); +} + +uint64_t hpa2gpa(struct vm *vm, uint64_t hpa) +{ + struct entry_params entry; + struct map_params map_params; + + map_params.page_table_type = PT_EPT; + map_params.pml4_base = vm->arch_vm.ept; + map_params.pml4_inverted = vm->arch_vm.m2p; + + obtain_last_page_table_entry(&map_params, &entry, + (void *)hpa, false); + + if (entry.entry_present == PT_NOT_PRESENT) { + pr_err("VM %d hpa2gpa: failed for hpa 0x%llx", + vm->attr.boot_idx, hpa); + ASSERT(false, "hpa2gpa not found"); + } + return ((entry.entry_val & (~(entry.page_size - 1))) + | (hpa & (entry.page_size - 1))); +} + +int is_ept_supported(void) +{ + uint16_t status; + uint64_t tmp64; + + /* Read primary processor based VM control. */ + tmp64 = msr_read(MSR_IA32_VMX_PROCBASED_CTLS); + + /* Check if secondary processor based VM control is available. */ + if (tmp64 & MMU_MEM_ATTR_BIT_EXECUTE_DISABLE) { + /* Read primary processor based VM control. */ + tmp64 = msr_read(MSR_IA32_VMX_PROCBASED_CTLS2); + + /* Check if EPT is supported. */ + if (tmp64 & (((uint64_t)VMX_PROCBASED_CTLS2_EPT) << 32)) { + /* EPT is present. */ + status = 1; + } else { + status = 0; + } + + } else { + /* Secondary processor based VM control is not present */ + status = 0; + } + + return status; +} + +static int check_hv_mmio_range(struct vm *vm, struct mem_io *mmio) +{ + int status = false; + struct list_head *pos; + struct mem_io_node *mmio_node; + + + list_for_each(pos, &vm->mmio_list) { + mmio_node = list_entry(pos, struct mem_io_node, list); + /* Check if this handler's range covers this memory access */ + if ((mmio->paddr >= mmio_node->range_start) && + (mmio->paddr + mmio->access_size <= + mmio_node->range_end)) { + status = true; + + /* Break from loop - only 1 handler allowed to support + * a given memory range + */ + break; + } + } + + /* Return success for now */ + return status; +} + +static int hv_emulate_mmio(struct vcpu *vcpu, struct mem_io *mmio) +{ + int status = -EINVAL; + struct list_head *pos; + struct mem_io_node *mmio_node; + struct vm *vm = vcpu->vm; + + list_for_each(pos, &vm->mmio_list) { + mmio_node = list_entry(pos, struct mem_io_node, list); + /* Check if this handler's range covers this memory access */ + if ((mmio->paddr >= mmio_node->range_start) && + (mmio->paddr + mmio->access_size + <= mmio_node->range_end)) { + + ASSERT((mmio->paddr % mmio->access_size) == 0, + "access size not align with paddr"); + + /* Handle this MMIO operation */ + status = mmio_node->read_write(vcpu, mmio, + mmio_node->handler_private_data); + + /* Break from loop - only 1 handler allowed to support + * given memory range + */ + break; + } + } + + /* Return success for now */ + return status; +} + +int register_mmio_emulation_handler(struct vm *vm, + hv_mem_io_handler_t read_write, uint64_t start, + uint64_t end, void *handler_private_data) +{ + int status = -EINVAL; + struct mem_io_node *mmio_node; + + if (vm->hw.created_vcpus > 0 && vm->hw.vcpu_array[0]->launched) { + ASSERT(0, "register mmio handler after vm launched"); + return status; + } + + /* Ensure both a read/write handler and range check function exist */ + if ((read_write != HV_NULL) && (end > start)) { + /* Allocate memory for node */ + mmio_node = + (struct mem_io_node *)calloc(1, sizeof(struct mem_io_node)); + + /* Ensure memory successfully allocated */ + if (mmio_node) { + /* Fill in information for this node */ + mmio_node->read_write = 
read_write; + mmio_node->handler_private_data = handler_private_data; + + INIT_LIST_HEAD(&mmio_node->list); + list_add(&mmio_node->list, &vm->mmio_list); + + mmio_node->range_start = start; + mmio_node->range_end = end; + ept_mmap(vm, start, start, end - start, + MAP_UNMAP, 0); + + /* Return success */ + status = 0; + } + } + + /* Return status to caller */ + return status; +} + +void unregister_mmio_emulation_handler(struct vm *vm, uint64_t start, + uint64_t end) +{ + struct list_head *pos, *tmp; + struct mem_io_node *mmio_node; + + list_for_each_safe(pos, tmp, &vm->mmio_list) { + mmio_node = list_entry(pos, struct mem_io_node, list); + + if ((mmio_node->range_start == start) && + (mmio_node->range_end == end)) { + /* assume only one entry found in mmio_list */ + list_del_init(&mmio_node->list); + free(mmio_node); + break; + } + } +} + +int dm_emulate_mmio_post(struct vcpu *vcpu) +{ + int ret = 0; + int cur = vcpu->vcpu_id; + struct vhm_request_buffer *req_buf = + (void *)HPA2HVA(vcpu->vm->sw.req_buf); + + vcpu->req.reqs.mmio_request.value = + req_buf->req_queue[cur].reqs.mmio_request.value; + + /* VHM emulation data already copy to req, mark to free slot now */ + req_buf->req_queue[cur].valid = false; + + if (req_buf->req_queue[cur].processed == REQ_STATE_SUCCESS) + vcpu->mmio.mmio_status = MMIO_TRANS_VALID; + else { + vcpu->mmio.mmio_status = MMIO_TRANS_INVALID; + goto out; + } + + if (vcpu->mmio.read_write == HV_MEM_IO_READ) { + vcpu->mmio.value = vcpu->req.reqs.mmio_request.value; + /* Emulate instruction and update vcpu register set */ + ret = emulate_instruction(vcpu, &vcpu->mmio); + if (ret != 0) + goto out; + } + +out: + return ret; +} + +static int dm_emulate_mmio_pre(struct vcpu *vcpu, uint64_t exit_qual) +{ + int status; + + status = analyze_instruction(vcpu, &vcpu->mmio); + if (status != 0) + return status; + + if (vcpu->mmio.read_write == HV_MEM_IO_WRITE) { + status = emulate_instruction(vcpu, &vcpu->mmio); + if (status != 0) + return status; + vcpu->req.reqs.mmio_request.value = vcpu->mmio.value; + /* XXX: write access while EPT perm RX -> WP */ + if ((exit_qual & 0x38) == 0x28) + vcpu->req.type = REQ_WP; + } + + if (vcpu->req.type == 0) + vcpu->req.type = REQ_MMIO; + vcpu->req.reqs.mmio_request.direction = vcpu->mmio.read_write; + vcpu->req.reqs.mmio_request.address = (long)vcpu->mmio.paddr; + vcpu->req.reqs.mmio_request.size = vcpu->mmio.access_size; + + return 0; +} + +int ept_violation_handler(struct vcpu *vcpu) +{ + int status; + uint64_t exit_qual; + uint64_t gpa; + + /* Handle page fault from guest */ + exit_qual = exec_vmread(VMX_EXIT_QUALIFICATION); + + memset(&vcpu->req, 0, sizeof(struct vhm_request)); + + /* Specify if read or write operation */ + if (exit_qual & 0x2) { + /* Write operation */ + vcpu->mmio.read_write = HV_MEM_IO_WRITE; + + /* Get write value from appropriate register in context */ + /* TODO: Need to figure out how to determine value being + * written + */ + vcpu->mmio.value = 0; + } else { + /* Read operation */ + vcpu->mmio.read_write = HV_MEM_IO_READ; + + /* Get sign extension requirements for read */ + /* TODO: Need to determine how sign extension is determined for + * reads + */ + vcpu->mmio.sign_extend_read = 0; + } + + /* Get the guest physical address */ + gpa = exec_vmread64(VMX_GUEST_PHYSICAL_ADDR_FULL); + + TRACE_2L(TRC_VMEXIT_EPT_VIOLATION, exit_qual, gpa); + + /* Adjust IPA appropriately and OR page offset to get full IPA of abort + */ + vcpu->mmio.paddr = gpa; + + /* Check if the MMIO access has a HV registered handler */ + status = 
check_hv_mmio_range((struct vm *) vcpu->vm, &vcpu->mmio); + + if (status == true) { + /* Fetch and decode current vcpu instruction */ + status = analyze_instruction(vcpu, &vcpu->mmio); + + if (status != 0) + goto out; + + if (vcpu->mmio.read_write == HV_MEM_IO_WRITE) { + status = emulate_instruction(vcpu, &vcpu->mmio); + if (status != 0) + goto out; + } + + /* Call generic memory emulation handler + * For MMIO write, call hv_emulate_mmio after + * instruction emulation. For MMIO read, + * call hv_emulate_mmio at first. + */ + status = hv_emulate_mmio(vcpu, &vcpu->mmio); + + if (vcpu->mmio.read_write == HV_MEM_IO_READ) { + /* Emulate instruction and update vcpu register set */ + status = emulate_instruction(vcpu, &vcpu->mmio); + if (status != 0) + goto out; + } + } else { + /* + * No mmio handler from HV side, search from VHM in Dom0 + * + * ACRN insert request to VHM and inject upcall + * For MMIO write, ask DM to run MMIO emulation after + * instruction emulation. For MMIO read, ask DM to run MMIO + * emulation at first. + */ + status = dm_emulate_mmio_pre(vcpu, exit_qual); + if (status != 0) + goto out; + status = acrn_insert_request_wait(vcpu, &vcpu->req); + } + + return status; + +out: + pr_fatal("Guest Linear Address: 0x%016llx", + exec_vmread(VMX_GUEST_LINEAR_ADDR)); + + pr_fatal("Guest Physical Address address: 0x%016llx", + gpa); + + ASSERT(status == true, "EPT violation"); + + return status; +} + +int ept_misconfig_handler(__unused struct vcpu *vcpu) +{ + int status; + + status = -EINVAL; + + /* TODO - EPT Violation handler */ + pr_info("%s, Guest linear address: 0x%016llx ", + __func__, exec_vmread64(VMX_GUEST_LINEAR_ADDR)); + + pr_info("%s, Guest physical address: 0x%016llx ", + __func__, exec_vmread64(VMX_GUEST_PHYSICAL_ADDR_FULL)); + + ASSERT(status == 0, "EPT Misconfiguration is not handled.\n"); + + TRACE_2L(TRC_VMEXIT_EPT_MISCONFIGURATION, 0, 0); + + return status; +} + + +int ept_mmap(struct vm *vm, uint64_t hpa, + uint64_t gpa, uint64_t size, uint32_t type, uint32_t prot) +{ + struct map_params map_params; + int i; + struct vcpu *vcpu; + + /* Setup memory map parameters */ + map_params.page_table_type = PT_EPT; + if (vm->arch_vm.ept) { + map_params.pml4_base = vm->arch_vm.ept; + map_params.pml4_inverted = vm->arch_vm.m2p; + } else { + map_params.pml4_base = + alloc_paging_struct(); + vm->arch_vm.ept = map_params.pml4_base; + map_params.pml4_inverted = alloc_paging_struct(); + vm->arch_vm.m2p = map_params.pml4_inverted; + } + + if (type == MAP_MEM || type == MAP_MMIO) { + map_mem(&map_params, (void *)hpa, + (void *)gpa, size, prot); + + } else if (type == MAP_UNMAP) { + unmap_mem(&map_params, (void *)hpa, (void *)gpa, + size, prot); + } else + ASSERT(0, "unknown map type"); + + foreach_vcpu(i, vm, vcpu) { + vcpu_make_request(vcpu, ACRN_REQUEST_TLB_FLUSH); + } + + dev_dbg(ACRN_DBG_EPT, "ept map: %s hpa: 0x%016llx gpa: 0x%016llx ", + type == MAP_UNMAP ? "unmap" : "map", hpa, gpa); + dev_dbg(ACRN_DBG_EPT, "size: 0x%016llx prot: 0x%x\n", size, prot); + + return 0; +} diff --git a/hypervisor/arch/x86/gdt.c b/hypervisor/arch/x86/gdt.c new file mode 100644 index 000000000..23d6ecab0 --- /dev/null +++ b/hypervisor/arch/x86/gdt.c @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +DEFINE_CPU_DATA(struct tss_64, tss); +DEFINE_CPU_DATA(struct host_gdt, gdt); +DEFINE_CPU_DATA(uint8_t[STACK_SIZE], mc_stack) __aligned(16); +DEFINE_CPU_DATA(uint8_t[STACK_SIZE], df_stack) __aligned(16); +DEFINE_CPU_DATA(uint8_t[STACK_SIZE], sf_stack) __aligned(16); + +static void set_tss_desc(union tss_64_descriptor *desc, + void *tss, int tss_limit, int type) +{ + uint32_t u1, u2, u3; + + u1 = ((uint64_t)tss << 16) & 0xFFFFFFFF; + u2 = (uint64_t)tss & 0xFF000000; + u3 = ((uint64_t)tss & 0x00FF0000) >> 16; + + + desc->low32.value = u1 | (tss_limit & 0xFFFF); + desc->base_addr_63_32 = (uint32_t)((uint64_t)tss >> 32); + desc->high32.value = (u2 | ((uint32_t)type << 8) | 0x8000 | u3); +} + +void load_gdtr_and_tr(void) +{ + struct host_gdt *gdt = &get_cpu_var(gdt); + struct host_gdt_descriptor gdtr; + struct tss_64 *tss = &get_cpu_var(tss); + + /* first entry is not used */ + gdt->rsvd = 0xAAAAAAAAAAAAAAAA; + /* ring 0 code sel descriptor */ + gdt->host_gdt_code_descriptor.value = 0x00Af9b000000ffff; + /* ring 0 data sel descriptor */ + gdt->host_gdt_data_descriptor.value = 0x00cf93000000ffff; + + tss->ist1 = (uint64_t)get_cpu_var(mc_stack) + STACK_SIZE; + tss->ist2 = (uint64_t)get_cpu_var(df_stack) + STACK_SIZE; + tss->ist3 = (uint64_t)get_cpu_var(sf_stack) + STACK_SIZE; + tss->ist4 = 0L; + + /* tss descriptor */ + set_tss_desc(&gdt->host_gdt_tss_descriptors, + (void *)tss, sizeof(struct tss_64), TSS_AVAIL); + + gdtr.len = sizeof(struct host_gdt) - 1; + gdtr.gdt = gdt; + + asm volatile ("lgdt %0" ::"m"(gdtr)); + + CPU_LTR_EXECUTE(HOST_GDT_RING0_CPU_TSS_SEL); +} diff --git a/hypervisor/arch/x86/guest/guest.c b/hypervisor/arch/x86/guest/guest.c new file mode 100644 index 000000000..eeb4d243f --- /dev/null +++ b/hypervisor/arch/x86/guest/guest.c @@ -0,0 +1,389 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BOOT_ARGS_LOAD_ADDR 0x24EFC000 + +#define ACRN_DBG_GUEST 6 + +/* for VM0 e820 */ +uint32_t e820_entries; +struct e820_entry e820[E820_MAX_ENTRIES]; +struct e820_mem_params e820_mem; + +inline bool +is_vm0(struct vm *vm) +{ + return (vm->attr.boot_idx & 0x7F) == 0; +} + +inline struct vcpu *vcpu_from_vid(struct vm *vm, int vcpu_id) +{ + int i; + struct vcpu *vcpu; + + foreach_vcpu(i, vm, vcpu) { + if (vcpu->vcpu_id == vcpu_id) + return vcpu; + } + + return NULL; +} + +inline struct vcpu *vcpu_from_pid(struct vm *vm, int pcpu_id) +{ + int i; + struct vcpu *vcpu; + + foreach_vcpu(i, vm, vcpu) { + if (vcpu->pcpu_id == pcpu_id) + return vcpu; + } + + return NULL; +} + +inline struct vcpu *get_primary_vcpu(struct vm *vm) +{ + int i; + struct vcpu *vcpu; + + foreach_vcpu(i, vm, vcpu) { + if (is_vcpu_bsp(vcpu)) + return vcpu; + } + + return NULL; +} + +inline uint64_t vcpumask2pcpumask(struct vm *vm, uint64_t vdmask) +{ + int vcpu_id; + uint64_t dmask = 0; + struct vcpu *vcpu; + + while ((vcpu_id = bitmap_ffs(&vdmask)) >= 0) { + bitmap_clr(vcpu_id, &vdmask); + vcpu = vcpu_from_vid(vm, vcpu_id); + ASSERT(vcpu, "vcpu_from_vid failed"); + bitmap_set(vcpu->pcpu_id, &dmask); + } + + return dmask; +} + +inline bool vm_lapic_disabled(struct vm *vm) +{ + int i; + struct vcpu *vcpu; + + foreach_vcpu(i, vm, vcpu) { + if (vlapic_enabled(vcpu->arch_vcpu.vlapic)) + return false; + } + + return true; +} + +int init_vm0_boot_info(struct vm *vm) +{ + struct multiboot_module *mods = NULL; + struct multiboot_info *mbi = NULL; + + if (!is_vm0(vm)) { + pr_err("just for vm0 to get info!"); + return -EINVAL; + } + + if (boot_regs[0] != MULTIBOOT_INFO_MAGIC) { + ASSERT(0, "no multiboot info found"); + return -EINVAL; + } + + mbi = (struct multiboot_info *)((uint64_t)boot_regs[1]); + + dev_dbg(ACRN_DBG_GUEST, "Multiboot detected, flag=0x%x", mbi->mi_flags); + if (!(mbi->mi_flags & MULTIBOOT_INFO_HAS_MODS)) { + ASSERT(0, "no 
sos kernel info found"); + return -EINVAL; + } + + dev_dbg(ACRN_DBG_GUEST, "mod counts=%d\n", mbi->mi_mods_count); + + /* mod[0] is for kernel&cmdline, other mod for ramdisk/firmware info*/ + mods = (struct multiboot_module *)(uint64_t)mbi->mi_mods_addr; + + dev_dbg(ACRN_DBG_GUEST, "mod0 start=0x%x, end=0x%x", + mods[0].mm_mod_start, mods[0].mm_mod_end); + dev_dbg(ACRN_DBG_GUEST, "cmd addr=0x%x, str=%s", mods[0].mm_string, + (char *) (uint64_t)mods[0].mm_string); + + vm->sw.kernel_type = VM_LINUX_GUEST; + vm->sw.kernel_info.kernel_src_addr = + (void *)(uint64_t)mods[0].mm_mod_start; + vm->sw.kernel_info.kernel_size = + mods[0].mm_mod_end - mods[0].mm_mod_start; + vm->sw.kernel_info.kernel_load_addr = + (void *)(uint64_t)mods[0].mm_mod_start; + + vm->sw.linux_info.bootargs_src_addr = + (void *)(uint64_t)mods[0].mm_string; + vm->sw.linux_info.bootargs_load_addr = + (void *)BOOT_ARGS_LOAD_ADDR; + vm->sw.linux_info.bootargs_size = + strnlen_s((char *)(uint64_t) mods[0].mm_string, MEM_2K); + + return 0; +} + +uint64_t gva2gpa(struct vm *vm, uint64_t cr3, uint64_t gva) +{ + int level, index, shift; + uint64_t *base, addr, entry, page_size; + uint64_t gpa = 0; + + addr = cr3; + + for (level = 3; level >= 0; level--) { + addr = addr & IA32E_REF_MASK; + base = GPA2HVA(vm, addr); + ASSERT(base != NULL, "invalid ptp base."); + shift = level * 9 + 12; + index = (gva >> shift) & 0x1FF; + page_size = 1UL << shift; + + entry = base[index]; + if (level > 0 && (entry & MMU_32BIT_PDE_PS) != 0) + break; + addr = entry; + } + + entry >>= shift; entry <<= (shift + 12); entry >>= 12; + gpa = entry | (gva & (page_size - 1)); + + return gpa; +} + +void init_e820(void) +{ + unsigned int i; + + if (boot_regs[0] == MULTIBOOT_INFO_MAGIC) { + struct multiboot_info *mbi = + (struct multiboot_info *)((uint64_t)boot_regs[1]); + pr_info("Multiboot info detected\n"); + if (mbi->mi_flags & 0x40) { + struct multiboot_mmap *mmap = + (struct multiboot_mmap *) + ((uint64_t)mbi->mi_mmap_addr); + e820_entries = mbi->mi_mmap_length/ + sizeof(struct multiboot_mmap); + if (e820_entries > E820_MAX_ENTRIES) { + pr_err("Too many E820 entries %d\n", + e820_entries); + e820_entries = E820_MAX_ENTRIES; + } + dev_dbg(ACRN_DBG_GUEST, + "mmap length 0x%x addr 0x%x entries %d\n", + mbi->mi_mmap_length, mbi->mi_mmap_addr, + e820_entries); + for (i = 0; i < e820_entries; i++) { + e820[i].baseaddr = mmap[i].baseaddr; + e820[i].length = mmap[i].length; + e820[i].type = mmap[i].type; + + dev_dbg(ACRN_DBG_GUEST, + "mmap table: %d type: 0x%x\n", + i, mmap[i].type); + dev_dbg(ACRN_DBG_GUEST, + "Base: 0x%016llx length: 0x%016llx", + mmap[i].baseaddr, mmap[i].length); + } + } + } else + ASSERT(0, "no multiboot info found"); +} + + +void obtain_e820_mem_info(void) +{ + unsigned int i; + struct e820_entry *entry; + + e820_mem.mem_bottom = UINT64_MAX; + e820_mem.mem_top = 0x00; + e820_mem.max_ram_blk_base = 0; + e820_mem.max_ram_blk_size = 0; + + for (i = 0; i < e820_entries; i++) { + entry = &e820[i]; + if (e820_mem.mem_bottom > entry->baseaddr) + e820_mem.mem_bottom = entry->baseaddr; + + if (entry->baseaddr + entry->length + > e820_mem.mem_top) { + e820_mem.mem_top = entry->baseaddr + + entry->length; + } + + if (entry->baseaddr == UOS_DEFAULT_START_ADDR + && entry->type == E820_TYPE_RAM) { + e820_mem.max_ram_blk_base = + entry->baseaddr; + e820_mem.max_ram_blk_size = entry->length; + } + } +} + +static void rebuild_vm0_e820(void) +{ + unsigned int i; + uint64_t entry_start; + uint64_t entry_end; + uint64_t hv_start = CONFIG_RAM_START; + uint64_t 
hv_end = hv_start + CONFIG_RAM_SIZE; + struct e820_entry *entry, new_entry = {0}; + + /* hypervisor mem need be filter out from e820 table + * it's hv itself + other hv reserved mem like vgt etc + */ + for (i = 0; i < e820_entries; i++) { + entry = &e820[i]; + entry_start = entry->baseaddr; + entry_end = entry->baseaddr + entry->length; + + /* No need handle in these cases*/ + if (entry->type != E820_TYPE_RAM || entry_end <= hv_start + || entry_start >= hv_end) { + continue; + } + + /* filter out hv mem and adjust length of this entry*/ + if (entry_start < hv_start && entry_end <= hv_end) { + entry->length = hv_start - entry_start; + continue; + } + /* filter out hv mem and need to create a new entry*/ + if (entry_start < hv_start && entry_end > hv_end) { + entry->length = hv_start - entry_start; + new_entry.baseaddr = hv_end; + new_entry.length = entry_end - hv_end; + new_entry.type = E820_TYPE_RAM; + continue; + } + /* This entry is within the range of hv mem + * change to E820_TYPE_RESERVED + */ + if (entry_start >= hv_start && entry_end <= hv_end) { + entry->type = E820_TYPE_RESERVED; + continue; + } + + if (entry_start >= hv_start && entry_start < hv_end + && entry_end > hv_end) { + entry->baseaddr = hv_end; + entry->length = entry_end - hv_end; + continue; + } + + } + + if (new_entry.length > 0) { + e820_entries++; + ASSERT(e820_entries <= E820_MAX_ENTRIES, + "e820 entry overflow"); + entry = &e820[e820_entries - 1]; + entry->baseaddr = new_entry.baseaddr; + entry->length = new_entry.length; + entry->type = new_entry.type; + } + +} +int prepare_vm0_memmap_and_e820(struct vm *vm) +{ + unsigned int i; + uint32_t attr_wb = (MMU_MEM_ATTR_READ | + MMU_MEM_ATTR_WRITE | + MMU_MEM_ATTR_EXECUTE | + MMU_MEM_ATTR_WB_CACHE); + uint32_t attr_uc = (MMU_MEM_ATTR_READ | + MMU_MEM_ATTR_WRITE | + MMU_MEM_ATTR_EXECUTE | + MMU_MEM_ATTR_UNCACHED); + struct e820_entry *entry; + + + ASSERT(is_vm0(vm), "This func only for vm0"); + + rebuild_vm0_e820(); + dev_dbg(ACRN_DBG_GUEST, + "vm0: bottom memory - 0x%llx, top memory - 0x%llx\n", + e820_mem.mem_bottom, e820_mem.mem_top); + + /* create real ept map for all ranges with UC */ + ept_mmap(vm, e820_mem.mem_bottom, e820_mem.mem_bottom, + (e820_mem.mem_top - e820_mem.mem_bottom), + MAP_MMIO, attr_uc); + + /* update ram entries to WB attr */ + for (i = 0; i < e820_entries; i++) { + entry = &e820[i]; + if (entry->type == E820_TYPE_RAM) + ept_mmap(vm, entry->baseaddr, entry->baseaddr, + entry->length, MAP_MEM, attr_wb); + } + + + dev_dbg(ACRN_DBG_GUEST, "VM0 e820 layout:\n"); + for (i = 0; i < e820_entries; i++) { + entry = &e820[i]; + dev_dbg(ACRN_DBG_GUEST, + "e820 table: %d type: 0x%x", i, entry->type); + dev_dbg(ACRN_DBG_GUEST, + "BaseAddress: 0x%016llx length: 0x%016llx\n", + entry->baseaddr, entry->length); + } + + /* unmap hypervisor itself for safety + * will cause EPT violation if sos accesses hv memory + */ + ept_mmap(vm, CONFIG_RAM_START, CONFIG_RAM_START, + CONFIG_RAM_SIZE, MAP_UNMAP, 0); + return 0; +} diff --git a/hypervisor/arch/x86/guest/instr_emul.c b/hypervisor/arch/x86/guest/instr_emul.c new file mode 100644 index 000000000..3f9b259c6 --- /dev/null +++ b/hypervisor/arch/x86/guest/instr_emul.c @@ -0,0 +1,2137 @@ +/*- + * Copyright (c) 2012 Sandvine, Inc. + * Copyright (c) 2012 NetApp, Inc. + * Copyright (c) 2017 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include + +#include "instr_emul_wrapper.h" +#include "instr_emul.h" + +/* struct vie_op.op_type */ +enum { + VIE_OP_TYPE_NONE = 0, + VIE_OP_TYPE_MOV, + VIE_OP_TYPE_MOVSX, + VIE_OP_TYPE_MOVZX, + VIE_OP_TYPE_AND, + VIE_OP_TYPE_OR, + VIE_OP_TYPE_SUB, + VIE_OP_TYPE_TWO_BYTE, + VIE_OP_TYPE_PUSH, + VIE_OP_TYPE_CMP, + VIE_OP_TYPE_POP, + VIE_OP_TYPE_MOVS, + VIE_OP_TYPE_GROUP1, + VIE_OP_TYPE_STOS, + VIE_OP_TYPE_BITTEST, + VIE_OP_TYPE_TEST, + VIE_OP_TYPE_LAST +}; + +/* struct vie_op.op_flags */ +#define VIE_OP_F_IMM (1 << 0) /* 16/32-bit immediate operand */ +#define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */ +#define VIE_OP_F_MOFFSET (1 << 2) /* 16/32/64-bit immediate moffset */ +#define VIE_OP_F_NO_MODRM (1 << 3) +#define VIE_OP_F_NO_GLA_VERIFICATION (1 << 4) + +static const struct vie_op two_byte_opcodes[256] = { + [0xB6] = { + .op_byte = 0xB6, + .op_type = VIE_OP_TYPE_MOVZX, + }, + [0xB7] = { + .op_byte = 0xB7, + .op_type = VIE_OP_TYPE_MOVZX, + }, + [0xBA] = { + .op_byte = 0xBA, + .op_type = VIE_OP_TYPE_BITTEST, + .op_flags = VIE_OP_F_IMM8, + }, + [0xBE] = { + .op_byte = 0xBE, + .op_type = VIE_OP_TYPE_MOVSX, + }, +}; + +static const struct vie_op one_byte_opcodes[256] = { + [0x0F] = { + .op_byte = 0x0F, + .op_type = VIE_OP_TYPE_TWO_BYTE + }, + [0x2B] = { + .op_byte = 0x2B, + .op_type = VIE_OP_TYPE_SUB, + }, + [0x39] = { + .op_byte = 0x39, + .op_type = VIE_OP_TYPE_CMP, + }, + [0x3B] = { + .op_byte = 0x3B, + .op_type = VIE_OP_TYPE_CMP, + }, + [0x88] = { + .op_byte = 0x88, + .op_type = VIE_OP_TYPE_MOV, + }, + [0x89] = { + .op_byte = 0x89, + .op_type = VIE_OP_TYPE_MOV, + }, + [0x8A] = { + .op_byte = 0x8A, + .op_type = VIE_OP_TYPE_MOV, + }, + [0x8B] = { + .op_byte = 0x8B, + .op_type = VIE_OP_TYPE_MOV, + }, + [0xA1] = { + .op_byte = 0xA1, + .op_type = VIE_OP_TYPE_MOV, + .op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM, + }, + [0xA3] = { + .op_byte = 0xA3, + .op_type = VIE_OP_TYPE_MOV, + .op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM, + }, + [0xA4] = { + .op_byte = 0xA4, + .op_type = VIE_OP_TYPE_MOVS, + .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION + }, + [0xA5] = { + .op_byte = 0xA5, + .op_type = VIE_OP_TYPE_MOVS, + .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION + }, + [0xAA] = { + .op_byte = 0xAA, + .op_type = VIE_OP_TYPE_STOS, + .op_flags = VIE_OP_F_NO_MODRM | 
VIE_OP_F_NO_GLA_VERIFICATION + }, + [0xAB] = { + .op_byte = 0xAB, + .op_type = VIE_OP_TYPE_STOS, + .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION + }, + [0xC6] = { + /* XXX Group 11 extended opcode - not just MOV */ + .op_byte = 0xC6, + .op_type = VIE_OP_TYPE_MOV, + .op_flags = VIE_OP_F_IMM8, + }, + [0xC7] = { + .op_byte = 0xC7, + .op_type = VIE_OP_TYPE_MOV, + .op_flags = VIE_OP_F_IMM, + }, + [0x23] = { + .op_byte = 0x23, + .op_type = VIE_OP_TYPE_AND, + }, + [0x80] = { + /* Group 1 extended opcode */ + .op_byte = 0x80, + .op_type = VIE_OP_TYPE_GROUP1, + .op_flags = VIE_OP_F_IMM8, + }, + [0x81] = { + /* Group 1 extended opcode */ + .op_byte = 0x81, + .op_type = VIE_OP_TYPE_GROUP1, + .op_flags = VIE_OP_F_IMM, + }, + [0x83] = { + /* Group 1 extended opcode */ + .op_byte = 0x83, + .op_type = VIE_OP_TYPE_GROUP1, + .op_flags = VIE_OP_F_IMM8, + }, + [0x84] = { + .op_byte = 0x84, + .op_type = VIE_OP_TYPE_TEST, + }, + [0x85] = { + .op_byte = 0x85, + .op_type = VIE_OP_TYPE_TEST, + }, + [0x08] = { + .op_byte = 0x08, + .op_type = VIE_OP_TYPE_OR, + }, + [0x09] = { + .op_byte = 0x09, + .op_type = VIE_OP_TYPE_OR, + }, + [0x8F] = { + /* XXX Group 1A extended opcode - not just POP */ + .op_byte = 0x8F, + .op_type = VIE_OP_TYPE_POP, + }, + [0xFF] = { + /* XXX Group 5 extended opcode - not just PUSH */ + .op_byte = 0xFF, + .op_type = VIE_OP_TYPE_PUSH, + } +}; + +/* struct vie.mod */ +#define VIE_MOD_INDIRECT 0 +#define VIE_MOD_INDIRECT_DISP8 1 +#define VIE_MOD_INDIRECT_DISP32 2 +#define VIE_MOD_DIRECT 3 + +/* struct vie.rm */ +#define VIE_RM_SIB 4 +#define VIE_RM_DISP32 5 + +#define GB (1024 * 1024 * 1024) + +static enum vm_reg_name gpr_map[16] = { + VM_REG_GUEST_RAX, + VM_REG_GUEST_RCX, + VM_REG_GUEST_RDX, + VM_REG_GUEST_RBX, + VM_REG_GUEST_RSP, + VM_REG_GUEST_RBP, + VM_REG_GUEST_RSI, + VM_REG_GUEST_RDI, + VM_REG_GUEST_R8, + VM_REG_GUEST_R9, + VM_REG_GUEST_R10, + VM_REG_GUEST_R11, + VM_REG_GUEST_R12, + VM_REG_GUEST_R13, + VM_REG_GUEST_R14, + VM_REG_GUEST_R15 +}; + +static uint64_t size2mask[] = { + [1] = 0xff, + [2] = 0xffff, + [4] = 0xffffffff, + [8] = 0xffffffffffffffff, +}; + + +static int +vie_read_register(struct vcpu *vcpu, enum vm_reg_name reg, uint64_t *rval) +{ + int error; + + error = vm_get_register(vcpu, reg, rval); + + return error; +} + +static void +vie_calc_bytereg(struct vie *vie, enum vm_reg_name *reg, int *lhbr) +{ + *lhbr = 0; + *reg = gpr_map[vie->reg]; + + /* + * 64-bit mode imposes limitations on accessing legacy high byte + * registers (lhbr). + * + * The legacy high-byte registers cannot be addressed if the REX + * prefix is present. In this case the values 4, 5, 6 and 7 of the + * 'ModRM:reg' field address %spl, %bpl, %sil and %dil respectively. + * + * If the REX prefix is not present then the values 4, 5, 6 and 7 + * of the 'ModRM:reg' field address the legacy high-byte registers, + * %ah, %ch, %dh and %bh respectively. + */ + if (!vie->rex_present) { + if (vie->reg & 0x4) { + *lhbr = 1; + *reg = gpr_map[vie->reg & 0x3]; + } + } +} + +static int +vie_read_bytereg(struct vcpu *vcpu, struct vie *vie, uint8_t *rval) +{ + uint64_t val; + int error, lhbr; + enum vm_reg_name reg; + + vie_calc_bytereg(vie, ®, &lhbr); + error = vm_get_register(vcpu, reg, &val); + + /* + * To obtain the value of a legacy high byte register shift the + * base register right by 8 bits (%ah = %rax >> 8). 
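+ *
+ * Worked example: with %rax = 0x1234, %al is 0x34 and %ah is 0x12,
+ * which is exactly what (val >> 8) truncated to a byte yields below
+ * for the lhbr case.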
+ */ + if (lhbr) + *rval = val >> 8; + else + *rval = val; + return error; +} + +static int +vie_write_bytereg(struct vcpu *vcpu, struct vie *vie, uint8_t byte) +{ + uint64_t origval, val, mask; + int error, lhbr; + enum vm_reg_name reg; + + vie_calc_bytereg(vie, ®, &lhbr); + error = vm_get_register(vcpu, reg, &origval); + if (error == 0) { + val = byte; + mask = 0xff; + if (lhbr) { + /* + * Shift left by 8 to store 'byte' in a legacy high + * byte register. + */ + val <<= 8; + mask <<= 8; + } + val |= origval & ~mask; + error = vm_set_register(vcpu, reg, val); + } + return error; +} + +int +vie_update_register(struct vcpu *vcpu, enum vm_reg_name reg, + uint64_t val, int size) +{ + int error; + uint64_t origval; + + switch (size) { + case 1: + case 2: + error = vie_read_register(vcpu, reg, &origval); + if (error) + return error; + val &= size2mask[size]; + val |= origval & ~size2mask[size]; + break; + case 4: + val &= 0xffffffffUL; + break; + case 8: + break; + default: + return -EINVAL; + } + + error = vm_set_register(vcpu, reg, val); + return error; +} + +#define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V) + +/* + * Return the status flags that would result from doing (x - y). + */ +#define GETCC(sz) \ +static u_long \ +getcc##sz(uint##sz##_t x, uint##sz##_t y) \ +{ \ + u_long rflags; \ + \ + __asm __volatile("sub %2,%1; pushfq; popq %0" : \ + "=r" (rflags), "+r" (x) : "m" (y)); \ + return rflags; \ +} struct __hack + +GETCC(8); +GETCC(16); +GETCC(32); +GETCC(64); + +static u_long +getcc(int opsize, uint64_t x, uint64_t y) +{ + ASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8, + "getcc: invalid operand size %d", opsize); + + if (opsize == 1) + return getcc8(x, y); + else if (opsize == 2) + return getcc16(x, y); + else if (opsize == 4) + return getcc32(x, y); + else + return getcc64(x, y); +} + +static int +emulate_mov(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite, + void *arg) +{ + int error, size; + enum vm_reg_name reg; + uint8_t byte; + uint64_t val; + + size = vie->opsize; + error = -EINVAL; + switch (vie->op.op_byte) { + case 0x88: + /* + * MOV byte from reg (ModRM:reg) to mem (ModRM:r/m) + * 88/r: mov r/m8, r8 + * REX + 88/r: mov r/m8, r8 (%ah, %ch, %dh, %bh not available) + */ + size = 1; /* override for byte operation */ + error = vie_read_bytereg(vcpu, vie, &byte); + if (error == 0) + error = memwrite(vcpu, gpa, byte, size, + arg); + break; + case 0x89: + /* + * MOV from reg (ModRM:reg) to mem (ModRM:r/m) + * 89/r: mov r/m16, r16 + * 89/r: mov r/m32, r32 + * REX.W + 89/r mov r/m64, r64 + */ + + reg = gpr_map[vie->reg]; + error = vie_read_register(vcpu, reg, &val); + if (error == 0) { + val &= size2mask[size]; + error = memwrite(vcpu, gpa, val, size, + arg); + } + break; + case 0x8A: + /* + * MOV byte from mem (ModRM:r/m) to reg (ModRM:reg) + * 8A/r: mov r8, r/m8 + * REX + 8A/r: mov r8, r/m8 + */ + size = 1; /* override for byte operation */ + error = memread(vcpu, gpa, &val, size, arg); + if (error == 0) + error = vie_write_bytereg(vcpu, vie, val); + break; + case 0x8B: + /* + * MOV from mem (ModRM:r/m) to reg (ModRM:reg) + * 8B/r: mov r16, r/m16 + * 8B/r: mov r32, r/m32 + * REX.W 8B/r: mov r64, r/m64 + */ + error = memread(vcpu, gpa, &val, size, arg); + if (error == 0) { + reg = gpr_map[vie->reg]; + error = vie_update_register(vcpu, reg, + val, size); + } + break; + case 0xA1: + /* + * MOV from seg:moffset to AX/EAX/RAX + * A1: mov AX, moffs16 + * A1: mov EAX, moffs32 + * REX.W 
+ A1: mov RAX, moffs64 + */ + error = memread(vcpu, gpa, &val, size, arg); + if (error == 0) { + reg = VM_REG_GUEST_RAX; + error = vie_update_register(vcpu, reg, + val, size); + } + break; + case 0xA3: + /* + * MOV from AX/EAX/RAX to seg:moffset + * A3: mov moffs16, AX + * A3: mov moffs32, EAX + * REX.W + A3: mov moffs64, RAX + */ + error = vie_read_register(vcpu, VM_REG_GUEST_RAX, + &val); + if (error == 0) { + val &= size2mask[size]; + error = memwrite(vcpu, gpa, val, size, + arg); + } + break; + case 0xC6: + /* + * MOV from imm8 to mem (ModRM:r/m) + * C6/0 mov r/m8, imm8 + * REX + C6/0 mov r/m8, imm8 + */ + size = 1; /* override for byte operation */ + error = memwrite(vcpu, gpa, vie->immediate, size, + arg); + break; + case 0xC7: + /* + * MOV from imm16/imm32 to mem (ModRM:r/m) + * C7/0 mov r/m16, imm16 + * C7/0 mov r/m32, imm32 + * REX.W + C7/0 mov r/m64, imm32 + * (sign-extended to 64-bits) + */ + val = vie->immediate & size2mask[size]; + error = memwrite(vcpu, gpa, val, size, arg); + break; + default: + break; + } + + return error; +} + +static int +emulate_movx(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, __unused mem_region_write_t memwrite, + void *arg) +{ + int error, size; + enum vm_reg_name reg; + uint64_t val; + + size = vie->opsize; + error = -EINVAL; + + switch (vie->op.op_byte) { + case 0xB6: + /* + * MOV and zero extend byte from mem (ModRM:r/m) to + * reg (ModRM:reg). + * + * 0F B6/r movzx r16, r/m8 + * 0F B6/r movzx r32, r/m8 + * REX.W + 0F B6/r movzx r64, r/m8 + */ + + /* get the first operand */ + error = memread(vcpu, gpa, &val, 1, arg); + if (error) + break; + + /* get the second operand */ + reg = gpr_map[vie->reg]; + + /* zero-extend byte */ + val = (uint8_t)val; + + /* write the result */ + error = vie_update_register(vcpu, reg, val, size); + break; + case 0xB7: + /* + * MOV and zero extend word from mem (ModRM:r/m) to + * reg (ModRM:reg). + * + * 0F B7/r movzx r32, r/m16 + * REX.W + 0F B7/r movzx r64, r/m16 + */ + error = memread(vcpu, gpa, &val, 2, arg); + if (error) + return error; + + reg = gpr_map[vie->reg]; + + /* zero-extend word */ + val = (uint16_t)val; + + error = vie_update_register(vcpu, reg, val, size); + break; + case 0xBE: + /* + * MOV and sign extend byte from mem (ModRM:r/m) to + * reg (ModRM:reg). + * + * 0F BE/r movsx r16, r/m8 + * 0F BE/r movsx r32, r/m8 + * REX.W + 0F BE/r movsx r64, r/m8 + */ + + /* get the first operand */ + error = memread(vcpu, gpa, &val, 1, arg); + if (error) + break; + + /* get the second operand */ + reg = gpr_map[vie->reg]; + + /* sign extend byte */ + val = (int8_t)val; + + /* write the result */ + error = vie_update_register(vcpu, reg, val, size); + break; + default: + break; + } + return error; +} + +/* + * Helper function to calculate and validate a linear address. 
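+ * The segment base and effective address are combined according to the
+ * guest's CPU mode, then checked for canonical form and alignment;
+ * '*fault' is set to 1 when the access should raise an exception in the guest.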
+ */ +static int +get_gla(struct vcpu *vcpu, __unused struct vie *vie, + struct vm_guest_paging *paging, + int opsize, int addrsize, int prot, enum vm_reg_name seg, + enum vm_reg_name gpr, uint64_t *gla, int *fault) +{ + struct seg_desc desc; + uint64_t cr0, val, rflags; + int error; + + error = vie_read_register(vcpu, VM_REG_GUEST_CR0, &cr0); + ASSERT(error == 0, "%s: error %d getting cr0", __func__, error); + + error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags); + ASSERT(error == 0, "%s: error %d getting rflags", __func__, error); + + error = vm_get_seg_desc(vcpu, seg, &desc); + ASSERT(error == 0, "%s: error %d getting segment descriptor %d", + __func__, error, seg); + + error = vie_read_register(vcpu, gpr, &val); + ASSERT(error == 0, "%s: error %d getting register %d", __func__, + error, gpr); + + if (vie_calculate_gla(paging->cpu_mode, seg, &desc, val, opsize, + addrsize, prot, gla)) { + if (seg == VM_REG_GUEST_SS) + /*vm_inject_ss(vcpu, 0);*/ + pr_err("TODO: inject ss exception"); + else + /*vm_inject_gp(vcpu);*/ + pr_err("TODO: inject gp exception"); + goto guest_fault; + } + + if (vie_canonical_check(paging->cpu_mode, *gla)) { + if (seg == VM_REG_GUEST_SS) + /*vm_inject_ss(vcpu, 0);*/ + pr_err("TODO: inject ss exception"); + else + /*vm_inject_gp(vcpu);*/ + pr_err("TODO: inject gp exception"); + goto guest_fault; + } + + if (vie_alignment_check(paging->cpl, opsize, cr0, rflags, *gla)) { + /*vm_inject_ac(vcpu, 0);*/ + pr_err("TODO: inject ac exception"); + goto guest_fault; + } + + *fault = 0; + return 0; + +guest_fault: + *fault = 1; + return 0; +} + +static int +emulate_movs(struct vcpu *vcpu, __unused uint64_t gpa, struct vie *vie, + struct vm_guest_paging *paging, + __unused mem_region_read_t memread, + __unused mem_region_write_t memwrite, + __unused void *arg) +{ + uint64_t dstaddr, srcaddr, dstgpa, srcgpa; + uint64_t rcx, rdi, rsi, rflags; + int error, fault, opsize, seg, repeat; + + opsize = (vie->op.op_byte == 0xA4) ? 1 : vie->opsize; + error = 0; + + /* + * XXX although the MOVS instruction is only supposed to be used with + * the "rep" prefix some guests like FreeBSD will use "repnz" instead. + * + * Empirically the "repnz" prefix has identical behavior to "rep" + * and the zero flag does not make a difference. + */ + repeat = vie->repz_present | vie->repnz_present; + + if (repeat) { + error = vie_read_register(vcpu, VM_REG_GUEST_RCX, &rcx); + ASSERT(!error, "%s: error %d getting rcx", __func__, error); + + /* + * The count register is %rcx, %ecx or %cx depending on the + * address size of the instruction. + */ + if ((rcx & vie_size2mask(vie->addrsize)) == 0) { + error = 0; + goto done; + } + } + + seg = vie->segment_override ? 
vie->segment_register : VM_REG_GUEST_DS; + error = get_gla(vcpu, vie, paging, opsize, vie->addrsize, + PROT_READ, seg, VM_REG_GUEST_RSI, &srcaddr, &fault); + if (error || fault) + goto done; + + error = get_gla(vcpu, vie, paging, opsize, vie->addrsize, + PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr, + &fault); + if (error || fault) + goto done; + + vm_gva2gpa(vcpu, srcaddr, &srcgpa); + vm_gva2gpa(vcpu, dstaddr, &dstgpa); + memcpy_s((char *)dstaddr, 16, (char *)srcaddr, opsize); + + error = vie_read_register(vcpu, VM_REG_GUEST_RSI, &rsi); + ASSERT(error == 0, "%s: error %d getting rsi", __func__, error); + + error = vie_read_register(vcpu, VM_REG_GUEST_RDI, &rdi); + ASSERT(error == 0, "%s: error %d getting rdi", __func__, error); + + error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags); + ASSERT(error == 0, "%s: error %d getting rflags", __func__, error); + + if (rflags & PSL_D) { + rsi -= opsize; + rdi -= opsize; + } else { + rsi += opsize; + rdi += opsize; + } + + error = vie_update_register(vcpu, VM_REG_GUEST_RSI, rsi, + vie->addrsize); + ASSERT(error == 0, "%s: error %d updating rsi", __func__, error); + + error = vie_update_register(vcpu, VM_REG_GUEST_RDI, rdi, + vie->addrsize); + ASSERT(error == 0, "%s: error %d updating rdi", __func__, error); + + if (repeat) { + rcx = rcx - 1; + error = vie_update_register(vcpu, VM_REG_GUEST_RCX, + rcx, vie->addrsize); + ASSERT(!error, "%s: error %d updating rcx", __func__, error); + + /* + * Repeat the instruction if the count register is not zero. + */ + if ((rcx & vie_size2mask(vie->addrsize)) != 0) + vm_restart_instruction(vcpu); + } +done: + ASSERT(error == 0, "%s: unexpected error %d", __func__, error); + return error; +} + +static int +emulate_stos(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + __unused struct vm_guest_paging *paging, + __unused mem_region_read_t memread, + mem_region_write_t memwrite, void *arg) +{ + int error, opsize, repeat; + uint64_t val; + uint64_t rcx, rdi, rflags; + + opsize = (vie->op.op_byte == 0xAA) ? 1 : vie->opsize; + repeat = vie->repz_present | vie->repnz_present; + + if (repeat) { + error = vie_read_register(vcpu, VM_REG_GUEST_RCX, &rcx); + ASSERT(!error, "%s: error %d getting rcx", __func__, error); + + /* + * The count register is %rcx, %ecx or %cx depending on the + * address size of the instruction. + */ + if ((rcx & vie_size2mask(vie->addrsize)) == 0) + return 0; + } + + error = vie_read_register(vcpu, VM_REG_GUEST_RAX, &val); + ASSERT(!error, "%s: error %d getting rax", __func__, error); + + error = memwrite(vcpu, gpa, val, opsize, arg); + if (error) + return error; + + error = vie_read_register(vcpu, VM_REG_GUEST_RDI, &rdi); + ASSERT(error == 0, "%s: error %d getting rdi", __func__, error); + + error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags); + ASSERT(error == 0, "%s: error %d getting rflags", __func__, error); + + if (rflags & PSL_D) + rdi -= opsize; + else + rdi += opsize; + + error = vie_update_register(vcpu, VM_REG_GUEST_RDI, rdi, + vie->addrsize); + ASSERT(error == 0, "%s: error %d updating rdi", __func__, error); + + if (repeat) { + rcx = rcx - 1; + error = vie_update_register(vcpu, VM_REG_GUEST_RCX, + rcx, vie->addrsize); + ASSERT(!error, "%s: error %d updating rcx", __func__, error); + + /* + * Repeat the instruction if the count register is not zero. 
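+		 * vm_restart_instruction() retains the guest RIP, so the same
+		 * STOS iteration is executed again when the guest resumes.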
+ */ + if ((rcx & vie_size2mask(vie->addrsize)) != 0) + vm_restart_instruction(vcpu); + } + + return 0; +} + +static int +emulate_test(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, __unused mem_region_write_t memwrite, + void *arg) +{ + int error, size; + enum vm_reg_name reg; + uint64_t result, rflags, rflags2, val1, val2; + + size = vie->opsize; + error = -EINVAL; + + switch (vie->op.op_byte) { + case 0x84: + /* + * 84/r test r8, r/m8 + */ + size = 1; /*override size for 8-bit operation*/ + /* fallthrough */ + case 0x85: + /* + * AND reg (ModRM:reg) and mem (ModRM:r/m) and discard + * the result. + * + * + * 85/r test r16, r/m16 + * 85/r test r32, r/m32 + * REX.W + 85/r test r64, r/m64 + */ + + /* get the first operand */ + reg = gpr_map[vie->reg]; + error = vie_read_register(vcpu, reg, &val1); + if (error) + break; + + /* get the second operand */ + error = memread(vcpu, gpa, &val2, size, arg); + if (error) + break; + + /* perform the operation and write the result */ + result = val1 & val2; + break; + default: + break; + } + if (error) + return error; + + error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags); + if (error) + return error; + + /* + * OF and CF are cleared; the SF, ZF and PF flags are set according + * to the result; AF is undefined. + * + * The updated status flags are obtained by subtracting 0 from 'result'. + */ + rflags2 = getcc(size, result, 0); + rflags &= ~RFLAGS_STATUS_BITS; + rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N); + + error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS, rflags, 8); + return error; +} + +static int +emulate_and(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite, + void *arg) +{ + int error, size; + enum vm_reg_name reg; + uint64_t result, rflags, rflags2, val1, val2; + + size = vie->opsize; + error = -EINVAL; + + switch (vie->op.op_byte) { + case 0x23: + /* + * AND reg (ModRM:reg) and mem (ModRM:r/m) and store the + * result in reg. + * + * 23/r and r16, r/m16 + * 23/r and r32, r/m32 + * REX.W + 23/r and r64, r/m64 + */ + + /* get the first operand */ + reg = gpr_map[vie->reg]; + error = vie_read_register(vcpu, reg, &val1); + if (error) + break; + + /* get the second operand */ + error = memread(vcpu, gpa, &val2, size, arg); + if (error) + break; + + /* perform the operation and write the result */ + result = val1 & val2; + error = vie_update_register(vcpu, reg, result, + size); + break; + case 0x81: + case 0x83: + /* + * AND mem (ModRM:r/m) with immediate and store the + * result in mem. + * + * 81 /4 and r/m16, imm16 + * 81 /4 and r/m32, imm32 + * REX.W + 81 /4 and r/m64, imm32 sign-extended to 64 + * + * 83 /4 and r/m16, imm8 sign-extended to 16 + * 83 /4 and r/m32, imm8 sign-extended to 32 + * REX.W + 83/4 and r/m64, imm8 sign-extended to 64 + */ + + /* get the first operand */ + error = memread(vcpu, gpa, &val1, size, arg); + if (error) + break; + + /* + * perform the operation with the pre-fetched immediate + * operand and write the result + */ + result = val1 & vie->immediate; + error = memwrite(vcpu, gpa, result, size, arg); + break; + default: + break; + } + if (error) + return error; + + error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags); + if (error) + return error; + + /* + * OF and CF are cleared; the SF, ZF and PF flags are set according + * to the result; AF is undefined. + * + * The updated status flags are obtained by subtracting 0 from 'result'. 
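+	 * Subtracting 0 leaves 'result' unchanged, so SF, ZF and PF describe
+	 * 'result' itself; CF and OF stay cleared.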
+ */ + rflags2 = getcc(size, result, 0); + rflags &= ~RFLAGS_STATUS_BITS; + rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N); + + error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS, rflags, 8); + return error; +} + +static int +emulate_or(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite, + void *arg) +{ + int error, size; + enum vm_reg_name reg; + uint64_t val1, val2, result, rflags, rflags2; + + size = vie->opsize; + error = -EINVAL; + + switch (vie->op.op_byte) { + case 0x81: + case 0x83: + /* + * OR mem (ModRM:r/m) with immediate and store the + * result in mem. + * + * 81 /1 or r/m16, imm16 + * 81 /1 or r/m32, imm32 + * REX.W + 81 /1 or r/m64, imm32 sign-extended to + * 64 + * + * 83 /1 or r/m16, imm8 sign-extended to + * 16 + * 83 /1 or r/m32, imm8 sign-extended to + * 32 + * REX.W + 83/1 or r/m64, imm8 sign-extended to + * 64 + */ + + /* get the first operand */ + error = memread(vcpu, gpa, &val1, size, arg); + if (error) + break; + + /* + * perform the operation with the pre-fetched immediate + * operand and write the result + */ + result = val1 | vie->immediate; + error = memwrite(vcpu, gpa, result, size, arg); + break; + case 0x09: + /* + * OR mem (ModRM:r/m) with reg (ModRM:reg) and store the + * result in mem. + * 09/r: OR r/m16, r16 + * 09/r: OR r/m32, r32 + */ + + /* get the first operand */ + error = memread(vcpu, gpa, &val1, size, arg); + if (error) + break; + + /* get the second operand */ + reg = gpr_map[vie->reg]; + error = vie_read_register(vcpu, reg, &val2); + if (error) + break; + + /* perform the operation and write the result */ + result = val1 | val2; + result &= size2mask[size]; + + error = memwrite(vcpu, gpa, result, size, arg); + break; + default: + break; + } + if (error) + return error; + + error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags); + if (error) + return error; + + /* + * OF and CF are cleared; the SF, ZF and PF flags are set according + * to the result; AF is undefined. + * + * The updated status flags are obtained by subtracting 0 from 'result'. + */ + rflags2 = getcc(size, result, 0); + rflags &= ~RFLAGS_STATUS_BITS; + rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N); + + error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS, rflags, 8); + return error; +} + +static int +emulate_cmp(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, __unused mem_region_write_t memwrite, + void *arg) +{ + int error, size; + uint64_t regop, memop, op1, op2, rflags, rflags2; + enum vm_reg_name reg; + + size = vie->opsize; + switch (vie->op.op_byte) { + case 0x39: + case 0x3B: + /* + * 39/r CMP r/m16, r16 + * 39/r CMP r/m32, r32 + * REX.W 39/r CMP r/m64, r64 + * + * 3B/r CMP r16, r/m16 + * 3B/r CMP r32, r/m32 + * REX.W + 3B/r CMP r64, r/m64 + * + * Compare the first operand with the second operand and + * set status flags in EFLAGS register. The comparison + * is performed by subtracting the second operand from + * the first operand and then setting the status flags. 
+		 */
+
+		/* Get the register operand */
+		reg = gpr_map[vie->reg];
+		error = vie_read_register(vcpu, reg, &regop);
+		if (error)
+			return error;
+
+		/* Get the memory operand */
+		error = memread(vcpu, gpa, &memop, size, arg);
+		if (error)
+			return error;
+
+		if (vie->op.op_byte == 0x3B) {
+			op1 = regop;
+			op2 = memop;
+		} else {
+			op1 = memop;
+			op2 = regop;
+		}
+		rflags2 = getcc(size, op1, op2);
+		break;
+	case 0x80:
+	case 0x81:
+	case 0x83:
+		/*
+		 * 80 /7		cmp r/m8, imm8
+		 * REX + 80 /7		cmp r/m8, imm8
+		 *
+		 * 81 /7		cmp r/m16, imm16
+		 * 81 /7		cmp r/m32, imm32
+		 * REX.W + 81 /7	cmp r/m64, imm32 sign-extended
+		 *			to 64
+		 *
+		 * 83 /7		cmp r/m16, imm8 sign-extended
+		 *			to 16
+		 * 83 /7		cmp r/m32, imm8 sign-extended
+		 *			to 32
+		 * REX.W + 83 /7	cmp r/m64, imm8 sign-extended
+		 *			to 64
+		 *
+		 * Compare mem (ModRM:r/m) with immediate and set
+		 * status flags according to the results. The
+		 * comparison is performed by subtracting the
+		 * immediate from the first operand and then setting
+		 * the status flags.
+		 *
+		 */
+		if (vie->op.op_byte == 0x80)
+			size = 1;
+
+		/* get the first operand */
+		error = memread(vcpu, gpa, &op1, size, arg);
+		if (error)
+			return error;
+
+		rflags2 = getcc(size, op1, vie->immediate);
+		break;
+	default:
+		return -EINVAL;
+	}
+	error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags);
+	if (error)
+		return error;
+	rflags &= ~RFLAGS_STATUS_BITS;
+	rflags |= rflags2 & RFLAGS_STATUS_BITS;
+
+	error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS, rflags, 8);
+	return error;
+}
+
+static int
+emulate_sub(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
+	mem_region_read_t memread, __unused mem_region_write_t memwrite,
+	void *arg)
+{
+	int error, size;
+	uint64_t nval, rflags, rflags2, val1, val2;
+	enum vm_reg_name reg;
+
+	size = vie->opsize;
+	error = -EINVAL;
+
+	switch (vie->op.op_byte) {
+	case 0x2B:
+		/*
+		 * SUB r/m from r and store the result in r
+		 *
+		 * 2B/r		SUB r16, r/m16
+		 * 2B/r		SUB r32, r/m32
+		 * REX.W + 2B/r	SUB r64, r/m64
+		 */
+
+		/* get the first operand */
+		reg = gpr_map[vie->reg];
+		error = vie_read_register(vcpu, reg, &val1);
+		if (error)
+			break;
+
+		/* get the second operand */
+		error = memread(vcpu, gpa, &val2, size, arg);
+		if (error)
+			break;
+
+		/* perform the operation and write the result */
+		nval = val1 - val2;
+		error = vie_update_register(vcpu, reg, nval,
+				size);
+		break;
+	default:
+		break;
+	}
+
+	if (!error) {
+		rflags2 = getcc(size, val1, val2);
+		error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS,
+				&rflags);
+		if (error)
+			return error;
+
+		rflags &= ~RFLAGS_STATUS_BITS;
+		rflags |= rflags2 & RFLAGS_STATUS_BITS;
+		error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS,
+				rflags, 8);
+	}
+
+	return error;
+}
+
+static int
+emulate_stack_op(struct vcpu *vcpu, uint64_t mmio_gpa, struct vie *vie,
+	struct vm_guest_paging *paging, mem_region_read_t memread,
+	mem_region_write_t memwrite, void *arg)
+{
+	struct seg_desc ss_desc;
+	uint64_t cr0, rflags, rsp, stack_gla, stack_gpa, val;
+	int error, size, stackaddrsize, pushop;
+
+	memset(&ss_desc, 0, sizeof(ss_desc));
+
+	val = 0;
+	size = vie->opsize;
+	pushop = (vie->op.op_type == VIE_OP_TYPE_PUSH) ? 1 : 0;
+
+	/*
+	 * From "Address-Size Attributes for Stack Accesses", Intel SDM, Vol 1
+	 */
+	if (paging->cpu_mode == CPU_MODE_REAL) {
+		stackaddrsize = 2;
+	} else if (paging->cpu_mode == CPU_MODE_64BIT) {
+		/*
+		 * "Stack Manipulation Instructions in 64-bit Mode", SDM, Vol 3
+		 * - Stack pointer size is always 64-bits.
+		 * - PUSH/POP of 32-bit values is not possible in 64-bit mode.
+ * - 16-bit PUSH/POP is supported by using the operand size + * override prefix (66H). + */ + stackaddrsize = 8; + size = vie->opsize_override ? 2 : 8; + } else { + /* + * In protected or compatibility mode the 'B' flag in the + * stack-segment descriptor determines the size of the + * stack pointer. + */ + error = vm_get_seg_desc(vcpu, VM_REG_GUEST_SS, &ss_desc); + ASSERT(error == 0, "%s: error %d getting SS descriptor", + __func__, error); + if (SEG_DESC_DEF32(ss_desc.access)) + stackaddrsize = 4; + else + stackaddrsize = 2; + } + + error = vie_read_register(vcpu, VM_REG_GUEST_CR0, &cr0); + ASSERT(error == 0, "%s: error %d getting cr0", __func__, error); + + error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags); + ASSERT(error == 0, "%s: error %d getting rflags", __func__, error); + + error = vie_read_register(vcpu, VM_REG_GUEST_RSP, &rsp); + ASSERT(error == 0, "%s: error %d getting rsp", __func__, error); + if (pushop) + rsp -= size; + + if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &ss_desc, + rsp, size, stackaddrsize, pushop ? PROT_WRITE : PROT_READ, + &stack_gla)) { + /*vm_inject_ss(vcpu, 0);*/ + pr_err("TODO: inject ss exception"); + } + + if (vie_canonical_check(paging->cpu_mode, stack_gla)) { + /*vm_inject_ss(vcpu, 0);*/ + pr_err("TODO: inject ss exception"); + } + + if (vie_alignment_check(paging->cpl, size, cr0, rflags, stack_gla)) { + /*vm_inject_ac(vcpu, 0);*/ + pr_err("TODO: inject ac exception"); + return 0; + } + + vm_gva2gpa(vcpu, stack_gla, &stack_gpa); + if (pushop) { + error = memread(vcpu, mmio_gpa, &val, size, arg); + if (error == 0) + error = memwrite(vcpu, stack_gpa, val, size, arg); + } else { + error = memread(vcpu, stack_gpa, &val, size, arg); + if (error == 0) + error = memwrite(vcpu, mmio_gpa, val, size, arg); + rsp += size; + } + + + if (error == 0) { + error = vie_update_register(vcpu, VM_REG_GUEST_RSP, rsp, + stackaddrsize); + ASSERT(error == 0, "error %d updating rsp", error); + } + return error; +} + +static int +emulate_push(struct vcpu *vcpu, uint64_t mmio_gpa, struct vie *vie, + struct vm_guest_paging *paging, mem_region_read_t memread, + mem_region_write_t memwrite, void *arg) +{ + int error; + + /* + * Table A-6, "Opcode Extensions", Intel SDM, Vol 2. + * + * PUSH is part of the group 5 extended opcodes and is identified + * by ModRM:reg = b110. + */ + if ((vie->reg & 7) != 6) + return -EINVAL; + + error = emulate_stack_op(vcpu, mmio_gpa, vie, paging, memread, + memwrite, arg); + return error; +} + +static int +emulate_pop(struct vcpu *vcpu, uint64_t mmio_gpa, struct vie *vie, + struct vm_guest_paging *paging, mem_region_read_t memread, + mem_region_write_t memwrite, void *arg) +{ + int error; + + /* + * Table A-6, "Opcode Extensions", Intel SDM, Vol 2. + * + * POP is part of the group 1A extended opcodes and is identified + * by ModRM:reg = b000. 
+ */ + if ((vie->reg & 7) != 0) + return -EINVAL; + + error = emulate_stack_op(vcpu, mmio_gpa, vie, paging, memread, + memwrite, arg); + return error; +} + +static int +emulate_group1(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + __unused struct vm_guest_paging *paging, + mem_region_read_t memread, + mem_region_write_t memwrite, void *memarg) +{ + int error; + + switch (vie->reg & 7) { + case 0x1: /* OR */ + error = emulate_or(vcpu, gpa, vie, + memread, memwrite, memarg); + break; + case 0x4: /* AND */ + error = emulate_and(vcpu, gpa, vie, + memread, memwrite, memarg); + break; + case 0x7: /* CMP */ + error = emulate_cmp(vcpu, gpa, vie, + memread, memwrite, memarg); + break; + default: + error = EINVAL; + break; + } + + return error; +} + +static int +emulate_bittest(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, __unused mem_region_write_t memwrite, + void *memarg) +{ + uint64_t val, rflags; + int error, bitmask, bitoff; + + /* + * 0F BA is a Group 8 extended opcode. + * + * Currently we only emulate the 'Bit Test' instruction which is + * identified by a ModR/M:reg encoding of 100b. + */ + if ((vie->reg & 7) != 4) + return -EINVAL; + + error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags); + ASSERT(error == 0, "%s: error %d getting rflags", __func__, error); + + error = memread(vcpu, gpa, &val, vie->opsize, memarg); + if (error) + return error; + + /* + * Intel SDM, Vol 2, Table 3-2: + * "Range of Bit Positions Specified by Bit Offset Operands" + */ + bitmask = vie->opsize * 8 - 1; + bitoff = vie->immediate & bitmask; + + /* Copy the bit into the Carry flag in %rflags */ + if (val & (1UL << bitoff)) + rflags |= PSL_C; + else + rflags &= ~PSL_C; + + error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS, rflags, 8); + ASSERT(error == 0, "%s: error %d updating rflags", __func__, error); + + return 0; +} + +int +vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + struct vm_guest_paging *paging, mem_region_read_t memread, + mem_region_write_t memwrite, void *memarg) +{ + int error; + + if (!vie->decoded) + return -EINVAL; + switch (vie->op.op_type) { + case VIE_OP_TYPE_GROUP1: + error = emulate_group1(vcpu, gpa, vie, paging, + memread, memwrite, memarg); + break; + case VIE_OP_TYPE_POP: + error = emulate_pop(vcpu, gpa, vie, paging, + memread, memwrite, memarg); + break; + case VIE_OP_TYPE_PUSH: + error = emulate_push(vcpu, gpa, vie, paging, + memread, memwrite, memarg); + break; + case VIE_OP_TYPE_CMP: + error = emulate_cmp(vcpu, gpa, vie, + memread, memwrite, memarg); + break; + case VIE_OP_TYPE_MOV: + error = emulate_mov(vcpu, gpa, vie, + memread, memwrite, memarg); + break; + case VIE_OP_TYPE_MOVSX: + case VIE_OP_TYPE_MOVZX: + error = emulate_movx(vcpu, gpa, vie, + memread, memwrite, memarg); + break; + case VIE_OP_TYPE_MOVS: + error = emulate_movs(vcpu, gpa, vie, paging, + memread, memwrite, memarg); + break; + case VIE_OP_TYPE_STOS: + error = emulate_stos(vcpu, gpa, vie, paging, + memread, memwrite, memarg); + break; + case VIE_OP_TYPE_AND: + error = emulate_and(vcpu, gpa, vie, + memread, memwrite, memarg); + break; + case VIE_OP_TYPE_TEST: + error = emulate_test(vcpu, gpa, vie, + memread, memwrite, memarg); + break; + case VIE_OP_TYPE_OR: + error = emulate_or(vcpu, gpa, vie, + memread, memwrite, memarg); + break; + case VIE_OP_TYPE_SUB: + error = emulate_sub(vcpu, gpa, vie, + memread, memwrite, memarg); + break; + case VIE_OP_TYPE_BITTEST: + error = emulate_bittest(vcpu, gpa, vie, + memread, memwrite, memarg); + 
break; + default: + error = -EINVAL; + break; + } + return error; +} + +int +vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla) +{ + ASSERT(size == 1 || size == 2 || size == 4 || size == 8, + "%s: invalid size %d", __func__, size); + ASSERT(cpl >= 0 && cpl <= 3, "%s: invalid cpl %d", __func__, cpl); + + if (cpl != 3 || (cr0 & CR0_AM) == 0 || (rf & PSL_AC) == 0) + return 0; + + return (gla & (size - 1)) ? 1 : 0; +} + +int +vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla) +{ + uint64_t mask; + + if (cpu_mode != CPU_MODE_64BIT) + return 0; + + /* + * The value of the bit 47 in the 'gla' should be replicated in the + * most significant 16 bits. + */ + mask = ~((1UL << 48) - 1); + if (gla & (1UL << 47)) + return (gla & mask) != mask; + else + return (gla & mask) != 0; +} + +uint64_t +vie_size2mask(int size) +{ + ASSERT(size == 1 || size == 2 || size == 4 || size == 8, + "vie_size2mask: invalid size %d", size); + return size2mask[size]; +} + +int +vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg, + struct seg_desc *desc, uint64_t offset, int length, int addrsize, + int prot, uint64_t *gla) +{ + uint64_t firstoff, low_limit, high_limit, segbase; + int glasize, type; + + ASSERT(seg >= VM_REG_GUEST_ES && seg <= VM_REG_GUEST_GS, + "%s: invalid segment %d", __func__, seg); + ASSERT(length == 1 || length == 2 || length == 4 || length == 8, + "%s: invalid operand size %d", __func__, length); + ASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0, + "%s: invalid prot %#x", __func__, prot); + + firstoff = offset; + if (cpu_mode == CPU_MODE_64BIT) { + ASSERT(addrsize == 4 || addrsize == 8, + "%s: invalid address size %d for cpu_mode %d", + __func__, addrsize, cpu_mode); + glasize = 8; + } else { + ASSERT(addrsize == 2 || addrsize == 4, + "%s: invalid address size %d for cpu mode %d", + __func__, addrsize, cpu_mode); + glasize = 4; + /* + * If the segment selector is loaded with a NULL selector + * then the descriptor is unusable and attempting to use + * it results in a #GP(0). + */ + if (SEG_DESC_UNUSABLE(desc->access)) + return -1; + + /* + * The processor generates a #NP exception when a segment + * register is loaded with a selector that points to a + * descriptor that is not present. If this was the case then + * it would have been checked before the VM-exit. + */ + ASSERT(SEG_DESC_PRESENT(desc->access), + "segment %d not present: %#x", seg, desc->access); + + /* + * The descriptor type must indicate a code/data segment. + */ + type = SEG_DESC_TYPE(desc->access); + ASSERT(type >= 16 && type <= 31, + "segment %d has invalid descriptor type %#x", + seg, type); + + if (prot & PROT_READ) { + /* #GP on a read access to a exec-only code segment */ + if ((type & 0xA) == 0x8) + return -1; + } + + if (prot & PROT_WRITE) { + /* + * #GP on a write access to a code segment or a + * read-only data segment. + */ + if (type & 0x8) /* code segment */ + return -1; + + if ((type & 0xA) == 0) /* read-only data seg */ + return -1; + } + + /* + * 'desc->limit' is fully expanded taking granularity into + * account. + */ + if ((type & 0xC) == 0x4) { + /* expand-down data segment */ + low_limit = desc->limit + 1; + high_limit = SEG_DESC_DEF32(desc->access) ? 
+ 0xffffffff : 0xffff; + } else { + /* code segment or expand-up data segment */ + low_limit = 0; + high_limit = desc->limit; + } + + while (length > 0) { + offset &= vie_size2mask(addrsize); + if (offset < low_limit || offset > high_limit) + return -1; + offset++; + length--; + } + } + + /* + * In 64-bit mode all segments except %fs and %gs have a segment + * base address of 0. + */ + if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS && + seg != VM_REG_GUEST_GS) { + segbase = 0; + } else { + segbase = desc->base; + } + + /* + * Truncate 'firstoff' to the effective address size before adding + * it to the segment base. + */ + firstoff &= vie_size2mask(addrsize); + *gla = (segbase + firstoff) & vie_size2mask(glasize); + return 0; +} + +void +vie_init(struct vie *vie, const char *inst_bytes, int inst_length) +{ + ASSERT(inst_length >= 0 && inst_length <= VIE_INST_SIZE, + "%s: invalid instruction length (%d)", __func__, inst_length); + + memset(vie, 0, sizeof(struct vie)); + + vie->base_register = VM_REG_LAST; + vie->index_register = VM_REG_LAST; + vie->segment_register = VM_REG_LAST; + + if (inst_length) { + memcpy_s((char *)vie->inst, VIE_INST_SIZE, + (char *)inst_bytes, inst_length); + vie->num_valid = inst_length; + } +} + +static int +vie_peek(struct vie *vie, uint8_t *x) +{ + + if (vie->num_processed < vie->num_valid) { + *x = vie->inst[vie->num_processed]; + return 0; + } else + return -1; +} + + static void +vie_advance(struct vie *vie) +{ + + vie->num_processed++; +} + +static bool +segment_override(uint8_t x, int *seg) +{ + + switch (x) { + case 0x2E: + *seg = VM_REG_GUEST_CS; + break; + case 0x36: + *seg = VM_REG_GUEST_SS; + break; + case 0x3E: + *seg = VM_REG_GUEST_DS; + break; + case 0x26: + *seg = VM_REG_GUEST_ES; + break; + case 0x64: + *seg = VM_REG_GUEST_FS; + break; + case 0x65: + *seg = VM_REG_GUEST_GS; + break; + default: + return false; + } + return true; +} + +static int +decode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d) +{ + uint8_t x; + + while (1) { + if (vie_peek(vie, &x)) + return -1; + + if (x == 0x66) + vie->opsize_override = 1; + else if (x == 0x67) + vie->addrsize_override = 1; + else if (x == 0xF3) + vie->repz_present = 1; + else if (x == 0xF2) + vie->repnz_present = 1; + else if (segment_override(x, &vie->segment_register)) + vie->segment_override = 1; + else + break; + + vie_advance(vie); + } + + /* + * From section 2.2.1, "REX Prefixes", Intel SDM Vol 2: + * - Only one REX prefix is allowed per instruction. + * - The REX prefix must immediately precede the opcode byte or the + * escape opcode byte. + * - If an instruction has a mandatory prefix (0x66, 0xF2 or 0xF3) + * the mandatory prefix must come before the REX prefix. + */ + if (cpu_mode == CPU_MODE_64BIT && x >= 0x40 && x <= 0x4F) { + vie->rex_present = 1; + vie->rex_w = x & 0x8 ? 1 : 0; + vie->rex_r = x & 0x4 ? 1 : 0; + vie->rex_x = x & 0x2 ? 1 : 0; + vie->rex_b = x & 0x1 ? 1 : 0; + vie_advance(vie); + } + + /* + * Section "Operand-Size And Address-Size Attributes", Intel SDM, Vol 1 + */ + if (cpu_mode == CPU_MODE_64BIT) { + /* + * Default address size is 64-bits and default operand size + * is 32-bits. + */ + vie->addrsize = vie->addrsize_override ? 4 : 8; + if (vie->rex_w) + vie->opsize = 8; + else if (vie->opsize_override) + vie->opsize = 2; + else + vie->opsize = 4; + } else if (cs_d) { + /* Default address and operand sizes are 32-bits */ + vie->addrsize = vie->addrsize_override ? 2 : 4; + vie->opsize = vie->opsize_override ? 
2 : 4; + } else { + /* Default address and operand sizes are 16-bits */ + vie->addrsize = vie->addrsize_override ? 4 : 2; + vie->opsize = vie->opsize_override ? 4 : 2; + } + return 0; +} + +static int +decode_two_byte_opcode(struct vie *vie) +{ + uint8_t x; + + if (vie_peek(vie, &x)) + return -1; + + vie->op = two_byte_opcodes[x]; + + if (vie->op.op_type == VIE_OP_TYPE_NONE) + return -1; + + vie_advance(vie); + return 0; +} + +static int +decode_opcode(struct vie *vie) +{ + uint8_t x; + + if (vie_peek(vie, &x)) + return -1; + + vie->op = one_byte_opcodes[x]; + + if (vie->op.op_type == VIE_OP_TYPE_NONE) + return -1; + + vie_advance(vie); + + if (vie->op.op_type == VIE_OP_TYPE_TWO_BYTE) + return decode_two_byte_opcode(vie); + + return 0; +} + +static int +decode_modrm(struct vie *vie, enum vm_cpu_mode cpu_mode) +{ + uint8_t x; + + if (vie->op.op_flags & VIE_OP_F_NO_MODRM) + return 0; + + if (cpu_mode == CPU_MODE_REAL) + return -1; + + if (vie_peek(vie, &x)) + return -1; + + vie->mod = (x >> 6) & 0x3; + vie->rm = (x >> 0) & 0x7; + vie->reg = (x >> 3) & 0x7; + + /* + * A direct addressing mode makes no sense in the context of an EPT + * fault. There has to be a memory access involved to cause the + * EPT fault. + */ + if (vie->mod == VIE_MOD_DIRECT) + return -1; + + if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) || + (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) { + /* + * Table 2-5: Special Cases of REX Encodings + * + * mod=0, r/m=5 is used in the compatibility mode to + * indicate a disp32 without a base register. + * + * mod!=3, r/m=4 is used in the compatibility mode to + * indicate that the SIB byte is present. + * + * The 'b' bit in the REX prefix is don't care in + * this case. + */ + } else { + vie->rm |= (vie->rex_b << 3); + } + + vie->reg |= (vie->rex_r << 3); + + /* SIB */ + if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB) + goto done; + + vie->base_register = gpr_map[vie->rm]; + + switch (vie->mod) { + case VIE_MOD_INDIRECT_DISP8: + vie->disp_bytes = 1; + break; + case VIE_MOD_INDIRECT_DISP32: + vie->disp_bytes = 4; + break; + case VIE_MOD_INDIRECT: + if (vie->rm == VIE_RM_DISP32) { + vie->disp_bytes = 4; + /* + * Table 2-7. RIP-Relative Addressing + * + * In 64-bit mode mod=00 r/m=101 implies [rip] + disp32 + * whereas in compatibility mode it just implies disp32. + */ + + if (cpu_mode == CPU_MODE_64BIT) + vie->base_register = VM_REG_GUEST_RIP; + else + vie->base_register = VM_REG_LAST; + } + break; + } + +done: + vie_advance(vie); + + return 0; +} + +static int +decode_sib(struct vie *vie) +{ + uint8_t x; + + /* Proceed only if SIB byte is present */ + if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB) + return 0; + + if (vie_peek(vie, &x)) + return -1; + + /* De-construct the SIB byte */ + vie->ss = (x >> 6) & 0x3; + vie->index = (x >> 3) & 0x7; + vie->base = (x >> 0) & 0x7; + + /* Apply the REX prefix modifiers */ + vie->index |= vie->rex_x << 3; + vie->base |= vie->rex_b << 3; + + switch (vie->mod) { + case VIE_MOD_INDIRECT_DISP8: + vie->disp_bytes = 1; + break; + case VIE_MOD_INDIRECT_DISP32: + vie->disp_bytes = 4; + break; + } + + if (vie->mod == VIE_MOD_INDIRECT && + (vie->base == 5 || vie->base == 13)) { + /* + * Special case when base register is unused if mod = 0 + * and base = %rbp or %r13. 
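+		 * A 32-bit displacement then follows the SIB byte in place
+		 * of the base register.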
+ * + * Documented in: + * Table 2-3: 32-bit Addressing Forms with the SIB Byte + * Table 2-5: Special Cases of REX Encodings + */ + vie->disp_bytes = 4; + } else { + vie->base_register = gpr_map[vie->base]; + } + + /* + * All encodings of 'index' are valid except for %rsp (4). + * + * Documented in: + * Table 2-3: 32-bit Addressing Forms with the SIB Byte + * Table 2-5: Special Cases of REX Encodings + */ + if (vie->index != 4) + vie->index_register = gpr_map[vie->index]; + + /* 'scale' makes sense only in the context of an index register */ + if (vie->index_register < VM_REG_LAST) + vie->scale = 1 << vie->ss; + + vie_advance(vie); + + return 0; +} + +static int +decode_displacement(struct vie *vie) +{ + int n, i; + uint8_t x; + + union { + char buf[4]; + int8_t signed8; + int32_t signed32; + } u; + + n = vie->disp_bytes; + if (n == 0) + return 0; + + if (n != 1 && n != 4) + panic("decode_displacement: invalid disp_bytes %d", n); + + for (i = 0; i < n; i++) { + if (vie_peek(vie, &x)) + return -1; + + u.buf[i] = x; + vie_advance(vie); + } + + if (n == 1) + vie->displacement = u.signed8; /* sign-extended */ + else + vie->displacement = u.signed32; /* sign-extended */ + + return 0; +} + +static int +decode_immediate(struct vie *vie) +{ + int i, n; + uint8_t x; + union { + char buf[4]; + int8_t signed8; + int16_t signed16; + int32_t signed32; + } u; + + /* Figure out immediate operand size (if any) */ + if (vie->op.op_flags & VIE_OP_F_IMM) { + /* + * Section 2.2.1.5 "Immediates", Intel SDM: + * In 64-bit mode the typical size of immediate operands + * remains 32-bits. When the operand size if 64-bits, the + * processor sign-extends all immediates to 64-bits prior + * to their use. + */ + if (vie->opsize == 4 || vie->opsize == 8) + vie->imm_bytes = 4; + else + vie->imm_bytes = 2; + } else if (vie->op.op_flags & VIE_OP_F_IMM8) { + vie->imm_bytes = 1; + } + + n = vie->imm_bytes; + if (n == 0) + return 0; + + ASSERT(n == 1 || n == 2 || n == 4, + "%s: invalid number of immediate bytes: %d", __func__, n); + + for (i = 0; i < n; i++) { + if (vie_peek(vie, &x)) + return -1; + + u.buf[i] = x; + vie_advance(vie); + } + + /* sign-extend the immediate value before use */ + if (n == 1) + vie->immediate = u.signed8; + else if (n == 2) + vie->immediate = u.signed16; + else + vie->immediate = u.signed32; + + return 0; +} + +static int +decode_moffset(struct vie *vie) +{ + int i, n; + uint8_t x; + union { + char buf[8]; + uint64_t u64; + } u; + + if ((vie->op.op_flags & VIE_OP_F_MOFFSET) == 0) + return 0; + + /* + * Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM: + * The memory offset size follows the address-size of the instruction. 
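+	 * The offset read below is stored in 'vie->displacement'.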
+ */ + n = vie->addrsize; + ASSERT(n == 2 || n == 4 || n == 8, "invalid moffset bytes: %d", n); + + u.u64 = 0; + for (i = 0; i < n; i++) { + if (vie_peek(vie, &x)) + return -1; + + u.buf[i] = x; + vie_advance(vie); + } + vie->displacement = u.u64; + return 0; +} + +int +vmm_decode_instruction(__unused struct vcpu *vcpu, __unused uint64_t gla, + enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie) +{ + if (decode_prefixes(vie, cpu_mode, cs_d)) + return -1; + + if (decode_opcode(vie)) + return -1; + + if (decode_modrm(vie, cpu_mode)) + return -1; + + if (decode_sib(vie)) + return -1; + + if (decode_displacement(vie)) + return -1; + + if (decode_immediate(vie)) + return -1; + + if (decode_moffset(vie)) + return -1; + + vie->decoded = 1; /* success */ + + return 0; +} diff --git a/hypervisor/arch/x86/guest/instr_emul.h b/hypervisor/arch/x86/guest/instr_emul.h new file mode 100644 index 000000000..f92015d7c --- /dev/null +++ b/hypervisor/arch/x86/guest/instr_emul.h @@ -0,0 +1,95 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * Copyright (c) 2017 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VMM_INSTRUCTION_EMUL_H_ +#define _VMM_INSTRUCTION_EMUL_H_ + +/* + * Callback functions to read and write memory regions. + */ +typedef int (*mem_region_read_t)(struct vcpu *vcpu, uint64_t gpa, + uint64_t *rval, int rsize, void *arg); + +typedef int (*mem_region_write_t)(struct vcpu *vcpu, uint64_t gpa, + uint64_t wval, int wsize, void *arg); + +/* + * Emulate the decoded 'vie' instruction. + * + * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region + * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the + * callback functions. + * + * 'void *vm' should be 'struct vm *' when called from kernel context and + * 'struct vmctx *' when called from user context. + * s + */ +int vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + struct vm_guest_paging *paging, mem_region_read_t mrr, + mem_region_write_t mrw, void *mrarg); + +int vie_update_register(struct vcpu *vcpu, enum vm_reg_name reg, + uint64_t val, int size); + +/* + * Returns 1 if an alignment check exception should be injected and 0 otherwise. 
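+ * The check applies only to CPL 3 accesses with CR0.AM and RFLAGS.AC set.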
+ */ +int vie_alignment_check(int cpl, int operand_size, uint64_t cr0, + uint64_t rflags, uint64_t gla); + +/* Returns 1 if the 'gla' is not canonical and 0 otherwise. */ +int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla); + +uint64_t vie_size2mask(int size); + +int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg, + struct seg_desc *desc, uint64_t off, int length, int addrsize, int prot, + uint64_t *gla); + +void vie_init(struct vie *vie, const char *inst_bytes, int inst_length); + +/* + * Decode the instruction fetched into 'vie' so it can be emulated. + * + * 'gla' is the guest linear address provided by the hardware assist + * that caused the nested page table fault. It is used to verify that + * the software instruction decoding is in agreement with the hardware. + * + * Some hardware assists do not provide the 'gla' to the hypervisor. + * To skip the 'gla' verification for this or any other reason pass + * in VIE_INVALID_GLA instead. + */ +#define VIE_INVALID_GLA (1UL << 63) /* a non-canonical address */ +int vmm_decode_instruction(struct vcpu *vcpu, uint64_t gla, + enum vm_cpu_mode cpu_mode, int csd, struct vie *vie); + +int emulate_instruction(struct vcpu *vcpu, struct mem_io *mmio); +int analyze_instruction(struct vcpu *vcpu, struct mem_io *mmio); + +#endif /* _VMM_INSTRUCTION_EMUL_H_ */ diff --git a/hypervisor/arch/x86/guest/instr_emul_wrapper.c b/hypervisor/arch/x86/guest/instr_emul_wrapper.c new file mode 100644 index 000000000..2637cc095 --- /dev/null +++ b/hypervisor/arch/x86/guest/instr_emul_wrapper.c @@ -0,0 +1,466 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "instr_emul_wrapper.h" +#include "instr_emul.h" + +struct emul_cnx { + struct vie vie; + struct vm_guest_paging paging; + struct vcpu *vcpu; + struct mem_io *mmio; +}; + +static DEFINE_CPU_DATA(struct emul_cnx, g_inst_ctxt); + +static int +encode_vmcs_seg_desc(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc); + +static int32_t +get_vmcs_field(int ident); + +static bool +is_segment_register(int reg); + +static bool +is_descriptor_table(int reg); + +int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) +{ + struct run_context *cur_context; + + if (!vcpu) + return -EINVAL; + if ((reg >= VM_REG_LAST) || (reg < VM_REG_GUEST_RAX)) + return -EINVAL; + + if ((reg >= VM_REG_GUEST_RAX) && (reg <= VM_REG_GUEST_RDI)) { + cur_context = + &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + *retval = cur_context->guest_cpu_regs.longs[reg]; + } else if ((reg > VM_REG_GUEST_RDI) && (reg < VM_REG_LAST)) { + int32_t field = get_vmcs_field(reg); + + if (field != -1) + *retval = exec_vmread(field); + else + return -EINVAL; + } + + return 0; +} + +int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) +{ + struct run_context *cur_context; + + if (!vcpu) + return -EINVAL; + if ((reg >= VM_REG_LAST) || (reg < VM_REG_GUEST_RAX)) + return -EINVAL; + + if ((reg >= VM_REG_GUEST_RAX) && (reg <= VM_REG_GUEST_RDI)) { + cur_context = + &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + cur_context->guest_cpu_regs.longs[reg] = val; + } else if ((reg > VM_REG_GUEST_RDI) && (reg < VM_REG_LAST)) { + int32_t field = get_vmcs_field(reg); + + if (field != -1) + exec_vmwrite(field, val); + else + return -EINVAL; + } + + return 0; +} + +int vm_set_seg_desc(struct vcpu *vcpu, int seg, struct seg_desc *ret_desc) +{ + int error; + uint32_t base, limit, access; + + if ((!vcpu) || (!ret_desc)) + return -EINVAL; + + if (!is_segment_register(seg) && !is_descriptor_table(seg)) + return -EINVAL; + + error = encode_vmcs_seg_desc(seg, &base, &limit, &access); + if ((error != 0) || (access == 0xffffffff)) + return -EINVAL; + + exec_vmwrite(base, ret_desc->base); + exec_vmwrite(limit, ret_desc->limit); + exec_vmwrite(access, ret_desc->access); + + return 0; +} + +int vm_get_seg_desc(struct vcpu *vcpu, int seg, struct seg_desc *desc) +{ + int error; + uint32_t base, limit, access; + + if ((!vcpu) || (!desc)) + return -EINVAL; + + if (!is_segment_register(seg) && !is_descriptor_table(seg)) + return -EINVAL; + + error = encode_vmcs_seg_desc(seg, &base, &limit, &access); + if ((error != 0) || (access == 0xffffffff)) + return -EINVAL; + + desc->base = exec_vmread(base); + desc->limit = exec_vmread(limit); + desc->access = exec_vmread(access); + + return 0; +} + +int vm_restart_instruction(struct vcpu *vcpu) +{ + if (!vcpu) + return -EINVAL; + + VCPU_RETAIN_RIP(vcpu); + return 0; +} + +static bool is_descriptor_table(int reg) +{ + switch (reg) { + case VM_REG_GUEST_IDTR: + case VM_REG_GUEST_GDTR: + return true; + default: + return false; + } +} + +static bool is_segment_register(int reg) +{ + switch (reg) { + case VM_REG_GUEST_ES: + case VM_REG_GUEST_CS: + case VM_REG_GUEST_SS: + case VM_REG_GUEST_DS: + case VM_REG_GUEST_FS: + case VM_REG_GUEST_GS: + case VM_REG_GUEST_TR: + case VM_REG_GUEST_LDTR: + return true; + default: + return false; + } +} + +static int encode_vmcs_seg_desc(int seg, uint32_t *base, uint32_t *lim, + uint32_t *acc) +{ + switch (seg) { + case VM_REG_GUEST_ES: + *base = VMX_GUEST_ES_BASE; + *lim = 
VMX_GUEST_ES_LIMIT; + *acc = VMX_GUEST_ES_ATTR; + break; + case VM_REG_GUEST_CS: + *base = VMX_GUEST_CS_BASE; + *lim = VMX_GUEST_CS_LIMIT; + *acc = VMX_GUEST_CS_ATTR; + break; + case VM_REG_GUEST_SS: + *base = VMX_GUEST_SS_BASE; + *lim = VMX_GUEST_SS_LIMIT; + *acc = VMX_GUEST_SS_ATTR; + break; + case VM_REG_GUEST_DS: + *base = VMX_GUEST_DS_BASE; + *lim = VMX_GUEST_DS_LIMIT; + *acc = VMX_GUEST_DS_ATTR; + break; + case VM_REG_GUEST_FS: + *base = VMX_GUEST_FS_BASE; + *lim = VMX_GUEST_FS_LIMIT; + *acc = VMX_GUEST_FS_ATTR; + break; + case VM_REG_GUEST_GS: + *base = VMX_GUEST_GS_BASE; + *lim = VMX_GUEST_GS_LIMIT; + *acc = VMX_GUEST_GS_ATTR; + break; + case VM_REG_GUEST_TR: + *base = VMX_GUEST_TR_BASE; + *lim = VMX_GUEST_TR_LIMIT; + *acc = VMX_GUEST_TR_ATTR; + break; + case VM_REG_GUEST_LDTR: + *base = VMX_GUEST_LDTR_BASE; + *lim = VMX_GUEST_LDTR_LIMIT; + *acc = VMX_GUEST_LDTR_ATTR; + break; + case VM_REG_GUEST_IDTR: + *base = VMX_GUEST_IDTR_BASE; + *lim = VMX_GUEST_IDTR_LIMIT; + *acc = 0xffffffff; + break; + case VM_REG_GUEST_GDTR: + *base = VMX_GUEST_GDTR_BASE; + *lim = VMX_GUEST_GDTR_LIMIT; + *acc = 0xffffffff; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int32_t get_vmcs_field(int ident) +{ + switch (ident) { + case VM_REG_GUEST_CR0: + return VMX_GUEST_CR0; + case VM_REG_GUEST_CR3: + return VMX_GUEST_CR3; + case VM_REG_GUEST_CR4: + return VMX_GUEST_CR4; + case VM_REG_GUEST_DR7: + return VMX_GUEST_DR7; + case VM_REG_GUEST_RSP: + return VMX_GUEST_RSP; + case VM_REG_GUEST_RIP: + return VMX_GUEST_RIP; + case VM_REG_GUEST_RFLAGS: + return VMX_GUEST_RFLAGS; + case VM_REG_GUEST_ES: + return VMX_GUEST_ES_SEL; + case VM_REG_GUEST_CS: + return VMX_GUEST_CS_SEL; + case VM_REG_GUEST_SS: + return VMX_GUEST_SS_SEL; + case VM_REG_GUEST_DS: + return VMX_GUEST_DS_SEL; + case VM_REG_GUEST_FS: + return VMX_GUEST_FS_SEL; + case VM_REG_GUEST_GS: + return VMX_GUEST_GS_SEL; + case VM_REG_GUEST_TR: + return VMX_GUEST_TR_SEL; + case VM_REG_GUEST_LDTR: + return VMX_GUEST_LDTR_SEL; + case VM_REG_GUEST_EFER: + return VMX_GUEST_IA32_EFER_FULL; + case VM_REG_GUEST_PDPTE0: + return VMX_GUEST_PDPTE0_FULL; + case VM_REG_GUEST_PDPTE1: + return VMX_GUEST_PDPTE1_FULL; + case VM_REG_GUEST_PDPTE2: + return VMX_GUEST_PDPTE2_FULL; + case VM_REG_GUEST_PDPTE3: + return VMX_GUEST_PDPTE3_FULL; + default: + return -1; + } +} + +static enum vm_cpu_mode get_vmx_cpu_mode(void) +{ + uint32_t csar; + + if (exec_vmread(VMX_GUEST_IA32_EFER_FULL) & EFER_LMA) { + csar = exec_vmread(VMX_GUEST_CS_ATTR); + if (csar & 0x2000) + return CPU_MODE_64BIT; /* CS.L = 1 */ + else + return CPU_MODE_COMPATIBILITY; + } else if (exec_vmread(VMX_GUEST_CR0) & CR0_PE) { + return CPU_MODE_PROTECTED; + } else { + return CPU_MODE_REAL; + } +} + +static void get_guest_paging_info(struct vcpu *vcpu, struct emul_cnx *emul_cnx) +{ + uint32_t cpl, csar; + + ASSERT(emul_cnx != NULL && vcpu != NULL, "Error in input arguments"); + + csar = exec_vmread(VMX_GUEST_CS_ATTR); + cpl = (csar >> 5) & 3; + emul_cnx->paging.cr3 = + vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3; + emul_cnx->paging.cpl = cpl; + emul_cnx->paging.cpu_mode = get_vmx_cpu_mode(); + emul_cnx->paging.paging_mode = PAGING_MODE_FLAT;/*maybe change later*/ +} + +static int mmio_read(struct vcpu *vcpu, __unused uint64_t gpa, uint64_t *rval, + __unused int size, __unused void *arg) +{ + struct emul_cnx *emul_cnx; + struct mem_io *mmio; + + if (!vcpu) + return -EINVAL; + + emul_cnx = &per_cpu(g_inst_ctxt, vcpu->pcpu_id); + mmio = emul_cnx->mmio; + + ASSERT(mmio != NULL, "invalid 
mmio when reading"); + + *rval = mmio->value; + + return 0; +} + +static int mmio_write(struct vcpu *vcpu, __unused uint64_t gpa, uint64_t wval, + __unused int size, __unused void *arg) +{ + struct emul_cnx *emul_cnx; + struct mem_io *mmio; + + if (!vcpu) + return -EINVAL; + + emul_cnx = &per_cpu(g_inst_ctxt, vcpu->pcpu_id); + mmio = emul_cnx->mmio; + + ASSERT(mmio != NULL, "invalid mmio when writing"); + + mmio->value = wval; + + return 0; +} + +void vm_gva2gpa(struct vcpu *vcpu, uint64_t gva, uint64_t *gpa) +{ + + ASSERT(gpa != NULL, "Error in input arguments"); + ASSERT(vcpu != NULL, + "Invalid vcpu id when gva2gpa"); + + *gpa = gva2gpa(vcpu->vm, + vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3, gva); +} + +int analyze_instruction(struct vcpu *vcpu, struct mem_io *mmio) +{ + uint64_t guest_rip_gva, guest_rip_gpa; + char *guest_rip_hva; + struct emul_cnx *emul_cnx; + uint32_t csar; + int retval = 0; + enum vm_cpu_mode cpu_mode; + int i; + + guest_rip_gva = + vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].rip; + + guest_rip_gpa = gva2gpa(vcpu->vm, + vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3, + guest_rip_gva); + + guest_rip_hva = GPA2HVA(vcpu->vm, guest_rip_gpa); + emul_cnx = &per_cpu(g_inst_ctxt, vcpu->pcpu_id); + emul_cnx->mmio = mmio; + emul_cnx->vcpu = vcpu; + + /* by now, HVA <-> HPA is 1:1 mapping, so use hpa is OK*/ + vie_init(&emul_cnx->vie, guest_rip_hva, + vcpu->arch_vcpu.inst_len); + + get_guest_paging_info(vcpu, emul_cnx); + csar = exec_vmread(VMX_GUEST_CS_ATTR); + cpu_mode = get_vmx_cpu_mode(); + + mmio->private_data = emul_cnx; + + retval = vmm_decode_instruction(vcpu, guest_rip_gva, + cpu_mode, SEG_DESC_DEF32(csar), &emul_cnx->vie); + + mmio->access_size = emul_cnx->vie.opsize; + + if (retval != 0) { + /* dump to instruction when decoding failed */ + pr_err("decode following instruction failed @ 0x%016llx:", + exec_vmread(VMX_GUEST_RIP)); + for (i = 0; i < emul_cnx->vie.num_valid; i++) { + if (i >= VIE_INST_SIZE) + break; + + if (i == 0) + pr_err("\n"); + pr_err("%d=%02hhx ", + i, emul_cnx->vie.inst[i]); + } + } + + return retval; +} + +int emulate_instruction(struct vcpu *vcpu, struct mem_io *mmio) +{ + struct emul_cnx *emul_cnx = (struct emul_cnx *)(mmio->private_data); + struct vm_guest_paging *paging = &emul_cnx->paging; + int i, retval = 0; + uint64_t gpa = mmio->paddr; + mem_region_read_t mread = mmio_read; + mem_region_write_t mwrite = mmio_write; + + retval = vmm_emulate_instruction(vcpu, gpa, + &emul_cnx->vie, paging, mread, mwrite, &retval); + + if (retval != 0) { + /* dump to instruction when emulation failed */ + pr_err("emulate following instruction failed @ 0x%016llx:", + exec_vmread(VMX_GUEST_RIP)); + for (i = 0; i < emul_cnx->vie.num_valid; i++) { + if (i >= VIE_INST_SIZE) + break; + + if (i == 0) + pr_err("\n"); + + pr_err("%d=%02hhx ", + i, emul_cnx->vie.inst[i]); + } + } + return retval; +} diff --git a/hypervisor/arch/x86/guest/instr_emul_wrapper.h b/hypervisor/arch/x86/guest/instr_emul_wrapper.h new file mode 100644 index 000000000..3581e9bd7 --- /dev/null +++ b/hypervisor/arch/x86/guest/instr_emul_wrapper.h @@ -0,0 +1,203 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * Copyright (c) 2017 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include + +struct vie_op { + uint8_t op_byte; /* actual opcode byte */ + uint8_t op_type; /* type of operation (e.g. MOV) */ + uint16_t op_flags; +}; + +#define VIE_INST_SIZE 15 +struct vie { + uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */ + uint8_t num_valid; /* size of the instruction */ + uint8_t num_processed; + + uint8_t addrsize:4, opsize:4; /* address and operand sizes */ + uint8_t rex_w:1, /* REX prefix */ + rex_r:1, + rex_x:1, + rex_b:1, + rex_present:1, + repz_present:1, /* REP/REPE/REPZ prefix */ + repnz_present:1, /* REPNE/REPNZ prefix */ + opsize_override:1, /* Operand size override */ + addrsize_override:1, /* Address size override */ + segment_override:1; /* Segment override */ + + uint8_t mod:2, /* ModRM byte */ + reg:4, + rm:4; + + uint8_t ss:2, /* SIB byte */ + index:4, + base:4; + + uint8_t disp_bytes; + uint8_t imm_bytes; + + uint8_t scale; + int base_register; /* VM_REG_GUEST_xyz */ + int index_register; /* VM_REG_GUEST_xyz */ + int segment_register; /* VM_REG_GUEST_xyz */ + + int64_t displacement; /* optional addr displacement */ + int64_t immediate; /* optional immediate operand */ + + uint8_t decoded; /* set to 1 if successfully decoded */ + + struct vie_op op; /* opcode description */ +}; + +#define PSL_C 0x00000001 /* carry bit */ +#define PSL_PF 0x00000004 /* parity bit */ +#define PSL_AF 0x00000010 /* bcd carry bit */ +#define PSL_Z 0x00000040 /* zero bit */ +#define PSL_N 0x00000080 /* negative bit */ +#define PSL_T 0x00000100 /* trace enable bit */ +#define PSL_I 0x00000200 /* interrupt enable bit */ +#define PSL_D 0x00000400 /* string instruction direction bit */ +#define PSL_V 0x00000800 /* overflow bit */ +#define PSL_IOPL 0x00003000 /* i/o privilege level */ +#define PSL_NT 0x00004000 /* nested task bit */ +#define PSL_RF 0x00010000 /* resume flag bit */ +#define PSL_VM 0x00020000 /* virtual 8086 mode bit */ +#define PSL_AC 0x00040000 /* alignment checking */ +#define PSL_VIF 0x00080000 /* virtual interrupt enable */ +#define PSL_VIP 0x00100000 /* virtual interrupt pending */ +#define PSL_ID 0x00200000 /* identification bit */ + +/* + * The 'access' field has the format specified in Table 21-2 of the Intel + * Architecture Manual vol 3b. + * + * XXX The contents of the 'access' field are architecturally defined except + * bit 16 - Segment Unusable. 
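+ * The SEG_DESC_UNUSABLE() macro below extracts that bit.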
+ */ +struct seg_desc { + uint64_t base; + uint32_t limit; + uint32_t access; +}; + + +/* + * Protections are chosen from these bits, or-ed together + */ +#define PROT_NONE 0x00 /* no permissions */ +#define PROT_READ 0x01 /* pages can be read */ +#define PROT_WRITE 0x02 /* pages can be written */ +#define PROT_EXEC 0x04 /* pages can be executed */ + +#define SEG_DESC_TYPE(access) ((access) & 0x001f) +#define SEG_DESC_DPL(access) (((access) >> 5) & 0x3) +#define SEG_DESC_PRESENT(access) (((access) & 0x0080) ? 1 : 0) +#define SEG_DESC_DEF32(access) (((access) & 0x4000) ? 1 : 0) +#define SEG_DESC_GRANULARITY(access) (((access) & 0x8000) ? 1 : 0) +#define SEG_DESC_UNUSABLE(access) (((access) & 0x10000) ? 1 : 0) + +enum vm_cpu_mode { + CPU_MODE_REAL, + CPU_MODE_PROTECTED, + CPU_MODE_COMPATIBILITY, /* IA-32E mode (CS.L = 0) */ + CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */ +}; + +enum vm_paging_mode { + PAGING_MODE_FLAT, + PAGING_MODE_32, + PAGING_MODE_PAE, + PAGING_MODE_64, +}; + +struct vm_guest_paging { + uint64_t cr3; + int cpl; + enum vm_cpu_mode cpu_mode; + enum vm_paging_mode paging_mode; +}; + +/* + * Identifiers for architecturally defined registers. + */ +enum vm_reg_name { + VM_REG_GUEST_RAX, + VM_REG_GUEST_RBX, + VM_REG_GUEST_RCX, + VM_REG_GUEST_RDX, + VM_REG_GUEST_RBP, + VM_REG_GUEST_RSI, + VM_REG_GUEST_R8, + VM_REG_GUEST_R9, + VM_REG_GUEST_R10, + VM_REG_GUEST_R11, + VM_REG_GUEST_R12, + VM_REG_GUEST_R13, + VM_REG_GUEST_R14, + VM_REG_GUEST_R15, + VM_REG_GUEST_RDI, + VM_REG_GUEST_CR0, + VM_REG_GUEST_CR3, + VM_REG_GUEST_CR4, + VM_REG_GUEST_DR7, + VM_REG_GUEST_RSP, + VM_REG_GUEST_RIP, + VM_REG_GUEST_RFLAGS, + VM_REG_GUEST_ES, + VM_REG_GUEST_CS, + VM_REG_GUEST_SS, + VM_REG_GUEST_DS, + VM_REG_GUEST_FS, + VM_REG_GUEST_GS, + VM_REG_GUEST_LDTR, + VM_REG_GUEST_TR, + VM_REG_GUEST_IDTR, + VM_REG_GUEST_GDTR, + VM_REG_GUEST_EFER, + VM_REG_GUEST_CR2, + VM_REG_GUEST_PDPTE0, + VM_REG_GUEST_PDPTE1, + VM_REG_GUEST_PDPTE2, + VM_REG_GUEST_PDPTE3, + VM_REG_GUEST_INTR_SHADOW, + VM_REG_LAST +}; + +typedef unsigned long u_long; + +int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval); +int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val); +int vm_get_seg_desc(struct vcpu *vcpu, int reg, + struct seg_desc *ret_desc); +int vm_set_seg_desc(struct vcpu *vcpu, int reg, + struct seg_desc *desc); +int vm_restart_instruction(struct vcpu *vcpu); +void vm_gva2gpa(struct vcpu *vcpu, uint64_t gla, uint64_t *gpa); diff --git a/hypervisor/arch/x86/guest/time.h b/hypervisor/arch/x86/guest/time.h new file mode 100644 index 000000000..9265837ef --- /dev/null +++ b/hypervisor/arch/x86/guest/time.h @@ -0,0 +1,118 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)time.h 8.5 (Berkeley) 5/4/95 + * $FreeBSD$ + */ + +#ifndef _TIME_H_ +#define _TIME_H_ + +struct callout { + void *c_arg; /* function argument */ + void (*c_func)(void *); /* function to call */ + short c_flags; /* User State */ +}; + +#define CALLOUT_ACTIVE 0x0002 /* callout is currently active */ +#define CALLOUT_PENDING 0x0004 /* callout is waiting for timeout */ +#define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE) +#define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE) +#define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING) + +typedef int64_t time_t; +typedef int64_t sbintime_t; + +struct bintime { + time_t sec; + uint64_t frac; +}; + +static inline void +bintime_add(struct bintime *_bt, const struct bintime *_bt2) +{ + uint64_t _u; + + _u = _bt->frac; + _bt->frac += _bt2->frac; + if (_u > _bt->frac) + _bt->sec++; + _bt->sec += _bt2->sec; +} + +static inline void +bintime_sub(struct bintime *_bt, const struct bintime *_bt2) +{ + uint64_t _u; + + _u = _bt->frac; + _bt->frac -= _bt2->frac; + if (_u < _bt->frac) + _bt->sec--; + _bt->sec -= _bt2->sec; +} + +static inline void +bintime_mul(struct bintime *_bt, uint32_t _x) +{ + uint64_t _p1, _p2; + + _p1 = (_bt->frac & 0xffffffffull) * _x; + _p2 = (_bt->frac >> 32) * _x + (_p1 >> 32); + _bt->sec *= _x; + _bt->sec += (_p2 >> 32); + _bt->frac = (_p2 << 32) | (_p1 & 0xffffffffull); +} + +#define bintime_cmp(a, b, cmp) \ + (((a)->sec == (b)->sec) ? \ + ((a)->frac cmp(b)->frac) : \ + ((a)->sec cmp(b)->sec)) + +#define SBT_1S ((sbintime_t)1 << 32) +#define SBT_1US (SBT_1S / 1000000) + +#define BT2FREQ(bt) \ + (((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) / \ + ((bt)->frac >> 1)) + +#define FREQ2BT(freq, bt) \ +{ \ + (bt)->sec = 0; \ + (bt)->frac = ((uint64_t)0x8000000000000000 / (freq)) << 1; \ +} + +static inline sbintime_t +bttosbt(const struct bintime _bt) +{ + + return (((sbintime_t)_bt.sec << 32) + (_bt.frac >> 32)); +} + +#endif /* !_TIME_H_ */ diff --git a/hypervisor/arch/x86/guest/vcpu.c b/hypervisor/arch/x86/guest/vcpu.c new file mode 100644 index 000000000..ab5815e14 --- /dev/null +++ b/hypervisor/arch/x86/guest/vcpu.c @@ -0,0 +1,357 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +vm_sw_loader_t vm_sw_loader; + +/*********************************************************************** + * vcpu_id/pcpu_id mapping table: + * + * if + * VM0_CPUS[2] = {0, 2} , VM1_CPUS[2] = {3, 1}; + * then + * for physical CPU 0 : vcpu->pcpu_id = 0, vcpu->vcpu_id = 0, vmid = 0; + * for physical CPU 2 : vcpu->pcpu_id = 2, vcpu->vcpu_id = 1, vmid = 0; + * for physical CPU 3 : vcpu->pcpu_id = 3, vcpu->vcpu_id = 0, vmid = 1; + * for physical CPU 1 : vcpu->pcpu_id = 1, vcpu->vcpu_id = 1, vmid = 1; + * + ***********************************************************************/ +int create_vcpu(int cpu_id, struct vm *vm, struct vcpu **rtn_vcpu_handle) +{ + struct vcpu *vcpu; + + ASSERT(vm != NULL, ""); + ASSERT(rtn_vcpu_handle != NULL, ""); + + pr_info("Creating VCPU %d", cpu_id); + + /* Allocate memory for VCPU */ + vcpu = calloc(1, sizeof(struct vcpu)); + ASSERT(vcpu != NULL, ""); + + /* Initialize the physical CPU ID for this VCPU */ + vcpu->pcpu_id = cpu_id; + + /* Initialize the parent VM reference */ + vcpu->vm = vm; + + /* Initialize the virtual ID for this VCPU */ + /* FIXME: + * We have assumption that we always destroys vcpus in one + * shot (like when vm is destroyed). If we need to support + * specific vcpu destroy on fly, this vcpu_id assignment + * needs revise. + */ + + /* + * vcpu->vcpu_id = vm->hw.created_vcpus; + * vm->hw.created_vcpus++; + */ + vcpu->vcpu_id = atomic_xadd_int(&vm->hw.created_vcpus, 1); + /* vm->hw.vcpu_array[vcpu->vcpu_id] = vcpu; */ + atomic_store_rel_64( + (unsigned long *)&vm->hw.vcpu_array[vcpu->vcpu_id], + (unsigned long)vcpu); + + ASSERT(vcpu->vcpu_id < vm->hw.num_vcpus, + "Allocated vcpu_id is out of range!"); + + per_cpu(vcpu, cpu_id) = vcpu; + + pr_info("PCPU%d is working as VM%d VCPU%d, Role: %s", + vcpu->pcpu_id, vcpu->vm->attr.id, vcpu->vcpu_id, + is_vcpu_bsp(vcpu) ? 
"PRIMARY" : "SECONDARY"); + + /* Is this VCPU a VM BSP, create page hierarchy for this VM */ + if (is_vcpu_bsp(vcpu)) { + /* Set up temporary guest page tables */ + vm->arch_vm.guest_pml4 = create_guest_paging(vm); + pr_info("VM *d VCPU %d CR3: 0x%016llx ", + vm->attr.id, vcpu->vcpu_id, vm->arch_vm.guest_pml4); + } + + /* Allocate VMCS region for this VCPU */ + vcpu->arch_vcpu.vmcs = alloc_page(); + ASSERT(vcpu->arch_vcpu.vmcs != NULL, ""); + + /* Memset VMCS region for this VCPU */ + memset(vcpu->arch_vcpu.vmcs, 0, CPU_PAGE_SIZE); + + /* Initialize exception field in VCPU context */ + vcpu->arch_vcpu.exception_info.exception = -1; + + /* Initialize cur context */ + vcpu->arch_vcpu.cur_context = NORMAL_WORLD; + + /* Create per vcpu vlapic */ + vlapic_create(vcpu); + + /* Populate the return handle */ + *rtn_vcpu_handle = vcpu; + + vcpu->launched = false; + vcpu->paused_cnt = 0; + vcpu->running = 0; + vcpu->ioreq_pending = 0; + vcpu->arch_vcpu.nr_sipi = 0; + vcpu->pending_pre_work = 0; + vcpu->state = VCPU_INIT; + + return 0; +} + +int start_vcpu(struct vcpu *vcpu) +{ + uint64_t rip, instlen; + struct run_context *cur_context = + &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + int64_t status = 0; + + ASSERT(vcpu != NULL, "Incorrect arguments"); + + /* If this VCPU is not already launched, launch it */ + if (!vcpu->launched) { + pr_info("VM %d Starting VCPU %d", + vcpu->vm->attr.id, vcpu->vcpu_id); + + /* Set vcpu launched */ + vcpu->launched = true; + + /* avoid VMCS recycling RSB usage, set IBPB. + * NOTE: this should be done for any time vmcs got switch + * currently, there is no other place to do vmcs switch + * Please add IBPB set for future vmcs switch case(like trusty) + */ + if (ibrs_type == IBRS_RAW) + msr_write(MSR_IA32_PRED_CMD, PRED_SET_IBPB); + + /* Launch the VM */ + status = vmx_vmrun(cur_context, VM_LAUNCH, ibrs_type); + + /* See if VM launched successfully */ + if (status == 0) { + if (is_vcpu_bsp(vcpu)) { + pr_info("VM %d VCPU %d successfully launched", + vcpu->vm->attr.id, vcpu->vcpu_id); + } + } + } else { + /* This VCPU was already launched, check if the last guest + * instruction needs to be repeated and resume VCPU accordingly + */ + instlen = vcpu->arch_vcpu.inst_len; + rip = cur_context->rip; + exec_vmwrite(VMX_GUEST_RIP, ((rip + instlen) & + 0xFFFFFFFFFFFFFFFF)); + + /* Resume the VM */ + status = vmx_vmrun(cur_context, VM_RESUME, ibrs_type); + } + + /* Save guest CR3 register */ + cur_context->cr3 = exec_vmread(VMX_GUEST_CR3); + + /* Obtain current VCPU instruction pointer and length */ + cur_context->rip = exec_vmread(VMX_GUEST_RIP); + vcpu->arch_vcpu.inst_len = exec_vmread(VMX_EXIT_INSTR_LEN); + + cur_context->rsp = exec_vmread(VMX_GUEST_RSP); + cur_context->rflags = exec_vmread(VMX_GUEST_RFLAGS); + + /* Obtain VM exit reason */ + vcpu->arch_vcpu.exit_reason = exec_vmread(VMX_EXIT_REASON); + + if (status != 0) { + /* refer to 64-ia32 spec section 24.9.1 volume#3 */ + if (vcpu->arch_vcpu.exit_reason & VMX_VMENTRY_FAIL) + pr_fatal("vmentry fail reason=%lx", vcpu->arch_vcpu.exit_reason); + else + pr_fatal("vmexit fail err_inst=%lx", exec_vmread(VMX_INSTR_ERROR)); + + ASSERT(status == 0, "vm fail"); + } + + return status; +} + +int shutdown_vcpu(__unused struct vcpu *vcpu) +{ + /* TODO : Implement VCPU shutdown sequence */ + + return 0; +} + +int destroy_vcpu(struct vcpu *vcpu) +{ + ASSERT(vcpu != NULL, "Incorrect arguments"); + + /* vcpu->vm->hw.vcpu_array[vcpu->vcpu_id] = NULL; */ + atomic_store_rel_64( + (unsigned long 
*)&vcpu->vm->hw.vcpu_array[vcpu->vcpu_id], + (unsigned long)NULL); + + atomic_subtract_int(&vcpu->vm->hw.created_vcpus, 1); + + vlapic_free(vcpu); + free(vcpu->arch_vcpu.vmcs); + free(vcpu->guest_msrs); + free_pcpu(vcpu->pcpu_id); + free(vcpu); + + return 0; +} + +/* NOTE: + * vcpu should be paused before call this function. + */ +void reset_vcpu(struct vcpu *vcpu) +{ + struct vlapic *vlapic; + + pr_dbg("vcpu%d reset", vcpu->vcpu_id); + ASSERT(vcpu->state != VCPU_RUNNING, + "reset vcpu when it's running"); + + if (vcpu->state == VCPU_INIT) + return; + + vcpu->state = VCPU_INIT; + + vcpu->launched = false; + vcpu->paused_cnt = 0; + vcpu->running = 0; + vcpu->ioreq_pending = 0; + vcpu->arch_vcpu.nr_sipi = 0; + vcpu->pending_pre_work = 0; + vlapic = vcpu->arch_vcpu.vlapic; + vlapic_init(vlapic); +} + +void init_vcpu(struct vcpu *vcpu) +{ + if (is_vcpu_bsp(vcpu)) + vcpu->arch_vcpu.cpu_mode = PAGE_PROTECTED_MODE; + else + vcpu->arch_vcpu.cpu_mode = REAL_MODE; + /* init_vmcs is delayed to vcpu vmcs launch first time */ +} + +void pause_vcpu(struct vcpu *vcpu, enum vcpu_state new_state) +{ + int pcpu_id = get_cpu_id(); + + pr_dbg("vcpu%d paused, new state: %d", + vcpu->vcpu_id, new_state); + + vcpu->prev_state = vcpu->state; + vcpu->state = new_state; + + get_schedule_lock(pcpu_id); + if (atomic_load_acq_32(&vcpu->running) == 1) { + remove_vcpu_from_runqueue(vcpu); + make_reschedule_request(vcpu); + release_schedule_lock(pcpu_id); + + if (vcpu->pcpu_id != pcpu_id) { + while (atomic_load_acq_32(&vcpu->running) == 1) + __asm__ __volatile("pause" ::: "memory"); + } + } else { + remove_vcpu_from_runqueue(vcpu); + release_schedule_lock(pcpu_id); + } +} + +void resume_vcpu(struct vcpu *vcpu) +{ + pr_dbg("vcpu%d resumed", vcpu->vcpu_id); + + vcpu->state = vcpu->prev_state; + + get_schedule_lock(vcpu->pcpu_id); + if (vcpu->state == VCPU_RUNNING) { + add_vcpu_to_runqueue(vcpu); + make_reschedule_request(vcpu); + } + release_schedule_lock(vcpu->pcpu_id); +} + +void schedule_vcpu(struct vcpu *vcpu) +{ + vcpu->state = VCPU_RUNNING; + pr_dbg("vcpu%d scheduled", vcpu->vcpu_id); + + get_schedule_lock(vcpu->pcpu_id); + add_vcpu_to_runqueue(vcpu); + make_reschedule_request(vcpu); + release_schedule_lock(vcpu->pcpu_id); +} + +/* help function for vcpu create */ +int prepare_vcpu(struct vm *vm, int pcpu_id) +{ + int ret = 0; + struct vcpu *vcpu = NULL; + + ret = create_vcpu(pcpu_id, vm, &vcpu); + ASSERT(ret == 0, "vcpu create failed"); + + if (is_vcpu_bsp(vcpu)) { + /* Load VM SW */ + if (!vm_sw_loader) + vm_sw_loader = general_sw_loader; + vm_sw_loader(vm, vcpu); + vcpu->arch_vcpu.cpu_mode = PAGE_PROTECTED_MODE; + } else { + vcpu->arch_vcpu.cpu_mode = REAL_MODE; + } + + /* init_vmcs is delayed to vcpu vmcs launch first time */ + + /* initialize the vcpu tsc aux */ + vcpu->msr_tsc_aux_guest = vcpu->vcpu_id; + + set_pcpu_used(pcpu_id); + + INIT_LIST_HEAD(&vcpu->run_list); + + return ret; +} + +void request_vcpu_pre_work(struct vcpu *vcpu, int pre_work_id) +{ + bitmap_set(pre_work_id, &vcpu->pending_pre_work); +} diff --git a/hypervisor/arch/x86/guest/vioapic.c b/hypervisor/arch/x86/guest/vioapic.c new file mode 100644 index 000000000..b7ed5bb1c --- /dev/null +++ b/hypervisor/arch/x86/guest/vioapic.c @@ -0,0 +1,662 @@ +/*- + * Copyright (c) 2013 Tycho Nightingale + * Copyright (c) 2013 Neel Natu + * Copyright (c) 2017 Intel Corporation + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#define pr_fmt(fmt) "vioapic: " fmt + +#include +#include +#include +#include +#include + +#define IOREGSEL 0x00 +#define IOWIN 0x10 +#define IOEOI 0x40 + +#define REDIR_ENTRIES_HW 120 /* SOS align with native ioapic */ +#define REDIR_ENTRIES_UOS 24 /* UOS pins*/ +#define RTBL_RO_BITS ((uint64_t)(IOAPIC_RTE_REM_IRR | IOAPIC_RTE_DELIVS)) + +#define ACRN_DBG_IOAPIC 6 + +struct vioapic { + struct vm *vm; + spinlock_t mtx; + uint32_t id; + uint32_t ioregsel; + struct { + uint64_t reg; + int acnt; /* sum of pin asserts (+1) and deasserts (-1) */ + } rtbl[REDIR_ENTRIES_HW]; +}; + +#define VIOAPIC_LOCK(vioapic) spinlock_obtain(&((vioapic)->mtx)) +#define VIOAPIC_UNLOCK(vioapic) spinlock_release(&((vioapic)->mtx)) + +static inline const char *pinstate_str(bool asserted) +{ + return (asserted) ? "asserted" : "deasserted"; +} + +struct vioapic * +vm_ioapic(struct vm *vm) +{ + return (struct vioapic *)vm->arch_vm.virt_ioapic; +} + +static void +vioapic_send_intr(struct vioapic *vioapic, int pin) +{ + int vector, delmode; + uint32_t low, high, dest; + bool level, phys; + + if (pin < 0 || pin >= vioapic_pincount(vioapic->vm)) + pr_err("vioapic_send_intr: invalid pin number %d", pin); + + low = vioapic->rtbl[pin].reg; + high = vioapic->rtbl[pin].reg >> 32; + + if ((low & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET) { + dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: masked", pin); + return; + } + + phys = ((low & IOAPIC_RTE_DESTMOD) == IOAPIC_RTE_DESTPHY); + delmode = low & IOAPIC_RTE_DELMOD; + level = low & IOAPIC_RTE_TRGRLVL ? 
true : false; + if (level) + vioapic->rtbl[pin].reg |= IOAPIC_RTE_REM_IRR; + + vector = low & IOAPIC_RTE_INTVEC; + dest = high >> APIC_ID_SHIFT; + vlapic_deliver_intr(vioapic->vm, level, dest, phys, delmode, vector); +} + +static void +vioapic_set_pinstate(struct vioapic *vioapic, int pin, bool newstate) +{ + int oldcnt, newcnt; + bool needintr; + + if (pin < 0 || pin >= vioapic_pincount(vioapic->vm)) + pr_err("vioapic_set_pinstate: invalid pin number %d", pin); + + oldcnt = vioapic->rtbl[pin].acnt; + if (newstate) + vioapic->rtbl[pin].acnt++; + else + vioapic->rtbl[pin].acnt--; + newcnt = vioapic->rtbl[pin].acnt; + + if (newcnt < 0) { + pr_err("ioapic pin%d: bad acnt %d", pin, newcnt); + } + + needintr = false; + if (oldcnt == 0 && newcnt == 1) { + needintr = true; + dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: asserted", pin); + } else if (oldcnt == 1 && newcnt == 0) { + dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: deasserted", pin); + } else { + dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: %s, ignored, acnt %d", + pin, pinstate_str(newstate), newcnt); + } + + if (needintr) + vioapic_send_intr(vioapic, pin); +} + +enum irqstate { + IRQSTATE_ASSERT, + IRQSTATE_DEASSERT, + IRQSTATE_PULSE +}; + +static int +vioapic_set_irqstate(struct vm *vm, int irq, enum irqstate irqstate) +{ + struct vioapic *vioapic; + + if (irq < 0 || irq >= vioapic_pincount(vm)) + return -EINVAL; + + vioapic = vm_ioapic(vm); + + VIOAPIC_LOCK(vioapic); + switch (irqstate) { + case IRQSTATE_ASSERT: + vioapic_set_pinstate(vioapic, irq, true); + break; + case IRQSTATE_DEASSERT: + vioapic_set_pinstate(vioapic, irq, false); + break; + case IRQSTATE_PULSE: + vioapic_set_pinstate(vioapic, irq, true); + vioapic_set_pinstate(vioapic, irq, false); + break; + default: + panic("vioapic_set_irqstate: invalid irqstate %d", irqstate); + } + VIOAPIC_UNLOCK(vioapic); + + return 0; +} + +int +vioapic_assert_irq(struct vm *vm, int irq) +{ + return vioapic_set_irqstate(vm, irq, IRQSTATE_ASSERT); +} + +int +vioapic_deassert_irq(struct vm *vm, int irq) +{ + return vioapic_set_irqstate(vm, irq, IRQSTATE_DEASSERT); +} + +int +vioapic_pulse_irq(struct vm *vm, int irq) +{ + return vioapic_set_irqstate(vm, irq, IRQSTATE_PULSE); +} + +/* + * Reset the vlapic's trigger-mode register to reflect the ioapic pin + * configuration. + */ +void +vioapic_update_tmr(struct vcpu *vcpu) +{ + struct vioapic *vioapic; + struct vlapic *vlapic; + uint32_t low; + int delmode, pin, vector; + bool level; + + vlapic = vcpu->arch_vcpu.vlapic; + vioapic = vm_ioapic(vcpu->vm); + + VIOAPIC_LOCK(vioapic); + for (pin = 0; pin < vioapic_pincount(vioapic->vm); pin++) { + low = vioapic->rtbl[pin].reg; + + level = low & IOAPIC_RTE_TRGRLVL ? true : false; + + /* + * For a level-triggered 'pin' let the vlapic figure out if + * an assertion on this 'pin' would result in an interrupt + * being delivered to it. If yes, then it will modify the + * TMR bit associated with this vector to level-triggered. 
+ */ + delmode = low & IOAPIC_RTE_DELMOD; + vector = low & IOAPIC_RTE_INTVEC; + vlapic_set_tmr_one_vec(vlapic, delmode, vector, level); + } + vlapic_apicv_batch_set_tmr(vlapic); + VIOAPIC_UNLOCK(vioapic); +} + +static uint32_t +vioapic_read(struct vioapic *vioapic, uint32_t addr) +{ + int regnum, pin, rshift; + + regnum = addr & 0xff; + switch (regnum) { + case IOAPIC_ID: + return vioapic->id; + case IOAPIC_VER: + return ((vioapic_pincount(vioapic->vm) - 1) << MAX_RTE_SHIFT) + | 0x11; + case IOAPIC_ARB: + return vioapic->id; + default: + break; + } + + /* redirection table entries */ + if (regnum >= IOAPIC_REDTBL && + regnum < IOAPIC_REDTBL + vioapic_pincount(vioapic->vm) * 2) { + pin = (regnum - IOAPIC_REDTBL) / 2; + if ((regnum - IOAPIC_REDTBL) % 2) + rshift = 32; + else + rshift = 0; + + return vioapic->rtbl[pin].reg >> rshift; + } + + return 0; +} + +/* + * version 0x20+ ioapic has EOI register. And cpu could write vector to this + * register to clear related IRR. + */ +static void +vioapic_write_eoi(struct vioapic *vioapic, int32_t vector) +{ + struct vm *vm = vioapic->vm; + int pin; + + if (vector < VECTOR_FOR_INTR_START || vector > NR_MAX_VECTOR) + pr_err("vioapic_process_eoi: invalid vector %d", vector); + + VIOAPIC_LOCK(vioapic); + for (pin = 0; pin < vioapic_pincount(vm); pin++) { + if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0) + continue; + if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) != + (uint64_t)vector) + continue; + + vioapic->rtbl[pin].reg &= ~IOAPIC_RTE_REM_IRR; + if (vioapic->rtbl[pin].acnt > 0) { + dev_dbg(ACRN_DBG_IOAPIC, + "ioapic pin%d: asserted at eoi, acnt %d", + pin, vioapic->rtbl[pin].acnt); + vioapic_send_intr(vioapic, pin); + } + } + VIOAPIC_UNLOCK(vioapic); +} + +static void +vioapic_write(struct vioapic *vioapic, uint32_t addr, uint32_t data) +{ + uint64_t data64, mask64; + uint64_t last, new, changed; + int regnum, pin, lshift; + + regnum = addr & 0xff; + switch (regnum) { + case IOAPIC_ID: + vioapic->id = data & APIC_ID_MASK; + break; + case IOAPIC_VER: + case IOAPIC_ARB: + /* readonly */ + break; + default: + break; + } + + /* redirection table entries */ + if (regnum >= IOAPIC_REDTBL && + regnum < IOAPIC_REDTBL + vioapic_pincount(vioapic->vm) * 2) { + pin = (regnum - IOAPIC_REDTBL) / 2; + if ((regnum - IOAPIC_REDTBL) % 2) + lshift = 32; + else + lshift = 0; + + last = new = vioapic->rtbl[pin].reg; + + data64 = (uint64_t)data << lshift; + mask64 = (uint64_t)0xffffffff << lshift; + new &= ~mask64 | RTBL_RO_BITS; + new |= data64 & ~RTBL_RO_BITS; + + changed = last ^ new; + /* pin0 from vpic mask/unmask */ + if (pin == 0 && (changed & IOAPIC_RTE_INTMASK)) { + /* mask -> umask */ + if ((last & IOAPIC_RTE_INTMASK) && + ((new & IOAPIC_RTE_INTMASK) == 0)) { + if ((vioapic->vm->vpic_wire_mode + == VPIC_WIRE_NULL) || + (vioapic->vm->vpic_wire_mode + == VPIC_WIRE_INTR)) { + atomic_set_int( + &vioapic->vm->vpic_wire_mode, + VPIC_WIRE_IOAPIC); + dev_dbg(ACRN_DBG_IOAPIC, + "vpic wire mode -> IOAPIC"); + } else { + pr_err("WARNING: invalid vpic wire mode change"); + return; + } + /* unmask -> mask */ + } else if (((last & IOAPIC_RTE_INTMASK) == 0) && + (new & IOAPIC_RTE_INTMASK)) { + if (vioapic->vm->vpic_wire_mode + == VPIC_WIRE_IOAPIC) { + atomic_set_int( + &vioapic->vm->vpic_wire_mode, + VPIC_WIRE_INTR); + dev_dbg(ACRN_DBG_IOAPIC, + "vpic wire mode -> INTR"); + } + } + } + vioapic->rtbl[pin].reg = new; + dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: redir table entry %#lx", + pin, vioapic->rtbl[pin].reg); + /* + * If any fields in the redirection table entry 
(except mask + * or polarity) have changed then rendezvous all the vcpus + * to update their vlapic trigger-mode registers. + */ + if (changed & ~(IOAPIC_RTE_INTMASK | IOAPIC_RTE_INTPOL)) { + int i; + struct vcpu *vcpu; + + dev_dbg(ACRN_DBG_IOAPIC, + "ioapic pin%d: recalculate vlapic trigger-mode reg", + pin); + + VIOAPIC_UNLOCK(vioapic); + + foreach_vcpu(i, vioapic->vm, vcpu) { + vcpu_make_request(vcpu, ACRN_REQUEST_TMR_UPDATE); + } + VIOAPIC_LOCK(vioapic); + } + + /* + * Generate an interrupt if the following conditions are met: + * - pin is not masked + * - previous interrupt has been EOIed + * - pin level is asserted + */ + if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTMASK) == + IOAPIC_RTE_INTMCLR && + (vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0 && + (vioapic->rtbl[pin].acnt > 0)) { + dev_dbg(ACRN_DBG_IOAPIC, + "ioapic pin%d: asserted at rtbl write, acnt %d", + pin, vioapic->rtbl[pin].acnt); + vioapic_send_intr(vioapic, pin); + } + + /* remap for active: interrupt mask -> unmask + * remap for deactive: interrupt mask & vector set to 0 + */ + data64 = vioapic->rtbl[pin].reg; + if ((((data64 & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMCLR) + && ((last & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET)) + || (((data64 & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET) + && ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) == 0))) { + /* VM enable intr */ + struct ptdev_intx_info intx; + + /* NOTE: only support max 256 pin */ + intx.virt_pin = (uint8_t)pin; + intx.vpin_src = PTDEV_VPIN_IOAPIC; + ptdev_intx_pin_remap(vioapic->vm, &intx); + } + } +} + +static int +vioapic_mmio_rw(struct vioapic *vioapic, uint64_t gpa, + uint64_t *data, int size, bool doread) +{ + uint64_t offset; + + offset = gpa - VIOAPIC_BASE; + + /* + * The IOAPIC specification allows 32-bit wide accesses to the + * IOREGSEL (offset 0) and IOWIN (offset 16) registers. 
+ */ + if (size != 4 || (offset != IOREGSEL && offset != IOWIN && + offset != IOEOI)) { + if (doread) + *data = 0; + return 0; + } + + VIOAPIC_LOCK(vioapic); + if (offset == IOREGSEL) { + if (doread) + *data = vioapic->ioregsel; + else + vioapic->ioregsel = *data; + } else if (offset == IOEOI) { + /* only need to handle write operation */ + if (!doread) + vioapic_write_eoi(vioapic, *data); + } else { + if (doread) { + *data = vioapic_read(vioapic, vioapic->ioregsel); + } else { + vioapic_write(vioapic, vioapic->ioregsel, + *data); + } + } + VIOAPIC_UNLOCK(vioapic); + + return 0; +} + +int +vioapic_mmio_read(void *vm, uint64_t gpa, uint64_t *rval, + int size) +{ + int error; + struct vioapic *vioapic; + + vioapic = vm_ioapic(vm); + error = vioapic_mmio_rw(vioapic, gpa, rval, size, true); + return error; +} + +int +vioapic_mmio_write(void *vm, uint64_t gpa, uint64_t wval, + int size) +{ + int error; + struct vioapic *vioapic; + + vioapic = vm_ioapic(vm); + error = vioapic_mmio_rw(vioapic, gpa, &wval, size, false); + return error; +} + +void +vioapic_process_eoi(struct vm *vm, int vector) +{ + struct vioapic *vioapic; + int pin; + + if (vector < VECTOR_FOR_INTR_START || vector > NR_MAX_VECTOR) + pr_err("vioapic_process_eoi: invalid vector %d", vector); + + vioapic = vm_ioapic(vm); + dev_dbg(ACRN_DBG_IOAPIC, "ioapic processing eoi for vector %d", vector); + + /* notify device to ack if assigned pin */ + for (pin = 0; pin < vioapic_pincount(vm); pin++) { + if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0) + continue; + if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) != + (uint64_t)vector) + continue; + ptdev_intx_ack(vm, pin, PTDEV_VPIN_IOAPIC); + } + + /* + * XXX keep track of the pins associated with this vector instead + * of iterating on every single pin each time. 
+ */ + VIOAPIC_LOCK(vioapic); + for (pin = 0; pin < vioapic_pincount(vm); pin++) { + if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0) + continue; + if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) != + (uint64_t)vector) + continue; + + vioapic->rtbl[pin].reg &= ~IOAPIC_RTE_REM_IRR; + if (vioapic->rtbl[pin].acnt > 0) { + dev_dbg(ACRN_DBG_IOAPIC, + "ioapic pin%d: asserted at eoi, acnt %d", + pin, vioapic->rtbl[pin].acnt); + vioapic_send_intr(vioapic, pin); + } + } + VIOAPIC_UNLOCK(vioapic); +} + +struct vioapic * +vioapic_init(struct vm *vm) +{ + int i; + struct vioapic *vioapic; + + vioapic = calloc(1, sizeof(struct vioapic)); + ASSERT(vioapic != NULL, ""); + + vioapic->vm = vm; + spinlock_init(&vioapic->mtx); + + /* Initialize all redirection entries to mask all interrupts */ + for (i = 0; i < vioapic_pincount(vioapic->vm); i++) + vioapic->rtbl[i].reg = 0x0001000000010000UL; + + register_mmio_emulation_handler(vm, + vioapic_mmio_access_handler, + (uint64_t)VIOAPIC_BASE, + (uint64_t)VIOAPIC_BASE + VIOAPIC_SIZE, + (void *) 0); + + return vioapic; +} + +void +vioapic_cleanup(struct vioapic *vioapic) +{ + unregister_mmio_emulation_handler(vioapic->vm, + (uint64_t)VIOAPIC_BASE, + (uint64_t)VIOAPIC_BASE + VIOAPIC_SIZE); + free(vioapic); +} + +int +vioapic_pincount(struct vm *vm) +{ + if (is_vm0(vm)) + return REDIR_ENTRIES_HW; + else + return REDIR_ENTRIES_UOS; +} + +int vioapic_mmio_access_handler(struct vcpu *vcpu, struct mem_io *mmio, + void *handler_private_data) +{ + struct vm *vm = vcpu->vm; + uint64_t gpa = mmio->paddr; + int ret = 0; + + (void)handler_private_data; + + /* Note all RW to IOAPIC are 32-Bit in size */ + ASSERT(mmio->access_size == 4, + "All RW to LAPIC must be 32-bits in size"); + + if (mmio->read_write == HV_MEM_IO_READ) { + ret = vioapic_mmio_read(vm, + gpa, + &mmio->value, + mmio->access_size); + mmio->mmio_status = MMIO_TRANS_VALID; + + } else if (mmio->read_write == HV_MEM_IO_WRITE) { + ret = vioapic_mmio_write(vm, + gpa, + mmio->value, + mmio->access_size); + + mmio->mmio_status = MMIO_TRANS_VALID; + } + + return ret; +} + +bool vioapic_get_rte(struct vm *vm, int pin, void *rte) +{ + struct vioapic *vioapic; + + vioapic = vm_ioapic(vm); + if (vioapic && rte) { + *(uint64_t *)rte = vioapic->rtbl[pin].reg; + return true; + } else + return false; +} + +int get_vioapic_info(char *str, int str_max, int vmid) +{ + int pin, len, size = str_max, vector, delmode; + uint64_t rte; + uint32_t low, high, dest; + bool level, phys, remote_irr, mask; + struct vm *vm = get_vm_from_vmid(vmid); + + if (!vm) { + len = snprintf(str, size, + "\r\nvm is not exist for vmid %d", vmid); + size -= len; + str += len; + goto END; + } + + len = snprintf(str, size, + "\r\nPIN\tVEC\tDM\tDEST\tTM\tDELM\tIRR\tMASK"); + size -= len; + str += len; + + for (pin = 0 ; pin < vioapic_pincount(vm); pin++) { + vioapic_get_rte(vm, pin, (void *)&rte); + low = rte; + high = rte >> 32; + mask = ((low & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET); + remote_irr = ((low & IOAPIC_RTE_REM_IRR) == IOAPIC_RTE_REM_IRR); + phys = ((low & IOAPIC_RTE_DESTMOD) == IOAPIC_RTE_DESTPHY); + delmode = low & IOAPIC_RTE_DELMOD; + level = low & IOAPIC_RTE_TRGRLVL ? true : false; + vector = low & IOAPIC_RTE_INTVEC; + dest = high >> APIC_ID_SHIFT; + + len = snprintf(str, size, + "\r\n%d\t0x%X\t%s\t0x%X\t%s\t%d\t%d\t%d", + pin, vector, phys ? "phys" : "logic", + dest, level ? 
"level" : "edge", + delmode >> 8, remote_irr, mask); + size -= len; + str += len; + } +END: + snprintf(str, size, "\r\n"); + return 0; +} diff --git a/hypervisor/arch/x86/guest/vlapic.c b/hypervisor/arch/x86/guest/vlapic.c new file mode 100644 index 000000000..5fee08add --- /dev/null +++ b/hypervisor/arch/x86/guest/vlapic.c @@ -0,0 +1,2398 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * Copyright (c) 2017 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#define pr_fmt(fmt) "vlapic: " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include "time.h" +#include "vlapic_priv.h" + +#define VLAPIC_VERBOS 0 +#define PRIO(x) ((x) >> 4) + +#define VLAPIC_VERSION (16) + +#define APICBASE_RESERVED 0x000002ff +#define APICBASE_BSP 0x00000100 +#define APICBASE_X2APIC 0x00000400 +#define APICBASE_ENABLED 0x00000800 + +#define ACRN_DBG_LAPIC 6 + +#if VLAPIC_VERBOS +#define VLAPIC_CTR_IRR(vlapic, msg) \ +do { \ + struct lapic_reg *irrptr = &(vlapic)->apic_page->irr[0]; \ + dev_dbg(ACRN_DBG_LAPIC, msg " irr0 0x%08x", irrptr[0].val); \ + dev_dbg(ACRN_DBG_LAPIC, msg " irr1 0x%08x", irrptr[1].val); \ + dev_dbg(ACRN_DBG_LAPIC, msg " irr2 0x%08x", irrptr[2].val); \ + dev_dbg(ACRN_DBG_LAPIC, msg " irr3 0x%08x", irrptr[3].val); \ + dev_dbg(ACRN_DBG_LAPIC, msg " irr4 0x%08x", irrptr[4].val); \ + dev_dbg(ACRN_DBG_LAPIC, msg " irr5 0x%08x", irrptr[5].val); \ + dev_dbg(ACRN_DBG_LAPIC, msg " irr6 0x%08x", irrptr[6].val); \ + dev_dbg(ACRN_DBG_LAPIC, msg " irr7 0x%08x", irrptr[7].val); \ +} while (0) + +#define VLAPIC_CTR_ISR(vlapic, msg) \ +do { \ + struct lapic_reg *isrptr = &(vlapic)->apic_page->isr[0]; \ + dev_dbg(ACRN_DBG_LAPIC, msg " isr0 0x%08x", isrptr[0].val); \ + dev_dbg(ACRN_DBG_LAPIC, msg " isr1 0x%08x", isrptr[1].val); \ + dev_dbg(ACRN_DBG_LAPIC, msg " isr2 0x%08x", isrptr[2].val); \ + dev_dbg(ACRN_DBG_LAPIC, msg " isr3 0x%08x", isrptr[3].val); \ + dev_dbg(ACRN_DBG_LAPIC, msg " isr4 0x%08x", isrptr[4].val); \ + dev_dbg(ACRN_DBG_LAPIC, msg " isr5 0x%08x", isrptr[5].val); \ + dev_dbg(ACRN_DBG_LAPIC, msg " isr6 0x%08x", isrptr[6].val); \ + dev_dbg(ACRN_DBG_LAPIC, msg " isr7 0x%08x", isrptr[7].val); \ +} while (0) +#else +#define VLAPIC_CTR_IRR(vlapic, msg) +#define VLAPIC_CTR_ISR(vlapic, msg) +#endif + 
+/* + * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the + * vlapic_callout_handler() and vcpu accesses to: + * - timer_freq_bt, timer_period_bt, timer_fire_bt + * - timer LVT register + */ +#define VLAPIC_TIMER_LOCK(vlapic) spinlock_obtain(&((vlapic)->timer_mtx)) +#define VLAPIC_TIMER_UNLOCK(vlapic) spinlock_release(&((vlapic)->timer_mtx)) + +/* + * APIC timer frequency: + * - arbitrary but chosen to be in the ballpark of contemporary hardware. + * - power-of-two to avoid loss of precision when converted to a bintime. + */ +#define VLAPIC_BUS_FREQ (128 * 1024 * 1024) + +/* TIMER_LVT bit[18:17] == 0x10 TSD DEADLINE mode */ +#define VLAPIC_TSCDEADLINE(lvt) (((lvt) & 0x60000) == 0x40000) + +/*APIC-v APIC-access address */ +static void *apicv_apic_access_addr; + +static int +vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset, + uint64_t data, bool *retu); +static int +vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset, + uint64_t *data, bool *retu); + +static int +apicv_set_intr_ready(struct vlapic *vlapic, int vector, bool level); + +static int +apicv_pending_intr(struct vlapic *vlapic, int *vecptr); + +static void +apicv_set_tmr(struct vlapic *vlapic, int vector, bool level); + +static void +apicv_batch_set_tmr(struct vlapic *vlapic); + +/* + * Returns 1 if the vcpu needs to be notified of the interrupt and 0 otherwise. + */ +static int +vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level); + +/* + * Post an interrupt to the vcpu running on 'hostcpu'. This will use a + * hardware assist if available (e.g. Posted Interrupt) or fall back to + * sending an 'ipinum' to interrupt the 'hostcpu'. + */ +static void vlapic_set_error(struct vlapic *vlapic, uint32_t mask); +static int vlapic_trigger_lvt(struct vlapic *vlapic, int vector); + +static uint64_t vlapic_get_apicbase(struct vlapic *vlapic); +static int vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val); + +static struct vlapic * +vm_lapic_from_vcpu_id(struct vm *vm, int vcpu_id) +{ + struct vcpu *vcpu; + + vcpu = vcpu_from_vid(vm, vcpu_id); + ASSERT(vcpu != NULL, "vm%d, vcpu%d", vm->attr.id, vcpu_id); + + return vcpu->arch_vcpu.vlapic; +} + +struct vlapic * +vm_lapic_from_pcpuid(struct vm *vm, int pcpu_id) +{ + struct vcpu *vcpu; + + vcpu = vcpu_from_pid(vm, pcpu_id); + ASSERT(vcpu != NULL, "vm%d, pcpu%d", vm->attr.id, pcpu_id); + + return vcpu->arch_vcpu.vlapic; +} + +static int vm_apicid2vcpu_id(struct vm *vm, uint8_t lapicid) +{ + int i; + struct vcpu *vcpu; + + foreach_vcpu(i, vm, vcpu) { + if (vlapic_get_apicid(vcpu->arch_vcpu.vlapic) == lapicid) + return vcpu->vcpu_id; + } + + pr_err("vm_apicid2vcpu_id: bad lapicid %d", lapicid); + + return phy_cpu_num; +} + +static uint64_t +vm_active_cpus(struct vm *vm) +{ + uint64_t dmask = 0; + int i; + struct vcpu *vcpu; + + foreach_vcpu(i, vm, vcpu) { + bitmap_set(vcpu->vcpu_id, &dmask); + } + + return dmask; +} + +uint32_t +vlapic_get_id(struct vlapic *vlapic) +{ + return vlapic->apic_page->id; +} + +uint8_t +vlapic_get_apicid(struct vlapic *vlapic) +{ + return vlapic->apic_page->id >> APIC_ID_SHIFT; +} + +static inline uint32_t +vlapic_build_id(struct vlapic *vlapic) +{ + struct vcpu *vcpu = vlapic->vcpu; + uint32_t id; + + if (is_vm0(vcpu->vm)) { + /* Get APIC ID sequence format from cpu_storage */ + id = per_cpu(lapic_id, vcpu->vcpu_id); + } else + id = vcpu->vcpu_id; + + dev_dbg(ACRN_DBG_LAPIC, "vlapic APIC PAGE ID : 0x%08x", + (id << APIC_ID_SHIFT)); + + return (id << APIC_ID_SHIFT); +} + +static void 
+vlapic_dfr_write_handler(struct vlapic *vlapic) +{ + struct lapic *lapic; + + lapic = vlapic->apic_page; + lapic->dfr &= APIC_DFR_MODEL_MASK; + lapic->dfr |= APIC_DFR_RESERVED; + + if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT) + dev_dbg(ACRN_DBG_LAPIC, "vlapic DFR in Flat Model"); + else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER) + dev_dbg(ACRN_DBG_LAPIC, "vlapic DFR in Cluster Model"); + else + dev_dbg(ACRN_DBG_LAPIC, "DFR in Unknown Model %#x", lapic->dfr); +} + +static void +vlapic_ldr_write_handler(struct vlapic *vlapic) +{ + struct lapic *lapic; + + lapic = vlapic->apic_page; + lapic->ldr &= ~APIC_LDR_RESERVED; + dev_dbg(ACRN_DBG_LAPIC, "vlapic LDR set to %#x", lapic->ldr); +} + +static void +vlapic_id_write_handler(struct vlapic *vlapic) +{ + struct lapic *lapic; + + /* + * We don't allow the ID register to be modified so reset it back to + * its default value. + */ + lapic = vlapic->apic_page; + lapic->id = vlapic_get_id(vlapic); +} + +static void +binuptime(struct bintime *bt) +{ + uint64_t now = TICKS_TO_US(rdtsc()); + + bt->sec = now / 1000000; + bt->frac = ((now - bt->sec * 1000000) * + (uint64_t)0x80000000 / 1000000) << 33; +} + +int +callout_reset_sbt(struct callout *c, __unused sbintime_t sbt, + __unused sbintime_t prec, void (*ftn)(void *), + void *arg, __unused int flags) +{ + c->c_flags |= CALLOUT_PENDING; + c->c_func = ftn; + c->c_arg = arg; + + /* TODO: add expire timer*/ + + c->c_flags &= ~CALLOUT_PENDING; + c->c_flags |= CALLOUT_ACTIVE; + + return 0; +} + +int +callout_stop(struct callout *c) +{ + callout_deactivate(c); + c->c_flags |= CALLOUT_PENDING; + + return 0; +} + +static int +vlapic_timer_divisor(uint32_t dcr) +{ + switch (dcr & 0xB) { + case APIC_TDCR_1: + return 1; + case APIC_TDCR_2: + return 2; + case APIC_TDCR_4: + return 4; + case APIC_TDCR_8: + return 8; + case APIC_TDCR_16: + return 16; + case APIC_TDCR_32: + return 32; + case APIC_TDCR_64: + return 64; + case APIC_TDCR_128: + return 128; + default: + panic("vlapic_timer_divisor: invalid dcr"); + } +} + +static uint32_t +vlapic_get_ccr(struct vlapic *vlapic) +{ + struct bintime bt_now, bt_rem; + struct lapic *lapic; + uint32_t ccr; + + ccr = 0; + lapic = vlapic->apic_page; + + VLAPIC_TIMER_LOCK(vlapic); + if (callout_active(&vlapic->callout)) { + /* + * If the timer is scheduled to expire in the future then + * compute the value of 'ccr' based on the remaining time. + */ + binuptime(&bt_now); + if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) { + bt_rem = vlapic->timer_fire_bt; + bintime_sub(&bt_rem, &bt_now); + ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt); + ccr += bt_rem.frac / vlapic->timer_freq_bt.frac; + } + } + ASSERT(ccr <= lapic->icr_timer, + "vlapic_get_ccr: invalid ccr %#x, icr_timer is %#x", + ccr, lapic->icr_timer); + dev_dbg(ACRN_DBG_LAPIC, "vlapic ccr_timer = %#x, icr_timer = %#x", + ccr, lapic->icr_timer); + VLAPIC_TIMER_UNLOCK(vlapic); + return ccr; +} + +static void +vlapic_dcr_write_handler(struct vlapic *vlapic) +{ + struct lapic *lapic; + int divisor; + + lapic = vlapic->apic_page; + VLAPIC_TIMER_LOCK(vlapic); + + divisor = vlapic_timer_divisor(lapic->dcr_timer); + dev_dbg(ACRN_DBG_LAPIC, "vlapic dcr_timer=%#x, divisor=%d", + lapic->dcr_timer, divisor); + + /* + * Update the timer frequency and the timer period. + * + * XXX changes to the frequency divider will not take effect until + * the timer is reloaded. 
+ */ + FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt); + vlapic->timer_period_bt = vlapic->timer_freq_bt; + bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer); + + VLAPIC_TIMER_UNLOCK(vlapic); +} + +static void +vlapic_esr_write_handler(struct vlapic *vlapic) +{ + struct lapic *lapic; + + lapic = vlapic->apic_page; + lapic->esr = vlapic->esr_pending; + vlapic->esr_pending = 0; +} + +static int +vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level) +{ + struct lapic *lapic; + struct lapic_reg *irrptr, *tmrptr; + uint32_t mask; + int idx; + + ASSERT((vector >= 0) && (vector <= NR_MAX_VECTOR), + "invalid vector %d", vector); + + lapic = vlapic->apic_page; + if (!(lapic->svr & APIC_SVR_ENABLE)) { + dev_dbg(ACRN_DBG_LAPIC, + "vlapic is software disabled, ignoring interrupt %d", + vector); + return 0; + } + + if (vector < 16) { + vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR); + dev_dbg(ACRN_DBG_LAPIC, + "vlapic ignoring interrupt to vector %d", + vector); + return 1; + } + + if (vlapic->ops.apicv_set_intr_ready) + return (*vlapic->ops.apicv_set_intr_ready) + (vlapic, vector, level); + + idx = vector / 32; + mask = 1 << (vector % 32); + + irrptr = &lapic->irr[0]; + atomic_set_int(&irrptr[idx].val, mask); + + /* + * Verify that the trigger-mode of the interrupt matches with + * the vlapic TMR registers. + */ + tmrptr = &lapic->tmr[0]; + if ((tmrptr[idx].val & mask) != (level ? mask : 0)) { + dev_dbg(ACRN_DBG_LAPIC, + "vlapic TMR[%d] is 0x%08x but interrupt is %s-triggered", + idx, tmrptr[idx].val, level ? "level" : "edge"); + } + + VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready"); + return 1; +} + +static inline uint32_t * +vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset) +{ + struct lapic *lapic = vlapic->apic_page; + int i; + + switch (offset) { + case APIC_OFFSET_CMCI_LVT: + return &lapic->lvt_cmci; + case APIC_OFFSET_TIMER_LVT ... 
APIC_OFFSET_ERROR_LVT: + i = (offset - APIC_OFFSET_TIMER_LVT) >> 2; + return (&lapic->lvt_timer) + i; + default: + panic("vlapic_get_lvt: invalid LVT\n"); + } +} + +static inline int +lvt_off_to_idx(uint32_t offset) +{ + int index; + + switch (offset) { + case APIC_OFFSET_CMCI_LVT: + index = APIC_LVT_CMCI; + break; + case APIC_OFFSET_TIMER_LVT: + index = APIC_LVT_TIMER; + break; + case APIC_OFFSET_THERM_LVT: + index = APIC_LVT_THERMAL; + break; + case APIC_OFFSET_PERF_LVT: + index = APIC_LVT_PMC; + break; + case APIC_OFFSET_LINT0_LVT: + index = APIC_LVT_LINT0; + break; + case APIC_OFFSET_LINT1_LVT: + index = APIC_LVT_LINT1; + break; + case APIC_OFFSET_ERROR_LVT: + index = APIC_LVT_ERROR; + break; + default: + index = -1; + break; + } + ASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, + "lvt_off_to_idx: invalid lvt index %d for offset %#x", + index, offset); + + return index; +} + +static inline uint32_t +vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset) +{ + int idx; + uint32_t val; + + idx = lvt_off_to_idx(offset); + val = atomic_load_acq_32(&vlapic->lvt_last[idx]); + return val; +} + +static void +vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset) +{ + uint32_t *lvtptr, mask, val; + struct lapic *lapic; + int idx; + + lapic = vlapic->apic_page; + lvtptr = vlapic_get_lvtptr(vlapic, offset); + val = *lvtptr; + idx = lvt_off_to_idx(offset); + + if (!(lapic->svr & APIC_SVR_ENABLE)) + val |= APIC_LVT_M; + mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR; + switch (offset) { + case APIC_OFFSET_TIMER_LVT: + mask |= APIC_LVTT_TM; + break; + case APIC_OFFSET_ERROR_LVT: + break; + case APIC_OFFSET_LINT0_LVT: + case APIC_OFFSET_LINT1_LVT: + mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP; + /* FALLTHROUGH */ + default: + mask |= APIC_LVT_DM; + break; + } + val &= mask; + + /* vlapic mask/unmask LINT0 for ExtINT? 
*/ + if (offset == APIC_OFFSET_LINT0_LVT && + ((val & APIC_LVT_DM) == APIC_LVT_DM_EXTINT)) { + uint32_t last = vlapic_get_lvt(vlapic, offset); + + /* mask -> unmask: may from every vlapic in the vm */ + if ((last & APIC_LVT_M) && ((val & APIC_LVT_M) == 0)) { + if (vlapic->vm->vpic_wire_mode == VPIC_WIRE_INTR || + vlapic->vm->vpic_wire_mode == VPIC_WIRE_NULL) { + atomic_set_int(&vlapic->vm->vpic_wire_mode, + VPIC_WIRE_LAPIC); + dev_dbg(ACRN_DBG_LAPIC, + "vpic wire mode -> LAPIC"); + } else { + pr_err("WARNING:invalid vpic wire mode change"); + return; + } + /* unmask -> mask: only from the vlapic LINT0-ExtINT enabled */ + } else if (((last & APIC_LVT_M) == 0) && (val & APIC_LVT_M)) { + if (vlapic->vm->vpic_wire_mode == VPIC_WIRE_LAPIC) { + atomic_set_int(&vlapic->vm->vpic_wire_mode, + VPIC_WIRE_NULL); + dev_dbg(ACRN_DBG_LAPIC, + "vpic wire mode -> NULL"); + } + } + } + *lvtptr = val; + atomic_store_rel_32(&vlapic->lvt_last[idx], val); +} + +static void +vlapic_mask_lvts(struct vlapic *vlapic) +{ + struct lapic *lapic = vlapic->apic_page; + + lapic->lvt_cmci |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT); + + lapic->lvt_timer |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT); + + lapic->lvt_thermal |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT); + + lapic->lvt_pcint |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT); + + lapic->lvt_lint0 |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT); + + lapic->lvt_lint1 |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT); + + lapic->lvt_error |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT); +} + +static int +vlapic_fire_lvt(struct vlapic *vlapic, uint32_t lvt) +{ + uint32_t vec, mode; + + if (lvt & APIC_LVT_M) + return 0; + + vec = lvt & APIC_LVT_VECTOR; + mode = lvt & APIC_LVT_DM; + + switch (mode) { + case APIC_LVT_DM_FIXED: + if (vec < 16) { + vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR); + return 0; + } + if (vlapic_set_intr_ready(vlapic, vec, false)) + vcpu_make_request(vlapic->vcpu, ACRN_REQUEST_EVENT); + break; + case APIC_LVT_DM_NMI: + vcpu_inject_nmi(vlapic->vcpu); + break; + case APIC_LVT_DM_EXTINT: + vcpu_inject_extint(vlapic->vcpu); + break; + default: + /* Other modes ignored */ + return 0; + } + return 1; +} + +static void +dump_isrvec_stk(struct vlapic *vlapic) +{ + int i; + struct lapic_reg *isrptr; + + isrptr = &vlapic->apic_page->isr[0]; + for (i = 0; i < 8; i++) + printf("ISR%d 0x%08x\n", i, isrptr[i].val); + + for (i = 0; i <= vlapic->isrvec_stk_top; i++) + printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]); +} + +/* + * Algorithm adopted from section "Interrupt, Task and Processor Priority" + * in Intel Architecture Manual Vol 3a. + */ +static void +vlapic_update_ppr(struct vlapic *vlapic) +{ + int isrvec, tpr, ppr; + + /* + * Note that the value on the stack at index 0 is always 0. + * + * This is a placeholder for the value of ISRV when none of the + * bits is set in the ISRx registers. + */ + isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top]; + tpr = vlapic->apic_page->tpr; + + /* update ppr */ + { + int i, lastprio, curprio, vector, idx; + struct lapic_reg *isrptr; + + if (vlapic->isrvec_stk_top == 0 && isrvec != 0) + panic("isrvec_stk is corrupted: %d", isrvec); + + /* + * Make sure that the priority of the nested interrupts is + * always increasing. 
+ */ + lastprio = -1; + for (i = 1; i <= vlapic->isrvec_stk_top; i++) { + curprio = PRIO(vlapic->isrvec_stk[i]); + if (curprio <= lastprio) { + dump_isrvec_stk(vlapic); + panic("isrvec_stk does not satisfy invariant"); + } + lastprio = curprio; + } + + /* + * Make sure that each bit set in the ISRx registers has a + * corresponding entry on the isrvec stack. + */ + i = 1; + isrptr = &vlapic->apic_page->isr[0]; + for (vector = 0; vector < 256; vector++) { + idx = vector / 32; + if (isrptr[idx].val & (1 << (vector % 32))) { + if ((i > vlapic->isrvec_stk_top) || + ((i < ISRVEC_STK_SIZE) && + (vlapic->isrvec_stk[i] != vector))) { + dump_isrvec_stk(vlapic); + panic("ISR and isrvec_stk out of sync"); + } + i++; + } + } + } + + if (PRIO(tpr) >= PRIO(isrvec)) + ppr = tpr; + else + ppr = isrvec & 0xf0; + + vlapic->apic_page->ppr = ppr; + dev_dbg(ACRN_DBG_LAPIC, "vlapic_update_ppr 0x%02x", ppr); +} + +static void +vlapic_process_eoi(struct vlapic *vlapic) +{ + struct lapic *lapic = vlapic->apic_page; + struct lapic_reg *isrptr, *tmrptr; + int i, bitpos, vector; + + isrptr = &lapic->isr[0]; + tmrptr = &lapic->tmr[0]; + + for (i = 7; i >= 0; i--) { + bitpos = fls(isrptr[i].val); + if (bitpos >= 0) { + if (vlapic->isrvec_stk_top <= 0) { + panic("invalid vlapic isrvec_stk_top %d", + vlapic->isrvec_stk_top); + } + isrptr[i].val &= ~(1 << bitpos); + vector = i * 32 + bitpos; + dev_dbg(ACRN_DBG_LAPIC, "EOI vector %d", vector); + VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi"); + vlapic->isrvec_stk_top--; + vlapic_update_ppr(vlapic); + if ((tmrptr[i].val & (1 << bitpos)) != 0) { + /* hook to vIOAPIC */ + vioapic_process_eoi(vlapic->vm, vector); + } + return; + } + } + dev_dbg(ACRN_DBG_LAPIC, "Gratuitous EOI"); +} + +static inline int +vlapic_get_lvt_field(uint32_t lvt, uint32_t mask) +{ + + return lvt & mask; +} + +static inline int +vlapic_periodic_timer(struct vlapic *vlapic) +{ + uint32_t lvt; + + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT); + + return vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC); +} + +static void +vlapic_set_error(struct vlapic *vlapic, uint32_t mask) +{ + uint32_t lvt; + + vlapic->esr_pending |= mask; + if (vlapic->esr_firing) + return; + vlapic->esr_firing = 1; + + /* The error LVT always uses the fixed delivery mode. */ + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT); + vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED); + vlapic->esr_firing = 0; +} + +static void +vlapic_fire_timer(struct vlapic *vlapic) +{ + uint32_t lvt; + + /* The timer LVT always uses the fixed delivery mode.*/ + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT); + if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) + dev_dbg(ACRN_DBG_LAPIC, "vlapic timer fired"); +} + +static int +vlapic_trigger_lvt(struct vlapic *vlapic, int vector) +{ + uint32_t lvt; + + if (vlapic_enabled(vlapic) == false) { + /* + * When the local APIC is global/hardware disabled, + * LINT[1:0] pins are configured as INTR and NMI pins, + * respectively. 
+ */ + switch (vector) { + case APIC_LVT_LINT0: + vcpu_inject_extint(vlapic->vcpu); + break; + case APIC_LVT_LINT1: + vcpu_inject_nmi(vlapic->vcpu); + break; + default: + break; + } + return 0; + } + + switch (vector) { + case APIC_LVT_LINT0: + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT0_LVT); + break; + case APIC_LVT_LINT1: + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT1_LVT); + break; + case APIC_LVT_TIMER: + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT); + lvt |= APIC_LVT_DM_FIXED; + break; + case APIC_LVT_ERROR: + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT); + lvt |= APIC_LVT_DM_FIXED; + break; + case APIC_LVT_PMC: + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_PERF_LVT); + break; + case APIC_LVT_THERMAL: + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_THERM_LVT); + break; + case APIC_LVT_CMCI: + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT); + break; + default: + return -EINVAL; + } + vlapic_fire_lvt(vlapic, lvt); + return 0; +} + +static void +vlapic_callout_handler(void *arg) +{ + struct vlapic *vlapic; + struct bintime bt, btnow; + sbintime_t rem_sbt; + + vlapic = arg; + + VLAPIC_TIMER_LOCK(vlapic); + if (callout_pending(&vlapic->callout)) /* callout was reset */ + goto done; + + if (!callout_active(&vlapic->callout)) /* callout was stopped */ + goto done; + + callout_deactivate(&vlapic->callout); + + vlapic_fire_timer(vlapic); + + if (vlapic_periodic_timer(vlapic)) { + binuptime(&btnow); + ASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=), + "vlapic callout at %#lx.%#lx, expected at %#lx.#%lx", + btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec, + vlapic->timer_fire_bt.frac); + + /* + * Compute the delta between when the timer was supposed to + * fire and the present time. + */ + bt = btnow; + bintime_sub(&bt, &vlapic->timer_fire_bt); + + rem_sbt = bttosbt(vlapic->timer_period_bt); + if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) { + /* + * Adjust the time until the next countdown downward + * to account for the lost time. + */ + rem_sbt -= bttosbt(bt); + } else { + /* + * If the delta is greater than the timer period then + * just reset our time base instead of trying to catch + * up. + */ + vlapic->timer_fire_bt = btnow; + + dev_dbg(ACRN_DBG_LAPIC, + "vlapic timer lagged by %lu usecs, period is %lu usecs", + bttosbt(bt) / SBT_1US, + bttosbt(vlapic->timer_period_bt) / SBT_1US); + + dev_dbg(ACRN_DBG_LAPIC, "resetting time base"); + } + + bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); + callout_reset_sbt(&vlapic->callout, rem_sbt, 0, + vlapic_callout_handler, vlapic, 0); + } +done: + VLAPIC_TIMER_UNLOCK(vlapic); +} + +static void +vlapic_icrtmr_write_handler(struct vlapic *vlapic) +{ + struct lapic *lapic; + sbintime_t sbt; + uint32_t icr_timer; + + VLAPIC_TIMER_LOCK(vlapic); + + lapic = vlapic->apic_page; + icr_timer = lapic->icr_timer; + + vlapic->timer_period_bt = vlapic->timer_freq_bt; + bintime_mul(&vlapic->timer_period_bt, icr_timer); + + if (icr_timer != 0) { + binuptime(&vlapic->timer_fire_bt); + bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); + + sbt = bttosbt(vlapic->timer_period_bt); + callout_reset_sbt(&vlapic->callout, sbt, 0, + vlapic_callout_handler, vlapic, 0); + } else + callout_stop(&vlapic->callout); + + VLAPIC_TIMER_UNLOCK(vlapic); +} + +/* + * This function populates 'dmask' with the set of vcpus that match the + * addressing specified by the (dest, phys, lowprio) tuple. 
+ */ +static void +vlapic_calcdest(struct vm *vm, uint64_t *dmask, uint32_t dest, + bool phys, bool lowprio) +{ + struct vlapic *vlapic; + struct vlapic *target = NULL; + uint32_t dfr, ldr, ldest, cluster; + uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id; + uint64_t amask; + int vcpu_id; + + if (dest == 0xff) { + /* + * Broadcast in both logical and physical modes. + */ + *dmask = vm_active_cpus(vm); + return; + } + + if (phys) { + /* + * Physical mode: destination is LAPIC ID. + */ + *dmask = 0; + vcpu_id = vm_apicid2vcpu_id(vm, dest); + if (vcpu_id < phy_cpu_num) + bitmap_set(vcpu_id, dmask); + } else { + /* + * In the "Flat Model" the MDA is interpreted as an 8-bit wide + * bitmask. This model is only available in the xAPIC mode. + */ + mda_flat_ldest = dest & 0xff; + + /* + * In the "Cluster Model" the MDA is used to identify a + * specific cluster and a set of APICs in that cluster. + */ + mda_cluster_id = (dest >> 4) & 0xf; + mda_cluster_ldest = dest & 0xf; + + /* + * Logical mode: match each APIC that has a bit set + * in its LDR that matches a bit in the ldest. + */ + *dmask = 0; + amask = vm_active_cpus(vm); + while ((vcpu_id = bitmap_ffs(&amask)) >= 0) { + bitmap_clr(vcpu_id, &amask); + + vlapic = vm_lapic_from_vcpu_id(vm, vcpu_id); + dfr = vlapic->apic_page->dfr; + ldr = vlapic->apic_page->ldr; + + if ((dfr & APIC_DFR_MODEL_MASK) == + APIC_DFR_MODEL_FLAT) { + ldest = ldr >> 24; + mda_ldest = mda_flat_ldest; + } else if ((dfr & APIC_DFR_MODEL_MASK) == + APIC_DFR_MODEL_CLUSTER) { + + cluster = ldr >> 28; + ldest = (ldr >> 24) & 0xf; + + if (cluster != mda_cluster_id) + continue; + mda_ldest = mda_cluster_ldest; + } else { + /* + * Guest has configured a bad logical + * model for this vcpu - skip it. + */ + dev_dbg(ACRN_DBG_LAPIC, + "CANNOT deliver interrupt"); + dev_dbg(ACRN_DBG_LAPIC, + "vlapic has bad logical model %x", dfr); + continue; + } + + if ((mda_ldest & ldest) != 0) { + if (lowprio) { + if (target == NULL) + target = vlapic; + else if (target->apic_page->ppr > + vlapic->apic_page->ppr) + target = vlapic; + } else { + bitmap_set(vcpu_id, dmask); + } + } + } + + if (lowprio && (target != NULL)) + bitmap_set(target->vcpu->vcpu_id, dmask); + } +} + +void +calcvdest(struct vm *vm, uint64_t *dmask, uint32_t dest, bool phys) +{ + vlapic_calcdest(vm, dmask, dest, phys, false); +} + +static void +vlapic_set_tpr(struct vlapic *vlapic, uint8_t val) +{ + struct lapic *lapic = vlapic->apic_page; + + if (lapic->tpr != val) { + dev_dbg(ACRN_DBG_LAPIC, + "vlapic TPR changed from %#x to %#x", lapic->tpr, val); + lapic->tpr = val; + vlapic_update_ppr(vlapic); + } +} + +static uint8_t +vlapic_get_tpr(struct vlapic *vlapic) +{ + struct lapic *lapic = vlapic->apic_page; + + return lapic->tpr; +} + +void +vlapic_set_cr8(struct vlapic *vlapic, uint64_t val) +{ + uint8_t tpr; + + if (val & ~0xf) { + vcpu_inject_gp(vlapic->vcpu); + return; + } + + tpr = val << 4; + vlapic_set_tpr(vlapic, tpr); +} + +uint64_t +vlapic_get_cr8(struct vlapic *vlapic) +{ + uint8_t tpr; + + tpr = vlapic_get_tpr(vlapic); + return tpr >> 4; +} + +static int +vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) +{ + int i; + bool phys; + uint64_t dmask; + uint64_t icrval; + uint32_t dest, vec, mode; + struct lapic *lapic; + struct vcpu *target_vcpu; + uint32_t target_vcpu_id; + + lapic = vlapic->apic_page; + lapic->icr_lo &= ~APIC_DELSTAT_PEND; + icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo; + + dest = icrval >> (32 + 24); + vec = icrval & APIC_VECTOR_MASK; + mode = icrval & 
APIC_DELMODE_MASK; + phys = ((icrval & APIC_DESTMODE_LOG) == 0); + +#ifdef CONFIG_EFI_STUB + if (sipi_from_efi_boot_service_exit(dest, mode, vec)) + return 0; +#endif + + if (mode == APIC_DELMODE_FIXED && vec < 16) { + vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR); + dev_dbg(ACRN_DBG_LAPIC, "Ignoring invalid IPI %d", vec); + return 0; + } + + dev_dbg(ACRN_DBG_LAPIC, + "icrlo 0x%016llx triggered ipi %d", icrval, vec); + + if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) { + switch (icrval & APIC_DEST_MASK) { + case APIC_DEST_DESTFLD: + vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false); + break; + case APIC_DEST_SELF: + bitmap_setof(vlapic->vcpu->vcpu_id, &dmask); + break; + case APIC_DEST_ALLISELF: + dmask = vm_active_cpus(vlapic->vm); + break; + case APIC_DEST_ALLESELF: + dmask = vm_active_cpus(vlapic->vm); + bitmap_clr(vlapic->vcpu->vcpu_id, &dmask); + break; + default: + dmask = 0; /* satisfy gcc */ + break; + } + + while ((i = bitmap_ffs(&dmask)) >= 0) { + bitmap_clr(i, &dmask); + target_vcpu = vcpu_from_vid(vlapic->vm, i); + if (target_vcpu == NULL) + return 0; + + if (mode == APIC_DELMODE_FIXED) { + vlapic_set_intr(target_vcpu, vec, + LAPIC_TRIG_EDGE); + dev_dbg(ACRN_DBG_LAPIC, + "vlapic sending ipi %d to vcpu_id %d", + vec, i); + } else { + vcpu_inject_nmi(target_vcpu); + dev_dbg(ACRN_DBG_LAPIC, + "vlapic send ipi nmi to vcpu_id %d", i); + } + } + + return 0; /* handled completely in the kernel */ + } + + if (phys) { + /* INIT/SIPI is sent in Physical mode with LAPIC ID as its + * destination, so the dest need to be changed to VCPU ID; + */ + target_vcpu_id = vm_apicid2vcpu_id(vlapic->vm, dest); + target_vcpu = vcpu_from_vid(vlapic->vm, target_vcpu_id); + if (target_vcpu == NULL) { + pr_err("Target VCPU not found"); + return 0; + } + + if (mode == APIC_DELMODE_INIT) { + if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) + return 0; + + dev_dbg(ACRN_DBG_LAPIC, + "Sending INIT from VCPU %d to %d", + vlapic->vcpu->vcpu_id, target_vcpu_id); + + /* put target vcpu to INIT state and wait for SIPI */ + pause_vcpu(target_vcpu, VCPU_PAUSED); + reset_vcpu(target_vcpu); + target_vcpu->arch_vcpu.nr_sipi = 2; + + return 0; + } + + if (mode == APIC_DELMODE_STARTUP) { + + /* Ignore SIPIs in any state other than wait-for-SIPI */ + if ((target_vcpu->state != VCPU_INIT) || + (target_vcpu->arch_vcpu.nr_sipi == 0)) + return 0; + + dev_dbg(ACRN_DBG_LAPIC, + "Sending SIPI from VCPU %d to %d with vector %d", + vlapic->vcpu->vcpu_id, target_vcpu_id, vec); + + if (--target_vcpu->arch_vcpu.nr_sipi > 0) + return 0; + + target_vcpu->arch_vcpu.cpu_mode = REAL_MODE; + target_vcpu->arch_vcpu.sipi_vector = vec; + pr_err("Start Secondary VCPU%d for VM[%d]...", + target_vcpu->vcpu_id, + target_vcpu->vm->attr.id); + schedule_vcpu(target_vcpu); + + *retu = true; + return 0; + } + } + + /* + * This will cause a return to userland. 
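+ * Only FIXED, NMI and physically addressed INIT/SIPI IPIs are
+ * handled above; any other request falls through to this point.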
+ */ + return 1; +} + +int +vlapic_pending_intr(struct vlapic *vlapic, int *vecptr) +{ + struct lapic *lapic = vlapic->apic_page; + int i, bitpos; + uint32_t vector; + uint32_t val; + struct lapic_reg *irrptr; + + if (vlapic->ops.apicv_pending_intr) + return (*vlapic->ops.apicv_pending_intr)(vlapic, vecptr); + + irrptr = &lapic->irr[0]; + + for (i = 7; i >= 0; i--) { + val = atomic_load_acq_int(&irrptr[i].val); + bitpos = fls(val); + if (bitpos >= 0) { + vector = i * 32 + bitpos; + if (PRIO(vector) > PRIO(lapic->ppr)) { + if (vecptr != NULL) + *vecptr = vector; + return 1; + } + break; + } + } + return 0; +} + +void +vlapic_intr_accepted(struct vlapic *vlapic, int vector) +{ + struct lapic *lapic = vlapic->apic_page; + struct lapic_reg *irrptr, *isrptr; + int idx, stk_top; + + if (vlapic->ops.apicv_intr_accepted) + return (*vlapic->ops.apicv_intr_accepted)(vlapic, vector); + + /* + * clear the ready bit for vector being accepted in irr + * and set the vector as in service in isr. + */ + idx = vector / 32; + + irrptr = &lapic->irr[0]; + atomic_clear_int(&irrptr[idx].val, 1 << (vector % 32)); + VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted"); + + isrptr = &lapic->isr[0]; + isrptr[idx].val |= 1 << (vector % 32); + VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted"); + + /* + * Update the PPR + */ + vlapic->isrvec_stk_top++; + + stk_top = vlapic->isrvec_stk_top; + if (stk_top >= ISRVEC_STK_SIZE) + panic("isrvec_stk_top overflow %d", stk_top); + + vlapic->isrvec_stk[stk_top] = vector; + vlapic_update_ppr(vlapic); +} + +static void +vlapic_svr_write_handler(struct vlapic *vlapic) +{ + struct lapic *lapic; + uint32_t old, new, changed; + + lapic = vlapic->apic_page; + + new = lapic->svr; + old = vlapic->svr_last; + vlapic->svr_last = new; + + changed = old ^ new; + if ((changed & APIC_SVR_ENABLE) != 0) { + if ((new & APIC_SVR_ENABLE) == 0) { + /* + * The apic is now disabled so stop the apic timer + * and mask all the LVT entries. + */ + dev_dbg(ACRN_DBG_LAPIC, "vlapic is software-disabled"); + VLAPIC_TIMER_LOCK(vlapic); + callout_stop(&vlapic->callout); + VLAPIC_TIMER_UNLOCK(vlapic); + vlapic_mask_lvts(vlapic); + /* the only one enabled LINT0-ExtINT vlapic disabled */ + if (vlapic->vm->vpic_wire_mode == VPIC_WIRE_NULL) { + atomic_set_int(&vlapic->vm->vpic_wire_mode, + VPIC_WIRE_INTR); + dev_dbg(ACRN_DBG_LAPIC, + "vpic wire mode -> INTR"); + } + } else { + /* + * The apic is now enabled so restart the apic timer + * if it is configured in periodic mode. 
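+ * Re-arming simply re-runs the ICR-timer write handler, which
+ * reloads the countdown from the current icr_timer value.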
+ */ + dev_dbg(ACRN_DBG_LAPIC, "vlapic is software-enabled"); + if (vlapic_periodic_timer(vlapic)) + vlapic_icrtmr_write_handler(vlapic); + } + } +} + +static int +vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset, + uint64_t *data, bool *retu) +{ + struct lapic *lapic = vlapic->apic_page; + int i; + + if (!mmio_access) { + /* + * XXX Generate GP fault for MSR accesses in xAPIC mode + */ + dev_dbg(ACRN_DBG_LAPIC, + "x2APIC MSR read from offset %#lx in xAPIC mode", + offset); + *data = 0; + goto done; + } + + if (offset > sizeof(*lapic)) { + *data = 0; + goto done; + } + + offset &= ~3; + switch (offset) { + case APIC_OFFSET_ID: + *data = lapic->id; + break; + case APIC_OFFSET_VER: + *data = lapic->version; + break; + case APIC_OFFSET_TPR: + *data = vlapic_get_tpr(vlapic); + break; + case APIC_OFFSET_APR: + *data = lapic->apr; + break; + case APIC_OFFSET_PPR: + *data = lapic->ppr; + break; + case APIC_OFFSET_EOI: + *data = lapic->eoi; + break; + case APIC_OFFSET_LDR: + *data = lapic->ldr; + break; + case APIC_OFFSET_DFR: + *data = lapic->dfr; + break; + case APIC_OFFSET_SVR: + *data = lapic->svr; + break; + case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: + i = (offset - APIC_OFFSET_ISR0) >> 4; + *data = lapic->isr[i].val; + break; + case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: + i = (offset - APIC_OFFSET_TMR0) >> 4; + *data = lapic->tmr[i].val; + break; + case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: + i = (offset - APIC_OFFSET_IRR0) >> 4; + *data = lapic->irr[i].val; + break; + case APIC_OFFSET_ESR: + *data = lapic->esr; + break; + case APIC_OFFSET_ICR_LOW: + *data = lapic->icr_lo; + break; + case APIC_OFFSET_ICR_HI: + *data = lapic->icr_hi; + break; + case APIC_OFFSET_CMCI_LVT: + case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: + *data = vlapic_get_lvt(vlapic, offset); +#ifdef INVARIANTS + reg = vlapic_get_lvtptr(vlapic, offset); + ASSERT(*data == *reg, + "inconsistent lvt value at offset %#lx: %#lx/%#x", + offset, *data, *reg); +#endif + break; + case APIC_OFFSET_TIMER_ICR: + /* if TSCDEADLINE mode always return 0*/ + if (VLAPIC_TSCDEADLINE(lapic->lvt_timer)) + *data = 0; + else + *data = vlapic_get_ccr(vlapic); + break; + case APIC_OFFSET_TIMER_CCR: + /* TODO */ + *data = vlapic_get_ccr(vlapic); + break; + case APIC_OFFSET_TIMER_DCR: + *data = lapic->dcr_timer; + break; + case APIC_OFFSET_SELF_IPI: + /* + * XXX generate a GP fault if vlapic is in x2apic mode + */ + *data = 0; + break; + case APIC_OFFSET_RRR: + default: + *data = 0; + *retu = true; + break; + } +done: + dev_dbg(ACRN_DBG_LAPIC, + "vlapic read offset %#x, data %#lx", offset, *data); + return 0; +} + +static int +vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset, + uint64_t data, bool *retu) +{ + struct lapic *lapic = vlapic->apic_page; + uint32_t *regptr; + int retval; + + ASSERT((offset & 0xf) == 0 && offset < CPU_PAGE_SIZE, + "vlapic_write: invalid offset %#lx", offset); + + dev_dbg(ACRN_DBG_LAPIC, "vlapic write offset %#lx, data %#lx", + offset, data); + + if (offset > sizeof(*lapic)) + return 0; + + /* + * XXX Generate GP fault for MSR accesses in xAPIC mode + */ + if (!mmio_access) { + dev_dbg(ACRN_DBG_LAPIC, + "x2APIC MSR write of %#lx to offset %#lx in xAPIC mode", + data, offset); + return 0; + } + + retval = 0; + switch (offset) { + case APIC_OFFSET_ID: + lapic->id = data; + vlapic_id_write_handler(vlapic); + break; + case APIC_OFFSET_TPR: + vlapic_set_tpr(vlapic, data & 0xff); + break; + case APIC_OFFSET_EOI: + vlapic_process_eoi(vlapic); + break; + case APIC_OFFSET_LDR: + 
lapic->ldr = data; + vlapic_ldr_write_handler(vlapic); + break; + case APIC_OFFSET_DFR: + lapic->dfr = data; + vlapic_dfr_write_handler(vlapic); + break; + case APIC_OFFSET_SVR: + lapic->svr = data; + vlapic_svr_write_handler(vlapic); + break; + case APIC_OFFSET_ICR_LOW: + lapic->icr_lo = data; + retval = vlapic_icrlo_write_handler(vlapic, retu); + break; + case APIC_OFFSET_ICR_HI: + lapic->icr_hi = data; + break; + case APIC_OFFSET_CMCI_LVT: + case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: + regptr = vlapic_get_lvtptr(vlapic, offset); + *regptr = data; + vlapic_lvt_write_handler(vlapic, offset); + break; + case APIC_OFFSET_TIMER_ICR: + /* if TSCDEADLINE mode ignore icr_timer */ + if (VLAPIC_TSCDEADLINE(lapic->lvt_timer)) + break; + lapic->icr_timer = data; + vlapic_icrtmr_write_handler(vlapic); + break; + + case APIC_OFFSET_TIMER_DCR: + lapic->dcr_timer = data; + vlapic_dcr_write_handler(vlapic); + break; + + case APIC_OFFSET_ESR: + vlapic_esr_write_handler(vlapic); + break; + + case APIC_OFFSET_SELF_IPI: + break; + + case APIC_OFFSET_VER: + case APIC_OFFSET_APR: + case APIC_OFFSET_PPR: + case APIC_OFFSET_RRR: + case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: + case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: + case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: + break; + case APIC_OFFSET_TIMER_CCR: + break; + default: + /* Read only */ + break; + } + + return retval; +} + +static void +vlapic_reset(struct vlapic *vlapic) +{ + struct lapic *lapic; + + lapic = vlapic->apic_page; + memset(lapic, 0, sizeof(struct lapic)); + + lapic->id = vlapic_build_id(vlapic); + lapic->version = VLAPIC_VERSION; + lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT); + lapic->dfr = 0xffffffff; + lapic->svr = APIC_SVR_VECTOR; + vlapic_mask_lvts(vlapic); + vlapic_reset_tmr(vlapic); + + lapic->dcr_timer = 0; + vlapic_dcr_write_handler(vlapic); + + vlapic->svr_last = lapic->svr; + + if (vlapic->last_timer > 0) { + cancel_timer(vlapic->last_timer, vlapic->vcpu->pcpu_id); + vlapic->last_timer = -1; + } +} + +void +vlapic_init(struct vlapic *vlapic) +{ + ASSERT(vlapic->vm != NULL, "vlapic_init: vm is not initialized"); + ASSERT(vlapic->vcpu->vcpu_id >= 0 && + vlapic->vcpu->vcpu_id < phy_cpu_num, + "vlapic_init: vcpu_id is not initialized"); + ASSERT(vlapic->apic_page != NULL, + "vlapic_init: apic_page is not initialized"); + + /* + * If the vlapic is configured in x2apic mode then it will be + * accessed in the critical section via the MSR emulation code. + * + * Therefore the timer mutex must be a spinlock because blockable + * mutexes cannot be acquired in a critical section. 
+ */ + spinlock_init(&vlapic->timer_mtx); + + vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED; + + if (vlapic->vcpu->vcpu_id == 0) + vlapic->msr_apicbase |= APICBASE_BSP; + + vlapic_reset(vlapic); +} + +void +vlapic_cleanup(__unused struct vlapic *vlapic) +{ + callout_stop(&vlapic->callout); +} + +static uint64_t +vlapic_get_apicbase(struct vlapic *vlapic) +{ + + return vlapic->msr_apicbase; +} + +static int +vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new) +{ + + if (vlapic->msr_apicbase != new) { + dev_dbg(ACRN_DBG_LAPIC, + "NOT support to change APIC_BASE MSR from %#lx to %#lx", + vlapic->msr_apicbase, new); + return (-1); + } + + return 0; +} + +void +vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, + int delmode, int vec) +{ + bool lowprio; + int vcpu_id; + uint64_t dmask; + struct vcpu *target_vcpu; + + if (delmode != IOAPIC_RTE_DELFIXED && + delmode != IOAPIC_RTE_DELLOPRI && + delmode != IOAPIC_RTE_DELEXINT) { + dev_dbg(ACRN_DBG_LAPIC, + "vlapic intr invalid delmode %#x", delmode); + return; + } + lowprio = (delmode == IOAPIC_RTE_DELLOPRI); + + /* + * We don't provide any virtual interrupt redirection hardware so + * all interrupts originating from the ioapic or MSI specify the + * 'dest' in the legacy xAPIC format. + */ + vlapic_calcdest(vm, &dmask, dest, phys, lowprio); + + while ((vcpu_id = bitmap_ffs(&dmask)) >= 0) { + bitmap_clr(vcpu_id, &dmask); + target_vcpu = vcpu_from_vid(vm, vcpu_id); + if (target_vcpu == NULL) + return; + + /* only make request when vlapic enabled */ + if (vlapic_enabled(target_vcpu->arch_vcpu.vlapic)) { + if (delmode == IOAPIC_RTE_DELEXINT) + vcpu_inject_extint(target_vcpu); + else + vlapic_set_intr(target_vcpu, vec, level); + } + } +} + +bool +vlapic_enabled(struct vlapic *vlapic) +{ + struct lapic *lapic = vlapic->apic_page; + + if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 && + (lapic->svr & APIC_SVR_ENABLE) != 0) + return true; + else + return false; +} + +void +vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level) +{ + struct lapic *lapic; + struct lapic_reg *tmrptr; + uint32_t mask; + int idx; + + lapic = vlapic->apic_page; + tmrptr = &lapic->tmr[0]; + idx = vector / 32; + mask = 1 << (vector % 32); + if (level) + tmrptr[idx].val |= mask; + else + tmrptr[idx].val &= ~mask; +} + +/* + * APICv batch set tmr will try to set multi vec at the same time + * to avoid unnecessary VMCS read/update. + */ +void +vlapic_apicv_batch_set_tmr(struct vlapic *vlapic) +{ + if (vlapic->ops.apicv_batch_set_tmr != NULL) + (*vlapic->ops.apicv_batch_set_tmr)(vlapic); +} + +void +vlapic_apicv_set_tmr(struct vlapic *vlapic, int vector, bool level) +{ + if (vlapic->ops.apicv_set_tmr != NULL) + (*vlapic->ops.apicv_set_tmr)(vlapic, vector, level); +} + +void +vlapic_reset_tmr(struct vlapic *vlapic) +{ + int vector; + + dev_dbg(ACRN_DBG_LAPIC, + "vlapic resetting all vectors to edge-triggered"); + + for (vector = 0; vector <= 255; vector++) + vlapic_set_tmr(vlapic, vector, false); + + vcpu_make_request(vlapic->vcpu, ACRN_REQUEST_TMR_UPDATE); +} + +void +vlapic_set_tmr_one_vec(struct vlapic *vlapic, __unused int delmode, + int vector, bool level) +{ + ASSERT((vector >= 0) && (vector <= NR_MAX_VECTOR), + "invalid vector %d", vector); + + /* + * A level trigger is valid only for fixed and lowprio delivery modes. 
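+ * For any other delivery mode the request is ignored and the TMR
+ * bit for the vector is left unchanged.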
+ */ + if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) { + dev_dbg(ACRN_DBG_LAPIC, + "Ignoring level trigger-mode for delivery-mode %d", + delmode); + return; + } + + /* NOTE + * We don't check whether the vcpu is in the dest here. That means + * all vcpus of vm will do tmr update. + * + * If there is new caller to this function, need to refine this + * part of work. + */ + dev_dbg(ACRN_DBG_LAPIC, "vector %d set to level-triggered", vector); + vlapic_set_tmr(vlapic, vector, level); +} + +int +vlapic_set_intr(struct vcpu *vcpu, int vector, bool level) +{ + struct vlapic *vlapic; + int ret = 0; + + if (vcpu == NULL) + return -EINVAL; + + /* + * According to section "Maskable Hardware Interrupts" in Intel SDM + * vectors 16 through 255 can be delivered through the local APIC. + */ + if (vector < 16 || vector > 255) + return -EINVAL; + + vlapic = vcpu->arch_vcpu.vlapic; + if (vlapic_set_intr_ready(vlapic, vector, level)) + vcpu_make_request(vcpu, ACRN_REQUEST_EVENT); + else + ret = -ENODEV; + + return ret; +} + +int +vlapic_set_local_intr(struct vm *vm, int cpu_id, int vector) +{ + struct vlapic *vlapic; + uint64_t dmask; + int error; + + if (cpu_id < -1 || cpu_id >= phy_cpu_num) + return -EINVAL; + + if (cpu_id == -1) + dmask = vm_active_cpus(vm); + else + bitmap_setof(cpu_id, &dmask); + error = 0; + while ((cpu_id = bitmap_ffs(&dmask)) >= 0) { + bitmap_clr(cpu_id, &dmask); + vlapic = vm_lapic_from_vcpu_id(vm, cpu_id); + error = vlapic_trigger_lvt(vlapic, vector); + if (error) + break; + } + + return error; +} + +int +vlapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg) +{ + int delmode, vec; + uint32_t dest; + bool phys; + + dev_dbg(ACRN_DBG_LAPIC, "lapic MSI addr: %#lx msg: %#lx", addr, msg); + + if ((addr & MSI_ADDR_MASK) != MSI_ADDR_BASE) { + dev_dbg(ACRN_DBG_LAPIC, "lapic MSI invalid addr %#lx", addr); + return -1; + } + + /* + * Extract the x86-specific fields from the MSI addr/msg + * params according to the Intel Arch spec, Vol3 Ch 10. + * + * The PCI specification does not support level triggered + * MSI/MSI-X so ignore trigger level in 'msg'. + * + * The 'dest' is interpreted as a logical APIC ID if both + * the Redirection Hint and Destination Mode are '1' and + * physical otherwise. + */ + dest = (addr >> 12) & 0xff; + phys = ((addr & (MSI_ADDR_RH | MSI_ADDR_LOG)) != + (MSI_ADDR_RH | MSI_ADDR_LOG)); + delmode = msg & APIC_DELMODE_MASK; + vec = msg & 0xff; + + dev_dbg(ACRN_DBG_LAPIC, "lapic MSI %s dest %#x, vec %d", + phys ? 
"physical" : "logical", dest, vec); + + vlapic_deliver_intr(vm, LAPIC_TRIG_EDGE, dest, phys, delmode, vec); + return 0; +} + +static bool +x2apic_msr(uint32_t msr) +{ + if (msr >= 0x800 && msr <= 0xBFF) + return true; + else + return false; +} + +static uint32_t +x2apic_msr_to_regoff(uint32_t msr) +{ + + return (msr - 0x800) << 4; +} + +bool +vlapic_msr(uint32_t msr) +{ + + if (x2apic_msr(msr) || (msr == MSR_IA32_APIC_BASE)) + return true; + else + return false; +} + +/* interrupt context */ +static int tsc_periodic_time(uint64_t data) +{ + struct vcpu *vcpu = (struct vcpu *)data; + struct vlapic *vlapic; + struct lapic *lapic; + + vlapic = vcpu->arch_vcpu.vlapic; + lapic = vlapic->apic_page; + + /* inject vcpu timer interrupt if existing */ + if (VLAPIC_TSCDEADLINE(lapic->lvt_timer)) + vlapic_intr_edge(vcpu, lapic->lvt_timer & 0xFF); + + return 0; +} + +int +vlapic_rdmsr(struct vcpu *vcpu, uint32_t msr, uint64_t *rval, + bool *retu) +{ + int error; + uint32_t offset; + struct vlapic *vlapic; + + dev_dbg(ACRN_DBG_LAPIC, "cpu[%d] rdmsr: %x", vcpu->vcpu_id, msr); + vlapic = vcpu->arch_vcpu.vlapic; + + if (msr == MSR_IA32_APIC_BASE) { + *rval = vlapic_get_apicbase(vlapic); + error = 0; + } else { + offset = x2apic_msr_to_regoff(msr); + error = vlapic_read(vlapic, 0, offset, rval, retu); + } + + return error; +} + +int +vlapic_wrmsr(struct vcpu *vcpu, uint32_t msr, uint64_t val, bool *retu) +{ + int error; + uint32_t offset; + struct vlapic *vlapic; + struct lapic *lapic; + + vlapic = vcpu->arch_vcpu.vlapic; + lapic = vlapic->apic_page; + + if (msr == MSR_IA32_APIC_BASE) { + error = vlapic_set_apicbase(vlapic, val); + } else if (msr == MSR_IA32_TSC_DEADLINE) { + error = 0; + if (!VLAPIC_TSCDEADLINE(lapic->lvt_timer)) + return error; + + if (val == 0UL) { + cancel_timer(vlapic->last_timer, vcpu->pcpu_id); + vlapic->last_timer = -1; + } else { + vlapic->last_timer = update_timer(vlapic->last_timer, + tsc_periodic_time, + (long)vcpu, + val); + + if (vlapic->last_timer < 0) { + pr_err("vLAPIC failed to add timer on VM %d VCPU%d", + vcpu->vm->attr.id, vcpu->vcpu_id); + error = -1; + } + } + } else { + offset = x2apic_msr_to_regoff(msr); + error = vlapic_write(vlapic, 0, offset, val, retu); + } + + dev_dbg(ACRN_DBG_LAPIC, "cpu[%d] wrmsr: %x val=%#x", + vcpu->vcpu_id, msr, val); + return error; +} + +int +vlapic_mmio_write(struct vcpu *vcpu, uint64_t gpa, uint64_t wval, int size) +{ + int error; + uint64_t off; + struct vlapic *vlapic; + bool arg; + + off = gpa - DEFAULT_APIC_BASE; + + /* + * Memory mapped local apic accesses must be 4 bytes wide and + * aligned on a 16-byte boundary. + */ + if (size != 4 || off & 0xf) + return -EINVAL; + + vlapic = vcpu->arch_vcpu.vlapic; + error = vlapic_write(vlapic, 1, off, wval, &arg); + return error; +} + +int +vlapic_mmio_read(struct vcpu *vcpu, uint64_t gpa, uint64_t *rval, + __unused int size) +{ + int error; + uint64_t off; + struct vlapic *vlapic; + bool arg; + + off = gpa - DEFAULT_APIC_BASE; + + /* + * Memory mapped local apic accesses should be aligned on a + * 16-byte boundary. They are also suggested to be 4 bytes + * wide, alas not all OSes follow suggestions. 
+ */ + off &= ~3; + if (off & 0xf) + return -EINVAL; + + vlapic = vcpu->arch_vcpu.vlapic; + error = vlapic_read(vlapic, 1, off, rval, &arg); + return error; +} + +int vlapic_mmio_access_handler(struct vcpu *vcpu, struct mem_io *mmio, + __unused void *handler_private_data) +{ + uint64_t gpa = mmio->paddr; + int ret = 0; + + /* Note all RW to LAPIC are 32-Bit in size */ + ASSERT(mmio->access_size == 4, + "All RW to LAPIC must be 32-bits in size"); + + if (mmio->read_write == HV_MEM_IO_READ) { + ret = vlapic_mmio_read(vcpu, + gpa, + &mmio->value, + mmio->access_size); + mmio->mmio_status = MMIO_TRANS_VALID; + + } else if (mmio->read_write == HV_MEM_IO_WRITE) { + ret = vlapic_mmio_write(vcpu, + gpa, + mmio->value, + mmio->access_size); + + mmio->mmio_status = MMIO_TRANS_VALID; + } + + return ret; +} + +int vlapic_create(struct vcpu *vcpu) +{ + void *apic_page = alloc_page(); + struct vlapic *vlapic = calloc(1, sizeof(struct vlapic)); + + ASSERT(vlapic != NULL, "vlapic allocate failed"); + ASSERT(apic_page != NULL, "apic reg page allocate failed"); + + memset((void *)apic_page, 0, CPU_PAGE_SIZE); + vlapic->vm = vcpu->vm; + vlapic->vcpu = vcpu; + vlapic->apic_page = (struct lapic *) apic_page; + + if (is_apicv_enabled()) { + vlapic->ops.apicv_set_intr_ready = apicv_set_intr_ready; + vlapic->ops.apicv_pending_intr = apicv_pending_intr; + vlapic->ops.apicv_set_tmr = apicv_set_tmr; + vlapic->ops.apicv_batch_set_tmr = apicv_batch_set_tmr; + + vlapic->pir_desc = + (struct pir_desc *)(&(vlapic->pir)); + if (is_vcpu_bsp(vcpu)) { + ept_mmap(vcpu->vm, + apicv_get_apic_access_addr(vcpu->vm), + DEFAULT_APIC_BASE, + CPU_PAGE_SIZE, + MAP_MMIO, + MMU_MEM_ATTR_WRITE | + MMU_MEM_ATTR_READ | + MMU_MEM_ATTR_UNCACHED); + } + } + + vcpu->arch_vcpu.vlapic = vlapic; + + vlapic_init(vlapic); + + if (!is_apicv_enabled()) { + return register_mmio_emulation_handler(vcpu->vm, + vlapic_mmio_access_handler, + (uint64_t)DEFAULT_APIC_BASE, + (uint64_t)DEFAULT_APIC_BASE + + CPU_PAGE_SIZE, + (void *) 0); + } + + return 0; +} + +void vlapic_free(struct vcpu *vcpu) +{ + struct vlapic *vlapic = NULL; + void *apic_page = NULL; + + if (vcpu == NULL) + return; + + vlapic = vcpu->arch_vcpu.vlapic; + if (vlapic == NULL) + return; + + if (vlapic->last_timer > 0) + cancel_timer(vlapic->last_timer, vcpu->pcpu_id); + + if (!is_apicv_enabled()) { + unregister_mmio_emulation_handler(vcpu->vm, + (uint64_t)DEFAULT_APIC_BASE, + (uint64_t)DEFAULT_APIC_BASE + CPU_PAGE_SIZE); + } + + apic_page = vlapic->apic_page; + if (apic_page == NULL) { + free(vlapic); + return; + } + + free(apic_page); + free(vlapic); +} + +/** + * APIC-v functions + * **/ +static int +apicv_set_intr_ready(struct vlapic *vlapic, int vector, __unused bool level) +{ + struct pir_desc *pir_desc; + uint64_t mask; + int idx, notify; + + pir_desc = vlapic->pir_desc; + + idx = vector / 64; + mask = 1UL << (vector % 64); + + atomic_set_long(&pir_desc->pir[idx], mask); + notify = atomic_cmpset_long(&pir_desc->pending, 0, 1); + return notify; +} + +static int +apicv_pending_intr(struct vlapic *vlapic, __unused int *vecptr) +{ + struct pir_desc *pir_desc; + struct lapic *lapic; + uint64_t pending, pirval; + uint32_t ppr, vpr; + int i; + + pir_desc = vlapic->pir_desc; + + pending = atomic_load_acq_long(&pir_desc->pending); + if (!pending) + return 0; + + lapic = vlapic->apic_page; + ppr = lapic->ppr & 0xF0; + + if (ppr == 0) + return 1; + + for (i = 3; i >= 0; i--) { + pirval = pir_desc->pir[i]; + if (pirval != 0) { + vpr = (i * 64 + flsl(pirval)) & 0xF0; + return (vpr > ppr); + } + } + 
return 0; +} + +static void +apicv_set_tmr(__unused struct vlapic *vlapic, int vector, bool level) +{ + uint64_t mask, val; + + mask = 1UL << (vector % 64); + + val = exec_vmread(VMX_EOI_EXIT(vector)); + if (level) + val |= mask; + else + val &= ~mask; + + exec_vmwrite(VMX_EOI_EXIT(vector), val); +} + +/* Update the VMX_EOI_EXIT according to related tmr */ +#define EOI_STEP_LEN (64) +#define TMR_STEP_LEN (32) +static void +apicv_batch_set_tmr(struct vlapic *vlapic) +{ + struct lapic *lapic = vlapic->apic_page; + uint64_t val; + struct lapic_reg *ptr; + unsigned int s, e; + + ptr = &lapic->tmr[0]; + s = 0; + e = 256; + + while (s < e) { + val = ptr[s/TMR_STEP_LEN + 1].val; + val <<= TMR_STEP_LEN; + val |= ptr[s/TMR_STEP_LEN].val; + exec_vmwrite64(VMX_EOI_EXIT(s), val); + + s += EOI_STEP_LEN; + } +} + +/** + *APIC-v: Get the HPA to APIC-access page + * **/ +uint64_t +apicv_get_apic_access_addr(__unused struct vm *vm) +{ + if (apicv_apic_access_addr == NULL) { + apicv_apic_access_addr = alloc_page(); + ASSERT(apicv_apic_access_addr != NULL, + "apicv allocate failed."); + + memset((void *)apicv_apic_access_addr, 0, CPU_PAGE_SIZE); + } + return (uint64_t)(apicv_apic_access_addr); +} + +/** + *APIC-v: Get the HPA to virtualized APIC registers page + * **/ +uint64_t +apicv_get_apic_page_addr(struct vlapic *vlapic) +{ + return (uint64_t)(vlapic->apic_page); +} + +/* + * Transfer the pending interrupts in the PIR descriptor to the IRR + * in the virtual APIC page. + */ + +void +apicv_inject_pir(struct vlapic *vlapic) +{ + struct pir_desc *pir_desc; + struct lapic *lapic; + uint64_t val, pirval; + int rvi, pirbase = -1, i; + uint16_t intr_status_old, intr_status_new; + struct lapic_reg *irr = NULL; + + pir_desc = vlapic->pir_desc; + if (atomic_cmpset_long(&pir_desc->pending, 1, 0) == 0) + return; + + pirval = 0; + pirbase = -1; + lapic = vlapic->apic_page; + irr = &lapic->irr[0]; + + for (i = 0; i < 4; i++) { + val = atomic_readandclear_long(&pir_desc->pir[i]); + if (val != 0) { + irr[i * 2].val |= val; + irr[(i * 2) + 1].val |= val >> 32; + + pirbase = 64*i; + pirval = val; + } + } + + /* + * Update RVI so the processor can evaluate pending virtual + * interrupts on VM-entry. + * + * It is possible for pirval to be 0 here, even though the + * pending bit has been set. The scenario is: + * CPU-Y is sending a posted interrupt to CPU-X, which + * is running a guest and processing posted interrupts in h/w. + * CPU-X will eventually exit and the state seen in s/w is + * the pending bit set, but no PIR bits set. 
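+ * The interleaving that produces this state looks as follows: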
+ * + * CPU-X CPU-Y + * (vm running) (host running) + * rx posted interrupt + * CLEAR pending bit + * SET PIR bit + * READ/CLEAR PIR bits + * SET pending bit + * (vm exit) + * pending bit set, PIR 0 + */ + if (pirval != 0) { + rvi = pirbase + flsl(pirval); + + intr_status_old = (uint16_t) + (0xFFFF & + exec_vmread(VMX_GUEST_INTR_STATUS)); + + intr_status_new = (intr_status_old & 0xFF00) | rvi; + if (intr_status_new > intr_status_old) + exec_vmwrite(VMX_GUEST_INTR_STATUS, + intr_status_new); + } +} + +int apicv_access_exit_handler(__unused struct vcpu *vcpu) +{ + TRACE_2L(TRC_VMEXIT_APICV_ACCESS, 0, 0); + return 0; +} + +int apicv_virtualized_eoi_exit_handler(struct vcpu *vcpu) +{ + struct vlapic *vlapic = NULL; + + int vector = exec_vmread(VMX_EXIT_QUALIFICATION) & 0xFF; + struct lapic *lapic; + struct lapic_reg *tmrptr; + uint32_t idx, mask; + + VCPU_RETAIN_RIP(vcpu); + + vlapic = vcpu->arch_vcpu.vlapic; + lapic = vlapic->apic_page; + + tmrptr = &lapic->tmr[0]; + idx = vector / 32; + mask = 1 << (vector % 32); + + if ((tmrptr[idx].val & mask) != 0) { + /* hook to vIOAPIC */ + vioapic_process_eoi(vlapic->vm, vector); + } + + TRACE_2L(TRC_VMEXIT_APICV_VIRT_EOI, vector, 0); + + return 0; +} + +int apicv_write_exit_handler(struct vcpu *vcpu) +{ + bool retu; + uint64_t qual; + int error, handled, offset; + struct vlapic *vlapic = NULL; + + qual = exec_vmread(VMX_EXIT_QUALIFICATION); + offset = (qual & 0xFFF); + + handled = 1; + VCPU_RETAIN_RIP(vcpu); + vlapic = vcpu->arch_vcpu.vlapic; + + switch (offset) { + case APIC_OFFSET_ID: + vlapic_id_write_handler(vlapic); + break; + case APIC_OFFSET_LDR: + vlapic_ldr_write_handler(vlapic); + break; + case APIC_OFFSET_DFR: + vlapic_dfr_write_handler(vlapic); + break; + case APIC_OFFSET_SVR: + vlapic_svr_write_handler(vlapic); + break; + case APIC_OFFSET_ESR: + vlapic_esr_write_handler(vlapic); + break; + case APIC_OFFSET_ICR_LOW: + retu = false; + error = vlapic_icrlo_write_handler(vlapic, &retu); + if (error != 0 || retu) + handled = 0; + break; + case APIC_OFFSET_CMCI_LVT: + case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: + vlapic_lvt_write_handler(vlapic, offset); + break; + case APIC_OFFSET_TIMER_ICR: + vlapic_icrtmr_write_handler(vlapic); + break; + case APIC_OFFSET_TIMER_DCR: + vlapic_dcr_write_handler(vlapic); + break; + default: + handled = 0; + pr_err("Unhandled APIC-Write, offset:0x%x", offset); + break; + } + + TRACE_2L(TRC_VMEXIT_APICV_WRITE, offset, 0); + + return handled; +} diff --git a/hypervisor/arch/x86/guest/vlapic_priv.h b/hypervisor/arch/x86/guest/vlapic_priv.h new file mode 100644 index 000000000..c5d45349d --- /dev/null +++ b/hypervisor/arch/x86/guest/vlapic_priv.h @@ -0,0 +1,153 @@ +/*- + * Copyright (c) 2013 Neel Natu + * Copyright (c) 2017 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VLAPIC_PRIV_H_ +#define _VLAPIC_PRIV_H_ + +/* + * APIC Register: Offset Description + */ +#define APIC_OFFSET_ID 0x20 /* Local APIC ID */ +#define APIC_OFFSET_VER 0x30 /* Local APIC Version */ +#define APIC_OFFSET_TPR 0x80 /* Task Priority Register */ +#define APIC_OFFSET_APR 0x90 /* Arbitration Priority */ +#define APIC_OFFSET_PPR 0xA0 /* Processor Priority Register */ +#define APIC_OFFSET_EOI 0xB0 /* EOI Register */ +#define APIC_OFFSET_RRR 0xC0 /* Remote read */ +#define APIC_OFFSET_LDR 0xD0 /* Logical Destination */ +#define APIC_OFFSET_DFR 0xE0 /* Destination Format Register */ +#define APIC_OFFSET_SVR 0xF0 /* Spurious Vector Register */ +#define APIC_OFFSET_ISR0 0x100 /* In Service Register */ +#define APIC_OFFSET_ISR1 0x110 +#define APIC_OFFSET_ISR2 0x120 +#define APIC_OFFSET_ISR3 0x130 +#define APIC_OFFSET_ISR4 0x140 +#define APIC_OFFSET_ISR5 0x150 +#define APIC_OFFSET_ISR6 0x160 +#define APIC_OFFSET_ISR7 0x170 +#define APIC_OFFSET_TMR0 0x180 /* Trigger Mode Register */ +#define APIC_OFFSET_TMR1 0x190 +#define APIC_OFFSET_TMR2 0x1A0 +#define APIC_OFFSET_TMR3 0x1B0 +#define APIC_OFFSET_TMR4 0x1C0 +#define APIC_OFFSET_TMR5 0x1D0 +#define APIC_OFFSET_TMR6 0x1E0 +#define APIC_OFFSET_TMR7 0x1F0 +#define APIC_OFFSET_IRR0 0x200 /* Interrupt Request Register */ +#define APIC_OFFSET_IRR1 0x210 +#define APIC_OFFSET_IRR2 0x220 +#define APIC_OFFSET_IRR3 0x230 +#define APIC_OFFSET_IRR4 0x240 +#define APIC_OFFSET_IRR5 0x250 +#define APIC_OFFSET_IRR6 0x260 +#define APIC_OFFSET_IRR7 0x270 +#define APIC_OFFSET_ESR 0x280 /* Error Status Register */ +#define APIC_OFFSET_CMCI_LVT 0x2F0 /* Local Vector Table (CMCI) */ +#define APIC_OFFSET_ICR_LOW 0x300 /* Interrupt Command Register */ +#define APIC_OFFSET_ICR_HI 0x310 +#define APIC_OFFSET_TIMER_LVT 0x320 /* Local Vector Table (Timer) */ +#define APIC_OFFSET_THERM_LVT 0x330 /* Local Vector Table (Thermal) */ +#define APIC_OFFSET_PERF_LVT 0x340 /* Local Vector Table (PMC) */ +#define APIC_OFFSET_LINT0_LVT 0x350 /* Local Vector Table (LINT0) */ +#define APIC_OFFSET_LINT1_LVT 0x360 /* Local Vector Table (LINT1) */ +#define APIC_OFFSET_ERROR_LVT 0x370 /* Local Vector Table (ERROR) */ +#define APIC_OFFSET_TIMER_ICR 0x380 /* Timer's Initial Count */ +#define APIC_OFFSET_TIMER_CCR 0x390 /* Timer's Current Count */ +#define APIC_OFFSET_TIMER_DCR 0x3E0 /* Timer's Divide Configuration */ +#define APIC_OFFSET_SELF_IPI 0x3F0 /* Self IPI register */ + +/* + * 16 priority levels with at most one vector injected per level. 
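+ * The stack therefore never holds more than 16 vectors; the extra
+ * slot keeps index 0 unused so that an isrvec_stk_top of 0 means
+ * "no vector currently in service".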
+ */ +#define ISRVEC_STK_SIZE (16 + 1) + +#define VLAPIC_MAXLVT_INDEX APIC_LVT_CMCI + +struct vlapic; + +struct pir_desc { + uint64_t pir[4]; + uint64_t pending; + uint64_t unused[3]; +} __aligned(64); + +struct vlapic_ops { + int (*apicv_set_intr_ready) + (struct vlapic *vlapic, int vector, bool level); + int (*apicv_pending_intr)(struct vlapic *vlapic, int *vecptr); + void (*apicv_intr_accepted)(struct vlapic *vlapic, int vector); + void (*apicv_post_intr)(struct vlapic *vlapic, int hostcpu); + void (*apicv_set_tmr)(struct vlapic *vlapic, int vector, bool level); + void (*apicv_batch_set_tmr)(struct vlapic *vlapic); + void (*enable_x2apic_mode)(struct vlapic *vlapic); +}; + +struct vlapic { + struct vm *vm; + struct vcpu *vcpu; + struct lapic *apic_page; + struct pir_desc *pir_desc; + struct vlapic_ops ops; + + uint32_t esr_pending; + int esr_firing; + + struct callout callout; /* vlapic timer */ + struct bintime timer_fire_bt; /* callout expiry time */ + struct bintime timer_freq_bt; /* timer frequency */ + struct bintime timer_period_bt; /* timer period */ + long last_timer; /* the last timer id */ + + spinlock_t timer_mtx; + + /* + * The 'isrvec_stk' is a stack of vectors injected by the local apic. + * A vector is popped from the stack when the processor does an EOI. + * The vector on the top of the stack is used to compute the + * Processor Priority in conjunction with the TPR. + */ + uint8_t isrvec_stk[ISRVEC_STK_SIZE]; + int isrvec_stk_top; + + uint64_t msr_apicbase; + + /* + * Copies of some registers in the virtual APIC page. We do this for + * a couple of different reasons: + * - to be able to detect what changed (e.g. svr_last) + * - to maintain a coherent snapshot of the register (e.g. lvt_last) + */ + uint32_t svr_last; + uint32_t lvt_last[VLAPIC_MAXLVT_INDEX + 1]; + struct pir_desc pir; +}; + +void vlapic_cleanup(struct vlapic *vlapic); + +#endif /* _VLAPIC_PRIV_H_ */ diff --git a/hypervisor/arch/x86/guest/vm.c b/hypervisor/arch/x86/guest/vm.c new file mode 100644 index 000000000..80519e088 --- /dev/null +++ b/hypervisor/arch/x86/guest/vm.c @@ -0,0 +1,324 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +/* Local variables */ + +/* VMs list */ +struct list_head vm_list = { + .next = &vm_list, + .prev = &vm_list, +}; + +/* Lock for VMs list */ +spinlock_t vm_list_lock = { + .head = 0, + .tail = 0 +}; + +/* used for vmid allocation. And this means the max vm number is 64 */ +static unsigned long vmid_bitmap; + +static void init_vm(struct vm_description *vm_desc, + struct vm *vm_handle) +{ + /* Populate VM attributes from VM description */ + vm_handle->hw.num_vcpus = vm_desc->vm_hw_num_cores; + vm_handle->state_info.privilege = vm_desc->vm_state_info_privilege; + vm_handle->state_info.boot_count = 0; +} + +/* return a pointer to the virtual machine structure associated with + * this VM ID + */ +struct vm *get_vm_from_vmid(int vm_id) +{ + struct vm *vm = NULL; + struct list_head *pos; + + spinlock_obtain(&vm_list_lock); + list_for_each(pos, &vm_list) { + vm = list_entry(pos, struct vm, list); + if (vm->attr.id == vm_id) { + spinlock_release(&vm_list_lock); + return vm; + } + } + spinlock_release(&vm_list_lock); + + return NULL; +} + +int create_vm(struct vm_description *vm_desc, struct vm **rtn_vm) +{ + unsigned int id; + struct vm *vm; + int status = 0; + + if ((vm_desc == NULL) || (rtn_vm == NULL)) + status = -EINVAL; + + if (status == 0) { + /* Allocate memory for virtual machine */ + vm = calloc(1, sizeof(struct vm)); + ASSERT(vm != NULL, "vm allocation failed"); + + /* + * Map Virtual Machine to its VM Description + */ + init_vm(vm_desc, vm); + + + /* Init mmio list */ + INIT_LIST_HEAD(&vm->mmio_list); + + if (vm->hw.num_vcpus == 0) + vm->hw.num_vcpus = phy_cpu_num; + + vm->hw.vcpu_array = + calloc(1, sizeof(struct vcpu *) * vm->hw.num_vcpus); + ASSERT(vm->hw.vcpu_array != NULL, + "vcpu_array allocation failed"); + + for (id = 0; id < sizeof(long) * 8; id++) + if (bitmap_test_and_set(id, &vmid_bitmap) == 0) + break; + vm->attr.id = vm->attr.boot_idx = id; + snprintf(&vm->attr.name[0], MAX_VM_NAME_LEN, "vm_%d", + vm->attr.id); + + atomic_store_rel_int(&vm->hw.created_vcpus, 0); + + /* gpa_lowtop are used for system start up */ + vm->hw.gpa_lowtop = 0; + /* Only for SOS: Configure VM software information */ + /* For UOS: This VM software information is configure in DM */ + if (is_vm0(vm)) { + prepare_vm0_memmap_and_e820(vm); +#ifndef CONFIG_EFI_STUB + status = init_vm0_boot_info(vm); +#endif + } else { + /* populate UOS vm fields according to vm_desc */ + vm->secure_world_enabled = + vm_desc->secure_world_enabled; + memcpy_s(&vm->GUID[0], sizeof(vm->GUID), + &vm_desc->GUID[0], + sizeof(vm_desc->GUID)); + } + + INIT_LIST_HEAD(&vm->list); + spinlock_obtain(&vm_list_lock); + list_add(&vm->list, &vm_list); + spinlock_release(&vm_list_lock); + + /* Ensure VM software information obtained */ + if (status == 0) { + + /* Set up IO bit-mask such that VM exit occurs on + * selected IO ranges + */ + setup_io_bitmap(vm); + + /* Create virtual uart */ + if (is_vm0(vm)) + vm->vuart = vuart_init(vm); + + vm->vpic = 
vpic_init(vm); + + /* vpic wire_mode default is INTR */ + vm->vpic_wire_mode = VPIC_WIRE_INTR; + + /* Allocate full emulated vIOAPIC instance */ + vm->arch_vm.virt_ioapic = vioapic_init(vm); + + /* Populate return VM handle */ + *rtn_vm = vm; + ptdev_vm_init(vm); + vm->sw.req_buf = 0; + + vm->state = VM_CREATED; + } + + } + + /* Return status to caller */ + return status; +} + +int shutdown_vm(struct vm *vm) +{ + int i, status = 0; + struct vcpu *vcpu = NULL; + + if (vm == NULL) + return -EINVAL; + + pause_vm(vm); + + /* Only allow shutdown paused vm */ + if (vm->state != VM_PAUSED) + return -EINVAL; + + foreach_vcpu(i, vm, vcpu) { + reset_vcpu(vcpu); + destroy_vcpu(vcpu); + } + + spinlock_obtain(&vm_list_lock); + list_del_init(&vm->list); + spinlock_release(&vm_list_lock); + + ptdev_vm_deinit(vm); + + /* cleanup and free vioapic */ + vioapic_cleanup(vm->arch_vm.virt_ioapic); + + /* Free EPT allocated resources assigned to VM */ + destroy_ept(vm); + + /* Free MSR bitmap */ + free(vm->arch_vm.msr_bitmap); + + /* TODO: De-initialize I/O Emulation */ + free_io_emulation_resource(vm); + + /* Free iommu_domain */ + if (vm->iommu_domain) + destroy_iommu_domain(vm->iommu_domain); + + bitmap_clr(vm->attr.id, &vmid_bitmap); + + if (vm->vpic) + vpic_cleanup(vm); + + free(vm->hw.vcpu_array); + + /* TODO: De-Configure HV-SW */ + /* Deallocate VM */ + free(vm); + + /* Return status to caller */ + return status; +} + +int start_vm(struct vm *vm) +{ + struct vcpu *vcpu = NULL; + + vm->state = VM_STARTED; + + /* Only start BSP (vid = 0) and let BSP start other APs */ + vcpu = vcpu_from_vid(vm, 0); + ASSERT(vcpu != NULL, "vm%d, vcpu0", vm->attr.id); + schedule_vcpu(vcpu); + + return 0; +} + +/* + * DM only pause vm for shutdown/reboot. If we need to + * extend the pause vm for DM, this API should be extended. + */ +int pause_vm(struct vm *vm) +{ + int i; + struct vcpu *vcpu = NULL; + + if (vm->state == VM_PAUSED) + return 0; + + vm->state = VM_PAUSED; + + foreach_vcpu(i, vm, vcpu) + pause_vcpu(vcpu, VCPU_ZOMBIE); + + return 0; +} + +int vm_resume(struct vm *vm) +{ + int i; + struct vcpu *vcpu = NULL; + + foreach_vcpu(i, vm, vcpu) + resume_vcpu(vcpu); + + vm->state = VM_STARTED; + + return 0; +} + +/* Finally, we will remove the array and only maintain vm0 desc */ +struct vm_description *get_vm_desc(int idx) +{ + struct vm_description_array *vm_desc_array; + + /* Obtain base of user defined VM description array data + * structure + */ + vm_desc_array = (struct vm_description_array *)get_vm_desc_base(); + /* Obtain VM description array base */ + if (idx >= vm_desc_array->num_vm_desc) + return NULL; + else + return &vm_desc_array->vm_desc_array[idx]; +} + +/* Create vm/vcpu for vm0 */ +int prepare_vm0(void) +{ + int i, ret; + struct vm *vm = NULL; + struct vm_description *vm_desc = NULL; + + vm_desc = get_vm_desc(0); + ASSERT(vm_desc, "get vm desc failed"); + ret = create_vm(vm_desc, &vm); + ASSERT(ret == 0, "VM creation failed!"); + + prepare_vcpu(vm, vm_desc->vm_hw_logical_core_ids[0]); + + /* Prepare the AP for vm0 */ + for (i = 1; i < vm_desc->vm_hw_num_cores; i++) + prepare_vcpu(vm, vm_desc->vm_hw_logical_core_ids[i]); + + /* start vm0 BSP automatically */ + start_vm(vm); + + pr_fatal("Start VM0"); + + return 0; +} diff --git a/hypervisor/arch/x86/guest/vmcall.c b/hypervisor/arch/x86/guest/vmcall.c new file mode 100644 index 000000000..8f323b0e7 --- /dev/null +++ b/hypervisor/arch/x86/guest/vmcall.c @@ -0,0 +1,148 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +int vmcall_handler(struct vcpu *vcpu) +{ + int64_t ret = 0; + struct vm *vm = vcpu->vm; + struct run_context *cur_context = + &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + /* hypercall ID from guest*/ + uint64_t hypcall_id = cur_context->guest_cpu_regs.regs.r8; + /* hypercall param1 from guest*/ + uint64_t param1 = cur_context->guest_cpu_regs.regs.rdi; + /* hypercall param2 from guest*/ + uint64_t param2 = cur_context->guest_cpu_regs.regs.rsi; + /* hypercall param3 from guest, reserved*/ + /* uint64_t param3 = cur_context->guest_cpu_regs.regs.rdx; */ + /* hypercall param4 from guest, reserved*/ + /* uint64_t param4 = cur_context->guest_cpu_regs.regs.rcx; */ + + /* Dispatch the hypercall handler */ + switch (hypcall_id) { + case HC_GET_API_VERSION: + ret = hcall_get_api_version(vm, param1); + break; + + case HC_CREATE_VM: + ret = hcall_create_vm(vm, param1); + break; + + case HC_DESTROY_VM: + ret = hcall_destroy_vm(param1); + break; + + case HC_START_VM: + ret = hcall_resume_vm(param1); + break; + + case HC_PAUSE_VM: + ret = hcall_pause_vm(param1); + break; + + case HC_CREATE_VCPU: + ret = hcall_create_vcpu(vm, param1, param2); + break; + + case HC_ASSERT_IRQLINE: + ret = hcall_assert_irqline(vm, param1, param2); + break; + + case HC_DEASSERT_IRQLINE: + ret = hcall_deassert_irqline(vm, param1, param2); + break; + + case HC_PULSE_IRQLINE: + ret = hcall_pulse_irqline(vm, param1, param2); + break; + + case HC_INJECT_MSI: + ret = hcall_inject_msi(vm, param1, param2); + break; + + case HC_SET_IOREQ_BUFFER: + ret = hcall_set_ioreq_buffer(vm, param1, param2); + break; + + case HC_NOTIFY_REQUEST_FINISH: + ret = hcall_notify_req_finish(param1, param2); + break; + + case HC_VM_SET_MEMMAP: + ret = hcall_set_vm_memmap(vm, param1, param2); + break; + + case HC_VM_PCI_MSIX_REMAP: + ret = hcall_remap_pci_msix(vm, param1, param2); + break; + + case HC_VM_GPA2HPA: + ret = hcall_gpa_to_hpa(vm, param1, param2); + break; + + case 
HC_ASSIGN_PTDEV: + ret = hcall_assign_ptdev(vm, param1, param2); + break; + + case HC_DEASSIGN_PTDEV: + ret = hcall_deassign_ptdev(vm, param1, param2); + break; + + case HC_SET_PTDEV_INTR_INFO: + ret = hcall_set_ptdev_intr_info(vm, param1, param2); + break; + + case HC_RESET_PTDEV_INTR_INFO: + ret = hcall_reset_ptdev_intr_info(vm, param1, param2); + break; + + case HC_SETUP_SBUF: + ret = hcall_setup_sbuf(vm, param1); + break; + + default: + pr_err("op %d: Invalid hypercall\n", hypcall_id); + ret = -1; + break; + } + + cur_context->guest_cpu_regs.regs.rax = ret; + + TRACE_2L(TRC_VMEXIT_VMCALL, vm->attr.id, hypcall_id); + + return 0; +} diff --git a/hypervisor/arch/x86/guest/vmsr.c b/hypervisor/arch/x86/guest/vmsr.c new file mode 100644 index 000000000..edc456ccd --- /dev/null +++ b/hypervisor/arch/x86/guest/vmsr.c @@ -0,0 +1,321 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include + +/*MRS need to be emulated, the order in this array better as freq of ops*/ +static const uint32_t emulated_msrs[] = { + MSR_IA32_TSC_DEADLINE, /* Enable TSC_DEADLINE VMEXIT */ + +/* following MSR not emulated now */ +/* + * MSR_IA32_APIC_BASE, + * MSR_IA32_SYSENTER_CS, + * MSR_IA32_SYSENTER_ESP, + * MSR_IA32_SYSENTER_EIP, + * MSR_IA32_TSC_AUX, + * MSR_IA32_TIME_STAMP_COUNTER, + */ +}; + +/* the index is matched with emulated msrs array*/ +enum { + IDX_TSC_DEADLINE, + + IDX_MAX_MSR +}; + +static void enable_msr_interception(uint8_t *bitmap, uint32_t msr) +{ + uint8_t *read_map; + uint8_t *write_map; + uint8_t value; + /* low MSR */ + if (msr < 0x1FFF) { + read_map = bitmap; + write_map = bitmap + 2048; + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + read_map = bitmap + 1024; + write_map = bitmap + 3072; + } else { + pr_err("Invalid MSR"); + return; + } + + msr &= 0x1FFF; + value = read_map[(msr>>3)]; + value |= 1<<(msr%8); + /* right now we trap for both r/w */ + read_map[(msr>>3)] = value; + write_map[(msr>>3)] = value; +} + +/* not used now just leave it for some cases it may be used as API*/ +void disable_msr_interception(uint8_t *bitmap, uint32_t msr) +{ + uint8_t *read_map; + uint8_t *write_map; + uint8_t value; + /* low MSR */ + if (msr < 0x1FFF) { + read_map = bitmap; + write_map = bitmap + 2048; + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + read_map = bitmap + 1024; + write_map = bitmap + 3072; + } else { + pr_err("Invalid MSR"); + return; + } + + msr &= 0x1FFF; + value = read_map[(msr>>3)]; + value &= ~(1<<(msr%8)); + /* right now we trap for both r/w */ + read_map[(msr>>3)] = value; + write_map[(msr>>3)] = value; +} + +void init_msr_emulation(struct vcpu *vcpu) +{ + uint32_t i = 0; + uint32_t msrs_count = ARRAY_SIZE(emulated_msrs); + void *msr_bitmap; + uint64_t value64; + + ASSERT(msrs_count == IDX_MAX_MSR, + "MSR ID should be matched with emulated_msrs"); + + /*msr bitmap, just allocated/init once, and used for all vm's vcpu*/ + if (is_vcpu_bsp(vcpu)) { + + /* Allocate and initialize memory for MSR bitmap region*/ + vcpu->vm->arch_vm.msr_bitmap = alloc_page(); + ASSERT(vcpu->vm->arch_vm.msr_bitmap, ""); + memset(vcpu->vm->arch_vm.msr_bitmap, 0x0, CPU_PAGE_SIZE); + + msr_bitmap = vcpu->vm->arch_vm.msr_bitmap; + + for (i = 0; i < msrs_count; i++) + enable_msr_interception(msr_bitmap, emulated_msrs[i]); + + /* below MSR protected from guest OS, if access to inject gp*/ + enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_CAP); + enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_DEF_TYPE); + + for (i = MSR_IA32_MTRR_PHYSBASE_0; + i <= MSR_IA32_MTRR_PHYSMASK_9; i++) { + enable_msr_interception(msr_bitmap, i); + } + + enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_FIX64K_00000); + enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_FIX16K_80000); + enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_FIX16K_A0000); + + for (i = MSR_IA32_MTRR_FIX4K_C0000; + i <= MSR_IA32_MTRR_FIX4K_F8000; i++) { + enable_msr_interception(msr_bitmap, i); + } + } + + /* Set up MSR bitmap - pg 2904 24.6.9 */ + value64 = (int64_t) vcpu->vm->arch_vm.msr_bitmap; + exec_vmwrite64(VMX_MSR_BITMAP_FULL, value64); + pr_dbg("VMX_MSR_BITMAP: 0x%016llx ", value64); + + vcpu->guest_msrs = (uint64_t *)calloc(msrs_count, sizeof(uint64_t)); + + ASSERT(vcpu->guest_msrs != NULL, ""); + memset(vcpu->guest_msrs, 0, msrs_count * sizeof(uint64_t)); +} + +int rdmsr_handler(struct vcpu *vcpu) +{ + uint32_t msr; + uint64_t v = 0; + uint32_t id; + int 
cur_context = vcpu->arch_vcpu.cur_context; + + /* Read the msr value */ + msr = vcpu->arch_vcpu.contexts[cur_context].guest_cpu_regs.regs.rcx; + + /* Do the required processing for each msr case */ + switch (msr) { + case MSR_IA32_TSC_DEADLINE: + { + v = vcpu->guest_msrs[IDX_TSC_DEADLINE]; + break; + } + + case MSR_IA32_MTRR_CAP: + case MSR_IA32_MTRR_DEF_TYPE: + case MSR_IA32_MTRR_PHYSBASE_0 ... MSR_IA32_MTRR_PHYSMASK_9: + case MSR_IA32_MTRR_FIX64K_00000 ... MSR_IA32_MTRR_FIX4K_F8000: + { + vcpu_inject_gp(vcpu); + break; + } + + /* following MSR not emulated now just left for future */ + case MSR_IA32_SYSENTER_CS: + { + v = exec_vmread(VMX_GUEST_IA32_SYSENTER_CS); + break; + } + case MSR_IA32_SYSENTER_ESP: + { + v = exec_vmread(VMX_GUEST_IA32_SYSENTER_ESP); + break; + } + case MSR_IA32_SYSENTER_EIP: + { + v = exec_vmread(VMX_GUEST_IA32_SYSENTER_EIP); + break; + } + case MSR_IA32_TSC_AUX: + { + v = vcpu->arch_vcpu.msr_tsc_aux; + break; + } + case MSR_IA32_TIME_STAMP_COUNTER: + { + /* Read the host TSC value */ + CPU_RDTSCP_EXECUTE(&v, &id); + + /* Add the TSC_offset to host TSC and return the value */ + v += exec_vmread64(VMX_TSC_OFFSET_FULL); + break; + } + case MSR_IA32_APIC_BASE: + { + bool ret; + /* Read APIC base */ + vlapic_rdmsr(vcpu, msr, &v, &ret); + break; + } + default: + { + pr_warn("rdmsr: %lx should not come here!", msr); + v = 0; + break; + } + } + + /* Store the MSR contents in RAX and RDX */ + vcpu->arch_vcpu.contexts[cur_context].guest_cpu_regs.regs.rax = + v & 0xffffffff; + vcpu->arch_vcpu.contexts[cur_context].guest_cpu_regs.regs.rdx = v >> 32; + + TRACE_2L(TRC_VMEXIT_RDMSR, msr, v); + + return 0; +} + +int wrmsr_handler(struct vcpu *vcpu) +{ + uint32_t msr; + uint64_t v; + struct run_context *cur_context = + &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + + /* Read the MSR ID */ + msr = cur_context->guest_cpu_regs.regs.rcx; + + /* Get the MSR contents */ + v = (((uint64_t) cur_context->guest_cpu_regs.regs.rdx) << 32) | + ((uint64_t) cur_context->guest_cpu_regs.regs.rax); + + /* Do the required processing for each msr case */ + switch (msr) { + case MSR_IA32_TSC_DEADLINE: + { + bool ret; + /* Write APIC base */ + vlapic_wrmsr(vcpu, msr, v, &ret); + vcpu->guest_msrs[IDX_TSC_DEADLINE] = v; + break; + } + case MSR_IA32_MTRR_CAP: + case MSR_IA32_MTRR_DEF_TYPE: + case MSR_IA32_MTRR_PHYSBASE_0 ... MSR_IA32_MTRR_PHYSMASK_9: + case MSR_IA32_MTRR_FIX64K_00000 ... 
MSR_IA32_MTRR_FIX4K_F8000: + { + vcpu_inject_gp(vcpu); + break; + } + + /* following MSR not emulated now just left for future */ + case MSR_IA32_SYSENTER_CS: + { + exec_vmwrite(VMX_GUEST_IA32_SYSENTER_CS, v); + break; + } + case MSR_IA32_SYSENTER_ESP: + { + exec_vmwrite(VMX_GUEST_IA32_SYSENTER_ESP, v); + break; + } + case MSR_IA32_SYSENTER_EIP: + { + exec_vmwrite(VMX_GUEST_IA32_SYSENTER_EIP, v); + break; + } + case MSR_IA32_GS_BASE: + { + exec_vmwrite(VMX_GUEST_GS_BASE, v); + break; + } + case MSR_IA32_TSC_AUX: + { + vcpu->arch_vcpu.msr_tsc_aux = v; + break; + } + case MSR_IA32_APIC_BASE: + { + bool ret; + /* Write APIC base */ + vlapic_wrmsr(vcpu, msr, v, &ret); + break; + } + default: + { + ASSERT(0, "wrmsr: %lx should not come here!", msr); + msr_write(msr, v); + break; + } + } + + TRACE_2L(TRC_VMEXIT_WRMSR, msr, v); + + return 0; +} diff --git a/hypervisor/arch/x86/guest/vpic.c b/hypervisor/arch/x86/guest/vpic.c new file mode 100644 index 000000000..4c487fc4c --- /dev/null +++ b/hypervisor/arch/x86/guest/vpic.c @@ -0,0 +1,950 @@ +/*- + * Copyright (c) 2014 Tycho Nightingale + * Copyright (c) 2017 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#define pr_fmt(fmt) "vpic: " fmt + +#include +#include +#include +#include +#include + +#define VPIC_LOCK_INIT(vpic) spinlock_init(&((vpic)->lock)) +#define VPIC_LOCK(vpic) spinlock_obtain(&((vpic)->lock)) +#define VPIC_UNLOCK(vpic) spinlock_release(&((vpic)->lock)) +/* TODO: add spinlock_locked support? 
*/ +/*#define VPIC_LOCKED(vpic) spinlock_locked(&((vpic)->lock))*/ + +#define vm_pic(vm) (vm->vpic) + +#define true 1 +#define false 0 + +#define ACRN_DBG_PIC 6 + +enum irqstate { + IRQSTATE_ASSERT, + IRQSTATE_DEASSERT, + IRQSTATE_PULSE +}; + +struct pic { + bool ready; + int icw_num; + int rd_cmd_reg; + + bool aeoi; + bool poll; + bool rotate; + bool sfn; /* special fully-nested mode */ + + int irq_base; + uint8_t request; /* Interrupt Request Register (IIR) */ + uint8_t service; /* Interrupt Service (ISR) */ + uint8_t mask; /* Interrupt Mask Register (IMR) */ + uint8_t smm; /* special mask mode */ + + int acnt[8]; /* sum of pin asserts and deasserts */ + int lowprio; /* lowest priority irq */ + + bool intr_raised; + uint8_t elc; +}; + +struct vpic { + struct vm *vm; + spinlock_t lock; + struct pic pic[2]; +}; + +/* + * Loop over all the pins in priority order from highest to lowest. + */ +#define PIC_PIN_FOREACH(pinvar, pic, tmpvar) \ + for (tmpvar = 0, pinvar = (pic->lowprio + 1) & 0x7; \ + tmpvar < 8; \ + tmpvar++, pinvar = (pinvar + 1) & 0x7) + +static void vpic_set_pinstate(struct vpic *vpic, int pin, bool newstate); + +static inline bool master_pic(struct vpic *vpic, struct pic *pic) +{ + + if (pic == &vpic->pic[0]) + return true; + else + return false; +} + +static inline int vpic_get_highest_isrpin(struct pic *pic) +{ + int bit, pin; + int i; + + PIC_PIN_FOREACH(pin, pic, i) { + bit = (1 << pin); + + if (pic->service & bit) { + /* + * An IS bit that is masked by an IMR bit will not be + * cleared by a non-specific EOI in Special Mask Mode. + */ + if (pic->smm && (pic->mask & bit) != 0) + continue; + else + return pin; + } + } + + return -1; +} + +static inline int vpic_get_highest_irrpin(struct pic *pic) +{ + int serviced; + int bit, pin, tmp; + + /* + * In 'Special Fully-Nested Mode' when an interrupt request from + * a slave is in service, the slave is not locked out from the + * master's priority logic. + */ + serviced = pic->service; + if (pic->sfn) + serviced &= ~(1 << 2); + + /* + * In 'Special Mask Mode', when a mask bit is set in OCW1 it inhibits + * further interrupts at that level and enables interrupts from all + * other levels that are not masked. In other words the ISR has no + * bearing on the levels that can generate interrupts. + */ + if (pic->smm) + serviced = 0; + + PIC_PIN_FOREACH(pin, pic, tmp) { + bit = 1 << pin; + + /* + * If there is already an interrupt in service at the same + * or higher priority then bail. + */ + if ((serviced & bit) != 0) + break; + + /* + * If an interrupt is asserted and not masked then return + * the corresponding 'pin' to the caller. + */ + if ((pic->request & bit) != 0 && (pic->mask & bit) == 0) + return pin; + } + + return -1; +} + +static void vpic_notify_intr(struct vpic *vpic) +{ + struct pic *pic; + int pin; + + /* + * First check the slave. + */ + pic = &vpic->pic[1]; + pin = vpic_get_highest_irrpin(pic); + if (!pic->intr_raised && pin != -1) { + dev_dbg(ACRN_DBG_PIC, + "pic slave notify pin = %d (imr 0x%x irr 0x%x isr 0x%x)\n", + pin, pic->mask, pic->request, pic->service); + + /* + * Cascade the request from the slave to the master. + */ + pic->intr_raised = true; + vpic_set_pinstate(vpic, 2, true); + vpic_set_pinstate(vpic, 2, false); + } else { + dev_dbg(ACRN_DBG_PIC, + "pic slave no eligible interrupt (imr 0x%x irr 0x%x isr 0x%x)", + pic->mask, pic->request, pic->service); + } + + /* + * Then check the master. 
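+	 * (any request pending on the slave was just cascaded to master pin 2
+	 * above, so the master scan below will pick it up)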
+ */ + pic = &vpic->pic[0]; + pin = vpic_get_highest_irrpin(pic); + if (!pic->intr_raised && pin != -1) { + dev_dbg(ACRN_DBG_PIC, + "pic master notify pin = %d (imr 0x%x irr 0x%x isr 0x%x)\n", + pin, pic->mask, pic->request, pic->service); + + /* + * From Section 3.6.2, "Interrupt Modes", in the + * MPtable Specification, Version 1.4 + * + * PIC interrupts are routed to both the Local APIC + * and the I/O APIC to support operation in 1 of 3 + * modes. + * + * 1. Legacy PIC Mode: the PIC effectively bypasses + * all APIC components. In this mode the local APIC is + * disabled and LINT0 is reconfigured as INTR to + * deliver the PIC interrupt directly to the CPU. + * + * 2. Virtual Wire Mode: the APIC is treated as a + * virtual wire which delivers interrupts from the PIC + * to the CPU. In this mode LINT0 is programmed as + * ExtINT to indicate that the PIC is the source of + * the interrupt. + * + * 3. Virtual Wire Mode via I/O APIC: PIC interrupts are + * fielded by the I/O APIC and delivered to the appropriate + * CPU. In this mode the I/O APIC input 0 is programmed + * as ExtINT to indicate that the PIC is the source of the + * interrupt. + */ + pic->intr_raised = true; + if (vpic->vm->vpic_wire_mode == VPIC_WIRE_INTR) { + struct vcpu *vcpu = vcpu_from_vid(vpic->vm, 0); + + ASSERT(vcpu != NULL, "vm%d, vcpu0", vpic->vm->attr.id); + vcpu_inject_extint(vcpu); + } else { + vlapic_set_local_intr(vpic->vm, -1, APIC_LVT_LINT0); + /* notify vioapic pin0 if existing + * For vPIC + vIOAPIC mode, vpic master irq connected + * to vioapic pin0 (irq2) + * From MPSpec session 5.1 + */ + vioapic_pulse_irq(vpic->vm, 0); + } + } else { + dev_dbg(ACRN_DBG_PIC, + "pic master no eligible interrupt (imr 0x%x irr 0x%x isr 0x%x)", + pic->mask, pic->request, pic->service); + } +} + +static int vpic_icw1(__unused struct vpic *vpic, struct pic *pic, uint8_t val) +{ + dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw1 0x%x\n", + vpic->vm, val); + + pic->ready = false; + + pic->icw_num = 1; + pic->request = 0; + pic->mask = 0; + pic->lowprio = 7; + pic->rd_cmd_reg = 0; + pic->poll = 0; + pic->smm = 0; + + if ((val & ICW1_SNGL) != 0) { + dev_dbg(ACRN_DBG_PIC, "vpic cascade mode required\n"); + return -1; + } + + if ((val & ICW1_IC4) == 0) { + dev_dbg(ACRN_DBG_PIC, "vpic icw4 required\n"); + return -1; + } + + pic->icw_num++; + + return 0; +} + +static int vpic_icw2(__unused struct vpic *vpic, struct pic *pic, uint8_t val) +{ + dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw2 0x%x\n", + vpic->vm, val); + + pic->irq_base = val & 0xf8; + + pic->icw_num++; + + return 0; +} + +static int vpic_icw3(__unused struct vpic *vpic, struct pic *pic, + __unused uint8_t val) +{ + dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw3 0x%x\n", + vpic->vm, val); + + pic->icw_num++; + + return 0; +} + +static int vpic_icw4(struct vpic *vpic, struct pic *pic, uint8_t val) +{ + dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw4 0x%x\n", + vpic->vm, val); + + if ((val & ICW4_8086) == 0) { + dev_dbg(ACRN_DBG_PIC, + "vpic microprocessor mode required\n"); + return -1; + } + + if ((val & ICW4_AEOI) != 0) + pic->aeoi = true; + + if ((val & ICW4_SFNM) != 0) { + if (master_pic(vpic, pic)) { + pic->sfn = true; + } else { + dev_dbg(ACRN_DBG_PIC, + "Ignoring special fully nested mode on slave pic: %#x", + val); + } + } + + pic->icw_num = 0; + pic->ready = true; + + return 0; +} + +bool vpic_is_pin_mask(struct vpic *vpic, uint8_t virt_pin) +{ + struct pic *pic; + + if (virt_pin < 8) + pic = &vpic->pic[0]; + else if (virt_pin < 16) { + pic = &vpic->pic[1]; + virt_pin -= 8; + } else + 
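+		/* pins above 15 do not exist on the two cascaded PICs;
+		 * report them as masked
+		 */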
return true; + + if (pic->mask & (1 << virt_pin)) + return true; + else + return false; +} + +static int vpic_ocw1(struct vpic *vpic, struct pic *pic, uint8_t val) +{ + int pin, i, bit; + uint8_t old = pic->mask; + + dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic ocw1 0x%x\n", + vpic->vm, val); + + pic->mask = val & 0xff; + + /* query and setup if pin/irq is for passthrough device */ + PIC_PIN_FOREACH(pin, pic, i) { + bit = (1 << pin); + + /* remap for active: interrupt mask -> unmask + * remap for deactive: when vIOAPIC take it over + */ + if (((pic->mask & bit) == 0) && (old & bit)) { + struct ptdev_intx_info intx; + + /* master pic pin2 connect with slave pic, + * not device, so not need pt remap + */ + if ((pin == 2) && master_pic(vpic, pic)) + continue; + + intx.virt_pin = pin; + intx.vpin_src = PTDEV_VPIN_PIC; + if (!master_pic(vpic, pic)) + intx.virt_pin += 8; + ptdev_intx_pin_remap(vpic->vm, &intx); + } + } + + return 0; +} + +static int vpic_ocw2(struct vpic *vpic, struct pic *pic, uint8_t val) +{ + dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic ocw2 0x%x\n", + vpic->vm, val); + + pic->rotate = ((val & OCW2_R) != 0); + + if ((val & OCW2_EOI) != 0) { + int isr_bit; + + if ((val & OCW2_SL) != 0) { + /* specific EOI */ + isr_bit = val & 0x7; + } else { + /* non-specific EOI */ + isr_bit = vpic_get_highest_isrpin(pic); + } + + if (isr_bit != -1) { + pic->service &= ~(1 << isr_bit); + + if (pic->rotate) + pic->lowprio = isr_bit; + } + + /* if level ack PTDEV */ + if (pic->elc & (1 << (isr_bit & 0x7))) { + ptdev_intx_ack(vpic->vm, + master_pic(vpic, pic) ? isr_bit : isr_bit + 8, + PTDEV_VPIN_PIC); + } + } else if ((val & OCW2_SL) != 0 && pic->rotate == true) { + /* specific priority */ + pic->lowprio = val & 0x7; + } + + return 0; +} + +static int vpic_ocw3(__unused struct vpic *vpic, struct pic *pic, uint8_t val) +{ + dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic ocw3 0x%x\n", + vpic->vm, val); + + if (val & OCW3_ESMM) { + pic->smm = val & OCW3_SMM ? 1 : 0; + dev_dbg(ACRN_DBG_PIC, "%s pic special mask mode %s\n", + master_pic(vpic, pic) ? "master" : "slave", + pic->smm ? "enabled" : "disabled"); + } + + if (val & OCW3_RR) { + /* read register command */ + pic->rd_cmd_reg = val & OCW3_RIS; + + /* Polling mode */ + pic->poll = ((val & OCW3_P) != 0); + } + + return 0; +} + +static void vpic_set_pinstate(struct vpic *vpic, int pin, bool newstate) +{ + struct pic *pic; + int oldcnt, newcnt; + bool level; + + ASSERT(pin >= 0 && pin < 16, + "vpic_set_pinstate: invalid pin number"); + + pic = &vpic->pic[pin >> 3]; + + oldcnt = pic->acnt[pin & 0x7]; + if (newstate) + pic->acnt[pin & 0x7]++; + else + pic->acnt[pin & 0x7]--; + newcnt = pic->acnt[pin & 0x7]; + + if (newcnt < 0) { + pr_warn("pic pin%d: bad acnt %d\n", pin, newcnt); + } + + level = ((vpic->pic[pin >> 3].elc & (1 << (pin & 0x7))) != 0); + + if ((oldcnt == 0 && newcnt == 1) || (newcnt > 0 && level == true)) { + /* rising edge or level */ + dev_dbg(ACRN_DBG_PIC, "pic pin%d: asserted\n", pin); + pic->request |= (1 << (pin & 0x7)); + } else if (oldcnt == 1 && newcnt == 0) { + /* falling edge */ + dev_dbg(ACRN_DBG_PIC, "pic pin%d: deasserted\n", pin); + if (level) + pic->request &= ~(1 << (pin & 0x7)); + } else { + dev_dbg(ACRN_DBG_PIC, + "pic pin%d: %s, ignored, acnt %d\n", + pin, newstate ? 
"asserted" : "deasserted", newcnt); + } + + vpic_notify_intr(vpic); +} + +static int vpic_set_irqstate(struct vm *vm, int irq, enum irqstate irqstate) +{ + struct vpic *vpic; + struct pic *pic; + + if (irq < 0 || irq > 15) + return -EINVAL; + + vpic = vm_pic(vm); + pic = &vpic->pic[irq >> 3]; + + if (pic->ready == false) + return 0; + + VPIC_LOCK(vpic); + switch (irqstate) { + case IRQSTATE_ASSERT: + vpic_set_pinstate(vpic, irq, true); + break; + case IRQSTATE_DEASSERT: + vpic_set_pinstate(vpic, irq, false); + break; + case IRQSTATE_PULSE: + vpic_set_pinstate(vpic, irq, true); + vpic_set_pinstate(vpic, irq, false); + break; + default: + ASSERT(0, "vpic_set_irqstate: invalid irqstate"); + } + VPIC_UNLOCK(vpic); + + return 0; +} + +/* hypervisor interface: assert/deassert/pulse irq */ +int vpic_assert_irq(struct vm *vm, int irq) +{ + return vpic_set_irqstate(vm, irq, IRQSTATE_ASSERT); +} + +int vpic_deassert_irq(struct vm *vm, int irq) +{ + return vpic_set_irqstate(vm, irq, IRQSTATE_DEASSERT); +} + +int vpic_pulse_irq(struct vm *vm, int irq) +{ + return vpic_set_irqstate(vm, irq, IRQSTATE_PULSE); +} + +int vpic_set_irq_trigger(struct vm *vm, int irq, enum vpic_trigger trigger) +{ + struct vpic *vpic; + + if (irq < 0 || irq > 15) + return -EINVAL; + + /* + * See comment in vpic_elc_handler. These IRQs must be + * edge triggered. + */ + if (trigger == LEVEL_TRIGGER) { + switch (irq) { + case 0: + case 1: + case 2: + case 8: + case 13: + return -EINVAL; + } + } + + vpic = vm_pic(vm); + + VPIC_LOCK(vpic); + + if (trigger == LEVEL_TRIGGER) + vpic->pic[irq >> 3].elc |= 1 << (irq & 0x7); + else + vpic->pic[irq >> 3].elc &= ~(1 << (irq & 0x7)); + + VPIC_UNLOCK(vpic); + + return 0; +} + +int vpic_get_irq_trigger(struct vm *vm, int irq, enum vpic_trigger *trigger) +{ + struct vpic *vpic; + + if (irq < 0 || irq > 15) + return -EINVAL; + + vpic = vm_pic(vm); + if (!vpic) + return -EINVAL; + + if (vpic->pic[irq>>3].elc & (1 << (irq & 0x7))) + *trigger = LEVEL_TRIGGER; + else + *trigger = EDGE_TRIGGER; + return 0; +} + +void vpic_pending_intr(struct vm *vm, int *vecptr) +{ + struct vpic *vpic; + struct pic *pic; + int pin; + + vpic = vm_pic(vm); + + pic = &vpic->pic[0]; + + VPIC_LOCK(vpic); + + pin = vpic_get_highest_irrpin(pic); + if (pin == 2) { + pic = &vpic->pic[1]; + pin = vpic_get_highest_irrpin(pic); + } + + /* + * If there are no pins active at this moment then return the spurious + * interrupt vector instead. + */ + if (pin == -1) { + *vecptr = -1; + VPIC_UNLOCK(vpic); + return; + } + + ASSERT(pin >= 0 && pin <= 7, "invalid pin"); + *vecptr = pic->irq_base + pin; + + dev_dbg(ACRN_DBG_PIC, "Got pending vector 0x%x\n", *vecptr); + + VPIC_UNLOCK(vpic); +} + +static void vpic_pin_accepted(struct pic *pic, int pin) +{ + pic->intr_raised = false; + + if ((pic->elc & (1 << pin)) == 0) { + /*only used edge trigger mode*/ + pic->request &= ~(1 << pin); + } + + if (pic->aeoi == true) { + if (pic->rotate == true) + pic->lowprio = pin; + } else { + pic->service |= (1 << pin); + } +} + +void vpic_intr_accepted(struct vm *vm, int vector) +{ + struct vpic *vpic; + int pin; + + vpic = vm_pic(vm); + + VPIC_LOCK(vpic); + + pin = vector & 0x7; + + if ((vector & ~0x7) == vpic->pic[1].irq_base) { + vpic_pin_accepted(&vpic->pic[1], pin); + /* + * If this vector originated from the slave, + * accept the cascaded interrupt too. 
+ */ + vpic_pin_accepted(&vpic->pic[0], 2); + } else { + vpic_pin_accepted(&vpic->pic[0], pin); + } + + vpic_notify_intr(vpic); + + VPIC_UNLOCK(vpic); +} + +static int vpic_read(struct vpic *vpic, struct pic *pic, + int port, uint32_t *eax) +{ + int pin; + + VPIC_LOCK(vpic); + + if (pic->poll) { + pic->poll = 0; + pin = vpic_get_highest_irrpin(pic); + if (pin >= 0) { + vpic_pin_accepted(pic, pin); + *eax = 0x80 | pin; + } else { + *eax = 0; + } + } else { + if (port & ICU_IMR_OFFSET) { + /* read interrupt mask register */ + *eax = pic->mask; + } else { + if (pic->rd_cmd_reg == OCW3_RIS) { + /* read interrupt service register */ + *eax = pic->service; + } else { + /* read interrupt request register */ + *eax = pic->request; + } + } + } + + VPIC_UNLOCK(vpic); + + return 0; +} + +static int vpic_write(struct vpic *vpic, struct pic *pic, + int port, uint32_t *eax) +{ + int error; + uint8_t val; + + error = 0; + val = *eax; + + VPIC_LOCK(vpic); + + if (port & ICU_IMR_OFFSET) { + switch (pic->icw_num) { + case 2: + error = vpic_icw2(vpic, pic, val); + break; + case 3: + error = vpic_icw3(vpic, pic, val); + break; + case 4: + error = vpic_icw4(vpic, pic, val); + break; + default: + error = vpic_ocw1(vpic, pic, val); + break; + } + } else { + if (val & (1 << 4)) + error = vpic_icw1(vpic, pic, val); + + if (pic->ready) { + if (val & (1 << 3)) + error = vpic_ocw3(vpic, pic, val); + else + error = vpic_ocw2(vpic, pic, val); + } + } + + if (pic->ready) + vpic_notify_intr(vpic); + + VPIC_UNLOCK(vpic); + + return error; +} + +static int vpic_master_handler(struct vm *vm, bool in, int port, int bytes, + uint32_t *eax) +{ + struct vpic *vpic; + struct pic *pic; + + vpic = vm_pic(vm); + pic = &vpic->pic[0]; + + if (bytes != 1) + return -1; + + if (in) + return vpic_read(vpic, pic, port, eax); + + return vpic_write(vpic, pic, port, eax); +} + +static uint32_t vpic_master_io_read(__unused struct vm_io_handler *hdlr, + struct vm *vm, ioport_t addr, size_t width) +{ + uint32_t val = 0; + + if (vpic_master_handler(vm, true, (int)addr, (int)width, &val) < 0) + pr_err("pic master read port 0x%x width=%d failed\n", + addr, width); + return val; +} + +static void vpic_master_io_write(__unused struct vm_io_handler *hdlr, + struct vm *vm, ioport_t addr, size_t width, uint32_t v) +{ + uint32_t val = v; + + if (vpic_master_handler(vm, false, (int)addr, (int)width, &val) < 0) + pr_err("%s: write port 0x%x width=%d value 0x%x failed\n", + __func__, addr, width, val); +} + +static int vpic_slave_handler(struct vm *vm, bool in, int port, int bytes, + uint32_t *eax) +{ + struct vpic *vpic; + struct pic *pic; + + vpic = vm_pic(vm); + pic = &vpic->pic[1]; + + if (bytes != 1) + return -1; + + if (in) + return vpic_read(vpic, pic, port, eax); + + return vpic_write(vpic, pic, port, eax); +} + +static uint32_t vpic_slave_io_read(__unused struct vm_io_handler *hdlr, + struct vm *vm, ioport_t addr, size_t width) +{ + uint32_t val = 0; + + if (vpic_slave_handler(vm, true, (int)addr, (int)width, &val) < 0) + pr_err("pic slave read port 0x%x width=%d failed\n", + addr, width); + return val; +} + +static void vpic_slave_io_write(__unused struct vm_io_handler *hdlr, + struct vm *vm, ioport_t addr, size_t width, uint32_t v) +{ + uint32_t val = v; + + if (vpic_slave_handler(vm, false, (int)addr, (int)width, &val) < 0) + pr_err("%s: write port 0x%x width=%d value 0x%x failed\n", + __func__, addr, width, val); +} + +static int vpic_elc_handler(struct vm *vm, bool in, int port, int bytes, + uint32_t *eax) +{ + struct vpic *vpic; + bool 
is_master; + + vpic = vm_pic(vm); + is_master = (port == IO_ELCR1); + + if (bytes != 1) + return -1; + + VPIC_LOCK(vpic); + + if (in) { + if (is_master) + *eax = vpic->pic[0].elc; + else + *eax = vpic->pic[1].elc; + } else { + /* + * For the master PIC the cascade channel (IRQ2), the + * heart beat timer (IRQ0), and the keyboard + * controller (IRQ1) cannot be programmed for level + * mode. + * + * For the slave PIC the real time clock (IRQ8) and + * the floating point error interrupt (IRQ13) cannot + * be programmed for level mode. + */ + if (is_master) + vpic->pic[0].elc = (*eax & 0xf8); + else + vpic->pic[1].elc = (*eax & 0xde); + } + + VPIC_UNLOCK(vpic); + + return 0; +} + +static uint32_t vpic_elc_io_read(__unused struct vm_io_handler *hdlr, + struct vm *vm, ioport_t addr, size_t width) +{ + uint32_t val = 0; + + if (vpic_elc_handler(vm, true, (int)addr, (int)width, &val) < 0) + pr_err("pic elc read port 0x%x width=%d failed", addr, width); + return val; +} + +static void vpic_elc_io_write(__unused struct vm_io_handler *hdlr, + struct vm *vm, ioport_t addr, size_t width, uint32_t v) +{ + uint32_t val = v; + + if (vpic_elc_handler(vm, false, (int)addr, (int)width, &val) < 0) + pr_err("%s: write port 0x%x width=%d value 0x%x failed\n", + __func__, addr, width, val); +} + +void vpic_register_io_handler(struct vm *vm) +{ + struct vm_io_range master_range = { + .flags = IO_ATTR_RW, + .base = 0x20, + .len = 2 + }; + struct vm_io_range slave_range = { + .flags = IO_ATTR_RW, + .base = 0xa0, + .len = 2 + }; + struct vm_io_range elcr_range = { + .flags = IO_ATTR_RW, + .base = 0x4d0, + .len = 2 + }; + + register_io_emulation_handler(vm, &master_range, + &vpic_master_io_read, &vpic_master_io_write); + register_io_emulation_handler(vm, &slave_range, + &vpic_slave_io_read, &vpic_slave_io_write); + register_io_emulation_handler(vm, &elcr_range, + &vpic_elc_io_read, &vpic_elc_io_write); +} + +void *vpic_init(struct vm *vm) +{ + struct vpic *vpic; + + vpic_register_io_handler(vm); + + vpic = malloc(sizeof(struct vpic)); + ASSERT(vpic != NULL, ""); + vpic->vm = vm; + vpic->pic[0].mask = 0xff; + vpic->pic[1].mask = 0xff; + + VPIC_LOCK_INIT(vpic); + + return vpic; +} + +void vpic_cleanup(struct vm *vm) +{ + if (vm->vpic) { + free(vm->vpic); + vm->vpic = NULL; + } +} diff --git a/hypervisor/arch/x86/idt.S b/hypervisor/arch/x86/idt.S new file mode 100644 index 000000000..7f9208b70 --- /dev/null +++ b/hypervisor/arch/x86/idt.S @@ -0,0 +1,441 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +.altmacro + +.global HOST_IDT +.global HOST_IDTR + +.section .data +.align 8 + .long 0 + .short 0 +HOST_IDTR: + .short HOST_IDT_SIZE - 1 + .quad HOST_IDT + +/* + * We'll rearrange and fix up the descriptors at runtime + */ +.macro interrupt_descriptor entry, dpl=0 ist=0 + .long HOST_GDT_RING0_CODE_SEL << 16 + .long 0x00008e00 + (dpl << 13) + ist + .quad entry +.endm + +.macro trap_descriptor entry, dpl=0, ist=0 + .long HOST_GDT_RING0_CODE_SEL << 16 + .long 0x00008f00 + (dpl <<13) + ist + .quad entry +.endm + + +.macro _external_interrupt_descriptor vector + __external_interrupt_descriptor %vector +.endm + + +.macro __external_interrupt_descriptor vector + interrupt_descriptor external_interrupt_\vector +.endm + +#define MACHINE_CHECK_IST (0x1) +#define DOUBLE_FAULT_IST (0x2) +#define STACK_FAULT_IST (0x3) + +/* + * We'll use interrupt gates. Change to trap or task only as needed. + */ +.section .rodata +.align 16 +HOST_IDT: +interrupt_descriptor excp_divide_error +interrupt_descriptor excp_debug, 3 +interrupt_descriptor excp_nmi +interrupt_descriptor excp_breakpoint, 3 +interrupt_descriptor excp_overflow, 3 +interrupt_descriptor excp_bounds_check +interrupt_descriptor excp_illegal_opcode +interrupt_descriptor excp_device_not_available +interrupt_descriptor excp_double_fault, 0, DOUBLE_FAULT_IST +interrupt_descriptor excp_rsvd_09 +interrupt_descriptor excp_invalid_tss +interrupt_descriptor excp_segment_not_present +interrupt_descriptor excp_stack_fault, 0, STACK_FAULT_IST +interrupt_descriptor excp_general_protection +interrupt_descriptor excp_page_fault +interrupt_descriptor excp_rsvd_0f +interrupt_descriptor excp_float_error +interrupt_descriptor excp_alignment_check +interrupt_descriptor expt_machine_check, 0, MACHINE_CHECK_IST +interrupt_descriptor excp_simd_fp_error +interrupt_descriptor excp_virtualization +interrupt_descriptor excp_rsvd_21 +interrupt_descriptor excp_rsvd_22 +interrupt_descriptor excp_rsvd_23 +interrupt_descriptor excp_rsvd_24 +interrupt_descriptor excp_rsvd_25 +interrupt_descriptor excp_rsvd_26 +interrupt_descriptor excp_rsvd_27 +interrupt_descriptor excp_rsvd_28 +interrupt_descriptor excp_rsvd_29 +interrupt_descriptor excp_rsvd_30 +interrupt_descriptor excp_rsvd_31 + +vector = 0x20 +.rept (0x100 - 0x20) + _external_interrupt_descriptor vector + vector = vector + 1 +.endr + +.section .text +.align 16 +excp_divide_error: + pushq $0x0 /* pseudo error code */ + pushq $0x00 + jmp excp_save_frame + +.align 8 +excp_debug: + pushq $0x0 /* pseudo error code */ + pushq $0x01 + jmp excp_save_frame + +.align 8 +excp_nmi: + + + + +.align 8 +excp_breakpoint: + pushq $0x0 /* pseudo error code */ + pushq $0x03 + jmp excp_save_frame + +.align 8 +excp_overflow: + pushq $0x0 /* pseudo error code */ + pushq $0x04 + jmp excp_save_frame + +.align 8 +excp_bounds_check: + pushq $0x0 /* pseudo error code */ + pushq $0x05 + jmp excp_save_frame + +.align 8 +excp_illegal_opcode: + pushq $0x0 /* pseudo error code */ + pushq $0x06 + 
jmp excp_save_frame + +.align 8 +excp_device_not_available: + pushq $0x0 /* pseudo error code */ + pushq $0x07 + jmp excp_save_frame + +.align 8 +excp_double_fault: + pushq $0x08 + jmp excp_save_frame + +.align 8 +excp_invalid_tss: + pushq $0x0A + jmp excp_save_frame + +.align 8 +excp_segment_not_present: + pushq $0x0B + jmp excp_save_frame + +.align 8 +excp_stack_fault: + pushq $0x0C + jmp excp_save_frame + +.align 8 +excp_general_protection: + pushq $0x0D + jmp excp_save_frame + +.align 8 +excp_page_fault: + pushq $0x0E + jmp excp_save_frame + +.align 8 +excp_float_error: + pushq $0x0 /* pseudo error code */ + pushq $0x10 + jmp excp_save_frame + +.align 8 +excp_alignment_check: + pushq $0x11 + jmp excp_save_frame + +.align 8 +expt_machine_check: + pushq $0x0 /* pseudo error code */ + pushq $0x12 + jmp excp_save_frame + +.align 8 +excp_simd_fp_error: + pushq $0x0 /* pseudo error code */ + pushq $0x13 + jmp excp_save_frame + +.align 8 +excp_virtualization: + pushq $0x0 /* pseudo error code */ + pushq $0x14 + jmp excp_save_frame + + + +/* + * Macros for rsvd vectors. Vectors 0x09, 0x0F, 0x15 through 0x1F + */ +.macro _rsvd_vector vector + __rsvd_vector %vector +.endm + +.macro __rsvd_vector vector +.align 8 +excp_rsvd_\vector\(): + pushq $0x0 /* pseudo error code */ + pushq $\vector + jmp excp_rsvd +.endm + +.align 8 +excp_rsvd_09: + _rsvd_vector 0x09 + +.align 8 +excp_rsvd_0f: + _rsvd_vector 0x0f + +vector = 0x15 +.rept (0x20 - 0x15) + _rsvd_vector vector + vector = vector + 1 +.endr + + + +/* + * Macros for external interrupts. Vectors$0x20 through$0xFF + */ +.macro _external_interrupt vector + __external_interrupt %vector +.endm + +.macro __external_interrupt vector +.align 8 +external_interrupt_\vector\(): + pushq $0x0 /* pseudo error code */ + pushq $\vector + jmp external_interrupt_save_frame +.endm + +vector =0x20 +.rept (0x100 - 0x20) + _external_interrupt vector + vector = vector + 1 +.endr + + + +/* + * Common entry point for defined exceptions + */ +.align 8 +excp_save_frame: + pushq %r11 + pushq %r10 + pushq %r9 + pushq %r8 + pushq %rdi + pushq %rsi + pushq %rdx + pushq %rcx + pushq %rax + pushq %rbp + pushq %rbx + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + + /* Put current stack pointer into 1st param register (rdi) */ + movq %rsp, %rdi + + call dispatch_exception + + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbx + popq %rbp + popq %rax + popq %rcx + popq %rdx + popq %rsi + popq %rdi + popq %r8 + popq %r9 + popq %r10 + popq %r11 + + /* Skip vector and error code*/ + add $16, %rsp + + iretq + + +/* + * Common entry point for reserved exceptions. + * These should never execute. + * We put a handler on them anyway to highlight the unexpected. + */ +.align 8 +excp_rsvd: + pushq %r11 + pushq %r10 + pushq %r9 + pushq %r8 + pushq %rdi + pushq %rsi + pushq %rdx + pushq %rcx + pushq %rax + + + pushq %rbp + pushq %rbx + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + + /* Put current stack pointer into 1st param register (rdi) */ + movq %rsp, %rdi + + call dispatch_exception + + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbx + popq %rbp + + popq %rax + popq %rcx + popq %rdx + popq %rsi + popq %rdi + popq %r8 + popq %r9 + popq %r10 + popq %r11 + + /* Skip vector and error code*/ + add $16, %rsp + + iretq + + +/* + * Common entry point for defined interrupts. 
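+ * Each vector stub has already pushed a pseudo error code and its vector number.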
+ * Vectors 0x20 through 0xFF + */ +.align 8 +external_interrupt_save_frame: + pushq %r11 + pushq %r10 + pushq %r9 + pushq %r8 + pushq %rdi + pushq %rsi + pushq %rdx + pushq %rcx + pushq %rax + + + pushq %rbp + pushq %rbx + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + + /* Put current stack pointer into 1st param register (rdi) */ + movq %rsp, %rdi + + call dispatch_interrupt + + /* + * We disable softirq path from interrupt IRET, since right now all IRQ + * are for Guest, and we can execute softirq in hv_main() loop + */ + + popq %r12 + popq %r13 + popq %r14 + popq %r15 + popq %rbx + popq %rbp + + popq %rax + popq %rcx + popq %rdx + popq %rsi + popq %rdi + popq %r8 + popq %r9 + popq %r10 + popq %r11 + + /* Skip vector and error code*/ + add $16, %rsp + + iretq + diff --git a/hypervisor/arch/x86/interrupt.c b/hypervisor/arch/x86/interrupt.c new file mode 100644 index 000000000..40ffd2839 --- /dev/null +++ b/hypervisor/arch/x86/interrupt.c @@ -0,0 +1,431 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include + +#define EXCEPTION_ERROR_CODE_VALID 8 +#define INTERRPUT_QUEUE_BUFF_SIZE 255 + +#define ACRN_DBG_INTR 6 + +static const uint16_t exception_type[] = { + [0] = VMX_INT_TYPE_HW_EXP, + [1] = VMX_INT_TYPE_HW_EXP, + [2] = VMX_INT_TYPE_HW_EXP, + [3] = VMX_INT_TYPE_HW_EXP, + [4] = VMX_INT_TYPE_HW_EXP, + [5] = VMX_INT_TYPE_HW_EXP, + [6] = VMX_INT_TYPE_HW_EXP, + [7] = VMX_INT_TYPE_HW_EXP, + [8] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID, + [9] = VMX_INT_TYPE_HW_EXP, + [10] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID, + [11] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID, + [12] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID, + [13] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID, + [14] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID, + [15] = VMX_INT_TYPE_HW_EXP, + [16] = VMX_INT_TYPE_HW_EXP, + [17] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID, + [18] = VMX_INT_TYPE_HW_EXP, + [19] = VMX_INT_TYPE_HW_EXP, + [20] = VMX_INT_TYPE_HW_EXP, + [21] = VMX_INT_TYPE_HW_EXP, + [22] = VMX_INT_TYPE_HW_EXP, + [23] = VMX_INT_TYPE_HW_EXP, + [24] = VMX_INT_TYPE_HW_EXP, + [25] = VMX_INT_TYPE_HW_EXP, + [26] = VMX_INT_TYPE_HW_EXP, + [27] = VMX_INT_TYPE_HW_EXP, + [28] = VMX_INT_TYPE_HW_EXP, + [29] = VMX_INT_TYPE_HW_EXP, + [30] = VMX_INT_TYPE_HW_EXP, + [31] = VMX_INT_TYPE_HW_EXP +}; + +static int is_guest_irq_enabled(struct vcpu *vcpu) +{ + struct run_context *cur_context = + &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + uint32_t guest_rflags, guest_state; + int status = false; + + /* Read the RFLAGS of the guest */ + guest_rflags = cur_context->rflags; + /* Check the RFLAGS[IF] bit first */ + if (guest_rflags & HV_ARCH_VCPU_RFLAGS_IF) { + /* Interrupts are allowed */ + /* Check for temporarily disabled interrupts */ + guest_state = exec_vmread(VMX_GUEST_INTERRUPTIBILITY_INFO); + + if ((guest_state & (HV_ARCH_VCPU_BLOCKED_BY_STI | + HV_ARCH_VCPU_BLOCKED_BY_MOVSS)) == 0) { + status = true; + } + } + return status; +} + +static bool vcpu_pending_request(struct vcpu *vcpu) +{ + struct vlapic *vlapic; + int vector = 0; + int ret = 0; + + /* Query vLapic to get vector to inject */ + vlapic = vcpu->arch_vcpu.vlapic; + ret = vlapic_pending_intr(vlapic, &vector); + + /* we need to check and raise request if we have pending event + * in LAPIC IRR + */ + if (ret != 0) { + /* we have pending IRR */ + vcpu_make_request(vcpu, ACRN_REQUEST_EVENT); + } + + return vcpu->arch_vcpu.pending_intr != 0; +} + +int vcpu_make_request(struct vcpu *vcpu, int eventid) +{ + bitmap_set(eventid, &vcpu->arch_vcpu.pending_intr); + /* + * if current hostcpu is not the target vcpu's hostcpu, we need + * to invoke IPI to wake up target vcpu + * + * TODO: Here we just compare with cpuid, since cpuid currently is + * global under pCPU / vCPU 1:1 mapping. 
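+	 * (the IPI kicks the target pCPU out of guest mode so the pending
+	 * request bit is handled promptly)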
If later we enabled vcpu + * scheduling, we need change here to determine it target vcpu is + * VMX non-root or root mode + */ + if ((int)get_cpu_id() != vcpu->pcpu_id) + send_single_ipi(vcpu->pcpu_id, VECTOR_NOTIFY_VCPU); + + return 0; +} + +static int vcpu_do_pending_event(struct vcpu *vcpu) +{ + struct vlapic *vlapic = vcpu->arch_vcpu.vlapic; + int vector = 0; + int ret = 0; + + if (is_apicv_enabled()) { + apicv_inject_pir(vlapic); + return 0; + } + + /* Query vLapic to get vector to inject */ + ret = vlapic_pending_intr(vlapic, &vector); + + /* + * From the Intel SDM, Volume 3, 6.3.2 Section "Maskable + * Hardware Interrupts": + * - maskable interrupt vectors [16,255] can be delivered + * through the local APIC. + */ + if (ret == 0) + return -1; + + if (!(vector >= 16 && vector <= 255)) { + dev_dbg(ACRN_DBG_INTR, "invalid vector %d from local APIC", + vector); + return -1; + } + + exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD, VMX_INT_INFO_VALID | + (vector & 0xFF)); + + vlapic_intr_accepted(vlapic, vector); + return 0; +} + +static int vcpu_do_pending_extint(struct vcpu *vcpu) +{ + struct vm *vm; + struct vcpu *primary; + int vector; + + vm = vcpu->vm; + + /* check if there is valid interrupt from vPIC, if yes just inject it */ + /* PIC only connect with primary CPU */ + primary = get_primary_vcpu(vm); + if (vm->vpic && vcpu == primary) { + + vpic_pending_intr(vcpu->vm, &vector); + if (vector > 0) { + dev_dbg(ACRN_DBG_INTR, "VPIC: to inject PIC vector %d\n", + vector & 0xFF); + exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD, + VMX_INT_INFO_VALID | + (vector & 0xFF)); + vpic_intr_accepted(vcpu->vm, vector); + } + } + + return 0; +} + +static int vcpu_do_pending_gp(__unused struct vcpu *vcpu) +{ + /* GP vector = 13 */ + exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD, + VMX_INT_INFO_VALID | 13); + return 0; +} + +/* please keep this for interrupt debug: + * 1. Timer alive or not + * 2. native LAPIC interrupt pending/EOI status + * 3. CPU stuck or not + */ +void dump_lapic(void) +{ + dev_dbg(ACRN_DBG_INTR, + "LAPIC: TIME %08x, init=0x%x cur=0x%x ISR=0x%x IRR=0x%x", + mmio_read_long(0xFEE00000 + LAPIC_LVT_TIMER_REGISTER), + mmio_read_long(0xFEE00000 + LAPIC_INITIAL_COUNT_REGISTER), + mmio_read_long(0xFEE00000 + LAPIC_CURRENT_COUNT_REGISTER), + mmio_read_long(0xFEE00000 + LAPIC_IN_SERVICE_REGISTER_7), + mmio_read_long(0xFEE00000 + LAPIC_INT_REQUEST_REGISTER_7)); +} + +int vcpu_inject_extint(struct vcpu *vcpu) +{ + return vcpu_make_request(vcpu, ACRN_REQUEST_EXTINT); +} + +int vcpu_inject_nmi(struct vcpu *vcpu) +{ + return vcpu_make_request(vcpu, ACRN_REQUEST_NMI); +} + +int vcpu_inject_gp(struct vcpu *vcpu) +{ + return vcpu_make_request(vcpu, ACRN_REQUEST_GP); +} + +int interrupt_win_exiting_handler(struct vcpu *vcpu) +{ + int value32; + + TRACE_2L(TRC_VMEXIT_INTERRUPT_WINDOW, 0, 0); + + if (!vcpu) + return -1; + + if (vcpu_pending_request(vcpu)) { + /* Do nothing + * acrn_do_intr_process will continue for this vcpu + */ + } else { + /* No interrupts to inject. 
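+		 * An armed interrupt-window exit would now only cause spurious VM exits.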
+ * Disable the interrupt window exiting + */ + vcpu->arch_vcpu.irq_window_enabled = 0; + value32 = exec_vmread(VMX_PROC_VM_EXEC_CONTROLS); + value32 &= ~(VMX_PROCBASED_CTLS_IRQ_WIN); + exec_vmwrite(VMX_PROC_VM_EXEC_CONTROLS, value32); + } + + VCPU_RETAIN_RIP(vcpu); + return 0; +} + +int external_interrupt_handler(struct vcpu *vcpu) +{ + int vector = exec_vmread(VMX_EXIT_INT_INFO) & 0xFF; + struct intr_ctx ctx; + + ctx.vector = vector; + /* do not RETAIN RIP for spurious interrupt */ + if (dispatch_interrupt(&ctx) == 0) + VCPU_RETAIN_RIP(vcpu); + + TRACE_2L(TRC_VMEXIT_EXTERNAL_INTERRUPT, vector, 0); + + return 0; +} + +int acrn_do_intr_process(struct vcpu *vcpu) +{ + int ret = 0; + int vector; + int tmp; + bool intr_pending = false; + uint64_t *pending_intr_bits = &vcpu->arch_vcpu.pending_intr; + + if (bitmap_test_and_clear(ACRN_REQUEST_TLB_FLUSH, pending_intr_bits)) + mmu_invept(vcpu); + + if (bitmap_test_and_clear(ACRN_REQUEST_TMR_UPDATE, pending_intr_bits)) + vioapic_update_tmr(vcpu); + + /* handling pending vector injection: + * there are many reason inject failed, we need re-inject again + */ + if (vcpu->arch_vcpu.exit_interrupt_info & VMX_INT_INFO_VALID) { + exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD, + vcpu->arch_vcpu.exit_interrupt_info); + goto INTR_WIN; + } + + /* handling exception request */ + vector = vcpu->arch_vcpu.exception_info.exception; + + /* If there is a valid exception, inject exception to guest */ + if (vector >= 0) { + if (exception_type[vector] & + EXCEPTION_ERROR_CODE_VALID) { + exec_vmwrite(VMX_ENTRY_EXCEPTION_EC, + vcpu->arch_vcpu.exception_info.error); + } + + exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD, + VMX_INT_INFO_VALID | + ((exception_type[vector] & 15) << 8) + | (vector & 0xFF)); + + vcpu->arch_vcpu.exception_info.exception = -1; + + goto INTR_WIN; + } + + /* Do pending interrupts process */ + /* TODO: checkin NMI intr windows before inject */ + if (bitmap_test_and_clear(ACRN_REQUEST_NMI, pending_intr_bits)) { + /* Inject NMI vector = 2 */ + exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD, + VMX_INT_INFO_VALID | (VMX_INT_TYPE_NMI << 8) | 2); + + /* Intel SDM 10.8.1 + * NMI, SMI, INIT, ExtINT, or SIPI directly deliver to CPU + * do not need EOI to LAPIC + * However, ExtINT need EOI to PIC + */ + goto INTR_WIN; + } + + /* Guest interruptable or not */ + if (!is_guest_irq_enabled(vcpu)) { + /* interrupt window unavailable */ + goto INTR_WIN; + } + + /* Inject external interrupt first */ + if (bitmap_test_and_clear(ACRN_REQUEST_EXTINT, pending_intr_bits)) { + /* has pending external interrupts */ + ret = vcpu_do_pending_extint(vcpu); + goto INTR_WIN; + } + + /* Inject vLAPIC vectors */ + if (bitmap_test_and_clear(ACRN_REQUEST_EVENT, pending_intr_bits)) { + /* has pending vLAPIC interrupts */ + ret = vcpu_do_pending_event(vcpu); + goto INTR_WIN; + } + + /* Inject GP event */ + if (bitmap_test_and_clear(ACRN_REQUEST_GP, pending_intr_bits)) { + /* has pending GP interrupts */ + ret = vcpu_do_pending_gp(vcpu); + goto INTR_WIN; + } + +INTR_WIN: + /* check if we have new interrupt pending for next VMExit */ + intr_pending = vcpu_pending_request(vcpu); + + /* Enable interrupt window exiting if pending */ + if (intr_pending && vcpu->arch_vcpu.irq_window_enabled == 0) { + vcpu->arch_vcpu.irq_window_enabled = 1; + tmp = exec_vmread(VMX_PROC_VM_EXEC_CONTROLS); + tmp |= (VMX_PROCBASED_CTLS_IRQ_WIN); + exec_vmwrite(VMX_PROC_VM_EXEC_CONTROLS, tmp); + } + + return ret; +} + +int exception_handler(struct vcpu *vcpu) +{ + uint32_t intinfo, int_err_code; + uint32_t exception_vector; + uint32_t 
cpl; + int status = 0; + + if (vcpu == NULL) { + TRACE_4I(TRC_VMEXIT_EXCEPTION_OR_NMI, 0, 0, 0, 0); + status = -EINVAL; + } + + if (status != 0) + return status; + + pr_dbg(" Handling guest exception"); + + /* Obtain VM-Exit information field pg 2912 */ + intinfo = exec_vmread(VMX_EXIT_INT_INFO); + exception_vector = intinfo & 0xFF; + /* Check if exception caused by the guest is a HW exception. If the + * exit occurred due to a HW exception obtain the error code to be + * conveyed to get via the stack + */ + if (intinfo & VMX_INT_INFO_ERR_CODE_VALID) { + int_err_code = exec_vmread(VMX_EXIT_INT_EC); + + /* get current privilege level and fault address */ + cpl = exec_vmread(VMX_GUEST_CS_ATTR); + cpl = (cpl >> 5) & 3; + + if (cpl < 3) + int_err_code &= ~4; + else + int_err_code |= 4; + } else { + int_err_code = 0; + } + + /* Handle all other exceptions */ + VCPU_RETAIN_RIP(vcpu); + vcpu->arch_vcpu.exception_info.exception = exception_vector; + vcpu->arch_vcpu.exception_info.error = int_err_code; + + TRACE_4I(TRC_VMEXIT_EXCEPTION_OR_NMI, + exception_vector, int_err_code, 2, 0); + + return status; +} diff --git a/hypervisor/arch/x86/intr_lapic.c b/hypervisor/arch/x86/intr_lapic.c new file mode 100644 index 000000000..65e1ee526 --- /dev/null +++ b/hypervisor/arch/x86/intr_lapic.c @@ -0,0 +1,418 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include + +/* Rate range 1 to 1000 or 1uSec to 1mSec */ +#define APIC_TIMER_MAX 0xffffffff +#define HYPE_PERIOD_MAX 1000 +#define APIC_DIVIDE_BY_ONE 0x0b +#define PIT_TARGET 0x3FFF + +/* xAPIC/x2APIC Interrupt Command Register (ICR) structure */ +union apic_icr { + uint64_t value; + struct { + uint32_t lo_32; + uint32_t hi_32; + } value_32; + struct { + uint64_t vector:8; + uint64_t delivery_mode:3; + uint64_t destination_mode:1; + uint64_t delivery_status:1; + uint64_t rsvd_1:1; + uint64_t level:1; + uint64_t trigger_mode:1; + uint64_t rsvd_2:2; + uint64_t shorthand:2; + uint64_t rsvd_3:12; + uint64_t rsvd_4:32; + } bits; + struct { + uint64_t rsvd_1:32; + uint64_t rsvd_2:24; + uint64_t dest_field:8; + } x_bits; + struct { + uint64_t rsvd_1:32; + uint64_t dest_field:32; + } x2_bits; +}; + +/* xAPIC/x2APIC Interrupt Command Register (ICR) structure */ +union apic_lvt { + uint32_t value; + union { + struct { + uint32_t vector:8; + uint32_t rsvd_1:4; + uint32_t delivery_status:1; + uint32_t rsvd_2:3; + uint32_t mask:1; + uint32_t mode:2; + uint32_t rsvd_3:13; + } timer; + struct { + uint32_t vector:8; + uint32_t delivery_mode:3; + uint32_t rsvd_1:1; + uint32_t delivery_status:1; + uint32_t rsvd_2:3; + uint32_t mask:1; + uint32_t rsvd_3:15; + } cmci; + struct { + uint32_t vector:8; + uint32_t delivery_mode:3; + uint32_t rsvd_1:1; + uint32_t delivery_status:1; + uint32_t polarity:1; + uint32_t remote_irr:1; + uint32_t trigger_mode:1; + uint32_t mask:1; + uint32_t rsvd_2:15; + } lint; + struct { + uint32_t vector:8; + uint32_t rsvd_1:4; + uint32_t delivery_status:1; + uint32_t rsvd_2:3; + uint32_t mask:1; + uint32_t rsvd_3:15; + } error; + struct { + uint32_t vector:8; + uint32_t delivery_mode:3; + uint32_t rsvd_1:1; + uint32_t delivery_status:1; + uint32_t rsvd_2:3; + uint32_t mask:1; + uint32_t rsvd_3:15; + } pmc; + struct { + uint32_t vector:8; + uint32_t delivery_mode:3; + uint32_t rsvd_1:1; + uint32_t delivery_status:1; + uint32_t rsvd_2:3; + uint32_t mask:1; + uint32_t rsvd_3:15; + } thermal; + struct { + uint32_t vector:8; + uint32_t rsvd_1:4; + uint32_t delivery_status:1; + uint32_t rsvd_2:3; + uint32_t mask:1; + uint32_t rsvd_3:15; + } common; + } bits; +}; + +union lapic_base_msr { + uint64_t value; + struct { + uint64_t rsvd_1:8; + uint64_t bsp:1; + uint64_t rsvd_2:1; + uint64_t x2APIC_enable:1; + uint64_t xAPIC_enable:1; + uint64_t lapic_paddr:24; + uint64_t rsvd_3:28; + } fields; +}; + +struct lapic_info { + int init_status; + struct { + paddr_t paddr; + vaddr_t vaddr; + } xapic; + +}; + +static struct lapic_info lapic_info; + +static uint32_t read_lapic_reg32(uint32_t offset) +{ + ASSERT((offset >= 0x020) && (offset <= 0x3FF), ""); + return mmio_read_long(lapic_info.xapic.vaddr + offset); +} + +static void write_lapic_reg32(uint32_t offset, uint32_t value) +{ + ASSERT((offset >= 0x020) && (offset <= 0x3FF), ""); + mmio_write_long(value, lapic_info.xapic.vaddr + offset); +} + +static void clear_lapic_isr(void) +{ + uint64_t isr_reg = LAPIC_IN_SERVICE_REGISTER_0; + + /* This is a Intel recommended procedure and assures that the processor + * does not get hung up due to already set "in-service" interrupts left + * over from the boot loader environment. This actually occurs in real + * life, therefore we will ensure all the in-service bits are clear. 
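+	 * Each EOI write retires the highest-priority in-service vector, so the
+	 * loop below keeps issuing EOIs until every ISR word reads back as zero.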
+ */ + do { + if (read_lapic_reg32(isr_reg)) { + write_lapic_reg32(LAPIC_EOI_REGISTER, 0); + continue; + } + isr_reg += 0x10; + } while (isr_reg <= LAPIC_IN_SERVICE_REGISTER_7); +} + +static void map_lapic(void) +{ + /* At some point we may need to translate this paddr to a vaddr. 1:1 + * mapping for now. + */ + lapic_info.xapic.vaddr = lapic_info.xapic.paddr; +} + +int early_init_lapic(void) +{ + union lapic_base_msr lapic_base_msr; + + /* Get local APIC base address */ + lapic_base_msr.value = msr_read(MSR_IA32_APIC_BASE); + + /* Initialize globals only 1 time */ + if (lapic_info.init_status == false) { + /* Get Local APIC physical address. */ + lapic_info.xapic.paddr = LAPIC_BASE; + + /* Map in the local xAPIC */ + map_lapic(); + + lapic_info.init_status = true; + } + + /* Check if xAPIC mode enabled */ + if (lapic_base_msr.fields.xAPIC_enable == 0) { + /* Ensure in xAPIC mode */ + lapic_base_msr.fields.xAPIC_enable = 1; + lapic_base_msr.fields.x2APIC_enable = 0; + msr_write(MSR_IA32_APIC_BASE, lapic_base_msr.value); + } else { + /* Check if x2apic is disabled */ + ASSERT(lapic_base_msr.fields.x2APIC_enable == 0, + "Disable X2APIC in BIOS"); + } + + return 0; +} + +int init_lapic(uint32_t cpu_id) +{ + /* Set the Logical Destination Register */ + write_lapic_reg32(LAPIC_LOGICAL_DESTINATION_REGISTER, + (1 << cpu_id) << 24); + + /* Set the Destination Format Register */ + write_lapic_reg32(LAPIC_DESTINATION_FORMAT_REGISTER, 0xf << 28); + + /* Mask all LAPIC LVT entries before enabling the local APIC */ + write_lapic_reg32(LAPIC_LVT_CMCI_REGISTER, LAPIC_LVT_MASK); + write_lapic_reg32(LAPIC_LVT_TIMER_REGISTER, LAPIC_LVT_MASK); + write_lapic_reg32(LAPIC_LVT_THERMAL_SENSOR_REGISTER, LAPIC_LVT_MASK); + write_lapic_reg32(LAPIC_LVT_PMC_REGISTER, LAPIC_LVT_MASK); + write_lapic_reg32(LAPIC_LVT_LINT0_REGISTER, LAPIC_LVT_MASK); + write_lapic_reg32(LAPIC_LVT_LINT1_REGISTER, LAPIC_LVT_MASK); + write_lapic_reg32(LAPIC_LVT_ERROR_REGISTER, LAPIC_LVT_MASK); + + /* Enable Local APIC */ + /* TODO: add spurious-interrupt handler */ + write_lapic_reg32(LAPIC_SPURIOUS_VECTOR_REGISTER, + LAPIC_SVR_APIC_ENABLE_MASK | LAPIC_SVR_VECTOR); + + /* Ensure there are no ISR bits set. */ + clear_lapic_isr(); + + return 0; +} + +int send_lapic_eoi(void) +{ + write_lapic_reg32(LAPIC_EOI_REGISTER, 0); + return 0; +} + +static void wait_for_delivery(void) +{ + union apic_icr tmp; + + do { + tmp.value_32.lo_32 = + read_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0); + } while (tmp.bits.delivery_status); +} + +uint32_t get_cur_lapic_id(void) +{ + uint32_t lapic_id; + + lapic_id = read_lapic_reg32(LAPIC_ID_REGISTER); + lapic_id = (lapic_id >> 24); + + return lapic_id; +} + +int +send_startup_ipi(enum intr_cpu_startup_shorthand cpu_startup_shorthand, + uint32_t cpu_startup_dest, paddr_t cpu_startup_start_address) +{ + union apic_icr icr; + uint8_t shorthand; + int status = 0; + uint32_t eax, ebx, ecx, edx; + uint32_t family; + + if (cpu_startup_shorthand >= INTR_CPU_STARTUP_UNKNOWN) + status = -EINVAL; + + ASSERT(status == 0, "Incorrect arguments"); + + icr.value = 0; + icr.bits.destination_mode = INTR_LAPIC_ICR_PHYSICAL; + + if (cpu_startup_shorthand == INTR_CPU_STARTUP_USE_DEST) { + shorthand = INTR_LAPIC_ICR_USE_DEST_ARRAY; + icr.x_bits.dest_field = per_cpu(lapic_id, cpu_startup_dest); + } else { /* Use destination shorthand */ + shorthand = INTR_LAPIC_ICR_ALL_EX_SELF; + icr.value_32.hi_32 = 0; + } + + /* + * family calculation from SDM Vol. 
2A + * CPUID with INPUT EAX=01h:Returns Model, Family, Stepping Information + */ + cpuid(CPUID_FEATURES, &eax, &ebx, &ecx, &edx); + family = (eax >> 8) & 0xff; + if (family == 0xF) + family += (eax >> 20) & 0xff; + + /* Assert INIT IPI */ + write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_1, icr.value_32.hi_32); + icr.bits.shorthand = shorthand; + icr.bits.delivery_mode = INTR_LAPIC_ICR_INIT; + icr.bits.level = INTR_LAPIC_ICR_ASSERT; + icr.bits.trigger_mode = INTR_LAPIC_ICR_LEVEL; + write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0, icr.value_32.lo_32); + wait_for_delivery(); + + /* Give 10ms for INIT sequence to complete for old processors. + * Modern processors (family == 6) don't need to wait here. + */ + if (family != 6) + mdelay(10); + + /* De-assert INIT IPI */ + write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_1, icr.value_32.hi_32); + icr.bits.level = INTR_LAPIC_ICR_DEASSERT; + write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0, icr.value_32.lo_32); + wait_for_delivery(); + + /* Send Start IPI with page number of secondary reset code */ + write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_1, icr.value_32.hi_32); + icr.value_32.lo_32 = 0; + icr.bits.shorthand = shorthand; + icr.bits.delivery_mode = INTR_LAPIC_ICR_STARTUP; + icr.bits.vector = ((paddr_t) cpu_startup_start_address) >> 12; + write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0, icr.value_32.lo_32); + wait_for_delivery(); + + if (family == 6) /* 10us is enough for Modern processors */ + udelay(10); + else /* 200us for old processors */ + udelay(200); + + /* Send another start IPI as per the Intel Arch specification */ + write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_1, icr.value_32.hi_32); + write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0, icr.value_32.lo_32); + wait_for_delivery(); + + return status; +} + +void send_single_ipi(uint32_t pcpu_id, uint32_t vector) +{ + uint32_t dest_lapic_id, hi_32, lo_32; + + /* Get the lapic ID of the destination processor. */ + dest_lapic_id = per_cpu(lapic_id, pcpu_id); + + /* Set the target processor. */ + hi_32 = dest_lapic_id << 24; + + /* Set the vector ID. */ + lo_32 = vector; + + /* Set the destination field to the target processor. */ + write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_1, hi_32); + + /* Write the vector ID to ICR. */ + write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0, lo_32); + + wait_for_delivery(); +} + +int send_shorthand_ipi(uint8_t vector, + enum intr_lapic_icr_shorthand shorthand, + enum intr_lapic_icr_delivery_mode delivery_mode) +{ + union apic_icr icr; + int status = 0; + + if ((shorthand < INTR_LAPIC_ICR_SELF) + || (shorthand > INTR_LAPIC_ICR_ALL_EX_SELF) + || (delivery_mode > INTR_LAPIC_ICR_NMI)) + status = -EINVAL; + + ASSERT(status == 0, "Incorrect arguments"); + + icr.value = 0; + icr.bits.shorthand = shorthand; + icr.bits.delivery_mode = delivery_mode; + icr.bits.vector = vector; + write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_1, icr.value_32.hi_32); + write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0, icr.value_32.lo_32); + wait_for_delivery(); + + return status; +} diff --git a/hypervisor/arch/x86/intr_main.c b/hypervisor/arch/x86/intr_main.c new file mode 100644 index 000000000..7c824126a --- /dev/null +++ b/hypervisor/arch/x86/intr_main.c @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +int interrupt_init(uint32_t cpu_id) +{ + struct host_idt_descriptor *idtd = &HOST_IDTR; + int status; + + set_idt(idtd); + + status = init_lapic(cpu_id); + ASSERT(status == 0, "lapic init failed"); + if (status != 0) + return -ENODEV; + + status = init_default_irqs(cpu_id); + ASSERT(status == 0, "irqs init failed"); + if (status != 0) + return -ENODEV; + + CPU_IRQ_ENABLE(); + + return status; +} diff --git a/hypervisor/arch/x86/io.c b/hypervisor/arch/x86/io.c new file mode 100644 index 000000000..05a3e6e23 --- /dev/null +++ b/hypervisor/arch/x86/io.c @@ -0,0 +1,292 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +int dm_emulate_pio_post(struct vcpu *vcpu) +{ + int cur = vcpu->vcpu_id; + int cur_context = vcpu->arch_vcpu.cur_context; + struct vhm_request_buffer *req_buf = + (void *)HPA2HVA(vcpu->vm->sw.req_buf); + uint32_t mask = + 0xFFFFFFFFul >> (32 - 8 * vcpu->req.reqs.pio_request.size); + uint64_t *rax; + + ASSERT(cur_context == 0, "pio emulation only happen in normal wrold"); + + rax = &vcpu->arch_vcpu.contexts[cur_context].guest_cpu_regs.regs.rax; + vcpu->req.reqs.pio_request.value = + req_buf->req_queue[cur].reqs.pio_request.value; + + /* VHM emulation data already copy to req, mark to free slot now */ + req_buf->req_queue[cur].valid = false; + + if (req_buf->req_queue[cur].processed != REQ_STATE_SUCCESS) + return -1; + + if (vcpu->req.reqs.pio_request.direction == REQUEST_READ) + *rax = ((*rax) & ~mask) | + (vcpu->req.reqs.pio_request.value & mask); + + return 0; +} + +static void dm_emulate_pio_pre(struct vcpu *vcpu, uint64_t exit_qual, + uint32_t sz, uint64_t req_value) +{ + vcpu->req.type = REQ_PORTIO; + if (VM_EXIT_IO_INSTRUCTION_ACCESS_DIRECTION(exit_qual)) + vcpu->req.reqs.pio_request.direction = REQUEST_READ; + else + vcpu->req.reqs.pio_request.direction = REQUEST_WRITE; + + vcpu->req.reqs.pio_request.address = + VM_EXIT_IO_INSTRUCTION_PORT_NUMBER(exit_qual); + vcpu->req.reqs.pio_request.size = sz; + vcpu->req.reqs.pio_request.value = req_value; +} + +int io_instr_handler(struct vcpu *vcpu) +{ + uint32_t sz; + uint32_t mask; + uint32_t port; + int8_t direction; + struct vm_io_handler *handler; + uint64_t exit_qual; + struct vm *vm = vcpu->vm; + int cur_context_idx = vcpu->arch_vcpu.cur_context; + struct run_context *cur_context; + int status = -EINVAL; + + ASSERT(cur_context_idx == 0, + "pio emulation only happen in normal wrold"); + + cur_context = &vcpu->arch_vcpu.contexts[cur_context_idx]; + exit_qual = vcpu->arch_vcpu.exit_qualification; + + sz = VM_EXIT_IO_INSTRUCTION_SIZE(exit_qual) + 1; + port = VM_EXIT_IO_INSTRUCTION_PORT_NUMBER(exit_qual); + direction = VM_EXIT_IO_INSTRUCTION_ACCESS_DIRECTION(exit_qual); + mask = 0xfffffffful >> (32 - 8 * sz); + + memset(&vcpu->req, 0, sizeof(struct vhm_request)); + + TRACE_4I(TRC_VMEXIT_IO_INSTRUCTION, port, direction, sz, + cur_context_idx); + + for (handler = vm->arch_vm.io_handler; + handler; handler = handler->next) { + + if ((port >= handler->desc.addr + handler->desc.len) || + (port + sz <= handler->desc.addr)) + continue; + + /* Dom0 do not require IO emulation */ + if (is_vm0(vm)) + status = 0; + + if (direction == 0) { + if (handler->desc.io_write == NULL) + continue; + + handler->desc.io_write(handler, vm, port, sz, + cur_context->guest_cpu_regs.regs.rax); + + pr_dbg("IO write on port %04x, data %08x", port, + cur_context->guest_cpu_regs.regs.rax & mask); + + status = 0; + break; + } else if (handler->desc.io_read) { + uint32_t data = handler->desc.io_read(handler, vm, + port, sz); + + cur_context->guest_cpu_regs.regs.rax &= ~mask; 
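+			/* merge the emulated read value into the low
+			 * 'sz' bytes of guest RAX, leaving the upper
+			 * bytes of the register untouched
+			 */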
+ cur_context->guest_cpu_regs.regs.rax |= data & mask; + + pr_dbg("IO read on port %04x, data %08x", port, data); + + status = 0; + break; + } + } + + /* Go for VHM */ + if (status != 0) { + uint64_t *rax = &cur_context->guest_cpu_regs.regs.rax; + + dm_emulate_pio_pre(vcpu, exit_qual, sz, *rax); + status = acrn_insert_request_wait(vcpu, &vcpu->req); + } + + if (status != 0) { + pr_fatal("IO %s access to port 0x%04x, size=%u", + direction ? "read" : "write", port, sz); + + } + + /* Catch any problems */ + ASSERT(status == 0, "Invalid IO access"); + + return status; +} + +static void register_io_handler(struct vm *vm, struct vm_io_handler *hdlr) +{ + if (vm->arch_vm.io_handler) + hdlr->next = vm->arch_vm.io_handler; + + vm->arch_vm.io_handler = hdlr; +} + +static void empty_io_handler_list(struct vm *vm) +{ + struct vm_io_handler *handler = vm->arch_vm.io_handler; + struct vm_io_handler *tmp; + + while (handler) { + tmp = handler; + handler = tmp->next; + free(tmp); + } + vm->arch_vm.io_handler = NULL; +} + +void free_io_emulation_resource(struct vm *vm) +{ + empty_io_handler_list(vm); + + /* Free I/O emulation bitmaps */ + free(vm->arch_vm.iobitmap[0]); + free(vm->arch_vm.iobitmap[1]); +} + +static void deny_guest_io_access(struct vm *vm, uint32_t address, uint32_t nbytes) +{ + uint32_t *b; + uint32_t i; + uint32_t a; + + for (i = 0; i < nbytes; i++) { + b = vm->arch_vm.iobitmap[0]; + if (address & 0x8000) + b = vm->arch_vm.iobitmap[1]; + a = address & 0x7fff; + b[a >> 5] |= (1 << (a & 0x1f)); + address++; + } +} + +static uint32_t +default_io_read(__unused struct vm_io_handler *hdlr, __unused struct vm *vm, + ioport_t address, size_t width) +{ + uint32_t v = io_read(address, width); + return v; +} + +static void default_io_write(__unused struct vm_io_handler *hdlr, + __unused struct vm *vm, ioport_t addr, + size_t width, uint32_t v) +{ + io_write(v, addr, width); +} + +static struct vm_io_handler *create_io_handler(uint32_t port, uint32_t len, + io_read_fn_t io_read_fn_ptr, + io_write_fn_t io_write_fn_ptr) +{ + + struct vm_io_handler *handler; + + handler = calloc(1, sizeof(struct vm_io_handler)); + + if (handler != NULL) { + handler->desc.addr = port; + handler->desc.len = len; + handler->desc.io_read = io_read_fn_ptr; + handler->desc.io_write = io_write_fn_ptr; + } else { + pr_err("Error: out of memory"); + } + + return handler; +} + +void setup_io_bitmap(struct vm *vm) +{ + /* Allocate VM architecture state and IO bitmaps A and B */ + vm->arch_vm.iobitmap[0] = alloc_page(); + vm->arch_vm.iobitmap[1] = alloc_page(); + + ASSERT(vm->arch_vm.iobitmap[0] && vm->arch_vm.iobitmap[1], ""); + + if (is_vm0(vm)) { + memset(vm->arch_vm.iobitmap[0], 0x00, CPU_PAGE_SIZE); + memset(vm->arch_vm.iobitmap[1], 0x00, CPU_PAGE_SIZE); + } else { + /* block all IO port access from Guest */ + memset(vm->arch_vm.iobitmap[0], 0xFF, CPU_PAGE_SIZE); + memset(vm->arch_vm.iobitmap[1], 0xFF, CPU_PAGE_SIZE); + } +} + +void register_io_emulation_handler(struct vm *vm, struct vm_io_range *range, + io_read_fn_t io_read_fn_ptr, + io_write_fn_t io_write_fn_ptr) +{ + struct vm_io_handler *handler = NULL; + io_read_fn_t io_read_fn = &default_io_read; + io_write_fn_t io_write_fn = &default_io_write; + + if (range->flags == IO_ATTR_RW && io_read_fn_ptr && io_write_fn_ptr) { + io_read_fn = io_read_fn_ptr; + io_write_fn = io_write_fn_ptr; + } else if (range->flags == IO_ATTR_R) { + if (io_read_fn_ptr) + io_read_fn = io_read_fn_ptr; + io_write_fn = NULL; + } + + if (is_vm0(vm)) + deny_guest_io_access(vm, range->base, range->len); 
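+	/* note: vm0's I/O bitmap is pass-through by default, so the port
+	 * range covered by this handler is marked above to force a VM exit
+	 * and route the access into the emulation handler registered below
+	 */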
+ + handler = create_io_handler(range->base, + range->len, io_read_fn, io_write_fn); + + register_io_handler(vm, handler); +} diff --git a/hypervisor/arch/x86/ioapic.c b/hypervisor/arch/x86/ioapic.c new file mode 100644 index 000000000..0561b73b7 --- /dev/null +++ b/hypervisor/arch/x86/ioapic.c @@ -0,0 +1,439 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +/* Register offsets */ +#define IOAPIC_REGSEL_OFFSET 0 +#define IOAPIC_WINSWL_OFFSET 0x10 + +/* IOAPIC Redirection Table (RTE) Entry structure */ +struct ioapic_rte { + uint32_t lo_32; + uint32_t hi_32; +} ioapic_rte; + +struct gsi_table { + uint8_t ioapic_id; + uint8_t pin; + uint64_t addr; +}; +static struct gsi_table gsi_table[NR_MAX_GSI]; +static int nr_gsi; +static spinlock_t ioapic_lock; + +/* + * the irq to ioapic pin mapping should extract from ACPI MADT table + * hardcoded here + */ +uint16_t legacy_irq_to_pin[NR_LEGACY_IRQ] = { + 2, /* IRQ0*/ + 1, /* IRQ1*/ + 0, /* IRQ2 connected to Pin0 (ExtInt source of PIC) if existing */ + 3, /* IRQ3*/ + 4, /* IRQ4*/ + 5, /* IRQ5*/ + 6, /* IRQ6*/ + 7, /* IRQ7*/ + 8, /* IRQ8*/ + 9 | IOAPIC_RTE_TRGRLVL, /* IRQ9*/ + 10, /* IRQ10*/ + 11, /* IRQ11*/ + 12, /* IRQ12*/ + 13, /* IRQ13*/ + 14, /* IRQ14*/ + 15, /* IRQ15*/ +}; + +static uint64_t map_ioapic( + uint64_t ioapic_paddr) +{ + /* At some point we may need to translate this paddr to a vaddr. + * 1:1 mapping for now. 
+ */ + return (vaddr_t) ioapic_paddr; +} + +static inline uint32_t +ioapic_read_reg32(const uint64_t ioapic_base, const uint8_t offset) +{ + uint32_t v; + + spinlock_rflags; + + spinlock_irqsave_obtain(&ioapic_lock); + + /* Write IOREGSEL */ + *(uint32_t *)(ioapic_base) = offset; + /* Read IOWIN */ + v = *(uint32_t *)(ioapic_base + IOAPIC_WINSWL_OFFSET); + + spinlock_irqrestore_release(&ioapic_lock); + return v; +} + +static inline void +ioapic_write_reg32(const uint64_t ioapic_base, + const uint8_t offset, const uint32_t value) +{ + spinlock_rflags; + + spinlock_irqsave_obtain(&ioapic_lock); + + /* Write IOREGSEL */ + *(uint32_t *)(ioapic_base) = offset; + /* Write IOWIN */ + *(uint32_t *)(ioapic_base + IOAPIC_WINSWL_OFFSET) = value; + + spinlock_irqrestore_release(&ioapic_lock); +} + +static inline uint64_t +get_ioapic_base(int apic_id) +{ + uint64_t addr = -1UL; + + /* should extract next ioapic from ACPI MADT table */ + if (apic_id == 0) + addr = DEFAULT_IO_APIC_BASE; + else if (apic_id == 1) + addr = 0xfec3f000; + else if (apic_id == 2) + addr = 0xfec7f000; + else + ASSERT(apic_id <= 2, "ACPI MADT table missing"); + return addr; +} + + +static inline void +ioapic_get_rte_entry(uint64_t ioapic_addr, + int pin, struct ioapic_rte *rte) +{ + rte->lo_32 = ioapic_read_reg32(ioapic_addr, pin*2 + 0x10); + rte->hi_32 = ioapic_read_reg32(ioapic_addr, pin*2 + 0x11); +} + +static inline void +ioapic_set_rte_entry(uint64_t ioapic_addr, + int pin, struct ioapic_rte *rte) +{ + ioapic_write_reg32(ioapic_addr, pin*2 + 0x10, rte->lo_32); + ioapic_write_reg32(ioapic_addr, pin*2 + 0x11, rte->hi_32); +} + +static inline struct ioapic_rte +create_rte_for_legacy_irq(int irq, int vr) +{ + struct ioapic_rte rte = {0, 0}; + + /* Legacy IRQ 0-15 setup, default masked + * are actually defined in either MPTable or ACPI MADT table + * before we have ACPI table parsing in HV we use common hardcode + */ + + rte.lo_32 |= IOAPIC_RTE_INTMSET; + rte.lo_32 |= (legacy_irq_to_pin[irq] & IOAPIC_RTE_TRGRLVL); + rte.lo_32 |= DEFAULT_DEST_MODE; + rte.lo_32 |= DEFAULT_DELIVERY_MODE; + rte.lo_32 |= (IOAPIC_RTE_INTVEC & vr); + + /* FIXME: Fixed to active Low? */ + rte.lo_32 |= IOAPIC_RTE_INTALO; + + /* Dest field: legacy irq fixed to CPU0 */ + rte.hi_32 |= 1 << 24; + + return rte; +} + +static inline struct ioapic_rte +create_rte_for_gsi_irq(int irq, int vr) +{ + struct ioapic_rte rte = {0, 0}; + + if (irq < NR_LEGACY_IRQ) + return create_rte_for_legacy_irq(irq, vr); + + /* irq default masked, level trig */ + rte.lo_32 |= IOAPIC_RTE_INTMSET; + rte.lo_32 |= IOAPIC_RTE_TRGRLVL; + rte.lo_32 |= DEFAULT_DEST_MODE; + rte.lo_32 |= DEFAULT_DELIVERY_MODE; + rte.lo_32 |= (IOAPIC_RTE_INTVEC & vr); + + /* FIXME: Fixed to active Low? 
*/ + rte.lo_32 |= IOAPIC_RTE_INTALO; + + /* Dest field */ + rte.hi_32 |= ALL_CPUS_MASK << 24; + + return rte; +} + +static void ioapic_set_routing(int gsi, int vr) +{ + uint64_t addr; + struct ioapic_rte rte; + + addr = gsi_table[gsi].addr; + rte = create_rte_for_gsi_irq(gsi, vr); + ioapic_set_rte_entry(addr, gsi_table[gsi].pin, &rte); + + if (rte.lo_32 & IOAPIC_RTE_TRGRMOD) + update_irq_handler(gsi, handle_level_interrupt_common); + else + update_irq_handler(gsi, common_handler_edge); + + dev_dbg(ACRN_DBG_IRQ, "GSI: irq:%d pin:%d rte:%x", + gsi, gsi_table[gsi].pin, + rte.lo_32); +} + +void ioapic_get_rte(int irq, uint64_t *rte) +{ + uint64_t addr; + struct ioapic_rte _rte; + + if (!irq_is_gsi(irq)) + return; + + addr = gsi_table[irq].addr; + ioapic_get_rte_entry(addr, gsi_table[irq].pin, &_rte); + + *rte = _rte.hi_32; + *rte = *rte << 32 | _rte.lo_32; +} + +void ioapic_set_rte(int irq, uint64_t raw_rte) +{ + uint64_t addr; + struct ioapic_rte rte; + + if (!irq_is_gsi(irq)) + return; + + addr = gsi_table[irq].addr; + rte.lo_32 = raw_rte; + rte.hi_32 = raw_rte >> 32; + ioapic_set_rte_entry(addr, gsi_table[irq].pin, &rte); + + dev_dbg(ACRN_DBG_IRQ, "GSI: irq:%d pin:%d rte:%x", + irq, gsi_table[irq].pin, + rte.lo_32); +} + +int irq_gsi_num(void) +{ + return nr_gsi; +} + +bool irq_is_gsi(int irq) +{ + return irq < nr_gsi; +} + +int irq_to_pin(int irq) +{ + if (irq_is_gsi(irq)) + return gsi_table[irq].pin; + else + return -1; +} + +int pin_to_irq(int pin) +{ + int i; + + if (pin < 0) + return IRQ_INVALID; + + for (i = 0; i < nr_gsi; i++) { + if (gsi_table[i].pin == (uint8_t) pin) + return i; + } + return IRQ_INVALID; +} + +void +irq_gsi_mask_unmask(int irq, bool mask) +{ + uint64_t addr = gsi_table[irq].addr; + int pin = gsi_table[irq].pin; + struct ioapic_rte rte; + + if (!irq_is_gsi(irq)) + return; + + ioapic_get_rte_entry(addr, pin, &rte); + if (mask) + rte.lo_32 |= IOAPIC_RTE_INTMSET; + else + rte.lo_32 &= ~IOAPIC_RTE_INTMASK; + ioapic_set_rte_entry(addr, pin, &rte); + dev_dbg(ACRN_DBG_PTIRQ, "update: irq:%d pin:%d rte:%x", + irq, pin, rte.lo_32); +} + +void setup_ioapic_irq(void) +{ + int ioapic_id; + int gsi; + int vr; + + spinlock_init(&ioapic_lock); + + for (ioapic_id = 0, gsi = 0; ioapic_id < NR_IOAPICS; ioapic_id++) { + int pin; + int max_pins; + int version; + uint64_t addr; + + addr = map_ioapic(get_ioapic_base(ioapic_id)); + version = ioapic_read_reg32(addr, IOAPIC_VER); + max_pins = (version & IOAPIC_MAX_RTE_MASK) >> MAX_RTE_SHIFT; + dev_dbg(ACRN_DBG_IRQ, "IOAPIC version: %x", version); + ASSERT(max_pins > NR_LEGACY_IRQ, + "Legacy IRQ num > total GSI"); + + for (pin = 0; pin < max_pins; pin++) { + gsi_table[gsi].ioapic_id = ioapic_id; + gsi_table[gsi].addr = addr; + + if (gsi < NR_LEGACY_IRQ) + gsi_table[gsi].pin = + legacy_irq_to_pin[gsi] & 0xff; + else + gsi_table[gsi].pin = pin; + + /* pinned irq before use it */ + if (irq_mark_used(gsi) < 0) { + pr_err("failed to alloc IRQ[%d]", gsi); + gsi++; + continue; + } + + /* assign vector for this GSI + * for legacy irq, reserved vector and never free + */ + if (gsi < NR_LEGACY_IRQ) { + vr = irq_desc_alloc_vector(gsi, false); + if (vr < 0) { + pr_err("failed to alloc VR"); + gsi++; + continue; + } + } else + vr = 0; /* not to allocate VR right now */ + + ioapic_set_routing(gsi, vr); + gsi++; + } + } + + /* system max gsi numbers */ + nr_gsi = gsi; + ASSERT(nr_gsi < NR_MAX_GSI, "GSI table overflow"); +} + +void dump_ioapic(void) +{ + int irq; + + for (irq = 0; irq < nr_gsi; irq++) { + uint64_t addr = gsi_table[irq].addr; + int pin = 
gsi_table[irq].pin; + struct ioapic_rte rte; + + ioapic_get_rte_entry(addr, pin, &rte); + dev_dbg(ACRN_DBG_IRQ, "DUMP: irq:%d pin:%d rte:%x", + irq, pin, rte.lo_32); + } +} + +void get_rte_info(struct ioapic_rte *rte, bool *mask, bool *irr, + bool *phys, int *delmode, bool *level, int *vector, uint32_t *dest) +{ + *mask = ((rte->lo_32 & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET); + *irr = ((rte->lo_32 & IOAPIC_RTE_REM_IRR) == IOAPIC_RTE_REM_IRR); + *phys = ((rte->lo_32 & IOAPIC_RTE_DESTMOD) == IOAPIC_RTE_DESTPHY); + *delmode = rte->lo_32 & IOAPIC_RTE_DELMOD; + *level = rte->lo_32 & IOAPIC_RTE_TRGRLVL ? true : false; + *vector = rte->lo_32 & IOAPIC_RTE_INTVEC; + *dest = rte->hi_32 >> APIC_ID_SHIFT; +} + +int get_ioapic_info(char *str, int str_max_len) +{ + int irq, len, size = str_max_len; + + len = snprintf(str, size, + "\r\nIRQ\tPIN\tRTE.HI32\tRTE.LO32\tVEC\tDST\tDM\tTM\tDELM\tIRR\tMASK"); + size -= len; + str += len; + + for (irq = 0; irq < nr_gsi; irq++) { + uint64_t addr = gsi_table[irq].addr; + int pin = gsi_table[irq].pin; + struct ioapic_rte rte; + + bool irr, phys, level, mask; + int delmode, vector; + uint32_t dest; + + ioapic_get_rte_entry(addr, pin, &rte); + + get_rte_info(&rte, &mask, &irr, &phys, &delmode, &level, + &vector, &dest); + + len = snprintf(str, size, "\r\n%03d\t%03d\t0x%08X\t0x%08X\t", + irq, pin, rte.hi_32, rte.lo_32); + size -= len; + str += len; + + len = snprintf(str, size, "0x%02X\t0x%02X\t%s\t%s\t%d\t%d\t%d", + vector, dest, phys ? "phys" : "logic", + level ? "level" : "edge", delmode >> 8, irr, mask); + size -= len; + str += len; + + if (size < 2) { + pr_err("\r\nsmall buffer for ioapic dump"); + return -1; + } + } + + snprintf(str, size, "\r\n"); + return 0; +} diff --git a/hypervisor/arch/x86/irq.c b/hypervisor/arch/x86/irq.c new file mode 100644 index 000000000..d709c4dfd --- /dev/null +++ b/hypervisor/arch/x86/irq.c @@ -0,0 +1,761 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include + +static spinlock_t exception_spinlock = { .head = 0, .tail = 0, }; + +struct irq_request_info { + /* vector set to 0xE0 ~ 0xFF for pri_register_handler + * and set to -1 for normal_register_handler + */ + int vector; + dev_handler_t func; + void *dev_data; + bool share; + bool lowpri; + char *name; +}; + +/* any field change in below required irq_lock protection with irqsave */ +struct irq_desc { + int irq; /* index to irq_desc_base */ + enum irq_state used; /* this irq have assigned to device */ + enum irq_desc_state state; /* irq_desc status */ + int vector; /* assigned vector */ + void *handler_data; /* irq_handler private data */ + int (*irq_handler)(struct irq_desc *irq_desc, void *handler_data); + struct dev_handler_node *dev_list; + spinlock_t irq_lock; + uint64_t *irq_cnt; /* this irq cnt happened on CPUs */ + uint64_t irq_lost_cnt; +}; + +static struct irq_desc *irq_desc_base; +static int vector_to_irq[NR_MAX_VECTOR + 1]; + +static DEFINE_CPU_DATA(uint64_t[NR_MAX_IRQS], irq_count); +static DEFINE_CPU_DATA(uint64_t, spurious); + +spurious_handler_t spurious_handler; + +static void init_irq_desc(void) +{ + int i, page_num = 0; + int desc_size = NR_MAX_IRQS * sizeof(struct irq_desc); + + page_num = (desc_size + CPU_PAGE_SIZE-1) >> CPU_PAGE_SHIFT; + + irq_desc_base = alloc_pages(page_num); + + ASSERT(irq_desc_base, "page alloc failed!"); + memset(irq_desc_base, 0, page_num * CPU_PAGE_SIZE); + + for (i = 0; i < NR_MAX_IRQS; i++) { + irq_desc_base[i].irq = i; + irq_desc_base[i].vector = VECTOR_INVALID; + spinlock_init(&irq_desc_base[i].irq_lock); + } + + for (i = 0; i <= NR_MAX_VECTOR; i++) + vector_to_irq[i] = IRQ_INVALID; + +} + +/* + * alloc vector 0x20-0xDF for irq + * lowpri: 0x20-0x7F + * highpri: 0x80-0xDF + */ +static int find_available_vector(bool lowpri) +{ + int i, start, end; + + if (lowpri) { + start = VECTOR_FOR_NOR_LOWPRI_START; + end = VECTOR_FOR_NOR_LOWPRI_END; + } else { + start = VECTOR_FOR_NOR_HIGHPRI_START; + end = VECTOR_FOR_NOR_HIGHPRI_END; + } + + /* TODO: vector lock required */ + for (i = start; i < end; i++) { + if (vector_to_irq[i] == IRQ_INVALID) + return i; + } + return -1; +} + +/* + * check and set irq to be assigned + * return: -1 if irq already assigned otherwise return irq + */ +int irq_mark_used(int irq) +{ + struct irq_desc *desc; + + spinlock_rflags; + + if (irq < 0) + return -1; + + desc = irq_desc_base + irq; + spinlock_irqsave_obtain(&desc->irq_lock); + if (desc->used == IRQ_NOT_ASSIGNED) + desc->used = IRQ_ASSIGNED_NOSHARE; + spinlock_irqrestore_release(&desc->irq_lock); + return irq; +} + +/* + * system find available irq and set assigned + * return: irq, -1 not found + */ +static int alloc_irq(void) +{ + int i; + struct irq_desc *desc; + + spinlock_rflags; + + for (i = irq_gsi_num(); i < NR_MAX_IRQS; i++) { + desc = irq_desc_base + i; + spinlock_irqsave_obtain(&desc->irq_lock); + if (desc->used == IRQ_NOT_ASSIGNED) { + desc->used = IRQ_ASSIGNED_NOSHARE; + spinlock_irqrestore_release(&desc->irq_lock); + break; + } + spinlock_irqrestore_release(&desc->irq_lock); + } + return (i == NR_MAX_IRQS) ? 
-1:i; +} + +/* need irq_lock protection before use */ +static void _irq_desc_set_vector(int irq, int vr) +{ + struct irq_desc *desc; + + desc = irq_desc_base + irq; + vector_to_irq[vr] = irq; + desc->vector = vr; +} + +/* lock version of set vector */ +static void irq_desc_set_vector(int irq, int vr) +{ + struct irq_desc *desc; + + spinlock_rflags; + + desc = irq_desc_base + irq; + spinlock_irqsave_obtain(&desc->irq_lock); + vector_to_irq[vr] = irq; + desc->vector = vr; + spinlock_irqrestore_release(&desc->irq_lock); +} + +/* used with holding irq_lock outside */ +static void _irq_desc_free_vector(int irq) +{ + struct irq_desc *desc; + int vr; + + if (irq > NR_MAX_IRQS || irq < 0) + return; + + desc = irq_desc_base + irq; + + vr = desc->vector; + desc->used = IRQ_NOT_ASSIGNED; + desc->state = IRQ_DESC_PENDING; + desc->vector = VECTOR_INVALID; + + vr &= NR_MAX_VECTOR; + if (vector_to_irq[vr] == irq) + vector_to_irq[vr] = IRQ_INVALID; +} + +static void disable_pic_irq(void) +{ + io_write_byte(0xff, 0xA1); + io_write_byte(0xff, 0x21); +} + +static bool +irq_desc_append_dev(struct irq_desc *desc, void *node, bool share) +{ + struct dev_handler_node *dev_list; + bool added = true; + + spinlock_rflags; + + spinlock_irqsave_obtain(&desc->irq_lock); + dev_list = desc->dev_list; + + /* assign if first node */ + if (dev_list == NULL) { + desc->dev_list = node; + desc->used = (share)?IRQ_ASSIGNED_SHARED:IRQ_ASSIGNED_NOSHARE; + + /* Only GSI possible for Level and it already init during + * ioapic setup. + * caller can later update it with update_irq_handler() + */ + if (!desc->irq_handler) + desc->irq_handler = common_handler_edge; + } else if (!share || desc->used == IRQ_ASSIGNED_NOSHARE) { + /* dev node added failed */ + added = false; + } else { + /* dev_list point to last valid node */ + while (dev_list->next) + dev_list = dev_list->next; + /* add node */ + dev_list->next = node; + } + spinlock_irqrestore_release(&desc->irq_lock); + + return added; +} + +static struct dev_handler_node* +common_register_handler(int irq, + struct irq_request_info *info) +{ + struct dev_handler_node *node = NULL; + struct irq_desc *desc; + bool added = false; + + /* ====================================================== + * This is low level ISR handler registering function + * case: irq = -1 + * caller did not know which irq to use, and want system to + * allocate available irq for it. These irq are in range: + * nr_gsi ~ NR_MAX_IRQS + * a irq will be allocated and the vector will be assigned to this + * irq automatically. + * + * case: irq >=0 and irq < nr_gsi + * caller want to add device ISR handler into ioapic pins. + * two kind of devices: legacy device and PCI device with INTx + * a vector will automatically assigned. + * + * case: irq with speical type (not from IOAPIC/MSI) + * These irq value are pre-defined for Timer, IPI, Spurious etc + * vectors are pre-defined also + * + * return value: pinned irq and assigned vector for this irq. 
+ * caller can use this irq to enable/disable/mask/unmask interrupt + * and if this irq is for: + * GSI legacy: nothing to do for legacy irq, already initialized + * GSI other: need to progam PCI INTx to match this irq pin + * MSI: caller need program vector to PCI device + * + * ===================================================== + */ + ASSERT(info != NULL, "Invalid param"); + + /* HV select a irq for device if irq < 0 + * this vector/irq match to APCI DSDT or PCI INTx/MSI + */ + if (irq < 0) + irq = alloc_irq(); + else + irq = irq_mark_used(irq); + + if (irq < 0) { + pr_err("failed to assign IRQ"); + goto OUT; + } + + node = calloc(1, sizeof(struct dev_handler_node)); + if (node == NULL) { + pr_err("failed to alloc node"); + irq_desc_try_free_vector(irq); + goto OUT; + } + + desc = irq_desc_base + irq; + added = irq_desc_append_dev(desc, node, info->share); + if (!added) { + free(node); + node = NULL; + pr_err("failed to add node to non-shared irq"); + } +OUT: + if (added) { + /* it is safe to call irq_desc_alloc_vector multiple times*/ + if (info->vector >= VECTOR_FOR_PRI_START && + info->vector <= VECTOR_FOR_PRI_END) + irq_desc_set_vector(irq, info->vector); + else if (info->vector < 0) + irq_desc_alloc_vector(irq, info->lowpri); + else { + pr_err("the input vector is not correct"); + free(node); + return NULL; + } + + node->dev_handler = info->func; + node->dev_data = info->dev_data; + node->desc = desc; + + /* we are okay using strcpy_s here even with spinlock + * since no #PG in HV right now + */ + strcpy_s(node->name, 32, info->name); + dev_dbg(ACRN_DBG_IRQ, "[%s] %s irq%d vr:0x%x", + __func__, node->name, irq, desc->vector); + } + + return node; +} + +/* it is safe to call irq_desc_alloc_vector multiple times*/ +int irq_desc_alloc_vector(int irq, bool lowpri) +{ + int vr = -1; + struct irq_desc *desc; + + spinlock_rflags; + + /* irq should be always available at this time */ + if (irq > NR_MAX_IRQS || irq < 0) + return false; + + desc = irq_desc_base + irq; + spinlock_irqsave_obtain(&desc->irq_lock); + if (desc->vector != VECTOR_INVALID) { + /* already allocated a vector */ + goto OUT; + } + + /* FLAT mode, a irq connected to every cpu's same vector */ + vr = find_available_vector(lowpri); + if (vr < 0) { + pr_err("no vector found for irq[%d]", irq); + goto OUT; + } + _irq_desc_set_vector(irq, vr); +OUT: + spinlock_irqrestore_release(&desc->irq_lock); + return vr; +} + +void irq_desc_try_free_vector(int irq) +{ + struct irq_desc *desc; + + spinlock_rflags; + + /* legacy irq's vector is reserved and should not be freed */ + if (irq > NR_MAX_IRQS || irq < NR_LEGACY_IRQ) + return; + + desc = irq_desc_base + irq; + spinlock_irqsave_obtain(&desc->irq_lock); + if (desc->dev_list == NULL) + _irq_desc_free_vector(irq); + + spinlock_irqrestore_release(&desc->irq_lock); + +} + +int irq_to_vector(int irq) +{ + if (irq < NR_MAX_IRQS) + return irq_desc_base[irq].vector; + else + return VECTOR_INVALID; +} + +int dev_to_irq(struct dev_handler_node *node) +{ + return node->desc->irq; +} + +int dev_to_vector(struct dev_handler_node *node) +{ + return node->desc->vector; +} + +int init_default_irqs(unsigned int cpu_id) +{ + if (cpu_id > 0) + return 0; + + init_irq_desc(); + + /* we use ioapic only, disable legacy PIC */ + disable_pic_irq(); + setup_ioapic_irq(); + init_softirq(); + + return 0; +} + +void dispatch_exception(struct intr_ctx *ctx) +{ + unsigned int cpu_id = get_cpu_id(); + + /* Obtain lock to ensure exception dump doesn't get corrupted */ + spinlock_obtain(&exception_spinlock); + + 
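+	/* the hypervisor cannot recover from its own exceptions:
+	 * dump the context here, then park this CPU below
+	 */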
dump_exception(ctx, cpu_id); + + /* Release lock to let other CPUs handle exception */ + spinlock_release(&exception_spinlock); + + /* Halt the CPU */ + cpu_halt(cpu_id); +} + +int handle_spurious_interrupt(int vector) +{ + send_lapic_eoi(); + + get_cpu_var(spurious)++; + + pr_warn("Spurious vector: 0x%x.", vector); + + if (spurious_handler) + return spurious_handler(vector); + else + return 0; +} + +/* do_IRQ() */ +int dispatch_interrupt(struct intr_ctx *ctx) +{ + int vr = ctx->vector; + int irq = vector_to_irq[vr]; + struct irq_desc *desc; + + if (irq == IRQ_INVALID) + goto ERR; + + desc = irq_desc_base + irq; + per_cpu(irq_count, get_cpu_id())[irq]++; + + if (vr != desc->vector) + goto ERR; + + if (desc->used == IRQ_NOT_ASSIGNED || !desc->irq_handler) { + /* mask irq if possible */ + goto ERR; + } + + desc->irq_handler(desc, desc->handler_data); + return 0; +ERR: + return handle_spurious_interrupt(vr); +} + +int handle_level_interrupt_common(struct irq_desc *desc, + __unused void *handler_data) +{ + struct dev_handler_node *dev = desc->dev_list; + spinlock_rflags; + + /* + * give other Core a try to return without hold irq_lock + * and record irq_lost count here + */ + if (desc->state != IRQ_DESC_PENDING) { + send_lapic_eoi(); + desc->irq_lost_cnt++; + return 0; + } + + spinlock_irqsave_obtain(&desc->irq_lock); + desc->state = IRQ_DESC_IN_PROCESS; + + /* mask iopaic pin */ + if (irq_is_gsi(desc->irq)) + GSI_MASK_IRQ(desc->irq); + + /* Send EOI to LAPIC/IOAPIC IRR */ + send_lapic_eoi(); + + while (dev) { + if (dev->dev_handler) + dev->dev_handler(desc->irq, dev->dev_data); + dev = dev->next; + } + + if (irq_is_gsi(desc->irq)) + GSI_UNMASK_IRQ(desc->irq); + + desc->state = IRQ_DESC_PENDING; + spinlock_irqrestore_release(&desc->irq_lock); + + return 0; +} + +int common_handler_edge(struct irq_desc *desc, __unused void *handler_data) +{ + struct dev_handler_node *dev = desc->dev_list; + spinlock_rflags; + + /* + * give other Core a try to return without hold irq_lock + * and record irq_lost count here + */ + if (desc->state != IRQ_DESC_PENDING) { + send_lapic_eoi(); + desc->irq_lost_cnt++; + return 0; + } + + spinlock_irqsave_obtain(&desc->irq_lock); + desc->state = IRQ_DESC_IN_PROCESS; + + /* Send EOI to LAPIC/IOAPIC IRR */ + send_lapic_eoi(); + + while (dev) { + if (dev->dev_handler) + dev->dev_handler(desc->irq, dev->dev_data); + dev = dev->next; + } + + desc->state = IRQ_DESC_PENDING; + spinlock_irqrestore_release(&desc->irq_lock); + + return 0; +} + +int common_dev_handler_level(struct irq_desc *desc, __unused void *handler_data) +{ + struct dev_handler_node *dev = desc->dev_list; + spinlock_rflags; + + /* + * give other Core a try to return without hold irq_lock + * and record irq_lost count here + */ + if (desc->state != IRQ_DESC_PENDING) { + send_lapic_eoi(); + desc->irq_lost_cnt++; + return 0; + } + + spinlock_irqsave_obtain(&desc->irq_lock); + desc->state = IRQ_DESC_IN_PROCESS; + + /* mask iopaic pin */ + if (irq_is_gsi(desc->irq)) + GSI_MASK_IRQ(desc->irq); + + /* Send EOI to LAPIC/IOAPIC IRR */ + send_lapic_eoi(); + + while (dev) { + if (dev->dev_handler) + dev->dev_handler(desc->irq, dev->dev_data); + dev = dev->next; + } + + desc->state = IRQ_DESC_PENDING; + spinlock_irqrestore_release(&desc->irq_lock); + + /* we did not unmask irq until guest EOI the vector */ + return 0; +} + +/* no desc->irq_lock for quick handling local interrupt like lapic timer */ +int quick_handler_nolock(struct irq_desc *desc, __unused void *handler_data) +{ + struct dev_handler_node *dev = 
desc->dev_list; + + /* Send EOI to LAPIC/IOAPIC IRR */ + send_lapic_eoi(); + + while (dev) { + if (dev->dev_handler) + dev->dev_handler(desc->irq, dev->dev_data); + dev = dev->next; + } + + return 0; +} + +void update_irq_handler(int irq, irq_handler_t func) +{ + struct irq_desc *desc; + + spinlock_rflags; + + if (irq >= NR_MAX_IRQS) + return; + + desc = irq_desc_base + irq; + spinlock_irqsave_obtain(&desc->irq_lock); + desc->irq_handler = func; + spinlock_irqrestore_release(&desc->irq_lock); +} + +void unregister_handler_common(struct dev_handler_node *node) +{ + struct dev_handler_node *head; + struct irq_desc *desc; + + spinlock_rflags; + + if (node == NULL) + return; + + dev_dbg(ACRN_DBG_IRQ, "[%s] %s irq%d vr:0x%x", + __func__, node->name, + dev_to_irq(node), + dev_to_vector(node)); + + desc = node->desc; + spinlock_irqsave_obtain(&desc->irq_lock); + + head = desc->dev_list; + if (head == node) { + desc->dev_list = NULL; + goto UNLOCK_EXIT; + } + + while (head->next) { + if (head->next == node) + break; + head = head->next; + } + + head->next = node->next; + +UNLOCK_EXIT: + spinlock_irqrestore_release(&desc->irq_lock); + irq_desc_try_free_vector(desc->irq); + free(node); +} + +/* + * Allocate IRQ with Vector from 0x20 ~ 0xDF + */ +struct dev_handler_node* +normal_register_handler(int irq, + dev_handler_t func, + void *dev_data, + bool share, + bool lowpri, + const char *name) +{ + struct irq_request_info info; + + info.vector = -1; + info.lowpri = lowpri; + info.func = func; + info.dev_data = dev_data; + info.share = share; + info.name = (char *)name; + + return common_register_handler(irq, &info); +} + +/* + * Allocate IRQ with vector from 0xE0 ~ 0xFF + * Allocate a IRQ and install isr on that specific cpu + * User can install same irq/isr on different CPU by call this function multiple + * times + */ +struct dev_handler_node* +pri_register_handler(int irq, + int vector, + dev_handler_t func, + void *dev_data, + const char *name) +{ + struct irq_request_info info; + + if (vector < VECTOR_FOR_PRI_START || vector > VECTOR_FOR_PRI_END) + return NULL; + + info.vector = vector; + info.lowpri = false; + info.func = func; + info.dev_data = dev_data; + info.share = true; + info.name = (char *)name; + + return common_register_handler(irq, &info); +} + +int get_cpu_interrupt_info(char *str, int str_max) +{ + int irq, vector, pcpu_id, len, size = str_max; + struct irq_desc *desc; + + len = snprintf(str, size, "\r\nIRQ\tVECTOR"); + size -= len; + str += len; + for (pcpu_id = 0; pcpu_id < phy_cpu_num; pcpu_id++) { + len = snprintf(str, size, "\tCPU%d", pcpu_id); + size -= len; + str += len; + } + len = snprintf(str, size, "\tLOST\tSHARE"); + size -= len; + str += len; + + for (irq = 0; irq < NR_MAX_IRQS; irq++) { + desc = irq_desc_base + irq; + vector = irq_to_vector(irq); + if (desc->used != IRQ_NOT_ASSIGNED && + vector != VECTOR_INVALID) { + len = snprintf(str, size, "\r\n%d\t0x%X", irq, vector); + size -= len; + str += len; + for (pcpu_id = 0; pcpu_id < phy_cpu_num; pcpu_id++) { + len = snprintf(str, size, "\t%d", + per_cpu(irq_count, pcpu_id)[irq]++); + size -= len; + str += len; + } + len = snprintf(str, size, "\t%d\t%s", + desc->irq_lost_cnt, + desc->used == IRQ_ASSIGNED_SHARED ? 
+ "shared" : "no-shared"); + size -= len; + str += len; + } + } + snprintf(str, size, "\r\n"); + return 0; +} diff --git a/hypervisor/arch/x86/mmu.c b/hypervisor/arch/x86/mmu.c new file mode 100644 index 000000000..0b104b112 --- /dev/null +++ b/hypervisor/arch/x86/mmu.c @@ -0,0 +1,932 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * Copyright (c) 2017 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include +#include + +static void *mmu_pml4_addr; + +enum mem_map_request_type { + PAGING_REQUEST_TYPE_MAP = 0, /* Creates a new mapping. */ + PAGING_REQUEST_TYPE_UNMAP = 1, /* Removes a pre-existing entry */ + PAGING_REQUEST_TYPE_MODIFY = 2, + /* Modifies a pre-existing entries attributes. */ + PAGING_REQUEST_TYPE_UNKNOWN, +}; + +struct mm_capability { + /* EPT and MMU 1-GByte page supported flag */ + bool ept_1gb_page_supported; + bool invept_supported; + bool invept_single_context_supported; + bool invept_global_context_supported; + bool invvpid_supported; + bool invvpid_single_context_supported; + bool invvpid_global_context_supported; + bool mmu_1gb_page_supported; +}; +static struct mm_capability mm_caps; + +#define INVEPT_TYPE_SINGLE_CONTEXT 1UL +#define INVEPT_TYPE_ALL_CONTEXTS 2UL +#define INVEPT_SET_ERROR_CODE \ + " jnc 1f\n" \ + " mov $1, %0\n" /* CF: error = 1 */ \ + " jmp 3f\n" \ + "1: jnz 2f\n" \ + " mov $2, %0\n" /* ZF: error = 2 */ \ + " jmp 3f\n" \ + "2: mov $0, %0\n" \ + "3:" + +struct invept_desc { + uint64_t eptp; + uint64_t _res; +}; + +static inline void _invept(uint64_t type, struct invept_desc desc) +{ + int error = 0; + + asm volatile ("invept %1, %2\n" + INVEPT_SET_ERROR_CODE + : "=r" (error) + : "m" (desc), "r" (type) + : "memory"); + + ASSERT(error == 0, "invept error"); +} + +static void check_mmu_capability(void) +{ + uint64_t val; + uint32_t eax, ebx, ecx, edx; + + memset(&mm_caps, 0, sizeof(struct mm_capability)); + + /* Read the MSR register of EPT and VPID Capability - SDM A.10 */ + val = msr_read(MSR_IA32_VMX_EPT_VPID_CAP); + mm_caps.ept_1gb_page_supported = (val & MSR_VMX_EPT_VPID_CAP_1GB) + ? (true) : (false); + mm_caps.invept_supported = + (val & MSR_VMX_INVEPT) ? 
(true) : (false); + mm_caps.invept_single_context_supported = + (val & MSR_VMX_INVEPT_SINGLE_CONTEXT) ? (true) : (false); + mm_caps.invept_global_context_supported = + (val & MSR_VMX_INVEPT_GLOBAL_CONTEXT) ? (true) : (false); + mm_caps.invvpid_supported = + (val & MSR_VMX_INVVPID) ? (true) : (false); + mm_caps.invvpid_single_context_supported = + (val & MSR_VMX_INVVPID_SINGLE_CONTEXT) ? (true) : (false); + mm_caps.invvpid_global_context_supported = + (val & MSR_VMX_INVVPID_GLOBAL_CONTEXT) ? (true) : (false); + + /* Read CPUID to check if PAGE1GB is supported + * SDM 4.1.4 If CPUID.80000001H:EDX.Page1GB[bit26]=1, + * 1-GByte pages are supported with 4-level paging + */ + cpuid(CPUID_EXTEND_FUNCTION_1, &eax, &ebx, &ecx, &edx); + mm_caps.mmu_1gb_page_supported = (edx & CPUID_EDX_PAGE1GB) ? + (true) : (false); +} + +static inline bool check_invept_single_support(void) +{ + return mm_caps.invept_supported && + mm_caps.invept_single_context_supported; +} + +static inline bool check_invept_global_support(void) +{ + return mm_caps.invept_supported && + mm_caps.invept_global_context_supported; +} + +void mmu_invept(struct vcpu *vcpu) +{ + struct invept_desc desc = {0}; + + if (check_invept_single_support()) { + desc.eptp = (uint64_t) vcpu->vm->arch_vm.ept | (3 << 3) | 6; + _invept(INVEPT_TYPE_SINGLE_CONTEXT, desc); + } else if (check_invept_global_support()) + _invept(INVEPT_TYPE_ALL_CONTEXTS, desc); +} + +static bool check_mmu_1gb_support(struct map_params *map_params) +{ + bool status = false; + + if (map_params->page_table_type == PT_EPT) + status = mm_caps.ept_1gb_page_supported; + else + status = mm_caps.mmu_1gb_page_supported; + return status; +} + +static uint32_t map_mem_region(void *vaddr, void *paddr, + void *table_base, uint64_t attr, uint32_t table_level, + int ept_entry, enum mem_map_request_type request_type) +{ + uint64_t table_entry; + uint64_t table_present; + uint32_t table_offset; + uint32_t mapped_size; + + if (table_base == NULL || table_level >= IA32E_UNKNOWN + || request_type >= PAGING_REQUEST_TYPE_UNKNOWN) { + /* Shouldn't go here */ + ASSERT(false, "Incorrect Arguments. 
Failed to map region"); + } + + /* switch based on of table */ + switch (table_level) { + case IA32E_PDPT: + + /* Get offset to the entry in the PDPT for this address */ + table_offset = IA32E_PDPTE_INDEX_CALC(vaddr); + + /* PS bit must be set for these entries to be mapped */ + attr |= IA32E_PDPTE_PS_BIT; + + /* Set mapped size to 1 GB */ + mapped_size = MEM_1G; + + break; + + case IA32E_PD: + + /* Get offset to the entry in the PD for this address */ + table_offset = IA32E_PDE_INDEX_CALC(vaddr); + + /* PS bit must be set for these entries to be mapped */ + attr |= IA32E_PDE_PS_BIT; + + /* Set mapped size to 2 MB */ + mapped_size = MEM_2M; + + break; + + case IA32E_PT: + + /* Get offset to the entry in the PT for this address */ + table_offset = IA32E_PTE_INDEX_CALC(vaddr); + + /* NOTE: No PS bit in page table entries */ + + /* Set mapped size to 4 KB */ + mapped_size = MEM_4K; + + /* If not a EPT entry, see if the PAT bit is set for PDPT entry + */ + if ((!ept_entry) && (attr & IA32E_PDPTE_PAT_BIT)) { + /* The PAT bit is set; Clear it and set the page table + * PAT bit instead + */ + attr &= (uint64_t) (~((uint64_t) IA32E_PDPTE_PAT_BIT)); + attr |= IA32E_PTE_PAT_BIT; + } + + break; + + case IA32E_PML4: + default: + + /* Set mapping size to 0 - can't map memory in PML4 */ + mapped_size = 0; + + break; + } + + /* Check to see if mapping should occur */ + if (mapped_size != 0) { + /* Get current table entry */ + uint64_t tmp = MEM_READ64(table_base + table_offset); + + /* Check if EPT entry */ + if (ept_entry) { + /* Use read/write/execute bits to determine presence of + * entry + */ + table_present = (IA32E_EPT_R_BIT | + IA32E_EPT_W_BIT | IA32E_EPT_X_BIT); + } else { + /* Use the P bit to determine if an entry is present */ + table_present = IA32E_COMM_P_BIT; + } + + switch (request_type) { + case PAGING_REQUEST_TYPE_MAP: + { + /* No need to confirm current table entry + * isn't already present + * support map-->remap + */ + table_entry = (ept_entry + ? attr + : (attr | IA32E_COMM_P_BIT)); + + table_entry |= (uint64_t)paddr; + + /* Write the table entry to map this memory */ + MEM_WRITE64(table_base + table_offset, table_entry); + break; + } + case PAGING_REQUEST_TYPE_UNMAP: + { + if (tmp & table_present) { + /* Table is present. + * Write the table entry to map this memory + */ + MEM_WRITE64(table_base + table_offset, 0); + } + break; + } + case PAGING_REQUEST_TYPE_MODIFY: + { + /* Allow mapping or modification as requested. */ + table_entry = (ept_entry + ? 
attr : (attr | IA32E_COMM_P_BIT)); + + table_entry |= (uint64_t) paddr; + + /* Write the table entry to map this memory */ + MEM_WRITE64(table_base + table_offset, table_entry); + + break; + } + default: + ASSERT("Bad memory map request type" == 0, ""); + break; + } + } + + /* Return mapped size to caller */ + return mapped_size; +} + +static uint32_t fetch_page_table_offset(void *addr, uint32_t table_level) +{ + uint32_t table_offset; + + /* Switch based on level of table */ + switch (table_level) { + case IA32E_PML4: + + /* Get offset to the entry in the PML4 + * for this address + */ + table_offset = IA32E_PML4E_INDEX_CALC(addr); + break; + + case IA32E_PDPT: + + /* Get offset to the entry in the PDPT + * for this address + */ + table_offset = IA32E_PDPTE_INDEX_CALC(addr); + break; + + case IA32E_PD: + + /* Get offset to the entry in the PD + * for this address + */ + table_offset = IA32E_PDE_INDEX_CALC(addr); + break; + + case IA32E_PT: + table_offset = IA32E_PTE_INDEX_CALC(addr); + break; + + default: + pr_err("Wrong page table level = 0x%lx", table_level); + ASSERT(false, "Wrong page table level"); + break; + } + + return table_offset; +} + +static inline uint32_t check_page_table_present(struct map_params *map_params, + uint64_t table_entry) +{ + if (map_params->page_table_type == PT_EPT) { + table_entry &= (IA32E_EPT_R_BIT | IA32E_EPT_W_BIT | + IA32E_EPT_X_BIT); + } else { + table_entry &= (IA32E_COMM_P_BIT); + } + + return (table_entry) ? PT_PRESENT : PT_NOT_PRESENT; +} + +static uint64_t get_table_entry(struct map_params *map_params, void *addr, + void *table_base, uint32_t table_level) +{ + uint32_t table_offset; + uint64_t table_entry; + int status = 0; + + if (table_base == NULL + || table_level >= IA32E_UNKNOWN + || map_params == NULL) { + status = -EINVAL; + } + ASSERT(status == 0, "Incorrect Arguments"); + + table_offset = fetch_page_table_offset(addr, table_level); + + /* Read the table entry */ + table_entry = MEM_READ64(table_base + table_offset); + + /* Return the next table in the walk */ + return table_entry; +} + +static void *walk_paging_struct(void *addr, void *table_base, + uint32_t table_level, struct map_params *map_params) +{ + uint32_t table_offset; + uint64_t table_entry; + uint64_t table_present; + /* if table_level == IA32E_PT Just return the same address + * can't walk down any further + */ + void *sub_table_addr = ((table_level == IA32E_PT) ? 
table_base:NULL); + int status = 0; + + if (table_base == NULL || table_level >= IA32E_UNKNOWN + || map_params == NULL) { + status = -EINVAL; + } + ASSERT(status == 0, "Incorrect Arguments"); + + table_offset = fetch_page_table_offset(addr, table_level); + + /* See if we can skip the rest */ + if (sub_table_addr != table_base) { + /* Read the table entry */ + table_entry = MEM_READ64(table_base + table_offset); + + /* Check if EPT entry being created */ + if (map_params->page_table_type == PT_EPT) { + /* Set table present bits to any of the + * read/write/execute bits + */ + table_present = (IA32E_EPT_R_BIT | IA32E_EPT_W_BIT | + IA32E_EPT_X_BIT); + } else { + /* Set table preset bits to P bit or r/w bit */ + table_present = (IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT); + } + + /* Determine if a valid entry exists */ + if ((table_entry & table_present) == 0) { + /* No entry present - need to allocate a new table */ + sub_table_addr = + alloc_paging_struct(); + /* Check to ensure memory available for this structure*/ + if (sub_table_addr == 0) { + /* Error: Unable to find table memory necessary + * to map memory + */ + ASSERT(sub_table_addr == 0, + "Fail to find table memory " + "for map memory"); + + return sub_table_addr; + } + + /* Write entry to current table to reference the new + * sub-table + */ + MEM_WRITE64(table_base + table_offset, + (uint64_t) sub_table_addr | table_present); + } else { + /* Get address of the sub-table */ + sub_table_addr = (void *)(table_entry & IA32E_REF_MASK); + } + } + + /* Return the next table in the walk */ + return sub_table_addr; +} + +void *get_paging_pml4(void) +{ + /* Return address to caller */ + return mmu_pml4_addr; +} + +void enable_paging(void *pml4_base_addr) +{ + CPU_CR_WRITE(cr3, (unsigned long)pml4_base_addr); +} + +void init_paging(void) +{ + struct map_params map_params; + struct e820_entry *entry; + uint32_t i; + int attr_wb = (MMU_MEM_ATTR_READ | + MMU_MEM_ATTR_WRITE | + MMU_MEM_ATTR_EXECUTE | + MMU_MEM_ATTR_WB_CACHE); + int attr_uc = (MMU_MEM_ATTR_READ | + MMU_MEM_ATTR_WRITE | + MMU_MEM_ATTR_EXECUTE | + MMU_MEM_ATTR_UNCACHED); + + pr_dbg("HV MMU Initialization"); + + check_mmu_capability(); + + /* Allocate memory for Hypervisor PML4 table */ + mmu_pml4_addr = alloc_paging_struct(); + + init_e820(); + obtain_e820_mem_info(); + + /* Loop through all memory regions in the e820 table */ + map_params.page_table_type = PT_HOST; + map_params.pml4_base = mmu_pml4_addr; + + /* Map all memory regions to UC attribute */ + map_mem(&map_params, (void *)e820_mem.mem_bottom, + (void *)e820_mem.mem_bottom, + (e820_mem.mem_top - e820_mem.mem_bottom), + attr_uc); + + /* Modify WB attribute for E820_TYPE_RAM */ + for (i = 0, entry = &e820[0]; + i < e820_entries; + i++, entry = &e820[i]) { + if (entry->type == E820_TYPE_RAM) { + modify_mem(&map_params, (void *)entry->baseaddr, + (void *)entry->baseaddr, + entry->length, attr_wb); + } + } + + pr_dbg("Enabling MMU "); + + /* Enable paging */ + enable_paging(mmu_pml4_addr); +} + +void *alloc_paging_struct(void) +{ + void *ptr = NULL; + + /* Allocate a page from Hypervisor heap */ + ptr = alloc_page(); + + ASSERT(ptr, "page alloc failed!"); + memset(ptr, 0, CPU_PAGE_SIZE); + + return ptr; +} + +uint64_t config_page_table_attr(struct map_params *map_params, uint32_t flags) +{ + int ept_entry = map_params->page_table_type; + uint64_t attr = 0; + + /* Convert generic memory flags to architecture specific attributes */ + /* Check if read access */ + if (flags & MMU_MEM_ATTR_READ) { + /* Configure for read access */ + 
attr |= + (ept_entry ? IA32E_EPT_R_BIT : MMU_MEM_ATTR_BIT_READ_WRITE); + } + + /* Check for write access */ + if (flags & MMU_MEM_ATTR_WRITE) { + /* Configure for write access */ + attr |= + (ept_entry ? IA32E_EPT_W_BIT : MMU_MEM_ATTR_BIT_READ_WRITE); + } + + /* Check for execute access */ + if (flags & MMU_MEM_ATTR_EXECUTE) { + /* Configure for execute (EPT only) */ + attr |= (ept_entry ? IA32E_EPT_X_BIT : 0); + } + + /* EPT & VT-d share the same page tables, set SNP bit + * to force snooping of PCIe devices if the page + * is cachable + */ + if ((flags & MMU_MEM_ATTR_UNCACHED) != MMU_MEM_ATTR_UNCACHED + && ept_entry == PT_EPT) { + attr |= IA32E_EPT_SNOOP_CTRL; + } + + /* Check for cache / memory types */ + if (flags & MMU_MEM_ATTR_WB_CACHE) { + /* Configure for write back cache */ + attr |= + (ept_entry ? IA32E_EPT_WB : MMU_MEM_ATTR_TYPE_CACHED_WB); + } else if (flags & MMU_MEM_ATTR_WT_CACHE) { + /* Configure for write through cache */ + attr |= + (ept_entry ? IA32E_EPT_WT : MMU_MEM_ATTR_TYPE_CACHED_WT); + } else if (flags & MMU_MEM_ATTR_UNCACHED) { + /* Configure for uncached */ + attr |= + (ept_entry ? IA32E_EPT_UNCACHED : MMU_MEM_ATTR_TYPE_UNCACHED); + } else if (flags & MMU_MEM_ATTR_WC) { + /* Configure for write combining */ + attr |= + (ept_entry ? IA32E_EPT_WC : MMU_MEM_ATTR_TYPE_WRITE_COMBINED); + } else { + /* Configure for write protected */ + attr |= + (ept_entry ? IA32E_EPT_WP : MMU_MEM_ATTR_TYPE_WRITE_PROTECTED); + } + return attr; + +} + +void obtain_last_page_table_entry(struct map_params *map_params, + struct entry_params *entry, void *addr, bool direct) +{ + uint64_t table_entry; + uint32_t table_present = 0; + /* Obtain the PML4 address */ + void *table_addr = direct ? (map_params->pml4_base) + : (map_params->pml4_inverted); + + /* Obtain page table entry from PML4 table*/ + table_entry = get_table_entry(map_params, addr, + table_addr, IA32E_PML4); + table_present = check_page_table_present(map_params, table_entry); + if (table_present == PT_NOT_PRESENT) { + /* PML4E not present, return PML4 base address */ + entry->entry_level = IA32E_PML4; + entry->entry_base = (uint64_t)table_addr; + entry->entry_present = PT_NOT_PRESENT; + entry->page_size = check_mmu_1gb_support(map_params) ? + (PAGE_SIZE_1G) : (PAGE_SIZE_2M); + entry->entry_off = fetch_page_table_offset(addr, IA32E_PML4); + entry->entry_val = table_entry; + return; + } + + /* Obtain page table entry from PDPT table*/ + table_addr = (void *)(table_entry & IA32E_REF_MASK); + table_entry = get_table_entry(map_params, addr, + table_addr, IA32E_PDPT); + table_present = check_page_table_present(map_params, table_entry); + if (table_present == PT_NOT_PRESENT) { + /* PDPTE not present, return PDPT base address */ + entry->entry_level = IA32E_PDPT; + entry->entry_base = (uint64_t)table_addr; + entry->entry_present = PT_NOT_PRESENT; + entry->page_size = check_mmu_1gb_support(map_params) ? + (PAGE_SIZE_1G) : (PAGE_SIZE_2M); + entry->entry_off = fetch_page_table_offset(addr, IA32E_PDPT); + entry->entry_val = table_entry; + return; + } + if (table_entry & IA32E_PDPTE_PS_BIT) { + /* 1GB page size, return the base addr of the pg entry*/ + entry->entry_level = IA32E_PDPT; + entry->entry_base = (uint64_t)table_addr; + entry->page_size = check_mmu_1gb_support(map_params) ? 
+ (PAGE_SIZE_1G) : (PAGE_SIZE_2M); + entry->entry_present = PT_PRESENT; + entry->entry_off = fetch_page_table_offset(addr, IA32E_PDPT); + entry->entry_val = table_entry; + return; + } + + /* Obtain page table entry from PD table*/ + table_addr = (void *)(table_entry&IA32E_REF_MASK); + table_entry = get_table_entry(map_params, addr, + table_addr, IA32E_PD); + table_present = check_page_table_present(map_params, table_entry); + if (table_present == PT_NOT_PRESENT) { + /* PDE not present, return PDE base address */ + entry->entry_level = IA32E_PD; + entry->entry_base = (uint64_t)table_addr; + entry->entry_present = PT_NOT_PRESENT; + entry->page_size = PAGE_SIZE_2M; + entry->entry_off = fetch_page_table_offset(addr, IA32E_PD); + entry->entry_val = table_entry; + return; + + } + if (table_entry & IA32E_PDE_PS_BIT) { + /* 2MB page size, return the base addr of the pg entry*/ + entry->entry_level = IA32E_PD; + entry->entry_base = (uint64_t)table_addr; + entry->entry_present = PT_PRESENT; + entry->page_size = PAGE_SIZE_2M; + entry->entry_off = fetch_page_table_offset(addr, IA32E_PD); + entry->entry_val = table_entry; + return; + } + + /* Obtain page table entry from PT table*/ + table_addr = (void *)(table_entry&IA32E_REF_MASK); + table_entry = get_table_entry(map_params, addr, + table_addr, IA32E_PT); + table_present = check_page_table_present(map_params, table_entry); + entry->entry_present = ((table_present == PT_PRESENT) + ? (PT_PRESENT):(PT_NOT_PRESENT)); + entry->entry_level = IA32E_PT; + entry->entry_base = (uint64_t)table_addr; + entry->page_size = PAGE_SIZE_4K; + entry->entry_off = fetch_page_table_offset(addr, IA32E_PT); + entry->entry_val = table_entry; +} + +static uint64_t update_page_table_entry(struct map_params *map_params, + void *paddr, void *vaddr, uint64_t size, uint64_t attr, + enum mem_map_request_type request_type, bool direct) +{ + uint64_t remaining_size = size; + uint32_t adjustment_size; + int ept_entry = map_params->page_table_type; + /* Obtain the PML4 address */ + void *table_addr = direct ? 
(map_params->pml4_base) + : (map_params->pml4_inverted); + + /* Walk from the PML4 table to the PDPT table */ + table_addr = walk_paging_struct(vaddr, table_addr, IA32E_PML4, + map_params); + + if ((remaining_size >= MEM_1G) + && (MEM_ALIGNED_CHECK(vaddr, MEM_1G)) + && (MEM_ALIGNED_CHECK(paddr, MEM_1G)) + && check_mmu_1gb_support(map_params)) { + /* Map this 1 GByte memory region */ + adjustment_size = map_mem_region(vaddr, paddr, + table_addr, attr, IA32E_PDPT, + ept_entry, request_type); + } else if ((remaining_size >= MEM_2M) + && (MEM_ALIGNED_CHECK(vaddr, MEM_2M)) + && (MEM_ALIGNED_CHECK(paddr, MEM_2M))) { + /* Walk from the PDPT table to the PD table */ + table_addr = walk_paging_struct(vaddr, table_addr, + IA32E_PDPT, map_params); + /* Map this 2 MByte memory region */ + adjustment_size = map_mem_region(vaddr, paddr, + table_addr, attr, IA32E_PD, ept_entry, + request_type); + } else { + /* Walk from the PDPT table to the PD table */ + table_addr = walk_paging_struct(vaddr, + table_addr, IA32E_PDPT, map_params); + /* Walk from the PD table to the page table */ + table_addr = walk_paging_struct(vaddr, + table_addr, IA32E_PD, map_params); + /* Map this 4 KByte memory region */ + adjustment_size = map_mem_region(vaddr, paddr, + table_addr, attr, IA32E_PT, + ept_entry, request_type); + } + + return adjustment_size; + +} + +static uint64_t break_page_table(struct map_params *map_params, void *paddr, + void *vaddr, uint64_t page_size, bool direct) +{ + uint32_t i = 0; + uint64_t pa; + uint64_t attr = 0x00; + uint64_t next_page_size = 0x00; + void *sub_tab_addr = NULL; + struct entry_params entry; + + switch (page_size) { + /* Breaking 1GB page to 2MB page*/ + case PAGE_SIZE_1G: + next_page_size = PAGE_SIZE_2M; + attr |= IA32E_PDE_PS_BIT; + pr_info("%s, Breaking 1GB -->2MB vaddr=0x%llx", + __func__, vaddr); + break; + + /* Breaking 2MB page to 4KB page*/ + case PAGE_SIZE_2M: + next_page_size = PAGE_SIZE_4K; + pr_info("%s, Breaking 2MB -->4KB vaddr=0x%llx", + __func__, vaddr); + break; + + /* 4KB page, No action*/ + case PAGE_SIZE_4K: + default: + next_page_size = PAGE_SIZE_4K; + pr_info("%s, Breaking 4KB no action vaddr=0x%llx", + __func__, vaddr); + break; + } + + if (page_size != next_page_size) { + obtain_last_page_table_entry(map_params, &entry, vaddr, direct); + + /* New entry present - need to allocate a new table */ + sub_tab_addr = alloc_paging_struct(); + /* Check to ensure memory available for this structure */ + if (sub_tab_addr == 0) { + /* Error: + * Unable to find table memory necessary to map memory + */ + pr_err("Fail to find table memory for map memory"); + ASSERT(sub_tab_addr == 0, ""); + return 0; + } + + /* the physical address maybe be not aligned of + * current page size, obtain the starting physical address + * aligned of current page size + */ + pa = ((((uint64_t)paddr) / page_size) * page_size); + if (map_params->page_table_type == PT_EPT) { + /* Keep original attribute(here &0x3f) + * bit 0(R) bit1(W) bit2(X) bit3~5 MT + */ + attr |= (entry.entry_val & 0x3f); + } else { + /* Keep original attribute(here &0x7f) */ + attr |= (entry.entry_val & 0x7f); + } + /* write all entries and keep original attr*/ + for (i = 0; i < IA32E_NUM_ENTRIES; i++) { + MEM_WRITE64(sub_tab_addr + (i * IA32E_COMM_ENTRY_SIZE), + (attr | (pa + (i * next_page_size)))); + } + if (map_params->page_table_type == PT_EPT) { + /* Write the table entry to map this memory, + * SDM chapter28 figure 28-1 + * bit 0(R) bit1(W) bit2(X) bit3~5 MUST be reserved + * (here &0x07) + */ + 
MEM_WRITE64(entry.entry_base + entry.entry_off, + ((entry.entry_val & 0x07) | + ((uint64_t)sub_tab_addr))); + } else { + /* Write the table entry to map this memory, + * SDM chapter4 figure 4-11 + * bit0(P) bit1(RW) bit2(U/S) bit3(PWT) bit4(PCD) + * bit5(A) bit6(D or Ignore) + */ + MEM_WRITE64(entry.entry_base + entry.entry_off, + ((entry.entry_val & 0x7f) | + ((uint64_t)sub_tab_addr))); + } + } + + return next_page_size; +} + +static void modify_paging(struct map_params *map_params, void *paddr, + void *vaddr, uint64_t size, uint32_t flags, + enum mem_map_request_type request_type, bool direct) +{ + int64_t remaining_size; + uint64_t adjust_size; + uint64_t attr; + int status = 0; + struct entry_params entry; + uint64_t page_size; + uint64_t vaddr_end = ((uint64_t)vaddr) + size; + + /* if the address is not PAGE aligned, will drop + * the unaligned part + */ + paddr = (void *)ROUND_PAGE_UP((uint64_t)paddr); + vaddr = (void *)ROUND_PAGE_UP((uint64_t)vaddr); + vaddr_end = ROUND_PAGE_DOWN(vaddr_end); + remaining_size = vaddr_end - (uint64_t)vaddr; + + if ((request_type >= PAGING_REQUEST_TYPE_UNKNOWN) + || (map_params == NULL)) { + pr_err("%s: vaddr=0x%llx size=0x%llx req_type=0x%lx", + __func__, vaddr, size, request_type); + status = -EINVAL; + } + ASSERT(status == 0, "Incorrect Arguments"); + + attr = config_page_table_attr(map_params, flags); + /* Loop until the entire block of memory is appropriately + * MAP/UNMAP/MODIFY + */ + while (remaining_size > 0) { + obtain_last_page_table_entry(map_params, &entry, vaddr, direct); + /* filter the unmap request, no action in this case*/ + page_size = entry.page_size; + if ((request_type == PAGING_REQUEST_TYPE_UNMAP) + && (entry.entry_present == PT_NOT_PRESENT)) { + adjust_size = + page_size - ((uint64_t)(vaddr) % page_size); + vaddr += adjust_size; + paddr += adjust_size; + remaining_size -= adjust_size; + continue; + } + + /* if the address is NOT aligned of current page size, + * or required memory size < page size + * need to break page firstly + */ + if (entry.entry_present == PT_PRESENT) { + /* Maybe need to recursive breaking in this case + * e.g. 1GB->2MB->4KB + */ + while ((uint64_t)remaining_size < page_size + || (!MEM_ALIGNED_CHECK(vaddr, page_size)) + || (!MEM_ALIGNED_CHECK(paddr, page_size))) { + /* The breaking function return the page size + * of next level page table + */ + page_size = break_page_table(map_params, + paddr, vaddr, page_size, direct); + } + } else { + page_size = ((uint64_t)remaining_size < page_size) + ? 
((uint64_t)remaining_size) : (page_size); + } + /* The function return the memory size that one entry can map */ + adjust_size = update_page_table_entry(map_params, paddr, vaddr, + page_size, attr, request_type, direct); + vaddr += adjust_size; + paddr += adjust_size; + remaining_size -= adjust_size; + } +} + +void map_mem(struct map_params *map_params, void *paddr, void *vaddr, + uint64_t size, uint32_t flags) +{ + /* used for MMU and EPT*/ + modify_paging(map_params, paddr, vaddr, size, flags, + PAGING_REQUEST_TYPE_MAP, true); + /* only for EPT */ + if (map_params->page_table_type == PT_EPT) { + modify_paging(map_params, vaddr, paddr, size, flags, + PAGING_REQUEST_TYPE_MAP, false); + } +} + +void unmap_mem(struct map_params *map_params, void *paddr, void *vaddr, + uint64_t size, uint32_t flags) +{ + /* used for MMU and EPT */ + modify_paging(map_params, paddr, vaddr, size, flags, + PAGING_REQUEST_TYPE_UNMAP, true); + /* only for EPT */ + if (map_params->page_table_type == PT_EPT) { + modify_paging(map_params, vaddr, paddr, size, flags, + PAGING_REQUEST_TYPE_UNMAP, false); + } +} + +void modify_mem(struct map_params *map_params, void *paddr, void *vaddr, + uint64_t size, uint32_t flags) +{ + /* used for MMU and EPT*/ + modify_paging(map_params, paddr, vaddr, size, flags, + PAGING_REQUEST_TYPE_MODIFY, true); + /* only for EPT */ + if (map_params->page_table_type == PT_EPT) { + modify_paging(map_params, vaddr, paddr, size, flags, + PAGING_REQUEST_TYPE_MODIFY, false); + } +} diff --git a/hypervisor/arch/x86/notify.c b/hypervisor/arch/x86/notify.c new file mode 100644 index 000000000..42d02bc9a --- /dev/null +++ b/hypervisor/arch/x86/notify.c @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include + +static struct dev_handler_node *notification_node; + +/* run in interrupt context */ +static int kick_notification(__unused int irq, __unused void *data) +{ + /* Notification vector does not require handling here, it's just used + * to kick taget cpu out of non-root mode. + */ + return 0; +} + +static int request_notification_irq(dev_handler_t func, void *data, + const char *name) +{ + int irq = -1; /* system allocate */ + struct dev_handler_node *node = NULL; + + if (notification_node != NULL) { + pr_info("%s, Notification vector already allocated on this CPU", + __func__); + return -EBUSY; + } + + /* all cpu register the same notification vector */ + node = pri_register_handler(irq, VECTOR_NOTIFY_VCPU, func, data, name); + if (node == NULL) { + pr_err("Failed to add notify isr"); + return -1; + } + update_irq_handler(dev_to_irq(node), quick_handler_nolock); + notification_node = node; + return 0; +} + +void setup_notification(void) +{ + int cpu; + char name[32] = {0}; + + cpu = get_cpu_id(); + if (cpu > 0) + return; + + /* support IPI notification, VM0 will register all CPU */ + snprintf(name, 32, "NOTIFY_ISR%d", cpu); + if (request_notification_irq(kick_notification, NULL, name) < 0) { + pr_err("Failed to setup notification"); + return; + } + + dev_dbg(ACRN_DBG_PTIRQ, "NOTIFY: irq[%d] setup vector %x", + dev_to_irq(notification_node), + dev_to_vector(notification_node)); +} + +void cleanup_notification(void) +{ + if (notification_node) + unregister_handler_common(notification_node); + notification_node = NULL; +} diff --git a/hypervisor/arch/x86/softirq.c b/hypervisor/arch/x86/softirq.c new file mode 100644 index 000000000..0f1e75606 --- /dev/null +++ b/hypervisor/arch/x86/softirq.c @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include + +static DEFINE_CPU_DATA(uint64_t, softirq_pending); + +void disable_softirq(int cpu_id) +{ + bitmap_clr(SOFTIRQ_ATOMIC, &per_cpu(softirq_pending, cpu_id)); +} + +void enable_softirq(int cpu_id) +{ + bitmap_set(SOFTIRQ_ATOMIC, &per_cpu(softirq_pending, cpu_id)); +} + +void init_softirq(void) +{ + int cpu_id; + + for (cpu_id = 0; cpu_id < phy_cpu_num; cpu_id++) { + per_cpu(softirq_pending, cpu_id) = 0; + bitmap_set(SOFTIRQ_ATOMIC, &per_cpu(softirq_pending, cpu_id)); + } +} + +void raise_softirq(int softirq_id) +{ + int cpu_id = get_cpu_id(); + uint64_t *bitmap = &per_cpu(softirq_pending, cpu_id); + + if (cpu_id >= phy_cpu_num) + return; + + bitmap_set(softirq_id, bitmap); +} + +void exec_softirq(void) +{ + int cpu_id = get_cpu_id(); + uint64_t *bitmap = &per_cpu(softirq_pending, cpu_id); + + uint64_t rflag; + int softirq_id; + + if (cpu_id >= phy_cpu_num) + return; + + /* Disable softirq + * SOFTIRQ_ATOMIC bit = 0 means softirq already in execution + */ + if (!bitmap_test_and_clear(SOFTIRQ_ATOMIC, bitmap)) + return; + + if (((*bitmap) & SOFTIRQ_MASK) == 0UL) + goto ENABLE_AND_EXIT; + + /* check if we are in interrupt context */ + CPU_RFLAGS_SAVE(&rflag); + if (!(rflag & (1<<9))) + goto ENABLE_AND_EXIT; + + while (1) { + softirq_id = bitmap_ffs(bitmap); + if ((softirq_id < 0) || (softirq_id >= SOFTIRQ_MAX)) + break; + + bitmap_clr(softirq_id, bitmap); + + switch (softirq_id) { + case SOFTIRQ_TIMER: + timer_softirq(cpu_id); + break; + case SOFTIRQ_DEV_ASSIGN: + ptdev_softirq(cpu_id); + break; + default: + break; + + } + } + +ENABLE_AND_EXIT: + enable_softirq(cpu_id); +} + diff --git a/hypervisor/arch/x86/timer.c b/hypervisor/arch/x86/timer.c new file mode 100644 index 000000000..733a82079 --- /dev/null +++ b/hypervisor/arch/x86/timer.c @@ -0,0 +1,561 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include + +#define MAX_TIMER_ACTIONS 32 + +struct timer_statistics { + struct { + uint64_t pickup_id; + uint64_t pickup_time; + uint64_t pickup_deadline; + uint64_t added_id; + uint64_t added_time; + uint64_t added_deadline; + } last; + uint64_t total_pickup_cnt; + uint64_t total_added_cnt; + uint64_t irq_cnt; + long pending_cnt; +}; + +struct timer { + timer_handle_t func; /* callback if time reached */ + uint64_t priv_data; /* func private data */ + uint64_t deadline; /* tsc deadline to interrupt */ + long handle; /* unique handle for user */ + int cpu_id; /* armed on which CPU */ + int id; /* timer ID, used by release */ + struct list_head node; /* link all timers */ +}; + +struct per_cpu_timers { + struct timer *timers_pool; /* it's timers pool for allocation */ + uint64_t free_bitmap; + struct list_head timer_list; /* it's for runtime active timer list */ + spinlock_t lock; + int cpu_id; + struct timer_statistics stat; +}; + +static DEFINE_CPU_DATA(struct per_cpu_timers, cpu_timers); + +#define TIMER_IRQ (NR_MAX_IRQS - 1) + +DEFINE_CPU_DATA(struct dev_handler_node *, timer_node); + +static struct timer* +find_expired_timer(struct per_cpu_timers *cpu_timer, uint64_t tsc_now); + +static struct timer *alloc_timer(int cpu_id) +{ + int idx; + struct per_cpu_timers *cpu_timer; + struct timer *timer; + + spinlock_rflags; + + cpu_timer = &per_cpu(cpu_timers, cpu_id); + spinlock_irqsave_obtain(&cpu_timer->lock); + idx = bitmap_ffs(&cpu_timer->free_bitmap); + if (idx < 0) { + spinlock_irqrestore_release(&cpu_timer->lock); + return NULL; + } + + bitmap_clr(idx, &cpu_timer->free_bitmap); + cpu_timer->stat.total_added_cnt++; + cpu_timer->stat.pending_cnt++; + + /* assign unique handle and never duplicate */ + timer = cpu_timer->timers_pool + idx; + timer->handle = cpu_timer->stat.total_added_cnt; + spinlock_irqrestore_release(&cpu_timer->lock); + + ASSERT((cpu_timer->timers_pool[cpu_id].cpu_id == cpu_id), + "timer cpu_id did not match"); + return timer; +} + +static void release_timer(struct timer *timer) +{ + struct per_cpu_timers *cpu_timer; + + spinlock_rflags; + + cpu_timer = &per_cpu(cpu_timers, timer->cpu_id); + timer->priv_data = 0; + timer->func = NULL; + timer->deadline = 0; + spinlock_irqsave_obtain(&cpu_timer->lock); + bitmap_set(timer->id, &cpu_timer->free_bitmap); + cpu_timer->stat.pending_cnt--; + spinlock_irqrestore_release(&cpu_timer->lock); +} + +static int get_target_cpu(void) +{ + /* we should search idle CPU to balance timer service */ + return get_cpu_id(); +} + +static struct timer* +find_expired_timer(struct per_cpu_timers *cpu_timer, uint64_t tsc_now) +{ + struct timer *timer; + struct list_head *pos; + + spinlock_rflags; + + spinlock_irqsave_obtain(&cpu_timer->lock); + list_for_each(pos, &cpu_timer->timer_list) { + timer = list_entry(pos, struct timer, node); + if (timer->deadline <= tsc_now) + goto UNLOCK; + } + timer = NULL; +UNLOCK: + spinlock_irqrestore_release(&cpu_timer->lock); + return timer; +} + +/* need lock protect outside */ +static struct timer* +_search_nearest_timer(struct per_cpu_timers *cpu_timer) +{ + struct timer *timer; + struct timer *target = NULL; + struct list_head *pos; + + list_for_each(pos, &cpu_timer->timer_list) { + timer = list_entry(pos, struct timer, node); + if (target == NULL) + target = timer; + else if (timer->deadline < target->deadline) + target = timer; + } + + return target; +} + +/* need lock protect outside */ +static struct timer* +_search_timer_by_handle(struct per_cpu_timers 
 *cpu_timer, long handle)
+{
+ struct timer *timer;
+ struct list_head *pos;
+
+ list_for_each(pos, &cpu_timer->timer_list) {
+ timer = list_entry(pos, struct timer, node);
+ if (timer->handle == handle)
+ goto FOUND;
+ }
+ timer = NULL;
+FOUND:
+ return timer;
+}
+
+static void
+run_timer(struct per_cpu_timers *cpu_timer, struct timer *timer)
+{
+ spinlock_rflags;
+
+ /* remove from list first */
+ spinlock_irqsave_obtain(&cpu_timer->lock);
+ list_del(&timer->node);
+ spinlock_irqrestore_release(&cpu_timer->lock);
+
+ /* deadline = 0 means stop timer, we should skip */
+ if (timer->func && timer->deadline != 0UL)
+ timer->func(timer->priv_data);
+
+ cpu_timer->stat.last.pickup_id = timer->id;
+ cpu_timer->stat.last.pickup_deadline = timer->deadline;
+ cpu_timer->stat.last.pickup_time = rdtsc();
+ cpu_timer->stat.total_pickup_cnt++;
+
+ TRACE_4I(TRACE_TIMER_ACTION_PCKUP, timer->id, timer->deadline,
+ timer->deadline >> 32, cpu_timer->stat.total_pickup_cnt);
+}
+
+/* run in interrupt context */
+static int tsc_deadline_handler(__unused int irq, __unused void *data)
+{
+ raise_softirq(SOFTIRQ_TIMER);
+ return 0;
+}
+
+static inline void schedule_next_timer(int cpu)
+{
+ struct timer *timer;
+ struct per_cpu_timers *cpu_timer = &per_cpu(cpu_timers, cpu);
+
+ spinlock_rflags;
+
+ spinlock_irqsave_obtain(&cpu_timer->lock);
+ timer = _search_nearest_timer(cpu_timer);
+ if (timer) {
+ /* it is okay to program an already expired time */
+ msr_write(MSR_IA32_TSC_DEADLINE, timer->deadline);
+ }
+ spinlock_irqrestore_release(&cpu_timer->lock);
+}
+
+int request_timer_irq(int cpu, dev_handler_t func, void *data, const char *name)
+{
+ struct dev_handler_node *node = NULL;
+
+ if (cpu >= phy_cpu_num)
+ return -1;
+
+ if (per_cpu(timer_node, cpu)) {
+ pr_err("CPU%d timer isr already added", cpu);
+ unregister_handler_common(per_cpu(timer_node, cpu));
+ }
+
+ node = pri_register_handler(TIMER_IRQ, VECTOR_TIMER, func, data, name);
+ if (node != NULL) {
+ per_cpu(timer_node, cpu) = node;
+ update_irq_handler(TIMER_IRQ, quick_handler_nolock);
+ } else {
+ pr_err("Failed to add timer isr");
+ return -1;
+ }
+
+ return 0;
+}
+
+/* TODO: init in separate cpu */
+static void init_timer_pool(void)
+{
+ int i, j;
+ struct per_cpu_timers *cpu_timer;
+ struct timer *timers_pool;
+
+ /* Make sure we only init once */
+ if (get_cpu_id() > 0)
+ return;
+
+ for (i = 0; i < phy_cpu_num; i++) {
+ cpu_timer = &per_cpu(cpu_timers, i);
+ cpu_timer->cpu_id = i;
+ timers_pool =
+ calloc(MAX_TIMER_ACTIONS, sizeof(struct timer));
+ ASSERT(timers_pool, "Create timers pool failed");
+
+ cpu_timer->timers_pool = timers_pool;
+ cpu_timer->free_bitmap = (1UL << MAX_TIMER_ACTIONS) - 1;
+ INIT_LIST_HEAD(&cpu_timer->timer_list);
+ spinlock_init(&cpu_timer->lock);
+ for (j = 0; j < MAX_TIMER_ACTIONS; j++) {
+ timers_pool[j].id = j;
+ timers_pool[j].cpu_id = i;
+ timers_pool[j].priv_data = 0;
+ timers_pool[j].func = NULL;
+ timers_pool[j].deadline = 0;
+ timers_pool[j].handle = -1UL;
+ }
+ }
+}
+
+static void init_tsc_deadline_timer(void)
+{
+ uint32_t val;
+
+ val = VECTOR_TIMER;
+ val |= 0x40000; /* TSC deadline and unmask */
+ mmio_write_long(val, LAPIC_BASE + LAPIC_LVT_TIMER_REGISTER);
+ asm volatile("mfence" : : : "memory");
+ /* disarm timer */
+ msr_write(MSR_IA32_TSC_DEADLINE, 0UL);
+}
+
+void timer_init(void)
+{
+ char name[32] = {0};
+ int cpu = get_cpu_id();
+
+ snprintf(name, 32, "timer_tick[%d]", cpu);
+ if (request_timer_irq(cpu, tsc_deadline_handler, NULL, name) < 0) {
+ pr_err("Timer setup failed");
+ return;
+ }
+
+ init_tsc_deadline_timer();
+ init_timer_pool();
+}
+
+void
timer_cleanup(void) +{ + int cpu = get_cpu_id(); + + if (per_cpu(timer_node, cpu)) + unregister_handler_common(per_cpu(timer_node, cpu)); + + per_cpu(timer_node, cpu) = NULL; +} + +int timer_softirq(int cpu_id) +{ + struct per_cpu_timers *cpu_timer; + struct timer *timer; + int max = MAX_TIMER_ACTIONS; + + /* handle passed timer */ + cpu_timer = &per_cpu(cpu_timers, cpu_id); + cpu_timer->stat.irq_cnt++; + + /* This is to make sure we are not blocked due to delay inside func() + * force to exit irq handler after we serviced >31 timers + * caller used to add_timer() in timer->func(), if there is a delay + * inside func(), it will infinitely loop here, because new added timer + * already passed due to previously func()'s delay. + */ + timer = find_expired_timer(cpu_timer, rdtsc()); + while (timer && --max > 0) { + run_timer(cpu_timer, timer); + /* put back to timer pool */ + release_timer(timer); + /* search next one */ + timer = find_expired_timer(cpu_timer, rdtsc()); + } + + /* update nearest timer */ + schedule_next_timer(cpu_id); + return 0; +} + +/* + * add_timer is okay to add passed timer but not 0 + * return: handle, this handle is unique and can be used to find back + * this added timer. handle will be invalid after timer expired + */ +long add_timer(timer_handle_t func, uint64_t data, uint64_t deadline) +{ + struct timer *timer; + struct per_cpu_timers *cpu_timer; + int cpu_id = get_target_cpu(); + + spinlock_rflags; + + if (deadline == 0 || func == NULL) + return -1; + + /* possible interrupt context please avoid mem alloct here*/ + timer = alloc_timer(cpu_id); + if (timer == NULL) + return -1; + + timer->func = func; + timer->priv_data = data; + timer->deadline = deadline; + timer->cpu_id = get_target_cpu(); + + cpu_timer = &per_cpu(cpu_timers, timer->cpu_id); + + /* We need irqsave here even softirq enabled to protect timer_list */ + spinlock_irqsave_obtain(&cpu_timer->lock); + list_add_tail(&timer->node, &cpu_timer->timer_list); + cpu_timer->stat.last.added_id = timer->id; + cpu_timer->stat.last.added_time = rdtsc(); + cpu_timer->stat.last.added_deadline = timer->deadline; + spinlock_irqrestore_release(&cpu_timer->lock); + TRACE_4I(TRACE_TIMER_ACTION_ADDED, timer->id, timer->deadline, + timer->deadline >> 32, cpu_timer->stat.total_added_cnt); + + schedule_next_timer(cpu_id); + return timer->handle; +} + +/* + * update_timer existing timer. 
if not found, add new timer + */ +long +update_timer(long handle, timer_handle_t func, uint64_t data, + uint64_t deadline) +{ + struct timer *timer; + struct per_cpu_timers *cpu_timer; + int cpu_id = get_target_cpu(); + + spinlock_rflags; + bool ret = false; + + if (deadline == 0) + return -1; + + cpu_timer = &per_cpu(cpu_timers, cpu_id); + spinlock_irqsave_obtain(&cpu_timer->lock); + timer = _search_timer_by_handle(cpu_timer, handle); + if (timer) { + /* update deadline and re-sort */ + timer->deadline = deadline; + timer->func = func; + timer->priv_data = data; + TRACE_4I(TRACE_TIMER_ACTION_UPDAT, timer->id, + timer->deadline, timer->deadline >> 32, + cpu_timer->stat.total_added_cnt); + ret = true; + } + spinlock_irqrestore_release(&cpu_timer->lock); + + if (ret) + schedule_next_timer(cpu_id); + else { + /* if update failed, we add to new, and update handle */ + /* TODO: the correct behavior should be return failure here */ + handle = add_timer(func, data, deadline); + } + + return handle; +} + +/* NOTE: cpu_id referred to physical cpu id here */ +bool cancel_timer(long handle, int cpu_id) +{ + struct timer *timer; + struct per_cpu_timers *cpu_timer; + + spinlock_rflags; + bool ret = false; + + cpu_timer = &per_cpu(cpu_timers, cpu_id); + spinlock_irqsave_obtain(&cpu_timer->lock); + timer = _search_timer_by_handle(cpu_timer, handle); + if (timer) { + /* NOTE: we can not directly release timer here. + * Instead we set deadline to expired and clear func. + * This timer will be reclaim next timer + */ + timer->deadline = 0; + timer->func = NULL; + ret = true; + } + spinlock_irqrestore_release(&cpu_timer->lock); + return ret; +} + +void dump_timer_pool_info(int cpu_id) +{ + struct per_cpu_timers *cpu_timer = + &per_cpu(cpu_timers, cpu_id); + struct list_head *pos; + int cn = 0; + + spinlock_rflags; + + if (cpu_id >= phy_cpu_num) + return; + + pr_info("Timer%d statistics: Pending: %d\n\t" + "total_pickup: %lld total_added: %lld total_irq: %lld", + cpu_id, + cpu_timer->stat.pending_cnt, + cpu_timer->stat.total_pickup_cnt, + cpu_timer->stat.total_added_cnt, + cpu_timer->stat.irq_cnt); + + pr_info("LAST pickup[%d] time: 0x%llx deadline: 0x%llx", + cpu_timer->stat.last.pickup_id, + cpu_timer->stat.last.pickup_time, + cpu_timer->stat.last.pickup_deadline); + + pr_info("LAST added[%d] time: 0x%llx deadline: 0x%llx", + cpu_timer->stat.last.added_id, + cpu_timer->stat.last.added_time, + cpu_timer->stat.last.added_deadline); + + spinlock_irqsave_obtain(&cpu_timer->lock); + list_for_each(pos, &cpu_timer->timer_list) { + cn++; + pr_info("-->pending: %d trigger: 0x%llx", cn, + list_entry(pos, struct timer, node)->deadline); + } + spinlock_irqrestore_release(&cpu_timer->lock); +} + +void check_tsc(void) +{ + uint64_t temp64; + + /* Ensure time-stamp timer is turned on for each CPU */ + CPU_CR_READ(cr4, &temp64); + CPU_CR_WRITE(cr4, (temp64 & ~CR4_TSD)); +} + +uint64_t tsc_cycles_in_period(uint16_t timer_period_in_us) +{ + uint16_t initial_pit; + uint16_t current_pit; + uint32_t current_tsc; +#define PIT_TARGET 0x3FFF + + if (timer_period_in_us < 1000) + pr_warn("Bad timer_period_in_us: %d\n", + timer_period_in_us); + + /* Assume the 8254 delivers 18.2 ticks per second when 16 bits fully + * wrap. This is about 1.193MHz or a clock period of 0.8384uSec + */ + initial_pit = (uint16_t)(timer_period_in_us*1193000UL/1000000); + initial_pit += PIT_TARGET; + + /* Port 0x43 ==> Control word write; Data 0x30 ==> Select Counter 0, + * Read/Write least significant byte first, mode 0, 16 bits. 
+ */ + + io_write_byte(0x30, 0x43); + io_write_byte(initial_pit & 0x00ff, 0x40); /* Write LSB */ + io_write_byte(initial_pit >> 8, 0x40); /* Write MSB */ + + current_tsc = rdtsc(); + + do { + /* Port 0x43 ==> Control word write; 0x00 ==> Select + * Counter 0, Counter Latch Command, Mode 0; 16 bits + */ + io_write_byte(0x00, 0x43); + + current_pit = io_read_byte(0x40); /* Read LSB */ + current_pit |= io_read_byte(0x40) << 8; /* Read MSB */ + /* Let the counter count down to PIT_TARGET */ + } while (current_pit > PIT_TARGET); + + current_tsc = rdtsc() - current_tsc; + + return (uint64_t) current_tsc; +} + diff --git a/hypervisor/arch/x86/vmexit.c b/hypervisor/arch/x86/vmexit.c new file mode 100644 index 000000000..3b1b5362a --- /dev/null +++ b/hypervisor/arch/x86/vmexit.c @@ -0,0 +1,494 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include + +static int rdtscp_handler(struct vcpu *vcpu); +static int unhandled_vmexit_handler(struct vcpu *vcpu); +static int rdtsc_handler(struct vcpu *vcpu); +/* VM Dispatch table for Exit condition handling */ +static const struct vm_exit_dispatch dispatch_table[] = { + [VMX_EXIT_REASON_EXCEPTION_OR_NMI] = { + .handler = exception_handler}, + [VMX_EXIT_REASON_EXTERNAL_INTERRUPT] = { + .handler = external_interrupt_handler}, + [VMX_EXIT_REASON_TRIPLE_FAULT] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_INIT_SIGNAL] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_STARTUP_IPI] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_IO_SMI] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_OTHER_SMI] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_INTERRUPT_WINDOW] = { + .handler = interrupt_win_exiting_handler}, + [VMX_EXIT_REASON_NMI_WINDOW] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_TASK_SWITCH] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_CPUID] = { + .handler = cpuid_handler}, + [VMX_EXIT_REASON_GETSEC] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_HLT] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_INVD] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_INVLPG] = { + .handler = unhandled_vmexit_handler,}, + [VMX_EXIT_REASON_RDPMC] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_RDTSC] = { + .handler = rdtsc_handler}, + [VMX_EXIT_REASON_RSM] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_VMCALL] = { + .handler = vmcall_handler}, + [VMX_EXIT_REASON_VMCLEAR] { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_VMLAUNCH] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_VMPTRLD] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_VMPTRST] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_VMREAD] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_VMRESUME] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_VMWRITE] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_VMXOFF] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_VMXON] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_CR_ACCESS] = { + .handler = cr_access_handler, + .need_exit_qualification = 1}, + [VMX_EXIT_REASON_DR_ACCESS] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_IO_INSTRUCTION] = { + .handler = io_instr_handler, + .need_exit_qualification = 1}, + [VMX_EXIT_REASON_RDMSR] = { + .handler = rdmsr_handler}, + [VMX_EXIT_REASON_WRMSR] = { + .handler = wrmsr_handler}, + [VMX_EXIT_REASON_ENTRY_FAILURE_INVALID_GUEST_STATE] = { + .handler = unhandled_vmexit_handler, + .need_exit_qualification = 1}, + [VMX_EXIT_REASON_ENTRY_FAILURE_MSR_LOADING] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_MWAIT] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_MONITOR_TRAP] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_MONITOR] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_PAUSE] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_ENTRY_FAILURE_MACHINE_CHECK] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_TPR_BELOW_THRESHOLD] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_APIC_ACCESS] = { + .handler = apicv_access_exit_handler}, + [VMX_EXIT_REASON_VIRTUALIZED_EOI] = { + .handler = 
apicv_virtualized_eoi_exit_handler}, + [VMX_EXIT_REASON_GDTR_IDTR_ACCESS] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_LDTR_TR_ACCESS] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_EPT_VIOLATION] = { + .handler = ept_violation_handler, + .need_exit_qualification = 1}, + [VMX_EXIT_REASON_EPT_MISCONFIGURATION] = { + .handler = ept_misconfig_handler, + .need_exit_qualification = 1}, + [VMX_EXIT_REASON_INVEPT] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_RDTSCP] = { + .handler = rdtscp_handler}, + [VMX_EXIT_REASON_VMX_PREEMPTION_TIMER_EXPIRED] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_INVVPID] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_WBINVD] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_XSETBV] = { + .handler = unhandled_vmexit_handler}, + [VMX_EXIT_REASON_APIC_WRITE] = { + .handler = apicv_write_exit_handler} +}; + +struct vm_exit_dispatch *vmexit_handler(struct vcpu *vcpu) +{ + struct vm_exit_dispatch *dispatch = HV_NULL; + uint16_t basic_exit_reason; + + /* Obtain interrupt info */ + vcpu->arch_vcpu.exit_interrupt_info = + exec_vmread(VMX_IDT_VEC_INFO_FIELD); + + /* Calculate basic exit reason (low 16-bits) */ + basic_exit_reason = vcpu->arch_vcpu.exit_reason & 0xFFFF; + + /* Log details for exit */ + pr_dbg("Exit Reason: 0x%016llx ", vcpu->arch_vcpu.exit_reason); + + /* Ensure exit reason is within dispatch table */ + if (basic_exit_reason < ARRAY_SIZE(dispatch_table)) { + /* Calculate dispatch table entry */ + dispatch = (struct vm_exit_dispatch *) + (dispatch_table + basic_exit_reason); + + /* See if an exit qualification is necessary for this exit + * handler + */ + if (dispatch->need_exit_qualification) { + /* Get exit qualification */ + vcpu->arch_vcpu.exit_qualification = + exec_vmread(VMX_EXIT_QUALIFICATION); + } + } + + /* Update current vcpu in VM that caused vm exit */ + vcpu->vm->current_vcpu = vcpu; + + /* Return pointer to exit dispatch entry */ + return dispatch; +} + +static int unhandled_vmexit_handler(__unused struct vcpu *vcpu) +{ + pr_fatal("Error: Unhandled VM exit condition from guest at 0x%016llx ", + exec_vmread(VMX_GUEST_RIP)); + + pr_fatal("Exit Reason: 0x%016llx ", vcpu->arch_vcpu.exit_reason); + + pr_err("Exit qualification: 0x%016llx ", + exec_vmread(VMX_EXIT_QUALIFICATION)); + + /* while(1); */ + + TRACE_2L(TRC_VMEXIT_UNHANDLED, vcpu->arch_vcpu.exit_reason, 0); + + return 0; +} + +static int write_cr0(struct vcpu *vcpu, uint64_t value) +{ + uint32_t value32; + uint64_t value64; + + pr_dbg("VMM: Guest trying to write 0x%08x to CR0", value); + + /* Read host mask value */ + value64 = exec_vmread(VMX_CR0_MASK); + + /* Clear all bits being written by guest that are owned by host */ + value &= ~value64; + + /* Update CR0 in guest state */ + vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr0 |= value; + exec_vmwrite(VMX_GUEST_CR0, + vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr0); + pr_dbg("VMM: Guest allowed to write 0x%08x to CR0", + vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr0); + + /* If guest is trying to transition vcpu from unpaged real mode to page + * protected mode make necessary changes to VMCS structure to reflect + * transition from real mode to paged-protected mode + */ + if (!is_vcpu_bsp(vcpu) && + (vcpu->arch_vcpu.cpu_mode == REAL_MODE) && + (value & CR0_PG) && (value & CR0_PE)) { + /* Enable protected mode */ + value32 = exec_vmread(VMX_ENTRY_CONTROLS); + value32 |= (VMX_ENTRY_CTLS_IA32E_MODE | + 
VMX_ENTRY_CTLS_LOAD_PAT | + VMX_ENTRY_CTLS_LOAD_EFER); + exec_vmwrite(VMX_ENTRY_CONTROLS, value32); + pr_dbg("VMX_ENTRY_CONTROLS: 0x%x ", value32); + + /* Disable unrestricted mode */ + value32 = exec_vmread(VMX_PROC_VM_EXEC_CONTROLS2); + value32 |= (VMX_PROCBASED_CTLS2_EPT | + VMX_PROCBASED_CTLS2_RDTSCP); + exec_vmwrite(VMX_PROC_VM_EXEC_CONTROLS2, value32); + pr_dbg("VMX_PROC_VM_EXEC_CONTROLS2: 0x%x ", value32); + + /* Set up EFER */ + value64 = exec_vmread64(VMX_GUEST_IA32_EFER_FULL); + value64 |= (MSR_IA32_EFER_SCE_BIT | + MSR_IA32_EFER_LME_BIT | + MSR_IA32_EFER_LMA_BIT | MSR_IA32_EFER_NXE_BIT); + exec_vmwrite64(VMX_GUEST_IA32_EFER_FULL, value64); + pr_dbg("VMX_GUEST_IA32_EFER: 0x%016llx ", value64); + } + + return 0; +} + +static int write_cr3(struct vcpu *vcpu, uint64_t value) +{ + /* Write to guest's CR3 */ + vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3 = value; + + /* Commit new value to VMCS */ + exec_vmwrite(VMX_GUEST_CR3, + vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3); + + return 0; +} + +static int write_cr4(struct vcpu *vcpu, uint64_t value) +{ + uint64_t temp64; + + pr_dbg("VMM: Guest trying to write 0x%08x to CR4", value); + + /* Read host mask value */ + temp64 = exec_vmread(VMX_CR4_MASK); + + /* Clear all bits being written by guest that are owned by host */ + value &= ~temp64; + + /* Write updated CR4 (bitwise OR of allowed guest bits and CR4 host + * value) + */ + vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr4 |= value; + exec_vmwrite(VMX_GUEST_CR4, + vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr4); + pr_dbg("VMM: Guest allowed to write 0x%08x to CR4", + vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr4); + + return 0; +} + +static int read_cr3(struct vcpu *vcpu, uint64_t *value) +{ + *value = vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3; + + pr_dbg("VMM: reading 0x%08x from CR3", *value); + + return 0; +} + +int cpuid_handler(struct vcpu *vcpu) +{ + struct run_context *cur_context = + &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + + emulate_cpuid(vcpu, (uint32_t)cur_context->guest_cpu_regs.regs.rax, + (uint32_t *)&cur_context->guest_cpu_regs.regs.rax, + (uint32_t *)&cur_context->guest_cpu_regs.regs.rbx, + (uint32_t *)&cur_context->guest_cpu_regs.regs.rcx, + (uint32_t *)&cur_context->guest_cpu_regs.regs.rdx); + + TRACE_2L(TRC_VMEXIT_CPUID, vcpu->vcpu_id, 0); + + return 0; +} + +int cr_access_handler(struct vcpu *vcpu) +{ + uint64_t *regptr; + struct run_context *cur_context = + &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + static const int reg_trans_tab[] = { + [0] = VMX_MACHINE_T_GUEST_RAX_INDEX, + [1] = VMX_MACHINE_T_GUEST_RCX_INDEX, + [2] = VMX_MACHINE_T_GUEST_RDX_INDEX, + [3] = VMX_MACHINE_T_GUEST_RBX_INDEX, + [4] = 0xFF, /* for sp reg, should not be used, just for init */ + [5] = VMX_MACHINE_T_GUEST_RBP_INDEX, + [6] = VMX_MACHINE_T_GUEST_RSI_INDEX, + [7] = VMX_MACHINE_T_GUEST_RDI_INDEX, + [8] = VMX_MACHINE_T_GUEST_R8_INDEX, + [9] = VMX_MACHINE_T_GUEST_R9_INDEX, + [10] = VMX_MACHINE_T_GUEST_R10_INDEX, + [11] = VMX_MACHINE_T_GUEST_R11_INDEX, + [12] = VMX_MACHINE_T_GUEST_R12_INDEX, + [13] = VMX_MACHINE_T_GUEST_R13_INDEX, + [14] = VMX_MACHINE_T_GUEST_R14_INDEX, + [15] = VMX_MACHINE_T_GUEST_R15_INDEX + }; + int idx = VM_EXIT_CR_ACCESS_REG_IDX(vcpu->arch_vcpu.exit_qualification); + + ASSERT(idx != 4, "index should not be 4 (target SP)"); + regptr = cur_context->guest_cpu_regs.longs + reg_trans_tab[idx]; + + switch ((VM_EXIT_CR_ACCESS_ACCESS_TYPE + 
(vcpu->arch_vcpu.exit_qualification) << 4) | + VM_EXIT_CR_ACCESS_CR_NUM(vcpu->arch_vcpu.exit_qualification)) { + case 0x00: + /* mov to cr0 */ + write_cr0(vcpu, *regptr); + break; + + case 0x03: + /* mov to cr3 */ + write_cr3(vcpu, *regptr); + break; + + case 0x04: + /* mov to cr4 */ + write_cr4(vcpu, *regptr); + break; + + case 0x13: + /* mov from cr3 */ + read_cr3(vcpu, regptr); + break; +#if 0 + case 0x14: + /* mov from cr4 (this should not happen) */ + case 0x10: + /* mov from cr0 (this should not happen) */ +#endif + case 0x08: + /* mov to cr8 */ + vlapic_set_cr8(vcpu->arch_vcpu.vlapic, *regptr); + break; + case 0x18: + /* mov from cr8 */ + *regptr = vlapic_get_cr8(vcpu->arch_vcpu.vlapic); + break; + default: + panic("Unhandled CR access"); + return -EINVAL; + } + + TRACE_2L(TRC_VMEXIT_CR_ACCESS, + VM_EXIT_CR_ACCESS_ACCESS_TYPE + (vcpu->arch_vcpu.exit_qualification), + VM_EXIT_CR_ACCESS_CR_NUM + (vcpu->arch_vcpu.exit_qualification)); + + return 0; +} + +#if 0 +/* + * VMX_PROCBASED_CTLS_INVLPG is not enabled in the VM-execution + * control therefore we don't need it's handler. + * + * INVLPG: this instruction Invalidates any translation lookaside buffer + */ +int invlpg_handler(__unused struct vcpu *vcpu) +{ + pr_fatal("INVLPG executed"); + + return 0; +} + +/* + * XSETBV instruction set's the XCR0 that is used to tell for which components + * states can be saved on a context switch using xsave. + * + * We don't handle this right now because we are on a platform that does not + * support XSAVE/XRSTORE feature as reflected by the instruction CPUID. + * + * to make sure this never get called until we support it we can prevent the + * reading of this bit in CPUID VMEXIT. + * + * Linux checks this in CPUID: cpufeature.h: #define cpu_has_xsave + */ +static int xsetbv_instr_handler(__unused struct vcpu *vcpu) +{ + ASSERT("Not Supported" == 0, "XSETBV executed"); + + return 0; +} +#endif + +static int rdtsc_handler(struct vcpu *vcpu) +{ + uint64_t host_tsc, guest_tsc, tsc_offset; + uint32_t id; + struct run_context *cur_context = + &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + + /* Read the host TSC value */ + CPU_RDTSCP_EXECUTE(&host_tsc, &id); + + /* Get the guest TSC offset value from VMCS */ + tsc_offset = + exec_vmread64(VMX_TSC_OFFSET_FULL); + + /* Update the guest TSC value by following: TSC_guest = TSC_host + + * TSC_guest_Offset + */ + guest_tsc = host_tsc + tsc_offset; + + /* Return the TSC_guest in rax:rdx */ + cur_context->guest_cpu_regs.regs.rax = (uint32_t) guest_tsc; + cur_context->guest_cpu_regs.regs.rdx = (uint32_t) (guest_tsc >> 32); + + TRACE_2L(TRC_VMEXIT_RDTSC, host_tsc, tsc_offset); + + return 0; +} + +static int rdtscp_handler(struct vcpu *vcpu) +{ + uint64_t host_tsc, guest_tsc, tsc_offset; + uint32_t id; + struct run_context *cur_context = + &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + + /* Read the host TSC value */ + CPU_RDTSCP_EXECUTE(&host_tsc, &id); + + /* Get the guest TSC offset value from VMCS */ + tsc_offset = + exec_vmread64(VMX_TSC_OFFSET_FULL); + + /* Update the guest TSC value by following: * TSC_guest = TSC_host + + * TSC_guest_Offset + */ + guest_tsc = host_tsc + tsc_offset; + + /* Return the TSC_guest in rax:rdx and IA32_TSC_AUX in rcx */ + cur_context->guest_cpu_regs.regs.rax = (uint32_t) guest_tsc; + cur_context->guest_cpu_regs.regs.rdx = (uint32_t) (guest_tsc >> 32); + cur_context->guest_cpu_regs.regs.rcx = vcpu->arch_vcpu.msr_tsc_aux; + + TRACE_2L(TRC_VMEXIT_RDTSCP, guest_tsc, vcpu->arch_vcpu.msr_tsc_aux); + + return 0; +} 
diff --git a/hypervisor/arch/x86/vmx.c b/hypervisor/arch/x86/vmx.c new file mode 100644 index 000000000..f4ca10f71 --- /dev/null +++ b/hypervisor/arch/x86/vmx.c @@ -0,0 +1,1346 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#ifdef CONFIG_EFI_STUB +#include +extern struct efi_ctx* efi_ctx; +extern int efi_launch_vector; +#endif + +#define PAT_POWER_ON_VALUE (PAT_MEM_TYPE_WB + \ + ((uint64_t)PAT_MEM_TYPE_WT << 8) + \ + ((uint64_t)PAT_MEM_TYPE_UCM << 16) + \ + ((uint64_t)PAT_MEM_TYPE_UC << 24) + \ + ((uint64_t)PAT_MEM_TYPE_WB << 32) + \ + ((uint64_t)PAT_MEM_TYPE_WT << 40) + \ + ((uint64_t)PAT_MEM_TYPE_UCM << 48) + \ + ((uint64_t)PAT_MEM_TYPE_UC << 56)) + +static inline int exec_vmxon(void *addr) +{ + uint64_t rflags; + uint64_t tmp64; + int status = 0; + + if (addr == NULL) + status = -EINVAL; + ASSERT(status == 0, "Incorrect arguments"); + + /* Read Feature ControL MSR */ + tmp64 = msr_read(MSR_IA32_FEATURE_CONTROL); + + /* Determine if feature control is locked */ + if (tmp64 & MSR_IA32_FEATURE_CONTROL_LOCK) { + /* See if VMX enabled */ + if (!(tmp64 & MSR_IA32_FEATURE_CONTROL_VMX_NO_SMX)) { + /* Return error - VMX can't be enabled */ + status = -EINVAL; + } + } else { + /* Lock and enable VMX support */ + tmp64 |= (MSR_IA32_FEATURE_CONTROL_LOCK | + MSR_IA32_FEATURE_CONTROL_VMX_NO_SMX); + msr_write(MSR_IA32_FEATURE_CONTROL, tmp64); + } + + /* Ensure previous operations successful */ + if (status == 0) { + /* Turn VMX on */ + asm volatile ("mov %1, %%rax\n" + "vmxon (%%rax)\n" + "pushfq\n" + "pop %0\n":"=r" (rflags) + : "r"(addr) + : "%rax"); + + /* if carry and zero flags are clear operation success */ + if (rflags & (RFLAGS_C | RFLAGS_Z)) + status = -EINVAL; + } + + /* Return result to caller */ + return status; +} + +int check_vmx_support(void) +{ + uint32_t eax, ebx, ecx, edx; + int ret_val = 0; + uint64_t tmp64; + + /* Run CPUID to determine if VTX support available */ + cpuid(CPUID_FEATURES, &eax, &ebx, &ecx, 
&edx); + + /* See if VMX feature bit is set in ECX */ + if (!(ecx & CPUID_ECX_VMX)) { + /* Log and return error */ + pr_fatal("VMX not supported by CPU"); + ret_val = -EINVAL; + } else { + /* Read feature control MSR */ + tmp64 = msr_read(MSR_IA32_FEATURE_CONTROL); + + /* See if feature control MSR is locked and VMX not enabled + * appropriately + */ + if ((tmp64 & MSR_IA32_FEATURE_CONTROL_LOCK) && + (!(tmp64 & MSR_IA32_FEATURE_CONTROL_VMX_NO_SMX))) { + /* Log and return error */ + pr_fatal("MSR_IA32_FEATURE_CONTROL: Lock bit is on and VMXON bit is off"); + pr_fatal(" Cannot do vmxon"); + ret_val = -EINVAL; + } + } + + /* Return status to caller */ + return ret_val; +} + +int exec_vmxon_instr(void) +{ + uint64_t tmp64; + uint32_t tmp32; + int ret_val = -EINVAL; + void *vmxon_region; + + /* Allocate page aligned memory for VMXON region */ + vmxon_region = alloc_page(); + + if (vmxon_region != 0) { + /* Initialize vmxon page with revision id from IA32 VMX BASIC + * MSR + */ + tmp32 = msr_read(MSR_IA32_VMX_BASIC); + memcpy_s((uint32_t *) vmxon_region, 4, &tmp32, 4); + + /* Turn on CR0.NE and CR4.VMXE */ + CPU_CR_READ(cr0, &tmp64); + CPU_CR_WRITE(cr0, tmp64 | CR0_NE); + CPU_CR_READ(cr4, &tmp64); + CPU_CR_WRITE(cr4, tmp64 | CR4_VMXE); + + /* Turn ON VMX */ + ret_val = exec_vmxon(&vmxon_region); + } + + return ret_val; +} + +int exec_vmclear(void *addr) +{ + uint64_t rflags; + int status = 0; + + if (addr == NULL) + status = -EINVAL; + ASSERT(status == 0, "Incorrect arguments"); + + asm volatile ( + "mov %1, %%rax\n" + "vmclear (%%rax)\n" + "pushfq\n" + "pop %0\n":"=r" (rflags) + : "r"(addr) + : "%rax"); + + /* if carry and zero flags are clear operation success */ + if (rflags & (RFLAGS_C | RFLAGS_Z)) + status = -EINVAL; + + return status; +} + +int exec_vmptrld(void *addr) +{ + uint64_t rflags; + int status = 0; + + if (addr == NULL) + status = -EINVAL; + ASSERT(status == 0, "Incorrect arguments"); + + asm volatile ( + "mov %1, %%rax\n" + "vmptrld (%%rax)\n" + "pushfq\n" + "pop %0\n" + : "=r" (rflags) + : "r"(addr) + : "%rax"); + + /* if carry and zero flags are clear operation success */ + if (rflags & (RFLAGS_C | RFLAGS_Z)) + status = -EINVAL; + + return status; +} + +uint64_t exec_vmread(uint32_t field) +{ + uint64_t value; + + asm volatile ( + "vmread %%rdx, %%rax " + : "=a" (value) + : "d"(field) + : "cc"); + + return value; +} + +uint64_t exec_vmread64(uint32_t field_full) +{ + uint64_t low; + + low = exec_vmread(field_full); + +#ifdef __i386__ + low += exec_vmread(field_full + 1) << 32; +#endif + return low; +} + +void exec_vmwrite(uint32_t field, uint64_t value) +{ + asm volatile ( + "vmwrite %%rax, %%rdx " + : : "a" (value), "d"(field) + : "cc"); +} + +void exec_vmwrite64(unsigned int field_full, uint64_t value) +{ +#ifdef __i386__ + int low = (int)(value & 0xFFFFFFFF); + int high = (int)((value >> 32) & 0xFFFFFFFF); + + exec_vmwrite(field_full, low); + exec_vmwrite(field_full + 1, high); +#else + exec_vmwrite(field_full, value); +#endif +} + +#define HV_ARCH_VMX_GET_CS(SEL) \ +{ \ + asm volatile ("movw %%cs, %%ax" : "=a"(sel)); \ +} + +uint32_t get_cs_access_rights(void) +{ + uint32_t usable_ar; + uint16_t sel_value; + + asm volatile ("movw %%cs, %%ax" : "=a" (sel_value)); + asm volatile ("lar %%eax, %%eax" : "=a" (usable_ar) : "a"(sel_value)); + usable_ar = usable_ar >> 8; + usable_ar &= 0xf0ff; /* clear bits 11:8 */ + + return usable_ar; +} + +static void init_guest_state(struct vcpu *vcpu) +{ + uint64_t field; + uint64_t value; + uint32_t value32; + uint64_t value64; + uint16_t 
sel; + uint32_t limit, access, base; + uint32_t ldt_idx = 0x38; + int es = 0, ss = 0, ds = 0, fs = 0, gs = 0, data32_idx; + uint32_t lssd32_idx = 0x70; + struct vm *vm = vcpu->vm; + struct run_context *cur_context = + &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + + pr_dbg("*********************"); + pr_dbg("Initialize guest state"); + pr_dbg("*********************"); + + /*************************************************/ + /* Set up CRx */ + /*************************************************/ + pr_dbg("Natural-width********"); + + /* Setup guest control register values */ + /* Set up guest CRO field */ + if (get_vcpu_mode(vcpu) == REAL_MODE) { + /*cur_context->cr0 = (CR0_CD | CR0_NW | CR0_ET | CR0_NE);*/ + cur_context->cr0 = CR0_ET | CR0_NE; + cur_context->cr3 = 0; + cur_context->cr4 = CR4_VMXE; + } else if (get_vcpu_mode(vcpu) == PAGE_PROTECTED_MODE) { + cur_context->cr0 = ((uint64_t)CR0_PG | CR0_PE | CR0_NE); + cur_context->cr4 = ((uint64_t)CR4_PSE | CR4_PAE | CR4_MCE | CR4_VMXE); + cur_context->cr3 = (uint64_t)vm->arch_vm.guest_pml4 | CR3_PWT; + } + + value = cur_context->cr0; + field = VMX_GUEST_CR0; + exec_vmwrite(field, value & 0xFFFFFFFF); + pr_dbg("VMX_GUEST_CR0: 0x%016llx ", value); + + /* Set up guest CR3 field */ + value = cur_context->cr3; + field = VMX_GUEST_CR3; + exec_vmwrite(field, value & 0xFFFFFFFF); + pr_dbg("VMX_GUEST_CR3: 0x%016llx ", value); + + /* Set up guest CR4 field */ + value = cur_context->cr4; + field = VMX_GUEST_CR4; + exec_vmwrite(field, value & 0xFFFFFFFF); + pr_dbg("VMX_GUEST_CR4: 0x%016llx ", value); + + /***************************************************/ + /* Set up Flags - the value of RFLAGS on VM entry */ + /***************************************************/ + field = VMX_GUEST_RFLAGS; + cur_context->rflags = 0x2; /* Bit 1 is a active high reserved bit */ + exec_vmwrite(field, cur_context->rflags); + pr_dbg("VMX_GUEST_RFLAGS: 0x%016llx ", value); + + /***************************************************/ + /* Set Code Segment - CS */ + /***************************************************/ + if (get_vcpu_mode(vcpu) == REAL_MODE) { + /* AP is initialized with real mode + * and CS value is left shift 8 bits from sipi vector; + */ + sel = vcpu->arch_vcpu.sipi_vector << 8; + limit = 0xffff; + access = 0x9F; + base = sel << 4; + } else { + HV_ARCH_VMX_GET_CS(sel); + access = get_cs_access_rights(); + limit = 0xffffffff; + base = 0; + } + + /* Selector */ + field = VMX_GUEST_CS_SEL; + exec_vmwrite(field, sel); + pr_dbg("VMX_GUEST_CS_SEL: 0x%x ", sel); + + /* Limit */ + field = VMX_GUEST_CS_LIMIT; + exec_vmwrite(field, limit); + pr_dbg("VMX_GUEST_CS_LIMIT: 0x%x ", limit); + + /* Access */ + field = VMX_GUEST_CS_ATTR; + exec_vmwrite(field, access); + pr_dbg("VMX_GUEST_CS_ATTR: 0x%x ", access); + + /* Base */ + field = VMX_GUEST_CS_BASE; + exec_vmwrite(field, base); + pr_dbg("VMX_GUEST_CS_BASE: 0x%016llx ", base); + + /***************************************************/ + /* Set up instruction pointer and stack pointer */ + /***************************************************/ + /* Set up guest instruction pointer */ + field = VMX_GUEST_RIP; + if (get_vcpu_mode(vcpu) == REAL_MODE) + value32 = 0; + else + value32 = (uint32_t) ((uint64_t) vcpu->entry_addr & 0xFFFFFFFF); + + pr_dbg("GUEST RIP on VMEntry %x ", value32); + exec_vmwrite(field, value32); + + if (get_vcpu_mode(vcpu) == PAGE_PROTECTED_MODE) { + /* Set up guest stack pointer to 0 */ + field = VMX_GUEST_RSP; + value32 = 0; + pr_dbg("GUEST RSP on VMEntry %x ", + value32); + 
exec_vmwrite(field, value32); + } + + /***************************************************/ + /* Set up GDTR, IDTR and LDTR */ + /***************************************************/ + + /* GDTR - Global Descriptor Table */ + if (get_vcpu_mode(vcpu) == REAL_MODE) { + /* Base */ + base = 0; + + /* Limit */ + limit = 0xFFFF; + } else if (get_vcpu_mode(vcpu) == PAGE_PROTECTED_MODE) { + uint64_t gdtb = 0; + + /* Base *//* TODO: Should guest GDTB point to host GDTB ? */ + /* Obtain the current global descriptor table base */ + asm volatile ("sgdt %0" : : "m" (gdtb)); + value32 = gdtb & 0x0ffff; + gdtb = gdtb >> 16; /* base */ + + if ((gdtb >> 47 & 0x1)) + gdtb |= 0xffff000000000000ull; + + base = gdtb; + + /* Limit */ + limit = HOST_GDT_SIZE - 1; + } + + /* GDTR Base */ + field = VMX_GUEST_GDTR_BASE; + exec_vmwrite(field, base); + pr_dbg("VMX_GUEST_GDTR_BASE: 0x%x ", base); + + /* GDTR Limit */ + field = VMX_GUEST_GDTR_LIMIT; + exec_vmwrite(field, limit); + pr_dbg("VMX_GUEST_GDTR_LIMIT: 0x%x ", limit); + + /* IDTR - Interrupt Descriptor Table */ + if (get_vcpu_mode(vcpu) == REAL_MODE) { + /* Base */ + base = 0; + + /* Limit */ + limit = 0xFFFF; + } else if (get_vcpu_mode(vcpu) == PAGE_PROTECTED_MODE) { + uint64_t idtb = 0; + + /* TODO: Should guest IDTR point to host IDTR ? */ + asm volatile ("sidt %0"::"m" (idtb)); + value32 = idtb & 0x0ffff; + /* Limit */ + limit = value32; + idtb = idtb >> 16; /* base */ + + if ((idtb >> 47 & 0x1)) + idtb |= 0xffff000000000000ull; + + /* Base */ + base = idtb; + } + + /* IDTR Base */ + field = VMX_GUEST_IDTR_BASE; + exec_vmwrite(field, base); + pr_dbg("VMX_GUEST_IDTR_BASE: 0x%x ", base); + + /* IDTR Limit */ + field = VMX_GUEST_IDTR_LIMIT; + exec_vmwrite(field, limit); + pr_dbg("VMX_GUEST_IDTR_LIMIT: 0x%x ", limit); + + /***************************************************/ + /* Debug register */ + /***************************************************/ + /* Set up guest Debug register */ + field = VMX_GUEST_DR7; + value = 0x400; + exec_vmwrite(field, value); + pr_dbg("VMX_GUEST_DR7: 0x%016llx ", value); + + /***************************************************/ + /* ES, CS, SS, DS, FS, GS */ + /***************************************************/ + data32_idx = 0x10; + if (get_vcpu_mode(vcpu) == REAL_MODE) { + es = ss = ds = fs = gs = data32_idx; + limit = 0xffff; + + } else if (get_vcpu_mode(vcpu) == PAGE_PROTECTED_MODE) { + asm volatile ("movw %%es, %%ax":"=a" (es)); + asm volatile ("movw %%ss, %%ax":"=a" (ss)); + asm volatile ("movw %%ds, %%ax":"=a" (ds)); + asm volatile ("movw %%fs, %%ax":"=a" (fs)); + asm volatile ("movw %%gs, %%ax":"=a" (gs)); + limit = 0xffffffff; + } + + /* Selector */ + field = VMX_GUEST_ES_SEL; + exec_vmwrite(field, es); + pr_dbg("VMX_GUEST_ES_SEL: 0x%x ", es); + + field = VMX_GUEST_SS_SEL; + exec_vmwrite(field, ss); + pr_dbg("VMX_GUEST_SS_SEL: 0x%x ", ss); + + field = VMX_GUEST_DS_SEL; + exec_vmwrite(field, ds); + pr_dbg("VMX_GUEST_DS_SEL: 0x%x ", ds); + + field = VMX_GUEST_FS_SEL; + exec_vmwrite(field, fs); + pr_dbg("VMX_GUEST_FS_SEL: 0x%x ", fs); + + field = VMX_GUEST_GS_SEL; + exec_vmwrite(field, gs); + pr_dbg("VMX_GUEST_GS_SEL: 0x%x ", gs); + + /* Limit */ + field = VMX_GUEST_ES_LIMIT; + exec_vmwrite(field, limit); + pr_dbg("VMX_GUEST_ES_LIMIT: 0x%x ", limit); + field = VMX_GUEST_SS_LIMIT; + exec_vmwrite(field, limit); + pr_dbg("VMX_GUEST_SS_LIMIT: 0x%x ", limit); + field = VMX_GUEST_DS_LIMIT; + exec_vmwrite(field, limit); + pr_dbg("VMX_GUEST_DS_LIMIT: 0x%x ", limit); + field = VMX_GUEST_FS_LIMIT; + exec_vmwrite(field, limit); + 
pr_dbg("VMX_GUEST_FS_LIMIT: 0x%x ", limit); + field = VMX_GUEST_GS_LIMIT; + exec_vmwrite(field, limit); + pr_dbg("VMX_GUEST_GS_LIMIT: 0x%x ", limit); + + /* Access */ + if (get_vcpu_mode(vcpu) == REAL_MODE) + value32 = 0x0093; + else if (get_vcpu_mode(vcpu) == PAGE_PROTECTED_MODE) + value32 = 0xc093; + + field = VMX_GUEST_ES_ATTR; + exec_vmwrite(field, value32); + pr_dbg("VMX_GUEST_ES_ATTR: 0x%x ", value32); + field = VMX_GUEST_SS_ATTR; + exec_vmwrite(field, value32); + pr_dbg("VMX_GUEST_SS_ATTR: 0x%x ", value32); + field = VMX_GUEST_DS_ATTR; + exec_vmwrite(field, value32); + pr_dbg("VMX_GUEST_DS_ATTR: 0x%x ", value32); + field = VMX_GUEST_FS_ATTR; + exec_vmwrite(field, value32); + pr_dbg("VMX_GUEST_FS_ATTR: 0x%x ", value32); + field = VMX_GUEST_GS_ATTR; + exec_vmwrite(field, value32); + pr_dbg("VMX_GUEST_GS_ATTR: 0x%x ", value32); + + /* Base */ + value = 0; + field = VMX_GUEST_ES_BASE; + exec_vmwrite(field, es << 4); + pr_dbg("VMX_GUEST_ES_BASE: 0x%016llx ", value); + field = VMX_GUEST_SS_BASE; + exec_vmwrite(field, ss << 4); + pr_dbg("VMX_GUEST_SS_BASE: 0x%016llx ", value); + field = VMX_GUEST_DS_BASE; + exec_vmwrite(field, ds << 4); + pr_dbg("VMX_GUEST_DS_BASE: 0x%016llx ", value); + field = VMX_GUEST_FS_BASE; + exec_vmwrite(field, fs << 4); + pr_dbg("VMX_GUEST_FS_BASE: 0x%016llx ", value); + field = VMX_GUEST_GS_BASE; + exec_vmwrite(field, gs << 4); + pr_dbg("VMX_GUEST_GS_BASE: 0x%016llx ", value); + + /***************************************************/ + /* LDT and TR (dummy) */ + /***************************************************/ + field = VMX_GUEST_LDTR_SEL; + value32 = ldt_idx; + exec_vmwrite(field, value32); + pr_dbg("VMX_GUEST_LDTR_SEL: 0x%x ", value32); + + field = VMX_GUEST_LDTR_LIMIT; + value32 = 0xffffffff; + exec_vmwrite(field, value32); + pr_dbg("VMX_GUEST_LDTR_LIMIT: 0x%x ", value32); + + field = VMX_GUEST_LDTR_ATTR; + value32 = 0x10000; + exec_vmwrite(field, value32); + pr_dbg("VMX_GUEST_LDTR_ATTR: 0x%x ", value32); + + field = VMX_GUEST_LDTR_BASE; + value32 = 0x00; + exec_vmwrite(field, value32); + pr_dbg("VMX_GUEST_LDTR_BASE: 0x%x ", value32); + + /* Task Register */ + field = VMX_GUEST_TR_SEL; + value32 = lssd32_idx; + exec_vmwrite(field, value32); + pr_dbg("VMX_GUEST_TR_SEL: 0x%x ", value32); + + field = VMX_GUEST_TR_LIMIT; + value32 = 0xff; + exec_vmwrite(field, value32); + pr_dbg("VMX_GUEST_TR_LIMIT: 0x%x ", value32); + + field = VMX_GUEST_TR_ATTR; + value32 = 0x8b; + exec_vmwrite(field, value32); + pr_dbg("VMX_GUEST_TR_ATTR: 0x%x ", value32); + + field = VMX_GUEST_TR_BASE; + value32 = 0x00; + exec_vmwrite(field, value32); + pr_dbg("VMX_GUEST_TR_BASE: 0x%x ", value32); + + field = VMX_GUEST_INTERRUPTIBILITY_INFO; + value32 = 0; + exec_vmwrite(field, value32); + pr_dbg("VMX_GUEST_INTERRUPTIBILITY_INFO: 0x%x ", + value32); + + field = VMX_GUEST_ACTIVITY_STATE; + value32 = 0; + exec_vmwrite(field, value32); + pr_dbg("VMX_GUEST_ACTIVITY_STATE: 0x%x ", + value32); + + field = VMX_GUEST_SMBASE; + value32 = 0; + exec_vmwrite(field, value32); + pr_dbg("VMX_GUEST_SMBASE: 0x%x ", value32); + + asm volatile ("mov $0x174, %rcx"); + asm volatile ("rdmsr"); + asm volatile ("mov %%rax, %0"::"m" (value32):"memory"); + field = VMX_GUEST_IA32_SYSENTER_CS; + exec_vmwrite(field, value32); + pr_dbg("VMX_GUEST_IA32_SYSENTER_CS: 0x%x ", + value32); + + value64 = PAT_POWER_ON_VALUE; + exec_vmwrite64(VMX_GUEST_IA32_PAT_FULL, value64); + pr_dbg("VMX_GUEST_IA32_PAT: 0x%016llx ", + value64); + + if (get_vcpu_mode(vcpu) == REAL_MODE) { + /* Disable long mode (clear IA32_EFER.LME) in VMCS 
IA32_EFER + * MSR + */ + value64 = msr_read(MSR_IA32_EFER); + value64 &= ~(MSR_IA32_EFER_LME_BIT | MSR_IA32_EFER_LMA_BIT); + } else { + value64 = msr_read(MSR_IA32_EFER); + } + exec_vmwrite64(VMX_GUEST_IA32_EFER_FULL, value64); + pr_dbg("VMX_GUEST_IA32_EFER: 0x%016llx ", + value64); + + value64 = 0; + exec_vmwrite64(VMX_GUEST_IA32_DEBUGCTL_FULL, value64); + pr_dbg("VMX_GUEST_IA32_DEBUGCTL: 0x%016llx ", + value64); + + /* Set up guest pending debug exception */ + field = VMX_GUEST_PENDING_DEBUG_EXCEPT; + value = 0x0; + exec_vmwrite(field, value); + pr_dbg("VMX_GUEST_PENDING_DEBUG_EXCEPT: 0x%016llx ", value); + + /* These fields manage host and guest system calls * pg 3069 31.10.4.2 + * - set up these fields with * contents of current SYSENTER ESP and + * EIP MSR values + */ + field = VMX_GUEST_IA32_SYSENTER_ESP; + value = msr_read(MSR_IA32_SYSENTER_ESP); + exec_vmwrite(field, value); + pr_dbg("VMX_GUEST_IA32_SYSENTER_ESP: 0x%016llx ", + value); + field = VMX_GUEST_IA32_SYSENTER_EIP; + value = msr_read(MSR_IA32_SYSENTER_EIP); + exec_vmwrite(field, value); + pr_dbg("VMX_GUEST_IA32_SYSENTER_EIP: 0x%016llx ", + value); +} + +static void init_host_state(__unused struct vcpu *vcpu) +{ + uint64_t field; + uint16_t value16; + uint32_t value32; + uint64_t value64; + uint64_t value; + uint64_t trbase; + uint64_t trbase_lo; + uint64_t trbase_hi; + uint64_t realtrbase; + uint64_t gdtb = 0; + uint64_t idtb = 0; + uint16_t tr_sel; + + pr_dbg("*********************"); + pr_dbg("Initialize host state"); + pr_dbg("*********************"); + + /*************************************************** + * 16 - Bit fields + * Move the current ES, CS, SS, DS, FS, GS, TR, LDTR * values to the + * corresponding 16-bit host * segment selection (ES, CS, SS, DS, FS, + * GS), * Task Register (TR), * Local Descriptor Table Register (LDTR) + * + ***************************************************/ + field = VMX_HOST_ES_SEL; + asm volatile ("movw %%es, %%ax":"=a" (value16)); + exec_vmwrite(field, value16); + pr_dbg("VMX_HOST_ES_SEL: 0x%x ", value16); + + field = VMX_HOST_CS_SEL; + asm volatile ("movw %%cs, %%ax":"=a" (value16)); + exec_vmwrite(field, value16); + pr_dbg("VMX_HOST_CS_SEL: 0x%x ", value16); + + field = VMX_HOST_SS_SEL; + asm volatile ("movw %%ss, %%ax":"=a" (value16)); + exec_vmwrite(field, value16); + pr_dbg("VMX_HOST_SS_SEL: 0x%x ", value16); + + field = VMX_HOST_DS_SEL; + asm volatile ("movw %%ds, %%ax":"=a" (value16)); + exec_vmwrite(field, value16); + pr_dbg("VMX_HOST_DS_SEL: 0x%x ", value16); + + field = VMX_HOST_FS_SEL; + asm volatile ("movw %%fs, %%ax":"=a" (value16)); + exec_vmwrite(field, value16); + pr_dbg("VMX_HOST_FS_SEL: 0x%x ", value16); + + field = VMX_HOST_GS_SEL; + asm volatile ("movw %%gs, %%ax":"=a" (value16)); + exec_vmwrite(field, value16); + pr_dbg("VMX_HOST_GS_SEL: 0x%x ", value16); + + field = VMX_HOST_TR_SEL; + asm volatile ("str %%ax":"=a" (tr_sel)); + exec_vmwrite(field, tr_sel); + pr_dbg("VMX_HOST_TR_SEL: 0x%x ", tr_sel); + + /****************************************************** + * 32-bit fields + * Set up the 32 bit host state fields - pg 3418 B.3.3 * Set limit for + * ES, CS, DD, DS, FS, GS, LDTR, Guest TR, * GDTR, and IDTR + ******************************************************/ + + /* TODO: Should guest GDTB point to host GDTB ? 
*/ + /* Obtain the current global descriptor table base */ + asm volatile ("sgdt %0"::"m" (gdtb)); + value32 = gdtb & 0x0ffff; + gdtb = gdtb >> 16; /* base */ + + if ((gdtb >> 47) & 0x1) + gdtb |= 0xffff000000000000ull; + + /* Set up the guest and host GDTB base fields with current GDTB base */ + field = VMX_HOST_GDTR_BASE; + exec_vmwrite(field, gdtb); + pr_dbg("VMX_HOST_GDTR_BASE: 0x%x ", gdtb); + + /* TODO: Should guest TR point to host TR ? */ + trbase = gdtb + tr_sel; + if ((trbase >> 47) & 0x1) + trbase |= 0xffff000000000000ull; + + /* SS segment override */ + asm volatile ("mov %0,%%rax\n" + ".byte 0x36\n" + "movq (%%rax),%%rax\n":"=a" (trbase_lo):"0"(trbase) + ); + realtrbase = ((trbase_lo >> 16) & (0x0ffff)) | + (((trbase_lo >> 32) & 0x000000ff) << 16) | + (((trbase_lo >> 56) & 0xff) << 24); + + /* SS segment override for upper32 bits of base in ia32e mode */ + asm volatile ("mov %0,%%rax\n" + ".byte 0x36\n" + "movq 8(%%rax),%%rax\n":"=a" (trbase_hi):"0"(trbase)); + realtrbase = realtrbase | (trbase_hi << 32); + + /* Set up host and guest TR base fields */ + field = VMX_HOST_TR_BASE; + exec_vmwrite(field, realtrbase); + pr_dbg("VMX_HOST_TR_BASE: 0x%x ", realtrbase); + + /* Obtain the current interrupt descriptor table base */ + asm volatile ("sidt %0"::"m" (idtb)); + value32 = idtb & 0x0ffff; + /* base */ + idtb = idtb >> 16; + + if ((idtb >> 47 & 0x1)) + idtb |= 0xffff000000000000ull; + + field = VMX_HOST_IDTR_BASE; + exec_vmwrite(field, idtb); + pr_dbg("VMX_HOST_IDTR_BASE: 0x%x ", idtb); + + asm volatile ("mov $0x174, %rcx"); + asm volatile ("rdmsr"); + asm volatile ("mov %%rax, %0"::"m" (value32):"memory"); + field = VMX_HOST_IA32_SYSENTER_CS; + exec_vmwrite(field, value32); + pr_dbg("VMX_HOST_IA32_SYSENTER_CS: 0x%x ", + value32); + + /**************************************************/ + /* 64-bit fields */ + pr_dbg("64-bit********"); + + value64 = msr_read(MSR_IA32_PAT); + exec_vmwrite64(VMX_HOST_IA32_PAT_FULL, value64); + pr_dbg("VMX_HOST_IA32_PAT: 0x%016llx ", value64); + + value64 = msr_read(MSR_IA32_EFER); + exec_vmwrite64(VMX_HOST_IA32_EFER_FULL, value64); + pr_dbg("VMX_HOST_IA32_EFER: 0x%016llx ", + value64); + + /**************************************************/ + /* Natural width fields */ + pr_dbg("Natural-width********"); + /* Set up host CR0 field */ + CPU_CR_READ(cr0, &value); + value = (uint32_t) value; + field = VMX_HOST_CR0; + exec_vmwrite(field, value); + pr_dbg("VMX_GUEST_CR0: 0x%016llx ", value); + + /* Set up host CR3 field */ + CPU_CR_READ(cr3, &value); + value = (uint32_t) value; + field = VMX_HOST_CR3; + exec_vmwrite(field, value); + pr_dbg("VMX_GUEST_CR3: 0x%016llx ", value); + + /* Set up host CR4 field */ + CPU_CR_READ(cr4, &value); + value = (uint32_t) value; + field = VMX_HOST_CR4; + exec_vmwrite(field, value); + pr_dbg("VMX_GUEST_CR4: 0x%016llx ", value); + + /* Set up host and guest FS base address */ + value = msr_read(MSR_IA32_FS_BASE); + field = VMX_HOST_FS_BASE; + exec_vmwrite(field, value); + pr_dbg("VMX_HOST_FS_BASE: 0x%016llx ", value); + value = msr_read(MSR_IA32_GS_BASE); + field = VMX_HOST_GS_BASE; + exec_vmwrite(field, value); + pr_dbg("VMX_HOST_GS_BASE: 0x%016llx ", value); + + /* Set up host instruction pointer on VM Exit */ + field = VMX_HOST_RIP; + value32 = (uint32_t) ((uint64_t) (&vm_exit) & 0xFFFFFFFF); + pr_dbg("HOST RIP on VMExit %x ", value32); + exec_vmwrite(field, value32); + pr_dbg("vm exit return address = %x ", value32); + + /* These fields manage host and guest system calls * pg 3069 31.10.4.2 + * - set up these fields 
with * contents of current SYSENTER ESP and + * EIP MSR values + */ + field = VMX_HOST_IA32_SYSENTER_ESP; + value = msr_read(MSR_IA32_SYSENTER_ESP); + exec_vmwrite(field, value); + pr_dbg("VMX_HOST_IA32_SYSENTER_ESP: 0x%016llx ", + value); + field = VMX_HOST_IA32_SYSENTER_EIP; + value = msr_read(MSR_IA32_SYSENTER_EIP); + exec_vmwrite(field, value); + pr_dbg("VMX_HOST_IA32_SYSENTER_EIP: 0x%016llx ", value); +} + +static void init_exec_ctrl(struct vcpu *vcpu) +{ + uint32_t value32, fixed0, fixed1; + uint64_t value64; + struct vm *vm = (struct vm *) vcpu->vm; + + /* Log messages to show initializing VMX execution controls */ + pr_dbg("*****************************"); + pr_dbg("Initialize execution control "); + pr_dbg("*****************************"); + + /* Set up VM Execution control to enable Set VM-exits on external + * interrupts preemption timer - pg 2899 24.6.1 + */ + value32 = msr_read(MSR_IA32_VMX_PINBASED_CTLS); + + + /* enable external interrupt VM Exit */ + value32 |= VMX_PINBASED_CTLS_IRQ_EXIT; + + exec_vmwrite(VMX_PIN_VM_EXEC_CONTROLS, value32); + pr_dbg("VMX_PIN_VM_EXEC_CONTROLS: 0x%x ", value32); + + /* Set up primary processor based VM execution controls - pg 2900 + * 24.6.2. Set up for: + * Enable TSC offsetting + * Enable TSC exiting + * guest access to IO bit-mapped ports causes VM exit + * guest access to MSR causes VM exit + * Activate secondary controls + */ + /* These are bits 1,4-6,8,13-16, and 26, the corresponding bits of + * the IA32_VMX_PROCBASED_CTRLS MSR are always read as 1 --- A.3.2 + */ + value32 = msr_read(MSR_IA32_VMX_PROCBASED_CTLS); + value32 |= (/* VMX_PROCBASED_CTLS_TSC_OFF | */ + /* VMX_PROCBASED_CTLS_RDTSC | */ + VMX_PROCBASED_CTLS_IO_BITMAP | + VMX_PROCBASED_CTLS_MSR_BITMAP | + VMX_PROCBASED_CTLS_SECONDARY); + + /*Disable VM_EXIT for CR3 access*/ + value32 &= ~(VMX_PROCBASED_CTLS_CR3_LOAD | + VMX_PROCBASED_CTLS_CR3_STORE); + + if (is_apicv_enabled()) { + value32 |= VMX_PROCBASED_CTLS_TPR_SHADOW; + } else { + /* Add CR8 VMExit for vlapic */ + value32 |= + (VMX_PROCBASED_CTLS_CR8_LOAD | + VMX_PROCBASED_CTLS_CR8_STORE); + } + + exec_vmwrite(VMX_PROC_VM_EXEC_CONTROLS, value32); + pr_dbg("VMX_PROC_VM_EXEC_CONTROLS: 0x%x ", value32); + + /* Set up secondary processor based VM execution controls - pg 2901 + * 24.6.2. 
Set up for: * Enable EPT * Enable RDTSCP * Unrestricted + * guest (optional) + */ + value32 = msr_read(MSR_IA32_VMX_PROCBASED_CTLS2); + value32 |= (VMX_PROCBASED_CTLS2_EPT | + /* VMX_PROCBASED_CTLS2_RDTSCP | */ + VMX_PROCBASED_CTLS2_UNRESTRICT); + + if (is_apicv_enabled()) { + value32 |= + (VMX_PROCBASED_CTLS2_VAPIC | + VMX_PROCBASED_CTLS2_VAPIC_REGS | + VMX_PROCBASED_CTLS2_VIRQ); + } + + exec_vmwrite(VMX_PROC_VM_EXEC_CONTROLS2, value32); + pr_dbg("VMX_PROC_VM_EXEC_CONTROLS2: 0x%x ", value32); + + if (is_apicv_enabled()) { + /*APIC-v, config APIC-access address*/ + value64 = apicv_get_apic_access_addr(vcpu->vm); + exec_vmwrite64(VMX_APIC_ACCESS_ADDR_FULL, + value64); + + /*APIC-v, config APIC virtualized page address*/ + value64 = apicv_get_apic_page_addr(vcpu->arch_vcpu.vlapic); + exec_vmwrite64(VMX_VIRTUAL_APIC_PAGE_ADDR_FULL, + value64); + + exec_vmwrite64(VMX_EOI_EXIT0_FULL, -1UL); + exec_vmwrite64(VMX_EOI_EXIT1_FULL, -1UL); + exec_vmwrite64(VMX_EOI_EXIT2_FULL, -1UL); + exec_vmwrite64(VMX_EOI_EXIT3_FULL, -1UL); + } + + /* Check for EPT support */ + if (is_ept_supported()) + pr_dbg("EPT is supported"); + else + pr_err("Error: EPT is not supported"); + + /* Load EPTP execution control + * TODO: introduce API to make this data driven based + * on VMX_EPT_VPID_CAP + */ + value64 = ((uint64_t) vm->arch_vm.ept) | (3 << 3) | 6; + exec_vmwrite64(VMX_EPT_POINTER_FULL, value64); + pr_dbg("VMX_EPT_POINTER: 0x%016llx ", value64); + + /* Set up guest exception mask bitmap setting a bit * causes a VM exit + * on corresponding guest * exception - pg 2902 24.6.3 + * enable VM exit on MC and DB + */ + value32 = (1 << IDT_MC) | (1u << IDT_DB); + exec_vmwrite(VMX_EXCEPTION_BITMAP, value32); + + /* Set up page fault error code mask - second paragraph * pg 2902 + * 24.6.3 - guest page fault exception causing * vmexit is governed by + * both VMX_EXCEPTION_BITMAP and * VMX_PF_EC_MASK + */ + exec_vmwrite(VMX_PF_EC_MASK, 0); + + /* Set up page fault error code match - second paragraph * pg 2902 + * 24.6.3 - guest page fault exception causing * vmexit is governed by + * both VMX_EXCEPTION_BITMAP and * VMX_PF_EC_MATCH + */ + exec_vmwrite(VMX_PF_EC_MATCH, 0); + + /* Set up CR3 target count - An execution of mov to CR3 * by guest + * causes HW to evaluate operand match with * one of N CR3-Target Value + * registers. 
The CR3 target * count values tells the number of + * target-value regs to evaluate + */ + exec_vmwrite(VMX_CR3_TARGET_COUNT, 0); + + /* Set up TPR threshold for virtual interrupt delivery * - pg 2904 + * 24.6.8 + */ + exec_vmwrite(VMX_TPR_THRESHOLD, 0); + + /* Set up IO bitmap register A and B - pg 2902 24.6.4 */ + value64 = (int64_t) vm->arch_vm.iobitmap[0]; + exec_vmwrite64(VMX_IO_BITMAP_A_FULL, value64); + pr_dbg("VMX_IO_BITMAP_A: 0x%016llx ", value64); + value64 = (int64_t) vm->arch_vm.iobitmap[1]; + exec_vmwrite64(VMX_IO_BITMAP_B_FULL, value64); + pr_dbg("VMX_IO_BITMAP_B: 0x%016llx ", value64); + + init_msr_emulation(vcpu); + + /* Set up executive VMCS pointer - pg 2905 24.6.10 */ + exec_vmwrite64(VMX_EXECUTIVE_VMCS_PTR_FULL, 0); + + /* Setup Time stamp counter offset - pg 2902 24.6.5 */ + /* exec_vmwrite64(VMX_TSC_OFFSET_FULL, VMX_TSC_OFFSET_HIGH, 0); */ + + /* Set up the link pointer */ + exec_vmwrite64(VMX_VMS_LINK_PTR_FULL, 0xFFFFFFFFFFFFFFFF); + + /* Natural-width */ + pr_dbg("Natural-width*********"); + + /* Read the CR0 fixed0 / fixed1 MSR registers */ + fixed0 = msr_read(MSR_IA32_VMX_CR0_FIXED0); + fixed1 = msr_read(MSR_IA32_VMX_CR0_FIXED1); + + if (get_vcpu_mode(vcpu) == REAL_MODE) { + /* Check to see if unrestricted guest support is available */ + if (msr_read(MSR_IA32_VMX_MISC) & (1 << 5)) { + /* Adjust fixed bits as they can/will reflect incorrect + * settings that ARE valid in unrestricted guest mode. + * Both PG and PE bits can bit changed in unrestricted + * guest mode. + */ + fixed0 &= ~(CR0_PG | CR0_PE); + fixed1 |= (CR0_PG | CR0_PE); + + /* Log success for unrestricted mode being present */ + pr_dbg("Unrestricted support is available. "); + } else { + /* Log failure for unrestricted mode NOT being + * present + */ + pr_err("Error: Unrestricted support is not available"); + /* IA32_VMX_MISC bit 5 clear */ + } + + } + + /* (get_vcpu_mode(vcpu) == REAL_MODE) */ + /* Output fixed CR0 values */ + pr_dbg("Fixed0 CR0 value: 0x%x", fixed0); + pr_dbg("Fixed1 CR0 value: 0x%x", fixed1); + + /* Determine which bits are "flexible" in CR0 - allowed to be changed + * as per arch manual in VMX operation. Any bits that are different + * between fixed0 and fixed1 are "flexible" and the guest can change. + */ + value32 = fixed0 ^ fixed1; + + /* Set the CR0 mask to the inverse of the "flexible" bits */ + value32 = ~value32; + exec_vmwrite(VMX_CR0_MASK, value32); + + /* Output CR0 mask value */ + pr_dbg("CR0 mask value: 0x%x", value32); + + /* Calculate the CR0 shadow register value that will be used to enforce + * the correct values for host owned bits + */ + value32 = (fixed0 | fixed1) & value32; + exec_vmwrite(VMX_CR0_READ_SHADOW, value32); + + /* Output CR0 shadow value */ + pr_dbg("CR0 shadow value: 0x%x", value32); + + /* Read the CR4 fixed0 / fixed1 MSR registers */ + fixed0 = msr_read(MSR_IA32_VMX_CR4_FIXED0); + fixed1 = msr_read(MSR_IA32_VMX_CR4_FIXED1); + + /* Output fixed CR0 values */ + pr_dbg("Fixed0 CR4 value: 0x%x", fixed0); + pr_dbg("Fixed1 CR4 value: 0x%x", fixed1); + + /* Determine which bits are "flexible" in CR4 - allowed to be changed + * as per arch manual in VMX operation. Any bits that are different + * between fixed0 and fixed1 are "flexible" and the guest can change. 
+ */ + value32 = fixed0 ^ fixed1; + + /* Set the CR4 mask to the inverse of the "flexible" bits */ + value32 = ~value32; + exec_vmwrite(VMX_CR4_MASK, value32); + + /* Output CR4 mask value */ + pr_dbg("CR4 mask value: 0x%x", value32); + + /* Calculate the CR4 shadow register value that will be used to enforce + * the correct values for host owned bits + */ + value32 = (fixed0 | fixed1) & value32; + exec_vmwrite(VMX_CR4_READ_SHADOW, value32); + + /* Output CR4 shadow value */ + pr_dbg("CR4 shadow value: 0x%x", value32); + + /* The CR3 target registers work in concert with VMX_CR3_TARGET_COUNT + * field. Using these registers guest CR3 access can be managed. i.e., + * if operand does not match one of these register values a VM exit + * would occur + */ + exec_vmwrite(VMX_CR3_TARGET_0, 0); + exec_vmwrite(VMX_CR3_TARGET_1, 0); + exec_vmwrite(VMX_CR3_TARGET_2, 0); + exec_vmwrite(VMX_CR3_TARGET_3, 0); +} + +static void init_entry_ctrl(__unused struct vcpu *vcpu) +{ + uint32_t value32; + + /* Log messages to show initializing VMX entry controls */ + pr_dbg("*************************"); + pr_dbg("Initialize Entry control "); + pr_dbg("*************************"); + + /* Set up VMX entry controls - pg 2908 24.8.1 * Set IA32e guest mode - + * on VM entry processor is in IA32e 64 bitmode * Start guest with host + * IA32_PAT and IA32_EFER + */ + value32 = msr_read(MSR_IA32_VMX_ENTRY_CTLS); + if (get_vcpu_mode(vcpu) == PAGE_PROTECTED_MODE) + value32 |= (VMX_ENTRY_CTLS_IA32E_MODE); + + value32 |= (VMX_ENTRY_CTLS_LOAD_EFER | + VMX_ENTRY_CTLS_LOAD_PAT); + + exec_vmwrite(VMX_ENTRY_CONTROLS, value32); + pr_dbg("VMX_ENTRY_CONTROLS: 0x%x ", value32); + + /* Set up VMX entry MSR load count - pg 2908 24.8.2 Tells the number of + * MSRs on load from memory on VM entry from mem address provided by + * VM-entry MSR load address field + */ + exec_vmwrite(VMX_ENTRY_MSR_LOAD_COUNT, 0); + + /* Set up VM entry interrupt information field pg 2909 24.8.3 */ + exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD, 0); + + /* Set up VM entry exception error code - pg 2910 24.8.3 */ + exec_vmwrite(VMX_ENTRY_EXCEPTION_EC, 0); + + /* Set up VM entry instruction length - pg 2910 24.8.3 */ + exec_vmwrite(VMX_ENTRY_INSTR_LENGTH, 0); +} + +static void init_exit_ctrl(__unused struct vcpu *vcpu) +{ + uint32_t value32; + + /* Log messages to show initializing VMX entry controls */ + pr_dbg("************************"); + pr_dbg("Initialize Exit control "); + pr_dbg("************************"); + + /* Set up VM exit controls - pg 2907 24.7.1 for: Host address space + * size is 64 bit Set up to acknowledge interrupt on exit, if 1 the HW + * acks the interrupt in VMX non-root and saves the interrupt vector to + * the relevant VM exit field for further processing by Hypervisor + * Enable saving and loading of IA32_PAT and IA32_EFER on VMEXIT Enable + * saving of pre-emption timer on VMEXIT + */ + value32 = msr_read(MSR_IA32_VMX_EXIT_CTLS); + value32 |= (VMX_EXIT_CTLS_ACK_IRQ | + VMX_EXIT_CTLS_SAVE_PAT | + VMX_EXIT_CTLS_LOAD_PAT | + VMX_EXIT_CTLS_LOAD_EFER | + VMX_EXIT_CTLS_SAVE_EFER | + VMX_EXIT_CTLS_HOST_ADDR64); + + exec_vmwrite(VMX_EXIT_CONTROLS, value32); + pr_dbg("VMX_EXIT_CONTROL: 0x%x ", value32); + + /* Set up VM exit MSR store and load counts pg 2908 24.7.2 - tells the + * HW number of MSRs to stored to mem and loaded from mem on VM exit. 
+ * The 64 bit VM-exit MSR store and load address fields provide the + * corresponding addresses + */ + exec_vmwrite(VMX_EXIT_MSR_STORE_COUNT, 0); + exec_vmwrite(VMX_EXIT_MSR_LOAD_COUNT, 0); +} + +#ifdef CONFIG_EFI_STUB +static void override_uefi_vmcs(struct vcpu *vcpu) +{ + uint64_t field; + struct run_context *cur_context = + &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + + if (get_vcpu_mode(vcpu) == PAGE_PROTECTED_MODE) { + cur_context->cr3 = (uint64_t)efi_ctx->cr3 | CR3_PWT; + /* Set up guest CR3 field */ + field = VMX_GUEST_CR3; + exec_vmwrite(field, cur_context->cr3 & 0xFFFFFFFF); + pr_dbg("VMX_GUEST_CR3: 0x%016llx ", cur_context->cr3); + + /* Selector */ + field = VMX_GUEST_CS_SEL; + exec_vmwrite(field, efi_ctx->cs_sel); + pr_dbg("VMX_GUEST_CS_SEL: 0x%x ", efi_ctx->cs_sel); + + /* Access */ + field = VMX_GUEST_CS_ATTR; + exec_vmwrite(field, efi_ctx->cs_ar); + pr_dbg("VMX_GUEST_CS_ATTR: 0x%x ", efi_ctx->cs_ar); + + field = VMX_GUEST_ES_SEL; + exec_vmwrite(field, efi_ctx->es_sel); + pr_dbg("VMX_GUEST_ES_SEL: 0x%x ", efi_ctx->es_sel); + + field = VMX_GUEST_SS_SEL; + exec_vmwrite(field, efi_ctx->ss_sel); + pr_dbg("VMX_GUEST_SS_SEL: 0x%x ", efi_ctx->ss_sel); + + field = VMX_GUEST_DS_SEL; + exec_vmwrite(field, efi_ctx->ds_sel); + pr_dbg("VMX_GUEST_DS_SEL: 0x%x ", efi_ctx->ds_sel); + + field = VMX_GUEST_FS_SEL; + exec_vmwrite(field, efi_ctx->fs_sel); + pr_dbg("VMX_GUEST_FS_SEL: 0x%x ", efi_ctx->fs_sel); + + field = VMX_GUEST_GS_SEL; + exec_vmwrite(field, efi_ctx->gs_sel); + pr_dbg("VMX_GUEST_GS_SEL: 0x%x ", efi_ctx->gs_sel); + + /* Base */ + field = VMX_GUEST_ES_BASE; + exec_vmwrite(field, efi_ctx->es_sel << 4); + field = VMX_GUEST_SS_BASE; + exec_vmwrite(field, efi_ctx->ss_sel << 4); + field = VMX_GUEST_DS_BASE; + exec_vmwrite(field, efi_ctx->ds_sel << 4); + field = VMX_GUEST_FS_BASE; + exec_vmwrite(field, efi_ctx->fs_sel << 4); + field = VMX_GUEST_GS_BASE; + exec_vmwrite(field, efi_ctx->gs_sel << 4); + + /* RSP */ + field = VMX_GUEST_RSP; + exec_vmwrite(field, efi_ctx->rsp); + pr_dbg("GUEST RSP on VMEntry %x ", efi_ctx->rsp); + + /* GDTR Base */ + field = VMX_GUEST_GDTR_BASE; + exec_vmwrite(field, (uint64_t)efi_ctx->gdt.base); + pr_dbg("VMX_GUEST_GDTR_BASE: 0x%x ", efi_ctx->gdt.base); + + /* GDTR Limit */ + field = VMX_GUEST_GDTR_LIMIT; + exec_vmwrite(field, efi_ctx->gdt.limit); + pr_dbg("VMX_GUEST_GDTR_LIMIT: 0x%x ", efi_ctx->gdt.limit); + + /* IDTR Base */ + field = VMX_GUEST_IDTR_BASE; + exec_vmwrite(field, (uint64_t)efi_ctx->idt.base); + pr_dbg("VMX_GUEST_IDTR_BASE: 0x%x ", efi_ctx->idt.base); + + /* IDTR Limit */ + field = VMX_GUEST_IDTR_LIMIT; + exec_vmwrite(field, efi_ctx->idt.limit); + pr_dbg("VMX_GUEST_IDTR_LIMIT: 0x%x ", efi_ctx->idt.limit); + } + + /* Interrupt */ + if (efi_launch_vector > 0) { + field = VMX_GUEST_RFLAGS; + cur_context->rflags = 0x2; + cur_context->rflags |= 1 << 9; /* enable intr for efi stub */ + exec_vmwrite(field, cur_context->rflags); + exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD, + VMX_INT_INFO_VALID | + (efi_launch_vector & 0xFF)); + efi_launch_vector = -1; + } +} +#endif + +int init_vmcs(struct vcpu *vcpu) +{ + uint32_t vmx_rev_id; + int status = 0; + + if (vcpu == NULL) + status = -EINVAL; + ASSERT(status == 0, "Incorrect arguments"); + + /* Log message */ + pr_dbg("Initializing VMCS"); + + /* Obtain the VM Rev ID from HW and populate VMCS page with it */ + vmx_rev_id = msr_read(MSR_IA32_VMX_BASIC); + memcpy_s((void *) vcpu->arch_vcpu.vmcs, 4, &vmx_rev_id, 4); + + /* Execute VMCLEAR on current VMCS */ + status = exec_vmclear((void 
*)&vcpu->arch_vcpu.vmcs); + ASSERT(status == 0, "Failed VMCLEAR during VMCS setup!"); + + /* Load VMCS pointer */ + status = exec_vmptrld((void *)&vcpu->arch_vcpu.vmcs); + ASSERT(status == 0, "Failed VMCS pointer load!"); + + /* Initialize the Virtual Machine Control Structure (VMCS) */ + init_host_state(vcpu); + init_guest_state(vcpu); + init_exec_ctrl(vcpu); + init_entry_ctrl(vcpu); + init_exit_ctrl(vcpu); + +#ifdef CONFIG_EFI_STUB + if (is_vm0(vcpu->vm) && vcpu->pcpu_id == 0) + override_uefi_vmcs(vcpu); +#endif + /* Return status to caller */ + return status; +} diff --git a/hypervisor/arch/x86/vmx_asm.S b/hypervisor/arch/x86/vmx_asm.S new file mode 100644 index 000000000..5c1246c4e --- /dev/null +++ b/hypervisor/arch/x86/vmx_asm.S @@ -0,0 +1,245 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include + + .text + +/*int vmx_vmrun(struct run_context *context, int launch, int ibrs_type) */ + .code64 + .align 8 + .global vmx_vmrun +vmx_vmrun: + + /* Save all host GPRs that must be preserved across function calls + per System V ABI */ + push %rdx + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + + /* Save RDI on top of host stack for easy access to VCPU pointer + on return from guest context */ + push %rdi + + /* rdx = ibrs_type */ + /* if ibrs_type != IBRS_NONE, means IBRS feature is supported, + * restore MSR SPEC_CTRL to guest + */ + cmp $IBRS_NONE,%rdx + je next + + movl $MSR_IA32_SPEC_CTRL,%ecx + mov VMX_MACHINE_T_GUEST_SPEC_CTRL_OFFSET(%rdi),%rax + movl $0,%edx + wrmsr + +next: + + /* Load VMCS_HOST_RSP_FIELD field value */ + mov $VMX_HOST_RSP,%rdx + + /* Write the current stack pointer to the VMCS_HOST_RSP_FIELD */ + vmwrite %rsp,%rdx + + /* Error occurred - handle error */ + jbe vm_eval_error + + /* Compare the launch flag to see if launching (1) or resuming (0) */ + cmp $VM_LAUNCH, %rsi + + mov VMX_MACHINE_T_GUEST_CR2_OFFSET(%rdi),%rax + mov %rax,%cr2 + + mov VMX_MACHINE_T_GUEST_RAX_OFFSET(%rdi),%rax + mov VMX_MACHINE_T_GUEST_RBX_OFFSET(%rdi),%rbx + mov VMX_MACHINE_T_GUEST_RCX_OFFSET(%rdi),%rcx + mov VMX_MACHINE_T_GUEST_RDX_OFFSET(%rdi),%rdx + mov VMX_MACHINE_T_GUEST_RBP_OFFSET(%rdi),%rbp + mov VMX_MACHINE_T_GUEST_RSI_OFFSET(%rdi),%rsi + mov VMX_MACHINE_T_GUEST_R8_OFFSET(%rdi),%r8 + mov VMX_MACHINE_T_GUEST_R9_OFFSET(%rdi),%r9 + mov VMX_MACHINE_T_GUEST_R10_OFFSET(%rdi),%r10 + mov VMX_MACHINE_T_GUEST_R11_OFFSET(%rdi),%r11 + mov VMX_MACHINE_T_GUEST_R12_OFFSET(%rdi),%r12 + mov VMX_MACHINE_T_GUEST_R13_OFFSET(%rdi),%r13 + mov VMX_MACHINE_T_GUEST_R14_OFFSET(%rdi),%r14 + mov VMX_MACHINE_T_GUEST_R15_OFFSET(%rdi),%r15 + + mov VMX_MACHINE_T_GUEST_RDI_OFFSET(%rdi),%rdi + + /* Execute appropriate VMX instruction */ + je vm_launch + + /* Execute a VM resume */ + vmresume + +vm_launch: + + /* Execute a VM launch */ + vmlaunch + + .global vm_exit +vm_exit: + + /* Get VCPU data structure pointer from top of host stack and + save guest RDI in its place */ + xchg 0(%rsp),%rdi + + /* Save current GPRs to guest state area */ + mov %rax,VMX_MACHINE_T_GUEST_RAX_OFFSET(%rdi) + + mov %cr2,%rax + mov %rax,VMX_MACHINE_T_GUEST_CR2_OFFSET(%rdi) + + mov %rbx,VMX_MACHINE_T_GUEST_RBX_OFFSET(%rdi) + mov %rcx,VMX_MACHINE_T_GUEST_RCX_OFFSET(%rdi) + mov %rdx,VMX_MACHINE_T_GUEST_RDX_OFFSET(%rdi) + mov %rbp,VMX_MACHINE_T_GUEST_RBP_OFFSET(%rdi) + mov %rsi,VMX_MACHINE_T_GUEST_RSI_OFFSET(%rdi) + mov %r8,VMX_MACHINE_T_GUEST_R8_OFFSET(%rdi) + mov %r9,VMX_MACHINE_T_GUEST_R9_OFFSET(%rdi) + mov %r10,VMX_MACHINE_T_GUEST_R10_OFFSET(%rdi) + mov %r11,VMX_MACHINE_T_GUEST_R11_OFFSET(%rdi) + mov %r12,VMX_MACHINE_T_GUEST_R12_OFFSET(%rdi) + mov %r13,VMX_MACHINE_T_GUEST_R13_OFFSET(%rdi) + mov %r14,VMX_MACHINE_T_GUEST_R14_OFFSET(%rdi) + mov %r15,VMX_MACHINE_T_GUEST_R15_OFFSET(%rdi) + + /* Load guest RDI off host stack and into RDX */ + mov 0(%rsp),%rdx + + /* Save guest RDI to guest state area */ + mov %rdx,VMX_MACHINE_T_GUEST_RDI_OFFSET(%rdi) + + /* Save RDI to RSI for later SPEC_CTRL save*/ + mov %rdi,%rsi + +vm_eval_error: + + /* Restore host GPR System V required registers */ + pop %rdi + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdx + + + /* Check vm fail, refer to 64-ia32 spec section 26.2 in volume#3 */ + mov $VM_FAIL,%rax + jc vm_return + jz vm_return + + /* Clear host registers to prevent speculative use */ + 
xor %rcx,%rcx + xor %r8,%r8 + xor %r9,%r9 + xor %r10,%r10 + xor %r11,%r11 + + /* rdx = ibrs_type */ + /* IBRS_NONE: no ibrs setting, just flush rsb + * IBRS_RAW: set IBRS then flush rsb + * IBRS_OPT: set STIBP & IBPB then flush rsb + */ + cmp $IBRS_NONE,%rdx + je stuff_rsb + + cmp $IBRS_OPT,%rdx + je ibrs_opt + + /* Save guest MSR SPEC_CTRL, low 32 bit is enough */ + movl $MSR_IA32_SPEC_CTRL,%ecx + rdmsr + mov %rax,VMX_MACHINE_T_GUEST_SPEC_CTRL_OFFSET(%rsi) + movl $SPEC_ENABLE_IBRS,%eax + movl $0,%edx + wrmsr + + jmp stuff_rsb + +ibrs_opt: + + movl $MSR_IA32_PRED_CMD,%ecx + movl $PRED_SET_IBPB,%eax + movl $0,%edx + wrmsr + + /* Save guest MSR SPEC_CTRL, low 32 bit is enough */ + movl $MSR_IA32_SPEC_CTRL,%ecx + rdmsr + mov %rax,VMX_MACHINE_T_GUEST_SPEC_CTRL_OFFSET(%rsi) + movl $SPEC_ENABLE_STIBP,%eax + movl $0,%edx + wrmsr + + /* stuff rsb by 32 CALLs, make sure no any "ret" is executed before this + * stuffing rsb, otherwise, someone may insert some code before this for + * future update. + */ +stuff_rsb: + + /* stuff 32 RSB, rax = 32/2 */ + mov $16,%rax +.align 16 +3: + call 4f +33: + pause + jmp 33b +.align 16 +4: + call 5f +44: + pause + jmp 44b +.align 16 +5: dec %rax + jnz 3b + /* stuff 32 RSB, rsp += 8*32 */ + add $(8*32),%rsp + + mov $VM_SUCCESS,%rax + +vm_return: + /* Return to caller */ + ret + diff --git a/hypervisor/arch/x86/vtd.c b/hypervisor/arch/x86/vtd.c new file mode 100644 index 000000000..a8340d10d --- /dev/null +++ b/hypervisor/arch/x86/vtd.c @@ -0,0 +1,1162 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#define pr_fmt(fmt) "iommu: " fmt + +#include +#include +#include +#include +#include + +#define DBG_IOMMU 0 + +#if DBG_IOMMU +#define ACRN_DBG_IOMMU LOG_INFO +#define DMAR_FAULT_LOOP_MAX 10 +#else +#define ACRN_DBG_IOMMU 6 +#endif + +/* set an appropriate bus limitation when iommu init, + * to reduce memory & time cost + */ +#define IOMMU_INIT_BUS_LIMIT (0xf) + +#define PAGE_MASK (0xFFFUL) +#define LEVEL_WIDTH 9 + +#define ROOT_ENTRY_LOWER_PRESENT_POS (0) +#define ROOT_ENTRY_LOWER_PRESENT_MASK ((uint64_t)1) +#define ROOT_ENTRY_LOWER_CTP_POS (12) +#define ROOT_ENTRY_LOWER_CTP_MASK ((uint64_t)0xFFFFFFFFFFFFF) + +#define CTX_ENTRY_UPPER_AW_POS (0) +#define CTX_ENTRY_UPPER_AW_MASK \ + ((uint64_t)0x7 << CTX_ENTRY_UPPER_AW_POS) +#define CTX_ENTRY_UPPER_DID_POS (8) +#define CTX_ENTRY_UPPER_DID_MASK \ + ((uint64_t)0x3F << CTX_ENTRY_UPPER_DID_POS) +#define CTX_ENTRY_LOWER_P_POS (0) +#define CTX_ENTRY_LOWER_P_MASK \ + ((uint64_t)0x1 << CTX_ENTRY_LOWER_P_POS) +#define CTX_ENTRY_LOWER_FPD_POS (1) +#define CTX_ENTRY_LOWER_FPD_MASK \ + ((uint64_t)0x1 << CTX_ENTRY_LOWER_FPD_POS) +#define CTX_ENTRY_LOWER_TT_POS (2) +#define CTX_ENTRY_LOWER_TT_MASK \ + ((uint64_t)0x3 << CTX_ENTRY_LOWER_TT_POS) +#define CTX_ENTRY_LOWER_SLPTPTR_POS (12) +#define CTX_ENTRY_LOWER_SLPTPTR_MASK \ + ((uint64_t)0xFFFFFFFFFFFFF << CTX_ENTRY_LOWER_SLPTPTR_POS) + +#define DMAR_GET_BITSLICE(var, bitname) \ + ((var & bitname ## _MASK) >> bitname ## _POS) + +#define DMAR_SET_BITSLICE(var, bitname, val) \ + ((var & \ + ~bitname ## _MASK) | ((val << bitname ## _POS) & bitname ## _MASK)) + +/* translation type */ +#define DMAR_CTX_TT_UNTRANSLATED 0x0 +#define DMAR_CTX_TT_ALL 0x1 +#define DMAR_CTX_TT_PASSTHROUGH 0x2 + +/* Fault event MSI data register */ +#define DMAR_MSI_DELIVERY_MODE_SHIFT (8) +#define DMAR_MSI_DELIVERY_FIXED (0 << DMAR_MSI_DELIVERY_MODE_SHIFT) +#define DMAR_MSI_DELIVERY_LOWPRI (1 << DMAR_MSI_DELIVERY_MODE_SHIFT) + +/* Fault event MSI address register */ +#define DMAR_MSI_DEST_MODE_SHIFT (2) +#define DMAR_MSI_DEST_MODE_PHYS (0 << DMAR_MSI_DEST_MODE_SHIFT) +#define DMAR_MSI_DEST_MODE_LOGIC (1 << DMAR_MSI_DEST_MODE_SHIFT) +#define DMAR_MSI_REDIRECTION_SHIFT (3) +#define DMAR_MSI_REDIRECTION_CPU (0 << DMAR_MSI_REDIRECTION_SHIFT) +#define DMAR_MSI_REDIRECTION_LOWPRI (1 << DMAR_MSI_REDIRECTION_SHIFT) + +#define IOMMU_LOCK(u) spinlock_obtain(&((u)->lock)) +#define IOMMU_UNLOCK(u) spinlock_release(&((u)->lock)) + +#define DMAR_OP_TIMEOUT TIME_MS_DELTA + +#define DMAR_WAIT_COMPLETION(offset, condition, status) \ + do { \ + uint64_t start = rdtsc(); \ + while (1) { \ + status = iommu_read32(dmar_uint, offset); \ + if (condition) \ + break; \ + ASSERT((rdtsc() - start < TIME_MS_DELTA), \ + "DMAR OP Timeout!"); \ + asm volatile ("pause" ::: "memory"); \ + } \ + } while (0) + +enum dmar_cirg_type { + DMAR_CIRG_RESERVED = 0, + DMAR_CIRG_GLOBAL, + DMAR_CIRG_DOMAIN, + DMAR_CIRG_DEVICE +}; + +enum dmar_iirg_type { + DMAR_IIRG_RESERVED = 0, + DMAR_IIRG_GLOBAL, + DMAR_IIRG_DOMAIN, + DMAR_IIRG_PAGE +}; + +/* dmar unit runtime data */ +struct dmar_drhd_rt { + struct list_head list; + spinlock_t lock; + + struct dmar_drhd *drhd; + + uint64_t root_table_addr; + + uint64_t cap; + uint64_t ecap; + uint64_t gcmd; /* sw cache value of global cmd register */ + + uint32_t irq; + struct dev_handler_node *dmar_irq_node; + + uint32_t max_domain_id; + + bool cap_pw_coherency; /* page-walk coherency */ + uint8_t cap_msagaw; + uint16_t cap_num_fault_regs; + uint16_t cap_fault_reg_offset; + uint16_t ecap_iotlb_offset; +}; + +struct dmar_root_entry 
{ + uint64_t lower; + uint64_t upper; +}; + +struct dmar_context_entry { + uint64_t lower; + uint64_t upper; +}; + +struct iommu_domain { + struct list_head list; + bool is_host; + bool is_tt_ept; /* if reuse EPT of the domain */ + uint16_t dom_id; + int vm_id; + uint32_t addr_width; /* address width of the domain */ + void *trans_table_ptr; +}; + +static struct list_head dmar_drhd_units; +static uint32_t dmar_hdrh_unit_count; + +/* Use to record the domain ids that are used, + * support 64 domains (should be enough?) + * domain id 0 is reserved, + * bit0 --> domain id 0, ..., bit63 --> domain id 63 + */ +static uint32_t max_domain_id = 63; +static uint64_t domain_bitmap; +static spinlock_t domain_lock; +static struct iommu_domain *host_domain; +static struct list_head iommu_domains; + +static void dmar_register_hrhd(struct dmar_drhd_rt *drhd_rt); +static struct dmar_drhd_rt *device_to_dmaru(uint16_t segment, uint8_t bus, + uint8_t devfun); +static int register_hrhd_units(void) +{ + struct dmar_info *info = get_dmar_info(); + struct dmar_drhd_rt *drhd_rt; + uint32_t i; + + if (!info) { + pr_warn("vtd: no dmar units found"); + return -1; + } + + for (i = 0; i < info->drhd_count; i++) { + drhd_rt = malloc(sizeof(struct dmar_drhd_rt)); + ASSERT(drhd_rt != NULL, ""); + memset(drhd_rt, 0, sizeof(struct dmar_drhd_rt)); + drhd_rt->drhd = &info->drhd_units[i]; + dmar_register_hrhd(drhd_rt); + } + + return 0; +} + +static uint32_t iommu_read32(struct dmar_drhd_rt *dmar_uint, uint32_t offset) +{ + return mmio_read_long(dmar_uint->drhd->reg_base_addr + offset); +} + +static uint64_t iommu_read64(struct dmar_drhd_rt *dmar_uint, uint32_t offset) +{ + uint64_t value; + + value = (mmio_read_long(dmar_uint->drhd->reg_base_addr + offset + 4)); + value = value << 32; + value = value | (mmio_read_long(dmar_uint->drhd->reg_base_addr + + offset)); + + return value; +} + +static void iommu_write32(struct dmar_drhd_rt *dmar_uint, uint32_t offset, + uint32_t value) +{ + mmio_write_long(value, dmar_uint->drhd->reg_base_addr + offset); +} + +static void iommu_write64(struct dmar_drhd_rt *dmar_uint, uint32_t offset, + uint64_t value) +{ + uint32_t temp; + + temp = value; + mmio_write_long(temp, dmar_uint->drhd->reg_base_addr + offset); + + temp = value >> 32; + mmio_write_long(temp, dmar_uint->drhd->reg_base_addr + offset + 4); +} + +#if DBG_IOMMU +static void dmar_uint_show_capability(struct dmar_drhd_rt *dmar_uint) +{ + pr_info("dmar unit[0x%x]", dmar_uint->drhd->reg_base_addr); + pr_info("\tNumDomain:%d", + iommu_cap_ndoms(dmar_uint->cap)); + pr_info("\tAdvancedFaultLogging:%d", + iommu_cap_afl(dmar_uint->cap)); + pr_info("\tRequiredWBFlush:%d", + iommu_cap_rwbf(dmar_uint->cap)); + pr_info("\tProtectedLowMemRegion:%d", + iommu_cap_plmr(dmar_uint->cap)); + pr_info("\tProtectedHighMemRegion:%d", + iommu_cap_phmr(dmar_uint->cap)); + pr_info("\tCachingMode:%d", + iommu_cap_caching_mode(dmar_uint->cap)); + pr_info("\tSAGAW:0x%x", + iommu_cap_sagaw(dmar_uint->cap)); + pr_info("\tMGAW:%d", + iommu_cap_mgaw(dmar_uint->cap)); + pr_info("\tZeroLenRead:%d", + iommu_cap_zlr(dmar_uint->cap)); + pr_info("\tLargePageSupport:0x%x", + iommu_cap_super_page_val(dmar_uint->cap)); + pr_info("\tPageSelectiveInvalidation:%d", + iommu_cap_pgsel_inv(dmar_uint->cap)); + pr_info("\tPageSelectInvalidation:%d", + iommu_cap_pgsel_inv(dmar_uint->cap)); + pr_info("\tNumOfFaultRecordingReg:%d", + iommu_cap_num_fault_regs(dmar_uint->cap)); + pr_info("\tMAMV:0x%x", + iommu_cap_max_amask_val(dmar_uint->cap)); + pr_info("\tWriteDraining:%d", + 
iommu_cap_write_drain(dmar_uint->cap)); + pr_info("\tReadDraining:%d", + iommu_cap_read_drain(dmar_uint->cap)); + pr_info("\tPostInterrupts:%d\n", + iommu_cap_pi(dmar_uint->cap)); + pr_info("\tPage-walk Coherency:%d", + iommu_ecap_c(dmar_uint->ecap)); + pr_info("\tQueuedInvalidation:%d", + iommu_ecap_qi(dmar_uint->ecap)); + pr_info("\tDeviceTLB:%d", + iommu_ecap_dt(dmar_uint->ecap)); + pr_info("\tInterruptRemapping:%d", + iommu_ecap_ir(dmar_uint->ecap)); + pr_info("\tExtendedInterruptMode:%d", + iommu_ecap_eim(dmar_uint->ecap)); + pr_info("\tPassThrough:%d", + iommu_ecap_pt(dmar_uint->ecap)); + pr_info("\tSnoopControl:%d", + iommu_ecap_sc(dmar_uint->ecap)); + pr_info("\tIOTLB RegOffset:0x%x", + iommu_ecap_iro(dmar_uint->ecap)); + pr_info("\tMHMV:0x%x", iommu_ecap_mhmv(dmar_uint->ecap)); + pr_info("\tECS:%d", iommu_ecap_ecs(dmar_uint->ecap)); + pr_info("\tMTS:%d", iommu_ecap_mts(dmar_uint->ecap)); + pr_info("\tNEST:%d", iommu_ecap_nest(dmar_uint->ecap)); + pr_info("\tDIS:%d", iommu_ecap_dis(dmar_uint->ecap)); + pr_info("\tPRS:%d", iommu_ecap_prs(dmar_uint->ecap)); + pr_info("\tERS:%d", iommu_ecap_ers(dmar_uint->ecap)); + pr_info("\tSRS:%d", iommu_ecap_srs(dmar_uint->ecap)); + pr_info("\tNWFS:%d", iommu_ecap_nwfs(dmar_uint->ecap)); + pr_info("\tEAFS:%d", iommu_ecap_eafs(dmar_uint->ecap)); + pr_info("\tPSS:0x%x", iommu_ecap_pss(dmar_uint->ecap)); + pr_info("\tPASID:%d", iommu_ecap_pasid(dmar_uint->ecap)); + pr_info("\tDIT:%d", iommu_ecap_dit(dmar_uint->ecap)); + pr_info("\tPDS:%d\n", iommu_ecap_pds(dmar_uint->ecap)); +} +#endif + +static inline uint8_t width_to_level(int width) +{ + return ((width - 12) + (LEVEL_WIDTH)-1) / (LEVEL_WIDTH); +} + +static inline uint8_t width_to_agaw(int width) +{ + return width_to_level(width) - 2; +} + +static uint8_t dmar_uint_get_msagw(struct dmar_drhd_rt *dmar_uint) +{ + int i; + uint8_t sgaw = iommu_cap_sagaw(dmar_uint->cap); + + for (i = 4; i >= 0; i--) { + if ((1 << i) & sgaw) + break; + } + return (uint8_t)i; +} + +static bool +dmar_unit_support_aw(struct dmar_drhd_rt *dmar_uint, uint32_t addr_width) +{ + uint8_t aw; + + aw = (uint8_t)width_to_agaw(addr_width); + + return ((1 << aw) & iommu_cap_sagaw(dmar_uint->cap)) != 0; +} + +static void dmar_enable_translation(struct dmar_drhd_rt *dmar_uint) +{ + uint32_t status; + + IOMMU_LOCK(dmar_uint); + dmar_uint->gcmd |= DMA_GCMD_TE; + iommu_write32(dmar_uint, DMAR_GCMD_REG, dmar_uint->gcmd); + + /* 32-bit register */ + DMAR_WAIT_COMPLETION(DMAR_GSTS_REG, status & DMA_GSTS_TES, status); + + + status = iommu_read32(dmar_uint, DMAR_GSTS_REG); + + IOMMU_UNLOCK(dmar_uint); + + dev_dbg(ACRN_DBG_IOMMU, "%s: gsr:0x%x", __func__, status); +} + +static void dmar_disable_translation(struct dmar_drhd_rt *dmar_uint) +{ + uint32_t status; + + IOMMU_LOCK(dmar_uint); + dmar_uint->gcmd &= ~DMA_GCMD_TE; + iommu_write32(dmar_uint, DMAR_GCMD_REG, dmar_uint->gcmd); + + /* 32-bit register */ + DMAR_WAIT_COMPLETION(DMAR_GSTS_REG, !(status & DMA_GSTS_TES), status); + IOMMU_UNLOCK(dmar_uint); +} + +static void dmar_register_hrhd(struct dmar_drhd_rt *dmar_uint) +{ + dev_dbg(ACRN_DBG_IOMMU, "Register dmar uint [%d] @0x%llx", + dmar_hdrh_unit_count, + dmar_uint->drhd->reg_base_addr); + + spinlock_init(&dmar_uint->lock); + + dmar_uint->cap = iommu_read64(dmar_uint, DMAR_CAP_REG); + dmar_uint->ecap = iommu_read64(dmar_uint, DMAR_ECAP_REG); + dmar_uint->gcmd = iommu_read64(dmar_uint, DMAR_GCMD_REG); + + dmar_uint->cap_msagaw = dmar_uint_get_msagw(dmar_uint); + dmar_uint->cap_num_fault_regs = + iommu_cap_num_fault_regs(dmar_uint->cap); + 
dmar_uint->cap_fault_reg_offset = + iommu_cap_fault_reg_offset(dmar_uint->cap); + dmar_uint->ecap_iotlb_offset = iommu_ecap_iro(dmar_uint->ecap) * 16; + +#if DBG_IOMMU + pr_info("version:0x%x, cap:0x%llx, ecap:0x%llx", + iommu_read32(dmar_uint, DMAR_VER_REG), + dmar_uint->cap, + dmar_uint->ecap); + pr_info("sagaw:0x%x, msagaw:0x%x, iotlb offset 0x%x", + iommu_cap_sagaw(dmar_uint->cap), + dmar_uint->cap_msagaw, + dmar_uint->ecap_iotlb_offset); + + dmar_uint_show_capability(dmar_uint); +#endif + + /* check capability */ + if ((iommu_cap_super_page_val(dmar_uint->cap) & 0x1) == 0) + dev_dbg(ACRN_DBG_IOMMU, "dmar uint doesn't support 2MB page!"); + + if ((iommu_cap_super_page_val(dmar_uint->cap) & 0x2) == 0) + dev_dbg(ACRN_DBG_IOMMU, "dmar uint doesn't support 1GB page!"); + + /* when the hardware support snoop control, + * to make sure snoop control is always enabled, + * the SNP filed in the leaf PTE should be set. + * How to guarantee it when EPT is used as second-level + * translation paging structures? + */ + if (!iommu_ecap_sc(dmar_uint->ecap)) + dev_dbg(ACRN_DBG_IOMMU, + "dmar uint doesn't support snoop control!"); + + dmar_uint->max_domain_id = iommu_cap_ndoms(dmar_uint->cap) - 1; + + if (dmar_uint->max_domain_id > 63) + dmar_uint->max_domain_id = 63; + + if (max_domain_id > dmar_uint->max_domain_id) + max_domain_id = dmar_uint->max_domain_id; + + /* register operation is considered serial, no lock here */ + if (dmar_uint->drhd->flags & DRHD_FLAG_INCLUDE_PCI_ALL_MASK) + list_add_tail(&dmar_uint->list, &dmar_drhd_units); + else + list_add(&dmar_uint->list, &dmar_drhd_units); + + dmar_hdrh_unit_count++; + + if (dmar_uint->gcmd & DMA_GCMD_TE) + dmar_disable_translation(dmar_uint); +} + +static struct dmar_drhd_rt *device_to_dmaru(uint16_t segment, uint8_t bus, + uint8_t devfun) +{ + struct dmar_drhd_rt *dmar_uint; + struct list_head *pos; + uint32_t i; + + list_for_each(pos, &dmar_drhd_units) { + dmar_uint = list_entry(pos, struct dmar_drhd_rt, list); + + if (dmar_uint->drhd->segment != segment) + continue; + + for (i = 0; i < dmar_uint->drhd->dev_cnt; i++) { + if ((dmar_uint->drhd->devices[i].bus == bus) && + (dmar_uint->drhd->devices[i].devfun == devfun)) + return dmar_uint; + } + + /* has the same segment number and + * the dmar unit has INCLUDE_PCI_ALL set + */ + if (dmar_uint->drhd->flags & DRHD_FLAG_INCLUDE_PCI_ALL_MASK) + return dmar_uint; + } + + return NULL; +} + +static int alloc_domain_id(void) +{ + int i; + uint64_t mask; + + spinlock_obtain(&domain_lock); + /* domain id 0 is reserved, when CM = 1. 
+ * so domain id allocation start from 1 + */ + for (i = 1; i < 64; i++) { + mask = (1 << i); + if ((domain_bitmap & mask) == 0) { + domain_bitmap |= mask; + break; + } + } + spinlock_release(&domain_lock); + return i; +} + +static void free_domain_id(int dom_id) +{ + uint64_t mask = (1 << dom_id); + + spinlock_obtain(&domain_lock); + domain_bitmap &= ~mask; + spinlock_release(&domain_lock); +} + +static struct iommu_domain *create_host_domain(void) +{ + struct iommu_domain *domain = calloc(1, sizeof(struct iommu_domain)); + + ASSERT(domain != NULL, ""); + domain->is_host = true; + domain->dom_id = alloc_domain_id(); + /* dmar uint need to support translation passthrough */ + domain->trans_table_ptr = NULL; + domain->addr_width = 48; + + return domain; +} + +static void dmar_write_buffer_flush(struct dmar_drhd_rt *dmar_uint) +{ + uint32_t status; + + if (!iommu_cap_rwbf(dmar_uint->cap)) + return; + + IOMMU_LOCK(dmar_uint); + iommu_write64(dmar_uint, DMAR_GCMD_REG, + dmar_uint->gcmd | DMA_GCMD_WBF); + + /* read lower 32 bits to check */ + DMAR_WAIT_COMPLETION(DMAR_GSTS_REG, !(status & DMA_GSTS_WBFS), status); + IOMMU_UNLOCK(dmar_uint); +} + +/* + * did: domain id + * sid: source id + * fm: function mask + * cirg: cache-invalidation request granularity + */ +static void dmar_invalid_context_cache(struct dmar_drhd_rt *dmar_uint, + uint16_t did, uint16_t sid, uint8_t fm, enum dmar_cirg_type cirg) +{ + uint64_t cmd = DMA_CCMD_ICC; + uint32_t status; + + switch (cirg) { + case DMAR_CIRG_GLOBAL: + cmd |= DMA_CCMD_GLOBAL_INVL; + break; + case DMAR_CIRG_DOMAIN: + cmd |= DMA_CCMD_DOMAIN_INVL | DMA_CCMD_DID(did); + break; + case DMAR_CIRG_DEVICE: + cmd |= DMA_CCMD_DEVICE_INVL | DMA_CCMD_DID(did) | + DMA_CCMD_SID(sid) | DMA_CCMD_FM(fm); + break; + default: + pr_err("unknown CIRG type"); + return; + } + + IOMMU_LOCK(dmar_uint); + iommu_write64(dmar_uint, DMAR_CCMD_REG, cmd); + /* read upper 32bits to check */ + DMAR_WAIT_COMPLETION(DMAR_CCMD_REG + 4, !(status & DMA_CCMD_ICC_32), + status); + + IOMMU_UNLOCK(dmar_uint); + + dev_dbg(ACRN_DBG_IOMMU, "cc invalidation granularity %d", + DMA_CCMD_GET_CAIG_32(status)); +} + +static void dmar_invalid_context_cache_global(struct dmar_drhd_rt *dmar_uint) +{ + dmar_invalid_context_cache(dmar_uint, 0, 0, 0, DMAR_CIRG_GLOBAL); +} + +static void dmar_invalid_iotlb(struct dmar_drhd_rt *dmar_uint, + uint16_t did, uint64_t address, uint8_t am, + bool hint, enum dmar_iirg_type iirg) +{ + /* set Drain Reads & Drain Writes, + * if hardware doesn't support it, will be ignored by hardware + */ + uint64_t cmd = DMA_IOTLB_IVT | DMA_IOTLB_DR | DMA_IOTLB_DW; + uint64_t addr = 0; + uint32_t status; + + switch (iirg) { + case DMAR_IIRG_GLOBAL: + cmd |= DMA_IOTLB_GLOBAL_INVL; + break; + case DMAR_IIRG_DOMAIN: + cmd |= DMA_IOTLB_DOMAIN_INVL | DMA_IOTLB_DID(did); + break; + case DMAR_IIRG_PAGE: + cmd |= DMA_IOTLB_PAGE_INVL | DMA_IOTLB_DID(did); + addr = address | DMA_IOTLB_INVL_ADDR_AM(am); + if (hint) + addr |= DMA_IOTLB_INVL_ADDR_IH_UNMODIFIED; + break; + default: + pr_err("unknown IIRG type"); + return; + } + IOMMU_LOCK(dmar_uint); + if (addr) + iommu_write64(dmar_uint, dmar_uint->ecap_iotlb_offset, addr); + + iommu_write64(dmar_uint, dmar_uint->ecap_iotlb_offset + 8, cmd); + /* read upper 32bits to check */ + DMAR_WAIT_COMPLETION(dmar_uint->ecap_iotlb_offset + 12, + !(status & DMA_IOTLB_IVT_32), status); + IOMMU_UNLOCK(dmar_uint); + + if (!DMA_IOTLB_GET_IAIG_32(status)) { + pr_err("fail to invalidate IOTLB!, 0x%x, 0x%x", + status, iommu_read32(dmar_uint, DMAR_FSTS_REG)); + } +} + 
+/* Invalidate IOTLB globally, + * all iotlb entries are invalidated, + * all PASID-cache entries are invalidated, + * all paging-structure-cache entries are invalidated. + */ +static void dmar_invalid_iotlb_global(struct dmar_drhd_rt *dmar_uint) +{ + dmar_invalid_iotlb(dmar_uint, 0, 0, 0, 0, DMAR_IIRG_GLOBAL); +} + +static void dmar_set_root_table(struct dmar_drhd_rt *dmar_uint) +{ + uint64_t address; + uint32_t status; + + IOMMU_LOCK(dmar_uint); + + /* Currently don't support extended root table */ + address = dmar_uint->root_table_addr; + + iommu_write64(dmar_uint, DMAR_RTADDR_REG, address); + + iommu_write32(dmar_uint, DMAR_GCMD_REG, + dmar_uint->gcmd | DMA_GCMD_SRTP); + + /* 32-bit register */ + DMAR_WAIT_COMPLETION(DMAR_GSTS_REG, status & DMA_GSTS_RTPS, status); + IOMMU_UNLOCK(dmar_uint); +} + +static int dmar_fault_event_mask(struct dmar_drhd_rt *dmar_uint) +{ + IOMMU_LOCK(dmar_uint); + iommu_write32(dmar_uint, DMAR_FECTL_REG, DMA_FECTL_IM); + IOMMU_UNLOCK(dmar_uint); + return 0; +} + +static int dmar_fault_event_unmask(struct dmar_drhd_rt *dmar_uint) +{ + IOMMU_LOCK(dmar_uint); + iommu_write32(dmar_uint, DMAR_FECTL_REG, 0); + IOMMU_UNLOCK(dmar_uint); + return 0; +} + +static void dmar_fault_msi_write(struct dmar_drhd_rt *dmar_uint, + uint8_t vector) +{ + uint32_t data; + uint32_t addr_low; + uint32_t lapic_id = get_cur_lapic_id(); + + data = DMAR_MSI_DELIVERY_LOWPRI | vector; + /* redirection hint: 0 + * destination mode: 0 + */ + addr_low = 0xFEE00000 | ((lapic_id & 0xFF) << 12); + + IOMMU_LOCK(dmar_uint); + iommu_write32(dmar_uint, DMAR_FEDATA_REG, data); + iommu_write32(dmar_uint, DMAR_FEADDR_REG, addr_low); + IOMMU_UNLOCK(dmar_uint); +} + +#if DBG_IOMMU +static void fault_status_analysis(uint32_t status) +{ + if (DMA_FSTS_PFO(status)) + pr_info("Primary Fault Overflow"); + + if (DMA_FSTS_PPF(status)) + pr_info("Primary Pending Fault"); + + if (DMA_FSTS_AFO(status)) + pr_info("Advanced Fault Overflow"); + + if (DMA_FSTS_APF(status)) + pr_info("Advanced Pending Fault"); + + if (DMA_FSTS_IQE(status)) + pr_info("Invalidation Queue Error"); + + if (DMA_FSTS_ICE(status)) + pr_info("Invalidation Completion Error"); + + if (DMA_FSTS_ITE(status)) + pr_info("Invalidation Time-out Error"); + + if (DMA_FSTS_PRO(status)) + pr_info("Page Request Overflow"); +} +#endif + +static void fault_record_analysis(__unused uint64_t low, uint64_t high) +{ + if (!DMA_FRCD_UP_F(high)) + return; + + /* currently skip PASID related parsing */ + pr_info("%s, Reason: 0x%x, SID: %x.%x.%x @0x%llx", + DMA_FRCD_UP_T(high) ? 
"Read/Atomic" : "Write", + DMA_FRCD_UP_FR(high), + DMA_FRCD_UP_SID(high) >> 8, + (DMA_FRCD_UP_SID(high) >> 3) & 0x1f, + DMA_FRCD_UP_SID(high) & 0x7, + low); +#if DBG_IOMMU + if (iommu_ecap_dt(dmar_uint->ecap)) + pr_info("Address Type: 0x%x", + DMA_FRCD_UP_AT(high)); +#endif +} + +static int dmar_fault_handler(__unused int irq, void *data) +{ + struct dmar_drhd_rt *dmar_uint = (struct dmar_drhd_rt *)data; + uint32_t fsr; + uint32_t index; + uint32_t record_reg_offset; + uint64_t record[2]; + int loop = 0; + + dev_dbg(ACRN_DBG_IOMMU, "%s: irq = %d", __func__, irq); + + fsr = iommu_read32(dmar_uint, DMAR_FSTS_REG); + +#if DBG_IOMMU + fault_status_analysis(fsr); +#endif + + while (DMA_FSTS_PPF(fsr)) { + loop++; + index = DMA_FSTS_FRI(fsr); + record_reg_offset = dmar_uint->cap_fault_reg_offset + + index * 16; + if (index >= dmar_uint->cap_num_fault_regs) { + dev_dbg(ACRN_DBG_IOMMU, "%s: invalid FR Index", + __func__); + break; + } + + /* read 128-bit fault recording register */ + record[0] = iommu_read64(dmar_uint, record_reg_offset); + record[1] = iommu_read64(dmar_uint, record_reg_offset + 8); + + dev_dbg(ACRN_DBG_IOMMU, "%s: record[%d] @0x%x: 0x%llx, 0x%llx", + __func__, index, record_reg_offset, + record[0], record[1]); + + fault_record_analysis(record[0], record[1]); + + /* write to clear */ + iommu_write64(dmar_uint, record_reg_offset, record[0]); + iommu_write64(dmar_uint, record_reg_offset + 8, record[1]); + +#ifdef DMAR_FAULT_LOOP_MAX + if (loop > DMAR_FAULT_LOOP_MAX) { + dev_dbg(ACRN_DBG_IOMMU, "%s: loop more than %d times", + __func__, DMAR_FAULT_LOOP_MAX); + break; + } +#endif + + fsr = iommu_read32(dmar_uint, DMAR_FSTS_REG); + } + + return 0; +} + +static int dmar_setup_interrupt(struct dmar_drhd_rt *dmar_uint) +{ + int vector; + + if (dmar_uint->dmar_irq_node) { + dev_dbg(ACRN_DBG_IOMMU, "%s: irq already setup", __func__); + return 0; + } + + dmar_uint->dmar_irq_node = normal_register_handler(-1, + dmar_fault_handler, + dmar_uint, true, false, + "dmar_fault_event"); + + if (!dmar_uint->dmar_irq_node) { + pr_err("%s: fail to setup interrupt", __func__); + return 1; + } + + vector = dev_to_vector(dmar_uint->dmar_irq_node); + + dev_dbg(ACRN_DBG_IOMMU, "alloc irq#%d vector#%d for dmar_uint", + dev_to_irq(dmar_uint->dmar_irq_node), vector); + + dmar_fault_msi_write(dmar_uint, vector); + dmar_fault_event_unmask(dmar_uint); + + return 0; +} + +static void dmar_enable(struct dmar_drhd_rt *dmar_uint) +{ + dev_dbg(ACRN_DBG_IOMMU, "enable dmar uint [0x%x]", + dmar_uint->drhd->reg_base_addr); + dmar_setup_interrupt(dmar_uint); + dmar_write_buffer_flush(dmar_uint); + dmar_set_root_table(dmar_uint); + dmar_invalid_context_cache_global(dmar_uint); + dmar_invalid_iotlb_global(dmar_uint); + dmar_enable_translation(dmar_uint); +} + +static void dmar_disable(struct dmar_drhd_rt *dmar_uint) +{ + if (dmar_uint->gcmd & DMA_GCMD_TE) + dmar_disable_translation(dmar_uint); + + dmar_fault_event_mask(dmar_uint); +} + +struct iommu_domain *create_iommu_domain(int vm_id, void *translation_table, + int addr_width) +{ + struct iommu_domain *domain; + uint16_t domain_id; + + /* TODO: check if a domain with the vm_id exists */ + + if (!translation_table) { + pr_err("translation table is NULL"); + return NULL; + } + + domain_id = alloc_domain_id(); + if (domain_id > max_domain_id) { + pr_err("domain id is exhausted"); + return NULL; + } + + domain = calloc(1, sizeof(struct iommu_domain)); + + ASSERT(domain != NULL, ""); + domain->is_host = false; + domain->dom_id = domain_id; + domain->vm_id = vm_id; + 
domain->trans_table_ptr = translation_table; + domain->addr_width = addr_width; + domain->is_tt_ept = true; + + + spinlock_obtain(&domain_lock); + list_add(&domain->list, &iommu_domains); + spinlock_release(&domain_lock); + + dev_dbg(ACRN_DBG_IOMMU, "create domain [%d]: vm_id = %d, ept@0x%x", + domain->dom_id, + domain->vm_id, + domain->trans_table_ptr); + + return domain; +} + +int destroy_iommu_domain(struct iommu_domain *domain) +{ + if (!domain) + return 1; + + /* currently only support ept */ + if (!domain->is_tt_ept) + ASSERT(false, "translation_table is not EPT!"); + + /* TODO: check if any device assigned to this domain */ + + spinlock_obtain(&domain_lock); + list_del(&domain->list); + spinlock_release(&domain_lock); + + free_domain_id(domain->dom_id); + free(domain); + + return 0; +} + +static int add_iommu_device(struct iommu_domain *domain, uint16_t segment, + uint8_t bus, uint8_t devfun) +{ + struct dmar_drhd_rt *dmar_uint; + uint64_t *root_table; + uint64_t context_table_addr; + uint64_t *context_table; + struct dmar_root_entry *root_entry; + struct dmar_context_entry *context_entry; + uint64_t upper = 0; + uint64_t lower = 0; + + if (!domain) + return 1; + + dmar_uint = device_to_dmaru(segment, bus, devfun); + if (!dmar_uint) { + pr_err("no dmar unit found for device:0x%x:%x.%x", + bus, devfun >> 3, devfun & 0x7); + return 1; + } + + if (dmar_uint->drhd->ignore) { + dev_dbg(ACRN_DBG_IOMMU, "device is ignored :0x%x:%x.%x", + bus, devfun >> 3, devfun & 0x7); + return 0; + } + + if (!dmar_unit_support_aw(dmar_uint, domain->addr_width)) { + pr_err("dmar doesn't support addr width %d", + domain->addr_width); + return 1; + } + + if (dmar_uint->root_table_addr == 0) { + /* 1:1 mapping for hypervisor HEAP, + * physical address equals virtual address + */ + dmar_uint->root_table_addr = + (uint64_t)alloc_paging_struct(); + } + + root_table = (uint64_t *)dmar_uint->root_table_addr; + + root_entry = (struct dmar_root_entry *)&root_table[bus * 2]; + + if (!DMAR_GET_BITSLICE(root_entry->lower, ROOT_ENTRY_LOWER_PRESENT)) { + /* create context table for the bus if not present */ + context_table_addr = + (uint64_t)alloc_paging_struct(); + + context_table_addr = context_table_addr >> 12; + + lower = DMAR_SET_BITSLICE(lower, ROOT_ENTRY_LOWER_CTP, + context_table_addr); + lower = DMAR_SET_BITSLICE(lower, ROOT_ENTRY_LOWER_PRESENT, 1); + + root_entry->upper = 0; + root_entry->lower = lower; + } else { + context_table_addr = DMAR_GET_BITSLICE(root_entry->lower, + ROOT_ENTRY_LOWER_CTP); + } + + context_table_addr = context_table_addr << 12; + + context_table = (uint64_t *)context_table_addr; + context_entry = (struct dmar_context_entry *)&context_table[devfun * 2]; + + /* the context entry should not be present */ + if (DMAR_GET_BITSLICE(context_entry->lower, CTX_ENTRY_LOWER_P)) { + pr_err("%s: context entry@0x%llx (Lower:%x) ", + __func__, context_entry, context_entry->lower); + pr_err("already present for %x:%x.%x", + bus, devfun >> 3, devfun & 0x7); + return 1; + } + + /* setup context entry for the devfun */ + upper = 0; + lower = 0; + if (domain->is_host) { + if (iommu_ecap_pt(dmar_uint->ecap)) { + /* When the Translation-type (T) field indicates + * pass-through processing (10b), AW field must be + * programmed to indicate the largest AGAW value + * supported by hardware. 
+ */ + upper = DMAR_SET_BITSLICE(upper, CTX_ENTRY_UPPER_AW, + dmar_uint->cap_msagaw); + lower = DMAR_SET_BITSLICE(lower, CTX_ENTRY_LOWER_TT, + DMAR_CTX_TT_PASSTHROUGH); + } else + ASSERT(false, + "dmaru doesn't support trans passthrough"); + } else { + /* TODO: add Device TLB support */ + upper = + DMAR_SET_BITSLICE(upper, CTX_ENTRY_UPPER_AW, + width_to_agaw( + domain->addr_width)); + lower = DMAR_SET_BITSLICE(lower, CTX_ENTRY_LOWER_TT, + DMAR_CTX_TT_UNTRANSLATED); + } + + upper = DMAR_SET_BITSLICE(upper, CTX_ENTRY_UPPER_DID, domain->dom_id); + lower = DMAR_SET_BITSLICE(lower, CTX_ENTRY_LOWER_SLPTPTR, + (uint64_t)domain->trans_table_ptr >> 12); + lower = DMAR_SET_BITSLICE(lower, CTX_ENTRY_LOWER_P, 1); + + context_entry->upper = upper; + context_entry->lower = lower; + + return 0; +} + +static int +remove_iommu_device(struct iommu_domain *domain, uint16_t segment, + uint8_t bus, uint8_t devfun) +{ + struct dmar_drhd_rt *dmar_uint; + uint64_t *root_table; + uint64_t context_table_addr; + uint64_t *context_table; + struct dmar_root_entry *root_entry; + struct dmar_context_entry *context_entry; + + if (!domain) + return 1; + + dmar_uint = device_to_dmaru(segment, bus, devfun); + if (!dmar_uint) { + pr_err("no dmar unit found for device:0x%x:%x", + bus, devfun); + return 1; + } + + root_table = (uint64_t *)dmar_uint->root_table_addr; + root_entry = (struct dmar_root_entry *)&root_table[bus * 2]; + + context_table_addr = DMAR_GET_BITSLICE(root_entry->lower, + ROOT_ENTRY_LOWER_CTP); + context_table_addr = context_table_addr << 12; + context_table = (uint64_t *)context_table_addr; + + context_entry = (struct dmar_context_entry *)&context_table[devfun * 2]; + + if (DMAR_GET_BITSLICE(context_entry->upper, + CTX_ENTRY_UPPER_DID) != domain->dom_id) { + pr_err("%s: domain id mismatch", __func__); + return 1; + } + + /* clear the present bit first */ + context_entry->lower = 0; + context_entry->upper = 0; + + /* if caching mode is present, need to invalidate translation cache */ + /* if(cap_caching_mode(dmar_uint->cap)) { */ + dmar_invalid_context_cache_global(dmar_uint); + dmar_invalid_iotlb_global(dmar_uint); + /* } */ + return 0; +} + +int assign_iommu_device(struct iommu_domain *domain, uint8_t bus, + uint8_t devfun) +{ + if (!domain) + return 1; + + /* TODO: check if the device assigned */ + + remove_iommu_device(host_domain, 0, bus, devfun); + add_iommu_device(domain, 0, bus, devfun); + return 0; +} + +int unassign_iommu_device(struct iommu_domain *domain, uint8_t bus, + uint8_t devfun) +{ + if (!domain) + return 1; + + /* TODO: check if the device assigned */ + + remove_iommu_device(domain, 0, bus, devfun); + add_iommu_device(host_domain, 0, bus, devfun); + return 0; +} + +void enable_iommu(void) +{ + struct dmar_drhd_rt *dmar_uint; + struct list_head *pos; + + list_for_each(pos, &dmar_drhd_units) { + dmar_uint = list_entry(pos, struct dmar_drhd_rt, list); + if (!dmar_uint->drhd->ignore) + dmar_enable(dmar_uint); + else + dev_dbg(ACRN_DBG_IOMMU, "ignore dmar_uint @0x%x", + dmar_uint->drhd->reg_base_addr); + } +} + +void disable_iommu(void) +{ + struct dmar_drhd_rt *dmar_uint; + struct list_head *pos; + + list_for_each(pos, &dmar_drhd_units) { + dmar_uint = list_entry(pos, struct dmar_drhd_rt, list); + dmar_disable(dmar_uint); + } +} + +int init_iommu(void) +{ + uint16_t bus; + uint16_t devfun; + + INIT_LIST_HEAD(&dmar_drhd_units); + INIT_LIST_HEAD(&iommu_domains); + + spinlock_init(&domain_lock); + + if (register_hrhd_units()) + return -1; + + host_domain = create_host_domain(); + + for (bus = 0; 
bus <= IOMMU_INIT_BUS_LIMIT; bus++) { + for (devfun = 0; devfun <= 255; devfun++) { + add_iommu_device(host_domain, 0, + (uint8_t)bus, (uint8_t)devfun); + } + } + + enable_iommu(); + + return 0; +} diff --git a/hypervisor/boot/acpi.c b/hypervisor/boot/acpi.c new file mode 100644 index 000000000..c4d356416 --- /dev/null +++ b/hypervisor/boot/acpi.c @@ -0,0 +1,263 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2003 John Baldwin + * Copyright (c) 2018 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include "acpi.h" + +#define ACPI_SIG_RSDP "RSD PTR " /* Root System Description Ptr */ +#define ACPI_OEM_ID_SIZE 6 +#define ACPI_SIG_MADT "APIC" /* Multiple APIC Description Table */ +#define ACPI_SIG_DMAR "DMAR" +#define RSDP_CHECKSUM_LENGTH 20 +#define ACPI_NAME_SIZE 4 +#define ACPI_MADT_TYPE_LOCAL_APIC 0 +#define ACPI_MADT_ENABLED 1 +#define ACPI_OEM_TABLE_ID_SIZE 8 + +struct acpi_table_rsdp { + /* ACPI signature, contains "RSD PTR " */ + char signature[8]; + /* ACPI 1.0 checksum */ + uint8_t checksum; + /* OEM identification */ + char oem_id[ACPI_OEM_ID_SIZE]; + /* Must be (0) for ACPI 1.0 or (2) for ACPI 2.0+ */ + uint8_t revision; + /* 32-bit physical address of the RSDT */ + uint32_t rsdt_physical_address; + /* Table length in bytes, including header (ACPI 2.0+) */ + uint32_t length; + /* 64-bit physical address of the XSDT (ACPI 2.0+) */ + uint64_t xsdt_physical_address; + /* Checksum of entire table (ACPI 2.0+) */ + uint8_t extended_checksum; + /* Reserved, must be zero */ + uint8_t reserved[3]; +}; + +struct acpi_table_rsdt { + /* Common ACPI table header */ + struct acpi_table_header header; + /* Array of pointers to ACPI tables */ + uint32_t table_offset_entry[1]; +} __packed; + +struct acpi_table_xsdt { + /* Common ACPI table header */ + struct acpi_table_header header; + /* Array of pointers to ACPI tables */ + uint64_t table_offset_entry[1]; +} __packed; + +struct acpi_subtable_header { + uint8_t type; + uint8_t length; +}; + +struct acpi_table_madt { + /* Common ACPI table header */ + struct acpi_table_header header; + /* Physical address of local APIC */ + uint32_t address; + uint32_t flags; +}; + +struct acpi_madt_local_apic { + struct acpi_subtable_header header; + /* ACPI processor id */ + uint8_t processor_id; + /* Processor's local APIC id */ + uint8_t id; + uint32_t lapic_flags; +}; + +static void *global_rsdp; +static uint64_t madt; + +static struct acpi_table_rsdp* +biosacpi_search_rsdp(char *base, int length) +{ + struct acpi_table_rsdp *rsdp; + uint8_t *cp, sum; + int ofs, idx; + + /* search on 16-byte boundaries */ + for (ofs = 0; ofs < length; ofs += 16) { + rsdp = (struct acpi_table_rsdp *)(base + ofs); + + /* compare signature, validate checksum */ + if (!strncmp(rsdp->signature, ACPI_SIG_RSDP, + strnlen_s(ACPI_SIG_RSDP, 8))) { + cp = (uint8_t *)rsdp; + sum = 0; + for (idx = 0; idx < RSDP_CHECKSUM_LENGTH; idx++) + sum += *(cp + idx); + + if (sum != 0) + continue; + + return rsdp; + } + } + + return NULL; +} + +static void *get_rsdp(void) +{ + struct acpi_table_rsdp *rsdp = NULL; + uint16_t *addr; + + /* EBDA is addressed by the 16 bit pointer at 0x40E */ + addr = (uint16_t *)0x40E; + + rsdp = biosacpi_search_rsdp((char *)((uint64_t)(*addr << 4)), 0x400); + if (rsdp != NULL) + return rsdp; + + /* Check the upper memory BIOS space, 0xe0000 - 0xfffff. 
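+	 * Per the ACPI specification, the RSDP is located either in the
+	 * first 1KB of the EBDA (searched above) or in the read-only BIOS
+	 * space below 1MB, which is scanned here.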
*/ + rsdp = biosacpi_search_rsdp((char *)0xe0000, 0x20000); + if (rsdp != NULL) + return rsdp; + + return rsdp; +} + +static int +probe_table(uint64_t address, const char *sig) +{ + struct acpi_table_header *table = (struct acpi_table_header *)address; + + if (strncmp(table->signature, sig, ACPI_NAME_SIZE) != 0) + return 0; + + return 1; +} + +uint64_t get_acpi_tbl(char *sig) +{ + struct acpi_table_rsdp *rsdp; + struct acpi_table_rsdt *rsdt; + struct acpi_table_xsdt *xsdt; + uint64_t addr = 0; + int i, count; + + rsdp = (struct acpi_table_rsdp *)global_rsdp; + + if (rsdp->revision >= 2 && rsdp->xsdt_physical_address) { + /* + * AcpiOsGetRootPointer only verifies the checksum for + * the version 1.0 portion of the RSDP. Version 2.0 has + * an additional checksum that we verify first. + */ + xsdt = (struct acpi_table_xsdt *)(rsdp->xsdt_physical_address); + count = (xsdt->header.length - + sizeof(struct acpi_table_header)) / + sizeof(uint64_t); + + for (i = 0; i < count; i++) { + if (probe_table(xsdt->table_offset_entry[i], sig)) { + addr = xsdt->table_offset_entry[i]; + break; + } + } + } else { + /* Root table is an RSDT (32-bit physical addresses) */ + rsdt = (struct acpi_table_rsdt *) + ((void *)(uint64_t)rsdp->rsdt_physical_address); + count = (rsdt->header.length - + sizeof(struct acpi_table_header)) / + sizeof(uint32_t); + + for (i = 0; i < count; i++) { + if (probe_table(rsdt->table_offset_entry[i], sig)) { + addr = rsdt->table_offset_entry[i]; + break; + } + } + } + + return addr; +} + +static int _parse_madt(uint64_t madt, uint8_t *lapic_id_base) +{ + int pcpu_id = 0; + struct acpi_madt_local_apic *processor; + struct acpi_table_madt *madt_ptr; + void *first; + void *end; + struct acpi_subtable_header *entry; + + madt_ptr = (struct acpi_table_madt *)madt; + + first = madt_ptr + 1; + end = (char *)madt_ptr + madt_ptr->header.length; + + for (entry = first; (void *)entry < end; ) { + if (entry->length < sizeof(struct acpi_subtable_header)) + continue; + + if (entry->type == ACPI_MADT_TYPE_LOCAL_APIC) { + processor = (struct acpi_madt_local_apic *)entry; + if (processor->lapic_flags & ACPI_MADT_ENABLED) { + *lapic_id_base++ = processor->id; + pcpu_id++; + } + } + + entry = (struct acpi_subtable_header *) + (((uint64_t)entry) + entry->length); + } + + return pcpu_id; +} + +/* The lapic_id info gotten from madt will be returned in lapic_id_base */ +int parse_madt(uint8_t *lapic_id_base) +{ + global_rsdp = get_rsdp(); + ASSERT(global_rsdp != NULL, "fail to get rsdp"); + + madt = get_acpi_tbl(ACPI_SIG_MADT); + ASSERT(madt != 0, "fail to get madt"); + + return _parse_madt(madt, lapic_id_base); +} + +uint64_t get_dmar_table(void) +{ + return get_acpi_tbl(ACPI_SIG_DMAR); +} diff --git a/hypervisor/boot/dmar_parse.c b/hypervisor/boot/dmar_parse.c new file mode 100644 index 000000000..099a7b187 --- /dev/null +++ b/hypervisor/boot/dmar_parse.c @@ -0,0 +1,360 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "bsp_cfg.h" +#ifdef CONFIG_DMAR_PARSE_ENABLED +#include +#include +#include +#include +#include +#include "vtd.h" +#include "acpi.h" + +#define PCI_CONFIG_ADDRESS 0xcf8 +#define PCI_CONFIG_DATA 0xcfc +#define PCI_CONFIG_ACCESS_EN 0x80000000 + +enum acpi_dmar_type { + ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, + ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, + ACPI_DMAR_TYPE_ROOT_ATS = 2, + ACPI_DMAR_TYPE_HARDWARE_AFFINITY = 3, + ACPI_DMAR_TYPE_NAMESPACE = 4, + ACPI_DMAR_TYPE_RESERVED = 5 +}; + +/* Values for entry_type in ACPI_DMAR_DEVICE_SCOPE - device types */ +enum acpi_dmar_scope_type { + ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0, + ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1, + ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2, + ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3, + ACPI_DMAR_SCOPE_TYPE_HPET = 4, + ACPI_DMAR_SCOPE_TYPE_NAMESPACE = 5, + ACPI_DMAR_SCOPE_TYPE_RESERVED = 6 /* 6 and greater are reserved */ +}; + +struct acpi_table_dmar { + /* Common ACPI table header */ + struct acpi_table_header header; + /* Host address Width */ + uint8_t width; + uint8_t flags; + uint8_t reserved[10]; +}; + +/* DMAR subtable header */ +struct acpi_dmar_header { + uint16_t type; + uint16_t length; +}; + +struct acpi_dmar_hardware_unit { + struct acpi_dmar_header header; + uint8_t flags; + uint8_t reserved; + uint16_t segment; + /* register base address */ + uint64_t address; +}; + +struct find_iter_args { + int i; + struct acpi_dmar_hardware_unit *res; +}; + +struct acpi_dmar_pci_path { + uint8_t device; + uint8_t function; +}; + +struct acpi_dmar_device_scope { + uint8_t entry_type; + uint8_t length; + uint16_t reserved; + uint8_t enumeration_id; + uint8_t bus; +}; + +typedef int (*dmar_iter_t)(struct acpi_dmar_header*, void*); + +static struct dmar_info dmar_info_parsed; +static int dmar_unit_cnt; + +static void +dmar_iterate_tbl(dmar_iter_t iter, void *arg) +{ + struct acpi_table_dmar *dmar_tbl; + struct acpi_dmar_header *dmar_header; + char *ptr, *ptr_end; + + dmar_tbl = (struct acpi_table_dmar *)get_dmar_table(); + ASSERT(dmar_tbl != NULL, ""); + + ptr = (char *)dmar_tbl + sizeof(*dmar_tbl); + ptr_end = (char *)dmar_tbl + dmar_tbl->header.length; + + for (;;) { + if (ptr >= ptr_end) + break; + dmar_header = (struct acpi_dmar_header *)ptr; + if (dmar_header->length <= 0) { + pr_err("drhd: corrupted DMAR table, l %d\n", + dmar_header->length); + break; + } + ptr += dmar_header->length; + if (!iter(dmar_header, arg)) + break; + } +} + +static int +drhd_count_iter(struct acpi_dmar_header *dmar_header, __unused void *arg) +{ + if (dmar_header->type == 
ACPI_DMAR_TYPE_HARDWARE_UNIT) + dmar_unit_cnt++; + return 1; +} + +static int +drhd_find_iter(struct acpi_dmar_header *dmar_header, void *arg) +{ + struct find_iter_args *args; + + if (dmar_header->type != ACPI_DMAR_TYPE_HARDWARE_UNIT) + return 1; + + args = arg; + if (args->i == 0) { + args->res = (struct acpi_dmar_hardware_unit *)dmar_header; + return 0; + } + args->i--; + return 1; +} + +static struct acpi_dmar_hardware_unit * +drhd_find_by_index(int idx) +{ + struct find_iter_args args; + + args.i = idx; + args.res = NULL; + dmar_iterate_tbl(drhd_find_iter, &args); + return args.res; +} + +static uint8_t get_secondary_bus(uint8_t bus, uint8_t dev, uint8_t func) +{ + uint32_t data; + + io_write_long(PCI_CONFIG_ACCESS_EN | (bus << 16) | (dev << 11) | + (func << 8) | 0x18, PCI_CONFIG_ADDRESS); + + data = io_read_long(PCI_CONFIG_DATA); + + return (data >> 8) & 0xff; +} + +static uint16_t +dmar_path_bdf(int path_len, int busno, + const struct acpi_dmar_pci_path *path) +{ + int i; + uint8_t bus; + uint8_t dev; + uint8_t fun; + + + bus = (uint8_t)busno; + dev = path->device; + fun = path->function; + + + for (i = 1; i < path_len; i++) { + bus = get_secondary_bus(bus, dev, fun); + dev = path[i].device; + fun = path[i].function; + } + return (bus << 8 | DEVFUN(dev, fun)); +} + + +static int +handle_dmar_devscope(struct dmar_dev_scope *dev_scope, + void *addr, int remaining) +{ + int path_len; + uint16_t bdf; + struct acpi_dmar_pci_path *path; + struct acpi_dmar_device_scope *apci_devscope = addr; + + if (remaining < (int)sizeof(struct acpi_dmar_device_scope)) + return -1; + + if (remaining < apci_devscope->length) + return -1; + + path = (struct acpi_dmar_pci_path *)(apci_devscope + 1); + path_len = (apci_devscope->length - + sizeof(struct acpi_dmar_device_scope)) / + sizeof(struct acpi_dmar_pci_path); + + bdf = dmar_path_bdf(path_len, apci_devscope->bus, path); + dev_scope->bus = (bdf >> 8) & 0xff; + dev_scope->devfun = bdf & 0xff; + + return apci_devscope->length; +} + +static uint32_t +get_drhd_dev_scope_cnt(struct acpi_dmar_hardware_unit *drhd) +{ + struct acpi_dmar_device_scope *scope; + char *start; + char *end; + uint32_t count = 0; + + start = (char *)drhd + sizeof(struct acpi_dmar_hardware_unit); + end = (char *)drhd + drhd->header.length; + + while (start < end) { + scope = (struct acpi_dmar_device_scope *)start; + if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || + scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE || + scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE) + count++; + start += scope->length; + } + return count; +} + +static int +handle_one_drhd(struct acpi_dmar_hardware_unit *acpi_drhd, + struct dmar_drhd *drhd) +{ + struct dmar_dev_scope *dev_scope; + struct acpi_dmar_device_scope *ads; + int remaining, consumed; + char *cp; + uint32_t dev_count; + + drhd->segment = acpi_drhd->segment; + drhd->flags = acpi_drhd->flags; + drhd->reg_base_addr = acpi_drhd->address; + + if (drhd->flags & DRHD_FLAG_INCLUDE_PCI_ALL_MASK) { + drhd->dev_cnt = 0; + drhd->devices = NULL; + return 0; + } + + dev_count = get_drhd_dev_scope_cnt(acpi_drhd); + drhd->dev_cnt = dev_count; + if (dev_count) { + drhd->devices = + calloc(dev_count, sizeof(struct dmar_dev_scope)); + ASSERT(drhd->devices, ""); + } else { + drhd->devices = NULL; + return 0; + } + + remaining = acpi_drhd->header.length - + sizeof(struct acpi_dmar_hardware_unit); + + dev_scope = drhd->devices; + + while (remaining > 0) { + cp = (char *)acpi_drhd + acpi_drhd->header.length - remaining; + + consumed = 
handle_dmar_devscope(dev_scope, cp, remaining); + + if (((drhd->segment << 16) | + (dev_scope->bus << 8) | + dev_scope->devfun) == CONFIG_GPU_SBDF) { + ASSERT(dev_count == 1, "no dedicated iommu for gpu"); + drhd->ignore = true; + } + + if (consumed <= 0) + break; + + remaining -= consumed; + /* skip IOAPIC & HPET */ + ads = (struct acpi_dmar_device_scope *)cp; + if (ads->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC && + ads->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) + dev_scope++; + else + pr_dbg("drhd: skip dev_scope type %d", + ads->entry_type); + } + + return 0; +} + +int parse_dmar_table(void) +{ + int i; + struct acpi_dmar_hardware_unit *acpi_drhd; + + /* find out how many dmar units */ + dmar_iterate_tbl(drhd_count_iter, NULL); + + /* alloc memory for dmar uint */ + dmar_info_parsed.drhd_units = + calloc(dmar_unit_cnt, sizeof(struct dmar_drhd)); + ASSERT(dmar_info_parsed.drhd_units, ""); + + dmar_info_parsed.drhd_count = dmar_unit_cnt; + + for (i = 0; i < dmar_unit_cnt; i++) { + acpi_drhd = drhd_find_by_index(i); + if (acpi_drhd == NULL) + continue; + if (acpi_drhd->flags & DRHD_FLAG_INCLUDE_PCI_ALL_MASK) + ASSERT((i+1) == dmar_unit_cnt, + "drhd with flags set should be the last one"); + handle_one_drhd(acpi_drhd, &dmar_info_parsed.drhd_units[i]); + } + + return 0; +} + +struct dmar_info *get_dmar_info(void) +{ + parse_dmar_table(); + return &dmar_info_parsed; +} + +#endif diff --git a/hypervisor/boot/include/acpi.h b/hypervisor/boot/include/acpi.h new file mode 100644 index 000000000..94389938e --- /dev/null +++ b/hypervisor/boot/include/acpi.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef ACPI_H +#define ACPI_H + +struct acpi_table_header { + /* ASCII table signature */ + char signature[4]; + /* Length of table in bytes, including this header */ + uint32_t length; + /* ACPI Specification minor version number */ + uint8_t revision; + /* To make sum of entire table == 0 */ + uint8_t checksum; + /* ASCII OEM identification */ + char oem_id[6]; + /* ASCII OEM table identification */ + char oem_table_id[8]; + /* OEM revision number */ + uint32_t oem_revision; + /* ASCII ASL compiler vendor ID */ + char asl_compiler_id[4]; + /* ASL compiler version */ + uint32_t asl_compiler_revision; +}; + +int parse_madt(uint8_t *lapic_id_base); + +uint64_t get_dmar_table(void); +#endif /* !ACPI_H */ diff --git a/hypervisor/bsp/include/bsp_extern.h b/hypervisor/bsp/include/bsp_extern.h new file mode 100644 index 000000000..bffdb3061 --- /dev/null +++ b/hypervisor/bsp/include/bsp_extern.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/************************************************************************ + * + * FILE NAME + * + * bsp_extern.h + * + * DESCRIPTION + * + * This file defines the generic BSP interface + * + ************************************************************************/ +#ifndef BSP_EXTERN_H +#define BSP_EXTERN_H + +#define UOS_DEFAULT_START_ADDR (0x100000000) +/**********************************/ +/* EXTERNAL VARIABLES */ +/**********************************/ + +/* BSP Interfaces */ +void init_bsp(void); + +/* External Interfaces */ +struct _vm_description_array; +const struct _vm_description_array *get_vm_desc_base(void); + +#endif /* BSP_EXTERN_H */ diff --git a/hypervisor/bsp/ld/link_ram.ld.in b/hypervisor/bsp/ld/link_ram.ld.in new file mode 100644 index 000000000..7e880cdd1 --- /dev/null +++ b/hypervisor/bsp/ld/link_ram.ld.in @@ -0,0 +1,94 @@ +#include "bsp_cfg.h" + +ENTRY(cpu_primary_start_32) + +MEMORY +{ + /* Low 1MB of memory for secondary processor start-up */ + lowram : ORIGIN = CONFIG_LOW_RAM_START, LENGTH = CONFIG_LOW_RAM_SIZE + + /* 32 MBytes of RAM for HV */ + ram : ORIGIN = CONFIG_RAM_START, LENGTH = CONFIG_RAM_SIZE +} + +SECTIONS +{ + .boot : + { + _ld_ram_start = . ; + KEEP(*(multiboot_header)) ; + } > ram + + .entry : + { + KEEP(*(entry)) ; + + } > ram + + .text : + { + *(.text .text*) ; + *(.gnu.linkonce.t*) + *(.note.gnu.build-id) + } > ram + + .rodata : + { + *(.rodata*) ; + + } > ram + + _ld_cpu_secondary_reset_load = .; + + .cpu_secondary : AT (_ld_cpu_secondary_reset_load) + { + _ld_cpu_secondary_reset_start = .; + *(.cpu_secondary_reset); + . = ALIGN(4); + _ld_cpu_secondary_reset_end = .; + + } > lowram + + _ld_cpu_secondary_reset_size = _ld_cpu_secondary_reset_end - _ld_cpu_secondary_reset_start; + + .data (_ld_cpu_secondary_reset_load + _ld_cpu_secondary_reset_size): + { + *(.data) ; + *(.data*) ; + *(.sdata) + *(.gnu.linkonce.d*) + + } > ram + + .bss_noinit (NOLOAD): + { + . = ALIGN(4) ; + *(.bss_noinit) ; + *(.bss_noinit*) ; + . = ALIGN(4) ; + } > ram + + .bss (NOLOAD): + { + . = ALIGN(4) ; + _ld_bss_start = . ; + *(.bss) ; + *(.bss*) ; + *(COMMON) ; + . = ALIGN(4) ; + _ld_bss_end = . ; + } > ram + + .discard (NOLOAD): + { + . = ALIGN(4096) ; + _ld_cpu_data_start = .; + *(.cpu_data) ; + . = ALIGN(4096) ; + _ld_cpu_data_end = .; + } > ram + + _ld_ram_size = LENGTH(ram) ; + _ld_ram_end = _ld_ram_size + _ld_ram_start ; +} + diff --git a/hypervisor/bsp/sbl/include/bsp/bsp_cfg.h b/hypervisor/bsp/sbl/include/bsp/bsp_cfg.h new file mode 100644 index 000000000..3733b2c77 --- /dev/null +++ b/hypervisor/bsp/sbl/include/bsp/bsp_cfg.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef BSP_CFG_H +#define BSP_CFG_H +#define NR_IOAPICS 1 +#define STACK_SIZE 8192 +#define LOG_BUF_SIZE 0x100000 +#define LOG_DESTINATION 3 +#define CPU_UP_TIMEOUT 100 +#define CONFIG_SERIAL_MMIO_BASE 0xfc000000 +#define MALLOC_ALIGN 16 +#define NUM_ALLOC_PAGES 4096 +#define HEAP_SIZE 0x100000 +#define CONSOLE_LOGLEVEL_DEFAULT 2 +#define MEM_LOGLEVEL_DEFAULT 4 +#define CONFIG_LOW_RAM_START 0x00001000 +#define CONFIG_LOW_RAM_SIZE 0x000CF000 +#define CONFIG_RAM_START 0x6E000000 +#define CONFIG_RAM_SIZE 0x02000000 /* 32M */ +#define CONFIG_RETPOLINE +#endif /* BSP_CFG_H */ diff --git a/hypervisor/bsp/sbl/sbl.c b/hypervisor/bsp/sbl/sbl.c new file mode 100644 index 000000000..bc9a42558 --- /dev/null +++ b/hypervisor/bsp/sbl/sbl.c @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include + +/* IOAPIC id */ +#define SBL_IOAPIC_ID 8 +/* IOAPIC base address */ +#define SBL_IOAPIC_ADDR 0xfec00000 +/* IOAPIC range size */ +#define SBL_IOAPIC_SIZE 0x100000 +/* Local APIC base address */ +#define SBL_LAPIC_ADDR 0xfee00000 +/* Local APIC range size */ +#define SBL_LAPIC_SIZE 0x100000 +/* Number of PCI IRQ assignments */ +#define SBL_PCI_IRQ_ASSIGNMENT_NUM 28 + +#ifndef CONFIG_DMAR_PARSE_ENABLED +static struct dmar_dev_scope default_drhd_unit_dev_scope0[] = { + { .bus = 0, .devfun = DEVFUN(0x2, 0), }, +}; + +static struct dmar_drhd drhd_info_array[] = { + { + .dev_cnt = 1, + .segment = 0, + .flags = 0, + .reg_base_addr = 0xFED64000, + /* Ignore the iommu for intel graphic device since GVT-g needs + * vtd disabled for gpu + */ + .ignore = true, + .devices = default_drhd_unit_dev_scope0, + }, + { + /* No need to specify devices since + * DRHD_FLAG_INCLUDE_PCI_ALL_MASK set + */ + .dev_cnt = 0, + .segment = 0, + .flags = DRHD_FLAG_INCLUDE_PCI_ALL_MASK, + .reg_base_addr = 0xFED65000, + .ignore = false, + .devices = NULL, + }, +}; + +static struct dmar_info sbl_dmar_info = { + .drhd_count = 2, + .drhd_units = drhd_info_array, +}; + +struct dmar_info *get_dmar_info(void) +{ + return &sbl_dmar_info; +} +#endif + +void init_bsp(void) +{ +} diff --git a/hypervisor/bsp/sbl/vm_description.c b/hypervisor/bsp/sbl/vm_description.c new file mode 100644 index 000000000..eb1ca5ac4 --- /dev/null +++ b/hypervisor/bsp/sbl/vm_description.c @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include + +#define NUM_USER_VMS 2 + +/* Number of CPUs in VM0 */ +#define VM0_NUM_CPUS 1 + +/* Logical CPU IDs assigned to VM0 */ +int VM0_CPUS[VM0_NUM_CPUS] = {0}; + +/* Number of CPUs in VM1 */ +#define VM1_NUM_CPUS 2 + +/* Logical CPU IDs assigned with VM1 */ +int VM1_CPUS[VM1_NUM_CPUS] = {3, 1}; + +const struct vm_description_array vm_desc = { + /* Number of user virtual machines */ + .num_vm_desc = NUM_USER_VMS, + + /* Virtual Machine descriptions */ + .vm_desc_array = { + { + /* Internal variable, MUSTBE init to -1 */ + .vm_attr_name = "vm_0", + .vm_hw_num_cores = VM0_NUM_CPUS, + .vm_hw_logical_core_ids = &VM0_CPUS[0], + .vm_state_info_privilege = VM_PRIVILEGE_LEVEL_HIGH, + .vm_created = false, + }, + } +}; + +const struct vm_description_array *get_vm_desc_base(void) +{ + return &vm_desc; +} diff --git a/hypervisor/bsp/uefi/clearlinux/acrn.conf b/hypervisor/bsp/uefi/clearlinux/acrn.conf new file mode 100755 index 000000000..88b6b95bf --- /dev/null +++ b/hypervisor/bsp/uefi/clearlinux/acrn.conf @@ -0,0 +1,3 @@ +title ACRN OS +linux /EFI/org.clearlinux/acrn.efi +options sos=bzImage pci_devices_ignore=(0:18:2) noxsave maxcpus=1 console=tty0 console=ttyS0 i915.nuclear_pageflip=1 root=/dev/sda3 rw rootwait clocksource=hpet ignore_loglevel no_timer_check consoleblank=0 i915.tsd_init=7 i915.tsd_delay=2000 i915.avail_planes_per_pipe=0x00000F i915.domain_plane_owners=0x011111110000 i915.enable_guc_loading=0 i915.enable_guc_submission=0 i915.enable_preemption=1 i915.context_priority_mode=2 i915.enable_gvt=1 hvlog=2M@0x1FE00000 cma=2560M@0x100000000-0 diff --git a/hypervisor/bsp/uefi/cmdline.c b/hypervisor/bsp/uefi/cmdline.c new file mode 100644 index 000000000..70a45be7b --- /dev/null +++ b/hypervisor/bsp/uefi/cmdline.c @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_PORT 0x10000 /* port 0 - 64K */ +#define DEFAULT_UART_PORT 0x3F8 + +#define ACRN_DBG_PARSE 6 + +#define MAX_CMD_LEN 64 + +static const char * const cmd_list[] = { + "uart=disabled", /* to disable uart */ + "uart=port@", /* like uart=port@0x3F8 */ + "uart=mmio@", /*like: uart=mmio@0xFC000000 */ +}; + +enum IDX_CMD { + IDX_DISABLE_UART, + IDX_PORT_UART, + IDX_MMIO_UART, + + IDX_MAX_CMD, +}; + +static void handle_cmd(const char *cmd, int len) +{ + int i; + + for (i = 0; i < IDX_MAX_CMD; i++) { + int tmp = strnlen_s(cmd_list[i], MAX_CMD_LEN); + + /*cmd prefix should be same with one in cmd_list */ + if (len < tmp) + continue; + + if (strncmp(cmd_list[i], cmd, tmp) != 0) + continue; + + if (i == IDX_DISABLE_UART) { + /* set uart disabled*/ + uart16550_set_property(0, 0, 0); + } else if ((i == IDX_PORT_UART) || (i == IDX_MMIO_UART)) { + uint64_t addr = strtoul(cmd + tmp, NULL, 16); + + dev_dbg(ACRN_DBG_PARSE, "uart addr=0x%llx", addr); + + if (i == IDX_PORT_UART) { + if (addr > MAX_PORT) + addr = DEFAULT_UART_PORT; + + uart16550_set_property(1, 1, addr); + } else { + uart16550_set_property(1, 0, addr); + } + } + } +} + +int parse_hv_cmdline(void) +{ + const char *start; + const char *end; + struct multiboot_info *mbi = NULL; + + if (boot_regs[0] != MULTIBOOT_INFO_MAGIC) { + ASSERT(0, "no multiboot info found"); + return -EINVAL; + } + + mbi = (struct multiboot_info *)((uint64_t)boot_regs[1]); + dev_dbg(ACRN_DBG_PARSE, "Multiboot detected, flag=0x%x", mbi->mi_flags); + + if (!(mbi->mi_flags & MULTIBOOT_INFO_HAS_CMDLINE)) { + dev_dbg(ACRN_DBG_PARSE, "no hv cmdline!"); + return -EINVAL; + } + + start = (char *)(uint64_t)mbi->mi_cmdline; + dev_dbg(ACRN_DBG_PARSE, "hv cmdline: %s", start); + + do { + while (*start == ' ') + start++; + + end = start + 1; + while (*end != ' ' && *end) + end++; + + handle_cmd(start, end - start); + start = end + 1; + + } while (*end && *start); + + return 0; +} diff --git a/hypervisor/bsp/uefi/efi/Makefile b/hypervisor/bsp/uefi/efi/Makefile new file mode 100644 index 000000000..5ddd5984b --- /dev/null +++ b/hypervisor/bsp/uefi/efi/Makefile @@ -0,0 +1,105 @@ +# +# Copyright (c) 2011, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# + +RELEASE:=0 +HV_OBJDIR:=build +HV_FILE:=acrn +EFI_OBJDIR:=$(HV_OBJDIR)/bsp/uefi/efi +C_SRCS = boot.c pe.c malloc.c +ACRN_OBJS := $(patsubst %.c,$(EFI_OBJDIR)/%.o,$(C_SRCS)) + +OBJCOPY=objcopy + +HOST = $(shell $(CC) -dumpmachine | sed "s/\(-\).*$$//") +ARCH := $(shell $(CC) -dumpmachine | sed "s/\(-\).*$$//") + +ifeq ($(ARCH),x86_64) + LIBDIR := $(shell if [ -d /usr/lib64 ]; then echo /usr/lib64; \ + else if [ -d /usr/lib ]; then echo /usr/lib; fi ; fi;) + FORMAT=efi-app-x86-64 +else + ARCH=ia32 + LIBDIR=/usr/lib32 + FORMAT=efi-app-ia32 +endif + +INCDIR := /usr/include + +# gnuefi sometimes installs these under a gnuefi/ directory, and sometimes not +CRT0 := $(LIBDIR)/crt0-efi-$(ARCH).o +LDSCRIPT := $(LIBDIR)/elf_$(ARCH)_efi.lds + +CFLAGS=-I. -I.. -I$(INCDIR)/efi -I$(INCDIR)/efi/$(ARCH) \ + -DEFI_FUNCTION_WRAPPER -fPIC -fshort-wchar -ffreestanding \ + -Wall -I../fs/ -D$(ARCH) + +ifeq ($(ARCH),ia32) + ifeq ($(HOST),x86_64) + CFLAGS += -m32 + endif +endif +ifeq ($(ARCH),x86_64) + CFLAGS += -mno-red-zone +endif + +LDFLAGS=-T $(LDSCRIPT) -Bsymbolic -shared -nostdlib -znocombreloc \ + -L$(LIBDIR) $(CRT0) +EFIBIN=$(HV_OBJDIR)/$(HV_FILE).efi +BOOT=$(EFI_OBJDIR)/boot.efi + +all: $(EFIBIN) + $(OBJCOPY) --add-section .hv="$(HV_OBJDIR)/$(HV_FILE).bin" --change-section-vma .hv=0x6e000 --set-section-flags .hv=alloc,data,contents,load --section-alignment 0x1000 $(EFI_OBJDIR)/boot.efi $(EFIBIN) + +install: $(EFIBIN) + install -D $(EFIBIN) $(DESTDIR)/usr/share/$(HV_FILE).efi + +$(EFIBIN): $(BOOT) + +$(EFI_OBJDIR)/boot.efi: $(EFI_OBJDIR)/boot.so + +$(EFI_OBJDIR)/boot.so: $(ACRN_OBJS) $(FS) + $(LD) $(LDFLAGS) -o $@ $^ -lgnuefi -lefi $(shell $(CC) $(CFLAGS) -print-libgcc-file-name) + +clean: + rm -f $(BOOT) $(HV_OBJDIR)/$(HV_FILE).efi $(EFI_OBJDIR)/boot.so $(ACRN_OBJS) $(FS) + +$(EFI_OBJDIR)/%.o:%.S + [ ! -e $@ ] && mkdir -p $(dir $@); \ + $(CC) $(CFLAGS) -c -o $@ $< + +$(EFI_OBJDIR)/%.o: %.c + [ ! -e $@ ] && mkdir -p $(dir $@); \ + $(CC) $(patsubst %, -I%, $(INCLUDE_PATH)) -I. -c $(CFLAGS) $(ARCH_CFLAGS) $< -o $@ + +%.efi: %.so + $(OBJCOPY) -j .text -j .sdata -j .data -j .dynamic -j .dynsym -j .rel \ + -j .rela -j .reloc --target=$(FORMAT) $*.so $@ diff --git a/hypervisor/bsp/uefi/efi/boot.c b/hypervisor/bsp/uefi/efi/boot.c new file mode 100644 index 000000000..75e4feb55 --- /dev/null +++ b/hypervisor/bsp/uefi/efi/boot.c @@ -0,0 +1,615 @@ +/* + * Copyright (c) 2011, Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "efilinux.h" +#include "stdlib.h" +#include "boot.h" +#include "multiboot.h" + +#define ERROR_STRING_LENGTH 32 +#define EFI_LOADER_SIGNATURE "EL64" + +#define LEAGCY_BIOS + +#define ACPI_XSDT_ENTRY_SIZE (sizeof (UINT64)) +#define ACPI_NAME_SIZE 4 +#define ACPI_OEM_ID_SIZE 6 +#define ACPI_OEM_TABLE_ID_SIZE 8 + +EFI_SYSTEM_TABLE *sys_table; +EFI_BOOT_SERVICES *boot; +EFI_RUNTIME_SERVICES *runtime; + +/** + * memory_map - Allocate and fill out an array of memory descriptors + * @map_buf: buffer containing the memory map + * @map_size: size of the buffer containing the memory map + * @map_key: key for the current memory map + * @desc_size: size of the desc + * @desc_version: memory descriptor version + * + * On success, @map_size contains the size of the memory map pointed + * to by @map_buf and @map_key, @desc_size and @desc_version are + * updated. + */ +EFI_STATUS +memory_map(EFI_MEMORY_DESCRIPTOR **map_buf, UINTN *map_size, + UINTN *map_key, UINTN *desc_size, UINT32 *desc_version) +{ + EFI_STATUS err; + + *map_size = sizeof(**map_buf) * 31; +get_map: + + /* + * Because we're about to allocate memory, we may + * potentially create a new memory descriptor, thereby + * increasing the size of the memory map. So increase + * the buffer size by the size of one memory + * descriptor, just in case. + */ + *map_size += sizeof(**map_buf); + + err = allocate_pool(EfiLoaderData, *map_size, + (void **)map_buf); + if (err != EFI_SUCCESS) { + Print(L"Failed to allocate pool for memory map"); + goto failed; + } + + err = get_memory_map(map_size, *map_buf, map_key, + desc_size, desc_version); + if (err != EFI_SUCCESS) { + if (err == EFI_BUFFER_TOO_SMALL) { + /* + * 'map_size' has been updated to reflect the + * required size of a map buffer. 
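+			 * Free the undersized buffer and retry the
+			 * allocation from the get_map label above.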
+ */ + free_pool((void *)*map_buf); + goto get_map; + } + + Print(L"Failed to get memory map"); + goto failed; + } + +failed: + return err; +} + +static inline BOOLEAN isspace(CHAR8 ch) +{ + return ((unsigned char)ch <= ' '); +} + +#if 0 +static void print_ch(char *str) +{ + int j; + CHAR16 *buf; + int len = strlen(str); + + buf = malloc((strlen(str) + 1)* 2); + for (j=0; jFilePath); + for (i = 0; i < StrLen(pathstr); i++) { + if (pathstr[i] == '/') + pathstr[i] = '\\'; + } + + pathlen = StrLen(pathstr); + + if (name[0] == '\\') { + *path = FileDevicePath(info->DeviceHandle, name); + goto out; + } + + for (i=pathlen - 1; i > 0; i--) { + if (pathstr[i] == '\\') break; + } + pathstr[i] = '\0'; + + pathlen = StrLen(pathstr); + + pathlen++; + pathname = AllocatePool((pathlen + 1 + StrLen(name))*sizeof(CHAR16)); + if (!pathname) { + Print(L"Failed to allocate memory for pathname\n"); + efi_status = EFI_OUT_OF_RESOURCES; + goto out; + } + StrCpy(pathname, pathstr); + StrCat(pathname, L"\\"); + StrCat(pathname, name); + + *path = FileDevicePath(info->DeviceHandle, pathname); + +out: + FreePool(pathstr); + return efi_status; +} +/** + * load_kernel - Load a kernel image into memory from the boot device + */ +EFI_STATUS +load_sos_image(EFI_HANDLE image, CHAR16 *name, CHAR16 *cmdline) +{ + UINTN map_size, _map_size, map_key; + UINT32 desc_version; + UINTN desc_size; + EFI_MEMORY_DESCRIPTOR *map_buf; + EFI_PHYSICAL_ADDRESS addr; + EFI_LOADED_IMAGE *info = NULL; + EFI_STATUS err; + struct multiboot_mmap *mmap; + struct multiboot_info *mbi; + + struct acpi_table_rsdp *rsdp; + int i, j; + + + err = handle_protocol(image, &LoadedImageProtocol, (void **)&info); + if (err != EFI_SUCCESS) + goto out; + + + EFI_HANDLE bz_hd; + EFI_DEVICE_PATH *path; + EFI_LOADED_IMAGE *bz_info = NULL; + EFI_IMAGE_ENTRY_POINT pe_entry; + struct efi_ctx* pe; + + err = get_path(name, info, &path); + if (err != EFI_SUCCESS) { + Print(L"fail to get bzImage.efi path"); + goto out; + } + + err = uefi_call_wrapper(BS->LoadImage, 6, FALSE, image, path, NULL, 0, &bz_hd); + + if (err != EFI_SUCCESS) { + Print(L"failed to load bzImage %lx\n", err); + goto out; + } + + err = handle_protocol(bz_hd, &LoadedImageProtocol, (void **)&bz_info); + if (err != EFI_SUCCESS) + goto out; + + if (cmdline) { + bz_info->LoadOptions = cmdline; + bz_info->LoadOptionsSize = (StrLen(cmdline) + 1) * sizeof(CHAR16); + } + + pe_entry = get_pe_entry(bz_info->ImageBase); + + if (pe_entry == NULL) { + Print(L"fail to get pe entry of bzImage\n"); + goto out; + } + + err = emalloc(sizeof(struct efi_ctx), 8, &addr); + if (err != EFI_SUCCESS) + goto out; + pe = (struct efi_ctx*)(UINTN)addr; + pe->entry = pe_entry; + pe->handle = bz_hd; + pe->table = sys_table; + + + /* multiboot info */ + err = emalloc(16384, 8, &addr); + if (err != EFI_SUCCESS) + goto out; + + mbi = (struct multiboot_info *)(UINTN)addr; + memset((void *)mbi, 0x0, sizeof(*mbi)); + + /* allocate mmap[] */ + err = emalloc(sizeof(struct multiboot_mmap)*128, 8, &addr); + if (err != EFI_SUCCESS) + goto out; + mmap = (struct multiboot_mmap *)(UINTN)addr; + memset((void *)mmap, 0x0, sizeof(*mmap)*128); + + + EFI_CONFIGURATION_TABLE *config_table = sys_table->ConfigurationTable; + for (i = 0; i < sys_table->NumberOfTableEntries;i++) { + EFI_GUID acpi_20_table_guid = ACPI_20_TABLE_GUID; + EFI_GUID acpi_table_guid = ACPI_TABLE_GUID; + if (CompareGuid(&acpi_20_table_guid, &config_table->VendorGuid) == 0) { + rsdp = config_table->VendorTable; + break; + } + + if (CompareGuid(&acpi_table_guid, 
&config_table->VendorGuid) == 0) + rsdp = config_table->VendorTable; + + config_table++; + } + + if (!rsdp) { + Print(L"unable to find RSDP\n"); + goto out; + } + + + /* We're just interested in the map's size for now */ + map_size = 0; + err = get_memory_map(&map_size, NULL, NULL, NULL, NULL); + if (err != EFI_SUCCESS && err != EFI_BUFFER_TOO_SMALL) + goto out; + +again: + _map_size = map_size; + err = emalloc(map_size, 1, &addr); + if (err != EFI_SUCCESS) + goto out; + map_buf = (EFI_MEMORY_DESCRIPTOR *)(UINTN)addr; + + /* + * Remember! We've already allocated map_buf with emalloc (and + * 'map_size' contains its size) which means that it should be + * positioned below our allocation for the kernel. Use that + * space for the memory map. + */ + err = get_memory_map(&map_size, map_buf, &map_key, + &desc_size, &desc_version); + if (err != EFI_SUCCESS) { + if (err == EFI_BUFFER_TOO_SMALL) { + /* + * Argh! The buffer that we allocated further + * up wasn't large enough which means we need + * to allocate them again, but this time + * larger. 'map_size' has been updated by the + * call to memory_map(). + */ + efree((UINTN)map_buf, _map_size); + goto again; + } + goto out; + } + + /* + * Convert the EFI memory map to E820. + */ + for (i = 0, j = 0; i < map_size / desc_size; i++) { + EFI_MEMORY_DESCRIPTOR *d; + unsigned int e820_type = 0; + + d = (EFI_MEMORY_DESCRIPTOR *)((unsigned long)map_buf + (i * desc_size)); + switch(d->Type) { + case EfiReservedMemoryType: + case EfiRuntimeServicesCode: + case EfiRuntimeServicesData: + case EfiMemoryMappedIO: + case EfiMemoryMappedIOPortSpace: + case EfiPalCode: + e820_type = E820_RESERVED; + break; + + case EfiUnusableMemory: + e820_type = E820_UNUSABLE; + break; + + case EfiACPIReclaimMemory: + e820_type = E820_ACPI; + break; + + case EfiLoaderCode: + case EfiLoaderData: + case EfiBootServicesCode: + case EfiBootServicesData: + case EfiConventionalMemory: + e820_type = E820_RAM; + break; + + case EfiACPIMemoryNVS: + e820_type = E820_NVS; + break; + + default: + continue; + } + if (e820_type == E820_RAM) { + UINT64 start = d->PhysicalStart; + UINT64 end = d->PhysicalStart + (d->NumberOfPages< (ACRN_HV_ADDR + ACRN_HV_SIZE)) + Print(L"e820[%d] start=%lx len=%lx\n", i, d->PhysicalStart, d->NumberOfPages << EFI_PAGE_SHIFT); + } + + if (j && mmap[j-1].mm_type == e820_type && + (mmap[j-1].mm_base_addr + mmap[j-1].mm_length) == d->PhysicalStart) { + mmap[j-1].mm_length += d->NumberOfPages << EFI_PAGE_SHIFT; + } else { + mmap[j].mm_base_addr = d->PhysicalStart; + mmap[j].mm_length = d->NumberOfPages << EFI_PAGE_SHIFT; + mmap[j].mm_type = e820_type; + j++; + } + } + + /* switch hv memory region(0x20000000 ~ 0x22000000) to availiable RAM in e820 table */ + mmap[j].mm_base_addr = ACRN_HV_ADDR; + mmap[j].mm_length = ACRN_HV_SIZE; + mmap[j].mm_type = E820_RAM; + j++; + + /* reserve secondary memory region(0x1000 ~ 0x10000) for hv */ + err = __emalloc(ACRN_SECONDARY_SIZE, ACRN_SECONDARY_ADDR, &addr, EfiReservedMemoryType); + if (err != EFI_SUCCESS) + goto out; + + mbi->mi_flags |= MULTIBOOT_INFO_HAS_MMAP | MULTIBOOT_INFO_HAS_CMDLINE; + mbi->mi_mmap_length = j*sizeof(struct multiboot_mmap); + + //mbi->mi_cmdline = (UINTN)"uart=mmio@0x92230000"; + //mbi->mi_cmdline = (UINTN)"uart=port@0x3F8"; + mbi->mi_cmdline = (UINTN)"uart=disabled"; + mbi->mi_mmap_addr = (UINTN)mmap; + +#ifdef LEAGCY_BIOS + /* copy rsdt in low memory space(0~0x1000) for hypervisor parsing */ + memcpy((void *)0x500, (void*)rsdp, sizeof(struct acpi_table_rsdp)); + *(UINT16*)(0x40E) = 0x50; +#endif + 
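+
+	/*
+	 * Snapshot the current execution context (control registers,
+	 * GDT/IDT, task and segment selectors, EFER, RFLAGS and RSP)
+	 * into the efi_ctx structure that is handed to the hypervisor,
+	 * together with the multiboot info, via hv_jump() below.
+	 */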
//Print(L"start 9!\n"); + + asm volatile ("mov %%cr0, %0":"=r"(pe->cr0)); + asm volatile ("mov %%cr3, %0":"=r"(pe->cr3)); + asm volatile ("mov %%cr4, %0":"=r"(pe->cr4)); + asm volatile ("sidt %0" :: "m" (pe->idt)); + asm volatile ("sgdt %0" :: "m" (pe->gdt)); + asm volatile ("str %0" :: "m" (pe->tr_sel)); + asm volatile ("sldt %0" :: "m" (pe->ldt_sel)); + + asm volatile ("mov %%cs, %%ax": "=a"(pe->cs_sel)); + asm volatile ("lar %%eax, %%eax" + :"=a"(pe->cs_ar) + :"a"(pe->cs_sel) + ); + pe->cs_ar = (pe->cs_ar >> 8) & 0xf0ff; /* clear bits 11:8 */ + + asm volatile ("mov %%es, %%ax": "=a"(pe->es_sel)); + asm volatile ("mov %%ss, %%ax": "=a"(pe->ss_sel)); + asm volatile ("mov %%ds, %%ax": "=a"(pe->ds_sel)); + asm volatile ("mov %%fs, %%ax": "=a"(pe->fs_sel)); + asm volatile ("mov %%gs, %%ax": "=a"(pe->gs_sel)); + + + uint32_t idx = 0xC0000080; /* MSR_IA32_EFER */ + uint32_t msrl, msrh; + asm volatile ("rdmsr":"=a"(msrl), "=d"(msrh): "c"(idx)); + pe->efer = ((uint64_t)msrh<<32) | msrl; + + asm volatile ("pushf\n\t" + "pop %0\n\t" + :"=r"(pe->rflags):); + + asm volatile ("movq %%rsp, %0":"=r"(pe->rsp)); + + hv_jump(ACRN_HV_ADDR, mbi, pe); +out: + return err; +} + + +static EFI_STATUS +parse_args(CHAR16 *options, UINT32 size, CHAR16 **name, + CHAR16 **hcmdline, CHAR16 **scmdline) +{ + CHAR16 *n, *p, *cmdline, *search; + UINTN i = 0; + + *hcmdline = NULL; + *scmdline = NULL; + *name = NULL; + + cmdline = StrDuplicate(options); + + search = PoolPrint(L"sos="); + n = strstr_16(cmdline, search); + if (!n) { + Print(L"Failed to get sos\n"); + return EFI_OUT_OF_RESOURCES; + } + FreePool(search); + + + n += 4; + p = n; + i = 0; + while (*n && !isspace((CHAR8)*n)) { + n++; i++; + } + *n++ = '\0'; + *name = p; + + *scmdline = n; + + return EFI_SUCCESS; +} + + +/** + * efi_main - The entry point for the OS loader image. + * @image: firmware-allocated handle that identifies the image + * @sys_table: EFI system table + */ +EFI_STATUS +efi_main(EFI_HANDLE image, EFI_SYSTEM_TABLE *_table) +{ + WCHAR *error_buf; + EFI_STATUS err; + EFI_LOADED_IMAGE *info; + EFI_PHYSICAL_ADDRESS addr; + CHAR16 *options = NULL, *name; + UINT32 options_size = 0; + CHAR16 *hcmdline, *scmdline; + UINTN sec_addr; + UINTN sec_size; + char *section; + + + InitializeLib(image, _table); + + sys_table = _table; + boot = sys_table->BootServices; + runtime = sys_table->RuntimeServices; + + if (CheckCrc(sys_table->Hdr.HeaderSize, &sys_table->Hdr) != TRUE) + return EFI_LOAD_ERROR; + + + err = handle_protocol(image, &LoadedImageProtocol, (void **)&info); + if (err != EFI_SUCCESS) + goto failed; + + options = info->LoadOptions; + options_size = info->LoadOptionsSize; + + err = parse_args(options, options_size, &name, &hcmdline, &scmdline); + if (err != EFI_SUCCESS) + return err; + + section = ".hv"; + err = get_pe_section(info->ImageBase, section, &sec_addr, &sec_size); + if (EFI_ERROR(err)) { + Print(L"Unable to locate section of ACRNHV %r ", err); + goto failed; + } + + err = __emalloc(ACRN_HV_SIZE, ACRN_HV_ADDR, &addr, EfiReservedMemoryType); + if (err != EFI_SUCCESS) + goto failed; + + /* Copy ACRNHV binary to fixed phys addr. LoadImage and StartImage ?? */ + memcpy((char*)addr, info->ImageBase + sec_addr, sec_size); + + /* load sos and run hypervisor */ + err = load_sos_image(image, name, scmdline); + + if (err != EFI_SUCCESS) + goto free_args; + + return EFI_SUCCESS; + +free_args: + free(name); +failed: + /* + * We need to be careful not to trash 'err' here. 
If we fail + * to allocate enough memory to hold the error string fallback + * to returning 'err'. + */ + if (allocate_pool(EfiLoaderData, ERROR_STRING_LENGTH, + (void **)&error_buf) != EFI_SUCCESS) { + Print(L"Couldn't allocate pages for error string\n"); + return err; + } + + StatusToString(error_buf, err); + Print(L": %s\n", error_buf); + return exit(image, err, ERROR_STRING_LENGTH, error_buf); +} + diff --git a/hypervisor/bsp/uefi/efi/boot.h b/hypervisor/bsp/uefi/efi/boot.h new file mode 100644 index 000000000..21cb87536 --- /dev/null +++ b/hypervisor/bsp/uefi/efi/boot.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2011, Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __ACRNBOOT_H__ +#define __ACRNBOOT_H__ + +#define E820_RAM 1 +#define E820_RESERVED 2 +#define E820_ACPI 3 +#define E820_NVS 4 +#define E820_UNUSABLE 5 + +#define ACRN_HV_SIZE 0x2000000 +#define ACRN_HV_ADDR 0x20000000 + +#define ACRN_SECONDARY_SIZE 0xf000 +#define ACRN_SECONDARY_ADDR 0x8000 + + +EFI_STATUS get_pe_section(CHAR8 *base, char *section, UINTN *vaddr, UINTN *size); +EFI_STATUS load_sos_image(EFI_HANDLE image, CHAR16 *name, CHAR16 *cmdline); + +struct efi_info { + UINT32 efi_loader_signature; + UINT32 efi_systab; + UINT32 efi_memdesc_size; + UINT32 efi_memdesc_version; + UINT32 efi_memmap; + UINT32 efi_memmap_size; + UINT32 efi_systab_hi; + UINT32 efi_memmap_hi; +}; + +typedef struct { + UINT16 limit; + UINT64 *base; +} __attribute__((packed)) dt_addr_t; + +struct e820_entry { + UINT64 addr; /* start of memory segment */ + UINT64 size; /* size of memory segment */ + UINT32 type; /* type of memory segment */ +} __attribute__((packed)); + + +struct efi_ctx { + EFI_IMAGE_ENTRY_POINT entry; + EFI_HANDLE handle; + EFI_SYSTEM_TABLE* table; + dt_addr_t gdt; + dt_addr_t idt; + uint16_t tr_sel; + uint16_t ldt_sel; + uint64_t cr0; + uint64_t cr3; + uint64_t cr4; + uint64_t rflags; + uint16_t cs_sel; + uint32_t cs_ar; + uint16_t es_sel; + uint16_t ss_sel; + uint16_t ds_sel; + uint16_t fs_sel; + uint16_t gs_sel; + uint64_t rsp; + uint64_t efer; +}__attribute__((packed)); + +#endif + diff --git a/hypervisor/bsp/uefi/efi/efilinux.h b/hypervisor/bsp/uefi/efi/efilinux.h new file mode 100644 index 000000000..d92ab3e4e --- /dev/null +++ b/hypervisor/bsp/uefi/efi/efilinux.h @@ -0,0 +1,238 @@ +/* + * Copyright (c) 2011, Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This file contains some wrappers around the gnu-efi functions. As + * we're not going through uefi_call_wrapper() directly, this allows + * us to get some type-safety for function call arguments and for the + * compiler to check that the number of function call arguments is + * correct. 
+ * + * It's also a good place to document the EFI interface. + */ + +#ifndef __EFILINUX_H__ +#define __EFILINUX_H__ + +#define EFILINUX_VERSION_MAJOR 1 +#define EFILINUX_VERSION_MINOR 0 + + +extern EFI_SYSTEM_TABLE *sys_table; +extern EFI_BOOT_SERVICES *boot; +extern EFI_RUNTIME_SERVICES *runtime; + +/** + * allocate_pages - Allocate memory pages from the system + * @atype: type of allocation to perform + * @mtype: type of memory to allocate + * @num_pages: number of contiguous 4KB pages to allocate + * @memory: used to return the address of allocated pages + * + * Allocate @num_pages physically contiguous pages from the system + * memory and return a pointer to the base of the allocation in + * @memory if the allocation succeeds. On success, the firmware memory + * map is updated accordingly. + * + * If @atype is AllocateAddress then, on input, @memory specifies the + * address at which to attempt to allocate the memory pages. + */ +static inline EFI_STATUS +allocate_pages(EFI_ALLOCATE_TYPE atype, EFI_MEMORY_TYPE mtype, + UINTN num_pages, EFI_PHYSICAL_ADDRESS *memory) +{ + return uefi_call_wrapper(boot->AllocatePages, 4, atype, + mtype, num_pages, memory); +} + +/** + * free_pages - Return memory allocated by allocate_pages() to the firmware + * @memory: physical base address of the page range to be freed + * @num_pages: number of contiguous 4KB pages to free + * + * On success, the firmware memory map is updated accordingly. + */ +static inline EFI_STATUS +free_pages(EFI_PHYSICAL_ADDRESS memory, UINTN num_pages) +{ + return uefi_call_wrapper(boot->FreePages, 2, memory, num_pages); +} + +/** + * allocate_pool - Allocate pool memory + * @type: the type of pool to allocate + * @size: number of bytes to allocate from pool of @type + * @buffer: used to return the address of allocated memory + * + * Allocate memory from pool of @type. If the pool needs more memory + * pages are allocated from EfiConventionalMemory in order to grow the + * pool. + * + * All allocations are eight-byte aligned. + */ +static inline EFI_STATUS +allocate_pool(EFI_MEMORY_TYPE type, UINTN size, void **buffer) +{ + return uefi_call_wrapper(boot->AllocatePool, 3, type, size, buffer); +} + +/** + * free_pool - Return pool memory to the system + * @buffer: the buffer to free + * + * Return @buffer to the system. The returned memory is marked as + * EfiConventionalMemory. + */ +static inline EFI_STATUS free_pool(void *buffer) +{ + return uefi_call_wrapper(boot->FreePool, 1, buffer); +} + +/** + * get_memory_map - Return the current memory map + * @size: the size in bytes of @map + * @map: buffer to hold the current memory map + * @key: used to return the key for the current memory map + * @descr_size: used to return the size in bytes of EFI_MEMORY_DESCRIPTOR + * @descr_version: used to return the version of EFI_MEMORY_DESCRIPTOR + * + * Get a copy of the current memory map. The memory map is an array of + * EFI_MEMORY_DESCRIPTORs. An EFI_MEMORY_DESCRIPTOR describes a + * contiguous block of memory. + * + * On success, @key is updated to contain an identifer for the current + * memory map. The firmware's key is changed every time something in + * the memory map changes. @size is updated to indicate the size of + * the memory map pointed to by @map. + * + * @descr_size and @descr_version are used to ensure backwards + * compatibility with future changes made to the EFI_MEMORY_DESCRIPTOR + * structure. 
@descr_size MUST be used when the size of an + * EFI_MEMORY_DESCRIPTOR is used in a calculation, e.g when iterating + * over an array of EFI_MEMORY_DESCRIPTORs. + * + * On failure, and if the buffer pointed to by @map is too small to + * hold the memory map, EFI_BUFFER_TOO_SMALL is returned and @size is + * updated to reflect the size of a buffer required to hold the memory + * map. + */ +static inline EFI_STATUS +get_memory_map(UINTN *size, EFI_MEMORY_DESCRIPTOR *map, UINTN *key, + UINTN *descr_size, UINT32 *descr_version) +{ + return uefi_call_wrapper(boot->GetMemoryMap, 5, size, map, + key, descr_size, descr_version); +} + +/** + * exit_boot_serivces - Terminate all boot services + * @image: firmware-allocated handle that identifies the image + * @key: key to the latest memory map + * + * This function is called when efilinux wants to take complete + * control of the system. efilinux should not make calls to boot time + * services after this function is called. + */ +static inline EFI_STATUS +exit_boot_services(EFI_HANDLE image, UINTN key) +{ + return uefi_call_wrapper(boot->ExitBootServices, 2, image, key); +} + + +/** + * handle_protocol - Query @handle to see if it supports @protocol + * @handle: the handle being queried + * @protocol: the GUID of the protocol + * @interface: used to return the protocol interface + * + * Query @handle to see if @protocol is supported. If it is supported, + * @interface contains the protocol interface. + */ +static inline EFI_STATUS +handle_protocol(EFI_HANDLE handle, EFI_GUID *protocol, void **interface) +{ + return uefi_call_wrapper(boot->HandleProtocol, 3, + handle, protocol, interface); +} + + +/** + * exit - Terminate a loaded EFI image + * @image: firmware-allocated handle that identifies the image + * @status: the image's exit code + * @size: size in bytes of @reason. Ignored if @status is EFI_SUCCESS + * @reason: a NUL-terminated status string, optionally followed by binary data + * + * This function terminates @image and returns control to the boot + * services. This function MUST NOT be called until all loaded child + * images have exited. All memory allocated by the image must be freed + * before calling this function, apart from the buffer @reason, which + * will be freed by the firmware. + */ +static inline EFI_STATUS +exit(EFI_HANDLE image, EFI_STATUS status, UINTN size, CHAR16 *reason) +{ + return uefi_call_wrapper(boot->Exit, 4, image, status, size, reason); +} + +#define PAGE_SIZE 4096 + +static const CHAR16 *memory_types[] = { + L"EfiReservedMemoryType", + L"EfiLoaderCode", + L"EfiLoaderData", + L"EfiBootServicesCode", + L"EfiBootServicesData", + L"EfiRuntimeServicesCode", + L"EfiRuntimeServicesData", + L"EfiConventionalMemory", + L"EfiUnusableMemory", + L"EfiACPIReclaimMemory", + L"EfiACPIMemoryNVS", + L"EfiMemoryMappedIO", + L"EfiMemoryMappedIOPortSpace", + L"EfiPalCode", +}; + +static inline const CHAR16 *memory_type_to_str(UINT32 type) +{ + if (type > sizeof(memory_types)/sizeof(CHAR16 *)) + return L"Unknown"; + + return memory_types[type]; +} + +extern EFI_STATUS memory_map(EFI_MEMORY_DESCRIPTOR **map_buf, + UINTN *map_size, UINTN *map_key, + UINTN *desc_size, UINT32 *desc_version); + +#endif /* __EFILINUX_H__ */ diff --git a/hypervisor/bsp/uefi/efi/malloc.c b/hypervisor/bsp/uefi/efi/malloc.c new file mode 100644 index 000000000..5c3297de8 --- /dev/null +++ b/hypervisor/bsp/uefi/efi/malloc.c @@ -0,0 +1,271 @@ +/* + * Copyright (c) 2011, Intel Corporation + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "efilinux.h" +#include "stdlib.h" + +/** + * emalloc - Allocate memory with a strict alignment requirement + * @size: size in bytes of the requested allocation + * @align: the required alignment of the allocation + * @addr: a pointer to the allocated address on success + * + * If we cannot satisfy @align we return 0. + */ +EFI_STATUS emalloc(UINTN size, UINTN align, EFI_PHYSICAL_ADDRESS *addr) +{ + UINTN map_size, map_key, desc_size; + EFI_MEMORY_DESCRIPTOR *map_buf; + UINTN d, map_end; + UINT32 desc_version; + EFI_STATUS err; + UINTN nr_pages = EFI_SIZE_TO_PAGES(size); + + err = memory_map(&map_buf, &map_size, &map_key, + &desc_size, &desc_version); + if (err != EFI_SUCCESS) + goto fail; + + d = (UINTN)map_buf; + map_end = (UINTN)map_buf + map_size; + + for (; d < map_end; d += desc_size) { + EFI_MEMORY_DESCRIPTOR *desc; + EFI_PHYSICAL_ADDRESS start, end, aligned; + + desc = (EFI_MEMORY_DESCRIPTOR *)d; + if (desc->Type != EfiConventionalMemory) + continue; + + if (desc->NumberOfPages < nr_pages) + continue; + + start = desc->PhysicalStart; + end = start + (desc->NumberOfPages << EFI_PAGE_SHIFT); + + /* Low-memory is super-precious! 
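+ * Regions that end at or below 1 MiB are skipped outright, and a
+ * region that starts below 1 MiB is clipped so the candidate range
+ * begins at 1 MiB; the allocation therefore never lands in low memory.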
*/ + if (end <= 1 << 20) + continue; + if (start < 1 << 20) { + size -= (1 << 20) - start; + start = (1 << 20); + } + + aligned = (start + align -1) & ~(align -1); + + if ((aligned + size) <= end) { + err = allocate_pages(AllocateAddress, EfiLoaderData, + nr_pages, &aligned); + if (err == EFI_SUCCESS) { + *addr = aligned; + break; + } + } + } + + if (d == map_end) + err = EFI_OUT_OF_RESOURCES; + + free_pool(map_buf); +fail: + return err; +} + +EFI_STATUS __emalloc(UINTN size, UINTN align, EFI_PHYSICAL_ADDRESS *addr, EFI_MEMORY_TYPE mem_type) +{ + UINTN map_size, map_key, desc_size; + EFI_MEMORY_DESCRIPTOR *map_buf; + UINTN d, map_end; + UINT32 desc_version; + EFI_STATUS err; + UINTN nr_pages = EFI_SIZE_TO_PAGES(size); + + err = memory_map(&map_buf, &map_size, &map_key, + &desc_size, &desc_version); + if (err != EFI_SUCCESS) + goto fail; + + d = (UINTN)map_buf; + map_end = (UINTN)map_buf + map_size; + + for (; d < map_end; d += desc_size) { + EFI_MEMORY_DESCRIPTOR *desc; + EFI_PHYSICAL_ADDRESS start, end, aligned; + + desc = (EFI_MEMORY_DESCRIPTOR *)d; + if (desc->Type != EfiConventionalMemory) + continue; + + if (desc->NumberOfPages < nr_pages) + continue; + + start = desc->PhysicalStart; + end = start + (desc->NumberOfPages << EFI_PAGE_SHIFT); + + /* Low-memory is super-precious! */ + if (end <= 1 << 20) + continue; + if (start < 1 << 20) { + size -= (1 << 20) - start; + start = (1 << 20); + } + + aligned = align;//(start + align -1) & ~(align -1); + + + if ((aligned + size) <= end) { + //Print(L"trying to allocate memory at %0x!\n", aligned); + err = allocate_pages(AllocateAddress, mem_type, + nr_pages, &aligned); + if (err == EFI_SUCCESS) { + //Print(L"trying to allocate memory at %0x, success!\n", aligned); + *addr = aligned; + break; + } { + //Print(L"trying to allocate memory at %0x, failure!\n", aligned); + } + } + } + + if (d == map_end) + err = EFI_OUT_OF_RESOURCES; + + free_pool(map_buf); +fail: + return err; +} + +/** + * efree - Return memory allocated with emalloc + * @memory: the address of the emalloc() allocation + * @size: the size of the allocation + */ +void efree(EFI_PHYSICAL_ADDRESS memory, UINTN size) +{ + UINTN nr_pages = EFI_SIZE_TO_PAGES(size); + + free_pages(memory, nr_pages); +} + +/** + * malloc - Allocate memory from the EfiLoaderData pool + * @size: size in bytes of the requested allocation + * + * Return a pointer to an allocation of @size bytes of type + * EfiLoaderData. + */ +void *malloc(UINTN size) +{ + EFI_STATUS err; + void *buffer; + + err = allocate_pool(EfiLoaderData, size, &buffer); + if (err != EFI_SUCCESS) + buffer = NULL; + + return buffer; +} + +/** + * free - Release memory to the EfiLoaderData pool + * @buffer: pointer to the malloc() allocation to free + */ +void free(void *buffer) +{ + if (buffer) + free_pool(buffer); +} + +/** + * calloc - Allocate zeroed memory for an array of elements + * @nmemb: number of elements + * @size: size of each element + */ +void *calloc(UINTN nmemb, UINTN size) +{ + void *buffer; + + /* + * There's no equivalent of UINTN_MAX, so for safety we refuse to + * allocate anything larger than 32 bits. 
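+ * The division check below only runs when either operand is wider than
+ * 16 bits, since two values that both fit in 16 bits cannot overflow
+ * the product; e.g. on a 32-bit UINTN, calloc(0x10000, 0x10000) wraps
+ * to 0 and is rejected because bytes / size != nmemb.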
+ */ + UINTN bytes = nmemb * size; + if ((nmemb | size) > 0xffffU) { + if (size && bytes / size != nmemb) + return NULL; + } + + buffer = malloc(bytes); + if (buffer) + memset(buffer, 0, bytes); + return buffer; +} + +EFI_STATUS dump_e820(void) +{ + UINTN map_size, map_key, desc_size; + EFI_MEMORY_DESCRIPTOR *map_buf; + UINTN d, map_end; + UINTN i; + UINT32 desc_version; + EFI_STATUS err; + + err = memory_map(&map_buf, &map_size, &map_key, + &desc_size, &desc_version); + if (err != EFI_SUCCESS) + goto fail; + + d = (UINTN)map_buf; + map_end = (UINTN)map_buf + map_size; + + for (i = 0; d < map_end; d += desc_size, i++) { + EFI_MEMORY_DESCRIPTOR *desc; + EFI_PHYSICAL_ADDRESS start, end; + + desc = (EFI_MEMORY_DESCRIPTOR *)d; + if (desc->Type != EfiConventionalMemory) + continue; + + start = desc->PhysicalStart; + end = start + (desc->NumberOfPages << EFI_PAGE_SHIFT); + + Print(L"[%d]start:%lx, end:%lx, type:%d\n", i, start, end, desc->Type); + } + + free_pool(map_buf); +fail: + return err; +} + diff --git a/hypervisor/bsp/uefi/efi/multiboot.h b/hypervisor/bsp/uefi/efi/multiboot.h new file mode 100644 index 000000000..2046917d3 --- /dev/null +++ b/hypervisor/bsp/uefi/efi/multiboot.h @@ -0,0 +1,186 @@ +/* [ORIGIN: src/sys/arch/i386/include/... */ +/* $NetBSD: multiboot.h,v 1.8 2009/02/22 18:05:42 ahoka Exp $ */ + +/*- + * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Julio M. Merino Vidal. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * multiboot.h + */ + +#ifndef _MULTIBOOT_H +#define _MULTIBOOT_H + +#include +//typedef uintptr_t uint32_t; +typedef uintptr_t vaddr_t; + +struct multiboot_info; +extern struct multiboot_info mbi; + +// ======================================================================== + +/* + * Multiboot header structure. 
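+ * Per the Multiboot specification, the three leading 32-bit fields must
+ * sum to zero, so a loader-side sanity check is roughly (sketch, with h
+ * a hypothetical pointer to a candidate header):
+ *
+ *     ok = (h->mh_magic == MULTIBOOT_HEADER_MAGIC) &&
+ *          ((uint32_t)(h->mh_magic + h->mh_flags + h->mh_checksum) == 0);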
+ */ +#define MULTIBOOT_HEADER_MAGIC 0x1BADB002 +#define MULTIBOOT_HEADER_MODS_ALIGNED 0x00000001 +#define MULTIBOOT_HEADER_WANT_MEMORY 0x00000002 +#define MULTIBOOT_HEADER_HAS_VBE 0x00000004 +#define MULTIBOOT_HEADER_HAS_ADDR 0x00010000 + +#if !defined(_LOCORE) +struct multiboot_header { + uint32_t mh_magic; + uint32_t mh_flags; + uint32_t mh_checksum; + + /* Valid if mh_flags sets MULTIBOOT_HEADER_HAS_ADDR. */ + uint32_t mh_header_addr; + uint32_t mh_load_addr; + uint32_t mh_load_end_addr; + uint32_t mh_bss_end_addr; + uint32_t mh_entry_addr; + + /* Valid if mh_flags sets MULTIBOOT_HEADER_HAS_VBE. */ + uint32_t mh_mode_type; + uint32_t mh_width; + uint32_t mh_height; + uint32_t mh_depth; +}; +#endif /* !defined(_LOCORE) */ + +/* + * Symbols defined in locore.S. + */ +extern struct multiboot_header *Multiboot_Header; + +// ======================================================================== +/* + * Multiboot information structure. + */ +#define MULTIBOOT_INFO_MAGIC 0x2BADB002 +#define MULTIBOOT_INFO_HAS_MEMORY 0x00000001 +#define MULTIBOOT_INFO_HAS_BOOT_DEVICE 0x00000002 +#define MULTIBOOT_INFO_HAS_CMDLINE 0x00000004 +#define MULTIBOOT_INFO_HAS_MODS 0x00000008 +#define MULTIBOOT_INFO_HAS_AOUT_SYMS 0x00000010 +#define MULTIBOOT_INFO_HAS_ELF_SYMS 0x00000020 +#define MULTIBOOT_INFO_HAS_MMAP 0x00000040 +#define MULTIBOOT_INFO_HAS_DRIVES 0x00000080 +#define MULTIBOOT_INFO_HAS_CONFIG_TABLE 0x00000100 +#define MULTIBOOT_INFO_HAS_LOADER_NAME 0x00000200 +#define MULTIBOOT_INFO_HAS_APM_TABLE 0x00000400 +#define MULTIBOOT_INFO_HAS_VBE 0x00000800 + +#if !defined(_LOCORE) +struct multiboot_info { + uint32_t mi_flags; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_MEMORY. */ + uint32_t mi_mem_lower; + uint32_t mi_mem_upper; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_BOOT_DEVICE. */ + uint8_t mi_boot_device_part3; + uint8_t mi_boot_device_part2; + uint8_t mi_boot_device_part1; + uint8_t mi_boot_device_drive; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_CMDLINE. */ + uint32_t mi_cmdline; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_MODS. */ + uint32_t mi_mods_count; + uint32_t mi_mods_addr; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_{AOUT,ELF}_SYMS. */ + uint32_t mi_elfshdr_num; + uint32_t mi_elfshdr_size; + uint32_t mi_elfshdr_addr; + uint32_t mi_elfshdr_shndx; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_MMAP. */ + uint32_t mi_mmap_length; + uint32_t mi_mmap_addr; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_DRIVES. */ + uint32_t mi_drives_length; + uint32_t mi_drives_addr; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_CONFIG_TABLE. */ + uint32_t unused_mi_config_table; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_LOADER_NAME. */ + uint32_t mi_loader_name; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_APM. */ + uint32_t unused_mi_apm_table; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_VBE. */ + uint32_t unused_mi_vbe_control_info; + uint32_t unused_mi_vbe_mode_info; + uint32_t unused_mi_vbe_interface_seg; + uint32_t unused_mi_vbe_interface_off; + uint32_t unused_mi_vbe_interface_len; +}; + + +/* + * Memory mapping. This describes an entry in the memory mappings table + * as pointed to by mi_mmap_addr. + * + * Be aware that mm_size specifies the size of all other fields *except* + * for mm_size. In order to jump between two different entries, you + * have to count mm_size + 4 bytes. + */ +struct __attribute__((packed)) multiboot_mmap { + uint32_t mm_size; + uint64_t mm_base_addr; + uint64_t mm_length; + uint32_t mm_type; +}; + +/* + * Modules. 
This describes an entry in the modules table as pointed + * to by mi_mods_addr. + */ + +struct multiboot_module { + uint32_t mmo_start; + uint32_t mmo_end; + char * mmo_string; + uint32_t mmo_reserved; +}; + +#endif /* !defined(_LOCORE) */ + +// ======================================================================== + +#endif /* _MULTIBOOT_H */ diff --git a/hypervisor/bsp/uefi/efi/pe.c b/hypervisor/bsp/uefi/efi/pe.c new file mode 100644 index 000000000..c0995bf5a --- /dev/null +++ b/hypervisor/bsp/uefi/efi/pe.c @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2011, Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This file contains some wrappers around the gnu-efi functions. As + * we're not going through uefi_call_wrapper() directly, this allows + * us to get some type-safety for function call arguments and for the + * compiler to check that the number of function call arguments is + * correct. + * + * It's also a good place to document the EFI interface. 
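+ *
+ * Note that, unlike the files above, this one does not wrap boot
+ * services: it only parses DOS/PE headers, providing get_pe_section()
+ * to look up a named section and get_pe_entry() to return the image
+ * entry point.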
+ */ + +#include +#include +#include "stdlib.h" + +#define DOS_FILE_MAGIC_NUMBER 0x5A4D //"MZ" +struct DosFileHeader { + uint16_t mMagic; + uint16_t LastSize; + uint16_t nBlocks; + uint16_t nReloc; + uint16_t HdrSize; + uint16_t MinAlloc; + uint16_t MaxAlloc; + uint16_t ss; + uint16_t sp; + uint16_t Checksum; + uint16_t ip; + uint16_t cs; + uint16_t RelocPos; + uint16_t nOverlay; + uint16_t reserved[4]; + uint16_t OEMId; + uint16_t OEMInfo; + uint16_t reserved2[10]; + uint32_t ExeHeader; +} __attribute__((packed)); + +#define IMAGE_FILE_MACHINE_I386 0x14c +#define IMAGE_FILE_MACHINE_AMD64 0x8664 +#define PE_FILE_MAGIC_NUMBER 0x00004550 //"PE\0\0" +struct PeHeader { + uint32_t mMagic; + uint16_t mMachine; + uint16_t mNumberOfSections; + uint32_t mTimeDateStamp; + uint32_t mPointerToSymbolTable; + uint32_t mNumberOfSymbols; + uint16_t mSizeOfOptionalHeader; + uint16_t mCharacteristics; +} __attribute__((packed)); + +struct OptionHeader { + uint16_t Format; + uint8_t MajorLinkVer; + uint8_t MinorLinkVer; + uint32_t CodeSize; + uint32_t InitializedDataSize; + uint32_t UninitializedDataSize; + uint32_t EntryPoint; + uint32_t BaseOfCode; + uint32_t BaseOfDate; +} __attribute__((packed)); + + +struct PeSectionHeader { + char mName[8]; + uint32_t mVirtualSize; + uint32_t mVirtualAddress; + uint32_t mSizeOfRawData; + uint32_t mPointerToRawData; + uint32_t mPointerToRealocations; + uint32_t mPointerToLinenumbers; + uint16_t mNumberOfRealocations; + uint16_t mNumberOfLinenumbers; + uint32_t mCharacteristics; +} __attribute__((packed)); + + +EFI_STATUS get_pe_section(CHAR8 *base, char *section, UINTN *vaddr, UINTN *size) +{ + struct PeSectionHeader *ph; + struct DosFileHeader *dh; + struct PeHeader *pe; + UINTN i; + UINTN offset; + + dh = (struct DosFileHeader *)base; + + if (dh->mMagic != DOS_FILE_MAGIC_NUMBER) + return EFI_LOAD_ERROR; + + pe = (struct PeHeader *)&base[dh->ExeHeader]; + if (pe->mMagic != PE_FILE_MAGIC_NUMBER) + return EFI_LOAD_ERROR; + + if ((pe->mMachine != IMAGE_FILE_MACHINE_AMD64) + && (pe->mMachine != IMAGE_FILE_MACHINE_I386)) + return EFI_LOAD_ERROR; + + offset = dh->ExeHeader + sizeof(*pe) + pe->mSizeOfOptionalHeader; + + for (i = 0; i < pe->mNumberOfSections; i++) { + ph = (struct PeSectionHeader *)&base[offset]; + if (CompareMem(ph->mName, section, strlen(section)) == 0) { + *vaddr = (UINTN)ph->mVirtualAddress; + *size = (UINTN)ph->mVirtualSize; + break; + } + + offset += sizeof(*ph); + } + + return EFI_SUCCESS; +} + + +EFI_IMAGE_ENTRY_POINT get_pe_entry(CHAR8 *base) +{ + struct DosFileHeader* dh; + struct PeHeader* pe; + struct OptionHeader* oh; + UINTN offset; + + dh = (struct DosFileHeader *)base; + + if (dh->mMagic != DOS_FILE_MAGIC_NUMBER) + return NULL; + + pe = (struct PeHeader *)&base[dh->ExeHeader]; + if (pe->mMagic != PE_FILE_MAGIC_NUMBER) + return NULL; + + if ((pe->mMachine != IMAGE_FILE_MACHINE_AMD64) + && (pe->mMachine != IMAGE_FILE_MACHINE_I386)) + return NULL; + + offset = dh->ExeHeader + sizeof(*pe); + oh = (struct OptionHeader*)&base[offset]; + + return (EFI_IMAGE_ENTRY_POINT)((UINT64)base + oh->EntryPoint); +} diff --git a/hypervisor/bsp/uefi/efi/stdlib.h b/hypervisor/bsp/uefi/efi/stdlib.h new file mode 100644 index 000000000..1323e5fa6 --- /dev/null +++ b/hypervisor/bsp/uefi/efi/stdlib.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2011, Intel Corporation + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This file contains some wrappers around the gnu-efi functions. As + * we're not going through uefi_call_wrapper() directly, this allows + * us to get some type-safety for function call arguments and for the + * compiler to check that the number of function call arguments is + * correct. + * + * It's also a good place to document the EFI interface. 
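+ *
+ * Note that this header actually carries small freestanding helpers
+ * (malloc/calloc/free wrappers plus memset, memcpy, strlen, strstr,
+ * strdup and strstr_16) rather than gnu-efi call wrappers.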
+ */ + + + +#ifndef __STDLIB_H__ +#define __STDLIB_H__ + +extern void *malloc(UINTN size); +extern void free(void *buf); +extern void *calloc(UINTN nmemb, UINTN size); + +extern EFI_STATUS emalloc(UINTN, UINTN, EFI_PHYSICAL_ADDRESS *); +extern EFI_STATUS __emalloc(UINTN, UINTN, EFI_PHYSICAL_ADDRESS *, EFI_MEMORY_TYPE); +extern void efree(EFI_PHYSICAL_ADDRESS, UINTN); + +static inline void memset(void *dstv, char ch, UINTN size) +{ + char *dst = dstv; + int i; + + for (i = 0; i < size; i++) + dst[i] = ch; +} + +static inline void memcpy(char *dst, const char *src, UINTN size) +{ + int i; + + for (i = 0; i < size; i++) + *dst++ = *src++; +} + +static inline int strlen(const char *str) +{ + int len; + + len = 0; + while (*str++) + len++; + + return len; +} + +static inline char *strstr(const char *haystack, const char *needle) +{ + const char *p; + const char *word = NULL; + int len = strlen(needle); + + if (!len) + return NULL; + + p = haystack; + while (*p) { + word = p; + if (!strncmpa((CHAR8 *)p, (CHAR8 *)needle, len)) + break; + p++; + word = NULL; + } + + return (char *)word; +} + +static inline char *strdup(const char *src) +{ + int len; + char *dst; + + len = strlen(src); + dst = malloc(len + 1); + if (dst) + memcpy(dst, src, len + 1); + return dst; +} + +static inline CHAR16 *strstr_16(CHAR16 *haystack, CHAR16 *needle) +{ + CHAR16 *p; + CHAR16 *word = NULL; + UINTN len = StrLen(needle); + + if (!len) + return NULL; + + p = haystack; + while (*p) { + if (!StrnCmp(p, needle, len)) { + word = p; + break; + } + p++; + } + + return (CHAR16*)word; +} + +#endif /* __STDLIB_H__ */ diff --git a/hypervisor/bsp/uefi/include/bsp/bsp_cfg.h b/hypervisor/bsp/uefi/include/bsp/bsp_cfg.h new file mode 100644 index 000000000..96b6a46f3 --- /dev/null +++ b/hypervisor/bsp/uefi/include/bsp/bsp_cfg.h @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef BSP_CFG_H +#define BSP_CFG_H +#define NR_IOAPICS 1 +#define STACK_SIZE 8192 +#define LOG_BUF_SIZE 0x100000 +#define LOG_DESTINATION 3 +#define CPU_UP_TIMEOUT 100 +#define CONFIG_SERIAL_PIO_BASE 0x3f8 +#define MALLOC_ALIGN 16 +#define NUM_ALLOC_PAGES 4096 +#define HEAP_SIZE 0x100000 +#define CONSOLE_LOGLEVEL_DEFAULT 2 +#define MEM_LOGLEVEL_DEFAULT 4 +#define CONFIG_LOW_RAM_START 0x00008000 +#define CONFIG_LOW_RAM_SIZE 0x000CF000 +#define CONFIG_RAM_START 0x20000000 +#define CONFIG_RAM_SIZE 0x02000000 /* 32M */ +#define CONFIG_DMAR_PARSE_ENABLED 1 +#define CONFIG_GPU_SBDF 0x00000010 /* 0000:00:02.0 */ +#define CONFIG_EFI_STUB 1 +#define CONFIG_RETPOLINE +#endif /* BSP_CFG_H */ diff --git a/hypervisor/bsp/uefi/uefi.c b/hypervisor/bsp/uefi/uefi.c new file mode 100644 index 000000000..52e25318c --- /dev/null +++ b/hypervisor/bsp/uefi/uefi.c @@ -0,0 +1,160 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_EFI_STUB +#include +#endif +#include + +/* IOAPIC id */ +#define UEFI_IOAPIC_ID 8 +/* IOAPIC base address */ +#define UEFI_IOAPIC_ADDR 0xfec00000 +/* IOAPIC range size */ +#define UEFI_IOAPIC_SIZE 0x100000 +/* Local APIC base address */ +#define UEFI_LAPIC_ADDR 0xfee00000 +/* Local APIC range size */ +#define UEFI_LAPIC_SIZE 0x100000 +/* Number of PCI IRQ assignments */ +#define UEFI_PCI_IRQ_ASSIGNMENT_NUM 28 + +#ifdef CONFIG_EFI_STUB +uint32_t efi_physical_available_ap_bitmap = 0; +uint32_t efi_wake_up_ap_bitmap = 0; +struct efi_ctx* efi_ctx = NULL; +int efi_launch_vector; +extern uint32_t up_count; +extern unsigned long pcpu_sync; + +bool in_efi_boot_svc(void) +{ + return (efi_wake_up_ap_bitmap != efi_physical_available_ap_bitmap); +} + +int efi_spurious_handler(int vector) +{ + struct vcpu* vcpu; + + if (get_cpu_id() != 0) + return 0; + + vcpu = per_cpu(vcpu, 0); + if (vcpu && vcpu->launched) { + int ret = vlapic_set_intr(vcpu, vector, 0); + if (ret && in_efi_boot_svc()) + exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD, + VMX_INT_INFO_VALID | vector); + } else + efi_launch_vector = vector; + + return 1; +} + +int sipi_from_efi_boot_service_exit(uint32_t dest, uint32_t mode, uint32_t vec) +{ + if (efi_wake_up_ap_bitmap != efi_physical_available_ap_bitmap) { + if (mode == APIC_DELMODE_STARTUP) { + uint32_t cpu_id = cpu_find_logical_id(dest); + send_startup_ipi(INTR_CPU_STARTUP_USE_DEST, + cpu_id, (paddr_t)(vec<<12)); + efi_wake_up_ap_bitmap |= 1 << dest; + } + + return 1; + } + + return 0; +} + +void efi_deferred_wakeup_pcpu(int cpu_id) +{ + uint32_t timeout; + uint32_t expected_up; + + expected_up = up_count + 1; + + send_startup_ipi(INTR_CPU_STARTUP_USE_DEST, + cpu_id, (paddr_t)cpu_secondary_reset); + + timeout = CPU_UP_TIMEOUT * 1000; + + while ((up_count != expected_up)) { + /* Delay 10us */ + udelay(10); + + /* Decrement timeout value */ + timeout -= 10; + } + + bitmap_set(0, &pcpu_sync); +} + +int uefi_sw_loader(struct vm *vm, struct vcpu *vcpu) +{ + int ret = 0; + struct run_context *cur_context = + &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + + ASSERT(vm != NULL, "Incorrect argument"); + + pr_dbg("Loading guest to run-time location"); + + if (!is_vm0(vm)) + return load_guest(vm, vcpu); + + vcpu->entry_addr = efi_ctx->entry; + cur_context->guest_cpu_regs.regs.rcx = efi_ctx->handle; + cur_context->guest_cpu_regs.regs.rdx = efi_ctx->table; + + return ret; +} +#endif + +void init_bsp(void) +{ + parse_hv_cmdline(); + +#ifdef CONFIG_EFI_STUB + efi_ctx = (struct efi_ctx*)(uint64_t)boot_regs[2]; + ASSERT(efi_ctx != NULL, ""); + + vm_sw_loader = uefi_sw_loader; + + spurious_handler = efi_spurious_handler; + efi_launch_vector = -1; +#endif +} diff --git a/hypervisor/bsp/uefi/vm_description.c b/hypervisor/bsp/uefi/vm_description.c new file mode 100644 index 000000000..0407da6c2 --- /dev/null +++ b/hypervisor/bsp/uefi/vm_description.c @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#define NUM_USER_VMS 2 + +/* Number of CPUs in VM0 */ +#define VM0_NUM_CPUS 1 + +/* Logical CPU IDs assigned to VM0 */ +int VM0_CPUS[VM0_NUM_CPUS] = {0}; + +/* Number of CPUs in VM1 */ +#define VM1_NUM_CPUS 2 + +/* Logical CPU IDs assigned with VM1 */ +int VM1_CPUS[VM1_NUM_CPUS] = {3, 1}; + +const struct vm_description_array vm_desc = { + /* Number of user virtual machines */ + .num_vm_desc = NUM_USER_VMS, + + /* Virtual Machine descriptions */ + .vm_desc_array = { + { + .vm_attr_name = "vm_0", + .vm_hw_num_cores = VM0_NUM_CPUS, + .vm_hw_logical_core_ids = &VM0_CPUS[0], + .vm_state_info_privilege = VM_PRIVILEGE_LEVEL_HIGH, + .vm_created = false, + }, + } +}; + +const struct vm_description_array *get_vm_desc_base(void) +{ + return &vm_desc; +} diff --git a/hypervisor/common/hv_main.c b/hypervisor/common/hv_main.c new file mode 100644 index 000000000..300190e96 --- /dev/null +++ b/hypervisor/common/hv_main.c @@ -0,0 +1,206 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +bool x2apic_enabled; + +static DEFINE_CPU_DATA(uint64_t[64], vmexit_cnt); +static DEFINE_CPU_DATA(uint64_t[64], vmexit_time); + +static void run_vcpu_pre_work(struct vcpu *vcpu) +{ + unsigned long *pending_pre_work = &vcpu->pending_pre_work; + + if (bitmap_test_and_clear(ACRN_VCPU_MMIO_COMPLETE, pending_pre_work)) + dm_emulate_mmio_post(vcpu); +} + +void vcpu_thread(struct vcpu *vcpu) +{ + uint64_t vmexit_begin, vmexit_end; + uint16_t exit_reason; + uint64_t tsc_aux_hyp_cpu = vcpu->pcpu_id; + struct vm_exit_dispatch *vmexit_hdlr; + int ret = 0; + + vmexit_begin = vmexit_end = exit_reason = 0; + /* If vcpu is not launched, we need to do init_vmcs first */ + if (!vcpu->launched) + init_vmcs(vcpu); + + run_vcpu_pre_work(vcpu); + + do { + /* handling pending softirq */ + CPU_IRQ_ENABLE(); + exec_softirq(); + CPU_IRQ_DISABLE(); + + /* Check and process interrupts */ + acrn_do_intr_process(vcpu); + + if (need_rescheduled(vcpu->pcpu_id)) { + /* + * In the extreme case, schedule() may return right away: the + * vcpu resume arrived before the suspend that triggered this + * reschedule had completed. In that case, redo the pre-work + * and continue the vcpu loop after schedule() returns.
+ */ + schedule(); + run_vcpu_pre_work(vcpu); + continue; + } + + vmexit_end = rdtsc(); + if (vmexit_begin > 0) + per_cpu(vmexit_time, vcpu->pcpu_id)[exit_reason] + += (vmexit_end - vmexit_begin); + TRACE_2L(TRACE_VM_ENTER, 0, 0); + + /* Restore guest TSC_AUX */ + if (vcpu->launched) { + CPU_MSR_WRITE(MSR_IA32_TSC_AUX, + vcpu->msr_tsc_aux_guest); + } + + ret = start_vcpu(vcpu); + ASSERT(ret == 0, "vcpu resume failed"); + + vmexit_begin = rdtsc(); + + vcpu->arch_vcpu.nrexits++; + /* Save guest TSC_AUX */ + CPU_MSR_READ(MSR_IA32_TSC_AUX, &vcpu->msr_tsc_aux_guest); + /* Restore native TSC_AUX */ + CPU_MSR_WRITE(MSR_IA32_TSC_AUX, tsc_aux_hyp_cpu); + ASSERT((int)get_cpu_id() == vcpu->pcpu_id, ""); + + /* Dispatch handler */ + vmexit_hdlr = vmexit_handler(vcpu); + ASSERT(vmexit_hdlr != 0, + "Unable to dispatch VM exit handler!"); + + exit_reason = vcpu->arch_vcpu.exit_reason & 0xFFFF; + per_cpu(vmexit_cnt, vcpu->pcpu_id)[exit_reason]++; + TRACE_2L(TRACE_VM_EXIT, exit_reason, + vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].rip); + + if (exit_reason == VMX_EXIT_REASON_EXTERNAL_INTERRUPT) { + /* Handling external_interrupt + * should disable intr + */ + vmexit_hdlr->handler(vcpu); + } else { + CPU_IRQ_ENABLE(); + vmexit_hdlr->handler(vcpu); + CPU_IRQ_DISABLE(); + } + } while (1); +} + +static bool is_vm0_bsp(int pcpu_id) +{ + struct vm_description *vm_desc = get_vm_desc(0); + + ASSERT(vm_desc, "get vm desc failed"); + return pcpu_id == vm_desc->vm_hw_logical_core_ids[0]; +} + +int hv_main(int cpu_id) +{ + int ret = 0; + + pr_info("%s, Starting common entry point for CPU %d", + __func__, cpu_id); + ASSERT(cpu_id < phy_cpu_num, "cpu_id out of range"); + + ASSERT((uint64_t) cpu_id == get_cpu_id(), + "cpu_id/tsc_aux mismatch"); + + /* Check if virtualization extensions are supported */ + ret = check_vmx_support(); + ASSERT(ret == 0, "VMX not supported!"); + + /* Enable virtualization extensions */ + ret = exec_vmxon_instr(); + ASSERT(ret == 0, "Unable to enable VMX!"); + + /* X2APIC mode is disabled by default. */ + x2apic_enabled = false; + + if (is_vm0_bsp(cpu_id)) + prepare_vm0(); + + default_idle(); + + return ret; +} + +int get_vmexit_profile(char *str, int str_max) +{ + int cpu, i, len, size = str_max; + + len = snprintf(str, size, "\r\nNow(us) = %16lld\r\n", + TICKS_TO_US(rdtsc())); + size -= len; + str += len; + + len = snprintf(str, size, "\r\nREASON"); + size -= len; + str += len; + + for (cpu = 0; cpu < phy_cpu_num; cpu++) { + len = snprintf(str, size, "\t CPU%d\t US", cpu); + size -= len; + str += len; + } + + for (i = 0; i < 64; i++) { + len = snprintf(str, size, "\r\n0x%x", i); + size -= len; + str += len; + for (cpu = 0; cpu < phy_cpu_num; cpu++) { + len = snprintf(str, size, "\t%10lld\t%10lld", + per_cpu(vmexit_cnt, cpu)[i], + TICKS_TO_US(per_cpu(vmexit_time, cpu)[i])); + size -= len; + str += len; + } + } + snprintf(str, size, "\r\n"); + return 0; +} diff --git a/hypervisor/common/hypercall.c b/hypervisor/common/hypercall.c new file mode 100644 index 000000000..092ba196c --- /dev/null +++ b/hypervisor/common/hypercall.c @@ -0,0 +1,868 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ACRN_DBG_HYCALL 6 + +int64_t hcall_get_api_version(struct vm *vm, uint64_t param) +{ + struct hc_api_version version; + + if (!is_vm0(vm)) + return -1; + + version.major_version = HV_MAJOR_VERSION; + version.minor_version = HV_MINOR_VERSION; + + if (copy_to_vm(vm, &version, param)) { + pr_err("%s: Unable copy param to vm\n", __func__); + return -1; + } + + return 0; +} + +static int handle_vpic_irqline(struct vm *vm, int irq, enum irq_mode mode) +{ + int ret = -1; + + if (!vm) + return ret; + + switch (mode) { + case IRQ_ASSERT: + ret = vpic_assert_irq(vm, irq); + break; + case IRQ_DEASSERT: + ret = vpic_deassert_irq(vm, irq); + break; + case IRQ_PULSE: + ret = vpic_pulse_irq(vm, irq); + default: + break; + } + + return ret; +} + +static int +handle_vioapic_irqline(struct vm *vm, int irq, enum irq_mode mode) +{ + int ret = -1; + + if (!vm) + return ret; + + switch (mode) { + case IRQ_ASSERT: + ret = vioapic_assert_irq(vm, irq); + break; + case IRQ_DEASSERT: + ret = vioapic_deassert_irq(vm, irq); + break; + case IRQ_PULSE: + ret = vioapic_pulse_irq(vm, irq); + break; + default: + break; + } + return ret; +} + +static int handle_virt_irqline(struct vm *vm, uint64_t target_vmid, + struct acrn_irqline *param, enum irq_mode mode) +{ + int ret = 0; + long intr_type; + struct vm *target_vm = get_vm_from_vmid(target_vmid); + + if (!vm || !param) + return -1; + + intr_type = param->intr_type; + + switch (intr_type) { + case ACRN_INTR_TYPE_ISA: + /* Call vpic for pic injection */ + ret = handle_vpic_irqline(target_vm, param->pic_irq, mode); + + /* call vioapic for ioapic injection if ioapic_irq != -1*/ + if (param->ioapic_irq != -1UL) { + /* handle IOAPIC irqline */ + ret = handle_vioapic_irqline(target_vm, + param->ioapic_irq, mode); + } + break; + case ACRN_INTR_TYPE_IOAPIC: + /* handle IOAPIC irqline */ + ret = handle_vioapic_irqline(target_vm, + param->ioapic_irq, mode); + break; + default: + dev_dbg(ACRN_DBG_HYCALL, "vINTR inject failed. 
type=%d", + intr_type); + ret = -1; + } + return ret; +} + +int64_t hcall_create_vm(struct vm *vm, uint64_t param) +{ + int64_t ret = 0; + struct vm *target_vm = NULL; + /* VM are created from hv_main() directly + * Here we just return the vmid for DM + */ + struct acrn_create_vm cv; + struct vm_description vm_desc; + + memset((void *)&cv, 0, sizeof(cv)); + if (copy_from_vm(vm, &cv, param)) { + pr_err("%s: Unable copy param to vm\n", __func__); + return -1; + } + + memset(&vm_desc, 0, sizeof(vm_desc)); + vm_desc.secure_world_enabled = cv.secure_world_enabled; + memcpy_s(&vm_desc.GUID[0], 16, &cv.GUID[0], 16); + ret = create_vm(&vm_desc, &target_vm); + + if (ret != 0) { + dev_dbg(ACRN_DBG_HYCALL, "HCALL: Create VM failed"); + cv.vmid = ACRN_INVALID_VMID; + ret = -1; + } else { + cv.vmid = target_vm->attr.id; + ret = 0; + } + + if (copy_to_vm(vm, &cv.vmid, param)) { + pr_err("%s: Unable copy param to vm\n", __func__); + return -1; + } + + return ret; +} + +int64_t hcall_destroy_vm(uint64_t vmid) +{ + int64_t ret = 0; + struct vm *target_vm = get_vm_from_vmid(vmid); + + if (target_vm == NULL) + return -1; + + ret = shutdown_vm(target_vm); + return ret; +} + +int64_t hcall_resume_vm(uint64_t vmid) +{ + int64_t ret = 0; + struct vm *target_vm = get_vm_from_vmid(vmid); + + if (target_vm == NULL) + return -1; + if (target_vm->sw.req_buf == 0) + ret = -1; + else + ret = start_vm(target_vm); + + return ret; +} + +int64_t hcall_pause_vm(uint64_t vmid) +{ + struct vm *target_vm = get_vm_from_vmid(vmid); + + if (target_vm == NULL) + return -1; + + pause_vm(target_vm); + + return 0; +} + +int64_t hcall_create_vcpu(struct vm *vm, uint64_t vmid, uint64_t param) +{ + int ret, pcpu_id; + struct acrn_create_vcpu cv; + + struct vm *target_vm = get_vm_from_vmid(vmid); + + if (!target_vm || !param) + return -1; + + if (copy_from_vm(vm, &cv, param)) { + pr_err("%s: Unable copy param to vm\n", __func__); + return -1; + } + + pcpu_id = allocate_pcpu(); + if (-1 == pcpu_id) { + pr_err("%s: No physical available\n", __func__); + return -1; + } + + ret = prepare_vcpu(target_vm, pcpu_id); + + return ret; +} + +int64_t hcall_assert_irqline(struct vm *vm, uint64_t vmid, uint64_t param) +{ + int64_t ret = 0; + struct acrn_irqline irqline; + + if (copy_from_vm(vm, &irqline, param)) { + pr_err("%s: Unable copy param to vm\n", __func__); + return -1; + } + ret = handle_virt_irqline(vm, vmid, &irqline, IRQ_ASSERT); + + return ret; +} + +int64_t hcall_deassert_irqline(struct vm *vm, uint64_t vmid, uint64_t param) +{ + int64_t ret = 0; + struct acrn_irqline irqline; + + if (copy_from_vm(vm, &irqline, param)) { + pr_err("%s: Unable copy param to vm\n", __func__); + return -1; + } + ret = handle_virt_irqline(vm, vmid, &irqline, IRQ_DEASSERT); + + return ret; +} + +int64_t hcall_pulse_irqline(struct vm *vm, uint64_t vmid, uint64_t param) +{ + int64_t ret = 0; + struct acrn_irqline irqline; + + if (copy_from_vm(vm, &irqline, param)) { + pr_err("%s: Unable copy param to vm\n", __func__); + return -1; + } + ret = handle_virt_irqline(vm, vmid, &irqline, IRQ_PULSE); + + return ret; +} + +int64_t hcall_inject_msi(struct vm *vm, uint64_t vmid, uint64_t param) +{ + int ret = 0; + struct acrn_msi_entry msi; + struct vm *target_vm = get_vm_from_vmid(vmid); + + if (target_vm == NULL) + return -1; + + memset((void *)&msi, 0, sizeof(msi)); + if (copy_from_vm(vm, &msi, param)) { + pr_err("%s: Unable copy param to vm\n", __func__); + return -1; + } + ret = vlapic_intr_msi(target_vm, msi.msi_addr, msi.msi_data); + + return ret; +} + +int64_t 
hcall_set_ioreq_buffer(struct vm *vm, uint64_t vmid, uint64_t param) +{ + int64_t ret = 0; + struct acrn_set_ioreq_buffer iobuf; + struct vm *target_vm = get_vm_from_vmid(vmid); + + if (target_vm == NULL) + return -1; + + memset((void *)&iobuf, 0, sizeof(iobuf)); + + if (copy_from_vm(vm, &iobuf, param)) { + pr_err("%s: Unable copy param to vm\n", __func__); + return -1; + } + + dev_dbg(ACRN_DBG_HYCALL, "[%d] SET BUFFER=0x%x", + vmid, iobuf.req_buf); + + /* store gpa of guest request_buffer */ + target_vm->sw.req_buf = gpa2hpa(vm, iobuf.req_buf); + + return ret; +} + +static void complete_request(struct vcpu *vcpu) +{ + /* + * If vcpu is in Zombie state and will be destroyed soon. Just + * mark ioreq done and don't resume vcpu. + */ + if (vcpu->state == VCPU_ZOMBIE) { + struct vhm_request_buffer *req_buf; + + req_buf = (struct vhm_request_buffer *)vcpu->vm->sw.req_buf; + req_buf->req_queue[vcpu->vcpu_id].valid = false; + atomic_store_rel_32(&vcpu->ioreq_pending, 0); + + return; + } + + switch (vcpu->req.type) { + case REQ_MMIO: + request_vcpu_pre_work(vcpu, ACRN_VCPU_MMIO_COMPLETE); + break; + + case REQ_PORTIO: + dm_emulate_pio_post(vcpu); + break; + + default: + break; + } + + resume_vcpu(vcpu); +} + +int64_t hcall_notify_req_finish(uint64_t vmid, uint64_t vcpu_id) +{ + int64_t ret = 0; + struct vhm_request_buffer *req_buf; + struct vhm_request *req; + struct vcpu *vcpu; + struct vm *target_vm = get_vm_from_vmid(vmid); + + /* make sure we have set req_buf */ + if (!target_vm || target_vm->sw.req_buf == 0) + return -1; + + dev_dbg(ACRN_DBG_HYCALL, "[%d] NOTIFY_FINISH for vcpu %d", + vmid, vcpu_id); + + vcpu = vcpu_from_vid(target_vm, vcpu_id); + ASSERT(vcpu != NULL, "Failed to get VCPU context."); + + req_buf = (struct vhm_request_buffer *)target_vm->sw.req_buf; + req = req_buf->req_queue + vcpu_id; + + if (req->valid && + ((req->processed == REQ_STATE_SUCCESS) || + (req->processed == REQ_STATE_FAILED))) + complete_request(vcpu); + + return ret; +} + +int64_t hcall_set_vm_memmap(struct vm *vm, uint64_t vmid, uint64_t param) +{ + int64_t ret = 0; + uint64_t hpa; + uint32_t attr, prot; + struct vm_set_memmap memmap; + struct vm *target_vm = get_vm_from_vmid(vmid); + + if (!vm || !target_vm) + return -1; + + memset((void *)&memmap, 0, sizeof(memmap)); + + if (copy_from_vm(vm, &memmap, param)) { + pr_err("%s: Unable copy param to vm\n", __func__); + return -1; + } + + if (!is_vm0(vm)) { + pr_err("%s: ERROR! Not coming from service vm", __func__); + return -1; + } + + if (is_vm0(target_vm)) { + pr_err("%s: ERROR! Targeting to service vm", __func__); + return -1; + } + + if ((memmap.length & 0xFFF) != 0) { + pr_err("%s: ERROR! 
[vm%d] map size 0x%x is not page aligned", + __func__, vmid, memmap.length); + return -1; + } + + hpa = gpa2hpa(vm, memmap.vm0_gpa); + dev_dbg(ACRN_DBG_HYCALL, "[vm%d] gpa=0x%x hpa=0x%x size=0x%x", + vmid, memmap.remote_gpa, hpa, memmap.length); + + /* Check prot */ + attr = 0; + if (memmap.type != MAP_UNMAP) { + prot = memmap.prot; + if (prot & MEM_ACCESS_READ) + attr |= MMU_MEM_ATTR_READ; + if (prot & MEM_ACCESS_WRITE) + attr |= MMU_MEM_ATTR_WRITE; + if (prot & MEM_ACCESS_EXEC) + attr |= MMU_MEM_ATTR_EXECUTE; + if (prot & MEM_TYPE_WB) + attr |= MMU_MEM_ATTR_WB_CACHE; + else if (prot & MEM_TYPE_WT) + attr |= MMU_MEM_ATTR_WT_CACHE; + else if (prot & MEM_TYPE_UC) + attr |= MMU_MEM_ATTR_UNCACHED; + else if (prot & MEM_TYPE_WC) + attr |= MMU_MEM_ATTR_WC; + else if (prot & MEM_TYPE_WP) + attr |= MMU_MEM_ATTR_WP; + else + attr |= MMU_MEM_ATTR_UNCACHED; + } + + /* create gpa to hpa EPT mapping */ + ret = ept_mmap(target_vm, hpa, + memmap.remote_gpa, memmap.length, memmap.type, attr); + + return ret; +} + +int64_t hcall_remap_pci_msix(struct vm *vm, uint64_t vmid, uint64_t param) +{ + int64_t ret = 0; + struct acrn_vm_pci_msix_remap remap; + struct ptdev_msi_info info; + struct vm *target_vm = get_vm_from_vmid(vmid); + + if (target_vm == NULL) + return -1; + + memset((void *)&remap, 0, sizeof(remap)); + + if (copy_from_vm(vm, &remap, param)) { + pr_err("%s: Unable copy param to vm\n", __func__); + return -1; + } + + if (!is_vm0(vm)) + ret = -1; + else { + info.msix = remap.msix; + info.msix_entry_index = remap.msix_entry_index; + info.vmsi_ctl = remap.msi_ctl; + info.vmsi_addr = remap.msi_addr; + info.vmsi_data = remap.msi_data; + + ret = ptdev_msix_remap(target_vm, + remap.virt_bdf, &info); + remap.msi_data = info.pmsi_data; + remap.msi_addr = info.pmsi_addr; + + if (copy_to_vm(vm, &remap, param)) { + pr_err("%s: Unable copy param to vm\n", __func__); + return -1; + } + } + + return ret; +} + +int64_t hcall_gpa_to_hpa(struct vm *vm, uint64_t vmid, uint64_t param) +{ + int64_t ret = 0; + struct vm_gpa2hpa v_gpa2hpa; + struct vm *target_vm = get_vm_from_vmid(vmid); + + if (target_vm == NULL) + return -1; + + memset((void *)&v_gpa2hpa, 0, sizeof(v_gpa2hpa)); + + if (copy_from_vm(vm, &v_gpa2hpa, param)) { + pr_err("HCALL gpa2hpa: Unable copy param from vm\n"); + return -1; + } + v_gpa2hpa.hpa = gpa2hpa(target_vm, v_gpa2hpa.gpa); + if (copy_to_vm(vm, &v_gpa2hpa, param)) { + pr_err("%s: Unable copy param to vm\n", __func__); + return -1; + } + + return ret; +} + +int64_t hcall_assign_ptdev(struct vm *vm, uint64_t vmid, uint64_t param) +{ + int64_t ret = 0; + uint16_t bdf; + struct vm *target_vm = get_vm_from_vmid(vmid); + + if (target_vm == NULL) + return -1; + + if (copy_from_vm(vm, &bdf, param)) { + pr_err("%s: Unable copy param to vm\n", __func__); + return -1; + } + + /* create a iommu domain for target VM if not created */ + if (!target_vm->iommu_domain) { + ASSERT(target_vm->arch_vm.ept, "EPT of VM not set!"); + /* TODO: how to get vm's address width? 
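+ * For now a 48-bit guest address width (4-level translation) is
+ * hard-coded in the call below; ideally it would be derived from the
+ * guest's reported address width.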
*/ + target_vm->iommu_domain = create_iommu_domain(vmid, + target_vm->arch_vm.ept, 48); + ASSERT(target_vm->iommu_domain, + "failed to created iommu domain!"); + } + ret = assign_iommu_device(target_vm->iommu_domain, + (uint8_t)(bdf >> 8), (uint8_t)(bdf & 0xff)); + + return ret; +} + +int64_t hcall_deassign_ptdev(struct vm *vm, uint64_t vmid, uint64_t param) +{ + int64_t ret = 0; + uint16_t bdf; + struct vm *target_vm = get_vm_from_vmid(vmid); + + if (target_vm == NULL) + return -1; + + if (copy_from_vm(vm, &bdf, param)) { + pr_err("%s: Unable copy param to vm\n", __func__); + return -1; + } + ret = unassign_iommu_device(target_vm->iommu_domain, + (uint8_t)(bdf >> 8), (uint8_t)(bdf & 0xff)); + + return ret; +} + +int64_t hcall_set_ptdev_intr_info(struct vm *vm, uint64_t vmid, uint64_t param) +{ + int64_t ret = 0; + struct hc_ptdev_irq irq; + struct vm *target_vm = get_vm_from_vmid(vmid); + + if (target_vm == NULL) + return -1; + + memset((void *)&irq, 0, sizeof(irq)); + + if (copy_from_vm(vm, &irq, param)) { + pr_err("%s: Unable copy param to vm\n", __func__); + return -1; + } + + if (irq.type == IRQ_INTX) + ptdev_add_intx_remapping(target_vm, + irq.virt_bdf, irq.phys_bdf, + irq.is.intx.virt_pin, irq.is.intx.phys_pin, + irq.is.intx.pic_pin); + else if (irq.type == IRQ_MSI || irq.type == IRQ_MSIX) + ptdev_add_msix_remapping(target_vm, + irq.virt_bdf, irq.phys_bdf, + irq.is.msix.vector_cnt); + + return ret; +} + +int64_t +hcall_reset_ptdev_intr_info(struct vm *vm, uint64_t vmid, uint64_t param) +{ + int64_t ret = 0; + struct hc_ptdev_irq irq; + struct vm *target_vm = get_vm_from_vmid(vmid); + + if (target_vm == NULL) + return -1; + + memset((void *)&irq, 0, sizeof(irq)); + + if (copy_from_vm(vm, &irq, param)) { + pr_err("%s: Unable copy param to vm\n", __func__); + return -1; + } + + if (irq.type == IRQ_INTX) + ptdev_remove_intx_remapping(target_vm, + irq.is.intx.virt_pin, + irq.is.intx.pic_pin); + else if (irq.type == IRQ_MSI || irq.type == IRQ_MSIX) + ptdev_remove_msix_remapping(target_vm, + irq.virt_bdf, + irq.is.msix.vector_cnt); + + return ret; +} + +#ifdef HV_DEBUG +int64_t hcall_setup_sbuf(struct vm *vm, uint64_t param) +{ + struct sbuf_setup_param ssp; + uint64_t *hva; + + memset((void *)&ssp, 0, sizeof(ssp)); + + if (copy_from_vm(vm, &ssp, param)) { + pr_err("%s: Unable copy param to vm\n", __func__); + return -1; + } + + if (ssp.gpa) + hva = (uint64_t *)GPA2HVA(vm, ssp.gpa); + else + hva = (uint64_t *)NULL; + + return sbuf_share_setup(ssp.pcpu_id, ssp.sbuf_id, hva); +} +#else /* HV_DEBUG */ +int64_t hcall_setup_sbuf(__unused struct vm *vm, + __unused uint64_t param) +{ + return -1; +} +#endif /* HV_DEBUG */ + +static void fire_vhm_interrupt(void) +{ + /* + * use vLAPIC to inject vector to SOS vcpu 0 if vlapic is enabled + * otherwise, send IPI hardcoded to CPU_BOOT_ID + */ + struct vm *vm0; + struct vcpu *vcpu; + + vm0 = get_vm_from_vmid(0); + ASSERT(vm0, "VM Pointer is NULL"); + + vcpu = vcpu_from_vid(vm0, 0); + ASSERT(vcpu, "vcpu_from_vid failed"); + + vlapic_intr_edge(vcpu, VECTOR_VIRT_IRQ_VHM); +} + +#ifdef HV_DEBUG +static void acrn_print_request(int vcpu_id, struct vhm_request *req) +{ + switch (req->type) { + case REQ_MMIO: + dev_dbg(ACRN_DBG_HYCALL, "[vcpu_id=%d type=MMIO]", vcpu_id); + dev_dbg(ACRN_DBG_HYCALL, + "gpa=0x%lx, R/W=%d, size=%ld value=0x%lx processed=%lx", + req->reqs.mmio_request.address, + req->reqs.mmio_request.direction, + req->reqs.mmio_request.size, + req->reqs.mmio_request.value, + req->processed); + break; + case REQ_PORTIO: + dev_dbg(ACRN_DBG_HYCALL, 
"[vcpu_id=%d type=PORTIO]", vcpu_id); + dev_dbg(ACRN_DBG_HYCALL, + "IO=0x%lx, R/W=%d, size=%ld value=0x%lx processed=%lx", + req->reqs.pio_request.address, + req->reqs.pio_request.direction, + req->reqs.pio_request.size, + req->reqs.pio_request.value, + req->processed); + break; + default: + dev_dbg(ACRN_DBG_HYCALL, "[vcpu_id=%d type=%d] NOT support type", + vcpu_id, req->type); + break; + } +} +#else +static void acrn_print_request(__unused int vcpu_id, + __unused struct vhm_request *req) +{ +} +#endif + +int acrn_insert_request_wait(struct vcpu *vcpu, struct vhm_request *req) +{ + struct vhm_request_buffer *req_buf = + (void *)HPA2HVA(vcpu->vm->sw.req_buf); + long cur; + + ASSERT(sizeof(*req) == (4096/VHM_REQUEST_MAX), + "vhm_request page broken!"); + + + if (!vcpu || !req || vcpu->vm->sw.req_buf == 0) + return -1; + + /* ACRN insert request to VHM and inject upcall */ + cur = vcpu->vcpu_id; + req_buf->req_queue[cur] = *req; + + /* Must clear the signal before we mark req valid + * Once we mark to valid, VHM may process req and signal us + * before we perform upcall. + * because VHM can work in pulling mode without wait for upcall + */ + req_buf->req_queue[cur].valid = true; + + acrn_print_request(vcpu->vcpu_id, req_buf->req_queue + cur); + + /* signal VHM */ + fire_vhm_interrupt(); + + /* pause vcpu, wait for VHM to handle the MMIO request */ + atomic_store_rel_32(&vcpu->ioreq_pending, 1); + pause_vcpu(vcpu, VCPU_PAUSED); + + return 0; +} + +int acrn_insert_request_nowait(struct vcpu *vcpu, struct vhm_request *req) +{ + struct vhm_request_buffer *req_buf; + long cur; + + if (!vcpu || !req || !vcpu->vm->sw.req_buf) + return -1; + + req_buf = (void *)gpa2hpa(vcpu->vm, vcpu->vm->sw.req_buf); + + /* ACRN insert request to VHM and inject upcall */ + cur = vcpu->vcpu_id; + req_buf->req_queue[cur] = *req; + req_buf->req_queue[cur].valid = true; + + /* signal VHM and yield CPU */ + fire_vhm_interrupt(); + + return 0; +} + +static void _get_req_info_(struct vhm_request *req, int *id, char *type, + char *state, char *dir, long *addr, long *val) +{ + strcpy_s(dir, 16, "NONE"); + *addr = *val = 0; + *id = req->client; + + switch (req->type) { + case REQ_PORTIO: + strcpy_s(type, 16, "PORTIO"); + if (req->reqs.pio_request.direction == REQUEST_READ) + strcpy_s(dir, 16, "READ"); + else + strcpy_s(dir, 16, "WRITE"); + *addr = req->reqs.pio_request.address; + *val = req->reqs.pio_request.value; + break; + case REQ_MMIO: + case REQ_WP: + strcpy_s(type, 16, "MMIO/WP"); + if (req->reqs.mmio_request.direction == REQUEST_READ) + strcpy_s(dir, 16, "READ"); + else + strcpy_s(dir, 16, "WRITE"); + *addr = req->reqs.mmio_request.address; + *val = req->reqs.mmio_request.value; + break; + break; + default: + strcpy_s(type, 16, "UNKNOWN"); + } + + switch (req->processed) { + case REQ_STATE_SUCCESS: + strcpy_s(state, 16, "SUCCESS"); + break; + case REQ_STATE_PENDING: + strcpy_s(state, 16, "PENDING"); + break; + case REQ_STATE_PROCESSING: + strcpy_s(state, 16, "PROCESS"); + break; + case REQ_STATE_FAILED: + strcpy_s(state, 16, "FAILED"); + break; + default: + strcpy_s(state, 16, "UNKNOWN"); + } +} + +int get_req_info(char *str, int str_max) +{ + int i, len, size = str_max, client_id; + struct vhm_request_buffer *req_buf; + struct vhm_request *req; + char type[16], state[16], dir[16]; + long addr, val; + struct list_head *pos; + struct vm *vm; + + len = snprintf(str, size, + "\r\nVM\tVCPU\tCID\tTYPE\tSTATE\tDIR\tADDR\t\t\tVAL"); + size -= len; + str += len; + + spinlock_obtain(&vm_list_lock); + list_for_each(pos, 
&vm_list) { + vm = list_entry(pos, struct vm, list); + req_buf = (struct vhm_request_buffer *)vm->sw.req_buf; + if (req_buf) { + for (i = 0; i < VHM_REQUEST_MAX; i++) { + req = req_buf->req_queue + i; + if (req->valid) { + _get_req_info_(req, &client_id, type, + state, dir, &addr, &val); + len = snprintf(str, size, + "\r\n%d\t%d\t%d\t%s\t%s\t%s", + vm->attr.id, i, client_id, type, + state, dir); + size -= len; + str += len; + + len = snprintf(str, size, + "\t0x%016llx\t0x%016llx", + addr, val); + size -= len; + str += len; + } + } + } + } + spinlock_release(&vm_list_lock); + snprintf(str, size, "\r\n"); + return 0; +} diff --git a/hypervisor/common/schedule.c b/hypervisor/common/schedule.c new file mode 100644 index 000000000..c2923423f --- /dev/null +++ b/hypervisor/common/schedule.c @@ -0,0 +1,234 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+struct sched_context {
+	spinlock_t runqueue_lock;
+	struct list_head runqueue;
+	unsigned long need_scheduled;
+	struct vcpu *curr_vcpu;
+	spinlock_t scheduler_lock;
+};
+
+static DEFINE_CPU_DATA(struct sched_context, sched_ctx);
+static unsigned long pcpu_used_bitmap;
+
+void init_scheduler(void)
+{
+	int i;
+
+	for (i = 0; i < phy_cpu_num; i++) {
+		spinlock_init(&per_cpu(sched_ctx, i).runqueue_lock);
+		spinlock_init(&per_cpu(sched_ctx, i).scheduler_lock);
+		INIT_LIST_HEAD(&per_cpu(sched_ctx, i).runqueue);
+		per_cpu(sched_ctx, i).need_scheduled = 0;
+		per_cpu(sched_ctx, i).curr_vcpu = NULL;
+	}
+}
+
+void get_schedule_lock(int pcpu_id)
+{
+	spinlock_obtain(&per_cpu(sched_ctx, pcpu_id).scheduler_lock);
+}
+
+void release_schedule_lock(int pcpu_id)
+{
+	spinlock_release(&per_cpu(sched_ctx, pcpu_id).scheduler_lock);
+}
+
+int allocate_pcpu(void)
+{
+	int i;
+
+	for (i = 0; i < phy_cpu_num; i++) {
+		if (bitmap_test_and_set(i, &pcpu_used_bitmap) == 0) {
+#ifdef CONFIG_EFI_STUB
+			efi_deferred_wakeup_pcpu(i);
+#endif
+			return i;
+		}
+	}
+
+	return -1;
+}
+
+void set_pcpu_used(int pcpu_id)
+{
+	bitmap_set(pcpu_id, &pcpu_used_bitmap);
+}
+
+void free_pcpu(int pcpu_id)
+{
+	bitmap_clr(pcpu_id, &pcpu_used_bitmap);
+}
+
+void add_vcpu_to_runqueue(struct vcpu *vcpu)
+{
+	int pcpu_id = vcpu->pcpu_id;
+
+	spinlock_obtain(&per_cpu(sched_ctx, pcpu_id).runqueue_lock);
+	if (list_empty(&vcpu->run_list))
+		list_add_tail(&vcpu->run_list,
+			&per_cpu(sched_ctx, pcpu_id).runqueue);
+	spinlock_release(&per_cpu(sched_ctx, pcpu_id).runqueue_lock);
+}
+
+void remove_vcpu_from_runqueue(struct vcpu *vcpu)
+{
+	int pcpu_id = vcpu->pcpu_id;
+
+	spinlock_obtain(&per_cpu(sched_ctx, pcpu_id).runqueue_lock);
+	list_del_init(&vcpu->run_list);
+	spinlock_release(&per_cpu(sched_ctx, pcpu_id).runqueue_lock);
+}
+
+static struct vcpu *select_next_vcpu(int pcpu_id)
+{
+	struct vcpu *vcpu = NULL;
+
+	spinlock_obtain(&per_cpu(sched_ctx, pcpu_id).runqueue_lock);
+	if (!list_empty(&per_cpu(sched_ctx, pcpu_id).runqueue)) {
+		vcpu = get_first_item(&per_cpu(sched_ctx, pcpu_id).runqueue,
+			struct vcpu, run_list);
+	}
+	spinlock_release(&per_cpu(sched_ctx, pcpu_id).runqueue_lock);
+
+	return vcpu;
+}
+
+void make_reschedule_request(struct vcpu *vcpu)
+{
+	bitmap_set(NEED_RESCHEDULED,
+		&per_cpu(sched_ctx, vcpu->pcpu_id).need_scheduled);
+	send_single_ipi(vcpu->pcpu_id, VECTOR_NOTIFY_VCPU);
+}
+
+int need_rescheduled(int pcpu_id)
+{
+	return bitmap_test_and_clear(NEED_RESCHEDULED,
+		&per_cpu(sched_ctx, pcpu_id).need_scheduled);
+}
+
+static void context_switch_out(struct vcpu *vcpu)
+{
+	/* if it's the idle thread, no action for switch out */
+	if (vcpu == NULL)
+		return;
+
+	atomic_store_rel_32(&vcpu->running, 0);
+	/* do prev vcpu context switch out */
+	/* For now, we don't need to invalidate EPT.
+	 * But if we have more than one vcpu on one pcpu,
+	 * we need to add EPT invalidation here.
+	 */
+}
+
+static void context_switch_in(struct vcpu *vcpu)
+{
+	/* update current_vcpu */
+	get_cpu_var(sched_ctx).curr_vcpu = vcpu;
+
+	/* if it's the idle thread, no action for switch in */
+	if (vcpu == NULL)
+		return;
+
+	atomic_store_rel_32(&vcpu->running, 1);
+	/* FIXME:
+	 * Now, we don't need to load the new vcpu's VMCS because
+	 * we only switch between the vcpu loop and the idle loop.
+	 * If we have more than one vcpu on one pcpu, need to
+	 * add VMCS load operation here.
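+	 * (i.e. a VMPTRLD of the next vcpu's VMCS would be needed at this point)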
+ */ +} + +void default_idle(void) +{ + int pcpu_id = get_cpu_id(); + + while (1) { + if (need_rescheduled(pcpu_id)) + schedule(); + else + __asm __volatile("pause" ::: "memory"); + } +} + +static void switch_to(struct vcpu *curr) +{ + /* + * reset stack pointer here. Otherwise, schedule + * is recursive call and stack will overflow finally. + */ + uint64_t cur_sp = (uint64_t)&get_cpu_var(stack)[STACK_SIZE]; + + if (curr == NULL) { + asm volatile ("movq %1, %%rsp\n" + "movq $0, %%rdi\n" + "jmp *%0\n" + : + : "a"(default_idle), "r"(cur_sp) + : "memory"); + } else { + asm volatile ("movq %2, %%rsp\n" + "movq %0, %%rdi\n" + "jmp *%1\n" + : + : "c"(curr), "a"(vcpu_thread), "r"(cur_sp) + : "memory"); + } +} + +void schedule(void) +{ + int pcpu_id = get_cpu_id(); + struct vcpu *next = NULL; + struct vcpu *prev = per_cpu(sched_ctx, pcpu_id).curr_vcpu; + + get_schedule_lock(pcpu_id); + next = select_next_vcpu(pcpu_id); + + if (prev == next) { + release_schedule_lock(pcpu_id); + return; + } + + context_switch_out(prev); + context_switch_in(next); + release_schedule_lock(pcpu_id); + + switch_to(next); + + ASSERT(false, "Shouldn't go here"); +} diff --git a/hypervisor/common/stack_protector.c b/hypervisor/common/stack_protector.c new file mode 100644 index 000000000..a7400ff5a --- /dev/null +++ b/hypervisor/common/stack_protector.c @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +void __stack_chk_fail(void) +{ + ASSERT(0, "stack check fails in HV\n"); +} diff --git a/hypervisor/common/vm_load.c b/hypervisor/common/vm_load.c new file mode 100644 index 000000000..a61ad63ec --- /dev/null +++ b/hypervisor/common/vm_load.c @@ -0,0 +1,254 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +struct zero_page { + uint8_t pad1[0x1e8]; /* 0x000 */ + uint8_t e820_nentries; /* 0x1e8 */ + uint8_t pad2[0x8]; /* 0x1e9 */ + + struct { + uint8_t setup_sects; /* 0x1f1 */ + uint8_t hdr_pad1[0x1e]; /* 0x1f2 */ + uint8_t loader_type; /* 0x210 */ + uint8_t load_flags; /* 0x211 */ + uint8_t hdr_pad2[0x6]; /* 0x212 */ + uint32_t ramdisk_addr; /* 0x218 */ + uint32_t ramdisk_size; /* 0x21c */ + uint8_t hdr_pad3[0x8]; /* 0x220 */ + uint32_t bootargs_addr; /* 0x228 */ + uint8_t hdr_pad4[0x1c]; /* 0x22c */ + uint32_t payload_offset;/* 0x248 */ + uint32_t payload_length;/* 0x24c */ + uint8_t hdr_pad5[0x18]; /* 0x250 */ + } __packed hdr; + + uint8_t pad3[0x68]; /* 0x268 */ + struct e820_entry e820[0x80]; /* 0x2d0 */ + uint8_t pad4[0x330]; /* 0xcd0 */ +} __packed; + +static uint32_t create_e820_table(struct e820_entry *_e820) +{ + uint32_t i; + + ASSERT(e820_entries > 0, + "e820 should be inited"); + + for (i = 0; i < e820_entries; i++) { + _e820[i].baseaddr = e820[i].baseaddr; + _e820[i].length = e820[i].length; + _e820[i].type = e820[i].type; + } + + return e820_entries; +} + +static uint64_t create_zero_page(struct vm *vm) +{ + struct zero_page *zeropage; + struct sw_linux *sw_linux = &(vm->sw.linux_info); + struct zero_page *hva; + uint64_t gpa; + + /* Set zeropage in Linux Guest RAM region just past boot args */ + hva = GPA2HVA(vm, (uint64_t)sw_linux->bootargs_load_addr); + zeropage = (struct zero_page *)((char *)hva + MEM_4K); + + /* clear the zeropage */ + memset(zeropage, 0, MEM_2K); + + /* copy part of the header into the zero page */ + hva = GPA2HVA(vm, (uint64_t)vm->sw.kernel_info.kernel_load_addr); + memcpy_s(&(zeropage->hdr), sizeof(zeropage->hdr), + &(hva->hdr), sizeof(hva->hdr)); + + /* See if kernel has a RAM disk */ + if (sw_linux->ramdisk_src_addr) { + /* Copy ramdisk load_addr and size in zeropage header structure + */ + zeropage->hdr.ramdisk_addr = + (uint32_t)(uint64_t)sw_linux->ramdisk_load_addr; + 
zeropage->hdr.ramdisk_size = (uint32_t)sw_linux->ramdisk_size; + } + + /* Copy bootargs load_addr in zeropage header structure */ + zeropage->hdr.bootargs_addr = + (uint32_t)(uint64_t)sw_linux->bootargs_load_addr; + + /* set constant arguments in zero page */ + zeropage->hdr.loader_type = 0xff; + zeropage->hdr.load_flags |= (1 << 5); /* quiet */ + + /* Create/add e820 table entries in zeropage */ + zeropage->e820_nentries = create_e820_table(zeropage->e820); + + /* Get the host physical address of the zeropage */ + gpa = hpa2gpa(vm, HVA2HPA((uint64_t)zeropage)); + + /* Return Physical Base Address of zeropage */ + return gpa; +} + +int load_guest(struct vm *vm, struct vcpu *vcpu) +{ + int ret = 0; + void *hva; + struct run_context *cur_context = + &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + uint64_t lowmem_gpa_top; + + hva = GPA2HVA(vm, GUEST_CFG_OFFSET); + lowmem_gpa_top = *(uint64_t *)hva; + + /* hardcode vcpu entry addr(kernel entry) & rsi (zeropage)*/ + memset(cur_context->guest_cpu_regs.longs, + 0, sizeof(uint64_t)*NUM_GPRS); + + hva = GPA2HVA(vm, lowmem_gpa_top - + MEM_4K - MEM_2K); + vcpu->entry_addr = (void *)(*((uint64_t *)hva)); + cur_context->guest_cpu_regs.regs.rsi = + lowmem_gpa_top - MEM_4K; + + pr_info("%s, Set config according to predefined offset:", + __func__); + pr_info("VCPU%d Entry: 0x%llx, RSI: 0x%016llx, cr3: 0x%016llx", + vcpu->vcpu_id, vcpu->entry_addr, + cur_context->guest_cpu_regs.regs.rsi, + vm->arch_vm.guest_pml4); + + return ret; +} + +int general_sw_loader(struct vm *vm, struct vcpu *vcpu) +{ + int ret = 0; + void *hva; + struct run_context *cur_context = + &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + char dyn_bootargs[100] = {0}; + uint32_t kernel_entry_offset; + struct zero_page *zeropage; + + ASSERT(vm != NULL, "Incorrect argument"); + + pr_dbg("Loading guest to run-time location"); + + /* FIXME: set config according to predefined offset */ + if (!is_vm0(vm)) + return load_guest(vm, vcpu); + + /* calculate the kernel entry point */ + zeropage = (struct zero_page *) + vm->sw.kernel_info.kernel_src_addr; + kernel_entry_offset = (zeropage->hdr.setup_sects + 1) * 512; + /* 64bit entry is the 512bytes after the start */ + kernel_entry_offset += 512; + vm->sw.kernel_info.kernel_entry_addr = + (void *)((unsigned long)vm->sw.kernel_info.kernel_load_addr + + kernel_entry_offset); + if (is_vcpu_bsp(vcpu)) { + /* Set VCPU entry point to kernel entry */ + vcpu->entry_addr = vm->sw.kernel_info.kernel_entry_addr; + pr_info("%s, VM *d VCPU %d Entry: 0x%016llx ", + __func__, vm->attr.id, vcpu->vcpu_id, vcpu->entry_addr); + } + + /* Calculate the host-physical address where the guest will be loaded */ + hva = GPA2HVA(vm, (uint64_t)vm->sw.kernel_info.kernel_load_addr); + + /* Copy the guest kernel image to its run-time location */ + memcpy_s((void *)hva, vm->sw.kernel_info.kernel_size, + vm->sw.kernel_info.kernel_src_addr, + vm->sw.kernel_info.kernel_size); + + /* See if guest is a Linux guest */ + if (vm->sw.kernel_type == VM_LINUX_GUEST) { + /* Documentation states: ebx=0, edi=0, ebp=0, esi=ptr to + * zeropage + */ + memset(cur_context->guest_cpu_regs.longs, + 0, sizeof(uint64_t) * NUM_GPRS); + + /* Get host-physical address for guest bootargs */ + hva = GPA2HVA(vm, + (uint64_t)vm->sw.linux_info.bootargs_load_addr); + + /* Copy Guest OS bootargs to its load location */ + strcpy_s((char *)hva, MEM_2K, + vm->sw.linux_info.bootargs_src_addr); + + /* add "cma=XXXXM@0xXXXXXXXX" to cmdline*/ + if (is_vm0(vm) && (e820_mem.max_ram_blk_size > 0)) { + 
snprintf(dyn_bootargs, 100, " cma=%dM@0x%llx\n", + (e820_mem.max_ram_blk_size >> 20), + e820_mem.max_ram_blk_base); + /* Delete '\n' at the end of cmdline */ + strcpy_s((char *)hva + +vm->sw.linux_info.bootargs_size - 1, + 100, dyn_bootargs); + } + + /* Check if a RAM disk is present with Linux guest */ + if (vm->sw.linux_info.ramdisk_src_addr) { + /* Get host-physical address for guest RAM disk */ + hva = GPA2HVA(vm, + (uint64_t)vm->sw.linux_info.ramdisk_load_addr); + + /* Copy RAM disk to its load location */ + memcpy_s((void *)hva, vm->sw.linux_info.ramdisk_size, + vm->sw.linux_info.ramdisk_src_addr, + vm->sw.linux_info.ramdisk_size); + + } + + /* Create Zeropage and copy Physical Base Address of Zeropage + * in RSI + */ + cur_context->guest_cpu_regs.regs.rsi = create_zero_page(vm); + + pr_info("%s, RSI pointing to zero page for VM %d at GPA %X", + __func__, vm->attr.id, + cur_context->guest_cpu_regs.regs.rsi); + + } else { + pr_err("%s, Loading VM SW failed", __func__); + ret = -EINVAL; + } + + return ret; +} diff --git a/hypervisor/debug/console.c b/hypervisor/debug/console.c new file mode 100644 index 000000000..d089bc84b --- /dev/null +++ b/hypervisor/debug/console.c @@ -0,0 +1,236 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include "serial_internal.h" + +static spinlock_t lock; + +static uint32_t serial_handle = SERIAL_INVALID_HANDLE; + +#define CONSOLE_KICK_TIMER_TIMEOUT 40 /* timeout is 40ms*/ + +uint32_t get_serial_handle(void) +{ + return serial_handle; +} + +static int print_char(char x) +{ + serial_puts(serial_handle, &x, 1); + + if (x == '\n') + serial_puts(serial_handle, "\r", 1); + + return 0; +} + +int console_init(void) +{ + spinlock_init(&lock); + + serial_handle = serial_open("STDIO"); + + return 0; +} + +int console_putc(int ch) +{ + int res = -1; + + spinlock_obtain(&lock); + + if (serial_handle != SERIAL_INVALID_HANDLE) + res = print_char(ch); + + spinlock_release(&lock); + + return res; +} + +int console_puts(const char *s) +{ + int res = -1; + const char *p; + + spinlock_obtain(&lock); + + if (serial_handle != SERIAL_INVALID_HANDLE) { + res = 0; + while (*s) { + /* start output at the beginning of the string search + * for end of string or '\n' + */ + p = s; + + while (*p && *p != '\n') + ++p; + + /* write all characters up to p */ + serial_puts(serial_handle, s, p - s); + + res += p - s; + + if (*p == '\n') { + print_char('\n'); + ++p; + res += 2; + } + + /* continue at position p */ + s = p; + } + } + + spinlock_release(&lock); + + return res; +} + +int console_write(const char *s, size_t len) +{ + int res = -1; + const char *e; + const char *p; + + spinlock_obtain(&lock); + + if (serial_handle != SERIAL_INVALID_HANDLE) { + /* calculate pointer to the end of the string */ + e = s + len; + res = 0; + + /* process all characters */ + while (s != e) { + /* search for '\n' or the end of the string */ + p = s; + + while ((p != e) && (*p != '\n')) + ++p; + + /* write all characters processed so far */ + serial_puts(serial_handle, s, p - s); + + res += p - s; + + /* write '\n' if end of string is not reached */ + if (p != e) { + print_char('\n'); + ++p; + res += 2; + } + + /* continue at next position */ + s = p; + } + } + + spinlock_release(&lock); + + return res; +} + +void console_dump_bytes(const void *p, unsigned int len) +{ + + const unsigned char *x = p; + const unsigned char *e = x + len; + int i; + + /* dump all bytes */ + while (x < e) { + /* write the address of the first byte in the row */ + printf("%08x: ", (vaddr_t) x); + /* print one row (16 bytes) as hexadecimal values */ + for (i = 0; i < 16; i++) + printf("%02x ", x[i]); + + /* print one row as ASCII characters (if possible) */ + for (i = 0; i < 16; i++) { + if ((x[i] < ' ') || (x[i] >= 127)) + console_putc('.'); + else + console_putc(x[i]); + } + /* continue with next row */ + console_putc('\n'); + /* set pointer one row ahead */ + x += 16; + } +} + +static void console_read(void) +{ + spinlock_obtain(&lock); + + if (serial_handle != SERIAL_INVALID_HANDLE) { + /* Get all the data available in the RX FIFO */ + serial_get_rx_data(serial_handle); + } + + spinlock_release(&lock); +} + +static void console_handler(void) +{ + /* Dump the RX FIFO to a circular buffer */ + console_read(); + + /* serial Console Rx operation */ + vuart_console_rx_chars(serial_handle); + + /* serial Console Tx operation */ + vuart_console_tx_chars(); + + shell_kick_session(); +} + +static int console_timer_callback(__unused uint64_t data) +{ + /* Kick HV-Shell and Uart-Console tasks */ + console_handler(); + + /* Restart the timer */ + console_setup_timer(); + + return 0; +} + +void console_setup_timer(void) +{ + /* Start an one-shot timer */ + if (add_timer(console_timer_callback, 0, + rdtsc() + 
TIME_MS_DELTA * CONSOLE_KICK_TIMER_TIMEOUT) < 0) + pr_err("Failed to add console kick timer"); +} diff --git a/hypervisor/debug/dump.c b/hypervisor/debug/dump.c new file mode 100644 index 000000000..87fb33441 --- /dev/null +++ b/hypervisor/debug/dump.c @@ -0,0 +1,368 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +/* + * readable exception descriptors. 
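+ * indexed by exception vector number (0-31), as used by dump_exception() below.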
+ */ +static const char *const excp_names[] = { + [0] = "Divide Error", + [1] = "RESERVED", + [2] = "NMI", + [3] = "Breakpoint", + [4] = "Overflow", + [5] = "BOUND range exceeded", + [6] = "Invalid Opcode", + [7] = "Device Not Available", + [8] = "Double Fault", + [9] = "Coprocessor Segment Overrun", + [10] = "Invalid TSS", + [11] = "Segment Not Present", + [12] = "Stack Segment Fault", + [13] = "General Protection", + [14] = "Page Fault", + [15] = "Intel Reserved", + [16] = "x87 FPU Floating Point Error", + [17] = "Alignment Check", + [18] = "Machine Check", + [19] = "SIMD Floating Point Exception", + [20] = "Virtualization Exception", + [21] = "Intel Reserved", + [22] = "Intel Reserved", + [23] = "Intel Reserved", + [24] = "Intel Reserved", + [25] = "Intel Reserved", + [26] = "Intel Reserved", + [27] = "Intel Reserved", + [28] = "Intel Reserved", + [29] = "Intel Reserved", + [30] = "Intel Reserved", + [31] = "Intel Reserved" +}; + +static void dump_guest_reg(struct vcpu *vcpu) +{ + struct run_context *cur_context = + &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + + printf("\n\n================================================"); + printf("================================\n\n"); + printf("Guest Registers:\r\n"); + printf("= VM ID %d ==== vCPU ID %d === pCPU ID %d ====" + "world %d =============\r\n", + vcpu->vm->attr.id, vcpu->vcpu_id, vcpu->pcpu_id, + vcpu->arch_vcpu.cur_context); + printf("= RIP=0x%016llx RSP=0x%016llx " + "RFLAGS=0x%016llx\r\n", + cur_context->rip, + cur_context->rsp, + cur_context->rflags); + printf("= CR0=0x%016llx CR2=0x%016llx " + " CR3=0x%016llx\r\n", + cur_context->cr0, + cur_context->cr2, + cur_context->cr3); + printf("= RAX=0x%016llx RBX=0x%016llx " + "RCX=0x%016llx\r\n", + cur_context->guest_cpu_regs.regs.rax, + cur_context->guest_cpu_regs.regs.rbx, + cur_context->guest_cpu_regs.regs.rcx); + printf("= RDX=0x%016llx RDI=0x%016llx " + "RSI=0x%016llx\r\n", + cur_context->guest_cpu_regs.regs.rdx, + cur_context->guest_cpu_regs.regs.rdi, + cur_context->guest_cpu_regs.regs.rsi); + printf("= RBP=0x%016llx R8=0x%016llx " + "R9=0x%016llx\r\n", + cur_context->guest_cpu_regs.regs.rbp, + cur_context->guest_cpu_regs.regs.r8, + cur_context->guest_cpu_regs.regs.r9); + printf("= R10=0x%016llx R11=0x%016llx " + "R12=0x%016llx\r\n", + cur_context->guest_cpu_regs.regs.r10, + cur_context->guest_cpu_regs.regs.r11, + cur_context->guest_cpu_regs.regs.r12); + printf("= R13=0x%016llx R14=0x%016llx " + "R15=0x%016llx\r\n", + cur_context->guest_cpu_regs.regs.r13, + cur_context->guest_cpu_regs.regs.r14, + cur_context->guest_cpu_regs.regs.r15); + printf("\r\n"); +} + +static void dump_guest_stack(struct vcpu *vcpu) +{ + uint64_t gpa; + uint64_t hpa; + uint32_t i; + uint64_t *tmp; + uint64_t page1_size; + uint64_t page2_size; + struct run_context *cur_context = + &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + + gpa = gva2gpa(vcpu->vm, cur_context->cr3, cur_context->rsp); + hpa = gpa2hpa(vcpu->vm, gpa); + printf("\r\nGuest Stack:\r\n"); + printf("Dump stack for vcpu %d, from gva 0x%016llx ->" + "gpa 0x%016llx -> hpa 0x%016llx \r\n", + vcpu->vcpu_id, cur_context->rsp, gpa, hpa); + /* Need check if cross 2 pages*/ + if (((cur_context->rsp % CPU_PAGE_SIZE) + DUMP_STACK_SIZE) + <= CPU_PAGE_SIZE) { + tmp = HPA2HVA(hpa); + for (i = 0; i < DUMP_STACK_SIZE/32; i++) { + printf("addr(0x%llx): 0x%016llx 0x%016llx " + "0x%016llx 0x%016llx\r\n", (hpa+i*32), + tmp[i*4], tmp[i*4+1], + tmp[i*4+2], tmp[i*4+3]); + } + + } else { + tmp = HPA2HVA(hpa); + page1_size = CPU_PAGE_SIZE + - 
(cur_context->rsp % CPU_PAGE_SIZE); + for (i = 0; i < page1_size/32; i++) { + printf("addr(0x%llx): 0x%016llx 0x%016llx 0x%016llx " + "0x%016llx\r\n", (hpa+i*32), tmp[i*4], + tmp[i*4+1], tmp[i*4+2], tmp[i*4+3]); + } + gpa = gva2gpa(vcpu->vm, cur_context->cr3, + cur_context->rsp + page1_size); + hpa = gpa2hpa(vcpu->vm, gpa); + printf("Dump stack for vcpu %d, from gva 0x%016llx ->" + "gpa 0x%016llx -> hpa 0x%016llx \r\n", + vcpu->vcpu_id, cur_context->rsp + page1_size, + gpa, hpa); + tmp = HPA2HVA(hpa); + page2_size = DUMP_STACK_SIZE - page1_size; + for (i = 0; i < page2_size/32; i++) { + printf("addr(0x%llx): 0x%016llx 0x%016llx 0x%016llx " + "0x%016llx\r\n", (hpa+i*32), tmp[i*4], + tmp[i*4+1], tmp[i*4+2], tmp[i*4+3]); + } + } + + printf("\r\n"); +} + +static void show_guest_call_trace(struct vcpu *vcpu) +{ + uint64_t gpa; + uint64_t hpa; + uint64_t *hva; + uint64_t bp; + uint64_t count = 0; + struct run_context *cur_context = + &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + + bp = cur_context->guest_cpu_regs.regs.rbp; + printf("Guest Call Trace: **************************************\r\n"); + printf("Maybe the call trace is not accurate, pls check stack!!\r\n"); + /* if enable compiler option(no-omit-frame-pointer) the stack layout + * should be like this when call a function for x86_64 + * + * | | + * rbp+8 | return address | + * rbp | rbp | push rbp + * | | mov rsp rbp + * + * rsp | | + * + * try to print out call trace,here can not check if the rbp is valid + * if the address is invalid, it will cause hv page fault + * then halt system */ + while ((count++ < CALL_TRACE_HIERARCHY_MAX) && (bp != 0)) { + gpa = gva2gpa(vcpu->vm, cur_context->cr3, bp); + hpa = gpa2hpa(vcpu->vm, gpa); + hva = HPA2HVA(hpa); + printf("BP_GVA(0x%016llx)->BP_GPA(0x%016llx)" + "->BP_HPA(0x%016llx) RIP=0x%016llx\r\n", bp, gpa, hpa, + *(uint64_t *)((uint64_t)hva + sizeof(uint64_t))); + /* Get previous rbp*/ + bp = *hva; + } + printf("\r\n"); +} + +static void dump_guest_context(uint32_t cpu_id) +{ + struct vcpu *vcpu; + + vcpu = per_cpu(vcpu, cpu_id); + if (vcpu != NULL) { + dump_guest_reg(vcpu); + dump_guest_stack(vcpu); + show_guest_call_trace(vcpu); + } +} + +static void show_host_call_trace(uint64_t rsp, uint64_t rbp, uint32_t cpu_id) +{ + int i = 0; + int cb_hierarchy = 0; + uint64_t *sp = (uint64_t *)rsp; + + printf("\r\nHost Stack: \r\n"); + for (i = 0; i < DUMP_STACK_SIZE/32; i++) { + printf("addr(0x%llx) 0x%016llx 0x%016llx 0x%016llx " + "0x%016llx\r\n", (rsp+i*32), sp[i*4], sp[i*4+1], + sp[i*4+2], sp[i*4+3]); + } + printf("\r\n"); + + printf("Host Call Trace:\r\n"); + if (rsp > + (uint64_t)&per_cpu(stack, cpu_id)[STACK_SIZE - 1] + || rsp < (uint64_t)&per_cpu(stack, cpu_id)[0]) { + return; + } + + /* if enable compiler option(no-omit-frame-pointer) the stack layout + * should be like this when call a function for x86_64 + * + * | | + * rbp+8 | return address | + * rbp | rbp | push rbp + * | | mov rsp rbp + * + * rsp | | + * + * + * if the address is invalid, it will cause hv page fault + * then halt system */ + while ((rbp <= + (uint64_t)&per_cpu(stack, cpu_id)[STACK_SIZE - 1]) + && (rbp >= (uint64_t)&per_cpu(stack, cpu_id)[0]) + && (cb_hierarchy++ < CALL_TRACE_HIERARCHY_MAX)) { + printf("----> 0x%016llx\r\n", + *(uint64_t *)(rbp + sizeof(uint64_t))); + if (*(uint64_t *)(rbp + 2*sizeof(uint64_t)) + == SP_BOTTOM_MAGIC) { + break; + } + rbp = *(uint64_t *)rbp; + } + printf("\r\n"); +} + +void __assert(uint32_t line, const char *file, char *txt) +{ + uint32_t cpu_id = get_cpu_id(); + uint64_t rsp = 
cpu_rsp_get(); + uint64_t rbp = cpu_rbp_get(); + + pr_fatal("Assertion failed in file %s,line %u : %s", + file, line, txt); + show_host_call_trace(rsp, rbp, cpu_id); + dump_guest_context(cpu_id); + do { + asm volatile ("pause" ::: "memory"); + } while (1); +} + +void dump_exception(struct intr_ctx *ctx, uint32_t cpu_id) +{ + const char *name = "Not defined"; + static int nested = 1; + + /* avoid endless loop, only dump the first exception */ + if (nested++ > 1) + return; + + if (ctx->vector < 0x20) + name = excp_names[ctx->vector]; + + printf("\n\n================================================"); + printf("================================\n=\n"); + printf("= Unhandled exception: %d (%s)\n", ctx->vector, name); + printf("= CPU ID = %d", cpu_id); + + /* Dump host register*/ + printf("\r\nHost Registers:\r\n"); + printf("= Vector=0x%016llX RIP=0x%016llX\n", + ctx->vector, ctx->rip); + printf("= RAX=0x%016llX RBX=0x%016llX RCX=0x%016llX\n", + ctx->rax, ctx->rbx, ctx->rcx); + printf("= RDX=0x%016llX RDI=0x%016llX RSI=0x%016llX\n", + ctx->rdx, ctx->rdi, ctx->rsi); + printf("= RSP=0x%016llX RBP=0x%016llX RBX=0x%016llX\n", + ctx->rsp, ctx->rbp, ctx->rbx); + printf("= R8=0x%016llX R9=0x%016llX R10=0x%016llX\n", + ctx->r8, ctx->r9, ctx->r10); + printf("= R11=0x%016llX R12=0x%016llX R13=0x%016llX\n", + ctx->r11, ctx->r12, ctx->r13); + printf("= RFLAGS=0x%016llX R14=0x%016llX R15=0x%016llX\n", + ctx->rflags, ctx->r14, ctx->r15); + printf("= ERRCODE=0x%016llX CS=0x%016llX SS=0x%016llX\n", + ctx->error_code, ctx->cs, ctx->ss); + printf("\r\n"); + + /* Dump host stack */ + show_host_call_trace(ctx->rsp, ctx->rbp, cpu_id); + + /* Dump guest context */ + dump_guest_context(cpu_id); + printf("= System halted\n"); + printf("====================================================="); + printf("===========================\n"); +} + +void dump_interrupt(struct intr_ctx *ctx) +{ + printf("\n\n=========================================="); + printf("======================================\n=\n"); + printf("\n=\n"); + printf("= Vector=0x%016llX RIP=0x%016llX\n", + ctx->vector, ctx->rip); + printf("= RAX=0x%016llX RBX=0x%016llX RCX=0x%016llX\n", + ctx->rax, ctx->rbx, ctx->rcx); + printf("= RDX=0x%016llX RDI=0x%016llX RSI=0x%016llX\n", + ctx->rdx, ctx->rdi, ctx->rsi); + printf("= RSP=0x%016llX RBP=0x%016llX RBX=0x%016llX\n", + ctx->rsp, ctx->rbp, ctx->rbx); + printf("= R8=0x%016llX R9=0x%016llX R10=0x%016llX\n", + ctx->r8, ctx->r9, ctx->r10); + printf("= R11=0x%016llX R12=0x%016llX R13=0x%016llX\n", + ctx->r11, ctx->r12, ctx->r13); + printf("= RFLAGS=0x%016llX R14=0x%016llX R15=0x%016llX\n", + ctx->rflags, ctx->r14, ctx->r15); + printf("= ERRCODE=0x%016llX CS=0x%016llX SS=0x%016llX\n", + ctx->error_code, ctx->cs, ctx->ss); + printf("=\n"); + printf("= system halted\n"); + printf("==============================================="); + printf("=================================\n"); +} diff --git a/hypervisor/debug/logmsg.c b/hypervisor/debug/logmsg.c new file mode 100644 index 000000000..b96ed27b3 --- /dev/null +++ b/hypervisor/debug/logmsg.c @@ -0,0 +1,161 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#define LOG_ENTRY_SIZE 80 + +/* Size of buffer used to store a message being logged, + * should align to LOG_ENTRY_SIZE. + */ +#define LOG_MESSAGE_MAX_SIZE (4 * LOG_ENTRY_SIZE) + +DEFINE_CPU_DATA(char [LOG_MESSAGE_MAX_SIZE], logbuf); + +struct logmsg { + uint32_t flags; + unsigned int seq; + spinlock_t lock; +}; + +static struct logmsg logmsg; + +void init_logmsg(__unused uint32_t mem_size, uint32_t flags) +{ + logmsg.flags = flags; + logmsg.seq = 0; +} + +void do_logmsg(uint32_t severity, const char *fmt, ...) 
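+/* format the message with a [timestamp][cpu][severity][seq] prefix and emit it
+ * to the serial console and/or the per-cpu shared log buffer, per logmsg.flags
+ * and the configured log levels
+ */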
+{ + va_list args; + uint64_t timestamp; + uint32_t cpu_id; + bool do_console_log; + bool do_mem_log; + char *buffer; + spinlock_rflags; + + do_console_log = ((logmsg.flags & LOG_FLAG_STDOUT) && + (severity <= console_loglevel)); + do_mem_log = ((logmsg.flags & LOG_FLAG_MEMORY) && + (severity <= mem_loglevel)); + + if (!do_console_log && !do_mem_log) + return; + + /* Get time-stamp value */ + timestamp = rdtsc(); + + /* Scale time-stamp appropriately */ + timestamp = TICKS_TO_US(timestamp); + + /* Get CPU ID */ + cpu_id = get_cpu_id(); + buffer = per_cpu(logbuf, cpu_id); + + memset(buffer, 0, LOG_MESSAGE_MAX_SIZE); + /* Put time-stamp, CPU ID and severity into buffer */ + snprintf(buffer, LOG_MESSAGE_MAX_SIZE, + "[%lluus][cpu=%u][sev=%u][seq=%u]:", + timestamp, cpu_id, severity, + atomic_inc_return(&logmsg.seq)); + + /* Put message into remaining portion of local buffer */ + va_start(args, fmt); + vsnprintf(buffer + strnlen_s(buffer, LOG_MESSAGE_MAX_SIZE), + LOG_MESSAGE_MAX_SIZE + - strnlen_s(buffer, LOG_MESSAGE_MAX_SIZE), fmt, args); + va_end(args); + + /* Check if flags specify to output to stdout */ + if (do_console_log) { + spinlock_irqsave_obtain(&(logmsg.lock)); + + /* Send buffer to stdout */ + printf("%s\n\r", buffer); + + spinlock_irqrestore_release(&(logmsg.lock)); + } + + /* Check if flags specify to output to memory */ + if (do_mem_log) { + int i, msg_len; + struct shared_buf *sbuf = (struct shared_buf *) + per_cpu(sbuf, cpu_id)[ACRN_HVLOG]; + if (sbuf != NULL) { + msg_len = strnlen_s(buffer, LOG_MESSAGE_MAX_SIZE); + + for (i = 0; i < (msg_len - 1) / LOG_ENTRY_SIZE + 1; + i++) { + sbuf_put(sbuf, (uint8_t *)buffer + + i * LOG_ENTRY_SIZE); + } + } + } +} + +void print_logmsg_buffer(uint32_t cpu_id) +{ + spinlock_rflags; + char buffer[LOG_ENTRY_SIZE + 1]; + int read_cnt; + struct shared_buf *sbuf; + + if (cpu_id >= (uint32_t)phy_cpu_num) + return; + + sbuf = (struct shared_buf *)per_cpu(sbuf, cpu_id)[ACRN_HVLOG]; + if (sbuf != NULL) { + spinlock_irqsave_obtain(&(logmsg.lock)); + printf("CPU%d: head: 0x%x, tail: 0x%x\n\r", + cpu_id, sbuf->head, sbuf->tail); + spinlock_irqrestore_release(&(logmsg.lock)); + do { + memset(buffer, 0, LOG_ENTRY_SIZE + 1); + read_cnt = sbuf_get(sbuf, (uint8_t *)buffer); + if (read_cnt > 0) { + uint32_t idx; + + idx = (read_cnt < LOG_ENTRY_SIZE) ? + read_cnt : LOG_ENTRY_SIZE; + buffer[idx] = '\0'; + + spinlock_irqsave_obtain(&(logmsg.lock)); + printf("%s\n\r", buffer); + spinlock_irqrestore_release(&(logmsg.lock)); + } + } while (read_cnt > 0); + } +} diff --git a/hypervisor/debug/printf.c b/hypervisor/debug/printf.c new file mode 100644 index 000000000..1a141ce58 --- /dev/null +++ b/hypervisor/debug/printf.c @@ -0,0 +1,824 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#ifndef NULL +#define NULL ((void *) 0) +#endif + +#define PRINT_STRING_MAX_LEN 4096 + +/** Command for the emit function: copy string to output. */ +#define PRINT_CMD_COPY 0x00000000 + +/** Command for the emit function: fill output with first character. */ +#define PRINT_CMD_FILL 0x00000001 + +/** Use upper case letters for hexadecimal format. */ +#define PRINT_FLAG_UPPER 0x00000001 + +/** Use alternate form. */ +#define PRINT_FLAG_ALTERNATE_FORM 0x00000002 + +/** Use '0' instead of ' ' for padding. */ +#define PRINT_FLAG_PAD_ZERO 0x00000004 + +/** Use left instead of right justification. */ +#define PRINT_FLAG_LEFT_JUSTIFY 0x00000008 + +/** Always use the sign as prefix. */ +#define PRINT_FLAG_SIGN 0x00000010 + +/** Use ' ' as prefix if no sign is used. */ +#define PRINT_FLAG_SPACE 0x00000020 + +/** The original value was a (unsigned) char. */ +#define PRINT_FLAG_CHAR 0x00000040 + +/** The original value was a (unsigned) short. */ +#define PRINT_FLAG_SHORT 0x00000080 + +/** The original value was a (unsigned) long. */ +#define PRINT_FLAG_LONG 0x00000100 + +/** The original value was a (unsigned) long long. */ +#define PRINT_FLAG_LONG_LONG 0x00000200 + +/** The value is interpreted as unsigned. */ +#define PRINT_FLAG_UINT32 0x00000400 + +/** Structure used to parse parameters and variables to subroutines. */ +struct print_param { + /** A pointer to the function that is used to emit characters. */ + int (*emit)(int, const char *, int, void *); + /** An opaque pointer that is passed as third argument to the emit + * function. + */ + void *data; + /** Contains variables which are recalculated for each argument. */ + struct { + /** A bitfield with the parsed format flags. */ + int flags; + /** The parsed format width. */ + int width; + /** The parsed format precision. */ + int precision; + /** The bitmask for unsigned values. */ + unsigned long long mask; + /** A pointer to the preformated value. */ + const char *value; + /* The number of characters in the preformated value buffer. */ + uint32_t valuelen; + /** A pointer to the values prefix. */ + const char *prefix; + /** The number of characters in the prefix buffer. */ + uint32_t prefixlen; + } vars; +}; + +/** Structure used to save (v)snprintf() specific values */ +struct snprint_param { + /** The destination buffer. */ + char *dst; + /** The size of the destination buffer. */ + int sz; + /** Counter for written chars. */ + int wrtn; +}; + +/** The characters to use for upper case hexadecimal conversion. + * + * Note that this array is 17 bytes long. The first 16 characters + * are used to convert a 4 bit number to a printable character. 
+ * The last character is used to determine the prefix for the + * alternate form. + */ + +static const char upper_hex_digits[] = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'A', 'B', 'C', 'D', 'E', 'F', 'X' +}; + +/** The characters to use for lower case hexadecimal conversion. + * + * Note that this array is 17 bytes long. The first 16 characters + * are used to convert a 4 bit number to a printable character. + * The last character is used to determine the prefix for the + * alternate form. + */ + +static const char lower_hex_digits[] = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'a', 'b', 'c', 'd', 'e', 'f', 'x' +}; + +static const char *get_int(const char *s, int *x) +{ + int negative = 0; + *x = 0; + + /* evaluate leading '-' for negative numbers */ + if (*s == '-') { + negative = 1; + ++s; + } + + /* parse uint32_teger */ + while ((*s >= '0') && (*s <= '9')) + *x = *x * 10 + (*s++ - '0'); + + /* apply sign to result */ + if (negative) + *x = -*x; + + return s; +} + +static const char *get_flags(const char *s, int *flags) +{ + /* contains the flag characters */ + static const char flagchars[] = "#0- +"; + /* contains the numeric flags for the characters above */ + static const int fl[sizeof(flagchars)] = { + PRINT_FLAG_ALTERNATE_FORM, /* # */ + PRINT_FLAG_PAD_ZERO, /* 0 */ + PRINT_FLAG_LEFT_JUSTIFY, /* - */ + PRINT_FLAG_SIGN, /* + */ + PRINT_FLAG_SPACE /* ' ' */ + }; + const char *pos; + + /* parse multiple flags */ + while (*s) { + /* get index of flag. Terminate loop if no flag character was + * found + */ + pos = strchr(flagchars, *s); + if (pos == 0) + break; + + /* apply matching flags and continue with the next character */ + ++s; + *flags |= fl[pos - flagchars]; + } + + /* Spec says that '-' has a higher priority than '0' */ + if (*flags & PRINT_FLAG_LEFT_JUSTIFY) + *flags &= ~PRINT_FLAG_PAD_ZERO; + + /* Spec says that '+' has a higher priority than ' ' */ + if (*flags & PRINT_FLAG_SIGN) + *flags &= ~PRINT_FLAG_SPACE; + + return s; +} + +static const char *get_length_modifier(const char *s, + int *flags, unsigned long long *mask) +{ + /* check for h[h] (char/short) */ + if (*s == 'h') { + if (*++s == 'h') { + *flags |= PRINT_FLAG_CHAR; + *mask = 0x000000FF; + ++s; + } else { + *flags |= PRINT_FLAG_SHORT; + *mask = 0x0000FFFF; + } + } + /* check for l[l] (long/long long) */ + else if (*s == 'l') { + if (*++s == 'l') { + *flags |= PRINT_FLAG_LONG_LONG; + ++s; + } else + *flags |= PRINT_FLAG_LONG; + } + + return s; +} + +static int format_number(struct print_param *param) +{ + /* contains the character used for padding */ + char pad; + /* effective width of the result */ + uint32_t width; + /* number of characters to insert for width (w) and precision (p) */ + uint32_t p, w; + /* the result */ + int res; + + /* initialize variables */ + p = w = 0; + res = 0; + width = param->vars.valuelen + param->vars.prefixlen; + + /* calculate additional characters for precision */ + if ((uint32_t)(param->vars.precision) > width) + p = param->vars.precision - width; + + /* calculate additional characters for width */ + if ((uint32_t)(param->vars.width) > (width + p)) + w = param->vars.width - (width + p); + + /* handle case of right justification */ + if ((param->vars.flags & PRINT_FLAG_LEFT_JUSTIFY) == 0) { + /* assume ' ' as padding character */ + pad = ' '; + + /* + * if padding with 0 is used, we have to emit the prefix (if any + * ) first to achieve the expected result. However, if a blank is + * used for padding, the prefix is emitted after the padding. 
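		 * e.g. "%#08x" of 0x2a gives "0x00002a" (prefix, then zeros), while
		 * "%#8x" gives "    0x2a" (blanks, then prefix).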
+ */ + + if (param->vars.flags & PRINT_FLAG_PAD_ZERO) { + /* use '0' for padding */ + pad = '0'; + + /* emit prefix, return early if an error occurred */ + res = param->emit(PRINT_CMD_COPY, param->vars.prefix, + param->vars.prefixlen, param->data); + if (param->vars.prefix && (res < 0)) + return res; + + /* invalidate prefix */ + param->vars.prefix = 0; + param->vars.prefixlen = 0; + } + + /* fill the width with the padding character, return early if + * an error occurred + */ + res = param->emit(PRINT_CMD_FILL, &pad, w, param->data); + if (res < 0) + return res; + } + + /* emit prefix (if any), return early in case of an error */ + res = param->emit(PRINT_CMD_COPY, param->vars.prefix, + param->vars.prefixlen, param->data); + if (param->vars.prefix && (res < 0)) + return res; + + /* insert additional 0's for precision, return early if an error + * occurred + */ + res = param->emit(PRINT_CMD_FILL, "0", p, param->data); + if (res < 0) + return res; + + /* emit the pre-calculated result, return early in case of an error */ + res = param->emit(PRINT_CMD_COPY, param->vars.value, + param->vars.valuelen, param->data); + if (res < 0) + return res; + + /* handle left justification */ + if ((param->vars.flags & PRINT_FLAG_LEFT_JUSTIFY) != 0) { + /* emit trailing blanks, return early in case of an error */ + res = param->emit(PRINT_CMD_FILL, " ", w, param->data); + if (res < 0) + return res; + } + + /* done, return the last result */ + return res; +} + +static int print_pow2(struct print_param *param, + unsigned long long v, uint32_t shift) +{ + /* max buffer required for octal representation of unsigned long long */ + char digitbuff[22]; + /* Insert position for the next character+1 */ + char *pos = digitbuff + sizeof(digitbuff); + /* buffer for the 0/0x/0X prefix */ + char prefix[2]; + /* pointer to the digits translation table */ + const char *digits; + /* mask to extract next character */ + unsigned long long mask; + int ret; + + /* calculate mask */ + mask = (1ULL << shift) - 1; + + /* determine digit translation table */ + digits = (param->vars.flags & PRINT_FLAG_UPPER) ? + upper_hex_digits : lower_hex_digits; + + /* apply mask for short/char */ + v &= param->vars.mask; + + /* determine prefix for alternate form */ + if ((v == 0) && (param->vars.flags & PRINT_FLAG_ALTERNATE_FORM)) { + prefix[0] = '0'; + param->vars.prefix = prefix; + param->vars.prefixlen = 1; + + if (shift == 4) { + param->vars.prefixlen = 2; + prefix[1] = digits[16]; + } + } + + /* determine digits from right to left */ + do { + *--pos = digits[(v & mask)]; + } while (v >>= shift); + + /* assign parameter and apply width and precision */ + param->vars.value = pos; + param->vars.valuelen = digitbuff + sizeof(digitbuff) - pos; + + ret = format_number(param); + + param->vars.value = NULL; + param->vars.valuelen = 0; + + return ret; +} + +static int print_decimal(struct print_param *param, long long value) +{ + /* max. 
required buffer for unsigned long long in decimal format */ + char digitbuff[20]; + /* pointer to the next character position (+1) */ + char *pos = digitbuff + sizeof(digitbuff); + /* current value in 32/64 bit */ + union u_qword v; + /* next value in 32/64 bit */ + union u_qword nv; + /* helper union for division result */ + struct udiv_result d; + int ret; + + /* assume an unsigned 64 bit value */ + v.qword = ((unsigned long long)value) & param->vars.mask; + + /* + * assign sign and correct value if value is negative and + * value must be interpreted as signed + */ + if (((param->vars.flags & PRINT_FLAG_UINT32) == 0) && (value < 0)) { + v.qword = (unsigned long long)-value; + param->vars.prefix = "-"; + param->vars.prefixlen = 1; + } + + /* determine sign if explicit requested in the format string */ + if (!param->vars.prefix) { + if (param->vars.flags & PRINT_FLAG_SIGN) { + param->vars.prefix = "+"; + param->vars.prefixlen = 1; + } else if (param->vars.flags & PRINT_FLAG_SPACE) { + param->vars.prefix = " "; + param->vars.prefixlen = 1; + } + } + + /* process 64 bit value as long as needed */ + while (v.dwords.high != 0) { + /* determine digits from right to left */ + udiv64(v.qword, 10, &d); + *--pos = d.r.dwords.low + '0'; + v.qword = d.q.qword; + } + + /* process 32 bit (or reduced 64 bit) value */ + do { + /* determine digits from right to left. The compiler should be + * able to handle a division and multiplication by the constant + * 10. + */ + nv.dwords.low = v.dwords.low / 10; + *--pos = (v.dwords.low - (10 * nv.dwords.low)) + '0'; + } while ((v.dwords.low = nv.dwords.low) != 0); + + /* assign parameter and apply width and precision */ + param->vars.value = pos; + param->vars.valuelen = digitbuff + sizeof(digitbuff) - pos; + + ret = format_number(param); + + param->vars.value = NULL; + param->vars.valuelen = 0; + + return ret; +} + +static int print_string(struct print_param *param, const char *s) +{ + /* the length of the string (-1) if unknown */ + int len; + /* the number of additional characters to insert to reach the required + * width + */ + uint32_t w; + /* the last result of the emit function */ + int res; + + w = 0; + len = -1; + + /* we need the length of the string if either width or precision is + * given + */ + if (param->vars.precision || param->vars.width) + len = strnlen_s(s, PRINT_STRING_MAX_LEN); + + /* precision gives the max. number of characters to emit. */ + if (param->vars.precision && (len > param->vars.precision)) + len = param->vars.precision; + + /* calculate the number of additional characters to get the required + * width + */ + if (param->vars.width > 0 && param->vars.width > len) + w = param->vars.width - len; + + /* emit additional characters for width, return early if an error + * occurred + */ + if ((param->vars.flags & PRINT_FLAG_LEFT_JUSTIFY) == 0) { + res = param->emit(PRINT_CMD_FILL, " ", w, param->data); + if (res < 0) + return res; + } + + /* emit the string, return early if an error occurred */ + res = param->emit(PRINT_CMD_COPY, s, len, param->data); + if (res < 0) + return res; + + /* emit additional characters on the right, return early if an error + * occurred + */ + if (param->vars.flags & PRINT_FLAG_LEFT_JUSTIFY) { + res = param->emit(PRINT_CMD_FILL, " ", w, param->data); + if (res < 0) + return res; + } + + return res; +} + +static int do_print(const char *fmt, struct print_param *param, + __builtin_va_list args) +{ + /* the result of this function */ + int res = 0; + /* temp. storage for the next character */ + char ch; + /* temp. 
pointer to the start of an analysed character sequence */ + const char *start; + + /* main loop: analyse until there are no more characters */ + while (*fmt) { + /* mark the current position and search the next '%' */ + start = fmt; + + while (*fmt && (*fmt != '%')) + fmt++; + + /* + * pass all characters until the next '%' to the emit function. + * Return early if the function fails + */ + res = param->emit(PRINT_CMD_COPY, start, fmt - start, + param->data); + if (res < 0) + return res; + + /* continue only if the '%' character was found */ + if (*fmt == '%') { + /* mark current position in the format string */ + start = fmt++; + + /* initialize the variables for the next argument */ + memset(&(param->vars), 0, sizeof(param->vars)); + param->vars.mask = 0xFFFFFFFFFFFFFFFFULL; + + /* + * analyze the format specification: + * - get the flags + * - get the width + * - get the precision + * - get the length modifier + */ + fmt = get_flags(fmt, &(param->vars.flags)); + fmt = get_int(fmt, &(param->vars.width)); + + if (*fmt == '.') { + fmt++; + fmt = get_int(fmt, &(param->vars.precision)); + if (param->vars.precision < 0) + param->vars.precision = 0; + } + + fmt = get_length_modifier(fmt, &(param->vars.flags), + &(param->vars.mask)); + ch = *fmt++; + + /* a single '%'? => print out a single '%' */ + if (ch == '%') { + res = param->emit(PRINT_CMD_COPY, &ch, 1, + param->data); + } + /* decimal number */ + else if ((ch == 'd') || (ch == 'i')) { + res = print_decimal(param, + (param->vars.flags & + PRINT_FLAG_LONG_LONG) ? + __builtin_va_arg(args, + long long) + : (long long) + __builtin_va_arg(args, + int)); + } + /* unsigned decimal number */ + else if (ch == 'u') { + param->vars.flags |= PRINT_FLAG_UINT32; + res = print_decimal(param, + (param->vars.flags & + PRINT_FLAG_LONG_LONG) ? + __builtin_va_arg(args, + unsigned long long) + : (unsigned long long) + __builtin_va_arg(args, + unsigned int)); + } + /* octal number */ + else if (ch == 'o') { + res = print_pow2(param, + (param->vars.flags & + PRINT_FLAG_LONG_LONG) ? + __builtin_va_arg(args, + unsigned long long) + : (unsigned long long) + __builtin_va_arg(args, + uint32_t), + 3); + } + /* hexadecimal number */ + else if ((ch == 'X') || (ch == 'x')) { + if (ch == 'X') + param->vars.flags |= PRINT_FLAG_UPPER; + res = print_pow2(param, + (param->vars.flags & + PRINT_FLAG_LONG_LONG) ? + __builtin_va_arg(args, + unsigned long long) + : (unsigned long long) + __builtin_va_arg(args, + uint32_t), + 4); + } + /* string argument */ + else if (ch == 's') { + const char *s = __builtin_va_arg(args, char *); + + if (s == NULL) + s = "(null)"; + res = print_string(param, s); + } + /* pointer argument */ + else if (ch == 'p') { + param->vars.flags |= PRINT_FLAG_ALTERNATE_FORM; + /* XXXCRG res=print_pow2(param, + * (uint32_t) __builtin_va_arg(args, + * void *),4); + */ + res = print_pow2(param, (unsigned long long) + __builtin_va_arg(args, void *), 4); + } + /* single character argument */ + else if (ch == 'c') { + char c[2]; + + c[0] = __builtin_va_arg(args, int); + c[1] = 0; + res = print_string(param, c); + } + /* default: print the format specifier as it is */ + else { + res = param->emit(PRINT_CMD_COPY, start, + fmt - start, param->data); + } + } + /* return if an error occurred */ + if (res < 0) + return res; + } + + /* done. 
Return the result of the last emit function call */ + return res; +} + +static int charout(int cmd, const char *s, int sz, void *hnd) +{ + /* pointer to an integer to store the number of characters */ + int *nchars = (int *)hnd; + /* working pointer */ + const char *p = s; + + /* copy mode ? */ + if (cmd == PRINT_CMD_COPY) { + /* copy all characters until NUL is found */ + if (sz < 0) + s += console_puts(s); + + /* copy 'sz' characters */ + else + s += console_write(s, sz); + + return (*nchars += (s - p)); + } + /* fill mode */ + else { + *nchars += sz; + while (sz--) + console_putc(*s); + } + + return *nchars; +} + +int vprintf(const char *fmt, va_list args) +{ + /* struct to store all necessary parameters */ + struct print_param param; + /* the result of this function */ + int res = 0; + /* argument fo charout() */ + int nchars = 0; + + /* initialize parameters */ + memset(¶m, 0, sizeof(param)); + param.emit = charout; + param.data = &nchars; + + /* execute the printf() */ + res = do_print(fmt, ¶m, args); + + /* done */ + return res; +} + +int printf(const char *fmt, ...) +{ + /* variable argument list needed for do_print() */ + va_list args; + /* the result of this function */ + int res; + + va_start(args, fmt); + + /* execute the printf() */ + res = vprintf(fmt, args); + + /* destroy parameter list */ + va_end(args); + + /* done */ + return res; +} + +static int charmem(int cmd, const char *s, int sz, void *hnd) +{ + /* pointer to the snprint parameter list */ + struct snprint_param *param = (struct snprint_param *) hnd; + /* pointer to the destination */ + char *p = param->dst + param->wrtn; + /* characters actually written */ + int n = 0; + + /* copy mode ? */ + if (cmd == PRINT_CMD_COPY) { + if (sz < 0) { + while (*s) { + if (n < param->sz - param->wrtn) + *p = *s; + p++; + s++; + n++; + } + + } else { + while (*s && n < sz) { + if (n < param->sz - param->wrtn) + *p = *s; + p++; + s++; + n++; + } + } + + param->wrtn += n; + return n; + } + /* fill mode */ + else { + n = (sz < param->sz - param->wrtn) ? sz : 0; + param->wrtn += sz; + memset(p, *s, n); + } + + return n; +} + +int vsnprintf(char *dst, int sz, const char *fmt, va_list args) +{ + char c[1]; + /* the result of this function */ + int res = 0; + + if (sz <= 0 || !dst) { + dst = c; + sz = 1; + } + + /* struct to store all necessary parameters */ + struct print_param param; + + /* struct to store snprintf specific parameters */ + struct snprint_param snparam; + + /* initialize parameters */ + memset(&snparam, 0, sizeof(snparam)); + snparam.dst = dst; + snparam.sz = sz; + memset(¶m, 0, sizeof(param)); + param.emit = charmem; + param.data = &snparam; + + /* execute the printf() */ + if (do_print(fmt, ¶m, args) < 0) + return -1; + + /* ensure the written string is NULL terminated */ + if (snparam.wrtn < sz) + snparam.dst[snparam.wrtn] = '\0'; + else + snparam.dst[sz - 1] = '\0'; + + /* return the number of chars which would be written */ + res = snparam.wrtn; + + /* done */ + return res; +} + +int snprintf(char *dest, int sz, const char *fmt, ...) 
+{ + /* variable argument list needed for do_print() */ + va_list args; + /* the result of this function */ + int res; + + va_start(args, fmt); + + /* execute the printf() */ + res = vsnprintf(dest, sz, fmt, args); + + /* destroy parameter list */ + va_end(args); + + /* done */ + return res; +} diff --git a/hypervisor/debug/sbuf.c b/hypervisor/debug/sbuf.c new file mode 100644 index 000000000..9e9606b1d --- /dev/null +++ b/hypervisor/debug/sbuf.c @@ -0,0 +1,194 @@ +/* + * SHARED BUFFER + * + * Copyright (C) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Li Fei + * + */ + +#include +#include +#include +#include + +DEFINE_CPU_DATA(uint64_t * [ACRN_SBUF_ID_MAX], sbuf); + +static inline bool sbuf_is_empty(struct shared_buf *sbuf) +{ + return (sbuf->head == sbuf->tail); +} + +static inline uint32_t sbuf_next_ptr(uint32_t pos, + uint32_t span, uint32_t scope) +{ + pos += span; + pos = (pos >= scope) ? 
(pos - scope) : pos; + return pos; +} + +static inline uint32_t sbuf_calculate_allocate_size(uint32_t ele_num, + uint32_t ele_size) +{ + uint64_t sbuf_allocate_size; + + sbuf_allocate_size = ele_num * ele_size; + sbuf_allocate_size += SBUF_HEAD_SIZE; + if (sbuf_allocate_size > SBUF_MAX_SIZE) { + pr_err("%s, num=0x%x, size=0x%x exceed 0x%x", + __func__, ele_num, ele_size, SBUF_MAX_SIZE); + return 0; + } + + return sbuf_allocate_size; +} + +struct shared_buf *sbuf_allocate(uint32_t ele_num, uint32_t ele_size) +{ + struct shared_buf *sbuf; + uint32_t sbuf_allocate_size; + + if (!ele_num || !ele_size) { + pr_err("%s invalid parameter!", __func__); + return NULL; + } + + sbuf_allocate_size = sbuf_calculate_allocate_size(ele_num, ele_size); + if (!sbuf_allocate_size) + return NULL; + + sbuf = malloc(sbuf_allocate_size); + if (sbuf == NULL) { + pr_err("%s no memory!", __func__); + return NULL; + } + + memset(sbuf, 0, SBUF_HEAD_SIZE); + sbuf->ele_num = ele_num; + sbuf->ele_size = ele_size; + sbuf->size = ele_num * ele_size; + sbuf->magic = SBUF_MAGIC; + pr_info("%s ele_num=0x%x, ele_size=0x%x allocated", + __func__, ele_num, ele_size); + return sbuf; +} + +void sbuf_free(struct shared_buf *sbuf) +{ + if ((sbuf == NULL) || sbuf->magic != SBUF_MAGIC) { + pr_err("%s invalid parameter!", __func__); + return; + } + + sbuf->magic = 0; + free(sbuf); +} + +int sbuf_get(struct shared_buf *sbuf, uint8_t *data) +{ + const void *from; + + if ((sbuf == NULL) || (data == NULL)) + return -EINVAL; + + if (sbuf_is_empty(sbuf)) { + /* no data available */ + return 0; + } + + from = (void *)sbuf + SBUF_HEAD_SIZE + sbuf->head; + + memcpy_s((void *)data, sbuf->ele_size, from, sbuf->ele_size); + + sbuf->head = sbuf_next_ptr(sbuf->head, sbuf->ele_size, sbuf->size); + + return sbuf->ele_size; +} + +/** + * The high caller should guarantee each time there must have + * sbuf->ele_size data can be write form data and this function + * should guarantee execution atomically. + * + * flag: + * If OVERWRITE_EN set, buf can store (ele_num - 1) elements at most. + * Should use lock to guarantee that only one read or write at + * the same time. + * if OVERWRITE_EN not set, buf can store (ele_num - 1) elements + * at most. Shouldn't modify the sbuf->head. + * + * return: + * ele_size: write succeeded. + * 0: no write, buf is full + * negative: failed. + */ + +int sbuf_put(struct shared_buf *sbuf, uint8_t *data) +{ + void *to; + uint32_t next_tail; + bool trigger_overwrite = false; + + if ((sbuf == NULL) || (data == NULL)) + return -EINVAL; + + next_tail = sbuf_next_ptr(sbuf->tail, sbuf->ele_size, sbuf->size); + /* if this write would trigger overrun */ + if (next_tail == sbuf->head) { + /* accumulate overrun count if necessary */ + sbuf->overrun_cnt += sbuf->flags & OVERRUN_CNT_EN; + if (!(sbuf->flags & OVERWRITE_EN)) { + /* if not enable over write, return here. 
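A minimal sketch of the full-buffer case handled by this branch (hypothetical element count; OVERWRITE_EN not set, so the write is simply dropped):

    struct shared_buf *sb = sbuf_allocate(4, sizeof(uint8_t));
    uint8_t v;

    for (v = 0; v < 3; v++)
            (void)sbuf_put(sb, &v);  /* three puts succeed: capacity is ele_num - 1 */
    v = 3;
    sbuf_put(sb, &v);                /* next_tail == head -> buffer full, returns 0 */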
*/ + return 0; + } + trigger_overwrite = true; + } + + to = (void *)sbuf + SBUF_HEAD_SIZE + sbuf->tail; + + memcpy_s(to, sbuf->ele_size, data, sbuf->ele_size); + + if (trigger_overwrite) { + sbuf->head = sbuf_next_ptr(sbuf->head, + sbuf->ele_size, sbuf->size); + } + sbuf->tail = next_tail; + + return sbuf->ele_size; +} + +int sbuf_share_setup(uint32_t pcpu_id, uint32_t sbuf_id, uint64_t *hva) +{ + if (pcpu_id >= (uint32_t) phy_cpu_num || + sbuf_id >= ACRN_SBUF_ID_MAX) + return -EINVAL; + + per_cpu(sbuf, pcpu_id)[sbuf_id] = hva; + return 0; +} diff --git a/hypervisor/debug/serial.c b/hypervisor/debug/serial.c new file mode 100644 index 000000000..8de71f257 --- /dev/null +++ b/hypervisor/debug/serial.c @@ -0,0 +1,365 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include "serial_internal.h" + +static struct uart *sio_ports[SERIAL_MAX_DEVS]; +static uint8_t sio_initialized[SERIAL_MAX_DEVS]; + +static struct uart *get_uart_by_id(char *uart_id, uint32_t *index) +{ + /* Initialize the index to the start of array. */ + *index = 0; + + while (sio_ports[*index] != NULL) { + if (strncmp(sio_ports[*index]->tgt_uart->uart_id, uart_id, + strnlen_s(sio_ports[*index]->tgt_uart->uart_id, + SERIAL_ID_MAX_LENGTH)) == 0) + break; + + /* No device is found if index reaches end of array. */ + if (++(*index) == SERIAL_MAX_DEVS) + return NULL; + + } + return sio_ports[*index]; +} + +int serial_init(void) +{ + uint32_t index = 0; + int status = 0; + + while (index < SERIAL_MAX_DEVS) { + /* Allocate memory for generic control block of enabled UART */ + sio_ports[index] = calloc(1, sizeof(struct uart)); + + if (!sio_ports[index]) { + status = -ENOMEM; + break; + } + + sio_ports[index]->tgt_uart = &(Tgt_Uarts[index]); + + /* + * Set the open flag to false to indicate that UART port is + * not opened yet. 
+ */ + sio_ports[index]->open_flag = false; + + /* Reset the tx lock */ + spinlock_init(&sio_ports[index]->tx_lock); + + sio_ports[index]->rx_sio_queue = sbuf_allocate( + sio_ports[index]->tgt_uart->buffer_size, + sizeof(uint8_t)); + if (sio_ports[index]->rx_sio_queue != NULL) { + sbuf_set_flags(sio_ports[index]->rx_sio_queue, + OVERWRITE_EN); + + /* Call target specific initialization function */ + status = sio_ports[index]->tgt_uart-> + init(sio_ports[index]->tgt_uart); + + if (status == 0) + sio_initialized[index] = true; + } else { + status = -ENOMEM; + break; + } + + index++; + } + + return status; +} + +uint32_t serial_open(char *uart_id) +{ + int status = SERIAL_DEV_NOT_FOUND; + struct uart *uart; + uint32_t index; + + /* Get UART control block from given character ID */ + uart = get_uart_by_id(uart_id, &index); + + if (uart != NULL && index < SERIAL_MAX_DEVS && + sio_initialized[index] && + (uart->open_flag == false)) { + /* Reset the buffer lock */ + spinlock_init(&uart->buffer_lock); + + /* Configure the UART port to default settings. */ + uart->config.data_bits = DATA_8; + uart->config.stop_bits = STOP_1; + uart->config.parity_bits = PARITY_NONE; + uart->config.baud_rate = BAUD_115200; + uart->config.flow_control = FLOW_NONE; + uart->config.read_mode = SUSPEND; + + /* Open the UART hardware with default configuration. */ + status = uart->tgt_uart->open(uart->tgt_uart, &(uart->config)); + + if (status == 0) + uart->open_flag = true; + } + + /* Already open serial device */ + else if (uart != NULL && uart->open_flag == true) { + /* Reset the buffer lock */ + spinlock_init(&uart->buffer_lock); + status = 0; + } + + return (status == 0) ? + SERIAL_ENCODE_INDEX(index) : + SERIAL_INVALID_HANDLE; +} + +int serial_get_rx_data(uint32_t uart_handle) +{ + uint32_t index; + struct uart *uart; + int data_avail, rx_byte_status; + uint32_t lsr_reg, bytes_read; + uint8_t ch; + int total_bytes_read = 0; + + if (!SERIAL_VALIDATE_HANDLE(uart_handle)) + return 0; + + index = SERIAL_DECODE_INDEX(uart_handle); + if (index >= SERIAL_MAX_DEVS) + return 0; + + uart = sio_ports[index]; + if (uart == NULL) + return 0; + + /* Place all the data available in RX FIFO, in circular buffer */ + while ((data_avail = uart->tgt_uart->rx_data_is_avail( + uart->tgt_uart, &lsr_reg))) { + + /* Read the byte */ + uart->tgt_uart->read(uart->tgt_uart, (void *)&ch, &bytes_read); + + /* Get RX status for this byte */ + rx_byte_status = uart->tgt_uart->get_rx_err(lsr_reg); + + /* + * Check if discard errors in RX character + * (parity / framing errors) + */ + if (rx_byte_status >= SD_RX_PARITY_ERROR) { + /* Increase error status if bad data */ + uart->rx_error.parity_errors += + (rx_byte_status == SD_RX_PARITY_ERROR); + uart->rx_error.frame_errors += + (rx_byte_status == SD_RX_FRAME_ERROR); + } else { + /* Update the overrun errors */ + uart->rx_error.overrun_errors += + (rx_byte_status == SD_RX_OVERRUN_ERROR); + + /* Enter Critical Section */ + spinlock_obtain(&uart->buffer_lock); + + /* Put the item on circular buffer */ + sbuf_put(uart->rx_sio_queue, &ch); + + /* Exit Critical Section */ + spinlock_release(&uart->buffer_lock); + } + /* Update the total bytes read */ + total_bytes_read += bytes_read; + } + return total_bytes_read; +} + +int serial_getc(uint32_t uart_handle) +{ + uint8_t ch; + struct uart *port; + uint32_t index; + int status = SERIAL_DEV_NOT_FOUND; + + if (!SERIAL_VALIDATE_HANDLE(uart_handle)) + goto exit; + + index = SERIAL_DECODE_INDEX(uart_handle); + + if (index >= SERIAL_MAX_DEVS) + goto exit; + + 
port = sio_ports[index]; + + if (port == NULL) + goto exit; + + /* First read a character from the circular buffer regardless of the + * read mode of UART port. If status is not CBUFFER_EMPTY, character + * read from UART port is returned to the caller. Otherwise, if read + * mode is not NO_SUSPEND, thread is blocked until a character is read + * from the port. Serial target specific HISR unblocks the thread when + * a character is received and character is then read from the circular + * buffer. + */ + + /* Disable interrupts for critical section */ + spinlock_obtain(&port->buffer_lock); + + status = sbuf_get(port->rx_sio_queue, &ch); + + /* Restore interrupts to original level. */ + spinlock_release(&port->buffer_lock); + +exit: + /* Return the character read, otherwise return the error status */ + return ((status > 0) ? (int)(ch) : SERIAL_EOF); +} + +int serial_gets(uint32_t uart_handle, char *buffer, uint32_t length) +{ + char *data_read = buffer; + int c; + struct uart *port; + uint32_t index; + int status = 0; + + if ((buffer == NULL) || (length == 0)) + return 0; + + if (!SERIAL_VALIDATE_HANDLE(uart_handle)) + return 0; + + index = SERIAL_DECODE_INDEX(uart_handle); + if (index >= SERIAL_MAX_DEVS) + return 0; + + port = sio_ports[index]; + if ((port != NULL) && (port->open_flag == true)) { + for (; length > 0; data_read++, length--) { + /* Disable interrupts for critical section */ + spinlock_obtain(&port->buffer_lock); + + status = sbuf_get(port->rx_sio_queue, (uint8_t *)&c); + + /* Restore interrupts to original level. */ + spinlock_release(&port->buffer_lock); + + if (status <= 0) + break; + + /* Save character in buffer */ + *data_read = (char) c; + } + } + /* Return actual number of bytes read */ + return (int)(data_read - buffer); +} + +static int serial_putc(uint32_t uart_handle, int c) +{ + uint32_t index, bytes_written = 0; + struct uart *uart; + int busy; + + if (!SERIAL_VALIDATE_HANDLE(uart_handle)) + return SERIAL_EOF; + + index = SERIAL_DECODE_INDEX(uart_handle); + + if (index >= SERIAL_MAX_DEVS) + return SERIAL_EOF; + + uart = sio_ports[index]; + + if (uart == NULL) + return SERIAL_EOF; + + /* Wait for TX hardware to be ready */ + do { + busy = uart->tgt_uart->tx_is_busy(uart->tgt_uart); + } while (busy); + + /* Transmit character */ + uart->tgt_uart->write(uart->tgt_uart, &(c), &bytes_written); + + /* Return character written or EOF for error */ + return ((bytes_written > 0) ? c : (SERIAL_EOF)); +} + +int serial_puts(uint32_t uart_handle, const char *s, uint32_t length) +{ + const char *old_data = s; + uint32_t index; + struct uart *port; + int retval = 0; + + if ((s == NULL) || (length == 0)) + return 0; + + if (!SERIAL_VALIDATE_HANDLE(uart_handle)) + return 0; + + index = SERIAL_DECODE_INDEX(uart_handle); + + if (index >= SERIAL_MAX_DEVS) + return 0; + + port = sio_ports[index]; + + if (port == NULL) + return 0; + + /* + * Grab the semaphore so that strings between threads do not + * get mixed. + */ + spinlock_obtain(&port->tx_lock); + + /* + * Loop through the string until desired length of bytes have + * been written or SERIAL_EOF is returned. + */ + for (; length > 0 && retval != SERIAL_EOF; s++, length--) + retval = serial_putc(uart_handle, (int) *s); + + /* Allow other threads to use this service. 
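Taken together these routines form the polled console path. A minimal usage sketch (the "STDIO" identifier is hypothetical; real identifiers come from the Tgt_Uarts[] table):

    char id[] = "STDIO";
    uint32_t handle = serial_open(id);

    if (handle != SERIAL_INVALID_HANDLE) {
            serial_puts(handle, "hello\r\n", 7);
            int c = serial_getc(handle);  /* SERIAL_EOF when no byte is buffered */
    }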
*/ + spinlock_release(&port->tx_lock); + + /* Return actual number of bytes written */ + return (int)(s - old_data); +} diff --git a/hypervisor/debug/serial_internal.h b/hypervisor/debug/serial_internal.h new file mode 100644 index 000000000..431e995c5 --- /dev/null +++ b/hypervisor/debug/serial_internal.h @@ -0,0 +1,208 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SERIAL_INTER_H +#define SERIAL_INTER_H + +struct shared_buf; + +/* Maximum serial devices supported by the platform. */ +#define SERIAL_MAX_DEVS 1 + +/* Maximum length of unique id of each UART port enabled in platform. */ +#define SERIAL_ID_MAX_LENGTH 8 + +/* SERIAL error values */ +#define SERIAL_SUCCESS 0 +#define SERIAL_EOF -1 +#define SERIAL_ERROR -2 +#define SERIAL_DEV_NOT_FOUND -3 +#define INVALID_COM_PORT -4 +#define SERIAL_NO_char_AVAIL -5 + +#define SERIAL_INVALID_HANDLE 0xFFFFFFFF + +/* Pending interrupt defines */ +#define SD_NO_INTERRUPT 0 +#define SD_RX_INTERRUPT 1 + +/* RX error defines */ +#define SD_RX_NO_ERROR 0 +#define SD_RX_OVERRUN_ERROR 1 +#define SD_RX_PARITY_ERROR 2 +#define SD_RX_FRAME_ERROR 3 + +/* Defines for encoding/decoding the unique UART handle of each port. 
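A worked example of the scheme implemented by the macros below: the port index is carried in bits 8..15 on top of a fixed magic value, so for index 1:

    uint32_t h = SERIAL_ENCODE_INDEX(1);  /* 0x005500AA | (1 << 8) == 0x005501AA    */
    SERIAL_VALIDATE_HANDLE(h);            /* (h & 0xFFFF00FF) == 0x005500AA -> true */
    SERIAL_DECODE_INDEX(h);               /* (h & 0x0000FF00) >> 8 == 1             */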
*/ + +#define SERIAL_MAGIC_NUM 0x005500AA +#define SERIAL_VALIDATE_HANDLE(handle) \ + ((handle & 0xFFFF00FF) == (SERIAL_MAGIC_NUM)) +#define SERIAL_ENCODE_INDEX(index) ((SERIAL_MAGIC_NUM) | (index << 8)) +#define SERIAL_DECODE_INDEX(handle) ((handle & 0x0000FF00) >> 8) + +#define NO_SUSPEND 0 +#define SUSPEND 0xFFFFFFFFUL + +/* Enumeration values to set UART Configuration */ +typedef enum _baudenum_ { + /* Baud Rate Options */ + BAUD_110 = 110, /* not supported on OMAP5 */ + BAUD_300 = 300, + BAUD_600 = 600, + BAUD_1200 = 1200, + BAUD_2400 = 2400, + BAUD_4800 = 4800, + BAUD_9600 = 9600, + BAUD_14400 = 14400, + BAUD_19200 = 19200, + BAUD_28800 = 28800, + BAUD_38400 = 38400, + BAUD_57600 = 57600, + BAUD_115200 = 115200, + BAUD_230400 = 230400, + BAUD_460800 = 460800, + BAUD_921600 = 921600, + BAUD_1843000 = 1843000, + BAUD_36884000 = 36884000 +} BAUD_ENUM; + +typedef enum _flowenum_ { + /* Flow Control Bits */ + FLOW_NONE = 0, + FLOW_HARD = 1, + FLOW_X = 2 +} FLOW_ENUM; + +typedef enum _parityenum_ { + /* Parity Bits */ + PARITY_NONE = 0, + PARITY_ODD = 1, + PARITY_EVEN = 2, + PARITY_MARK = 3, + PARITY_SPACE = 4 +} PARITY_ENUM; + +typedef enum _stopenum_ { + /* Stop Bits */ + STOP_1 = 1, + STOP_2 = 2 +} STOP_ENUM; + +typedef enum _dataenum_ { + /* Data bits */ + DATA_7 = 7, + DATA_8 = 8 +} DATA_ENUM; + +/* Control Block definition about error in Rx data */ +struct rx_error { + uint32_t parity_errors; + uint32_t frame_errors; + uint32_t overrun_errors; + uint32_t general_errors; +}; + +/* Control Block definition for configuration specific + * parameters of UART + */ +struct uart_config { + uint32_t data_bits; + uint32_t stop_bits; + uint32_t parity_bits; + uint32_t baud_rate; + uint32_t flow_control; + + /* Read mode of UART port in interrupt mode. It can be NO_SUSPEND or + * SUSPEND or (1-4,294,967,293). SUSPEND means unlimited blocking, + * NO_SUSPEND means non-blocking and some integer value means timeout + * blocking support. By default, it is set to SUSPEND. + */ + uint32_t read_mode; + +}; + +/* Control Block definition for target specific driver + * of UART + */ +struct tgt_uart { + char uart_id[SERIAL_ID_MAX_LENGTH]; + mmio_addr_t base_address; + uint32_t clock_frequency; + uint32_t buffer_size; + unsigned int open_count; + + /* Target specific function pointers. 
*/ + int (*init)(struct tgt_uart *tgt_uart); + int (*open)(struct tgt_uart *tgt_uart, struct uart_config *config); + void (*close)(struct tgt_uart *tgt_uart); + void (*read)(struct tgt_uart *tgt_uart, + void *buffer, uint32_t *bytes_read); + void (*write)(struct tgt_uart *tgt_uart, + const void *buffer, uint32_t *bytes_written); + bool (*tx_is_busy)(struct tgt_uart *tgt_uart); + bool (*rx_data_is_avail)(struct tgt_uart *tgt_uart, uint32_t *lsr_reg); + int (*get_rx_err)(uint32_t rx_data); +}; + +/* Control Block definition of light-weight serial driver */ +struct uart { + /* Pointer to target specific Control Block of UART */ + struct tgt_uart *tgt_uart; + + /* Configuration of UART */ + struct uart_config config; + + /* Errors in data received from UART port */ + struct rx_error rx_error; + + /* Pointer to receive circular buffer */ + struct shared_buf *rx_sio_queue; + + /* Lock to provide mutual exclusion for transmitting data to UART port*/ + spinlock_t tx_lock; + + /* Lock to provide mutual exclusion for accessing shared buffer */ + spinlock_t buffer_lock; + + /* Flag to indicate whether UART port is opened or not */ + uint8_t open_flag; + +}; + +/* Null terminated array of target specific UART control blocks */ +extern struct tgt_uart Tgt_Uarts[SERIAL_MAX_DEVS]; + +uint32_t serial_open(char *uart_id); +int serial_getc(uint32_t uart_handle); +int serial_gets(uint32_t uart_handle, char *buffer, uint32_t length); +int serial_puts(uint32_t uart_handle, const char *s, uint32_t length); +int serial_get_rx_data(uint32_t uart_handle); + +#endif /* !SERIAL_INTER_H */ diff --git a/hypervisor/debug/shell_internal.c b/hypervisor/debug/shell_internal.c new file mode 100644 index 000000000..371018600 --- /dev/null +++ b/hypervisor/debug/shell_internal.c @@ -0,0 +1,1099 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include "shell_internal.h" +#include "serial_internal.h" + +#define TEMP_STR_SIZE 60 +#define MAX_STR_SIZE 256 + +#define SHELL_PROMPT_STR "ACRN:\\>" +#define NO_SERIAL_SHELL -4252 /* No serial shell enabled */ +#define KILL_SHELL -4253 /* Ends processing of shell */ + +#define SHELL_CMD_VM_ID_ERROR_MESSAGE(CMD) \ + "Syntax: "CMD" where is ID of the vm." + +/* ASCII Manipulation */ +#define SHELL_ASCII_LOWER_CASE_OFFSET 32 + +/* Input Line Other - Switch to the "other" input line (there are only two + * input lines total). + */ +#define SHELL_INPUT_LINE_OTHER(v) (((v) + 1) % 2) + +/* The initial log level*/ +uint32_t console_loglevel; +uint32_t mem_loglevel; +#ifdef CONSOLE_LOGLEVEL_DEFAULT +uint32_t console_loglevel = CONSOLE_LOGLEVEL_DEFAULT; +#endif +#ifdef MEM_LOGLEVEL_DEFAULT +uint32_t mem_loglevel = MEM_LOGLEVEL_DEFAULT; +#endif + +static int string_to_argv(char *argv_str, void *p_argv_mem, + __unused uint32_t argv_mem_size, int *p_argc, char ***p_argv) +{ + uint32_t argc; + char **argv; + char *p_ch; + + /* Setup initial argument values. */ + argc = 0; + argv = NULL; + + /* Ensure there are arguments to be processed. */ + if (argv_str == NULL) { + *p_argc = argc; + *p_argv = argv; + return -EINVAL; + } + + /* Process the argument string (there is at least one element). */ + argv = (char **)p_argv_mem; + p_ch = argv_str; + + /* Remove all spaces at the beginning of cmd*/ + while (*p_ch == ' ') { + p_ch++; + } + + while (*p_ch != 0) { + /* Add argument (string) pointer to the vector. */ + argv[argc] = p_ch; + + /* Move past the vector entry argument string (in the + * argument string). + */ + while ((*p_ch != ' ') && (*p_ch != ',') && (*p_ch != 0)) + p_ch++; + + /* Count the argument just processed. */ + argc++; + + /* Check for the end of the argument string. */ + if (*p_ch != 0) { + /* Terminate the vector entry argument string + * and move to the next. + */ + *p_ch = 0; + /* Remove all space in middile of cmdline */ + while (*++p_ch == ' ') + ; + } + } + + /* Update return parameters */ + *p_argc = argc; + *p_argv = argv; + + return 0; +} + +static uint8_t shell_input_line(struct shell *p_shell) +{ + bool done = false; + uint8_t ch; + + /* Get a character from the user. 
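A minimal sketch of what the string_to_argv() tokenizer above produces for a typical command line (parsing is destructive, so a writable copy is used, just as shell_process_cmd() does further below):

    char line[] = "vcpu_dumpreg 1 0";
    char *argv_mem[8];
    int argc;
    char **argv;

    string_to_argv(line, (void *)argv_mem, sizeof(argv_mem), &argc, &argv);
    /* argc == 3, argv[0] == "vcpu_dumpreg", argv[1] == "1", argv[2] == "0" */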
*/ + ch = p_shell->session_io.io_getc(p_shell); + + /* Check character */ + switch (ch) { + /* Backspace */ + case '\b': + /* Ensure length is not 0 */ + if (p_shell->input_line_len > 0) { + /* Reduce the length of the string by one */ + p_shell->input_line_len--; + + /* Null terminate the last character to erase it */ + p_shell->input_line[p_shell->input_line_active] + [p_shell->input_line_len] = 0; + + if (p_shell->session_io.io_echo_on == true) { + /* Echo backspace */ + p_shell->session_io.io_puts(p_shell, "\b"); + } + + /* Send a space + backspace sequence to delete + * character + */ + p_shell->session_io.io_puts(p_shell, " \b"); + } else if (p_shell->session_io.io_echo_on == false) { + /* Put last character of prompt to prevent backspace + * in terminal + */ + p_shell->session_io.io_puts(p_shell, ">"); + } + break; + + /* Carriage-return */ + case '\r': + /* See if echo is on */ + if (p_shell->session_io.io_echo_on == true) { + /* Echo carriage return / line feed */ + p_shell->session_io.io_puts(p_shell, "\r\n"); + } + + /* Set flag showing line input done */ + done = true; + + /* Reset command length for next command processing */ + p_shell->input_line_len = 0; + break; + + /* Line feed */ + case '\n': + /* Do nothing */ + break; + + /* All other characters */ + default: + /* Ensure data doesn't exceed full terminal width */ + if (p_shell->input_line_len < SHELL_CMD_MAX_LEN) { + /* See if a "standard" prINTable ASCII character received */ + if ((ch >= 32) && (ch <= 126)) { + /* Add character to string */ + p_shell->input_line[p_shell->input_line_active] + [p_shell->input_line_len] = ch; + /* See if echo is on */ + if (p_shell->session_io.io_echo_on == true) { + /* Echo back the input */ + p_shell->session_io.io_puts(p_shell, + &p_shell->input_line + [p_shell->input_line_active] + [p_shell->input_line_len]); + } + + /* Move to next character in string */ + p_shell->input_line_len++; + } else { + /* prINTable character */ + /* See if a "special" character handler is installed */ + if (p_shell->session_io.io_special) { + /* Call special character handler */ + p_shell->session_io.io_special(p_shell, ch); + } + } + } else { + /* See if echo is on */ + if (p_shell->session_io.io_echo_on == true) { + /* Echo carriage return / line feed */ + p_shell->session_io.io_puts(p_shell, "\r\n"); + } + + /* Set flag showing line input done */ + done = true; + + /* Reset command length for next command processing */ + p_shell->input_line_len = 0; + + } + break; + } + + + return done; +} + +static int shell_process(struct shell *p_shell) +{ + int status; + char *p_input_line; + + /* Check for the repeat command character in active input line. + */ + if (p_shell->input_line[p_shell->input_line_active][0] == '.') { + /* Repeat the last command (using inactive input line). + */ + p_input_line = + &p_shell->input_line[SHELL_INPUT_LINE_OTHER + (p_shell->input_line_active)][0]; + } else { + /* Process current command (using active input line). */ + p_input_line = + &p_shell->input_line[p_shell->input_line_active][0]; + + /* Switch active input line. 
*/ + p_shell->input_line_active = + SHELL_INPUT_LINE_OTHER(p_shell->input_line_active); + } + + /* Process command */ + status = shell_process_cmd(p_shell, p_input_line); + + /* Now that the command is processed, zero fill the input buffer */ + memset((void *) p_shell->input_line[p_shell->input_line_active], 0, + SHELL_CMD_MAX_LEN + 1); + + /* Process command and return result to caller */ + return status; +} + +struct shell_cmd *shell_find_cmd(struct shell *p_shell, const char *cmd_str) +{ + struct shell_cmd *p_cmd; + bool is_found = false; + struct list_head *pos; + + p_cmd = NULL; + + if (p_shell->cmd_count <= 0) + return NULL; + + list_for_each(pos, &p_shell->cmd_list) { + p_cmd = list_entry(pos, struct shell_cmd, node); + pr_dbg("shell: cmd in registered list is '%s' in %s", + p_cmd->str, __func__); + + if (strcmp(p_cmd->str, cmd_str) == 0) { + is_found = true; + break; + } + } + + if (!is_found) { + /* No commands in the list. */ + p_cmd = NULL; + } + + return p_cmd; +} + +void kick_shell(struct shell *p_shell) +{ + int status = p_shell ? 0 : EINVAL; + static uint8_t is_cmd_cmplt = 1; + + if (status == 0) { + pr_dbg("shell: entering the shell cmd " + "handling loop from function %s\n", __func__); + + /* At any given instance, UART may be owned by the HV + * OR by the guest that has enabled the vUart. + * Show HV shell prompt ONLY when HV owns the + * serial port. + */ + if (!vuart_console_active()) { + /* Prompt the user for a selection. */ + if (is_cmd_cmplt && p_shell->session_io.io_puts) + p_shell->session_io.io_puts(p_shell, + SHELL_PROMPT_STR); + + /* Get user's input */ + is_cmd_cmplt = shell_input_line(p_shell); + + /* If user has pressed the ENTER then process + * the command + */ + if (is_cmd_cmplt) + /* Process current input line. */ + status = shell_process(p_shell); + } + } else { + /* Serial port handle couldn't be obtained. Stop the shell + * task. + */ + pr_info("shell: stopping the shell task..."); + } +} + +int shell_process_cmd(struct shell *p_shell, char *p_input_line) +{ + int status = 0; + struct shell_cmd *p_cmd; + shell_cmd_fn_t cmd_fcn; + char cmd_argv_str[SHELL_CMD_MAX_LEN + 1]; + int cmd_argv_mem[sizeof(char *) * ((SHELL_CMD_MAX_LEN + 1) / 2)]; + int cmd_argc; + char **cmd_argv; + + /* Copy the input line INTo an argument string to become part of the + * argument vector. + */ + (void) strcpy_s(&cmd_argv_str[0], SHELL_CMD_MAX_LEN, p_input_line); + cmd_argv_str[SHELL_CMD_MAX_LEN] = 0; + + /* Build the argv vector from the string. The first argument in the + * resulting vector will be the command string itself. + */ + + /* NOTE: This process is destructive to the argument string! */ + + (void) string_to_argv(&cmd_argv_str[0], + (void *) &cmd_argv_mem[0], + sizeof(cmd_argv_mem), &cmd_argc, &cmd_argv); + + /* Determine if there is a command to process. */ + if (cmd_argc != 0) { + /* See if command is in the registered command list. */ + p_cmd = shell_find_cmd(p_shell, cmd_argv[0]); + + if (p_cmd != NULL) { + /* Make a copy of the command function to in case it is + * removed right before the call. + */ + cmd_fcn = p_cmd->fcn; + + /* Call the command passing the appropriate command + * arguments. 
+ */ + status = cmd_fcn(p_shell, cmd_argc, &cmd_argv[0]); + } else { /* unregistered cmd */ + p_shell->session_io.io_puts(p_shell, + "\r\nError: Invalid Command\r\n\r\n"); + } + } + + return status; +} + +int shell_init_serial(struct shell *p_shell) +{ + int status = 0; + + uint32_t serial_handle = get_serial_handle(); + + if (serial_handle != SERIAL_INVALID_HANDLE) { + p_shell->session_io.io_session_info = + (void *)(uint64_t)serial_handle; + + status = shell_set_name(p_shell, "Serial"); + } else { + status = NO_SERIAL_SHELL; + pr_err("Error: Unable to get a valid serial port handle"); + } + + /* Zero fill the input buffer */ + memset((void *)p_shell->input_line[p_shell->input_line_active], 0, + SHELL_CMD_MAX_LEN + 1); + + return status; +} + +#define SHELL_ROWS 10 +#define MAX_INDENT_LEN 16 +int shell_cmd_help(struct shell *p_shell, + __unused int argc, __unused char **argv) +{ + int status = 0; + int spaces = 0; + int i; + struct shell_cmd *p_cmd = NULL; + char space_buf[MAX_INDENT_LEN + 1]; + + /* Print title */ + shell_puts(p_shell, "\r\nRegistered Commands:\r\n\r\n"); + + pr_dbg("shell: Number of registered commands = %u in %s\n", + p_shell->cmd_count, __func__); + + memset(space_buf, ' ', sizeof(space_buf)); + /* Proceed based on the number of registered commands. */ + if (p_shell->cmd_count == 0) { + /* No registered commands */ + shell_puts(p_shell, "NONE\r\n"); + } else { + struct list_head *pos; + + i = 0; + list_for_each(pos, &p_shell->cmd_list) { + p_cmd = list_entry(pos, struct shell_cmd, node); + + /* Check if we've filled the screen with info */ + /* i + 1 used to avoid 0%SHELL_ROWS=0 */ + if (((i + 1) % SHELL_ROWS) == 0) { + /* Pause before we continue on to the next + * page. + */ + + /* Print message to the user. */ + shell_puts(p_shell, + "<**** Hit any key to continue ****>"); + + /* Wait for a character from user (NOT USED) */ + (void)p_shell->session_io.io_getc(p_shell); + + /* Print a new line after the key is hit. */ + shell_puts(p_shell, "\r\n"); + } + + i++; + + /* Output the command string */ + shell_puts(p_shell, " "); + shell_puts(p_shell, p_cmd->str); + + /* Calculate spaces needed for alignment */ + spaces = MAX_INDENT_LEN - strnlen_s(p_cmd->str, + MAX_INDENT_LEN) + 1; + + space_buf[spaces] = '\0'; + shell_puts(p_shell, space_buf); + space_buf[spaces] = ' '; + + /* Display parameter info if applicable. */ + if (p_cmd->cmd_param) { + shell_puts(p_shell, p_cmd->cmd_param); + } + + /* Display help text if available. 
*/ + if (p_cmd->help_str) { + shell_puts(p_shell, " - "); + shell_puts(p_shell, p_cmd->help_str); + } + shell_puts(p_shell, "\r\n"); + } + } + + shell_puts(p_shell, "\r\n"); + + return status; +} + +int shell_list_vm(struct shell *p_shell, + __unused int argc, __unused char **argv) +{ + int status = 0; + char temp_str[MAX_STR_SIZE]; + struct list_head *pos; + struct vm *vm; + + shell_puts(p_shell, + "\r\nVM NAME VM ID VM STATE" + "\r\n======= ===== ========\r\n"); + + spinlock_obtain(&vm_list_lock); + list_for_each(pos, &vm_list) { + char state[32]; + + vm = list_entry(pos, struct vm, list); + switch (vm->state) { + case VM_CREATED: + strcpy_s(state, 32, "Created"); break; + case VM_STARTED: + strcpy_s(state, 32, "Started"); break; + case VM_PAUSED: + strcpy_s(state, 32, "Paused"); break; + default: + strcpy_s(state, 32, "Unknown"); break; + } + /* Create output string consisting of VM name and VM id + */ + snprintf(temp_str, MAX_STR_SIZE, + "%-24s %-16d %-8s\r\n", vm->attr.name, + vm->attr.id, state); + + /* Output information for this task */ + shell_puts(p_shell, temp_str); + } + spinlock_release(&vm_list_lock); + + return status; +} + +int shell_list_vcpu(struct shell *p_shell, + __unused int argc, __unused char **argv) +{ + int status = 0; + char temp_str[MAX_STR_SIZE]; + struct list_head *pos; + struct vm *vm; + struct vcpu *vcpu; + + shell_puts(p_shell, + "\r\nVM ID PCPU ID VCPU ID VCPU ROLE VCPU STATE" + "\r\n===== ======= ======= ========= ==========\r\n"); + + spinlock_obtain(&vm_list_lock); + list_for_each(pos, &vm_list) { + char state[32]; + int i; + + vm = list_entry(pos, struct vm, list); + foreach_vcpu(i, vm, vcpu) { + switch (vcpu->state) { + case VCPU_INIT: + strcpy_s(state, 32, "Init"); break; + case VCPU_PAUSED: + strcpy_s(state, 32, "Paused"); break; + case VCPU_RUNNING: + strcpy_s(state, 32, "Running"); break; + case VCPU_ZOMBIE: + strcpy_s(state, 32, "Zombie"); break; + default: + strcpy_s(state, 32, "Unknown"); + } + /* Create output string consisting of VM name + * and VM id + */ + snprintf(temp_str, MAX_STR_SIZE, + " %-9d %-10d %-7d %-12s %-16s\r\n", + vm->attr.id, + vcpu->pcpu_id, + vcpu->vcpu_id, + is_vcpu_bsp(vcpu) ? 
+ "PRIMARY" : "SECONDARY", + state); + /* Output information for this task */ + shell_puts(p_shell, temp_str); + } + } + spinlock_release(&vm_list_lock); + + return status; +} + +int shell_pause_vcpu(struct shell *p_shell, + int argc, char **argv) +{ + int status = 0; + uint32_t vm_id, vcpu_id; + struct vm *vm; + struct vcpu *vcpu; + + /* User input invalidation */ + if (argc != 3) { + status = -EINVAL; + shell_puts(p_shell, + "Please enter correct cmd with \r\n"); + } else { + vm_id = atoi(argv[1]); + vcpu_id = atoi(argv[2]); + + vm = get_vm_from_vmid(vm_id); + if (vm) { + vcpu = vcpu_from_vid(vm, vcpu_id); + if (vcpu) { + if (vcpu->dbg_req_state != VCPU_PAUSED) { + vcpu->dbg_req_state = VCPU_PAUSED; + /* TODO: do we need file a IPI to kick + * VCPU immediately */ + shell_puts(p_shell, + "The vcpu will PAUSE in " + "next vm exit\r\n"); + } else { + shell_puts(p_shell, + "Request again, do nothing\r\n"); + } + } else { + status = -EINVAL; + shell_puts(p_shell, + "No vcpu found in the input " + "\r\n"); + } + } else { + status = -EINVAL; + shell_puts(p_shell, + "No vm found in the input " + "\r\n"); + } + } + + return status; +} + +int shell_resume_vcpu(struct shell *p_shell, + int argc, char **argv) +{ + int status = 0; + uint32_t vm_id, vcpu_id; + struct vm *vm; + struct vcpu *vcpu; + + /* User input invalidation */ + if (argc != 3) { + status = -EINVAL; + shell_puts(p_shell, + "Please enter correct cmd with \r\n"); + } else { + vm_id = atoi(argv[1]); + vcpu_id = atoi(argv[2]); + vm = get_vm_from_vmid(vm_id); + if (vm) { + vcpu = vcpu_from_vid(vm, vcpu_id); + if (vcpu) { + if (vcpu->dbg_req_state == VCPU_PAUSED) { + vcpu->dbg_req_state = 0; + shell_puts(p_shell, + "The vcpu resummed\r\n"); + } else { + shell_puts(p_shell, + "vcpu is not in debug PAUSE, " + "do nothing\r\n"); + } + } else { + status = -EINVAL; + shell_puts(p_shell, + "No vcpu found in the input " + "\r\n"); + } + } else { + status = -EINVAL; + shell_puts(p_shell, + "No vm found in the input " + "\r\n"); + } + } + + return status; +} + +int shell_vcpu_dumpreg(struct shell *p_shell, + int argc, char **argv) +{ + int status = 0; + uint32_t vm_id, vcpu_id; + char temp_str[MAX_STR_SIZE]; + struct vm *vm; + struct vcpu *vcpu; + uint64_t gpa, hpa, i; + uint64_t *tmp; + struct run_context *cur_context; + + /* User input invalidation */ + if (argc != 3) { + shell_puts(p_shell, + "Please enter correct cmd with \r\n"); + return -EINVAL; + } + + vm_id = atoi(argv[1]); + vcpu_id = atoi(argv[2]); + + vm = get_vm_from_vmid(vm_id); + if (!vm) { + shell_puts(p_shell, "No vm found in the input " + "\r\n"); + return -EINVAL; + } + + vcpu = vcpu_from_vid(vm, vcpu_id); + if (!vcpu) { + shell_puts(p_shell, "No vcpu found in the input " + "\r\n"); + return -EINVAL; + } + + cur_context = &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + + if (vcpu->state != VCPU_PAUSED) { + shell_puts(p_shell, "NOTE: VCPU unPAUSEed, regdump " + "may not be accurate\r\n"); + } + + snprintf(temp_str, MAX_STR_SIZE, + "= VM ID %d ==== CPU ID %d========================\r\n", + vm->attr.id, vcpu->vcpu_id); + shell_puts(p_shell, temp_str); + snprintf(temp_str, MAX_STR_SIZE, "= RIP=0x%016llx RSP=0x%016llx " + "RFLAGS=0x%016llx\r\n", cur_context->rip, + cur_context->rsp, cur_context->rflags); + shell_puts(p_shell, temp_str); + snprintf(temp_str, MAX_STR_SIZE, "= CR0=0x%016llx CR2=0x%016llx " + " CR3=0x%016llx\r\n", cur_context->cr0, + cur_context->cr2, cur_context->cr3); + shell_puts(p_shell, temp_str); + snprintf(temp_str, MAX_STR_SIZE, "= RAX=0x%016llx 
RBX=0x%016llx " + "RCX=0x%016llx\r\n", + cur_context->guest_cpu_regs.regs.rax, + cur_context->guest_cpu_regs.regs.rbx, + cur_context->guest_cpu_regs.regs.rcx); + shell_puts(p_shell, temp_str); + snprintf(temp_str, MAX_STR_SIZE, "= RDX=0x%016llx RDI=0x%016llx " + "RSI=0x%016llx\r\n", + cur_context->guest_cpu_regs.regs.rdx, + cur_context->guest_cpu_regs.regs.rdi, + cur_context->guest_cpu_regs.regs.rsi); + shell_puts(p_shell, temp_str); + snprintf(temp_str, MAX_STR_SIZE, "= RBP=0x%016llx R8=0x%016llx " + "R9=0x%016llx\r\n", + cur_context->guest_cpu_regs.regs.rbp, + cur_context->guest_cpu_regs.regs.r8, + cur_context->guest_cpu_regs.regs.r9); + shell_puts(p_shell, temp_str); + snprintf(temp_str, MAX_STR_SIZE, "= R10=0x%016llx R11=0x%016llx " + "R12=0x%016llx\r\n", + cur_context->guest_cpu_regs.regs.r10, + cur_context->guest_cpu_regs.regs.r11, + cur_context->guest_cpu_regs.regs.r12); + shell_puts(p_shell, temp_str); + snprintf(temp_str, MAX_STR_SIZE, + "= R13=0x%016llx R14=0x%016llx R15=0x%016llx\r\n", + cur_context->guest_cpu_regs.regs.r13, + cur_context->guest_cpu_regs.regs.r14, + cur_context->guest_cpu_regs.regs.r15); + shell_puts(p_shell, temp_str); + + /* dump sp */ + gpa = gva2gpa(vm, cur_context->cr3, + cur_context->rsp); + if (gpa == 0) { + status = -EINVAL; + shell_puts(p_shell, "Cannot handle user gva yet!\r\n"); + } else { + hpa = gpa2hpa(vm, gpa); + snprintf(temp_str, MAX_STR_SIZE, + "\r\nDump RSP for vm %d, from " + "gva 0x%016llx -> gpa 0x%016llx" + " -> hpa 0x%016llx:\r\n", + vm_id, cur_context->rsp,gpa, hpa); + shell_puts(p_shell, temp_str); + + tmp = HPA2HVA(hpa); + for (i = 0; i < 8; i++) { + snprintf(temp_str, MAX_STR_SIZE, + "= 0x%016llx 0x%016llx " + "0x%016llx 0x%016llx\r\n", + tmp[i*4], tmp[i*4+1], + tmp[i*4+2], tmp[i*4+3]); + shell_puts(p_shell, temp_str); + } + } + + return status; +} + +int shell_vcpu_dumpmem(struct shell *p_shell, + int argc, char **argv) +{ + int status = 0; + uint32_t vm_id, vcpu_id; + uint64_t gva, gpa, hpa; + uint64_t *tmp; + uint32_t i, length = 32; + char temp_str[MAX_STR_SIZE]; + struct vm *vm; + struct vcpu *vcpu; + + /* User input invalidation */ + if (argc != 4 && argc != 5) { + status = -EINVAL; + shell_puts(p_shell, + "Please enter correct cmd with " + "\r\n"); + return status; + } + + vm_id = atoi(argv[1]); + vcpu_id = atoi(argv[2]); + + vm = get_vm_from_vmid(vm_id); + if (vm == NULL) { + status = -EINVAL; + shell_puts(p_shell, + "No vm found in the input \r\n"); + return status; + } + + gva = strtoul(argv[3], NULL, 16); + + if (argc == 5) + length = atoi(argv[4]); + + vcpu = vcpu_from_vid(vm, (long)vcpu_id); + if (vcpu) { + struct run_context *cur_context = + &vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context]; + + gpa = gva2gpa(vcpu->vm, cur_context->cr3, gva); + if (gpa == 0) { + status = -EINVAL; + shell_puts(p_shell, + "Cannot handle user gva yet!\r\n"); + } else { + hpa = gpa2hpa(vcpu->vm, gpa); + snprintf(temp_str, MAX_STR_SIZE, + "Dump memory for vcpu %d, from gva 0x%016llx ->" + "gpa 0x%016llx -> hpa 0x%016llx, length " + "%d:\r\n", vcpu_id, gva, gpa, hpa, length); + shell_puts(p_shell, temp_str); + + tmp = HPA2HVA(hpa); + for (i = 0; i < length/32; i++) { + snprintf(temp_str, MAX_STR_SIZE, + "= 0x%016llx 0x%016llx 0x%016llx " + "0x%016llx\r\n", tmp[i*4], tmp[i*4+1], + tmp[i*4+2], tmp[i*4+3]); + shell_puts(p_shell, temp_str); + } + if (length > 32*(length/32)) { + snprintf(temp_str, MAX_STR_SIZE, + "= 0x%016llx 0x%016llx 0x%016llx " + "0x%016llx\r\n", tmp[i*4], tmp[i*4+1], + tmp[i*4+2], tmp[i*4+3]); + shell_puts(p_shell, 
temp_str); + } + } + } else { + status = -EINVAL; + shell_puts(p_shell, + "No vcpu found in the input \r\n"); + } + + return status; +} + +int shell_to_sos_console(struct shell *p_shell, + __unused int argc, __unused char **argv) +{ + char temp_str[TEMP_STR_SIZE]; + int guest_no = 0; + + struct vm *vm; + struct vuart *vuart; + + /* Get the virtual device node */ + vm = get_vm_from_vmid(guest_no); + if (vm == NULL) { + pr_err("Error: VM %d is not yet created/started", + guest_no); + + return -EINVAL; + } + vuart = vm->vuart; + if (vuart == NULL) { + snprintf(temp_str, TEMP_STR_SIZE, + "\r\nError: serial console driver is not " + "enabled for VM %d\r\n", + guest_no); + shell_puts(p_shell, temp_str); + } else { + /* UART is now owned by the SOS. + * Indicate by toggling the flag. + */ + vuart->active = true; + /* Output that switching to SOS shell */ + snprintf(temp_str, TEMP_STR_SIZE, + "\r\n----- Entering Guest %d Shell -----\r\n", + guest_no); + + shell_puts(p_shell, temp_str); + } + + return 0; +} + +int shell_show_cpu_int(struct shell *p_shell, + __unused int argc, __unused char **argv) +{ + char *temp_str = alloc_page(); + + get_cpu_interrupt_info(temp_str, CPU_PAGE_SIZE); + shell_puts(p_shell, temp_str); + + free(temp_str); + + return 0; +} + +int shell_show_ptdev_info(struct shell *p_shell, + __unused int argc, __unused char **argv) +{ + char *temp_str = alloc_page(); + + get_ptdev_info(temp_str, CPU_PAGE_SIZE); + shell_puts(p_shell, temp_str); + + free(temp_str); + + return 0; +} + +int shell_show_req_info(struct shell *p_shell, + __unused int argc, __unused char **argv) +{ + char *temp_str = alloc_page(); + + get_req_info(temp_str, CPU_PAGE_SIZE); + shell_puts(p_shell, temp_str); + + free(temp_str); + + return 0; +} + +int shell_show_vioapic_info(struct shell *p_shell, int argc, char **argv) +{ + char *temp_str = alloc_page(); + uint32_t vmid; + + /* User input invalidation */ + if (argc != 2) { + snprintf(temp_str, CPU_PAGE_SIZE, "\r\nvmid param needed\r\n"); + goto END; + } else + vmid = atoi(argv[1]); + + get_vioapic_info(temp_str, CPU_PAGE_SIZE, vmid); +END: + shell_puts(p_shell, temp_str); + free(temp_str); + + return 0; +} + +int shell_show_ioapic_info(struct shell *p_shell, + __unused int argc, __unused char **argv) +{ + char *temp_str = alloc_pages(2); + + get_ioapic_info(temp_str, 2 * CPU_PAGE_SIZE); + shell_puts(p_shell, temp_str); + + free(temp_str); + + return 0; +} + +int shell_show_vmexit_profile(struct shell *p_shell, + __unused int argc, __unused char **argv) +{ + char *temp_str = alloc_pages(2); + + get_vmexit_profile(temp_str, 2*CPU_PAGE_SIZE); + shell_puts(p_shell, temp_str); + + free(temp_str); + + return 0; +} + +int shell_dump_logbuf(__unused struct shell *p_shell, + int argc, char **argv) +{ + uint32_t pcpu_id; + int status = -EINVAL; + + if (argc == 2) { + pcpu_id = atoi(argv[1]); + print_logmsg_buffer(pcpu_id); + return 0; + } + + return status; +} + +int shell_get_loglevel(struct shell *p_shell, __unused int argc, __unused char **argv) +{ + char str[MAX_STR_SIZE] = {0}; + + snprintf(str, MAX_STR_SIZE, + "console_loglevel: %u, mem_loglevel: %u\r\n", + console_loglevel, mem_loglevel); + + shell_puts(p_shell, str); + + return 0; +} + +int shell_set_loglevel(struct shell *p_shell, int argc, char **argv) +{ + int status = 0; + + if (argc == 2) { + console_loglevel = atoi(argv[1]); + } else if (argc == 3) { + console_loglevel = atoi(argv[1]); + mem_loglevel = atoi(argv[2]); + } else { + status = -EINVAL; + shell_puts(p_shell, + "Please enter correct cmd with " + 
" [mem_loglevel]\r\n"); + } + + return status; +} + +int shell_terminate_serial(struct shell *p_shell) +{ + /* Shell shouldn't own the serial port handle anymore. */ + p_shell->session_io.io_session_info = NULL; + + return 0; +} + +void shell_puts_serial(struct shell *p_shell, char *string_ptr) +{ + uint32_t serial_handle = + (uint32_t)(uint64_t)p_shell->session_io.io_session_info; + + /* Output the string */ + serial_puts(serial_handle, string_ptr, + strnlen_s(string_ptr, SHELL_STRING_MAX_LEN)); +} + +uint8_t shell_getc_serial(struct shell *p_shell) +{ + uint32_t serial_handle = + (uint32_t)(uint64_t)p_shell->session_io.io_session_info; + + return serial_getc(serial_handle); +} + +void shell_special_serial(struct shell *p_shell, uint8_t ch) +{ + switch (ch) { + /* Escape character */ + case 0x1b: + /* Consume the next 2 characters */ + (void) p_shell->session_io.io_getc(p_shell); + (void) p_shell->session_io.io_getc(p_shell); + break; + default: + break; + } +} + +int shell_construct(struct shell **p_shell) +{ + int status = 0; + /* Allocate memory for shell session */ + *p_shell = (struct shell *) calloc(1, sizeof(**p_shell)); + + if (*p_shell) { + /* Zero-initialize the service control block. */ + INIT_LIST_HEAD(&(*p_shell)->cmd_list); + (*p_shell)->cmd_count = 0; + } else { + pr_err("Error: out of memory"); + status = -ENOMEM; + } + + return status; +} diff --git a/hypervisor/debug/shell_internal.h b/hypervisor/debug/shell_internal.h new file mode 100644 index 000000000..3f584dca3 --- /dev/null +++ b/hypervisor/debug/shell_internal.h @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SHELL_INTER_H +#define SHELL_INTER_H + +#include + +struct shell; + +/* Structure to hold the details about shell input and output */ +struct shell_io { + void *io_session_info; + int (*io_init)(struct shell *); + int (*io_deinit)(struct shell *); + void (*io_puts)(struct shell *, char *); + uint8_t (*io_getc)(struct shell *); + void (*io_special)(struct shell *, uint8_t); + bool io_echo_on; +}; + +#define SHELL_CMD_MAX_LEN 100 +#define SHELL_NAME_MAX_LEN 50 +#define SHELL_PARA_MAX_LEN 64 +#define SHELL_HELP_MAX_LEN 256 +#define SHELL_STRING_MAX_LEN (CPU_PAGE_SIZE << 2) + +/* Shell Control Block */ +struct shell { + struct shell_io session_io; /* Session I/O information */ + char input_line[2][SHELL_CMD_MAX_LEN + 1]; /* current & last */ + char name[SHELL_NAME_MAX_LEN]; /* Session name */ + uint32_t input_line_len; /* Length of current input line */ + uint32_t input_line_active; /* Active input line index */ + struct list_head cmd_list; /* List of registered commands */ + uint32_t cmd_count; /* Count of added commands */ +}; + +/* Shell Command Function */ +typedef int (*shell_cmd_fn_t)(struct shell *, int, char **); + +/* Shell Command */ +struct shell_cmd { + struct list_head node; /* Linked list node */ + char *str; /* Command string */ + char *cmd_param; /* Command parameter string */ + char *help_str; /* Help text associated with the command */ + shell_cmd_fn_t fcn; /* Command call-back function */ + +}; + +/* Shell Command list with parameters and help description */ +#define SHELL_CMD_HELP "help" +#define SHELL_CMD_HELP_PARAM NULL +#define SHELL_CMD_HELP_HELP "Display info about the supported shell commands." + +#define SHELL_CMD_VM_LIST "vm_list" +#define SHELL_CMD_VM_LIST_PARAM NULL +#define SHELL_CMD_VM_LIST_HELP "Lists all VMs (VM Name, VM ID, VM State)" + +#define SHELL_CMD_VCPU_LIST "vcpu_list" +#define SHELL_CMD_VCPU_LIST_PARAM NULL +#define SHELL_CMD_VCPU_LIST_HELP "Lists all VCPU in all VMs" + +#define SHELL_CMD_VCPU_PAUSE "vcpu_pause" +#define SHELL_CMD_VCPU_PAUSE_PARAM "" +#define SHELL_CMD_VCPU_PAUSE_HELP "Pause a specific vcpu" + +#define SHELL_CMD_VCPU_RESUME "vcpu_resume" +#define SHELL_CMD_VCPU_RESUME_PARAM "" +#define SHELL_CMD_VCPU_RESUME_HELP "Resume a specific vcpu" + +#define SHELL_CMD_VCPU_DUMPREG "vcpu_dumpreg" +#define SHELL_CMD_VCPU_DUMPREG_PARAM "" +#define SHELL_CMD_VCPU_DUMPREG_HELP "Dump registers for a specific vcpu" + +#define SHELL_CMD_VCPU_DUMPMEM "vcpu_dumpmem" +#define SHELL_CMD_VCPU_DUMPMEM_PARAM "" +#define SHELL_CMD_VCPU_DUMPMEM_HELP "Dump memory for a specific vcpu" + +#define SHELL_CMD_VM_CONSOLE "vm_console" +#define SHELL_CMD_VM_CONSOLE_PARAM NULL +#define SHELL_CMD_VM_CONSOLE_HELP "Switch to SOS's console" + +#define SHELL_CMD_INTERRUPT "int" +#define SHELL_CMD_INTERRUPT_PARAM NULL +#define SHELL_CMD_INTERRUPT_HELP "show interrupt info per CPU" + +#define SHELL_CMD_PTDEV "pt" +#define SHELL_CMD_PTDEV_PARAM NULL +#define SHELL_CMD_PTDEV_HELP "show pass-through device info" + +#define SHELL_CMD_REQ "lsreq" +#define SHELL_CMD_REQ_PARAM NULL +#define SHELL_CMD_REQ_HELP "show ioreq info" + +#define SHELL_CMD_IOAPIC "dump_ioapic" +#define SHELL_CMD_IOAPIC_PARAM NULL +#define SHELL_CMD_IOAPIC_HELP "show native ioapic info" + +#define SHELL_CMD_VIOAPIC "vioapic" +#define SHELL_CMD_VIOAPIC_PARAM "" +#define SHELL_CMD_VIOAPIC_HELP "show vioapic info" + +#define SHELL_CMD_VMEXIT "vmexit" +#define SHELL_CMD_VMEXIT_PARAM NULL +#define SHELL_CMD_VMEXIT_HELP "show vmexit profiling" + +#define SHELL_CMD_LOGDUMP "logdump" +#define 
SHELL_CMD_LOGDUMP_PARAM "<pcpu_id>"
+#define SHELL_CMD_LOGDUMP_HELP "log buffer dump"
+
+#define SHELL_CMD_trace "trace"
+#define SHELL_CMD_trace_PARAM " "
+#define SHELL_CMD_trace_HELP "Dump cpus recent events within millisecond"
+
+#define SHELL_CMD_GET_LOG_LVL "get_loglevel"
+#define SHELL_CMD_GET_LOG_LVL_PARAM NULL
+#define SHELL_CMD_GET_LOG_LVL_HELP "Get the loglevel"
+
+#define SHELL_CMD_SET_LOG_LVL "set_loglevel"
+#define SHELL_CMD_SET_LOG_LVL_PARAM "<console_loglevel> [mem_loglevel]"
+#define SHELL_CMD_SET_LOG_LVL_HELP "Set loglevel [0-6]"
+
+
+/* Global function prototypes */
+int shell_show_req_info(struct shell *p_shell, int argc, char **argv);
+int shell_construct(struct shell **p_shell);
+int shell_cmd_help(struct shell *p_shell, int argc, char **argv);
+int shell_reset_cmd(struct shell *p_shell, int argc, char **argv);
+int shell_list_vm(struct shell *p_shell, int argc, char **argv);
+int shell_list_vcpu(struct shell *p_shell, int argc, char **argv);
+int shell_pause_vcpu(struct shell *p_shell, int argc, char **argv);
+int shell_resume_vcpu(struct shell *p_shell, int argc, char **argv);
+int shell_vcpu_dumpreg(struct shell *p_shell, int argc, char **argv);
+int shell_vcpu_dumpmem(struct shell *p_shell, int argc, char **argv);
+int shell_boot_vm(struct shell *p_shell, int argc, char **argv);
+int shell_trace_cmd(struct shell *p_shell, int argc, char **argv);
+int shell_to_sos_console(struct shell *p_shell, int argc, char **argv);
+int shell_show_cpu_int(struct shell *p_shell, int argc, char **argv);
+int shell_show_ptdev_info(struct shell *p_shell, int argc, char **argv);
+int shell_show_vioapic_info(struct shell *p_shell, int argc, char **argv);
+int shell_show_ioapic_info(struct shell *p_shell, int argc, char **argv);
+int shell_show_vmexit_profile(struct shell *p_shell, int argc, char **argv);
+int shell_dump_logbuf(struct shell *p_shell, int argc, char **argv);
+int shell_get_loglevel(struct shell *p_shell, int argc, char **argv);
+int shell_set_loglevel(struct shell *p_shell, int argc, char **argv);
+struct shell_cmd *shell_find_cmd(struct shell *p_shell, const char *cmd);
+int shell_process_cmd(struct shell *p_shell, char *p_input_line);
+int shell_terminate_serial(struct shell *p_shell);
+int shell_init_serial(struct shell *p_shell);
+void shell_puts_serial(struct shell *p_shell, char *string_ptr);
+uint8_t shell_getc_serial(struct shell *p_shell);
+void shell_special_serial(struct shell *p_shell, uint8_t ch);
+void kick_shell(struct shell *p_shell);
+
+int shell_puts(struct shell *p_shell, char *str_ptr);
+int shell_set_name(struct shell *p_shell, char *name);
+
+#endif /* SHELL_INTER_H */
diff --git a/hypervisor/debug/shell_public.c b/hypervisor/debug/shell_public.c new file mode 100644 index 000000000..095084649 --- /dev/null +++ b/hypervisor/debug/shell_public.c @@ -0,0 +1,421 @@
+/*
+ * Copyright (C) 2018 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include "shell_internal.h" + +/* Shell that uses serial I/O */ +static struct shell *serial_session; + +static int shell_register_cmd(struct shell *p_shell, + const char *cmd, + const char *cmd_param, + const char *cmd_help_str, + int (*cmd_fcn)(struct shell *, int, char **)) +{ + int status = 0; + struct shell_cmd *p_cmd; + uint32_t cmd_mem_size; + + if ((p_shell == NULL) || (cmd == NULL) || + (cmd_help_str == NULL) || (cmd_fcn == NULL)) { + return -EINVAL; + } + + /* Check if a duplicate command exists */ + p_cmd = shell_find_cmd(p_shell, cmd); + if (p_cmd != NULL) { + /* Requested command is already registered */ + pr_err("Error: Command %s is already registered.", cmd); + status = -EINVAL; + goto exit; + } + + /* Requested command is not already registered. So allocate enough + * memory for the command structure and the command, parameter and the + * help text strings along with the corresponding null terminating + * character/s. + */ + cmd_mem_size = sizeof(struct shell_cmd) + + (strnlen_s(cmd, SHELL_CMD_MAX_LEN) + 1); + + /* If command takes any parameters, need to allocate memory for storing + * parameter string. + */ + if (cmd_param) + cmd_mem_size += strnlen_s(cmd_param, SHELL_PARA_MAX_LEN) + 1; + + /* If help text is provided for command, need to allocate memory for + * storing help string. + */ + if (cmd_help_str) + cmd_mem_size += strnlen_s(cmd_help_str, SHELL_HELP_MAX_LEN) + 1; + + p_cmd = (struct shell_cmd *) calloc(1, cmd_mem_size); + if (p_cmd == NULL) { + status = -ENOMEM; + goto exit; + } + + /* The command structure, command string, it's parameter string and + * the associated help string are all stored in contiguous memory + * locations. So the cmd string immediately follows the command + * structure.. + */ + p_cmd->str = (char *)p_cmd + sizeof(struct shell_cmd); + strncpy_s(p_cmd->str, SHELL_CMD_MAX_LEN, cmd, SHELL_CMD_MAX_LEN); + + /* Check if this command does take any parameters... */ + if (cmd_param) { + /* The command parameter string immediately follows the command + * string in memory. + */ + p_cmd->cmd_param = p_cmd->str + + (strnlen_s(cmd, SHELL_CMD_MAX_LEN) + 1); + strcpy_s(p_cmd->cmd_param, SHELL_PARA_MAX_LEN, cmd_param); + } + + /* Check if help string is provided for the command.. 
*/ + if (cmd_help_str) { + if (cmd_param) { + /* The command help string immediately follows the + * parameter string in memory | cmd_structure | + * cmd_str | param_str | help_str | + */ + p_cmd->help_str = p_cmd->cmd_param + + (strnlen_s(cmd_param, SHELL_PARA_MAX_LEN) + 1); + + strcpy_s(p_cmd->help_str, + SHELL_HELP_MAX_LEN, cmd_help_str); + } else { + /* No command parameter/s. Help string immediately + * follows the cmd string | cmd_structure | cmd_str | + * help_str | + */ + p_cmd->help_str = p_cmd->str + + (strnlen_s(cmd, SHELL_CMD_MAX_LEN) + 1); + + strcpy_s(p_cmd->help_str, + SHELL_HELP_MAX_LEN, cmd_help_str); + } + } + + /* Set the command function. */ + p_cmd->fcn = cmd_fcn; + + INIT_LIST_HEAD(&p_cmd->node); + list_add(&p_cmd->node, &p_shell->cmd_list); + + /* Update command count. */ + p_shell->cmd_count++; + + status = 0; + +exit: + return status; +} + +int shell_init(void) +{ + int status; + + status = shell_construct(&serial_session); + if (status != 0) + return status; + + /* Set the function pointers for the shell i/p and o/p functions */ + serial_session->session_io.io_init = shell_init_serial; + serial_session->session_io.io_deinit = shell_terminate_serial; + serial_session->session_io.io_puts = shell_puts_serial; + serial_session->session_io.io_getc = shell_getc_serial; + serial_session->session_io.io_special = shell_special_serial; + serial_session->session_io.io_echo_on = (bool)true; + + /* Initialize the handler for the serial port that will be used + * for shell i/p and o/p + */ + status = serial_session->session_io.io_init(serial_session); + + /* Register command handlers for the shell commands that are available + * by default + */ + if (status == 0) { + status = shell_register_cmd(serial_session, + SHELL_CMD_HELP, + SHELL_CMD_HELP_PARAM, + SHELL_CMD_HELP_HELP, + shell_cmd_help); + + if (status != 0) { + pr_err("Error: Command \"%s\" registration failed.", + SHELL_CMD_HELP); + } + + status = shell_register_cmd(serial_session, + SHELL_CMD_VM_LIST, + SHELL_CMD_VM_LIST_PARAM, + SHELL_CMD_VM_LIST_HELP, + shell_list_vm); + + if (status != 0) { + pr_err("Error: Command \"%s\" registration failed.", + SHELL_CMD_VM_LIST); + } + + status = shell_register_cmd(serial_session, + SHELL_CMD_VCPU_LIST, + SHELL_CMD_VCPU_LIST_PARAM, + SHELL_CMD_VCPU_LIST_HELP, + shell_list_vcpu); + + if (status != 0) { + pr_err("Error: Command \"%s\" registration failed.", + SHELL_CMD_VCPU_LIST); + } + + status = shell_register_cmd(serial_session, + SHELL_CMD_VCPU_PAUSE, + SHELL_CMD_VCPU_PAUSE_PARAM, + SHELL_CMD_VCPU_PAUSE_HELP, + shell_pause_vcpu); + + if (status != 0) { + pr_err("Error: Command \"%s\" registration failed.", + SHELL_CMD_VCPU_PAUSE); + } + + status = shell_register_cmd(serial_session, + SHELL_CMD_VCPU_RESUME, + SHELL_CMD_VCPU_RESUME_PARAM, + SHELL_CMD_VCPU_RESUME_HELP, + shell_resume_vcpu); + + if (status != 0) { + pr_err("Error: Command \"%s\" registration failed.", + SHELL_CMD_VCPU_RESUME); + } + + status = shell_register_cmd(serial_session, + SHELL_CMD_VCPU_DUMPREG, + SHELL_CMD_VCPU_DUMPREG_PARAM, + SHELL_CMD_VCPU_DUMPREG_HELP, + shell_vcpu_dumpreg); + + if (status != 0) { + pr_err("Error: Command \"%s\" registration failed.", + SHELL_CMD_VCPU_DUMPREG); + } + + status = shell_register_cmd(serial_session, + SHELL_CMD_VCPU_DUMPMEM, + SHELL_CMD_VCPU_DUMPMEM_PARAM, + SHELL_CMD_VCPU_DUMPMEM_HELP, + shell_vcpu_dumpmem); + + if (status != 0) { + pr_err("Error: Command \"%s\" registration failed.", + SHELL_CMD_VCPU_DUMPMEM); + } + + status = shell_register_cmd(serial_session, + 
SHELL_CMD_VM_CONSOLE, + SHELL_CMD_VM_CONSOLE_PARAM, + SHELL_CMD_VM_CONSOLE_HELP, + shell_to_sos_console); + + if (status != 0) { + pr_err("Error: Command \"%s\" registration failed.", + SHELL_CMD_VM_CONSOLE); + } + + status = shell_register_cmd(serial_session, + SHELL_CMD_INTERRUPT, + SHELL_CMD_INTERRUPT_PARAM, + SHELL_CMD_INTERRUPT_HELP, + shell_show_cpu_int); + + if (status != 0) { + pr_err("Error: Command \"%s\" registration failed.", + SHELL_CMD_INTERRUPT); + } + + status = shell_register_cmd(serial_session, + SHELL_CMD_PTDEV, + SHELL_CMD_PTDEV_PARAM, + SHELL_CMD_PTDEV_HELP, + shell_show_ptdev_info); + + if (status != 0) { + pr_err("Error: Command \"%s\" registration failed.", + SHELL_CMD_PTDEV); + } + + status = shell_register_cmd(serial_session, + SHELL_CMD_REQ, + SHELL_CMD_REQ_PARAM, + SHELL_CMD_REQ_HELP, + shell_show_req_info); + + if (status != 0) { + pr_err("Error: Command \"%s\" registration failed.", + SHELL_CMD_REQ); + } + + status = shell_register_cmd(serial_session, + SHELL_CMD_VIOAPIC, + SHELL_CMD_VIOAPIC_PARAM, + SHELL_CMD_VIOAPIC_HELP, + shell_show_vioapic_info); + + if (status != 0) { + pr_err("Error: Command \"%s\" registration failed.", + SHELL_CMD_VIOAPIC); + } + + status = shell_register_cmd(serial_session, + SHELL_CMD_IOAPIC, + SHELL_CMD_IOAPIC_PARAM, + SHELL_CMD_IOAPIC_HELP, + shell_show_ioapic_info); + if (status != 0) { + pr_err("Error: Command \"%s\" registration failed.", + SHELL_CMD_IOAPIC); + } + + status = shell_register_cmd(serial_session, + SHELL_CMD_VMEXIT, + SHELL_CMD_VMEXIT_PARAM, + SHELL_CMD_VMEXIT_HELP, + shell_show_vmexit_profile); + + if (status != 0) { + pr_err("Error: Command \"%s\" registration failed.", + SHELL_CMD_VMEXIT); + } + + status = shell_register_cmd(serial_session, + SHELL_CMD_LOGDUMP, + SHELL_CMD_LOGDUMP_PARAM, + SHELL_CMD_LOGDUMP_HELP, + shell_dump_logbuf); + + if (status != 0) { + pr_err("Error: Command \"%s\" registration failed.", + SHELL_CMD_LOGDUMP); + } + + status = shell_register_cmd(serial_session, + SHELL_CMD_GET_LOG_LVL, + SHELL_CMD_GET_LOG_LVL_PARAM, + SHELL_CMD_GET_LOG_LVL_HELP, + shell_get_loglevel); + + if (status != 0) { + pr_err("Error: Command \"%s\" registration failed.", + SHELL_CMD_GET_LOG_LVL); + } + + status = shell_register_cmd(serial_session, + SHELL_CMD_SET_LOG_LVL, + SHELL_CMD_SET_LOG_LVL_PARAM, + SHELL_CMD_SET_LOG_LVL_HELP, + shell_set_loglevel); + + if (status != 0) { + pr_err("Error: Command \"%s\" registration failed.", + SHELL_CMD_SET_LOG_LVL); + } + } + + return status; +} + +int shell_puts(struct shell *p_shell, char *str_ptr) +{ + int status; + + if ((p_shell != NULL) && (p_shell->session_io.io_puts != NULL) && + (str_ptr != NULL)) { + /* Transmit data using this shell session's 'puts' function */ + p_shell->session_io.io_puts(p_shell, str_ptr); + + status = 0; + } else { + /* Error: Invalid request */ + status = -EINVAL; + + } + + return status; +} + +int shell_set_name(struct shell *p_shell, char *name) +{ + int status; + + if ((p_shell != NULL) && (name != NULL)) { + strncpy_s((void *) p_shell->name, SHELL_NAME_MAX_LEN, + (void *) name, SHELL_NAME_MAX_LEN - 1); + + /* Ensure null terminated string */ + p_shell->name[SHELL_NAME_MAX_LEN - 1] = 0; + + status = 0; + } else { + status = -EINVAL; + } + + return status; +} + +void shell_kick_session(void) +{ + /* Kick the shell */ + kick_shell(serial_session); +} + +int shell_switch_console(void) +{ + struct vuart *vuart; + + vuart = vuart_console_active(); + if (vuart == NULL) + return -EINVAL; + + vuart->active = false; + /* Output that switching to 
ACRN shell */ + shell_puts(serial_session, + "\r\n\r\n----- Entering ACRN Shell -----\r\n"); + return 0; +} diff --git a/hypervisor/debug/uart16550.c b/hypervisor/debug/uart16550.c new file mode 100644 index 000000000..d693a6732 --- /dev/null +++ b/hypervisor/debug/uart16550.c @@ -0,0 +1,347 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include "uart16550.h" +#include "serial_internal.h" + +/* Mapping of 16c550 write-only registers to appropriate structure members */ +#define THR_IDX RBR_IDX +#define IIR_IDX FCR_IDX +#define DLL_IDX RBR_IDX +#define DLM_IDX IER_IDX + +#if defined(CONFIG_SERIAL_PIO_BASE) +static int serial_port_mapped = 1; +static int uart_enabled = 1; +#define UART_BASE_ADDRESS CONFIG_SERIAL_PIO_BASE +#elif defined(CONFIG_SERIAL_MMIO_BASE) +static int serial_port_mapped; +static int uart_enabled = 1; +#define UART_BASE_ADDRESS CONFIG_SERIAL_MMIO_BASE +#else +#warning "no uart base configure, please check!" 
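+/*
+ * Neither CONFIG_SERIAL_PIO_BASE nor CONFIG_SERIAL_MMIO_BASE is defined:
+ * fall back to a disabled UART with a zero base address.  It can still be
+ * enabled and given a real base at runtime through uart16550_set_property().
+ */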
+static int serial_port_mapped; +static int uart_enabled; +#define UART_BASE_ADDRESS 0 +#endif + +typedef uint32_t uart_reg_t; + +enum UART_REG_IDX{ + RBR_IDX, /* 0 */ + IER_IDX, /* 1 */ + FCR_IDX, /* 2 */ + LCR_IDX, /* 3 */ + MCR_IDX, /* 4 */ + ISR_IDX, /* 5 */ + MSR_IDX, /* 6 */ + SPR_IDX, /* 7 */ + MDR1_IDX, /* 8 */ + REG9_IDX, /* 9 */ + REGA_IDX, /* A */ + REGB_IDX, /* B */ + REGC_IDX, /* C */ + REGD_IDX, /* D */ + REGE_IDX, /* E */ + UASR_IDX, /* F */ + SCR_IDX, /* 10*/ + SSR_IDX, /* 11*/ + REG12_IDX, /* 12*/ + OSC_12M_SEL_IDX, /* 13*/ +}; + +/* CPU oscillator clock */ +#define CPU_OSC_CLOCK 1843200 /* 1.8432 MHz */ + +/* UART hardware definitions */ +#define UART_CLOCK_RATE CPU_OSC_CLOCK +#define UART_BUFFER_SIZE 2048 + +static inline uint32_t uart16550_read_reg(uint32_t base, uint32_t reg_idx) +{ + if (serial_port_mapped) { + return io_read_byte((ioport_t) + ((uint8_t *)(uint64_t)base + reg_idx)); + } else { + return mmio_read_long((mmio_addr_t) + ((uint32_t *)(uint64_t)base + reg_idx)); + } +} + +static inline void uart16550_write_reg(uint32_t base, + uint32_t val, uint32_t reg_idx) +{ + if (serial_port_mapped) { + io_write_byte(val, (ioport_t) + ((uint8_t *)(uint64_t)base + reg_idx)); + } else { + mmio_write_long(val, (mmio_addr_t) + ((uint32_t *)(uint64_t)base + reg_idx)); + } +} + +static void uart16550_enable(__unused struct tgt_uart *tgt_uart) +{ +} + +static int uart16550_calc_baud_div(__unused struct tgt_uart *tgt_uart, + uint32_t ref_freq, uint32_t *baud_div_ptr, uint32_t baud_rate) +{ + uint32_t baud_multiplier = baud_rate < BAUD_460800 ? 16 : 13; + + *baud_div_ptr = ref_freq / (baud_multiplier * baud_rate); + + return 0; +} + +static int uart16550_set_baud_rate(struct tgt_uart *tgt_uart, + uint32_t baud_rate) +{ + int status; + uint32_t baud_div, duart_clock = CPU_OSC_CLOCK; + uart_reg_t temp_reg; + + /* Calculate baud divisor */ + status = uart16550_calc_baud_div( + tgt_uart, duart_clock, &baud_div, baud_rate); + + if (status == 0) { + /* Enable DLL and DLM registers for setting the Divisor */ + temp_reg = uart16550_read_reg(tgt_uart->base_address, LCR_IDX); + temp_reg |= LCR_DLAB; + uart16550_write_reg(tgt_uart->base_address, temp_reg, LCR_IDX); + + /* Write the appropriate divisor value */ + uart16550_write_reg(tgt_uart->base_address, + ((baud_div >> 8) & 0xFF), DLM_IDX); + uart16550_write_reg(tgt_uart->base_address, + (baud_div & 0xFF), DLL_IDX); + + /* Disable DLL and DLM registers */ + temp_reg &= ~LCR_DLAB; + uart16550_write_reg(tgt_uart->base_address, temp_reg, LCR_IDX); + } + + return status; +} + +static int uart16550_init(struct tgt_uart *tgt_uart) +{ + int status = 0; + + if (!uart_enabled) { + /*uart will not be used */ + status = -ENODEV; + } else { + if (strcmp(tgt_uart->uart_id, "STDIO") == 0) { + atomic_set_int(&tgt_uart->open_count, 0); + } else { + /* set open count to 1 to prevent open */ + atomic_set_int(&tgt_uart->open_count, 1); + status = -EINVAL; + } + } + + return status; +} + +static int uart16550_open(struct tgt_uart *tgt_uart, + struct uart_config *config) +{ + uint32_t temp32; + int status = 0; + + if (strcmp(tgt_uart->uart_id, "STDIO") == 0) { + if (atomic_cmpxchg_int(&tgt_uart->open_count, 0, 1) != 0) + return -EBUSY; + + /* Call UART setup function */ + /* Enable TX and RX FIFOs */ + uart16550_write_reg(tgt_uart->base_address, + FCR_FIFOE | FCR_RFR | FCR_TFR, FCR_IDX); + + /* Set parity value */ + if (config->parity_bits == PARITY_ODD) { + /* Odd parity */ + temp32 = LCR_PARITY_ODD; + } else if (config->parity_bits == PARITY_EVEN) { + /* 
Even parity */ + temp32 = LCR_PARITY_EVEN; + } else { + /* No parity */ + temp32 = LCR_PARITY_NONE; + } + + /* Set Data length */ + if (config->data_bits == DATA_7) { + /* Set bits for 7 data bits */ + temp32 |= LCR_WL7; + } else { + /* Set bits for 8 data bits */ + temp32 |= LCR_WL8; + } + + /* Check for 1 stop bit */ + if (config->stop_bits == STOP_1) { + /* Set bits for 1 stop bit */ + temp32 |= LCR_NB_STOP_BITS_1; + } else { + /* Set bits for 2 stop bits */ + temp32 |= LCR_NB_STOP_BITS_2; + } + + /* Set-up data bits / parity / stop bits. */ + uart16550_write_reg(tgt_uart->base_address, + temp32, LCR_IDX); + + /* Disable interrupts (we use polling) */ + uart16550_write_reg(tgt_uart->base_address, + UART_IER_DISABLE_ALL, IER_IDX); + + /* Set baud rate */ + uart16550_set_baud_rate(tgt_uart, config->baud_rate); + + /* Data terminal ready + Request to send */ + uart16550_write_reg(tgt_uart->base_address, + MCR_RTS | MCR_DTR, MCR_IDX); + + /* Enable the UART hardware */ + uart16550_enable(tgt_uart); + } else { + status = -ENODEV; + } + + return status; +} + +static int uart16550_get_rx_err(uint32_t rx_data) +{ + int rx_status = SD_RX_NO_ERROR; + + /* Check for RX overrun error */ + if ((rx_data & LSR_OE)) + rx_status |= SD_RX_OVERRUN_ERROR; + + /* Check for RX parity error */ + if ((rx_data & LSR_PE)) + rx_status |= SD_RX_PARITY_ERROR; + + /* Check for RX frame error */ + if ((rx_data & LSR_FE)) + rx_status |= SD_RX_FRAME_ERROR; + + /* Return the rx status */ + return rx_status; +} + +static void uart16550_close(struct tgt_uart *tgt_uart) +{ + if (tgt_uart != NULL) { + if (atomic_cmpxchg_int(&tgt_uart->open_count, 1, 0) == 1) { + /* TODO: Add logic to disable the UART */ + } + } +} + +static void uart16550_read(struct tgt_uart *tgt_uart, void *buffer, + uint32_t *bytes_read) +{ + /* If a character has been received, read it */ + if ((uart16550_read_reg(tgt_uart->base_address, ISR_IDX) & LSR_DR) + == LSR_DR) { + /* Read a character */ + *(uint8_t *)buffer = + uart16550_read_reg(tgt_uart->base_address, RBR_IDX); + + /* Read 1 byte */ + *bytes_read = 1; + } else { + *bytes_read = 0; + } +} + +static void uart16550_write(struct tgt_uart *tgt_uart, + const void *buffer, uint32_t *bytes_written) +{ + /* Ensure there are no further Transmit buffer write requests */ + do { + } while (!(uart16550_read_reg(tgt_uart->base_address, + ISR_IDX) & LSR_THRE)); + + /* Transmit the character. */ + uart16550_write_reg(tgt_uart->base_address, + *(uint8_t *)buffer, THR_IDX); + + if (bytes_written != NULL) + *bytes_written = 1; +} + +static bool uart16550_tx_is_busy(struct tgt_uart *tgt_uart) +{ + return ((uart16550_read_reg(tgt_uart->base_address, ISR_IDX) & + (LSR_TEMT)) == 0) ? true : false; +} + +static bool uart16550_rx_data_is_avail(struct tgt_uart *tgt_uart, + uint32_t *lsr_reg) +{ + *(uart_reg_t *)lsr_reg = + uart16550_read_reg(tgt_uart->base_address, ISR_IDX); + return ((*(uart_reg_t *)lsr_reg & LSR_DR) == LSR_DR) ? 
true : false; +} + +struct tgt_uart Tgt_Uarts[SERIAL_MAX_DEVS] = { + { + .uart_id = "STDIO", + .base_address = UART_BASE_ADDRESS, + .clock_frequency = UART_CLOCK_RATE, + .buffer_size = UART_BUFFER_SIZE, + .init = uart16550_init, + .open = uart16550_open, + .close = uart16550_close, + .read = uart16550_read, + .write = uart16550_write, + .tx_is_busy = uart16550_tx_is_busy, + .rx_data_is_avail = uart16550_rx_data_is_avail, + .get_rx_err = uart16550_get_rx_err, + + } +}; + +void uart16550_set_property(int enabled, int port_mapped, uint64_t base_addr) +{ + uart_enabled = enabled; + serial_port_mapped = port_mapped; + Tgt_Uarts[0].base_address = (uint32_t) base_addr; +} diff --git a/hypervisor/debug/uart16550.h b/hypervisor/debug/uart16550.h new file mode 100644 index 000000000..9a910c3d8 --- /dev/null +++ b/hypervisor/debug/uart16550.h @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef UART16550_H +#define UART16550_H + +/* Register / bit definitions for 16c550 uart */ +#define UART16550_RBR 0x00 +/*receive buffer register | base+00h, dlab=0b r*/ +#define UART16550_THR 0x00 +/*transmit holding register | base+00h, dlab=0b w*/ +#define UART16550_DLL 0x00 +/*divisor least significant byte | base+00h, dlab=1b rw*/ +#define UART16550_IER 0x01 +/*interrupt enable register | base+01h, dlab=0b rw*/ +#define UART16550_DLM 0x01 +/*divisor most significant byte | base+01h, dlab=1b rw*/ +#define UART16550_IIR 0x02 +/*interrupt identification register | base+02h, dlab=0b r*/ +#define UART16550_FCR 0x02 +/*fifo control register | base+02h, dlab=0b w*/ +#define UART16550_LCR 0x03 +/*line control register | base+03h, dlab=xb rw*/ +#define UART16550_MCR 0x04 +/*modem control register, only uart0 | base+04h, dlab=xb rw*/ +#define UART16550_LSR 0x05 +/*line status register | base+05h, dlab=xb r*/ +#define UART16550_MSR 0x06 +/*modem status register, only uart0 | base+06h, dlab=xb r*/ +#define UART16550_SCR 0x07 +/*scratch pad register | base+07h, dlab=xb rw*/ +#define UART16550_MDR1 0x08 +#define UARTML7213_BRCSR 0x0e +/*baud rate reference clock select register dlab xb*/ +#define UARTML7213_SRST 0x0f /*Soft Reset Register dlab xb*/ + +/* value definitions for IIR */ +#define IIR_FIFO_MASK 0xc0 /* set if FIFOs are enabled */ +#define IIR_RXTOUT 0x0c +#define IIR_RLS 0x06 +#define IIR_RXRDY 0x04 +#define IIR_TXRDY 0x02 +#define IIR_NOPEND 0x01 +#define IIR_MLSC 0x00 + +#define IER_EDSSI (0x0008) +/*enable/disable modem status interrupt*/ +#define IER_ELSI (0x0004) +/*enable/disable receive data error interrupt*/ +#define IER_ETBEI (0x0002) +/*enable/disable transmit data write request interrupt*/ +#define IER_ERBFI (0x0001) +/*enable/disable receive data read request interrupt*/ + +/* definition for LCR */ +#define LCR_DLAB (1 << 7) /*DLAB THR/RBR&IER or DLL&DLM= Bit 7*/ +#define LCR_SB (1 << 6) /*break control on/off= Bit 6*/ +#define LCR_SP (1 << 5) /*Specifies the operation of parity bit*/ +#define LCR_EPS (1 << 4) /*Specifies the logic of a parity bit*/ +#define LCR_PEN (1 << 3) /*Specifies whether to add a parity bit*/ +#define LCR_STB (1 << 2) /*stop bit length*/ +#define LCR_WL8 (0x03) /*number of bits of serial data*/ +#define LCR_WL7 (0x02) /*number of bits of serial data*/ +#define LCR_WL6 (0x01) /*number of bits of serial data*/ +#define LCR_WL5 (0x00) /*number of bits of serial data*/ +#define LCR_PARITY_ODD (LCR_PEN) +#define LCR_PARITY_NONE 0x0 +#define LCR_PARITY_EVEN (LCR_PEN | LCR_EPS) +#define LCR_NB_STOP_BITS_1 0x0 +#define LCR_NB_STOP_BITS_2 (LCR_STB) + +/* bit definitions for LSR */ +/* at least one error in data within fifo */ +#define LSR_ERR (1 << 7) +/* Transmit data Present */ +#define LSR_TEMT (1 << 6) +/* Transmit data write request present */ +#define LSR_THRE (1 << 5) +/* Break interrupt data Present */ +#define LSR_BI (1 << 4) +/* Framing Error Occurred */ +#define LSR_FE (1 << 3) +/* Parity Error Occurred */ +#define LSR_PE (1 << 2) +/* Overrun error */ +#define LSR_OE (1 << 1) +/* Readable received data is present */ +#define LSR_DR (1 << 0) + +/* definition for MCR */ +#define MCR_RTS (1 << 1) /* Request to Send */ +#define MCR_DTR (1 << 0) /* Data Terminal Ready */ + +/* definition for FCR */ +#define FCR_RX_MASK 0xc0 +#define FCR_DMA (1 << 3) +#define FCR_TFR (1 << 2) /* Reset Transmit Fifo */ +#define FCR_RFR (1 << 1) /* Reset Receive Fifo */ +#define FCR_FIFOE (1 << 0) /* Fifo Enable */ + +#define UART_IER_DISABLE_ALL 0x00000000 + 
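+/*
+ * Worked example of the divisor programming done by uart16550_set_baud_rate()
+ * with the registers above: using the 1.8432 MHz reference clock and a target
+ * of 115200 baud,
+ *
+ *     divisor = 1843200 / (16 * 115200) = 1   ->  DLM = 0x00, DLL = 0x01
+ *
+ * The divisor latch (UART16550_DLL/UART16550_DLM) is only reachable while
+ * LCR_DLAB is set; clearing LCR_DLAB afterwards maps offset 0 back to
+ * RBR/THR and offset 1 back to IER.
+ */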
+#endif /* !UART16550_H */ diff --git a/hypervisor/debug/vuart.c b/hypervisor/debug/vuart.c new file mode 100644 index 000000000..96464f727 --- /dev/null +++ b/hypervisor/debug/vuart.c @@ -0,0 +1,398 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * Copyright (c) 2013 Neel Natu + * Copyright (c) 2018 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include + +#include "uart16550.h" +#include "serial_internal.h" + +#define COM1_BASE 0x3F8 +#define COM1_IRQ 4 +#define DEFAULT_RCLK 1843200 +#define DEFAULT_BAUD 9600 +#define RX_SIZE 256 +#define TX_SIZE 65536 + +#define vuart_lock_init(vu) spinlock_init(&((vu)->lock)) +#define vuart_lock(vu) spinlock_obtain(&((vu)->lock)) +#define vuart_unlock(vu) spinlock_release(&((vu)->lock)) + +#define vm_vuart(vm) (vm->vuart) + +static void fifo_reset(struct fifo *fifo) +{ + fifo->rindex = 0; + fifo->windex = 0; + fifo->num = 0; +} + +static void fifo_init(struct fifo *fifo, int sz) +{ + fifo->buf = calloc(1, sz); + ASSERT(fifo->buf != NULL, ""); + fifo->size = sz; + fifo_reset(fifo); +} + +static char fifo_putchar(struct fifo *fifo, char ch) +{ + fifo->buf[fifo->windex] = ch; + if (fifo->num < fifo->size) { + fifo->windex = (fifo->windex + 1) % fifo->size; + fifo->num++; + } else { + fifo->rindex = (fifo->rindex + 1) % fifo->size; + fifo->windex = (fifo->windex + 1) % fifo->size; + } + return 0; +} + +static char fifo_getchar(struct fifo *fifo) +{ + char c; + + if (fifo->num > 0) { + c = fifo->buf[fifo->rindex]; + fifo->rindex = (fifo->rindex + 1) % fifo->size; + fifo->num--; + return c; + } else + return -1; +} + +static int fifo_numchars(struct fifo *fifo) +{ + return fifo->num; +} + +/* + * The IIR returns a prioritized interrupt reason: + * - receive data available + * - transmit holding register empty + * + * Return an interrupt reason if one is available. 
+ */ +static int uart_intr_reason(struct vuart *vu) +{ + if ((vu->lsr & LSR_OE) != 0 && (vu->ier & IER_ELSI) != 0) + return IIR_RLS; + else if (fifo_numchars(&vu->rxfifo) > 0 && (vu->ier & IER_ERBFI) != 0) + return IIR_RXTOUT; + else if (vu->thre_int_pending && (vu->ier & IER_ETBEI) != 0) + return IIR_TXRDY; + else + return IIR_NOPEND; +} + +static void uart_init(struct vuart *vu) +{ + uint16_t divisor; + + divisor = DEFAULT_RCLK / DEFAULT_BAUD / 16; + vu->dll = divisor; + vu->dlh = divisor >> 16; + + vu->active = false; + vu->base = COM1_BASE; + fifo_init(&vu->rxfifo, RX_SIZE); + fifo_init(&vu->txfifo, TX_SIZE); + vuart_lock_init(vu); +} + +/* + * Toggle the COM port's intr pin depending on whether or not we have an + * interrupt condition to report to the processor. + */ +static void uart_toggle_intr(struct vuart *vu) +{ + char intr_reason; + + intr_reason = uart_intr_reason(vu); + + if (intr_reason != IIR_NOPEND) { + if (vu->vm->vpic) + vpic_assert_irq(vu->vm, COM1_IRQ); + + vioapic_assert_irq(vu->vm, COM1_IRQ); + if (vu->vm->vpic) + vpic_deassert_irq(vu->vm, COM1_IRQ); + + vioapic_deassert_irq(vu->vm, COM1_IRQ); + } +} + +static void uart_write(__unused struct vm_io_handler *hdlr, + struct vm *vm, ioport_t offset, + __unused size_t width, uint32_t value) +{ + struct vuart *vu = vm_vuart(vm); + offset -= vu->base; + vuart_lock(vu); + /* + * Take care of the special case DLAB accesses first + */ + if ((vu->lcr & LCR_DLAB) != 0) { + if (offset == UART16550_DLL) { + vu->dll = value; + goto done; + } + + if (offset == UART16550_DLM) { + vu->dlh = value; + goto done; + } + } + + switch (offset) { + case UART16550_THR: + fifo_putchar(&vu->txfifo, value); + vu->thre_int_pending = true; + break; + case UART16550_IER: + /* + * Apply mask so that bits 4-7 are 0 + * Also enables bits 0-3 only if they're 1 + */ + vu->ier = value & 0x0F; + break; + case UART16550_FCR: + /* + * The FCR_ENABLE bit must be '1' for the programming + * of other FCR bits to be effective. + */ + if ((value & FCR_FIFOE) == 0) { + vu->fcr = 0; + } else { + if ((value & FCR_RFR) != 0) + fifo_reset(&vu->rxfifo); + + vu->fcr = value & + (FCR_FIFOE | FCR_DMA | FCR_RX_MASK); + } + break; + case UART16550_LCR: + vu->lcr = value; + break; + case UART16550_MCR: + /* ignore modem */ + break; + case UART16550_LSR: + /* + * Line status register is not meant to be written to + * during normal operation. + */ + break; + case UART16550_MSR: + /* + * As far as I can tell MSR is a read-only register. + */ + break; + case UART16550_SCR: + vu->scr = value; + break; + default: + break; + } + +done: + uart_toggle_intr(vu); + vuart_unlock(vu); +} + +static uint32_t uart_read(__unused struct vm_io_handler *hdlr, + struct vm *vm, ioport_t offset, + __unused size_t width) +{ + char iir, intr_reason, reg; + struct vuart *vu = vm_vuart(vm); + offset -= vu->base; + vuart_lock(vu); + /* + * Take care of the special case DLAB accesses first + */ + if ((vu->lcr & LCR_DLAB) != 0) { + if (offset == UART16550_DLL) { + reg = vu->dll; + goto done; + } + + if (offset == UART16550_DLM) { + reg = vu->dlh; + goto done; + } + } + switch (offset) { + case UART16550_RBR: + vu->lsr &= ~LSR_OE; + reg = fifo_getchar(&vu->rxfifo); + break; + case UART16550_IER: + reg = vu->ier; + break; + case UART16550_IIR: + iir = (vu->fcr & FCR_FIFOE) ? 
IIR_FIFO_MASK : 0; + intr_reason = uart_intr_reason(vu); + /* + * Deal with side effects of reading the IIR register + */ + if (intr_reason == IIR_TXRDY) + vu->thre_int_pending = false; + iir |= intr_reason; + reg = iir; + break; + case UART16550_LCR: + reg = vu->lcr; + break; + case UART16550_MCR: + reg = vu->mcr; + break; + case UART16550_LSR: + /* Transmitter is always ready for more data */ + vu->lsr |= LSR_TEMT | LSR_THRE; + /* Check for new receive data */ + if (fifo_numchars(&vu->rxfifo) > 0) + vu->lsr |= LSR_DR; + else + vu->lsr &= ~LSR_DR; + reg = vu->lsr; + /* The LSR_OE bit is cleared on LSR read */ + vu->lsr &= ~LSR_OE; + break; + case UART16550_MSR: + /* ignore modem I*/ + reg = 0; + break; + case UART16550_SCR: + reg = vu->scr; + break; + default: + reg = 0xFF; + break; + } +done: + uart_toggle_intr(vu); + vuart_unlock(vu); + return reg; +} + +void vuart_register_io_handler(struct vm *vm) +{ + struct vm_io_range range = { + .flags = IO_ATTR_RW, + .base = 0x3f8, + .len = 8 + }; + + register_io_emulation_handler(vm, &range, uart_read, uart_write); +} + +void vuart_console_tx_chars(void) +{ + struct vuart *vu; + + vu = vuart_console_active(); + if (vu == NULL) + return; + + vuart_lock(vu); + while (fifo_numchars(&vu->txfifo) > 0) + printf("%c", fifo_getchar(&vu->txfifo)); + vuart_unlock(vu); +} + +void vuart_console_rx_chars(uint32_t serial_handle) +{ + struct vuart *vu; + uint32_t vbuf_len; + char buffer[100]; + uint32_t buf_idx = 0; + + if (serial_handle == SERIAL_INVALID_HANDLE) { + pr_err("%s: invalid serial handle 0x%llx\n", + __func__, serial_handle); + return; + } + + vu = vuart_console_active(); + if (vu == NULL) + return; + + vuart_lock(vu); + /* Get data from serial */ + vbuf_len = serial_gets(serial_handle, buffer, 100); + if (vbuf_len) { + while (buf_idx < vbuf_len) { + if (buffer[buf_idx] == GUEST_CONSOLE_TO_HV_SWITCH_KEY) { + /* Switch the console */ + shell_switch_console(); + break; + } + buf_idx++; + } + if (vu->active != false) { + buf_idx = 0; + while (buf_idx < vbuf_len) + fifo_putchar(&vu->rxfifo, buffer[buf_idx++]); + + uart_toggle_intr(vu); + } + } + vuart_unlock(vu); +} + +struct vuart *vuart_console_active(void) +{ + struct vm *vm = get_vm_from_vmid(0); + + if (vm && vm->vuart) { + struct vuart *vu = vm->vuart; + + if (vu->active) + return vm->vuart; + } + return NULL; +} + +void *vuart_init(struct vm *vm) +{ + struct vuart *vu; + + vu = calloc(1, sizeof(struct vuart)); + ASSERT(vu != NULL, ""); + uart_init(vu); + vu->vm = vm; + vuart_register_io_handler(vm); + + return vu; +} diff --git a/hypervisor/include/arch/x86/apicreg.h b/hypervisor/include/arch/x86/apicreg.h new file mode 100644 index 000000000..7a3216e5e --- /dev/null +++ b/hypervisor/include/arch/x86/apicreg.h @@ -0,0 +1,524 @@ +/*- + * Copyright (c) 1996, by Peter Wemm and Steve Passe + * Copyright (c) 2017 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the developer may NOT be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _APICREG_H_ +#define _APICREG_H_ + +/* + * Local && I/O APIC definitions. + */ + +/* + * Pentium P54C+ Built-in APIC + * (Advanced programmable Interrupt Controller) + * + * Base Address of Built-in APIC in memory location + * is 0xfee00000. + * + * Map of APIC Registers: + * + * Offset (hex) Description Read/Write state + * 000 Reserved + * 010 Reserved + * 020 ID Local APIC ID R/W + * 030 VER Local APIC Version R + * 040 Reserved + * 050 Reserved + * 060 Reserved + * 070 Reserved + * 080 Task Priority Register R/W + * 090 Arbitration Priority Register R + * 0A0 Processor Priority Register R + * 0B0 EOI Register W + * 0C0 RRR Remote read R + * 0D0 Logical Destination R/W + * 0E0 Destination Format Register 0..27 R; 28..31 R/W + * 0F0 SVR Spurious Interrupt Vector Reg. 0..3 R; 4..9 R/W + * 100 ISR 000-031 R + * 110 ISR 032-063 R + * 120 ISR 064-095 R + * 130 ISR 095-128 R + * 140 ISR 128-159 R + * 150 ISR 160-191 R + * 160 ISR 192-223 R + * 170 ISR 224-255 R + * 180 TMR 000-031 R + * 190 TMR 032-063 R + * 1A0 TMR 064-095 R + * 1B0 TMR 095-128 R + * 1C0 TMR 128-159 R + * 1D0 TMR 160-191 R + * 1E0 TMR 192-223 R + * 1F0 TMR 224-255 R + * 200 IRR 000-031 R + * 210 IRR 032-063 R + * 220 IRR 064-095 R + * 230 IRR 095-128 R + * 240 IRR 128-159 R + * 250 IRR 160-191 R + * 260 IRR 192-223 R + * 270 IRR 224-255 R + * 280 Error Status Register R + * 290 Reserved + * 2A0 Reserved + * 2B0 Reserved + * 2C0 Reserved + * 2D0 Reserved + * 2E0 Reserved + * 2F0 Local Vector Table (CMCI) R/W + * 300 ICR_LOW Interrupt Command Reg. (0-31) R/W + * 310 ICR_HI Interrupt Command Reg. (32-63) R/W + * 320 Local Vector Table (Timer) R/W + * 330 Local Vector Table (Thermal) R/W (PIV+) + * 340 Local Vector Table (Performance) R/W (P6+) + * 350 LVT1 Local Vector Table (LINT0) R/W + * 360 LVT2 Local Vector Table (LINT1) R/W + * 370 LVT3 Local Vector Table (ERROR) R/W + * 380 Initial Count Reg. for Timer R/W + * 390 Current Count of Timer R + * 3A0 Reserved + * 3B0 Reserved + * 3C0 Reserved + * 3D0 Reserved + * 3E0 Timer Divide Configuration Reg. R/W + * 3F0 Reserved + */ + + +/****************************************************************************** + * global defines, etc. 
+ */ + + +/****************************************************************************** + * LOCAL APIC structure + */ + +#ifndef LOCORE + +#define PAD3 int: 32; int: 32; int: 32 +#define PAD4 int: 32; int: 32; int: 32; int: 32 + +struct lapic_reg { + uint32_t val; PAD3; +}; + +struct lapic { + /* reserved */ PAD4; + /* reserved */ PAD4; + uint32_t id; PAD3; + uint32_t version; PAD3; + /* reserved */ PAD4; + /* reserved */ PAD4; + /* reserved */ PAD4; + /* reserved */ PAD4; + uint32_t tpr; PAD3; + uint32_t apr; PAD3; + uint32_t ppr; PAD3; + uint32_t eoi; PAD3; + /* reserved */ PAD4; + uint32_t ldr; PAD3; + uint32_t dfr; PAD3; + uint32_t svr; PAD3; + struct lapic_reg isr[8]; + struct lapic_reg tmr[8]; + struct lapic_reg irr[8]; + uint32_t esr; PAD3; + /* reserved */ PAD4; + /* reserved */ PAD4; + /* reserved */ PAD4; + /* reserved */ PAD4; + /* reserved */ PAD4; + /* reserved */ PAD4; + uint32_t lvt_cmci; PAD3; + uint32_t icr_lo; PAD3; + uint32_t icr_hi; PAD3; + uint32_t lvt_timer; PAD3; + uint32_t lvt_thermal; PAD3; + uint32_t lvt_pcint; PAD3; + uint32_t lvt_lint0; PAD3; + uint32_t lvt_lint1; PAD3; + uint32_t lvt_error; PAD3; + uint32_t icr_timer; PAD3; + uint32_t ccr_timer; PAD3; + /* reserved */ PAD4; + /* reserved */ PAD4; + /* reserved */ PAD4; + /* reserved */ PAD4; + uint32_t dcr_timer; PAD3; + /* reserved */ PAD4; +}; + +enum LAPIC_REGISTERS { + LAPIC_ID = 0x2, + LAPIC_VERSION = 0x3, + LAPIC_TPR = 0x8, + LAPIC_APR = 0x9, + LAPIC_PPR = 0xa, + LAPIC_EOI = 0xb, + LAPIC_LDR = 0xd, + LAPIC_DFR = 0xe, /* Not in x2APIC */ + LAPIC_SVR = 0xf, + LAPIC_ISR0 = 0x10, + LAPIC_ISR1 = 0x11, + LAPIC_ISR2 = 0x12, + LAPIC_ISR3 = 0x13, + LAPIC_ISR4 = 0x14, + LAPIC_ISR5 = 0x15, + LAPIC_ISR6 = 0x16, + LAPIC_ISR7 = 0x17, + LAPIC_TMR0 = 0x18, + LAPIC_TMR1 = 0x19, + LAPIC_TMR2 = 0x1a, + LAPIC_TMR3 = 0x1b, + LAPIC_TMR4 = 0x1c, + LAPIC_TMR5 = 0x1d, + LAPIC_TMR6 = 0x1e, + LAPIC_TMR7 = 0x1f, + LAPIC_IRR0 = 0x20, + LAPIC_IRR1 = 0x21, + LAPIC_IRR2 = 0x22, + LAPIC_IRR3 = 0x23, + LAPIC_IRR4 = 0x24, + LAPIC_IRR5 = 0x25, + LAPIC_IRR6 = 0x26, + LAPIC_IRR7 = 0x27, + LAPIC_ESR = 0x28, + LAPIC_LVT_CMCI = 0x2f, + LAPIC_ICR_LO = 0x30, + LAPIC_ICR_HI = 0x31, /* Not in x2APIC */ + LAPIC_LVT_TIMER = 0x32, + LAPIC_LVT_THERMAL = 0x33, + LAPIC_LVT_PCINT = 0x34, + LAPIC_LVT_LINT0 = 0x35, + LAPIC_LVT_LINT1 = 0x36, + LAPIC_LVT_ERROR = 0x37, + LAPIC_ICR_TIMER = 0x38, + LAPIC_CCR_TIMER = 0x39, + LAPIC_DCR_TIMER = 0x3e, + LAPIC_SELF_IPI = 0x3f, /* Only in x2APIC */ + LAPIC_EXT_FEATURES = 0x40, /* AMD */ + LAPIC_EXT_CTRL = 0x41, /* AMD */ + LAPIC_EXT_SEOI = 0x42, /* AMD */ + LAPIC_EXT_IER0 = 0x48, /* AMD */ + LAPIC_EXT_IER1 = 0x49, /* AMD */ + LAPIC_EXT_IER2 = 0x4a, /* AMD */ + LAPIC_EXT_IER3 = 0x4b, /* AMD */ + LAPIC_EXT_IER4 = 0x4c, /* AMD */ + LAPIC_EXT_IER5 = 0x4d, /* AMD */ + LAPIC_EXT_IER6 = 0x4e, /* AMD */ + LAPIC_EXT_IER7 = 0x4f, /* AMD */ + LAPIC_EXT_LVT0 = 0x50, /* AMD */ + LAPIC_EXT_LVT1 = 0x51, /* AMD */ + LAPIC_EXT_LVT2 = 0x52, /* AMD */ + LAPIC_EXT_LVT3 = 0x53, /* AMD */ +}; + +#define LAPIC_MEM_MUL 0x10 + +/* + * Although some registers are available on AMD processors only, + * it's not a big waste to reserve them on all platforms. + * However, we need to watch out for this space being assigned for + * non-APIC purposes in the future processor models. 
+ */ +#define LAPIC_MEM_REGION ((LAPIC_EXT_LVT3 + 1) * LAPIC_MEM_MUL) + +/****************************************************************************** + * I/O APIC structure + */ + +struct ioapic { + uint32_t ioregsel; PAD3; + uint32_t iowin; PAD3; +}; + +#undef PAD4 +#undef PAD3 + +#endif /* !LOCORE */ + + +/****************************************************************************** + * various code 'logical' values + */ + +/****************************************************************************** + * LOCAL APIC defines + */ + +/* default physical locations of LOCAL (CPU) APICs */ +#define DEFAULT_APIC_BASE 0xfee00000 + +/* constants relating to APIC ID registers */ +#define APIC_ID_MASK 0xff000000 +#define APIC_ID_SHIFT 24 +#define APIC_ID_CLUSTER 0xf0 +#define APIC_ID_CLUSTER_ID 0x0f +#define APIC_MAX_CLUSTER 0xe +#define APIC_MAX_INTRACLUSTER_ID 3 +#define APIC_ID_CLUSTER_SHIFT 4 + +/* fields in VER */ +#define APIC_VER_VERSION 0x000000ff +#define APIC_VER_MAXLVT 0x00ff0000 +#define MAXLVTSHIFT 16 +#define APIC_VER_EOI_SUPPRESSION 0x01000000 +#define APIC_VER_AMD_EXT_SPACE 0x80000000 + +/* fields in LDR */ +#define APIC_LDR_RESERVED 0x00ffffff + +/* fields in DFR */ +#define APIC_DFR_RESERVED 0x0fffffff +#define APIC_DFR_MODEL_MASK 0xf0000000 +#define APIC_DFR_MODEL_FLAT 0xf0000000 +#define APIC_DFR_MODEL_CLUSTER 0x00000000 + +/* fields in SVR */ +#define APIC_SVR_VECTOR 0x000000ff +#define APIC_SVR_VEC_PROG 0x000000f0 +#define APIC_SVR_VEC_FIX 0x0000000f +#define APIC_SVR_ENABLE 0x00000100 +#define APIC_SVR_SWDIS 0x00000000 +#define APIC_SVR_SWEN 0x00000100 +#define APIC_SVR_FOCUS 0x00000200 +#define APIC_SVR_FEN 0x00000000 +#define APIC_SVR_FDIS 0x00000200 +#define APIC_SVR_EOI_SUPPRESSION 0x00001000 + +/* fields in TPR */ +#define APIC_TPR_PRIO 0x000000ff +#define APIC_TPR_INT 0x000000f0 +#define APIC_TPR_SUB 0x0000000f + +/* fields in ESR */ +#define APIC_ESR_SEND_CS_ERROR 0x00000001 +#define APIC_ESR_RECEIVE_CS_ERROR 0x00000002 +#define APIC_ESR_SEND_ACCEPT 0x00000004 +#define APIC_ESR_RECEIVE_ACCEPT 0x00000008 +#define APIC_ESR_SEND_ILLEGAL_VECTOR 0x00000020 +#define APIC_ESR_RECEIVE_ILLEGAL_VECTOR 0x00000040 +#define APIC_ESR_ILLEGAL_REGISTER 0x00000080 + +/* fields in ICR_LOW */ +#define APIC_VECTOR_MASK 0x000000ff + +#define APIC_DELMODE_MASK 0x00000700 +#define APIC_DELMODE_FIXED 0x00000000 +#define APIC_DELMODE_LOWPRIO 0x00000100 +#define APIC_DELMODE_SMI 0x00000200 +#define APIC_DELMODE_RR 0x00000300 +#define APIC_DELMODE_NMI 0x00000400 +#define APIC_DELMODE_INIT 0x00000500 +#define APIC_DELMODE_STARTUP 0x00000600 +#define APIC_DELMODE_RESV 0x00000700 + +#define APIC_DESTMODE_MASK 0x00000800 +#define APIC_DESTMODE_PHY 0x00000000 +#define APIC_DESTMODE_LOG 0x00000800 + +#define APIC_DELSTAT_MASK 0x00001000 +#define APIC_DELSTAT_IDLE 0x00000000 +#define APIC_DELSTAT_PEND 0x00001000 + +#define APIC_RESV1_MASK 0x00002000 + +#define APIC_LEVEL_MASK 0x00004000 +#define APIC_LEVEL_DEASSERT 0x00000000 +#define APIC_LEVEL_ASSERT 0x00004000 + +#define APIC_TRIGMOD_MASK 0x00008000 +#define APIC_TRIGMOD_EDGE 0x00000000 +#define APIC_TRIGMOD_LEVEL 0x00008000 + +#define APIC_RRSTAT_MASK 0x00030000 +#define APIC_RRSTAT_INVALID 0x00000000 +#define APIC_RRSTAT_INPROG 0x00010000 +#define APIC_RRSTAT_VALID 0x00020000 +#define APIC_RRSTAT_RESV 0x00030000 + +#define APIC_DEST_MASK 0x000c0000 +#define APIC_DEST_DESTFLD 0x00000000 +#define APIC_DEST_SELF 0x00040000 +#define APIC_DEST_ALLISELF 0x00080000 +#define APIC_DEST_ALLESELF 0x000c0000 + +#define APIC_RESV2_MASK 0xfff00000 + 
+#define APIC_ICRLO_RESV_MASK (APIC_RESV1_MASK | APIC_RESV2_MASK) + +/* fields in LVT1/2 */ +#define APIC_LVT_VECTOR 0x000000ff +#define APIC_LVT_DM 0x00000700 +#define APIC_LVT_DM_FIXED 0x00000000 +#define APIC_LVT_DM_SMI 0x00000200 +#define APIC_LVT_DM_NMI 0x00000400 +#define APIC_LVT_DM_INIT 0x00000500 +#define APIC_LVT_DM_EXTINT 0x00000700 +#define APIC_LVT_DS 0x00001000 +#define APIC_LVT_IIPP 0x00002000 +#define APIC_LVT_IIPP_INTALO 0x00002000 +#define APIC_LVT_IIPP_INTAHI 0x00000000 +#define APIC_LVT_RIRR 0x00004000 +#define APIC_LVT_TM 0x00008000 +#define APIC_LVT_M 0x00010000 + + +/* fields in LVT Timer */ +#define APIC_LVTT_VECTOR 0x000000ff +#define APIC_LVTT_DS 0x00001000 +#define APIC_LVTT_M 0x00010000 +#define APIC_LVTT_TM 0x00060000 +#define APIC_LVTT_TM_ONE_SHOT 0x00000000 +#define APIC_LVTT_TM_PERIODIC 0x00020000 +#define APIC_LVTT_TM_TSCDLT 0x00040000 +#define APIC_LVTT_TM_RSRV 0x00060000 + +/* APIC timer current count */ +#define APIC_TIMER_MAX_COUNT 0xffffffff + +/* fields in TDCR */ +#define APIC_TDCR_2 0x00 +#define APIC_TDCR_4 0x01 +#define APIC_TDCR_8 0x02 +#define APIC_TDCR_16 0x03 +#define APIC_TDCR_32 0x08 +#define APIC_TDCR_64 0x09 +#define APIC_TDCR_128 0x0a +#define APIC_TDCR_1 0x0b + +/* Constants related to AMD Extended APIC Features Register */ +#define APIC_EXTF_ELVT_MASK 0x00ff0000 +#define APIC_EXTF_ELVT_SHIFT 16 +#define APIC_EXTF_EXTID_CAP 0x00000004 +#define APIC_EXTF_SEIO_CAP 0x00000002 +#define APIC_EXTF_IER_CAP 0x00000001 + +/* LVT table indices */ +#define APIC_LVT_LINT0 0 +#define APIC_LVT_LINT1 1 +#define APIC_LVT_TIMER 2 +#define APIC_LVT_ERROR 3 +#define APIC_LVT_PMC 4 +#define APIC_LVT_THERMAL 5 +#define APIC_LVT_CMCI 6 +#define APIC_LVT_MAX APIC_LVT_CMCI + +/* AMD extended LVT constants, seem to be assigned by fiat */ +#define APIC_ELVT_IBS 0 /* Instruction based sampling */ +#define APIC_ELVT_MCA 1 /* MCE thresholding */ +#define APIC_ELVT_DEI 2 /* Deferred error interrupt */ +#define APIC_ELVT_SBI 3 /* Sideband interface */ +#define APIC_ELVT_MAX APIC_ELVT_SBI + +/****************************************************************************** + * I/O APIC defines + */ + +/* default physical locations of an IO APIC */ +#define DEFAULT_IO_APIC_BASE 0xfec00000 + +/* window register offset */ +#define IOAPIC_WINDOW 0x10 +#define IOAPIC_EOIR 0x40 + +/* indexes into IO APIC */ +#define IOAPIC_ID 0x00 +#define IOAPIC_VER 0x01 +#define IOAPIC_ARB 0x02 +#define IOAPIC_REDTBL 0x10 +#define IOAPIC_REDTBL0 IOAPIC_REDTBL +#define IOAPIC_REDTBL1 (IOAPIC_REDTBL+0x02) +#define IOAPIC_REDTBL2 (IOAPIC_REDTBL+0x04) +#define IOAPIC_REDTBL3 (IOAPIC_REDTBL+0x06) +#define IOAPIC_REDTBL4 (IOAPIC_REDTBL+0x08) +#define IOAPIC_REDTBL5 (IOAPIC_REDTBL+0x0a) +#define IOAPIC_REDTBL6 (IOAPIC_REDTBL+0x0c) +#define IOAPIC_REDTBL7 (IOAPIC_REDTBL+0x0e) +#define IOAPIC_REDTBL8 (IOAPIC_REDTBL+0x10) +#define IOAPIC_REDTBL9 (IOAPIC_REDTBL+0x12) +#define IOAPIC_REDTBL10 (IOAPIC_REDTBL+0x14) +#define IOAPIC_REDTBL11 (IOAPIC_REDTBL+0x16) +#define IOAPIC_REDTBL12 (IOAPIC_REDTBL+0x18) +#define IOAPIC_REDTBL13 (IOAPIC_REDTBL+0x1a) +#define IOAPIC_REDTBL14 (IOAPIC_REDTBL+0x1c) +#define IOAPIC_REDTBL15 (IOAPIC_REDTBL+0x1e) +#define IOAPIC_REDTBL16 (IOAPIC_REDTBL+0x20) +#define IOAPIC_REDTBL17 (IOAPIC_REDTBL+0x22) +#define IOAPIC_REDTBL18 (IOAPIC_REDTBL+0x24) +#define IOAPIC_REDTBL19 (IOAPIC_REDTBL+0x26) +#define IOAPIC_REDTBL20 (IOAPIC_REDTBL+0x28) +#define IOAPIC_REDTBL21 (IOAPIC_REDTBL+0x2a) +#define IOAPIC_REDTBL22 (IOAPIC_REDTBL+0x2c) +#define IOAPIC_REDTBL23 (IOAPIC_REDTBL+0x2e) + 
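+/*
+ * Each redirection table entry is 64 bits wide and is exposed as two
+ * consecutive 32-bit index registers, which is why the IOAPIC_REDTBLn
+ * indices above step by two.  To reach pin n, write IOAPIC_REDTBL + 2*n
+ * to IOREGSEL and access the low dword (vector, delivery mode, mask, ...)
+ * through IOWIN; index + 1 selects the high dword that carries the
+ * destination field.
+ */
+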
+/* fields in VER, for redirection entry */ +#define IOAPIC_MAX_RTE_MASK 0x00ff0000 +#define MAX_RTE_SHIFT 16 + +/* + * fields in the IO APIC's redirection table entries + */ +#define IOAPIC_RTE_DEST APIC_ID_MASK /* broadcast addr: all APICs */ + +#define IOAPIC_RTE_RESV 0x00fe0000 /* reserved */ + +#define IOAPIC_RTE_INTMASK 0x00010000 /* R/W: INTerrupt mask */ +#define IOAPIC_RTE_INTMCLR 0x00000000 /* clear, allow INTs */ +#define IOAPIC_RTE_INTMSET 0x00010000 /* set, inhibit INTs */ + +#define IOAPIC_RTE_TRGRMOD 0x00008000 /* R/W: trigger mode */ +#define IOAPIC_RTE_TRGREDG 0x00000000 /* edge */ +#define IOAPIC_RTE_TRGRLVL 0x00008000 /* level */ + +#define IOAPIC_RTE_REM_IRR 0x00004000 /* RO: remote IRR */ + +#define IOAPIC_RTE_INTPOL 0x00002000 /*R/W:INT input pin polarity*/ +#define IOAPIC_RTE_INTAHI 0x00000000 /* active high */ +#define IOAPIC_RTE_INTALO 0x00002000 /* active low */ + +#define IOAPIC_RTE_DELIVS 0x00001000 /* RO: delivery status */ + +#define IOAPIC_RTE_DESTMOD 0x00000800 /*R/W:destination mode*/ +#define IOAPIC_RTE_DESTPHY 0x00000000 /* physical */ +#define IOAPIC_RTE_DESTLOG 0x00000800 /* logical */ + +#define IOAPIC_RTE_DELMOD 0x00000700 /* R/W: delivery mode */ +#define IOAPIC_RTE_DELFIXED 0x00000000 /* fixed */ +#define IOAPIC_RTE_DELLOPRI 0x00000100 /* lowest priority */ +#define IOAPIC_RTE_DELSMI 0x00000200 /*System Management INT*/ +#define IOAPIC_RTE_DELRSV1 0x00000300 /* reserved */ +#define IOAPIC_RTE_DELNMI 0x00000400 /* NMI signal */ +#define IOAPIC_RTE_DELINIT 0x00000500 /* INIT signal */ +#define IOAPIC_RTE_DELRSV2 0x00000600 /* reserved */ +#define IOAPIC_RTE_DELEXINT 0x00000700 /* External INTerrupt */ + +#define IOAPIC_RTE_INTVEC 0x000000ff /*R/W: INT vector field*/ + +#endif /* _APICREG_H_ */ diff --git a/hypervisor/include/arch/x86/assign.h b/hypervisor/include/arch/x86/assign.h new file mode 100644 index 000000000..2d554c19e --- /dev/null +++ b/hypervisor/include/arch/x86/assign.h @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ASSIGN_H +#define ASSIGN_H + +enum ptdev_intr_type { + PTDEV_INTR_MSI, + PTDEV_INTR_INTX +}; + +enum ptdev_vpin_source { + PTDEV_VPIN_IOAPIC, + PTDEV_VPIN_PIC, +}; + +/* entry per guest virt vector */ +struct ptdev_msi_info { + uint32_t vmsi_addr; /* virt msi_addr */ + uint32_t vmsi_data; /* virt msi_data */ + uint16_t vmsi_ctl; /* virt msi_ctl */ + uint32_t pmsi_addr; /* phys msi_addr */ + uint32_t pmsi_data; /* phys msi_data */ + int msix; /* 0-MSI, 1-MSIX */ + int msix_entry_index; /* MSI: 0, MSIX: index of vector table*/ + int virt_vector; + int phys_vector; +}; + +/* entry per guest vioapic pin */ +struct ptdev_intx_info { + enum ptdev_vpin_source vpin_src; + uint8_t virt_pin; + uint8_t phys_pin; +}; + +/* entry per each allocated irq/vector */ +struct ptdev_remapping_info { + struct vm *vm; + uint16_t virt_bdf; /* PCI bus:slot.func*/ + uint16_t phys_bdf; /* PCI bus:slot.func*/ + uint32_t active; /* 1=active, 0=inactive and to free*/ + enum ptdev_intr_type type; + struct dev_handler_node *node; + struct list_head softirq_node; + struct list_head entry_node; + + union { + struct ptdev_msi_info msi; + struct ptdev_intx_info intx; + }; +}; + +void ptdev_intx_ack(struct vm *vm, int virt_pin, + enum ptdev_vpin_source vpin_src); +int ptdev_msix_remap(struct vm *vm, uint16_t virt_bdf, + struct ptdev_msi_info *info); +int ptdev_intx_pin_remap(struct vm *vm, struct ptdev_intx_info *info); +void ptdev_softirq(int cpu); +void ptdev_init(void); +void ptdev_vm_init(struct vm *vm); +void ptdev_vm_deinit(struct vm *vm); +void ptdev_add_intx_remapping(struct vm *vm, uint16_t virt_bdf, + uint16_t phys_bdf, uint8_t virt_pin, uint8_t phys_pin, bool pic_pin); +void ptdev_remove_intx_remapping(struct vm *vm, uint8_t virt_pin, bool pic_pin); +void ptdev_add_msix_remapping(struct vm *vm, uint16_t virt_bdf, + uint16_t phys_bdf, int vector_count); +void ptdev_remove_msix_remapping(struct vm *vm, uint16_t virt_bdf, + int vector_count); +int get_ptdev_info(char *str, int str_max); + +#endif /* ASSIGN_H */ diff --git a/hypervisor/include/arch/x86/cpu.h b/hypervisor/include/arch/x86/cpu.h new file mode 100644 index 000000000..066512fdb --- /dev/null +++ b/hypervisor/include/arch/x86/cpu.h @@ -0,0 +1,412 @@ +/*- + * Copyright (c) 1989, 1990 William F. Jolitz + * Copyright (c) 1990 The Regents of the University of California. + * Copyright (c) 2017 Intel Corporation + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)segments.h 7.1 (Berkeley) 5/9/91 + * $FreeBSD$ + */ + +#ifndef CPU_H +#define CPU_H + +/* Define page size */ +#define CPU_PAGE_SHIFT 12 +#define CPU_PAGE_SIZE 0x1000 + +/* Define CPU stack alignment */ +#define CPU_STACK_ALIGN 16 + +/* CR0 register definitions */ +#define CR0_PG (1<<31) /* paging enable */ +#define CR0_CD (1<<30) /* cache disable */ +#define CR0_NW (1<<29) /* not write through */ +#define CR0_AM (1<<18) /* alignment mask */ +#define CR0_WP (1<<16) /* write protect */ +#define CR0_NE (1<<5) /* numeric error */ +#define CR0_ET (1<<4) /* extension type */ +#define CR0_TS (1<<3) /* task switched */ +#define CR0_EM (1<<2) /* emulation */ +#define CR0_MP (1<<1) /* monitor coprocessor */ +#define CR0_PE (1<<0) /* protected mode enabled */ + +/* CR3 register definitions */ +#define CR3_PWT (1<<3) /* page-level write through */ +#define CR3_PCD (1<<4) /* page-level cache disable */ + +/* CR4 register definitions */ +#define CR4_VME (1<<0) /* virtual 8086 mode extensions */ +#define CR4_PVI (1<<1) /* protected mode virtual interrupts */ +#define CR4_TSD (1<<2) /* time stamp disable */ +#define CR4_DE (1<<3) /* debugging extensions */ +#define CR4_PSE (1<<4) /* page size extensions */ +#define CR4_PAE (1<<5) /* physical address extensions */ +#define CR4_MCE (1<<6) /* machine check enable */ +#define CR4_PGE (1<<7) /* page global enable */ +#define CR4_PCE (1<<8) +/* performance monitoring counter enable */ +#define CR4_OSFXSR (1<<9) /* OS support for FXSAVE/FXRSTOR */ +#define CR4_OSXMMEXCPT (1<<10) +/* OS support for unmasked SIMD floating point exceptions */ +#define CR4_VMXE (1<<13) /* VMX enable */ +#define CR4_SMXE (1<<14) /* SMX enable */ +#define CR4_PCIDE (1<<17) /* PCID enable */ +#define CR4_OSXSAVE (1<<18) +/* XSAVE and Processor Extended States enable bit */ + + +/* + * Entries in the Interrupt Descriptor Table (IDT) + */ +#define IDT_DE 0 /* #DE: Divide Error */ +#define IDT_DB 1 /* #DB: Debug */ +#define IDT_NMI 2 /* Nonmaskable External Interrupt */ +#define IDT_BP 3 /* #BP: Breakpoint */ +#define IDT_OF 4 /* #OF: Overflow */ +#define IDT_BR 5 /* #BR: Bound Range Exceeded */ +#define IDT_UD 6 /* #UD: Undefined/Invalid Opcode */ +#define IDT_NM 7 /* #NM: No Math Coprocessor */ +#define IDT_DF 8 /* #DF: Double Fault */ +#define IDT_FPUGP 9 /* Coprocessor Segment Overrun */ 
+#define IDT_TS 10 /* #TS: Invalid TSS */ +#define IDT_NP 11 /* #NP: Segment Not Present */ +#define IDT_SS 12 /* #SS: Stack Segment Fault */ +#define IDT_GP 13 /* #GP: General Protection Fault */ +#define IDT_PF 14 /* #PF: Page Fault */ +#define IDT_MF 16 /* #MF: FPU Floating-Point Error */ +#define IDT_AC 17 /* #AC: Alignment Check */ +#define IDT_MC 18 /* #MC: Machine Check */ +#define IDT_XF 19 /* #XF: SIMD Floating-Point Exception */ + +/*Bits in EFER special registers */ +#define EFER_LMA 0x000000400 /* Long mode active (R) */ + +/* CPU clock frequencies (FSB) */ +#define CPU_FSB_83KHZ 83200 +#define CPU_FSB_100KHZ 99840 +#define CPU_FSB_133KHZ 133200 +#define CPU_FSB_166KHZ 166400 + +/* Time conversions */ +#define CPU_GHZ_TO_HZ 1000000000 +#define CPU_GHZ_TO_KHZ 1000000 +#define CPU_GHZ_TO_MHZ 1000 +#define CPU_MHZ_TO_HZ 1000000 +#define CPU_MHZ_TO_KHZ 1000 + +/* Boot CPU ID */ +#define CPU_BOOT_ID 0 + +/* CPU states defined */ +#define CPU_STATE_RESET 0 +#define CPU_STATE_INITIALIZING 1 +#define CPU_STATE_RUNNING 2 +#define CPU_STATE_HALTED 3 +#define CPU_STATE_DEAD 4 + +/* hypervisor stack bottom magic('intl') */ +#define SP_BOTTOM_MAGIC 0x696e746c + +/* type of speculation control + * 0 - no speculation control support + * 1 - raw IBRS + IPBP support + * 2 - with STIBP optimization support + */ +#define IBRS_NONE 0 +#define IBRS_RAW 1 +#define IBRS_OPT 2 + +#ifndef ASSEMBLER + +/**********************************/ +/* EXTERNAL VARIABLES */ +/**********************************/ +extern const uint8_t _ld_cpu_secondary_reset_load[]; +extern uint8_t _ld_cpu_secondary_reset_start[]; +extern const uint64_t _ld_cpu_secondary_reset_size; +extern uint8_t _ld_bss_start[]; +extern uint8_t _ld_bss_end[]; +extern uint8_t _ld_cpu_data_start[]; +extern uint8_t _ld_cpu_data_end[]; + +extern int ibrs_type; + +/* + * To support per_cpu access, we use a special section ".cpu_data" to define + * the pattern of per CPU data. And we allocate memory for per CPU data + * according to multiple this section size and pcpu number. + * + * +------------------+------------------+---+------------------+ + * | percpu for pcpu0 | percpu for pcpu1 |...| percpu for pcpuX | + * +------------------+------------------+---+------------------+ + * ^ ^ + * | | + * --.cpu_data size-- + * + * To access per cpu data, we use: + * per_cpu_data_base_ptr + curr_pcpu_id * cpu_data_section_size + + * offset_of_symbol_in_cpu_data_section + * to locate the per cpu data. + */ + +/* declare per cpu data */ +#define EXTERN_CPU_DATA(type, name) \ + extern __typeof__(type) cpu_data_##name + +EXTERN_CPU_DATA(uint8_t, lapic_id); +EXTERN_CPU_DATA(void *, vcpu); +EXTERN_CPU_DATA(uint8_t[STACK_SIZE], stack) __aligned(16); + +/* define per cpu data */ +#define DEFINE_CPU_DATA(type, name) \ + __typeof__(type) cpu_data_##name \ + __attribute__((__section__(".cpu_data"))) + +extern void *per_cpu_data_base_ptr; +extern int phy_cpu_num; + +#define PER_CPU_DATA_OFFSET(sym_addr) \ + ((uint64_t)(sym_addr) - (uint64_t)(_ld_cpu_data_start)) + +#define PER_CPU_DATA_SIZE \ + ((uint64_t)_ld_cpu_data_end - (uint64_t)(_ld_cpu_data_start)) + +/* + * get percpu data for pcpu_id. 
+ * + * It returns: + * per_cpu_data_##name[pcpu_id]; + */ +#define per_cpu(name, pcpu_id) \ + (*({ uint64_t base = (uint64_t)per_cpu_data_base_ptr; \ + uint64_t off = PER_CPU_DATA_OFFSET(&cpu_data_##name); \ + ((typeof(&cpu_data_##name))(base + \ + (pcpu_id) * PER_CPU_DATA_SIZE + off)); \ + })) + +/* get percpu data for current pcpu */ +#define get_cpu_var(name) per_cpu(name, get_cpu_id()) + +/* Function prototypes */ +void cpu_halt(uint32_t logical_id); +uint64_t cpu_cycles_per_second(void); +uint64_t tsc_cycles_in_period(uint16_t timer_period_in_us); +void cpu_secondary_reset(void); +int hv_main(int cpu_id); +bool check_tsc_adjust_support(void); +bool check_ibrs_ibpb_support(void); +bool check_stibp_support(void); +bool is_apicv_enabled(void); + +/* Read control register */ +#define CPU_CR_READ(cr, result_ptr) \ +{ \ + asm volatile ("mov %%" __CPP_STRING(cr) ", %0" \ + : "=r"(*result_ptr)); \ +} + +/* Write control register */ +#define CPU_CR_WRITE(cr, value) \ +{ \ + asm volatile ("mov %0, %%" __CPP_STRING(cr) \ + : /* No output */ \ + : "r"(value)); \ +} + +/* Read MSR */ +#define CPU_MSR_READ(reg, msr_val_ptr) \ +{ \ + uint32_t msrl, msrh; \ + asm volatile (" rdmsr ":"=a"(msrl), \ + "=d"(msrh) : "c" (reg)); \ + *msr_val_ptr = ((uint64_t)msrh<<32) | msrl; \ +} + +/* Write MSR */ +#define CPU_MSR_WRITE(reg, msr_val) \ +{ \ + uint32_t msrl, msrh; \ + msrl = (uint32_t)msr_val; \ + msrh = (uint32_t)(msr_val >> 32); \ + asm volatile (" wrmsr " : : "c" (reg), \ + "a" (msrl), "d" (msrh)); \ +} + +/* Disables interrupts on the current CPU */ +#define CPU_IRQ_DISABLE() \ +{ \ + asm volatile ("cli\n" : : : "cc"); \ +} + +/* Enables interrupts on the current CPU */ +#define CPU_IRQ_ENABLE() \ +{ \ + asm volatile ("sti\n" : : : "cc"); \ +} + +/* This macro writes the stack pointer. */ +#define CPU_SP_WRITE(stack_ptr) \ +{ \ + uint64_t rsp = (uint64_t)stack_ptr & ~(CPU_STACK_ALIGN - 1); \ + asm volatile ("movq %0, %%rsp" : : "r"(rsp)); \ +} + +/* Synchronizes all read accesses from memory */ +#define CPU_MEMORY_READ_BARRIER() \ +{ \ + asm volatile ("lfence\n" : : : "memory"); \ +} + +/* Synchronizes all write accesses to memory */ +#define CPU_MEMORY_WRITE_BARRIER() \ +{ \ + asm volatile ("sfence\n" : : : "memory"); \ +} + +/* Synchronizes all read and write accesses to/from memory */ +#define CPU_MEMORY_BARRIER() \ +{ \ + asm volatile ("mfence\n" : : : "memory"); \ +} + +/* Write the task register */ +#define CPU_LTR_EXECUTE(ltr_ptr) \ +{ \ + asm volatile ("ltr %%ax\n" : : "a"(ltr_ptr)); \ +} + +/* Read time-stamp counter / processor ID */ +#define CPU_RDTSCP_EXECUTE(timestamp_ptr, cpu_id_ptr) \ +{ \ + uint32_t tsl, tsh; \ + asm volatile ("rdtscp":"=a"(tsl), "=d"(tsh), \ + "=c"(*cpu_id_ptr)); \ + *timestamp_ptr = ((uint64_t)tsh << 32) | tsl; \ +} + +/* Define variable(s) required to save / restore architecture interrupt state. + * These variable(s) are used in conjunction with the ESAL_AR_INT_ALL_DISABLE() + * and ESAL_AR_INT_ALL_RESTORE() macros to hold any data that must be preserved + * in order to allow these macros to function correctly. 
+ */ +#define CPU_INT_CONTROL_VARS uint64_t cpu_int_value + +/* Macro to save rflags register */ +#define CPU_RFLAGS_SAVE(rflags_ptr) \ +{ \ + asm volatile (" pushf"); \ + asm volatile (" pop %0" \ + : "=r" (*(rflags_ptr)) \ + : /* No inputs */); \ +} + +/* Macro to restore rflags register */ +#define CPU_RFLAGS_RESTORE(rflags) \ +{ \ + asm volatile (" push %0" : : "r" (rflags)); \ + asm volatile (" popf"); \ +} + +/* This macro locks out interrupts and saves the current architecture status + * register / state register to the specified address. This function does not + * attempt to mask any bits in the return register value and can be used as a + * quick method to guard a critical section. + * NOTE: This macro is used in conjunction with CPU_INT_ALL_RESTORE + * defined below and CPU_INT_CONTROL_VARS defined above. + */ + +#define CPU_INT_ALL_DISABLE() \ +{ \ + CPU_RFLAGS_SAVE(&cpu_int_value); \ + CPU_IRQ_DISABLE(); \ +} + +/* This macro restores the architecture status / state register used to lockout + * interrupts to the value provided. The intent of this function is to be a + * fast mechanism to restore the interrupt level at the end of a critical + * section to its original level. + * NOTE: This macro is used in conjunction with CPU_INT_ALL_DISABLE + * and CPU_INT_CONTROL_VARS defined above. + */ +#define CPU_INT_ALL_RESTORE() \ +{ \ + CPU_RFLAGS_RESTORE(cpu_int_value); \ +} + +/* Macro to get CPU ID */ +static inline uint32_t get_cpu_id(void) +{ + uint32_t tsl, tsh, cpu_id; + + asm volatile ("rdtscp":"=a" (tsl), "=d"(tsh), "=c"(cpu_id)::); + return cpu_id; +} + +static inline uint64_t cpu_rsp_get(void) +{ + uint64_t ret; + + asm volatile("movq %%rsp, %0" + : "=r"(ret)); + return ret; +} + +static inline uint64_t cpu_rbp_get(void) +{ + uint64_t ret; + + asm volatile("movq %%rbp, %0" + : "=r"(ret)); + return ret; +} + + + +static inline uint64_t +msr_read(uint32_t reg_num) +{ + uint64_t msr_val; + + CPU_MSR_READ(reg_num, &msr_val); + return msr_val; +} + +static inline void +msr_write(uint32_t reg_num, uint64_t value64) +{ + CPU_MSR_WRITE(reg_num, value64); +} + +#else /* ASSEMBLER defined */ + +#endif /* ASSEMBLER defined */ + +#endif /* CPU_H */ diff --git a/hypervisor/include/arch/x86/cpuid.h b/hypervisor/include/arch/x86/cpuid.h new file mode 100644 index 000000000..60787f86b --- /dev/null +++ b/hypervisor/include/arch/x86/cpuid.h @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* +* cpuid.h +* +* Created on: Jan 4, 2018 +* Author: don +*/ + +#ifndef CPUID_H_ +#define CPUID_H_ + +/* CPUID bit definitions */ +#define CPUID_ECX_SSE3 (1<<0) +#define CPUID_ECX_PCLMUL (1<<1) +#define CPUID_ECX_DTES64 (1<<2) +#define CPUID_ECX_MONITOR (1<<3) +#define CPUID_ECX_DS_CPL (1<<4) +#define CPUID_ECX_VMX (1<<5) +#define CPUID_ECX_SMX (1<<6) +#define CPUID_ECX_EST (1<<7) +#define CPUID_ECX_TM2 (1<<8) +#define CPUID_ECX_SSSE3 (1<<9) +#define CPUID_ECX_CID (1<<10) +#define CPUID_ECX_FMA (1<<12) +#define CPUID_ECX_CX16 (1<<13) +#define CPUID_ECX_ETPRD (1<<14) +#define CPUID_ECX_PDCM (1<<15) +#define CPUID_ECX_DCA (1<<18) +#define CPUID_ECX_SSE4_1 (1<<19) +#define CPUID_ECX_SSE4_2 (1<<20) +#define CPUID_ECX_x2APIC (1<<21) +#define CPUID_ECX_MOVBE (1<<22) +#define CPUID_ECX_POPCNT (1<<23) +#define CPUID_ECX_AES (1<<25) +#define CPUID_ECX_XSAVE (1<<26) +#define CPUID_ECX_OSXSAVE (1<<27) +#define CPUID_ECX_AVX (1<<28) +#define CPUID_EDX_FPU (1<<0) +#define CPUID_EDX_VME (1<<1) +#define CPUID_EDX_DE (1<<2) +#define CPUID_EDX_PSE (1<<3) +#define CPUID_EDX_TSC (1<<4) +#define CPUID_EDX_MSR (1<<5) +#define CPUID_EDX_PAE (1<<6) +#define CPUID_EDX_MCE (1<<7) +#define CPUID_EDX_CX8 (1<<8) +#define CPUID_EDX_APIC (1<<9) +#define CPUID_EDX_SEP (1<<11) +#define CPUID_EDX_MTRR (1<<12) +#define CPUID_EDX_PGE (1<<13) +#define CPUID_EDX_MCA (1<<14) +#define CPUID_EDX_CMOV (1<<15) +#define CPUID_EDX_PAT (1<<16) +#define CPUID_EDX_PSE36 (1<<17) +#define CPUID_EDX_PSN (1<<18) +#define CPUID_EDX_CLF (1<<19) +#define CPUID_EDX_DTES (1<<21) +#define CPUID_EDX_ACPI (1<<22) +#define CPUID_EDX_MMX (1<<23) +#define CPUID_EDX_FXSR (1<<24) +#define CPUID_EDX_SSE (1<<25) +#define CPUID_EDX_SSE2 (1<<26) +#define CPUID_EDX_SS (1<<27) +#define CPUID_EDX_HTT (1<<28) +#define CPUID_EDX_TM1 (1<<29) +#define CPUID_EDX_IA64 (1<<30) +#define CPUID_EDX_PBE (1<<31) +/* CPUID.07H:EBX.TSC_ADJUST*/ +#define CPUID_EBX_TSC_ADJ (1<<1) +/* CPUID.07H:EDX.IBRS_IBPB*/ +#define CPUID_EDX_IBRS_IBPB (1<<26) +/* CPUID.07H:EDX.STIBP*/ +#define CPUID_EDX_STIBP (1<<27) +/* CPUID.80000001H:EDX.Page1GB*/ +#define CPUID_EDX_PAGE1GB (1<<26) +/* CPUID.07H:EBX.INVPCID*/ +#define CPUID_EBX_INVPCID (1<<10) +/* CPUID.01H:ECX.PCID*/ +#define CPUID_ECX_PCID (1<<17) + +/* CPUID source operands */ +#define CPUID_VENDORSTRING 0 +#define CPUID_FEATURES 1 +#define CPUID_TLB 2 +#define CPUID_SERIALNUM 3 +#define CPUID_EXTEND_FEATURE 7 +#define CPUID_EXTEND_FUNCTION_1 0x80000001 + + +enum cpuid_cache_idx { + CPUID_VENDORSTRING_CACHE_IDX = 0, + CPUID_FEATURES_CACHE_IDX, + CPUID_EXTEND_FEATURE_CACHE_IDX, + CPUID_EXTEND_FEATURE_CACHE_MAX +}; + +struct cpuid_cache_entry { + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; + uint32_t inited; + uint32_t reserved; +}; + +static inline void native_cpuid_count(uint32_t op, uint32_t count, + uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) +{ + /* Execute CPUID instruction and save results */ + asm volatile("cpuid":"=a"(*a), "=b"(*b), + "=c"(*c), "=d"(*d) + : "a"(op), 
"c" (count)); +} + +void cpuid_count(uint32_t op, uint32_t count, + uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d); + +#define cpuid(op, a, b, c, d) cpuid_count(op, 0, a, b, c, d) + +void emulate_cpuid(struct vcpu *vcpu, uint32_t src_op, uint32_t *eax_ptr, + uint32_t *ebx_ptr, uint32_t *ecx_ptr, uint32_t *edx_ptr); + +#endif /* CPUID_H_ */ diff --git a/hypervisor/include/arch/x86/gdt.h b/hypervisor/include/arch/x86/gdt.h new file mode 100644 index 000000000..6c2f18bf5 --- /dev/null +++ b/hypervisor/include/arch/x86/gdt.h @@ -0,0 +1,314 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef GDT_H +#define GDT_H + +/* GDT is defined in assembly so it can be used to switch modes before long mode + * is established. + * With 64-bit EFI this is not required since are already in long mode when EFI + * transfers control to the hypervisor. However, for any instantiation of the + * ACRN Hypervisor that requires a boot from reset the GDT will be + * used as mode transitions are being made to ultimately end up in long mode. + * For this reason we establish the GDT in assembly. + * This should not affect usage and convenience of interacting with the GDT in C + * as the complete definition of the GDT is driven by the defines in this file. + * + * Unless it proves to be not viable we will use a single GDT for all hypervisor + * CPUs, with space for per CPU LDT and TSS. + */ + +/* + * Segment selectors in x86-64 and i386 are the same size, 8 bytes. + * Local Descriptor Table (LDT) selectors are 16 bytes on x86-64 instead of 8 + * bytes. + * Task State Segment (TSS) selectors are 16 bytes on x86-64 instead of 8 bytes. + */ +#define X64_SEG_DESC_SIZE (0x8) /* In long mode SEG Descriptors are 8 bytes */ +#define X64_LDT_DESC_SIZE (0x10)/* In long mode LDT Descriptors are 16 bytes */ +#define X64_TSS_DESC_SIZE (0x10)/* In long mode TSS Descriptors are 16 bytes */ + +/***************************************************************************** + * + * BEGIN: Definition of the GDT. 
+ * + * NOTE: + * If you change the size of the GDT or rearrange the location of descriptors + * within the GDT you must change both the defines and the C structure header. + * + *****************************************************************************/ +/* Number of global 8 byte segments descriptor(s) */ +#define HOST_GDT_RING0_SEG_SELECTORS (0x3) /* rsvd, code, data */ +/* Offsets of global 8 byte segment descriptors */ +#define HOST_GDT_RING0_RSVD_SEL (0x0000) +#define HOST_GDT_RING0_CODE_SEL (0x0008) +#define HOST_GDT_RING0_DATA_SEL (0x0010) +/* Number of global 16 byte LDT descriptor(s) */ +#define HOST_GDT_RING0_TSS_SELECTORS (0x1) +/* One for each CPU in the hypervisor. */ + +/***************************************************************************** + * + * END: Definition of the GDT. + * + *****************************************************************************/ + +/* Offset to start of LDT Descriptors */ +#define HOST_GDT_RING0_LDT_SEL \ + (HOST_GDT_RING0_SEG_SELECTORS * X64_SEG_DESC_SIZE) +/* Offset to start of LDT Descriptors */ +#define HOST_GDT_RING0_CPU_TSS_SEL (HOST_GDT_RING0_LDT_SEL) +/* Size of the GDT */ +#define HOST_GDT_SIZE \ + (HOST_GDT_RING0_CPU_TSS_SEL + \ + (HOST_GDT_RING0_TSS_SELECTORS * X64_TSS_DESC_SIZE)) + +/* Defined position of Interrupt Stack Tables */ +#define MACHINE_CHECK_IST (0x1) +#define DOUBLE_FAULT_IST (0x2) +#define STACK_FAULT_IST (0x3) + +#ifndef ASSEMBLER + +#include +#include + +#define TSS_AVAIL (9) + +/* + * Definition of an 8 byte code segment descriptor. + */ +union code_segment_descriptor { + uint64_t value; + struct { + union { + uint32_t value; + struct { + uint32_t limit_15_0:16; + uint32_t base_15_0:16; + } bits; + } low32; + union { + uint32_t value; + struct { + uint32_t base_23_16:8; + uint32_t accessed:1; + uint32_t readeable:1; + uint32_t conforming:1; + uint32_t bit11_set:1; + uint32_t bit12_set:1; + uint32_t dpl:2; + uint32_t present:1; + uint32_t limit_19_16:4; + uint32_t avl:1; + uint32_t x64flag:1; + uint32_t dflt:1; + uint32_t granularity:1; + uint32_t base_31_24:8; + } bits; + } high32; + }; +} __aligned(8); + +/* + * Definition of an 8 byte data segment descriptor. + */ +union data_segment_descriptor { + uint64_t value; + struct { + union { + uint32_t value; + struct { + uint32_t limit_15_0:16; + uint32_t base_15_0:16; + } bits; + } low32; + union { + uint32_t value; + struct { + uint32_t base_23_16:8; + uint32_t accessed:1; + uint32_t writeable:1; + uint32_t expansion:1; + uint32_t bit11_clr:1; + uint32_t bit12_set:1; + uint32_t dpl:2; + uint32_t present:1; + uint32_t limit_19_16:4; + uint32_t avl:1; + uint32_t rsvd_clr:1; + uint32_t big:1; + uint32_t granularity:1; + uint32_t base_31_24:8; + } bits; + } high32; + }; +} __aligned(8); + +/* + * Definition of an 8 byte system segment descriptor. + */ +union system_segment_descriptor { + uint64_t value; + struct { + union { + uint32_t value; + struct { + uint32_t limit_15_0:16; + uint32_t base_15_0:16; + } bits; + } low32; + union { + uint32_t value; + struct { + uint32_t base_23_16:8; + uint32_t type:4; + uint32_t bit12_clr:1; + uint32_t dpl:2; + uint32_t present:1; + uint32_t limit_19_16:4; + uint32_t rsvd_1:1; + uint32_t rsvd_2_clr:1; + uint32_t rsvd_3:1; + uint32_t granularity:1; + uint32_t base_31_24:8; + } bits; + } high32; + }; +} __aligned(8); + +/* + * Definition of 16 byte TSS and LDT selectors. 
+ */ +union tss_64_descriptor { + uint64_t value; + struct { + union { + uint32_t value; + struct { + uint32_t limit_15_0:16; + uint32_t base_15_0:16; + } bits; + } low32; + union { + uint32_t value; + struct { + uint32_t base_23_16:8; + uint32_t type:4; + uint32_t bit12_clr:1; + uint32_t dpl:2; + uint32_t present:1; + uint32_t limit_19_16:4; + uint32_t rsvd_1:1; + uint32_t rsvd_2_clr:1; + uint32_t rsvd_3:1; + uint32_t granularity:1; + uint32_t base_31_24:8; + } bits; + } high32; + uint32_t base_addr_63_32; + union { + uint32_t value; + struct { + uint32_t rsvd_7_0:8; + uint32_t bits_12_8_clr:4; + uint32_t rsvd_31_13:20; + } bits; + } offset_12; + }; +} __aligned(8); + +/***************************************************************************** + * + * BEGIN: Definition of the GDT. + * + * NOTE: + * If you change the size of the GDT or rearrange the location of descriptors + * within the GDT you must change both the defines and the C structure header. + * + *****************************************************************************/ +struct host_gdt { + uint64_t rsvd; + + union code_segment_descriptor host_gdt_code_descriptor; + union data_segment_descriptor host_gdt_data_descriptor; + union tss_64_descriptor host_gdt_tss_descriptors; +} __aligned(8); + +/***************************************************************************** + * + * END: Definition of the GDT. + * + *****************************************************************************/ + +/* + * x86-64 Task State Segment (TSS) definition. + */ +struct tss_64 { + uint32_t rsvd1; + uint64_t rsp0; + uint64_t rsp1; + uint64_t rsp2; + uint32_t rsvd2; + uint32_t rsvd3; + uint64_t ist1; + uint64_t ist2; + uint64_t ist3; + uint64_t ist4; + uint64_t ist5; + uint64_t ist6; + uint64_t ist7; + uint32_t rsvd4; + uint32_t rsvd5; + uint16_t rsvd6; + uint16_t io_map_base_addr; +} __packed __aligned(16); + +/* + * Definition of the GDT descriptor. + */ +struct host_gdt_descriptor { + unsigned short len; + struct host_gdt *gdt; +} __packed; + +extern struct host_gdt HOST_GDT; +extern struct host_gdt_descriptor HOST_GDTR; +void load_gdtr_and_tr(void); + +EXTERN_CPU_DATA(struct tss_64, tss); +EXTERN_CPU_DATA(struct host_gdt, gdt); +EXTERN_CPU_DATA(uint8_t[STACK_SIZE], mc_stack) __aligned(16); +EXTERN_CPU_DATA(uint8_t[STACK_SIZE], df_stack) __aligned(16); +EXTERN_CPU_DATA(uint8_t[STACK_SIZE], sf_stack) __aligned(16); + +#endif /* end #ifndef ASSEMBLER */ + +#endif /* GDT_H */ diff --git a/hypervisor/include/arch/x86/guest/guest.h b/hypervisor/include/arch/x86/guest/guest.h new file mode 100644 index 000000000..683433fa2 --- /dev/null +++ b/hypervisor/include/arch/x86/guest/guest.h @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef GUEST_H +#define GUEST_H + +/* Defines for VM Launch and Resume */ +#define VM_RESUME 0 +#define VM_LAUNCH 1 + +#define ACRN_DBG_PTIRQ 6 +#define ACRN_DBG_IRQ 6 + +#ifndef ASSEMBLER + +#define foreach_vcpu(idx, vm, vcpu) \ + for (idx = 0, vcpu = vm->hw.vcpu_array[idx]; \ + (idx < vm->hw.num_vcpus) & (vcpu != NULL); \ + idx++, vcpu = vm->hw.vcpu_array[idx]) + +struct vhm_request; + +/* + * VCPU related APIs + */ +#define ACRN_REQUEST_EVENT 0 +#define ACRN_REQUEST_EXTINT 1 +#define ACRN_REQUEST_NMI 2 +#define ACRN_REQUEST_GP 3 +#define ACRN_REQUEST_TMR_UPDATE 4 +#define ACRN_REQUEST_TLB_FLUSH 5 + +#define E820_MAX_ENTRIES 32 + +struct e820_mem_params { + uint64_t mem_bottom; + uint64_t mem_top; + uint64_t max_ram_blk_base; /* used for the start address of UOS */ + uint64_t max_ram_blk_size; +}; + +int prepare_vm0_memmap_and_e820(struct vm *vm); + +/* Definition for a mem map lookup */ +struct vm_lu_mem_map { + struct list_head list; /* EPT mem map lookup list*/ + void *hpa; /* Host physical start address of the map*/ + void *gpa; /* Guest physical start address of the map */ + uint64_t size; /* Size of map */ +}; + +/* + * VM related APIs + */ +bool is_vm0(struct vm *vm); +bool vm_lapic_disabled(struct vm *vm); +uint64_t vcpumask2pcpumask(struct vm *vm, uint64_t vdmask); +int init_vm0_boot_info(struct vm *vm); + +uint64_t gva2gpa(struct vm *vm, uint64_t cr3, uint64_t gva); + +struct vcpu *get_primary_vcpu(struct vm *vm); +struct vcpu *vcpu_from_vid(struct vm *vm, int vcpu_id); +struct vcpu *vcpu_from_pid(struct vm *vm, int pcpu_id); + +void init_e820(void); +void obtain_e820_mem_info(void); +extern uint32_t e820_entries; +extern struct e820_entry e820[E820_MAX_ENTRIES]; +extern uint32_t boot_regs[]; +extern struct e820_mem_params e820_mem; + +int rdmsr_handler(struct vcpu *vcpu); +int wrmsr_handler(struct vcpu *vcpu); +void init_msr_emulation(struct vcpu *vcpu); + +extern const char vm_exit[]; +int vmx_vmrun(struct run_context *context, int ops, int ibrs); + +int load_guest(struct vm *vm, struct vcpu *vcpu); +int general_sw_loader(struct vm *vm, struct vcpu *vcpu); + +typedef int (*vm_sw_loader_t)(struct vm *, struct vcpu *); +extern vm_sw_loader_t vm_sw_loader; + +#endif /* !ASSEMBLER */ + +#endif /* GUEST_H*/ diff --git a/hypervisor/include/arch/x86/guest/vcpu.h b/hypervisor/include/arch/x86/guest/vcpu.h new file mode 100644 index 000000000..0d347ecf6 --- /dev/null +++ b/hypervisor/include/arch/x86/guest/vcpu.h @@ -0,0 +1,288 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VCPU_H_ +#define _VCPU_H_ + +#define ACRN_VCPU_MMIO_COMPLETE (0) + +/* Size of various elements within the VCPU structure */ +#define REG_SIZE 8 + +/* Number of GPRs saved / restored for guest in VCPU structure */ +#define NUM_GPRS 15 +#define GUEST_STATE_AREA_SIZE 512 + +/* Indexes of GPRs saved / restored for guest */ +#define VMX_MACHINE_T_GUEST_RAX_INDEX 0 +#define VMX_MACHINE_T_GUEST_RBX_INDEX 1 +#define VMX_MACHINE_T_GUEST_RCX_INDEX 2 +#define VMX_MACHINE_T_GUEST_RDX_INDEX 3 +#define VMX_MACHINE_T_GUEST_RBP_INDEX 4 +#define VMX_MACHINE_T_GUEST_RSI_INDEX 5 +#define VMX_MACHINE_T_GUEST_R8_INDEX 6 +#define VMX_MACHINE_T_GUEST_R9_INDEX 7 +#define VMX_MACHINE_T_GUEST_R10_INDEX 8 +#define VMX_MACHINE_T_GUEST_R11_INDEX 9 +#define VMX_MACHINE_T_GUEST_R12_INDEX 10 +#define VMX_MACHINE_T_GUEST_R13_INDEX 11 +#define VMX_MACHINE_T_GUEST_R14_INDEX 12 +#define VMX_MACHINE_T_GUEST_R15_INDEX 13 +#define VMX_MACHINE_T_GUEST_RDI_INDEX 14 + +/* Offsets of GPRs for guest within the VCPU data structure */ +#define VMX_MACHINE_T_GUEST_RAX_OFFSET (VMX_MACHINE_T_GUEST_RAX_INDEX*REG_SIZE) +#define VMX_MACHINE_T_GUEST_RBX_OFFSET (VMX_MACHINE_T_GUEST_RBX_INDEX*REG_SIZE) +#define VMX_MACHINE_T_GUEST_RCX_OFFSET (VMX_MACHINE_T_GUEST_RCX_INDEX*REG_SIZE) +#define VMX_MACHINE_T_GUEST_RDX_OFFSET (VMX_MACHINE_T_GUEST_RDX_INDEX*REG_SIZE) +#define VMX_MACHINE_T_GUEST_RBP_OFFSET (VMX_MACHINE_T_GUEST_RBP_INDEX*REG_SIZE) +#define VMX_MACHINE_T_GUEST_RSI_OFFSET (VMX_MACHINE_T_GUEST_RSI_INDEX*REG_SIZE) +#define VMX_MACHINE_T_GUEST_RDI_OFFSET (VMX_MACHINE_T_GUEST_RDI_INDEX*REG_SIZE) +#define VMX_MACHINE_T_GUEST_R8_OFFSET (VMX_MACHINE_T_GUEST_R8_INDEX*REG_SIZE) +#define VMX_MACHINE_T_GUEST_R9_OFFSET (VMX_MACHINE_T_GUEST_R9_INDEX*REG_SIZE) +#define VMX_MACHINE_T_GUEST_R10_OFFSET (VMX_MACHINE_T_GUEST_R10_INDEX*REG_SIZE) +#define VMX_MACHINE_T_GUEST_R11_OFFSET (VMX_MACHINE_T_GUEST_R11_INDEX*REG_SIZE) +#define VMX_MACHINE_T_GUEST_R12_OFFSET (VMX_MACHINE_T_GUEST_R12_INDEX*REG_SIZE) +#define VMX_MACHINE_T_GUEST_R13_OFFSET 
(VMX_MACHINE_T_GUEST_R13_INDEX*REG_SIZE) +#define VMX_MACHINE_T_GUEST_R14_OFFSET (VMX_MACHINE_T_GUEST_R14_INDEX*REG_SIZE) +#define VMX_MACHINE_T_GUEST_R15_OFFSET (VMX_MACHINE_T_GUEST_R15_INDEX*REG_SIZE) + +/* Hard-coded offset of cr2 in struct run_context!! */ +#define VMX_MACHINE_T_GUEST_CR2_OFFSET (128) + +/* Hard-coded offset of cr2 in struct run_context!! */ +#define VMX_MACHINE_T_GUEST_SPEC_CTRL_OFFSET (192) + +/*sizes of various registers within the VCPU data structure */ +#define VMX_CPU_S_FXSAVE_GUEST_AREA_SIZE GUEST_STATE_AREA_SIZE + +#ifndef ASSEMBLER + +enum vcpu_state { + VCPU_INIT, + VCPU_RUNNING, + VCPU_PAUSED, + VCPU_ZOMBIE, + VCPU_UNKNOWN_STATE, +}; + +struct cpu_regs { + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rbp; + uint64_t rsi; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint64_t rdi; +}; + +struct segment { + uint64_t selector; + uint64_t base; + uint64_t limit; + uint64_t attr; +}; + +struct run_context { +/* Contains the guest register set. + * NOTE: This must be the first element in the structure, so that the offsets + * in vmx_asm.S match + */ + union { + struct cpu_regs regs; + uint64_t longs[NUM_GPRS]; + } guest_cpu_regs; + + /** The guests CR registers 0, 2, 3 and 4. */ + uint64_t cr0; + + /* VMX_MACHINE_T_GUEST_CR2_OFFSET = + * offsetof(struct run_context, cr2) = 128 + */ + uint64_t cr2; + uint64_t cr3; + uint64_t cr4; + + uint64_t rip; + uint64_t rsp; + uint64_t rflags; + + uint64_t dr7; + uint64_t tsc_offset; + + /* MSRs */ + /* VMX_MACHINE_T_GUEST_SPEC_CTRL_OFFSET = + * offsetof(struct run_context, ia32_spec_ctrl) = 192 + */ + uint64_t ia32_spec_ctrl; + uint64_t ia32_star; + uint64_t ia32_lstar; + uint64_t ia32_fmask; + uint64_t ia32_kernel_gs_base; + + uint64_t ia32_pat; + uint64_t ia32_efer; + uint64_t ia32_sysenter_cs; + uint64_t ia32_sysenter_esp; + uint64_t ia32_sysenter_eip; + uint64_t ia32_debugctl; + + /* segment registers */ + struct segment cs; + struct segment ss; + struct segment ds; + struct segment es; + struct segment fs; + struct segment gs; + struct segment tr; + struct segment idtr; + struct segment ldtr; + struct segment gdtr; + + /* The 512 bytes area to save the FPU/MMX/SSE states for the guest */ + uint64_t + fxstore_guest_area[VMX_CPU_S_FXSAVE_GUEST_AREA_SIZE / sizeof(uint64_t)] + __aligned(16); +}; + +/* 2 worlds: 0 for Normal World, 1 for Secure World */ +#define NR_WORLD 2 +#define NORMAL_WORLD 0 +#define SECURE_WORLD 1 + +struct vcpu_arch { + int cur_context; + struct run_context contexts[NR_WORLD]; + + /* A pointer to the VMCS for this CPU. */ + void *vmcs; + + /* Holds the information needed for IRQ/exception handling. */ + struct { + /* The number of the exception to raise. */ + int exception; + + /* The error number for the exception. 
*/ + int error; + } exception_info; + + uint8_t lapic_mask; + uint32_t irq_window_enabled; + uint32_t nrexits; + + /* Auxiliary TSC value */ + uint64_t msr_tsc_aux; + + /* VCPU context state information */ + uint64_t exit_reason; + uint64_t exit_interrupt_info; + uint64_t exit_qualification; + uint8_t inst_len; + + /* Information related to secondary / AP VCPU start-up */ + uint8_t cpu_mode; + uint8_t nr_sipi; + uint32_t sipi_vector; + + /* interrupt injection information */ + uint64_t pending_intr; + + /* per vcpu lapic */ + void *vlapic; +}; + +struct vm; +struct vcpu { + int pcpu_id; /* Physical CPU ID of this VCPU */ + int vcpu_id; /* virtual identifier for VCPU */ + struct vcpu_arch arch_vcpu; + /* Architecture specific definitions for this VCPU */ + struct vm *vm; /* Reference to the VM this VCPU belongs to */ + void *entry_addr; /* Entry address for this VCPU when first started */ + + /* State of this VCPU before suspend */ + volatile enum vcpu_state prev_state; + volatile enum vcpu_state state; /* State of this VCPU */ + /* State of debug request for this VCPU */ + volatile enum vcpu_state dbg_req_state; + unsigned long sync; /*hold the bit events*/ + struct vlapic *vlapic; /* per vCPU virtualized LAPIC */ + + struct list_head run_list; /* inserted to schedule runqueue */ + unsigned long pending_pre_work; /* any pre work pending? */ + bool launched; /* Whether the vcpu is launched on target pcpu */ + unsigned int paused_cnt; /* how many times vcpu is paused */ + unsigned int running; /* vcpu is picked up and run? */ + unsigned int ioreq_pending; /* ioreq is ongoing or not? */ + + struct vhm_request req; /* used by io/ept emulation */ + struct mem_io mmio; /* used by io/ept emulation */ + + /* save guest msr tsc aux register. + * Before VMENTRY, save guest MSR_TSC_AUX to this fields. + * After VMEXIT, restore this fields to guest MSR_TSC_AUX. + * This is only temperary workaround. Once MSR emulation + * is enabled, we should remove this fields and related + * code. + */ + uint64_t msr_tsc_aux_guest; + uint64_t *guest_msrs; +}; + +#define is_vcpu_bsp(vcpu) ((vcpu)->vcpu_id == 0) +/* do not update Guest RIP for next VM Enter */ +#define VCPU_RETAIN_RIP(vcpu) ((vcpu)->arch_vcpu.inst_len = 0) + +/* External Interfaces */ +int create_vcpu(int cpu_id, struct vm *vm, struct vcpu **rtn_vcpu_handle); +int start_vcpu(struct vcpu *vcpu); +int shutdown_vcpu(struct vcpu *vcpu); +int destroy_vcpu(struct vcpu *vcpu); + +void reset_vcpu(struct vcpu *vcpu); +void init_vcpu(struct vcpu *vcpu); +void pause_vcpu(struct vcpu *vcpu, enum vcpu_state new_state); +void resume_vcpu(struct vcpu *vcpu); +void schedule_vcpu(struct vcpu *vcpu); +int prepare_vcpu(struct vm *vm, int pcpu_id); + +void request_vcpu_pre_work(struct vcpu *vcpu, int pre_work_id); + +#endif + +#endif diff --git a/hypervisor/include/arch/x86/guest/vioapic.h b/hypervisor/include/arch/x86/guest/vioapic.h new file mode 100644 index 000000000..09faf5edc --- /dev/null +++ b/hypervisor/include/arch/x86/guest/vioapic.h @@ -0,0 +1,57 @@ +/*- + * Copyright (c) 2013 Tycho Nightingale + * Copyright (c) 2013 Neel Natu + * Copyright (c) 2017 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VIOAPIC_H_ +#define _VIOAPIC_H_ + +#define VIOAPIC_BASE 0xFEC00000UL +#define VIOAPIC_SIZE 4096UL + +struct vioapic *vioapic_init(struct vm *vm); +void vioapic_cleanup(struct vioapic *vioapic); + +int vioapic_assert_irq(struct vm *vm, int irq); +int vioapic_deassert_irq(struct vm *vm, int irq); +int vioapic_pulse_irq(struct vm *vm, int irq); +void vioapic_update_tmr(struct vcpu *vcpu); + +int vioapic_mmio_write(void *vm, uint64_t gpa, + uint64_t wval, int size); +int vioapic_mmio_read(void *vm, uint64_t gpa, + uint64_t *rval, int size); + +int vioapic_pincount(struct vm *vm); +void vioapic_process_eoi(struct vm *vm, int vector); +bool vioapic_get_rte(struct vm *vm, int pin, void *rte); +int vioapic_mmio_access_handler(struct vcpu *vcpu, struct mem_io *mmio, + void *handler_private_data); + +int get_vioapic_info(char *str, int str_max, int vmid); +#endif diff --git a/hypervisor/include/arch/x86/guest/vlapic.h b/hypervisor/include/arch/x86/guest/vlapic.h new file mode 100644 index 000000000..752d3c0b1 --- /dev/null +++ b/hypervisor/include/arch/x86/guest/vlapic.h @@ -0,0 +1,132 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * Copyright (c) 2017 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _VLAPIC_H_ +#define _VLAPIC_H_ + +struct vlapic; + +/* APIC write handlers */ +void vlapic_set_cr8(struct vlapic *vlapic, uint64_t val); +uint64_t vlapic_get_cr8(struct vlapic *vlapic); + +/* + * Returns 0 if there is no eligible vector that can be delivered to the + * guest at this time and non-zero otherwise. + * + * If an eligible vector number is found and 'vecptr' is not NULL then it will + * be stored in the location pointed to by 'vecptr'. + * + * Note that the vector does not automatically transition to the ISR as a + * result of calling this function. + */ +int vlapic_pending_intr(struct vlapic *vlapic, int *vecptr); + +/* + * Transition 'vector' from IRR to ISR. This function is called with the + * vector returned by 'vlapic_pending_intr()' when the guest is able to + * accept this interrupt (i.e. RFLAGS.IF = 1 and no conditions exist that + * block interrupt delivery). + */ +void vlapic_intr_accepted(struct vlapic *vlapic, int vector); + +struct vlapic *vm_lapic_from_vcpuid(struct vm *vm, int vcpu_id); +struct vlapic *vm_lapic_from_pcpuid(struct vm *vm, int pcpu_id); +bool vlapic_msr(uint32_t num); +int vlapic_rdmsr(struct vcpu *vcpu, uint32_t msr, uint64_t *rval, bool *retu); +int vlapic_wrmsr(struct vcpu *vcpu, uint32_t msr, uint64_t wval, bool *retu); + +int vlapic_mmio_read(struct vcpu *vcpu, uint64_t gpa, uint64_t *rval, int size); +int vlapic_mmio_write(struct vcpu *vcpu, uint64_t gpa, uint64_t wval, int size); + +/* + * Signals to the LAPIC that an interrupt at 'vector' needs to be generated + * to the 'cpu', the state is recorded in IRR. + */ +int vlapic_set_intr(struct vcpu *vcpu, int vector, bool trig); + +#define LAPIC_TRIG_LEVEL true +#define LAPIC_TRIG_EDGE false +static inline int +vlapic_intr_level(struct vcpu *vcpu, int vector) +{ + return vlapic_set_intr(vcpu, vector, LAPIC_TRIG_LEVEL); +} + +static inline int +vlapic_intr_edge(struct vcpu *vcpu, int vector) +{ + return vlapic_set_intr(vcpu, vector, LAPIC_TRIG_EDGE); +} + +/* + * Triggers the LAPIC local interrupt (LVT) 'vector' on 'cpu'. 'cpu' can + * be set to -1 to trigger the interrupt on all CPUs. + */ +int vlapic_set_local_intr(struct vm *vm, int cpu, int vector); + +int vlapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg); + +void vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, + bool phys, int delmode, int vec); + +/* Reset the trigger-mode bits for all vectors to be edge-triggered */ +void vlapic_reset_tmr(struct vlapic *vlapic); + +/* + * Set the trigger-mode bit associated with 'vector' to level-triggered if + * the (dest,phys,delmode) tuple resolves to an interrupt being delivered to + * this 'vlapic'. 
+ */ +void vlapic_set_tmr_one_vec(struct vlapic *vlapic, int delmode, + int vector, bool level); + +void +vlapic_apicv_batch_set_tmr(struct vlapic *vlapic); + +int vlapic_mmio_access_handler(struct vcpu *vcpu, struct mem_io *mmio, + void *handler_private_data); + +uint32_t vlapic_get_id(struct vlapic *vlapic); +uint8_t vlapic_get_apicid(struct vlapic *vlapic); + +int vlapic_create(struct vcpu *vcpu); +void vlapic_free(struct vcpu *vcpu); +void vlapic_init(struct vlapic *vlapic); +bool vlapic_enabled(struct vlapic *vlapic); +uint64_t apicv_get_apic_access_addr(struct vm *vm); +uint64_t apicv_get_apic_page_addr(struct vlapic *vlapic); +bool vlapic_apicv_enabled(struct vcpu *vcpu); +void apicv_inject_pir(struct vlapic *vlapic); +int apicv_access_exit_handler(struct vcpu *vcpu); +int apicv_write_exit_handler(struct vcpu *vcpu); +int apicv_virtualized_eoi_exit_handler(struct vcpu *vcpu); + +void calcvdest(struct vm *vm, uint64_t *dmask, uint32_t dest, bool phys); +#endif /* _VLAPIC_H_ */ diff --git a/hypervisor/include/arch/x86/guest/vm.h b/hypervisor/include/arch/x86/guest/vm.h new file mode 100644 index 000000000..5bf65c8e5 --- /dev/null +++ b/hypervisor/include/arch/x86/guest/vm.h @@ -0,0 +1,202 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef VM_H_ +#define VM_H_ + +enum vm_privilege_level { + VM_PRIVILEGE_LEVEL_HIGH = 0, + VM_PRIVILEGE_LEVEL_MEDIUM, + VM_PRIVILEGE_LEVEL_LOW +}; + +#define MAX_VM_NAME_LEN 16 +struct vm_attr { + char name[16]; /* Virtual machine name string */ + int id; /* Virtual machine identifier */ + int boot_idx; /* Index indicating the boot sequence for this VM */ +}; + +struct vm_hw_info { + int num_vcpus; /* Number of total virtual cores */ + uint32_t created_vcpus; /* Number of created vcpus */ + struct vcpu **vcpu_array; /* vcpu array of this VM */ + uint64_t gpa_lowtop; /* top lowmem gpa of this VM */ +}; + +struct sw_linux { + void *ramdisk_src_addr; + void *ramdisk_load_addr; + uint32_t ramdisk_size; + void *bootargs_src_addr; + void *bootargs_load_addr; + uint32_t bootargs_size; + void *dtb_src_addr; + void *dtb_load_addr; + uint32_t dtb_size; +}; + +struct sw_kernel_info { + void *kernel_src_addr; + void *kernel_load_addr; + void *kernel_entry_addr; + uint32_t kernel_size; +}; + +struct vm_sw_info { + int kernel_type; /* Guest kernel type */ + /* Kernel information (common for all guest types) */ + struct sw_kernel_info kernel_info; + /* Additional information specific to Linux guests */ + struct sw_linux linux_info; + /* GPA Address of guest OS's request buffer */ + uint64_t req_buf; +}; + +/* VM guest types */ +#define VM_LINUX_GUEST 0x02 +#define VM_MONO_GUEST 0x01 + +enum vpic_wire_mode { + VPIC_WIRE_INTR = 0, + VPIC_WIRE_LAPIC, + VPIC_WIRE_IOAPIC, + VPIC_WIRE_NULL +}; + +/* Enumerated type for VM states */ +enum vm_state { + VM_CREATED = 0, /* VM created / awaiting start (boot) */ + VM_STARTED, /* VM started (booted) */ + VM_PAUSED, /* VM paused */ + VM_STATE_UNKNOWN +}; + +/* Structure for VM state information */ +struct vm_state_info { + enum vm_state state; /* State of the VM */ + unsigned int privilege; /* Privilege level of the VM */ + unsigned int boot_count;/* Number of times the VM has booted */ + +}; + +struct vm_arch { + void *guest_pml4; /* Guest pml4 */ + void *ept; /* EPT hierarchy */ + void *m2p; /* machine address to guest physical address */ + void *tmp_pg_array; /* Page array for tmp guest paging struct */ + void *iobitmap[2];/* IO bitmap page array base address for this VM */ + void *msr_bitmap; /* MSR bitmap page base address for this VM */ + void *virt_ioapic; /* Virtual IOAPIC base address */ + /** + * A link to the IO handler of this VM. + * We only register io handle to this link + * when create VM on sequences and ungister it when + * destory VM. So there no need lock to prevent preempt. + * Besides, there only a few io handlers now, we don't + * need binary search temporary. 
+ */ + struct vm_io_handler *io_handler; + + /* reference to virtual platform to come here (as needed) */ +}; + +struct vpic; +struct vm { + struct vm_attr attr; /* Reference to this VM's attributes */ + struct vm_hw_info hw; /* Reference to this VM's HW information */ + struct vm_sw_info sw; /* Reference to SW associated with this VM */ + struct vm_arch arch_vm; /* Reference to this VM's arch information */ + struct vm_state_info state_info;/* State info of this VM */ + enum vm_state state; /* VM state */ + struct vcpu *current_vcpu; /* VCPU that caused vm exit */ + void *vuart; /* Virtual UART */ + struct vpic *vpic; /* Virtual PIC */ + uint32_t vpic_wire_mode; + struct iommu_domain *iommu_domain; /* iommu domain of this VM */ + struct list_head list; /* list of VM */ + spinlock_t spinlock; /* Spin-lock used to protect VM modifications */ + + struct list_head mmio_list; /* list for mmio. This list is not updated + * when vm is active. So no lock needed + */ + + struct _vm_shared_memory *shared_memory_area; + + struct { + struct _vm_virtual_device_node *head; + struct _vm_virtual_device_node *tail; + } virtual_device_list; + + /* passthrough device link */ + struct list_head ptdev_list; + spinlock_t ptdev_lock; + + unsigned char GUID[16]; + unsigned int secure_world_enabled; +}; + +struct vm_description { + /* Virtual machine identifier, assigned by the system */ + char *vm_attr_name; + /* The logical CPU IDs associated with this VM - The first CPU listed + * will be the VM's BSP + */ + int *vm_hw_logical_core_ids; + unsigned char GUID[16]; /* GUID of the vm will be created */ + int vm_hw_num_cores; /* Number of virtual cores */ + /* Indicates to APs that the BSP has created a VM for this + * description + */ + bool vm_created; + /* Index indicating VM's privilege level */ + unsigned int vm_state_info_privilege; + unsigned int secure_world_enabled; /* secure_world enabled? */ +}; + +struct vm_description_array { + int num_vm_desc; + struct vm_description vm_desc_array[]; +}; + +int shutdown_vm(struct vm *vm); +int pause_vm(struct vm *vm); +int start_vm(struct vm *vm); +int create_vm(struct vm_description *vm_desc, struct vm **vm); +int prepare_vm0(void); + +struct vm *get_vm_from_vmid(int vm_id); +struct vm_description *get_vm_desc(int idx); + +extern struct list_head vm_list; +extern spinlock_t vm_list_lock; +extern bool x2apic_enabled; + +#endif /* VM_H_ */ diff --git a/hypervisor/include/arch/x86/guest/vpic.h b/hypervisor/include/arch/x86/guest/vpic.h new file mode 100644 index 000000000..fb7c1614d --- /dev/null +++ b/hypervisor/include/arch/x86/guest/vpic.h @@ -0,0 +1,110 @@ +/*- + * Copyright (c) 2014 Tycho Nightingale + * Copyright (c) 2017 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VPIC_H_ +#define _VPIC_H_ + +#define ICU_IMR_OFFSET 1 + +/* Initialization control word 1. Written to even address. */ +#define ICW1_IC4 0x01 /* ICW4 present */ +#define ICW1_SNGL 0x02 /* 1 = single, 0 = cascaded */ +#define ICW1_ADI 0x04 /* 1 = 4, 0 = 8 byte vectors */ +#define ICW1_LTIM 0x08 /* 1 = level trigger, 0 = edge */ +#define ICW1_RESET 0x10 /* must be 1 */ +/* 0x20 - 0x80 - in 8080/8085 mode only */ + +/* Initialization control word 2. Written to the odd address. */ +/* No definitions, it is the base vector of the IDT for 8086 mode */ + +/* Initialization control word 3. Written to the odd address. */ +/* For a master PIC, bitfield indicating a slave 8259 on given input */ +/* For slave, lower 3 bits are the slave's ID binary id on master */ + +/* Initialization control word 4. Written to the odd address. */ +#define ICW4_8086 0x01 /* 1 = 8086, 0 = 8080 */ +#define ICW4_AEOI 0x02 /* 1 = Auto EOI */ +#define ICW4_MS 0x04 /* 1 = buffered master, 0 = slave */ +#define ICW4_BUF 0x08 /* 1 = enable buffer mode */ +#define ICW4_SFNM 0x10 /* 1 = special fully nested mode */ + +/* Operation control words. Written after initialization. */ + +/* Operation control word type 1 */ +/* + * No definitions. Written to the odd address. Bitmask for interrupts. + * 1 = disabled. + */ + +/* Operation control word type 2. Bit 3 (0x08) must be zero. Even address. */ +#define OCW2_L0 0x01 /* Level */ +#define OCW2_L1 0x02 +#define OCW2_L2 0x04 +/* 0x08 must be 0 to select OCW2 vs OCW3 */ +/* 0x10 must be 0 to select OCW2 vs ICW1 */ +#define OCW2_EOI 0x20 /* 1 = EOI */ +#define OCW2_SL 0x40 /* EOI mode */ +#define OCW2_R 0x80 /* EOI mode */ + +/* Operation control word type 3. Bit 3 (0x08) must be set. Even address. 
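+ * For example, writing OCW3_SEL | OCW3_RR | OCW3_RIS (0x0B) selects the + * in-service register for the next read from the even port.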
*/ +#define OCW3_RIS 0x01 /* 1 = read IS, 0 = read IR */ +#define OCW3_RR 0x02 /* register read */ +#define OCW3_P 0x04 /* poll mode command */ +/* 0x08 must be 1 to select OCW3 vs OCW2 */ +#define OCW3_SEL 0x08 /* must be 1 */ +/* 0x10 must be 0 to select OCW3 vs ICW1 */ +#define OCW3_SMM 0x20 /* special mode mask */ +#define OCW3_ESMM 0x40 /* enable SMM */ + +#define IO_ELCR1 0x4d0 +#define IO_ELCR2 0x4d1 + +enum vpic_trigger { + EDGE_TRIGGER, + LEVEL_TRIGGER +}; + +void *vpic_init(struct vm *vm); +void vpic_cleanup(struct vm *vm); + +int vpic_assert_irq(struct vm *vm, int irq); +int vpic_deassert_irq(struct vm *vm, int irq); +int vpic_pulse_irq(struct vm *vm, int irq); + +void vpic_pending_intr(struct vm *vm, int *vecptr); +void vpic_intr_accepted(struct vm *vm, int vector); +int vpic_set_irq_trigger(struct vm *vm, int irq, enum vpic_trigger trigger); +int vpic_get_irq_trigger(struct vm *vm, int irq, enum vpic_trigger *trigger); + +struct vm_io_handler *vpic_create_io_handler(int flags, uint32_t port, + uint32_t len); + +bool vpic_is_pin_mask(struct vpic *vpic, uint8_t virt_pin); + +#endif /* _VPIC_H_ */ diff --git a/hypervisor/include/arch/x86/hv_arch.h b/hypervisor/include/arch/x86/hv_arch.h new file mode 100644 index 000000000..4b099b544 --- /dev/null +++ b/hypervisor/include/arch/x86/hv_arch.h @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef HV_ARCH_H +#define HV_ARCH_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#endif /* HV_ARCH_H */ diff --git a/hypervisor/include/arch/x86/idt.h b/hypervisor/include/arch/x86/idt.h new file mode 100644 index 000000000..4cb5e11f9 --- /dev/null +++ b/hypervisor/include/arch/x86/idt.h @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef IDT_H +#define IDT_H + +/* + * IDT is defined in assembly so we handle exceptions as early as possible. + */ + +/* Interrupt Descriptor Table (LDT) selectors are 16 bytes on x86-64 instead of + * 8 bytes. + */ +#define X64_IDT_DESC_SIZE (0x10) +/* Number of the HOST IDT entries */ +#define HOST_IDT_ENTRIES (0x100) +/* Size of the IDT */ +#define HOST_IDT_SIZE (HOST_IDT_ENTRIES * X64_IDT_DESC_SIZE) + +#ifndef ASSEMBLER + +/* + * Definition of an 16 byte IDT selector. + */ +union idt_64_descriptor { + uint64_t value; + struct { + union { + uint32_t value; + struct { + uint32_t offset_15_0:16; + uint32_t segment_sel:16; + } bits; + } low32; + union { + uint32_t value; + struct { + uint32_t ist:3; + uint32_t bit_3_clr:1; + uint32_t bit_4_clr:1; + uint32_t bits_5_7_clr:3; + uint32_t type:4; + uint32_t bit_12_clr:1; + uint32_t dpl:2; + uint32_t present:1; + uint32_t offset_31_16:16; + } bits; + } high32; + uint32_t offset_63_32; + uint32_t rsvd; + }; +} __aligned(8); + +/***************************************************************************** + * + * Definition of the IDT. + * + *****************************************************************************/ +struct host_idt { + union idt_64_descriptor host_idt_descriptors[HOST_IDT_ENTRIES]; +} __aligned(8); + +/* + * Definition of the IDT descriptor. + */ +struct host_idt_descriptor { + unsigned short len; + struct host_idt *idt; +} __packed; + +extern struct host_idt HOST_IDT; +extern struct host_idt_descriptor HOST_IDTR; + +static inline void set_idt(struct host_idt_descriptor *idtd) +{ + + asm volatile (" lidtq %[idtd]\n" : /* no output parameters */ + : /* input parameters */ + [idtd] "m"(*idtd)); +} + +#endif /* end #ifndef ASSEMBLER */ + +#endif /* IDT_H */ diff --git a/hypervisor/include/arch/x86/intr_ctx.h b/hypervisor/include/arch/x86/intr_ctx.h new file mode 100644 index 000000000..76edeba2a --- /dev/null +++ b/hypervisor/include/arch/x86/intr_ctx.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2018 Intel Corporation. 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef INTR_CTX_H +#define INTR_CTX_H + +/* + * Definition of the stack frame layout + */ +struct intr_ctx { + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint64_t rbx; + uint64_t rbp; + + uint64_t rax; + uint64_t rcx; + uint64_t rdx; + uint64_t rsi; + uint64_t rdi; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + + uint64_t vector; + uint64_t error_code; + uint64_t rip; + uint64_t cs; + uint64_t rflags; + uint64_t rsp; + uint64_t ss; +}; + +#endif /* INTR_CTX_H */ diff --git a/hypervisor/include/arch/x86/io.h b/hypervisor/include/arch/x86/io.h new file mode 100644 index 000000000..53be42306 --- /dev/null +++ b/hypervisor/include/arch/x86/io.h @@ -0,0 +1,622 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef IO_H +#define IO_H + +/* Definition of a IO port range */ +struct vm_io_range { + uint16_t base; /* IO port base */ + uint16_t len; /* IO port range */ + int flags; /* IO port attributes */ +}; + +/* Write 1 byte to specified I/O port */ +static inline void io_write_byte(uint8_t value, uint16_t port) +{ + asm volatile ("outb %0,%1"::"a" (value), "dN"(port)); +} + +/* Read 1 byte from specified I/O port */ +static inline uint8_t io_read_byte(uint16_t port) +{ + uint8_t value; + + asm volatile ("inb %1,%0":"=a" (value):"dN"(port)); + return value; +} + +/* Write 2 bytes to specified I/O port */ +static inline void io_write_word(uint16_t value, uint16_t port) +{ + asm volatile ("outw %0,%1"::"a" (value), "dN"(port)); +} + +/* Read 2 bytes from specified I/O port */ +static inline uint16_t io_read_word(uint16_t port) +{ + uint16_t value; + + asm volatile ("inw %1,%0":"=a" (value):"dN"(port)); + return value; +} + +/* Write 4 bytes to specified I/O port */ +static inline void io_write_long(uint32_t value, uint16_t port) +{ + asm volatile ("outl %0,%1"::"a" (value), "dN"(port)); +} + +/* Read 4 bytes from specified I/O port */ +static inline uint32_t io_read_long(uint16_t port) +{ + uint32_t value; + + asm volatile ("inl %1,%0":"=a" (value):"dN"(port)); + return value; +} + +static inline void io_write(uint32_t v, ioport_t addr, size_t sz) +{ + if (sz == 1) + io_write_byte(v, addr); + else if (sz == 2) + io_write_word(v, addr); + else + io_write_long(v, addr); +} + +static inline uint32_t io_read(ioport_t addr, size_t sz) +{ + if (sz == 1) + return io_read_byte(addr); + if (sz == 2) + return io_read_word(addr); + return io_read_long(addr); +} + +struct vm_io_handler; +struct vm; +struct vcpu; + +typedef +uint32_t (*io_read_fn_t)(struct vm_io_handler *, struct vm *, + ioport_t, size_t); + +typedef +void (*io_write_fn_t)(struct vm_io_handler *, struct vm *, + ioport_t, size_t, uint32_t); + +/* Describes a single IO handler description entry. */ +struct vm_io_handler_desc { + + /** The base address of the IO range for this description. */ + ioport_t addr; + /** The number of bytes covered by this description. */ + size_t len; + + /** A pointer to the "read" function. + * + * The read function is called from the hypervisor whenever + * a read access to a range described in "ranges" occur. + * The arguments to the callback are: + * + * - The address of the port to read from. + * - The width of the read operation (1,2 or 4). + * + * The implementation must return the ports content as + * byte, word or doubleword (depending on the width). + * + * If the pointer is null, a read of 1's is assumed. + */ + + io_read_fn_t io_read; + /** A pointer to the "write" function. + * + * The write function is called from the hypervisor code + * whenever a write access to a range described in "ranges" + * occur. The arguments to the callback are: + * + * - The address of the port to write to. + * - The width of the write operation (1,2 or 4). 
+ * - The value to write as byte, word or doubleword + * (depending on the width) + * + * The implementation must write the value to the port. + * + * If the pointer is null, the write access is ignored. + */ + + io_write_fn_t io_write; +}; + +struct vm_io_handler { + struct vm_io_handler *next; + struct vm_io_handler_desc desc; +}; + +#define IO_ATTR_R 0 +#define IO_ATTR_RW 1 +#define IO_ATTR_NO_ACCESS 2 + +/* External Interfaces */ +int io_instr_handler(struct vcpu *vcpu); +void setup_io_bitmap(struct vm *vm); +void free_io_emulation_resource(struct vm *vm); +void register_io_emulation_handler(struct vm *vm, struct vm_io_range *range, + io_read_fn_t io_read_fn_ptr, + io_write_fn_t io_write_fn_ptr); +int dm_emulate_pio_post(struct vcpu *vcpu); + +/** Writes a 32 bit value to a memory mapped IO device. + * + * @param value The 32 bit value to write. + * @param addr The memory address to write to. + */ +static inline void mmio_write_long(uint32_t value, mmio_addr_t addr) +{ + *((uint32_t *)addr) = value; +} + +/** Writes a 16 bit value to a memory mapped IO device. + * + * @param value The 16 bit value to write. + * @param addr The memory address to write to. + */ +static inline void mmio_write_word(uint32_t value, mmio_addr_t addr) +{ + *((uint16_t *)addr) = value; +} + +/** Writes an 8 bit value to a memory mapped IO device. + * + * @param value The 8 bit value to write. + * @param addr The memory address to write to. + */ +static inline void mmio_write_byte(uint32_t value, mmio_addr_t addr) +{ + *((uint8_t *)addr) = value; +} + +/** Reads a 32 bit value from a memory mapped IO device. + * + * @param addr The memory address to read from. + * + * @return The 32 bit value read from the given address. + */ +static inline uint32_t mmio_read_long(mmio_addr_t addr) +{ + return *((uint32_t *)addr); +} + +/** Reads a 16 bit value from a memory mapped IO device. + * + * @param addr The memory address to read from. + * + * @return The 16 bit value read from the given address. + */ +static inline uint16_t mmio_read_word(mmio_addr_t addr) +{ + return *((uint16_t *)addr); +} + +/** Reads an 8 bit value from a memory mapped IO device. + * + * @param addr The memory address to read from. + * + * @return The 8 bit value read from the given address. + */ +static inline uint8_t mmio_read_byte(mmio_addr_t addr) +{ + return *((uint8_t *)addr); +} + +/** Sets bits in a 32 bit value from a memory mapped IO device. + * + * @param mask Contains the bits to set at the memory address. + * Bits set in this mask are set in the memory + * location. + * @param addr The memory address to read from/write to. + */ +static inline void mmio_or_long(uint32_t mask, mmio_addr_t addr) +{ + *((uint32_t *)addr) |= mask; +} + +/** Sets bits in a 16 bit value from a memory mapped IO device. + * + * @param mask Contains the bits to set at the memory address. + * Bits set in this mask are set in the memory + * location. + * @param addr The memory address to read from/write to. + */ +static inline void mmio_or_word(uint32_t mask, mmio_addr_t addr) +{ + *((uint16_t *)addr) |= mask; +} + +/** Sets bits in an 8 bit value from a memory mapped IO device. + * + * @param mask Contains the bits to set at the memory address. + * Bits set in this mask are set in the memory + * location. + * @param addr The memory address to read from/write to. + */ +static inline void mmio_or_byte(uint32_t mask, mmio_addr_t addr) +{ + *((uint8_t *)addr) |= mask; +} + +/** Clears bits in a 32 bit value from a memory mapped IO device. 
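+ * The operation performed is *addr &= ~mask.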
+ * + * @param mask Contains the bits to clear at the memory address. + * Bits set in this mask are cleared in the memory + * location. + * @param addr The memory address to read from/write to. + */ +static inline void mmio_and_long(uint32_t mask, mmio_addr_t addr) +{ + *((uint32_t *)addr) &= ~mask; +} + +/** Clears bits in a 16 bit value from a memory mapped IO device. + * + * @param mask Contains the bits to clear at the memory address. + * Bits set in this mask are cleared in the memory + * location. + * @param addr The memory address to read from/write to. + */ +static inline void mmio_and_word(uint32_t mask, mmio_addr_t addr) +{ + *((uint16_t *)addr) &= ~mask; +} + +/** Clears bits in an 8 bit value from a memory mapped IO device. + * + * @param mask Contains the bits to clear at the memory address. + * Bits set in this mask are cleared in the memory + * location. + * @param addr The memory address to read from/write to. + */ +static inline void mmio_and_byte(uint32_t mask, mmio_addr_t addr) +{ + *((uint8_t *)addr) &= ~mask; +} + +/** Performs a read-modify-write cycle for a 32 bit value from a MMIO device. + * + * Reads a 32 bit value from a memory mapped IO device, sets and clears + * bits and writes the value back. If a bit is specified in both, the 'set' + * and in the 'clear' mask, it is undefined whether the resulting bit is set + * or cleared. + * + * @param set Contains the bits to set. Bits set in this mask + * are set at the memory address. + * @param clear Contains the bits to clear. Bits set in this + * mask are cleared at the memory address. + * @param addr The memory address to read from/write to. + */ +static inline void mmio_rmw_long(uint32_t set, uint32_t clear, mmio_addr_t addr) +{ + *((uint32_t *)addr) = + (*((uint32_t *)addr) & ~clear) | set; +} + +/** Performs a read-modify-write cycle for a 16 bit value from a MMIO device. + * + * Reads a 16 bit value from a memory mapped IO device, sets and clears + * bits and writes the value back. If a bit is specified in both, the 'set' + * and in the 'clear' mask, it is undefined whether the resulting bit is set + * or cleared. + * + * @param set Contains the bits to set. Bits set in this mask + * are set at the memory address. + * @param clear Contains the bits to clear. Bits set in this + * mask are cleared at the memory address. + * @param addr The memory address to read from/write to. + */ +static inline void mmio_rmw_word(uint32_t set, uint32_t clear, mmio_addr_t addr) +{ + *((uint16_t *)addr) = + (*((uint16_t *)addr) & ~clear) | set; +} + +/** Performs a read-modify-write cycle for an 8 bit value from a MMIO device. + * + * Reads an 8 bit value from a memory mapped IO device, sets and clears + * bits and writes the value back. If a bit is specified in both, the 'set' + * and in the 'clear' mask, it is undefined whether the resulting bit is set + * or cleared. + * + * @param set Contains the bits to set. Bits set in this mask + * are set at the memory address. + * @param clear Contains the bits to clear. Bits set in this + * mask are cleared at the memory address. + * @param addr The memory address to read from/write to. + */ +static inline void mmio_rmw_byte(uint32_t set, uint32_t clear, mmio_addr_t addr) +{ + *((uint8_t *)addr) = (*((uint8_t *)addr) & ~clear) | set; +} + +/** Writes a 32 bit value to a memory mapped IO device (ROM code version). + * + * @param value The 32 bit value to write. + * @param addr The memory address to write to. 
+ */ +static inline void __mmio_write_long(uint32_t value, mmio_addr_t addr) +{ + *((uint32_t *)addr) = value; +} + +/** Writes a 16 bit value to a memory mapped IO device (ROM code version). + * + * @param value The 16 bit value to write. + * @param addr The memory address to write to. + */ +static inline void __mmio_write_word(uint32_t value, mmio_addr_t addr) +{ + *((uint16_t *)addr) = value; +} + +/** Writes an 8 bit value to a memory mapped IO device (ROM code version). + * + * @param value The 8 bit value to write. + * @param addr The memory address to write to. + */ +static inline void __mmio_write_byte(uint32_t value, mmio_addr_t addr) +{ + *((uint8_t *)addr) = value; +} + +/** Reads a 32 bit value from a memory mapped IO device (ROM code version). + * + * @param addr The memory address to read from. + * + * @return The 32 bit value read from the given address. + */ +static inline uint32_t __mmio_read_long(mmio_addr_t addr) +{ + return *((uint32_t *)addr); +} + +/** Reads a 16 bit value from a memory mapped IO device (ROM code version). + * + * @param addr The memory address to read from. + * + * @return The 16 bit value read from the given address. + */ +static inline uint16_t __mmio_read_word(mmio_addr_t addr) +{ + return *((uint16_t *)addr); +} + +/** Reads an 8 bit value from a memory mapped IO device (ROM code version). + * + * @param addr The memory address to read from. + * + * @return The 32 16 value read from the given address. + */ +static inline uint8_t __mmio_read_byte(mmio_addr_t addr) +{ + return *((uint8_t *)addr); +} + +/** Sets bits in a 32 bit value from a MMIO device (ROM code version). + * + * @param mask Contains the bits to set at the memory address. + * Bits set in this mask are set in the memory + * location. + * @param addr The memory address to read from/write to. + */ +static inline void __mmio_or_long(uint32_t mask, mmio_addr_t addr) +{ + *((uint32_t *)addr) |= mask; +} + +/** Sets bits in a 16 bit value from a MMIO device (ROM code version). + * + * @param mask Contains the bits to set at the memory address. + * Bits set in this mask are set in the memory + * location. + * @param addr The memory address to read from/write to. + */ +static inline void __mmio_or_word(uint32_t mask, mmio_addr_t addr) +{ + *((uint16_t *)addr) |= mask; +} + +/** Sets bits in an 8 bit value from a MMIO device (ROM code version). + * + * @param mask Contains the bits to set at the memory address. + * Bits set in this mask are set in the memory + * location. + * @param addr The memory address to read from/write to. + */ +static inline void __mmio_or_byte(uint32_t mask, mmio_addr_t addr) +{ + *((uint8_t *)addr) |= mask; +} + +/** Clears bits in a 32 bit value from a MMIO device (ROM code version). + * + * @param mask Contains the bits to clear at the memory address. + * Bits set in this mask are cleared in the memory + * location. + * @param addr The memory address to read from/write to. + */ +static inline void __mmio_and_long(uint32_t mask, mmio_addr_t addr) +{ + *((uint32_t *)addr) &= ~mask; +} + +/** Clears bits in a 16 bit value from a MMIO device (ROM code version). + * + * @param mask Contains the bits to clear at the memory address. + * Bits set in this mask are cleared in the memory + * location. + * @param addr The memory address to read from/write to. + */ +static inline void __mmio_and_word(uint32_t mask, mmio_addr_t addr) +{ + *((uint16_t *)addr) &= ~mask; +} + +/** Clears bits in an 8 bit value from a MMIO device (ROM code version). 
+ * + * @param mask Contains the bits to clear at the memory address. + * Bits set in this mask are cleared in the memory + * location. + * @param addr The memory address to read from/write to. + */ +static inline void __mmio_and_byte(uint32_t mask, mmio_addr_t addr) +{ + *((uint8_t *)addr) &= ~mask; +} + +/** Performs a read-modify-write cycle for a 32 bit value from a MMIO device + * (ROM code version). + * + * Reads a 32 bit value from a memory mapped IO device, sets and clears + * bits and writes the value back. If a bit is specified in both, the 'set' + * and in the 'clear' mask, it is undefined whether the resulting bit is set + * or cleared. + * + * @param set Contains the bits to set. Bits set in this mask + * are set at the memory address. + * @param clear Contains the bits to clear. Bits set in this + * mask are cleared at the memory address. + * @param addr The memory address to read from/write to. + */ +static inline void +__mmio_rmw_long(uint32_t set, uint32_t clear, mmio_addr_t addr) +{ + *((uint32_t *)addr) = + (*((uint32_t *)addr) & ~clear) | set; +} + +/** Performs a read-modify-write cycle for a 16 bit value from a MMIO device + * (ROM code version). + * + * Reads a 16 bit value from a memory mapped IO device, sets and clears + * bits and writes the value back. If a bit is specified in both, the 'set' + * and in the 'clear' mask, it is undefined whether the resulting bit is set + * or cleared. + * + * @param set Contains the bits to set. Bits set in this mask + * are set at the memory address. + * @param clear Contains the bits to clear. Bits set in this + * mask are cleared at the memory address. + * @param addr The memory address to read from/write to. + */ +static inline void +__mmio_rmw_word(uint32_t set, uint32_t clear, mmio_addr_t addr) +{ + *((uint16_t *)addr) = + (*((uint16_t *)addr) & ~clear) | set; +} + +/** Performs a read-modify-write cycle for an 8 bit value from a MMIO device + * (ROM code version). + * + * Reads an 8 bit value from a memory mapped IO device, sets and clears + * bits and writes the value back. If a bit is specified in both, the 'set' + * and in the 'clear' mask, it is undefined whether the resulting bit is set + * or cleared. + * + * @param set Contains the bits to set. Bits set in this mask + * are set at the memory address. + * @param clear Contains the bits to clear. Bits set in this + * mask are cleared at the memory address. + * @param addr The memory address to read from/write to. + */ +static inline void +__mmio_rmw_byte(uint32_t set, uint32_t clear, mmio_addr_t addr) +{ + *((uint8_t *)addr) = (*((uint8_t *)addr) & ~clear) | set; +} + +/** Reads a 32 Bit memory mapped IO register, mask it and write it back into + * memory mapped IO register. + * + * @param addr The address of the memory mapped IO register. + * @param mask The mask to apply to the value read. + * @param value The 32 bit value to write. + */ +static inline void setl(mmio_addr_t addr, uint32_t mask, uint32_t value) +{ + mmio_write_long((mmio_read_long(addr) & ~mask) | value, addr); +} + +/** Reads a 16 Bit memory mapped IO register, mask it and write it back into + * memory mapped IO register. + * + * @param addr The address of the memory mapped IO register. + * @param mask The mask to apply to the value read. + * @param value The 16 bit value to write. 
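+ * + * The register is updated to (old_value & ~mask) | value.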
+ */ +static inline void setw(mmio_addr_t addr, uint32_t mask, uint32_t value) +{ + mmio_write_word((mmio_read_word(addr) & ~mask) | value, addr); +} + +/** Reads a 8 Bit memory mapped IO register, mask it and write it back into + * memory mapped IO register. + * + * @param addr The address of the memory mapped IO register. + * @param mask The mask to apply to the value read. + * @param value The 8 bit value to write. + */ +static inline void setb(mmio_addr_t addr, uint32_t mask, uint32_t value) +{ + mmio_write_byte((mmio_read_byte(addr) & ~mask) | value, addr); +} + +/* MMIO memory access types */ +enum mem_io_type { + HV_MEM_IO_READ = 0, + HV_MEM_IO_WRITE, +}; + +/* MMIO emulation related structures */ +#define MMIO_TRANS_VALID 1 +#define MMIO_TRANS_INVALID 0 +struct mem_io { + uint64_t paddr; /* Physical address being accessed */ + enum mem_io_type read_write; /* 0 = read / 1 = write operation */ + uint8_t access_size; /* Access size being emulated */ + uint8_t sign_extend_read; /* 1 if sign extension required for read */ + uint64_t value; /* Value read or value to write */ + uint8_t mmio_status; /* Indicates if this MMIO transaction is valid */ + /* Used to store emulation context for this mmio transaction */ + void *private_data; +}; + +#endif /* _IO_H defined */ diff --git a/hypervisor/include/arch/x86/ioapic.h b/hypervisor/include/arch/x86/ioapic.h new file mode 100644 index 000000000..17ea8bcdd --- /dev/null +++ b/hypervisor/include/arch/x86/ioapic.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef IOAPIC_H +#define IOAPIC_H + +/* IOAPIC_MAX_LINES is architecturally defined. + * The usable RTEs may be a subset of the total on a per IO APIC basis. 
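+ * (A typical IO APIC implements 24 RTEs; the actual count is reported in + * its version register.)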
+ */ +#define IOAPIC_MAX_LINES 120 +#define NR_LEGACY_IRQ 16 +#define NR_MAX_GSI (NR_IOAPICS*IOAPIC_MAX_LINES) + +#define GSI_MASK_IRQ(irq) irq_gsi_mask_unmask((irq), true) +#define GSI_UNMASK_IRQ(irq) irq_gsi_mask_unmask((irq), false) +#define GSI_SET_RTE(irq, rte) ioapic_set_rte((irq), (rte)) + +void setup_ioapic_irq(void); +int get_ioapic_info(char *str, int str_max_len); + +bool irq_is_gsi(int irq); +int irq_gsi_num(void); +int irq_to_pin(int irq); +int pin_to_irq(int pin); +void irq_gsi_mask_unmask(int irq, bool mask); +void ioapic_set_rte(int irq, uint64_t rte); +void ioapic_get_rte(int irq, uint64_t *rte); + +extern uint16_t legacy_irq_to_pin[]; +#endif /* IOAPIC_H */ diff --git a/hypervisor/include/arch/x86/irq.h b/hypervisor/include/arch/x86/irq.h new file mode 100644 index 000000000..854e0403d --- /dev/null +++ b/hypervisor/include/arch/x86/irq.h @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef IRQ_H +#define IRQ_H + +/* vectors for normal, usually for devices */ +#define VECTOR_FOR_NOR_LOWPRI_START 0x20 +#define VECTOR_FOR_NOR_LOWPRI_END 0x7F +#define VECTOR_FOR_NOR_HIGHPRI_START 0x80 +#define VECTOR_FOR_NOR_HIGHPRI_END 0xDF +#define VECTOR_FOR_NOR_END VECTOR_FOR_NOR_HIGHPRI_END + +#define VECTOR_FOR_INTR_START VECTOR_FOR_NOR_LOWPRI_START + +/* vectors for priority, usually for HV service */ +#define VECTOR_FOR_PRI_START 0xE0 +#define VECTOR_FOR_PRI_END 0xFF +#define VECTOR_TIMER 0xEF +#define VECTOR_NOTIFY_VCPU 0xF0 +#define VECTOR_VIRT_IRQ_VHM 0xF7 +#define VECTOR_SPURIOUS 0xFF + +#define NR_MAX_VECTOR 0xFF +#define VECTOR_INVALID (NR_MAX_VECTOR + 1) +#define IRQ_INVALID (NR_MAX_IRQS+1) + +#define NR_MAX_IRQS (256+16) +#define DEFAULT_DEST_MODE IOAPIC_RTE_DESTLOG +#define DEFAULT_DELIVERY_MODE IOAPIC_RTE_DELLOPRI +#define ALL_CPUS_MASK ((1 << phy_cpu_num) - 1) + +struct irq_desc; + +enum irq_mode { + IRQ_PULSE, + IRQ_ASSERT, + IRQ_DEASSERT, +}; + +enum irq_state { + IRQ_NOT_ASSIGNED = 0, + IRQ_ASSIGNED_SHARED, + IRQ_ASSIGNED_NOSHARE, +}; + +enum irq_desc_state { + IRQ_DESC_PENDING, + IRQ_DESC_IN_PROCESS, +}; + +typedef int (*dev_handler_t)(int irq, void*); +struct dev_handler_node { + char name[32]; + void *dev_data; + dev_handler_t dev_handler; + struct dev_handler_node *next; + struct irq_desc *desc; +}; + +struct irq_routing_entry { + unsigned short bdf; /* BDF */ + int irq; /* PCI cfg offset 0x3C: IRQ pin */ + int intx; /* PCI cfg offset 0x3D: 0-3 = INTA,INTB,INTC,INTD*/ +}; + +int irq_mark_used(int irq); +int irq_alloc(void); + +int irq_desc_alloc_vector(int irq, bool lowpri); +void irq_desc_try_free_vector(int irq); + +int irq_to_vector(int irq); +int dev_to_irq(struct dev_handler_node *node); +int dev_to_vector(struct dev_handler_node *node); + +int handle_level_interrupt_common(struct irq_desc *desc, void *handler_data); +int common_handler_edge(struct irq_desc *desc, void *handler_data); +int common_dev_handler_level(struct irq_desc *desc, void *handler_data); +int quick_handler_nolock(struct irq_desc *desc, void *handler_data); + +typedef int (*irq_handler_t)(struct irq_desc*, void*); +void update_irq_handler(int irq, irq_handler_t func); + +int init_default_irqs(unsigned int cpu); + +int dispatch_interrupt(struct intr_ctx *ctx); + +struct dev_handler_node* +pri_register_handler(int irq, + int vector, + dev_handler_t func, + void *dev_data, + const char *name); + +struct dev_handler_node* +normal_register_handler(int irq, + dev_handler_t func, + void *dev_data, + bool share, + bool lowpri, + const char *name); +void unregister_handler_common(struct dev_handler_node *node); + +int get_cpu_interrupt_info(char *str, int str_max); + +void setup_notification(void); + +typedef int (*spurious_handler_t)(int); +extern spurious_handler_t spurious_handler; + +/* + * Some MSI message definitions + */ +#define MSI_ADDR_MASK 0xfff00000 +#define MSI_ADDR_BASE 0xfee00000 +#define MSI_ADDR_RH 0x00000008 /* Redirection Hint */ +#define MSI_ADDR_LOG 0x00000004 /* Destination Mode */ + +/* RFLAGS */ +#define HV_ARCH_VCPU_RFLAGS_IF (1<<9) + +/* Interruptability State info */ +#define HV_ARCH_VCPU_BLOCKED_BY_MOVSS (1<<1) +#define HV_ARCH_VCPU_BLOCKED_BY_STI (1<<0) + +int vcpu_inject_extint(struct vcpu *vcpu); +int vcpu_inject_nmi(struct vcpu *vcpu); +int vcpu_inject_gp(struct vcpu *vcpu); +int vcpu_make_request(struct vcpu *vcpu, int eventid); + +int exception_handler(struct vcpu *vcpu); +int interrupt_win_exiting_handler(struct vcpu *vcpu); +int 
external_interrupt_handler(struct vcpu *vcpu); +int acrn_do_intr_process(struct vcpu *vcpu); +int interrupt_init(uint32_t logical_id); +#endif /* IRQ_H */ diff --git a/hypervisor/include/arch/x86/lapic.h b/hypervisor/include/arch/x86/lapic.h new file mode 100644 index 000000000..494ffd5b0 --- /dev/null +++ b/hypervisor/include/arch/x86/lapic.h @@ -0,0 +1,174 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef INTR_LAPIC_H +#define INTR_LAPIC_H + +#define DEBUG_LAPIC 0 + +enum intr_lapic_icr_delivery_mode { + INTR_LAPIC_ICR_FIXED = 0x0, + INTR_LAPIC_ICR_LP = 0x1, + INTR_LAPIC_ICR_SMI = 0x2, + INTR_LAPIC_ICR_NMI = 0x4, + INTR_LAPIC_ICR_INIT = 0x5, + INTR_LAPIC_ICR_STARTUP = 0x6, +}; + +enum intr_lapic_icr_dest_mode { + INTR_LAPIC_ICR_PHYSICAL = 0x0, + INTR_LAPIC_ICR_LOGICAL = 0x1 +}; + +enum intr_lapic_icr_level { + INTR_LAPIC_ICR_DEASSERT = 0x0, + INTR_LAPIC_ICR_ASSERT = 0x1, +}; + +enum intr_lapic_icr_trigger { + INTR_LAPIC_ICR_EDGE = 0x0, + INTR_LAPIC_ICR_LEVEL = 0x1, +}; + +enum intr_lapic_icr_shorthand { + INTR_LAPIC_ICR_USE_DEST_ARRAY = 0x0, + INTR_LAPIC_ICR_SELF = 0x1, + INTR_LAPIC_ICR_ALL_INC_SELF = 0x2, + INTR_LAPIC_ICR_ALL_EX_SELF = 0x3, +}; + +/* Default LAPIC base */ +#define LAPIC_BASE 0xFEE00000 + +/* LAPIC register offset for memory mapped IO access */ +#define LAPIC_ID_REGISTER 0x00000020 +#define LAPIC_VERSION_REGISTER 0x00000030 +#define LAPIC_TASK_PRIORITY_REGISTER 0x00000080 +#define LAPIC_ARBITRATION_PRIORITY_REGISTER 0x00000090 +#define LAPIC_PROCESSOR_PRIORITY_REGISTER 0x000000A0 +#define LAPIC_EOI_REGISTER 0x000000B0 +#define LAPIC_REMOTE_READ_REGISTER 0x000000C0 +#define LAPIC_LOGICAL_DESTINATION_REGISTER 0x000000D0 +#define LAPIC_DESTINATION_FORMAT_REGISTER 0x000000E0 +#define LAPIC_SPURIOUS_VECTOR_REGISTER 0x000000F0 +#define LAPIC_IN_SERVICE_REGISTER_0 0x00000100 +#define LAPIC_IN_SERVICE_REGISTER_1 0x00000110 +#define LAPIC_IN_SERVICE_REGISTER_2 0x00000120 +#define LAPIC_IN_SERVICE_REGISTER_3 0x00000130 +#define LAPIC_IN_SERVICE_REGISTER_4 0x00000140 +#define LAPIC_IN_SERVICE_REGISTER_5 0x00000150 +#define LAPIC_IN_SERVICE_REGISTER_6 0x00000160 +#define LAPIC_IN_SERVICE_REGISTER_7 0x00000170 +#define LAPIC_TRIGGER_MODE_REGISTER_0 0x00000180 +#define LAPIC_TRIGGER_MODE_REGISTER_1 0x00000190 +#define LAPIC_TRIGGER_MODE_REGISTER_2 0x000001A0 +#define LAPIC_TRIGGER_MODE_REGISTER_3 0x000001B0 +#define LAPIC_TRIGGER_MODE_REGISTER_4 0x000001C0 +#define LAPIC_TRIGGER_MODE_REGISTER_5 0x000001D0 +#define LAPIC_TRIGGER_MODE_REGISTER_6 0x000001E0 +#define LAPIC_TRIGGER_MODE_REGISTER_7 0x000001F0 +#define LAPIC_INT_REQUEST_REGISTER_0 0x00000200 +#define LAPIC_INT_REQUEST_REGISTER_1 0x00000210 +#define LAPIC_INT_REQUEST_REGISTER_2 0x00000220 +#define LAPIC_INT_REQUEST_REGISTER_3 0x00000230 +#define LAPIC_INT_REQUEST_REGISTER_4 0x00000240 +#define LAPIC_INT_REQUEST_REGISTER_5 0x00000250 +#define LAPIC_INT_REQUEST_REGISTER_6 0x00000260 +#define LAPIC_INT_REQUEST_REGISTER_7 0x00000270 +#define LAPIC_ERROR_STATUS_REGISTER 0x00000280 +#define LAPIC_LVT_CMCI_REGISTER 0x000002F0 +#define LAPIC_INT_COMMAND_REGISTER_0 0x00000300 +#define LAPIC_INT_COMMAND_REGISTER_1 0x00000310 +#define LAPIC_LVT_TIMER_REGISTER 0x00000320 +#define LAPIC_LVT_THERMAL_SENSOR_REGISTER 0x00000330 +#define LAPIC_LVT_PMC_REGISTER 0x00000340 +#define LAPIC_LVT_LINT0_REGISTER 0x00000350 +#define LAPIC_LVT_LINT1_REGISTER 0x00000360 +#define LAPIC_LVT_ERROR_REGISTER 0x00000370 +#define LAPIC_INITIAL_COUNT_REGISTER 0x00000380 +#define LAPIC_CURRENT_COUNT_REGISTER 0x00000390 +#define LAPIC_DIVIDE_CONFIGURATION_REGISTER 0x000003E0 + +/* LAPIC CPUID bit and bitmask definitions */ +#define CPUID_OUT_RDX_APIC_PRESENT ((uint64_t) 1 << 9) +#define CPUID_OUT_RCX_X2APIC_PRESENT ((uint64_t) 1 << 21) + +/* LAPIC MSR bit and bitmask definitions */ +#define MSR_01B_XAPIC_GLOBAL_ENABLE ((uint64_t) 1 << 11) + +/* LAPIC register bit and bitmask definitions */ +#define LAPIC_SVR_VECTOR 0x000000FF +#define 
LAPIC_SVR_APIC_ENABLE_MASK 0x00000100 + +#define LAPIC_LVT_MASK 0x00010000 +#define LAPIC_DELIVERY_MODE_EXTINT_MASK 0x00000700 + +/* LAPIC Timer bit and bitmask definitions */ +#define LAPIC_TMR_ONESHOT ((uint32_t) 0x0 << 17) +#define LAPIC_TMR_PERIODIC ((uint32_t) 0x1 << 17) +#define LAPIC_TMR_TSC_DEADLINE ((uint32_t) 0x2 << 17) + +enum intr_cpu_startup_shorthand { + INTR_CPU_STARTUP_USE_DEST, + INTR_CPU_STARTUP_ALL_EX_SELF, + INTR_CPU_STARTUP_UNKNOWN, +}; + +union lapic_id { + uint32_t value; + struct { + uint8_t xapic_id; + uint8_t rsvd[3]; + } xapic; + union { + uint32_t value; + struct { + uint8_t xapic_id; + uint8_t xapic_edid; + uint8_t rsvd[2]; + } ioxapic_view; + struct { + uint32_t x2apic_id:4; + uint32_t x2apic_cluster:28; + } ldr_view; + } x2apic; +}; + +int early_init_lapic(void); +int init_lapic(uint32_t cpu_id); +int send_lapic_eoi(void); +uint32_t get_cur_lapic_id(void); +int send_startup_ipi(enum intr_cpu_startup_shorthand cpu_startup_shorthand, + uint32_t cpu_startup_dest, + paddr_t cpu_startup_start_address); +/* API to send an IPI to a single guest */ +void send_single_ipi(uint32_t pcpu_id, uint32_t vector); + +#endif /* INTR_LAPIC_H */ diff --git a/hypervisor/include/arch/x86/mmu.h b/hypervisor/include/arch/x86/mmu.h new file mode 100644 index 000000000..c11703362 --- /dev/null +++ b/hypervisor/include/arch/x86/mmu.h @@ -0,0 +1,394 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef MMU_H +#define MMU_H + +/* Size of all page-table entries (in bytes) */ +#define IA32E_COMM_ENTRY_SIZE 8 + +/* Definitions common for all IA-32e related paging entries */ +#define IA32E_COMM_P_BIT 0x0000000000000001 +#define IA32E_COMM_RW_BIT 0x0000000000000002 +#define IA32E_COMM_US_BIT 0x0000000000000004 +#define IA32E_COMM_PWT_BIT 0x0000000000000008 +#define IA32E_COMM_PCD_BIT 0x0000000000000010 +#define IA32E_COMM_A_BIT 0x0000000000000020 +#define IA32E_COMM_XD_BIT 0x8000000000000000 + +/* Defines for EPT paging entries */ +#define IA32E_EPT_R_BIT 0x0000000000000001 +#define IA32E_EPT_W_BIT 0x0000000000000002 +#define IA32E_EPT_X_BIT 0x0000000000000004 +#define IA32E_EPT_UNCACHED (0<<3) +#define IA32E_EPT_WC (1<<3) +#define IA32E_EPT_WT (4<<3) +#define IA32E_EPT_WP (5<<3) +#define IA32E_EPT_WB (6<<3) +#define IA32E_EPT_PAT_IGNORE 0x0000000000000040 +#define IA32E_EPT_ACCESS_FLAG 0x0000000000000100 +#define IA32E_EPT_DIRTY_FLAG 0x0000000000000200 +#define IA32E_EPT_SNOOP_CTRL 0x0000000000000800 +#define IA32E_EPT_SUPPRESS_VE 0x8000000000000000 + +/* Definitions common or ignored for all IA-32e related paging entries */ +#define IA32E_COMM_D_BIT 0x0000000000000040 +#define IA32E_COMM_G_BIT 0x0000000000000100 + +/* Definitions exclusive to a Page Map Level 4 Entry (PML4E) */ +#define IA32E_PML4E_INDEX_MASK_START 39 +#define IA32E_PML4E_ADDR_MASK 0x0000FF8000000000 + +/* Definitions exclusive to a Page Directory Pointer Table Entry (PDPTE) */ +#define IA32E_PDPTE_D_BIT 0x0000000000000040 +#define IA32E_PDPTE_PS_BIT 0x0000000000000080 +#define IA32E_PDPTE_PAT_BIT 0x0000000000001000 +#define IA32E_PDPTE_ADDR_MASK 0x0000FFFFC0000000 +#define IA32E_PDPTE_INDEX_MASK_START \ + (IA32E_PML4E_INDEX_MASK_START - IA32E_INDEX_MASK_BITS) + +/* Definitions exclusive to a Page Directory Entry (PDE) 1G or 2M */ +#define IA32E_PDE_D_BIT 0x0000000000000040 +#define IA32E_PDE_PS_BIT 0x0000000000000080 +#define IA32E_PDE_PAT_BIT 0x0000000000001000 +#define IA32E_PDE_ADDR_MASK 0x0000FFFFFFE00000 +#define IA32E_PDE_INDEX_MASK_START \ + (IA32E_PDPTE_INDEX_MASK_START - IA32E_INDEX_MASK_BITS) + +/* Definitions exclusive to Page Table Entries (PTE) */ +#define IA32E_PTE_D_BIT 0x0000000000000040 +#define IA32E_PTE_PAT_BIT 0x0000000000000080 +#define IA32E_PTE_G_BIT 0x0000000000000100 +#define IA32E_PTE_ADDR_MASK 0x0000FFFFFFFFF000 +#define IA32E_PTE_INDEX_MASK_START \ + (IA32E_PDE_INDEX_MASK_START - IA32E_INDEX_MASK_BITS) + +/** The 'Present' bit in a 32 bit paging page directory entry */ +#define MMU_32BIT_PDE_P 0x00000001 +/** The 'Read/Write' bit in a 32 bit paging page directory entry */ +#define MMU_32BIT_PDE_RW 0x00000002 +/** The 'User/Supervisor' bit in a 32 bit paging page directory entry */ +#define MMU_32BIT_PDE_US 0x00000004 +/** The 'Page Write Through' bit in a 32 bit paging page directory entry */ +#define MMU_32BIT_PDE_PWT 0x00000008 +/** The 'Page Cache Disable' bit in a 32 bit paging page directory entry */ +#define MMU_32BIT_PDE_PCD 0x00000010 +/** The 'Accessed' bit in a 32 bit paging page directory entry */ +#define MMU_32BIT_PDE_A 0x00000020 +/** The 'Dirty' bit in a 32 bit paging page directory entry */ +#define MMU_32BIT_PDE_D 0x00000040 +/** The 'Page Size' bit in a 32 bit paging page directory entry */ +#define MMU_32BIT_PDE_PS 0x00000080 +/** The 'Global' bit in a 32 bit paging page directory entry */ +#define MMU_32BIT_PDE_G 0x00000100 +/** The 'PAT' bit in a page 32 bit paging directory entry */ +#define MMU_32BIT_PDE_PAT 0x00001000 +/** The flag that indicates that the 
page fault was caused by a non present + * page. + */ +#define PAGE_FAULT_P_FLAG 0x00000001 +/** The flag that indicates that the page fault was caused by a write access. */ +#define PAGE_FAULT_WR_FLAG 0x00000002 +/** The flag that indicates that the page fault was caused in user mode. */ +#define PAGE_FAULT_US_FLAG 0x00000004 +/** The flag that indicates that the page fault was caused by a reserved bit + * violation. + */ +#define PAGE_FAULT_RSVD_FLAG 0x00000008 +/** The flag that indicates that the page fault was caused by an instruction + * fetch. + */ +#define PAGE_FAULT_ID_FLAG 0x00000010 + +/* Defines used for common memory sizes */ +#define MEM_1K 1024UL +#define MEM_2K (MEM_1K * 2UL) +#define MEM_4K (MEM_1K * 4UL) +#define MEM_8K (MEM_1K * 8UL) +#define MEM_16K (MEM_1K * 16UL) +#define MEM_32K (MEM_1K * 32UL) +#define MEM_64K (MEM_1K * 64UL) +#define MEM_128K (MEM_1K * 128UL) +#define MEM_256K (MEM_1K * 256UL) +#define MEM_512K (MEM_1K * 512UL) +#define MEM_1M (MEM_1K * 1024UL) +#define MEM_2M (MEM_1M * 2UL) +#define MEM_4M (MEM_1M * 4UL) +#define MEM_8M (MEM_1M * 8UL) +#define MEM_16M (MEM_1M * 16UL) +#define MEM_32M (MEM_1M * 32UL) +#define MEM_64M (MEM_1M * 64UL) +#define MEM_128M (MEM_1M * 128UL) +#define MEM_256M (MEM_1M * 256UL) +#define MEM_512M (MEM_1M * 512UL) +#define MEM_1G (MEM_1M * 1024UL) +#define MEM_2G (MEM_1G * 2UL) +#define MEM_3G (MEM_1G * 3UL) +#define MEM_4G (MEM_1G * 4UL) +#define MEM_5G (MEM_1G * 5UL) +#define MEM_6G (MEM_1G * 6UL) + +#ifndef ASSEMBLER + +/* Define cache line size (in bytes) */ +#define CACHE_LINE_SIZE 64 + +/* Size of all page structures for IA-32e */ +#define IA32E_STRUCT_SIZE MEM_4K + +/* IA32E Paging constants */ +#define IA32E_INDEX_MASK_BITS 9 +#define IA32E_NUM_ENTRIES 512 +#define IA32E_INDEX_MASK (uint64_t)(IA32E_NUM_ENTRIES - 1) +#define IA32E_REF_MASK 0x7FFFFFFFFFFFF000 +#define IA32E_FIRST_BLOCK_INDEX 1 + +/* Macro to get PML4 index given an address */ +#define IA32E_PML4E_INDEX_CALC(address) \ + (uint32_t)((((uint64_t)address >> IA32E_PML4E_INDEX_MASK_START) & \ + IA32E_INDEX_MASK) * sizeof(uint64_t)) + +/* Macro to get PDPT index given an address */ +#define IA32E_PDPTE_INDEX_CALC(address) \ + (uint32_t)((((uint64_t)address >> IA32E_PDPTE_INDEX_MASK_START) & \ + IA32E_INDEX_MASK) * sizeof(uint64_t)) + +/* Macro to get PD index given an address */ +#define IA32E_PDE_INDEX_CALC(address) \ + (uint32_t)((((uint64_t)address >> IA32E_PDE_INDEX_MASK_START) & \ + IA32E_INDEX_MASK) * sizeof(uint64_t)) + +/* Macro to get PT index given an address */ +#define IA32E_PTE_INDEX_CALC(address) \ + (uint32_t)((((uint64_t)address >> IA32E_PTE_INDEX_MASK_START) & \ + IA32E_INDEX_MASK) * sizeof(uint64_t)) + +/* Macro to obtain a 2 MB page offset from given linear address */ +#define IA32E_GET_2MB_PG_OFFSET(address) \ + (address & 0x001FFFFF) + +/* Macro to obtain a 4KB page offset from given linear address */ +#define IA32E_GET_4KB_PG_OFFSET(address) \ + (address & 0x00000FFF) + +/* + * The following generic attributes MMU_MEM_ATTR_FLAG_xxx may be OR'd with one + * and only one of the MMU_MEM_ATTR_TYPE_xxx definitions + */ + +/* Generic memory attributes */ +#define MMU_MEM_ATTR_READ 0x00000001 +#define MMU_MEM_ATTR_WRITE 0x00000002 +#define MMU_MEM_ATTR_EXECUTE 0x00000004 +#define MMU_MEM_ATTR_USER 0x00000008 +#define MMU_MEM_ATTR_WB_CACHE 0x00000040 +#define MMU_MEM_ATTR_WT_CACHE 0x00000080 +#define MMU_MEM_ATTR_UNCACHED 0x00000100 +#define MMU_MEM_ATTR_WC 0x00000200 +#define MMU_MEM_ATTR_WP 0x00000400 + +/* Definitions for memory types related 
to x64 */ +#define MMU_MEM_ATTR_BIT_READ_WRITE IA32E_COMM_RW_BIT +#define MMU_MEM_ATTR_BIT_USER_ACCESSIBLE IA32E_COMM_US_BIT +#define MMU_MEM_ATTR_BIT_EXECUTE_DISABLE IA32E_COMM_XD_BIT + +/* Selection of Page Attribute Table (PAT) entries with PAT, PCD and PWT + * encoding. See also pat.h + */ +/* Selects PAT0 WB */ +#define MMU_MEM_ATTR_TYPE_CACHED_WB (0x0000000000000000) +/* Selects PAT1 WT */ +#define MMU_MEM_ATTR_TYPE_CACHED_WT (IA32E_COMM_PWT_BIT) +/* Selects PAT2 UCM */ +#define MMU_MEM_ATTR_TYPE_UNCACHED_MINUS (IA32E_COMM_PCD_BIT) +/* Selects PAT3 UC */ +#define MMU_MEM_ATTR_TYPE_UNCACHED \ + (IA32E_COMM_PCD_BIT | IA32E_COMM_PWT_BIT) +/* Selects PAT6 WC */ +#define MMU_MEM_ATTR_TYPE_WRITE_COMBINED \ + (IA32E_PDPTE_PAT_BIT | IA32E_COMM_PCD_BIT) +/* Selects PAT7 WP */ +#define MMU_MEM_ATTR_TYPE_WRITE_PROTECTED \ + (IA32E_PDPTE_PAT_BIT | IA32E_COMM_PCD_BIT | IA32E_COMM_PWT_BIT) + +#define ROUND_PAGE_UP(addr) (((addr) + CPU_PAGE_SIZE - 1) & IA32E_REF_MASK) +#define ROUND_PAGE_DOWN(addr) ((addr) & IA32E_REF_MASK) + +struct map_params { + /* enum _page_table_type: HOST or EPT*/ + int page_table_type; + /* used HVA->HPA for HOST, used GPA->HPA for EPT */ + void *pml4_base; + /* used HPA->HVA for HOST, used HPA->GPA for EPT */ + void *pml4_inverted; +}; +struct entry_params { + uint32_t entry_level; + uint32_t entry_present; + uint64_t entry_base; + uint64_t entry_off; + uint64_t entry_val; + uint64_t page_size; +}; + +enum _page_table_type { + PT_HOST = 0, /* Mapping for hypervisor */ + PT_EPT = 1, + PAGETABLE_TYPE_UNKNOWN, +}; + +/* Represent the 4 levels of translation tables in IA-32e paging mode */ +enum _page_table_level { + IA32E_PML4 = 0, + IA32E_PDPT = 1, + IA32E_PD = 2, + IA32E_PT = 3, + IA32E_UNKNOWN, +}; + +/* Page table entry present */ +enum _page_table_present { + PT_NOT_PRESENT = 0, + PT_PRESENT = 1, +}; + +/* Page size */ +#define PAGE_SIZE_4K MEM_4K +#define PAGE_SIZE_2M MEM_2M +#define PAGE_SIZE_1G MEM_1G + +/* Macros for reading/writing memory */ +#define MEM_READ8(addr) (*(volatile uint8_t *)(addr)) +#define MEM_WRITE8(addr, data) \ + (*(volatile uint8_t *)(addr) = (uint8_t)(data)) +#define MEM_READ16(addr) (*(volatile uint16_t *)(addr)) +#define MEM_WRITE16(addr, data) \ + (*(volatile uint16_t *)(addr) = (uint16_t)(data)) +#define MEM_READ32(addr) (*(volatile uint32_t *)(addr)) +#define MEM_WRITE32(addr, data) \ + (*(volatile uint32_t *)(addr) = (uint32_t)(data)) +#define MEM_READ64(addr) (*(volatile uint64_t *)(addr)) +#define MEM_WRITE64(addr, data) \ + (*(volatile uint64_t *)(addr) = (uint64_t)(data)) + +/* Typedef for MMIO handler and range check routine */ +typedef int(*hv_mem_io_handler_t)(struct vcpu *, struct mem_io *, void *); + +/* Structure for MMIO handler node */ +struct mem_io_node { + hv_mem_io_handler_t read_write; + void *handler_private_data; + struct list_head list; + uint64_t range_start; + uint64_t range_end; +}; + +void *get_paging_pml4(void); +void *alloc_paging_struct(); +void enable_paging(void *pml4_base_addr); +void init_paging(void); +void map_mem(struct map_params *map_params, void *paddr, void *vaddr, + uint64_t size, uint32_t flags); +void unmap_mem(struct map_params *map_params, void *paddr, void *vaddr, + uint64_t size, uint32_t flags); +void modify_mem(struct map_params *map_params, void *paddr, void *vaddr, + uint64_t size, uint32_t flags); +void mmu_invept(struct vcpu *vcpu); +void obtain_last_page_table_entry(struct map_params *map_params, + struct entry_params *entry, void *addr, bool direct); + +int 
register_mmio_emulation_handler(struct vm *vm, + hv_mem_io_handler_t read_write, uint64_t start, + uint64_t end, void *handler_private_data); + +void unregister_mmio_emulation_handler(struct vm *vm, uint64_t start, + uint64_t end); + +#pragma pack(1) + +/** Defines a single entry in an E820 memory map. */ +struct e820_entry { + /** The base address of the memory range. */ + uint64_t baseaddr; + /** The length of the memory range. */ + uint64_t length; + /** The type of memory region. */ + uint32_t type; +}; + +#pragma pack() + +/* E820 memory types */ +#define E820_TYPE_RAM 1 /* EFI 1, 2, 3, 4, 5, 6, 7 */ +#define E820_TYPE_RESERVED 2 +/* EFI 0, 11, 12, 13 (everything not used elsewhere) */ +#define E820_TYPE_ACPI_RECLAIM 3 /* EFI 9 */ +#define E820_TYPE_ACPI_NVS 4 /* EFI 10 */ +#define E820_TYPE_UNUSABLE 5 /* EFI 8 */ + +/** Calculates the page table address for a given address. + * + * @param pd The base address of the page directory. + * @param vaddr The virtual address to calculate the page table address for. + * + * @return A pointer to the page table for the specified virtual address. + * + */ +static inline void *mmu_pt_for_pde(uint32_t *pd, uint32_t vaddr) +{ + return pd + ((vaddr >> 22) + 1) * 1024; +} + +#define CACHE_FLUSH_INVALIDATE_ALL() \ +{ \ + asm volatile (" wbinvd\n" : : : "memory"); \ +} + +/* External variable declarations */ +extern uint8_t CPU_Boot_Page_Tables_Start_VM[]; + +/* External Interfaces */ +int is_ept_supported(void); +void *create_guest_paging(struct vm *vm); +void destroy_ept(struct vm *vm); +uint64_t gpa2hpa(struct vm *vm, uint64_t gpa); +uint64_t gpa2hpa_check(struct vm *vm, uint64_t gpa, + uint64_t size, int *found, bool assert); +uint64_t hpa2gpa(struct vm *vm, uint64_t hpa); +int ept_mmap(struct vm *vm, uint64_t hpa, + uint64_t gpa, uint64_t size, uint32_t type, uint32_t prot); + +int ept_violation_handler(struct vcpu *vcpu); +int ept_misconfig_handler(struct vcpu *vcpu); +int dm_emulate_mmio_post(struct vcpu *vcpu); + +#endif /* ASSEMBLER not defined */ + +#endif /* MMU_H */ diff --git a/hypervisor/include/arch/x86/msr.h b/hypervisor/include/arch/x86/msr.h new file mode 100644 index 000000000..dd618fa87 --- /dev/null +++ b/hypervisor/include/arch/x86/msr.h @@ -0,0 +1,563 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MSR_H +#define MSR_H + +/* architectural (common) MSRs */ +#define MSR_IA32_P5_MC_ADDR 0x00000000 +/* Machine check address for MC exception handler */ +#define MSR_IA32_P5_MC_TYPE 0x00000001 +/* Machine check error type for MC exception handler */ +#define MSR_IA32_MONITOR_FILTER_SIZE 0x00000006 +/* System coherence line size for MWAIT/MONITOR */ +#define MSR_IA32_TIME_STAMP_COUNTER 0x00000010 /* TSC as MSR */ +#define MSR_IA32_PLATFORM_ID 0x00000017 /* Platform ID */ +#define MSR_IA32_APIC_BASE 0x0000001B +/* Information about LAPIC */ +#define MSR_IA32_FEATURE_CONTROL 0x0000003A +/* Speculation Control */ +#define MSR_IA32_SPEC_CTRL 0x00000048 +/* Prediction Command */ +#define MSR_IA32_PRED_CMD 0x00000049 +/* Control Features in Intel 64 processor */ +#define MSR_IA32_ADJUST_TSC 0x0000003B /* Adjust TSC value */ +#define MSR_IA32_BIOS_UPDT_TRIG 0x00000079 +/* BIOS update trigger */ +#define MSR_IA32_BIOS_SIGN_ID 0x0000008B +/* BIOS update signature */ +#define MSR_IA32_SMM_MONITOR_CTL 0x0000009B +/* SMM monitor configuration */ +#define MSR_IA32_PMC0 0x000000C1 +/* General performance counter 0 */ +#define MSR_IA32_PMC1 0x000000C2 +/* General performance counter 1 */ +#define MSR_IA32_PMC2 0x000000C3 +/* General performance counter 2 */ +#define MSR_IA32_PMC3 0x000000C4 +/* General performance counter 3 */ +#define MSR_IA32_MPERF 0x000000E7 +/* Max. qualified performance clock counter */ +#define MSR_IA32_APERF 0x000000E8 +/* Actual performance clock counter */ +#define MSR_IA32_MTRR_CAP 0x000000FE /* MTRR capability */ +#define MSR_IA32_SYSENTER_CS 0x00000174 /* CS for sysenter */ +#define MSR_IA32_SYSENTER_ESP 0x00000175 /* ESP for sysenter */ +#define MSR_IA32_SYSENTER_EIP 0x00000176 /* EIP for sysenter */ +#define MSR_IA32_MCG_CAP 0x00000179 +/* Global machine check capability */ +#define MSR_IA32_MCG_STATUS 0x0000017A +/* Global machine check status */ +#define MSR_IA32_MCG_CTL 0x0000017B +/* Global machine check control */ +#define MSR_IA32_PERFEVTSEL0 0x00000186 +/* Performance Event Select Register 0 */ +#define MSR_IA32_PERFEVTSEL1 0x00000187 +/* Performance Event Select Register 1 */ +#define MSR_IA32_PERFEVTSEL2 0x00000188 +/* Performance Event Select Register 2 */ +#define MSR_IA32_PERFEVTSEL3 0x00000189 +/* Performance Event Select Register 3 */ +#define MSR_IA32_PERF_STATUS 0x00000198 +/* Current performance state */ +#define MSR_IA32_PERF_CTL 0x00000199 +/* Performance control */ +#define MSR_IA32_CLOCK_MODULATION 0x0000019A +/* Clock modulation control */ +#define MSR_IA32_THERM_INTERRUPT 0x0000019B +/* Thermal interrupt control */ +#define MSR_IA32_THERM_STATUS 0x0000019C +/* Thermal status information */ +#define MSR_IA32_MISC_ENABLE 0x000001A0 +/* Enable misc. 
processor features */ +#define MSR_IA32_ENERGY_PERF_BIAS 0x000001B0 +/* Performance energy bias hint */ +#define MSR_IA32_DEBUGCTL 0x000001D9 +/* Trace/Profile resource control */ +#define MSR_IA32_SMRR_PHYSBASE 0x000001F2 /* SMRR base address */ +#define MSR_IA32_SMRR_PHYSMASK 0x000001F3 /* SMRR range mask */ +#define MSR_IA32_PLATFORM_DCA_CAP 0x000001F8 /* DCA capability */ +#define MSR_IA32_CPU_DCA_CAP 0x000001F9 +/* Prefetch hint type capability */ +#define MSR_IA32_DCA_0_CAP 0x000001FA +/* DCA type 0 status/control */ +#define MSR_IA32_MTRR_PHYSBASE_0 0x00000200 +/* variable range MTRR base 0 */ +#define MSR_IA32_MTRR_PHYSMASK_0 0x00000201 +/* variable range MTRR mask 0 */ +#define MSR_IA32_MTRR_PHYSBASE_1 0x00000202 +/* variable range MTRR base 1 */ +#define MSR_IA32_MTRR_PHYSMASK_1 0x00000203 +/* variable range MTRR mask 1 */ +#define MSR_IA32_MTRR_PHYSBASE_2 0x00000204 +/* variable range MTRR base 2 */ +#define MSR_IA32_MTRR_PHYSMASK_2 0x00000205 +/* variable range MTRR mask 2 */ +#define MSR_IA32_MTRR_PHYSBASE_3 0x00000206 +/* variable range MTRR base 3 */ +#define MSR_IA32_MTRR_PHYSMASK_3 0x00000207 +/* variable range MTRR mask 3 */ +#define MSR_IA32_MTRR_PHYSBASE_4 0x00000208 +/* variable range MTRR base 4 */ +#define MSR_IA32_MTRR_PHYSMASK_4 0x00000209 +/* variable range MTRR mask 4 */ +#define MSR_IA32_MTRR_PHYSBASE_5 0x0000020A +/* variable range MTRR base 5 */ +#define MSR_IA32_MTRR_PHYSMASK_5 0x0000020B +/* variable range MTRR mask 5 */ +#define MSR_IA32_MTRR_PHYSBASE_6 0x0000020C +/* variable range MTRR base 6 */ +#define MSR_IA32_MTRR_PHYSMASK_6 0x0000020D +/* variable range MTRR mask 6 */ +#define MSR_IA32_MTRR_PHYSBASE_7 0x0000020E +/* variable range MTRR base 7 */ +#define MSR_IA32_MTRR_PHYSMASK_7 0x0000020F +/* variable range MTRR mask 7 */ +#define MSR_IA32_MTRR_PHYSBASE_8 0x00000210 +/* variable range MTRR base 8 */ +#define MSR_IA32_MTRR_PHYSMASK_8 0x00000211 +/* variable range MTRR mask 8 */ +#define MSR_IA32_MTRR_PHYSBASE_9 0x00000212 +/* variable range MTRR base 9 */ +#define MSR_IA32_MTRR_PHYSMASK_9 0x00000213 +/* variable range MTRR mask 9 */ +#define MSR_IA32_MTRR_FIX64K_00000 0x00000250 +/* fixed range MTRR 16K/0x00000 */ +#define MSR_IA32_MTRR_FIX16K_80000 0x00000258 +/* fixed range MTRR 16K/0x80000 */ +#define MSR_IA32_MTRR_FIX16K_A0000 0x00000259 +/* fixed range MTRR 16K/0xA0000 */ +#define MSR_IA32_MTRR_FIX4K_C0000 0x00000268 +/* fixed range MTRR 4K/0xC0000 */ +#define MSR_IA32_MTRR_FIX4K_C8000 0x00000269 +/* fixed range MTRR 4K/0xC8000 */ +#define MSR_IA32_MTRR_FIX4K_D0000 0x0000026A +/* fixed range MTRR 4K/0xD0000 */ +#define MSR_IA32_MTRR_FIX4K_D8000 0x0000026B +/* fixed range MTRR 4K/0xD8000 */ +#define MSR_IA32_MTRR_FIX4K_E0000 0x0000026C +/* fixed range MTRR 4K/0xE0000 */ +#define MSR_IA32_MTRR_FIX4K_E8000 0x0000026D +/* fixed range MTRR 4K/0xE8000 */ +#define MSR_IA32_MTRR_FIX4K_F0000 0x0000026E +/* fixed range MTRR 4K/0xF0000 */ +#define MSR_IA32_MTRR_FIX4K_F8000 0x0000026F +/* fixed range MTRR 4K/0xF8000 */ +#define MSR_IA32_PAT 0x00000277 /* PAT */ +#define MSR_IA32_MC0_CTL2 0x00000280 +/* Corrected error count threshold 0 */ +#define MSR_IA32_MC1_CTL2 0x00000281 +/* Corrected error count threshold 1 */ +#define MSR_IA32_MC2_CTL2 0x00000282 +/* Corrected error count threshold 2 */ +#define MSR_IA32_MC3_CTL2 0x00000283 +/* Corrected error count threshold 3 */ +#define MSR_IA32_MC4_CTL2 0x00000284 +/* Corrected error count threshold 4 */ +#define MSR_IA32_MC5_CTL2 0x00000285 +/* Corrected error count threshold 5 */ +#define MSR_IA32_MC6_CTL2 
0x00000286 +/* Corrected error count threshold 6 */ +#define MSR_IA32_MC7_CTL2 0x00000287 +/* Corrected error count threshold 7 */ +#define MSR_IA32_MC8_CTL2 0x00000288 +/* Corrected error count threshold 8 */ +#define MSR_IA32_MC9_CTL2 0x00000289 +/* Corrected error count threshold 9 */ +#define MSR_IA32_MC10_CTL2 0x0000028A +/* Corrected error count threshold 10 */ +#define MSR_IA32_MC11_CTL2 0x0000028B +/* Corrected error count threshold 11 */ +#define MSR_IA32_MC12_CTL2 0x0000028C +/* Corrected error count threshold 12 */ +#define MSR_IA32_MC13_CTL2 0x0000028D +/* Corrected error count threshold 13 */ +#define MSR_IA32_MC14_CTL2 0x0000028E +/* Corrected error count threshold 14 */ +#define MSR_IA32_MC15_CTL2 0x0000028F +/* Corrected error count threshold 15 */ +#define MSR_IA32_MC16_CTL2 0x00000290 +/* Corrected error count threshold 16 */ +#define MSR_IA32_MC17_CTL2 0x00000291 +/* Corrected error count threshold 17 */ +#define MSR_IA32_MC18_CTL2 0x00000292 +/* Corrected error count threshold 18 */ +#define MSR_IA32_MC19_CTL2 0x00000293 +/* Corrected error count threshold 19 */ +#define MSR_IA32_MC20_CTL2 0x00000294 +/* Corrected error count threshold 20 */ +#define MSR_IA32_MC21_CTL2 0x00000295 +/* Corrected error count threshold 21 */ +#define MSR_IA32_MTRR_DEF_TYPE 0x000002FF +/* Default memory type/MTRR control */ +#define MSR_IA32_FIXED_CTR0 0x00000309 +/* Fixed-function performance counter 0 */ +#define MSR_IA32_FIXED_CTR1 0x0000030A +/* Fixed-function performance counter 1 */ +#define MSR_IA32_FIXED_CTR2 0x0000030B +/* Fixed-function performance counter 2 */ +#define MSR_IA32_PERF_CAPABILITIES 0x00000345 +/* Performance capability */ +#define MSR_IA32_FIXED_CTR_CTL 0x0000038D +/* Fixed-function performance counter control */ +#define MSR_IA32_PERF_GLOBAL_STATUS 0x0000038E +/* Global performance counter status */ +#define MSR_IA32_PERF_GLOBAL_CTRL 0x0000038F +/* Global performance counter control */ +#define MSR_IA32_PERF_GLOBAL_OVF_CTRL 0x00000390 +/* Global performance counter overflow control */ +#define MSR_IA32_PEBS_ENABLE 0x000003F1 /* PEBS control */ +#define MSR_IA32_MC0_CTL 0x00000400 /* MC 0 control */ +#define MSR_IA32_MC0_STATUS 0x00000401 /* MC 0 status */ +#define MSR_IA32_MC0_ADDR 0x00000402 /* MC 0 address */ +#define MSR_IA32_MC0_MISC 0x00000403 /* MC 0 misc. */ +#define MSR_IA32_MC1_CTL 0x00000404 /* MC 1 control */ +#define MSR_IA32_MC1_STATUS 0x00000405 /* MC 1 status */ +#define MSR_IA32_MC1_ADDR 0x00000406 /* MC 1 address */ +#define MSR_IA32_MC1_MISC 0x00000407 /* MC 1 misc. */ +#define MSR_IA32_MC2_CTL 0x00000408 /* MC 2 control */ +#define MSR_IA32_MC2_STATUS 0x00000409 /* MC 2 status */ +#define MSR_IA32_MC2_ADDR 0x0000040A /* MC 2 address */ +#define MSR_IA32_MC2_MISC 0x0000040B /* MC 2 misc. */ +#define MSR_IA32_MC3_CTL 0x0000040C /* MC 3 control */ +#define MSR_IA32_MC3_STATUS 0x0000040D /* MC 3 status */ +#define MSR_IA32_MC3_ADDR 0x0000040E /* MC 3 address */ +#define MSR_IA32_MC3_MISC 0x0000040F /* MC 3 misc. */ +#define MSR_IA32_MC4_CTL 0x00000410 /* MC 4 control */ +#define MSR_IA32_MC4_STATUS 0x00000411 /* MC 4 status */ +#define MSR_IA32_MC4_ADDR 0x00000412 /* MC 4 address */ +#define MSR_IA32_MC4_MISC 0x00000413 /* MC 4 misc. */ +#define MSR_IA32_MC5_CTL 0x00000414 /* MC 5 control */ +#define MSR_IA32_MC5_STATUS 0x00000415 /* MC 5 status */ +#define MSR_IA32_MC5_ADDR 0x00000416 /* MC 5 address */ +#define MSR_IA32_MC5_MISC 0x00000417 /* MC 5 misc. 
*/ +#define MSR_IA32_MC6_CTL 0x00000418 /* MC 6 control */ +#define MSR_IA32_MC6_STATUS 0x00000419 /* MC 6 status */ +#define MSR_IA32_MC6_ADDR 0x0000041A /* MC 6 address */ +#define MSR_IA32_MC6_MISC 0x0000041B /* MC 6 misc. */ +#define MSR_IA32_MC7_CTL 0x0000041C /* MC 7 control */ +#define MSR_IA32_MC7_STATUS 0x0000041D /* MC 7 status */ +#define MSR_IA32_MC7_ADDR 0x0000041E /* MC 7 address */ +#define MSR_IA32_MC7_MISC 0x0000041F /* MC 7 misc. */ +#define MSR_IA32_MC8_CTL 0x00000420 /* MC 8 control */ +#define MSR_IA32_MC8_STATUS 0x00000421 /* MC 8 status */ +#define MSR_IA32_MC8_ADDR 0x00000422 /* MC 8 address */ +#define MSR_IA32_MC8_MISC 0x00000423 /* MC 8 misc. */ +#define MSR_IA32_MC9_CTL 0x00000424 /* MC 9 control */ +#define MSR_IA32_MC9_STATUS 0x00000425 /* MC 9 status */ +#define MSR_IA32_MC9_ADDR 0x00000426 /* MC 9 address */ +#define MSR_IA32_MC9_MISC 0x00000427 /* MC 9 misc. */ +#define MSR_IA32_MC10_CTL 0x00000428 /* MC 10 control */ +#define MSR_IA32_MC10_STATUS 0x00000429 /* MC 10 status */ +#define MSR_IA32_MC10_ADDR 0x0000042A /* MC 10 address */ +#define MSR_IA32_MC10_MISC 0x0000042B /* MC 10 misc. */ +#define MSR_IA32_MC11_CTL 0x0000042C /* MC 11 control */ +#define MSR_IA32_MC11_STATUS 0x0000042D /* MC 11 status */ +#define MSR_IA32_MC11_ADDR 0x0000042E /* MC 11 address */ +#define MSR_IA32_MC11_MISC 0x0000042F /* MC 11 misc. */ +#define MSR_IA32_MC12_CTL 0x00000430 /* MC 12 control */ +#define MSR_IA32_MC12_STATUS 0x00000431 /* MC 12 status */ +#define MSR_IA32_MC12_ADDR 0x00000432 /* MC 12 address */ +#define MSR_IA32_MC12_MISC 0x00000433 /* MC 12 misc. */ +#define MSR_IA32_MC13_CTL 0x00000434 /* MC 13 control */ +#define MSR_IA32_MC13_STATUS 0x00000435 /* MC 13 status */ +#define MSR_IA32_MC13_ADDR 0x00000436 /* MC 13 address */ +#define MSR_IA32_MC13_MISC 0x00000437 /* MC 13 misc. */ +#define MSR_IA32_MC14_CTL 0x00000438 /* MC 14 control */ +#define MSR_IA32_MC14_STATUS 0x00000439 /* MC 14 status */ +#define MSR_IA32_MC14_ADDR 0x0000043A /* MC 14 address */ +#define MSR_IA32_MC14_MISC 0x0000043B /* MC 14 misc. */ +#define MSR_IA32_MC15_CTL 0x0000043C /* MC 15 control */ +#define MSR_IA32_MC15_STATUS 0x0000043D /* MC 15 status */ +#define MSR_IA32_MC15_ADDR 0x0000043E /* MC 15 address */ +#define MSR_IA32_MC15_MISC 0x0000043F /* MC 15 misc. */ +#define MSR_IA32_MC16_CTL 0x00000440 /* MC 16 control */ +#define MSR_IA32_MC16_STATUS 0x00000441 /* MC 16 status */ +#define MSR_IA32_MC16_ADDR 0x00000442 /* MC 16 address */ +#define MSR_IA32_MC16_MISC 0x00000443 /* MC 16 misc. */ +#define MSR_IA32_MC17_CTL 0x00000444 /* MC 17 control */ +#define MSR_IA32_MC17_STATUS 0x00000445 /* MC 17 status */ +#define MSR_IA32_MC17_ADDR 0x00000446 /* MC 17 address */ +#define MSR_IA32_MC17_MISC 0x00000447 /* MC 17 misc. */ +#define MSR_IA32_MC18_CTL 0x00000448 /* MC 18 control */ +#define MSR_IA32_MC18_STATUS 0x00000449 /* MC 18 status */ +#define MSR_IA32_MC18_ADDR 0x0000044A /* MC 18 address */ +#define MSR_IA32_MC18_MISC 0x0000044B /* MC 18 misc. */ +#define MSR_IA32_MC19_CTL 0x0000044C /* MC 19 control */ +#define MSR_IA32_MC19_STATUS 0x0000044D /* MC 19 status */ +#define MSR_IA32_MC19_ADDR 0x0000044E /* MC 19 address */ +#define MSR_IA32_MC19_MISC 0x0000044F /* MC 19 misc. */ +#define MSR_IA32_MC20_CTL 0x00000450 /* MC 20 control */ +#define MSR_IA32_MC20_STATUS 0x00000451 /* MC 20 status */ +#define MSR_IA32_MC20_ADDR 0x00000452 /* MC 20 address */ +#define MSR_IA32_MC20_MISC 0x00000453 /* MC 20 misc. 
*/ +#define MSR_IA32_MC21_CTL 0x00000454 /* MC 21 control */ +#define MSR_IA32_MC21_STATUS 0x00000455 /* MC 21 status */ +#define MSR_IA32_MC21_ADDR 0x00000456 /* MC 21 address */ +#define MSR_IA32_MC21_MISC 0x00000457 /* MC 21 misc. */ +#define MSR_IA32_VMX_BASIC 0x00000480 +/* Capability reporting register basic VMX capabilities */ +#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 +/* Capability reporting register pin based VM execution controls */ +#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 +/* Capability reporting register primary processor based VM execution controls*/ +#define MSR_IA32_VMX_EXIT_CTLS 0x00000483 +/* Capability reporting register VM exit controls */ +#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 +/* Capability reporting register VM entry controls */ +#define MSR_IA32_VMX_MISC 0x00000485 +/* Reporting register misc. VMX capabilities */ +#define MSR_IA32_VMX_CR0_FIXED0 0x00000486 +/* Capability reporting register of CR0 bits fixed to 0 */ +#define MSR_IA32_VMX_CR0_FIXED1 0x00000487 +/* Capability reporting register of CR0 bits fixed to 1 */ +#define MSR_IA32_VMX_CR4_FIXED0 0x00000488 +/* Capability reporting register of CR4 bits fixed to 0 */ +#define MSR_IA32_VMX_CR4_FIXED1 0x00000489 +/* Capability reporting register of CR4 bits fixed to 1 */ +#define MSR_IA32_VMX_VMCS_ENUM 0x0000048A +/* Capability reporting register of VMCS field enumeration */ +#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048B +/* Capability reporting register of secondary processor based VM execution + * controls + */ +#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048C +/* Capability reporting register of EPT and VPID */ +#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048D +/* Capability reporting register of pin based VM execution flex controls */ +#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048E +/* Capability reporting register of primary processor based VM execution flex + * controls + */ +#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048F +/* Capability reporting register of VM exit flex controls */ +#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 +/* Capability reporting register of VM entry flex controls */ +#define MSR_IA32_DS_AREA 0x00000600 /* DS save area */ +/* APIC TSC deadline MSR */ +#define MSR_IA32_TSC_DEADLINE 0x000006E0 +#define MSR_IA32_EXT_XAPICID 0x00000802 /* x2APIC ID */ +#define MSR_IA32_EXT_APIC_VERSION 0x00000803 /* x2APIC version */ +#define MSR_IA32_EXT_APIC_TPR 0x00000808 +/* x2APIC task priority */ +#define MSR_IA32_EXT_APIC_PPR 0x0000080A +/* x2APIC processor priority */ +#define MSR_IA32_EXT_APIC_EOI 0x0000080B /* x2APIC EOI */ +#define MSR_IA32_EXT_APIC_LDR 0x0000080D +/* x2APIC logical destination */ +#define MSR_IA32_EXT_APIC_SIVR 0x0000080F +/* x2APIC spurious interrupt vector */ +#define MSR_IA32_EXT_APIC_ISR0 0x00000810 +/* x2APIC in-service register 0 */ +#define MSR_IA32_EXT_APIC_ISR1 0x00000811 +/* x2APIC in-service register 1 */ +#define MSR_IA32_EXT_APIC_ISR2 0x00000812 +/* x2APIC in-service register 2 */ +#define MSR_IA32_EXT_APIC_ISR3 0x00000813 +/* x2APIC in-service register 3 */ +#define MSR_IA32_EXT_APIC_ISR4 0x00000814 +/* x2APIC in-service register 4 */ +#define MSR_IA32_EXT_APIC_ISR5 0x00000815 +/* x2APIC in-service register 5 */ +#define MSR_IA32_EXT_APIC_ISR6 0x00000816 +/* x2APIC in-service register 6 */ +#define MSR_IA32_EXT_APIC_ISR7 0x00000817 +/* x2APIC in-service register 7 */ +#define MSR_IA32_EXT_APIC_TMR0 0x00000818 +/* x2APIC trigger mode register 0 */ +#define MSR_IA32_EXT_APIC_TMR1 0x00000819 +/* x2APIC trigger mode register 1 */ +#define 
MSR_IA32_EXT_APIC_TMR2 0x0000081A +/* x2APIC trigger mode register 2 */ +#define MSR_IA32_EXT_APIC_TMR3 0x0000081B +/* x2APIC trigger mode register 3 */ +#define MSR_IA32_EXT_APIC_TMR4 0x0000081C +/* x2APIC trigger mode register 4 */ +#define MSR_IA32_EXT_APIC_TMR5 0x0000081D +/* x2APIC trigger mode register 5 */ +#define MSR_IA32_EXT_APIC_TMR6 0x0000081E +/* x2APIC trigger mode register 6 */ +#define MSR_IA32_EXT_APIC_TMR7 0x0000081F +/* x2APIC trigger mode register 7 */ +#define MSR_IA32_EXT_APIC_IRR0 0x00000820 +/* x2APIC interrupt request register 0 */ +#define MSR_IA32_EXT_APIC_IRR1 0x00000821 +/* x2APIC interrupt request register 1 */ +#define MSR_IA32_EXT_APIC_IRR2 0x00000822 +/* x2APIC interrupt request register 2 */ +#define MSR_IA32_EXT_APIC_IRR3 0x00000823 +/* x2APIC interrupt request register 3 */ +#define MSR_IA32_EXT_APIC_IRR4 0x00000824 +/* x2APIC interrupt request register 4 */ +#define MSR_IA32_EXT_APIC_IRR5 0x00000825 +/* x2APIC interrupt request register 5 */ +#define MSR_IA32_EXT_APIC_IRR6 0x00000826 +/* x2APIC interrupt request register 6 */ +#define MSR_IA32_EXT_APIC_IRR7 0x00000827 +/* x2APIC interrupt request register 7 */ +#define MSR_IA32_EXT_APIC_ESR 0x00000828 +/* x2APIC error status */ +#define MSR_IA32_EXT_APIC_LVT_CMCI 0x0000082F +/* x2APIC LVT corrected machine check interrupt register */ +#define MSR_IA32_EXT_APIC_ICR 0x00000830 +/* x2APIC interrupt command register */ +#define MSR_IA32_EXT_APIC_LVT_TIMER 0x00000832 +/* x2APIC LVT timer interrupt register */ +#define MSR_IA32_EXT_APIC_LVT_THERMAL 0x00000833 +/* x2APIC LVT thermal sensor interrupt register */ +#define MSR_IA32_EXT_APIC_LVT_PMI 0x00000834 +/* x2APIC LVT performance monitor interrupt register */ +#define MSR_IA32_EXT_APIC_LVT_LINT0 0x00000835 +/* x2APIC LVT LINT0 register */ +#define MSR_IA32_EXT_APIC_LVT_LINT1 0x00000836 +/* x2APIC LVT LINT1 register */ +#define MSR_IA32_EXT_APIC_LVT_ERROR 0x00000837 +/* x2APIC LVT error register */ +#define MSR_IA32_EXT_APIC_INIT_COUNT 0x00000838 +/* x2APIC initial count register */ +#define MSR_IA32_EXT_APIC_CUR_COUNT 0x00000839 +/* x2APIC current count register */ +#define MSR_IA32_EXT_APIC_DIV_CONF 0x0000083E +/* x2APIC divide configuration register */ +#define MSR_IA32_EXT_APIC_SELF_IPI 0x0000083F +/* x2APIC self IPI register */ +#define MSR_IA32_EFER 0xC0000080 +/* Extended feature enables */ +#define MSR_IA32_STAR 0xC0000081 +/* System call target address */ +#define MSR_IA32_LSTAR 0xC0000082 +/* IA-32e mode system call target address */ +#define MSR_IA32_FMASK 0xC0000084 +/* System call flag mask */ +#define MSR_IA32_FS_BASE 0xC0000100 +/* Map of BASE address of FS */ +#define MSR_IA32_GS_BASE 0xC0000101 +/* Map of BASE address of GS */ +#define MSR_IA32_KERNEL_GS_BASE 0xC0000102 +/* Swap target of BASE address of GS */ +#define MSR_IA32_TSC_AUX 0xC0000103 /* Auxiliary TSC */ + +/* ATOM specific MSRs */ +#define MSR_ATOM_EBL_CR_POWERON 0x0000002A +/* Processor hard power-on configuration */ +#define MSR_ATOM_LASTBRANCH_0_FROM_IP 0x00000040 +/* Last branch record 0 from IP */ +#define MSR_ATOM_LASTBRANCH_1_FROM_IP 0x00000041 +/* Last branch record 1 from IP */ +#define MSR_ATOM_LASTBRANCH_2_FROM_IP 0x00000042 +/* Last branch record 2 from IP */ +#define MSR_ATOM_LASTBRANCH_3_FROM_IP 0x00000043 +/* Last branch record 3 from IP */ +#define MSR_ATOM_LASTBRANCH_4_FROM_IP 0x00000044 +/* Last branch record 4 from IP */ +#define MSR_ATOM_LASTBRANCH_5_FROM_IP 0x00000045 +/* Last branch record 5 from IP */ +#define MSR_ATOM_LASTBRANCH_6_FROM_IP 0x00000046 +/* 
Last branch record 6 from IP */ +#define MSR_ATOM_LASTBRANCH_7_FROM_IP 0x00000047 +/* Last branch record 7 from IP */ +#define MSR_ATOM_LASTBRANCH_0_TO_LIP 0x00000060 +/* Last branch record 0 to IP */ +#define MSR_ATOM_LASTBRANCH_1_TO_LIP 0x00000061 +/* Last branch record 1 to IP */ +#define MSR_ATOM_LASTBRANCH_2_TO_LIP 0x00000062 +/* Last branch record 2 to IP */ +#define MSR_ATOM_LASTBRANCH_3_TO_LIP 0x00000063 +/* Last branch record 3 to IP */ +#define MSR_ATOM_LASTBRANCH_4_TO_LIP 0x00000064 +/* Last branch record 4 to IP */ +#define MSR_ATOM_LASTBRANCH_5_TO_LIP 0x00000065 +/* Last branch record 5 to IP */ +#define MSR_ATOM_LASTBRANCH_6_TO_LIP 0x00000066 +/* Last branch record 6 to IP */ +#define MSR_ATOM_LASTBRANCH_7_TO_LIP 0x00000067 +/* Last branch record 7 to IP */ +#define MSR_ATOM_FSB_FREQ 0x000000CD /* Scalable bus speed */ +#define MSR_PLATFORM_INFO 0x000000CE +/* Maximum resolved bus ratio */ +#define MSR_ATOM_BBL_CR_CTL3 0x0000011E /* L2 hardware enabled */ +#define MSR_ATOM_THERM2_CTL 0x0000019D +/* Mode of automatic thermal monitor */ +#define MSR_ATOM_LASTBRANCH_TOS 0x000001C9 +/* Last branch record stack TOS */ +#define MSR_ATOM_LER_FROM_LIP 0x000001DD +/* Last exception record from linear IP */ +#define MSR_ATOM_LER_TO_LIP 0x000001DE +/* Last exception record to linear IP */ + +/* LINCROFT specific MSRs */ +#define MSR_LNC_BIOS_CACHE_AS_RAM 0x000002E0 /* Configure CAR */ + +/* MSR_IA32_VMX_EPT_VPID_CAP: EPT and VPID capability bits */ +#define MSR_VMX_EPT_VPID_CAP_1GB (1UL << 17)/* EPT 1GB page */ +#define MSR_VMX_INVEPT (1UL << 20)/* INVEPT */ +#define MSR_VMX_INVEPT_SINGLE_CONTEXT (1UL << 25)/* INVEPT Single */ +#define MSR_VMX_INVEPT_GLOBAL_CONTEXT (1UL << 26)/* INVEPT Global */ +#define MSR_VMX_INVVPID (1UL << 32)/* INVVPID */ +#define MSR_VMX_INVVPID_SINGLE_CONTEXT (1UL << 41)/* INVVPID Single */ +#define MSR_VMX_INVVPID_GLOBAL_CONTEXT (1UL << 42)/* INVVPID Global */ + +/* EFER bits */ +#define MSR_IA32_EFER_SCE_BIT (1<<0) +#define MSR_IA32_EFER_LME_BIT (1<<8) /* IA32e mode enable */ +#define MSR_IA32_EFER_LMA_BIT (1<<10) /* IA32e mode active */ +#define MSR_IA32_EFER_NXE_BIT (1<<11) + +/* FEATURE CONTROL bits */ +#define MSR_IA32_FEATURE_CONTROL_LOCK (1<<0) +#define MSR_IA32_FEATURE_CONTROL_VMX_SMX (1<<1) +#define MSR_IA32_FEATURE_CONTROL_VMX_NO_SMX (1<<2) + +/* PAT memory type definitions */ +#define PAT_MEM_TYPE_UC 0x00 /* uncached */ +#define PAT_MEM_TYPE_WC 0x01 /* write combining */ +#define PAT_MEM_TYPE_WT 0x04 /* write through */ +#define PAT_MEM_TYPE_WP 0x05 /* write protected */ +#define PAT_MEM_TYPE_WB 0x06 /* writeback */ +#define PAT_MEM_TYPE_UCM 0x07 /* uncached minus */ + +/* MTRR memory type definitions */ +#define MTRR_MEM_TYPE_UC 0x00 /* uncached */ +#define MTRR_MEM_TYPE_WC 0x01 /* write combining */ +#define MTRR_MEM_TYPE_WT 0x04 /* write through */ +#define MTRR_MEM_TYPE_WP 0x05 /* write protected */ +#define MTRR_MEM_TYPE_WB 0x06 /* writeback */ + +/* misc. MTRR flag definitions */ +#define MTRR_ENABLE 0x800 /* MTRR enable */ +#define MTRR_FIX_ENABLE 0x400 /* fixed range MTRR enable */ +#define MTRR_VALID 0x800 /* MTRR setting is valid */ + +/* SPEC & PRED bit */ +#define SPEC_ENABLE_IBRS (1<<0) +#define SPEC_ENABLE_STIBP (1<<1) +#define PRED_SET_IBPB (1<<0) + +#endif /* MSR_H */ diff --git a/hypervisor/include/arch/x86/multiboot.h b/hypervisor/include/arch/x86/multiboot.h new file mode 100644 index 000000000..22ef4f375 --- /dev/null +++ b/hypervisor/include/arch/x86/multiboot.h @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2018 Intel Corporation. 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MULTIBOOT_H +#define MULTIBOOT_H + +#define MULTIBOOT_INFO_MAGIC 0x2BADB002 +#define MULTIBOOT_INFO_HAS_CMDLINE 0x00000004 +#define MULTIBOOT_INFO_HAS_MODS 0x00000008 + +struct multiboot_info { + uint32_t mi_flags; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_MEMORY. */ + uint32_t mi_mem_lower; + uint32_t mi_mem_upper; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_BOOT_DEVICE. */ + uint8_t mi_boot_device_part3; + uint8_t mi_boot_device_part2; + uint8_t mi_boot_device_part1; + uint8_t mi_boot_device_drive; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_CMDLINE. */ + uint32_t mi_cmdline; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_MODS. */ + uint32_t mi_mods_count; + uint32_t mi_mods_addr; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_{AOUT,ELF}_SYMS. */ + uint32_t mi_elfshdr_num; + uint32_t mi_elfshdr_size; + uint32_t mi_elfshdr_addr; + uint32_t mi_elfshdr_shndx; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_MMAP. */ + uint32_t mi_mmap_length; + uint32_t mi_mmap_addr; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_DRIVES. */ + uint32_t mi_drives_length; + uint32_t mi_drives_addr; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_CONFIG_TABLE. */ + uint32_t unused_mi_config_table; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_LOADER_NAME. */ + uint32_t mi_loader_name; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_APM. */ + uint32_t unused_mi_apm_table; + + /* Valid if mi_flags sets MULTIBOOT_INFO_HAS_VBE. 
*/ + uint32_t unused_mi_vbe_control_info; + uint32_t unused_mi_vbe_mode_info; + uint32_t unused_mi_vbe_interface_seg; + uint32_t unused_mi_vbe_interface_off; + uint32_t unused_mi_vbe_interface_len; +}; + +struct multiboot_mmap { + uint32_t size; + uint64_t baseaddr; + uint64_t length; + uint32_t type; +} __packed; + +struct multiboot_module { + uint32_t mm_mod_start; + uint32_t mm_mod_end; + uint32_t mm_string; + uint32_t mm_reserved; +}; + +int parse_hv_cmdline(void); + +#endif diff --git a/hypervisor/include/arch/x86/softirq.h b/hypervisor/include/arch/x86/softirq.h new file mode 100644 index 000000000..9ae5b8bb0 --- /dev/null +++ b/hypervisor/include/arch/x86/softirq.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SOFTIRQ_H +#define SOFTIRQ_H + +#define SOFTIRQ_TIMER 0 +#define SOFTIRQ_DEV_ASSIGN 1 +#define SOFTIRQ_MAX 2 +#define SOFTIRQ_MASK ((1UL<> 0) +#define VM_EXIT_CR_ACCESS_ACCESS_TYPE(exit_qual) \ + (VM_EXIT_QUALIFICATION_BIT_MASK(exit_qual, 5, 4) >> 4) +#define VM_EXIT_CR_ACCESS_LMSW_OP(exit_qual) \ + (VM_EXIT_QUALIFICATION_BIT_MASK(exit_qual, 6, 6) >> 6) +#define VM_EXIT_CR_ACCESS_REG_IDX(exit_qual) \ + (VM_EXIT_QUALIFICATION_BIT_MASK(exit_qual, 11, 8) >> 8) +#define VM_EXIT_CR_ACCESS_LMSW_SRC_DATE(exit_qual) \ + (VM_EXIT_QUALIFICATION_BIT_MASK(exit_qual, 31, 16) >> 16) + +/* MACROs to access IO Access Info using exit qualification field */ +#define VM_EXIT_IO_INSTRUCTION_SIZE(exit_qual) \ + (VM_EXIT_QUALIFICATION_BIT_MASK(exit_qual, 2, 0) >> 0) +#define VM_EXIT_IO_INSTRUCTION_ACCESS_DIRECTION(exit_qual) \ + (VM_EXIT_QUALIFICATION_BIT_MASK(exit_qual, 3, 3) >> 3) +#define VM_EXIT_IO_INSTRUCTION_IS_STRING(exit_qual) \ + (VM_EXIT_QUALIFICATION_BIT_MASK(exit_qual, 4, 4) >> 4) +#define VM_EXIT_IO_INSTRUCTION_IS_REP_PREFIXED(exit_qual) \ + (VM_EXIT_QUALIFICATION_BIT_MASK(exit_qual, 5, 5) >> 5) +#define VM_EXIT_IO_INSTRUCTION_IS_OPERAND_ENCODING(exit_qual) \ + (VM_EXIT_QUALIFICATION_BIT_MASK(exit_qual, 6, 6) >> 6) +#define VM_EXIT_IO_INSTRUCTION_PORT_NUMBER(exit_qual) \ + (VM_EXIT_QUALIFICATION_BIT_MASK(exit_qual, 31, 16) >> 16) + +#endif /* VMEXIT_H_ */ diff --git a/hypervisor/include/arch/x86/vmx.h b/hypervisor/include/arch/x86/vmx.h new file mode 100644 index 000000000..6d21e1be7 --- /dev/null +++ b/hypervisor/include/arch/x86/vmx.h @@ -0,0 +1,433 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef VMX_H_ +#define VMX_H_ + +/* 16-bit control fields */ +#define VMX_VPID 0x00000000 +/* 16-bit guest-state fields */ +#define VMX_GUEST_ES_SEL 0x00000800 +#define VMX_GUEST_CS_SEL 0x00000802 +#define VMX_GUEST_SS_SEL 0x00000804 +#define VMX_GUEST_DS_SEL 0x00000806 +#define VMX_GUEST_FS_SEL 0x00000808 +#define VMX_GUEST_GS_SEL 0x0000080a +#define VMX_GUEST_LDTR_SEL 0x0000080c +#define VMX_GUEST_TR_SEL 0x0000080e +#define VMX_GUEST_INTR_STATUS 0x00000810 +/* 16-bit host-state fields */ +#define VMX_HOST_ES_SEL 0x00000c00 +#define VMX_HOST_CS_SEL 0x00000c02 +#define VMX_HOST_SS_SEL 0x00000c04 +#define VMX_HOST_DS_SEL 0x00000c06 +#define VMX_HOST_FS_SEL 0x00000c08 +#define VMX_HOST_GS_SEL 0x00000c0a +#define VMX_HOST_TR_SEL 0x00000c0c +/* 64-bit control fields */ +#define VMX_IO_BITMAP_A_FULL 0x00002000 +#define VMX_IO_BITMAP_A_HIGH 0x00002001 +#define VMX_IO_BITMAP_B_FULL 0x00002002 +#define VMX_IO_BITMAP_B_HIGH 0x00002003 +#define VMX_MSR_BITMAP_FULL 0x00002004 +#define VMX_MSR_BITMAP_HIGH 0x00002005 +#define VMX_EXIT_MSR_STORE_ADDR_FULL 0x00002006 +#define VMX_EXIT_MSR_STORE_ADDR_HIGH 0x00002007 +#define VMX_EXIT_MSR_LOAD_ADDR_FULL 0x00002008 +#define VMX_EXIT_MSR_LOAD_ADDR_HIGH 0x00002009 +#define VMX_ENTRY_MSR_LOAD_ADDR_FULL 0x0000200a +#define VMX_ENTRY_MSR_LOAD_ADDR_HIGH 0x0000200b +#define VMX_EXECUTIVE_VMCS_PTR_FULL 0x0000200c +#define VMX_EXECUTIVE_VMCS_PTR_HIGH 0x0000200d +#define VMX_TSC_OFFSET_FULL 0x00002010 +#define VMX_TSC_OFFSET_HIGH 0x00002011 +#define VMX_VIRTUAL_APIC_PAGE_ADDR_FULL 0x00002012 +#define VMX_VIRTUAL_APIC_PAGE_ADDR_HIGH 0x00002013 +#define VMX_APIC_ACCESS_ADDR_FULL 0x00002014 +#define VMX_APIC_ACCESS_ADDR_HIGH 0x00002015 +#define VMX_EPT_POINTER_FULL 0x0000201A +#define VMX_EPT_POINTER_HIGH 0x0000201B +#define VMX_EOI_EXIT0_FULL 0x0000201C +#define VMX_EOI_EXIT0_HIGH 0x0000201D +#define VMX_EOI_EXIT1_FULL 0x0000201E +#define VMX_EOI_EXIT1_HIGH 0x0000201F +#define VMX_EOI_EXIT2_FULL 0x00002020 +#define VMX_EOI_EXIT2_HIGH 0x00002021 +#define VMX_EOI_EXIT3_FULL 0x00002022 +#define VMX_EOI_EXIT3_HIGH 0x00002023 +#define VMX_EOI_EXIT(vector) (VMX_EOI_EXIT0_FULL + ((vector) / 64) * 2) +/* 64-bit read-only data fields */ +#define VMX_GUEST_PHYSICAL_ADDR_FULL 0x00002400 +#define VMX_GUEST_PHYSICAL_ADDR_HIGH 0x00002401 +/* 64-bit guest-state fields */ +#define VMX_VMS_LINK_PTR_FULL 0x00002800 +#define VMX_VMS_LINK_PTR_HIGH 0x00002801 +#define VMX_GUEST_IA32_DEBUGCTL_FULL 0x00002802 +#define VMX_GUEST_IA32_DEBUGCTL_HIGH 0x00002803 +#define VMX_GUEST_IA32_PAT_FULL 0x00002804 +#define VMX_GUEST_IA32_PAT_HIGH 0x00002805 +#define VMX_GUEST_IA32_EFER_FULL 0x00002806 +#define VMX_GUEST_IA32_EFER_HIGH 0x00002807 +#define VMX_GUEST_IA32_PERF_CTL_FULL 0x00002808 +#define VMX_GUEST_IA32_PERF_CTL_HIGH 0x00002809 +#define VMX_GUEST_PDPTE0_FULL 0x0000280A +#define VMX_GUEST_PDPTE0_HIGH 0x0000280B +#define VMX_GUEST_PDPTE1_FULL 0x0000280C +#define VMX_GUEST_PDPTE1_HIGH 0x0000280D +#define VMX_GUEST_PDPTE2_FULL 0x0000280E +#define VMX_GUEST_PDPTE2_HIGH 0x0000280F +#define VMX_GUEST_PDPTE3_FULL 0x00002810 +#define VMX_GUEST_PDPTE3_HIGH 0x00002811 +/* 64-bit host-state fields */ +#define VMX_HOST_IA32_PAT_FULL 0x00002C00 +#define VMX_HOST_IA32_PAT_HIGH 0x00002C01 +#define VMX_HOST_IA32_EFER_FULL 0x00002C02 +#define VMX_HOST_IA32_EFER_HIGH 0x00002C03 +#define VMX_HOST_IA32_PERF_CTL_FULL 0x00002C04 +#define VMX_HOST_IA32_PERF_CTL_HIGH 0x00002C05 +/* 32-bit control fields */ +#define VMX_PIN_VM_EXEC_CONTROLS 0x00004000 +#define VMX_PROC_VM_EXEC_CONTROLS 0x00004002 +#define 
VMX_EXCEPTION_BITMAP 0x00004004 +#define VMX_PF_EC_MASK 0x00004006 +#define VMX_PF_EC_MATCH 0x00004008 +#define VMX_CR3_TARGET_COUNT 0x0000400a +#define VMX_EXIT_CONTROLS 0x0000400c +#define VMX_EXIT_MSR_STORE_COUNT 0x0000400e +#define VMX_EXIT_MSR_LOAD_COUNT 0x00004010 +#define VMX_ENTRY_CONTROLS 0x00004012 +#define VMX_ENTRY_MSR_LOAD_COUNT 0x00004014 +#define VMX_ENTRY_INT_INFO_FIELD 0x00004016 +#define VMX_ENTRY_EXCEPTION_EC 0x00004018 +#define VMX_ENTRY_INSTR_LENGTH 0x0000401a +#define VMX_TPR_THRESHOLD 0x0000401c +#define VMX_PROC_VM_EXEC_CONTROLS2 0x0000401E +#define VMX_PLE_GAP 0x00004020 +#define VMX_PLE_WINDOW 0x00004022 +/* 32-bit read-only data fields */ +#define VMX_INSTR_ERROR 0x00004400 +#define VMX_EXIT_REASON 0x00004402 +#define VMX_EXIT_INT_INFO 0x00004404 +#define VMX_EXIT_INT_EC 0x00004406 +#define VMX_IDT_VEC_INFO_FIELD 0x00004408 +#define VMX_IDT_VEC_EC 0x0000440a +#define VMX_EXIT_INSTR_LEN 0x0000440c +#define VMX_INSTR_INFO 0x0000440e +/* 32-bit guest-state fields */ +#define VMX_GUEST_ES_LIMIT 0x00004800 +#define VMX_GUEST_CS_LIMIT 0x00004802 +#define VMX_GUEST_SS_LIMIT 0x00004804 +#define VMX_GUEST_DS_LIMIT 0x00004806 +#define VMX_GUEST_FS_LIMIT 0x00004808 +#define VMX_GUEST_GS_LIMIT 0x0000480a +#define VMX_GUEST_LDTR_LIMIT 0x0000480c +#define VMX_GUEST_TR_LIMIT 0x0000480e +#define VMX_GUEST_GDTR_LIMIT 0x00004810 +#define VMX_GUEST_IDTR_LIMIT 0x00004812 +#define VMX_GUEST_ES_ATTR 0x00004814 +#define VMX_GUEST_CS_ATTR 0x00004816 +#define VMX_GUEST_SS_ATTR 0x00004818 +#define VMX_GUEST_DS_ATTR 0x0000481a +#define VMX_GUEST_FS_ATTR 0x0000481c +#define VMX_GUEST_GS_ATTR 0x0000481e +#define VMX_GUEST_LDTR_ATTR 0x00004820 +#define VMX_GUEST_TR_ATTR 0x00004822 +#define VMX_GUEST_INTERRUPTIBILITY_INFO 0x00004824 +#define VMX_GUEST_ACTIVITY_STATE 0x00004826 +#define VMX_GUEST_SMBASE 0x00004828 +#define VMX_GUEST_IA32_SYSENTER_CS 0x0000482a +#define VMX_GUEST_TIMER 0x0000482E +/* 32-bit host-state fields */ +#define VMX_HOST_IA32_SYSENTER_CS 0x00004c00 +/* natural-width control fields */ +#define VMX_CR0_MASK 0x00006000 +#define VMX_CR4_MASK 0x00006002 +#define VMX_CR0_READ_SHADOW 0x00006004 +#define VMX_CR4_READ_SHADOW 0x00006006 +#define VMX_CR3_TARGET_0 0x00006008 +#define VMX_CR3_TARGET_1 0x0000600a +#define VMX_CR3_TARGET_2 0x0000600c +#define VMX_CR3_TARGET_3 0x0000600e +/* natural-width read-only data fields */ +#define VMX_EXIT_QUALIFICATION 0x00006400 +#define VMX_IO_RCX 0x00006402 +#define VMX_IO_RDI 0x00006406 +#define VMX_GUEST_LINEAR_ADDR 0x0000640a +/* natural-width guest-state fields */ +#define VMX_GUEST_CR0 0x00006800 +#define VMX_GUEST_CR3 0x00006802 +#define VMX_GUEST_CR4 0x00006804 +#define VMX_GUEST_ES_BASE 0x00006806 +#define VMX_GUEST_CS_BASE 0x00006808 +#define VMX_GUEST_SS_BASE 0x0000680a +#define VMX_GUEST_DS_BASE 0x0000680c +#define VMX_GUEST_FS_BASE 0x0000680e +#define VMX_GUEST_GS_BASE 0x00006810 +#define VMX_GUEST_LDTR_BASE 0x00006812 +#define VMX_GUEST_TR_BASE 0x00006814 +#define VMX_GUEST_GDTR_BASE 0x00006816 +#define VMX_GUEST_IDTR_BASE 0x00006818 +#define VMX_GUEST_DR7 0x0000681a +#define VMX_GUEST_RSP 0x0000681c +#define VMX_GUEST_RIP 0x0000681e +#define VMX_GUEST_RFLAGS 0x00006820 +#define VMX_GUEST_PENDING_DEBUG_EXCEPT 0x00006822 +#define VMX_GUEST_IA32_SYSENTER_ESP 0x00006824 +#define VMX_GUEST_IA32_SYSENTER_EIP 0x00006826 +/* natural-width host-state fields */ +#define VMX_HOST_CR0 0x00006c00 +#define VMX_HOST_CR3 0x00006c02 +#define VMX_HOST_CR4 0x00006c04 +#define VMX_HOST_FS_BASE 0x00006c06 +#define VMX_HOST_GS_BASE 0x00006c08 
+#define VMX_HOST_TR_BASE 0x00006c0a +#define VMX_HOST_GDTR_BASE 0x00006c0c +#define VMX_HOST_IDTR_BASE 0x00006c0e +#define VMX_HOST_IA32_SYSENTER_ESP 0x00006c10 +#define VMX_HOST_IA32_SYSENTER_EIP 0x00006c12 +#define VMX_HOST_RSP 0x00006c14 +#define VMX_HOST_RIP 0x00006c16 +/* + * Basic VM exit reasons + */ +#define VMX_EXIT_REASON_EXCEPTION_OR_NMI 0x00000000 +#define VMX_EXIT_REASON_EXTERNAL_INTERRUPT 0x00000001 +#define VMX_EXIT_REASON_TRIPLE_FAULT 0x00000002 +#define VMX_EXIT_REASON_INIT_SIGNAL 0x00000003 +#define VMX_EXIT_REASON_STARTUP_IPI 0x00000004 +#define VMX_EXIT_REASON_IO_SMI 0x00000005 +#define VMX_EXIT_REASON_OTHER_SMI 0x00000006 +#define VMX_EXIT_REASON_INTERRUPT_WINDOW 0x00000007 +#define VMX_EXIT_REASON_NMI_WINDOW 0x00000008 +#define VMX_EXIT_REASON_TASK_SWITCH 0x00000009 +#define VMX_EXIT_REASON_CPUID 0x0000000A +#define VMX_EXIT_REASON_GETSEC 0x0000000B +#define VMX_EXIT_REASON_HLT 0x0000000C +#define VMX_EXIT_REASON_INVD 0x0000000D +#define VMX_EXIT_REASON_INVLPG 0x0000000E +#define VMX_EXIT_REASON_RDPMC 0x0000000F +#define VMX_EXIT_REASON_RDTSC 0x00000010 +#define VMX_EXIT_REASON_RSM 0x00000011 +#define VMX_EXIT_REASON_VMCALL 0x00000012 +#define VMX_EXIT_REASON_VMCLEAR 0x00000013 +#define VMX_EXIT_REASON_VMLAUNCH 0x00000014 +#define VMX_EXIT_REASON_VMPTRLD 0x00000015 +#define VMX_EXIT_REASON_VMPTRST 0x00000016 +#define VMX_EXIT_REASON_VMREAD 0x00000017 +#define VMX_EXIT_REASON_VMRESUME 0x00000018 +#define VMX_EXIT_REASON_VMWRITE 0x00000019 +#define VMX_EXIT_REASON_VMXOFF 0x0000001A +#define VMX_EXIT_REASON_VMXON 0x0000001B +#define VMX_EXIT_REASON_CR_ACCESS 0x0000001C +#define VMX_EXIT_REASON_DR_ACCESS 0x0000001D +#define VMX_EXIT_REASON_IO_INSTRUCTION 0x0000001E +#define VMX_EXIT_REASON_RDMSR 0x0000001F +#define VMX_EXIT_REASON_WRMSR 0x00000020 +#define VMX_EXIT_REASON_ENTRY_FAILURE_INVALID_GUEST_STATE 0x00000021 +#define VMX_EXIT_REASON_ENTRY_FAILURE_MSR_LOADING 0x00000022 + /* entry 0x23 (35) is missing */ +#define VMX_EXIT_REASON_MWAIT 0x00000024 +#define VMX_EXIT_REASON_MONITOR_TRAP 0x00000025 + /* entry 0x26 (38) is missing */ +#define VMX_EXIT_REASON_MONITOR 0x00000027 +#define VMX_EXIT_REASON_PAUSE 0x00000028 +#define VMX_EXIT_REASON_ENTRY_FAILURE_MACHINE_CHECK 0x00000029 + /* entry 0x2A (42) is missing */ +#define VMX_EXIT_REASON_TPR_BELOW_THRESHOLD 0x0000002B +#define VMX_EXIT_REASON_APIC_ACCESS 0x0000002C +#define VMX_EXIT_REASON_VIRTUALIZED_EOI 0x0000002D +#define VMX_EXIT_REASON_GDTR_IDTR_ACCESS 0x0000002E +#define VMX_EXIT_REASON_LDTR_TR_ACCESS 0x0000002F +#define VMX_EXIT_REASON_EPT_VIOLATION 0x00000030 +#define VMX_EXIT_REASON_EPT_MISCONFIGURATION 0x00000031 +#define VMX_EXIT_REASON_INVEPT 0x00000032 +#define VMX_EXIT_REASON_RDTSCP 0x00000033 +#define VMX_EXIT_REASON_VMX_PREEMPTION_TIMER_EXPIRED 0x00000034 +#define VMX_EXIT_REASON_INVVPID 0x00000035 +#define VMX_EXIT_REASON_WBINVD 0x00000036 +#define VMX_EXIT_REASON_XSETBV 0x00000037 +#define VMX_EXIT_REASON_APIC_WRITE 0x00000038 + +/* VMX execution control bits (pin based) */ +#define VMX_PINBASED_CTLS_IRQ_EXIT (1<<0) +#define VMX_PINBASED_CTLS_NMI_EXIT (1<<3) +#define VMX_PINBASED_CTLS_VIRT_NMI (1<<5) +#define VMX_PINBASED_CTLS_ENABLE_PTMR (1<<6) +#define VMX_PINBASED_CTLS_POST_IRQ (1<<7) + +/* VMX execution control bits (processor based) */ +#define VMX_PROCBASED_CTLS_IRQ_WIN (1<<2) +#define VMX_PROCBASED_CTLS_TSC_OFF (1<<3) +#define VMX_PROCBASED_CTLS_HLT (1<<7) +#define VMX_PROCBASED_CTLS_INVLPG (1<<9) +#define VMX_PROCBASED_CTLS_MWAIT (1<<10) +#define VMX_PROCBASED_CTLS_RDPMC (1<<11) +#define 
VMX_PROCBASED_CTLS_RDTSC (1<<12) +#define VMX_PROCBASED_CTLS_CR3_LOAD (1<<15) +#define VMX_PROCBASED_CTLS_CR3_STORE (1<<16) +#define VMX_PROCBASED_CTLS_CR8_LOAD (1<<19) +#define VMX_PROCBASED_CTLS_CR8_STORE (1<<20) +#define VMX_PROCBASED_CTLS_TPR_SHADOW (1<<21) +#define VMX_PROCBASED_CTLS_NMI_WINEXIT (1<<22) +#define VMX_PROCBASED_CTLS_MOV_DR (1<<23) +#define VMX_PROCBASED_CTLS_UNCOND_IO (1<<24) +#define VMX_PROCBASED_CTLS_IO_BITMAP (1<<25) +#define VMX_PROCBASED_CTLS_MON_TRAP (1<<27) +#define VMX_PROCBASED_CTLS_MSR_BITMAP (1<<28) +#define VMX_PROCBASED_CTLS_MONITOR (1<<29) +#define VMX_PROCBASED_CTLS_PAUSE (1<<30) +#define VMX_PROCBASED_CTLS_SECONDARY (1<<31) +#define VMX_PROCBASED_CTLS2_VAPIC (1<<0) +#define VMX_PROCBASED_CTLS2_EPT (1<<1) +#define VMX_PROCBASED_CTLS2_DESC_TABLE (1<<2) +#define VMX_PROCBASED_CTLS2_RDTSCP (1<<3) +#define VMX_PROCBASED_CTLS2_VX2APIC (1<<4) +#define VMX_PROCBASED_CTLS2_VPID (1<<5) +#define VMX_PROCBASED_CTLS2_WBINVD (1<<6) +#define VMX_PROCBASED_CTLS2_UNRESTRICT (1<<7) +#define VMX_PROCBASED_CTLS2_VAPIC_REGS (1<<8) +#define VMX_PROCBASED_CTLS2_VIRQ (1<<9) +#define VMX_PROCBASED_CTLS2_PAUSE_LOOP (1<<10) +#define VMX_PROCBASED_CTLS2_RDRAND (1<<11) +#define VMX_PROCBASED_CTLS2_INVPCID (1<<12) +#define VMX_PROCBASED_CTLS2_VM_FUNCS (1<<13) +#define VMX_PROCBASED_CTLS2_VMCS_SHADW (1<<14) +#define VMX_PROCBASED_CTLS2_RDSEED (1<<16) +#define VMX_PROCBASED_CTLS2_EPT_VE (1<<18) +#define VMX_PROCBASED_CTLS2_XSVE_XRSTR (1<<20) + +/* VMX exit control bits */ +#define VMX_EXIT_CTLS_SAVE_DBG (1<<2) +#define VMX_EXIT_CTLS_HOST_ADDR64 (1<<9) +#define VMX_EXIT_CTLS_LOAD_PERF (1<<12) +#define VMX_EXIT_CTLS_ACK_IRQ (1<<15) +#define VMX_EXIT_CTLS_SAVE_PAT (1<<18) +#define VMX_EXIT_CTLS_LOAD_PAT (1<<19) +#define VMX_EXIT_CTLS_SAVE_EFER (1<<20) +#define VMX_EXIT_CTLS_LOAD_EFER (1<<21) +#define VMX_EXIT_CTLS_SAVE_PTMR (1<<22) + +/* VMX entry control bits */ +#define VMX_ENTRY_CTLS_LOAD_DBG (1<<2) +#define VMX_ENTRY_CTLS_IA32E_MODE (1<<9) +#define VMX_ENTRY_CTLS_ENTRY_SMM (1<<10) +#define VMX_ENTRY_CTLS_DEACT_DUAL (1<<11) +#define VMX_ENTRY_CTLS_LOAD_PERF (1<<13) +#define VMX_ENTRY_CTLS_LOAD_PAT (1<<14) +#define VMX_ENTRY_CTLS_LOAD_EFER (1<<15) + +/* VMX entry/exit Interrupt info */ +#define VMX_INT_INFO_ERR_CODE_VALID (1<<11) +#define VMX_INT_INFO_VALID (1<<31) +#define VMX_INT_TYPE_EXT_INT 0 +#define VMX_INT_TYPE_NMI 2 +#define VMX_INT_TYPE_HW_EXP 3 +#define VMX_INT_TYPE_SW_EXP 6 + +#define VM_SUCCESS 0 +#define VM_FAIL -1 + +#define VMX_VMENTRY_FAIL 0x80000000 + +#ifndef ASSEMBLER + +#define RFLAGS_C (1<<0) +#define RFLAGS_Z (1<<6) + +/* + * Handling of CR0: + * + * - PE (0) Must always be 1. Attempt to write to it must lead to a VM exit. + * - MP (1) coprocessor related => no action needed + * - EM (2) coprocessor related => no action needed + * - TS (3) no action needed + * - ET (4) typically hardcoded to 1. => no action needed + * - NE (5) coprocessor related => no action needed + * - WP (16) inhibits supervisor level procedures to write into ro-pages + * => no action needed + * - AM (18) alignment mask => no action needed + * - NW (29) not write through => no action + * - CD (30) cache disable => no action + * - PG (31) paging => must always be 1. Attempt to write to it must lead to + * a VM exit. 
+ */ + +/* we must guard protected mode and paging */ +#define CR0_GUEST_HOST_MASK (CR0_PE | CR0_PG | CR0_WP) +/* initially, the guest runs in protected mode enabled, but with no paging */ +#define CR0_READ_SHADOW CR0_PE + +/* + * Handling of CR4: + * + * - VME (0) must always be 0 => must lead to a VM exit + * - PVI (1) must always be 0 => must lead to a VM exit + * - TSD (2) don't care + * - DE (3) don't care + * - PSE (4) must always be 1 => must lead to a VM exit + * - PAE (5) must always be 0 => must lead to a VM exit + * - MCE (6) don't care + * - PGE (7) => important for TLB flush + * - PCE (8) don't care + * - OSFXSR (9) don't care + * - OSXMMEXCPT (10) don't care + * - VMXE (13) must always be 1 => must lead to a VM exit + * - SMXE (14) must always be 0 => must lead to a VM exit + * - PCIDE (17) => important for TLB flush + * - OSXSAVE (18) don't care + */ + +#define CR4_GUEST_HOST_MASK (CR4_VME | CR4_PVI | CR4_PSE | CR4_PAE | \ + CR4_VMXE | CR4_SMXE | CR4_PGE | CR4_PCIDE) +#define CR4_READ_SHADOW (CR4_PGE | CR4_PSE) + +/* VCPU config definitions */ +#define REAL_MODE 1 +#define PAGE_PROTECTED_MODE 2 + +/* External Interfaces */ +int check_vmx_support(void); +int exec_vmxon_instr(void); +uint64_t exec_vmread(uint32_t field); +uint64_t exec_vmread64(uint32_t field_full); +void exec_vmwrite(uint32_t field, uint64_t value); +void exec_vmwrite64(uint32_t field_full, uint64_t value); +int init_vmcs(struct vcpu *vcpu); + +int exec_vmclear(void *addr); +int exec_vmptrld(void *addr); + +static inline uint8_t get_vcpu_mode(struct vcpu *vcpu) +{ + return vcpu->arch_vcpu.cpu_mode; +} +#endif /* ASSEMBLER */ + +#endif /* VMX_H_ */ diff --git a/hypervisor/include/arch/x86/vtd.h b/hypervisor/include/arch/x86/vtd.h new file mode 100644 index 000000000..2d2385980 --- /dev/null +++ b/hypervisor/include/arch/x86/vtd.h @@ -0,0 +1,258 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef VTD_H +#define VTD_H +/* + * Intel IOMMU register specification per version 1.0 public spec. + */ + +#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ +#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ +#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ +#define DMAR_GCMD_REG 0x18 /* Global command register */ +#define DMAR_GSTS_REG 0x1c /* Global status register */ +#define DMAR_RTADDR_REG 0x20 /* Root entry table */ +#define DMAR_CCMD_REG 0x28 /* Context command reg */ +#define DMAR_FSTS_REG 0x34 /* Fault Status register */ +#define DMAR_FECTL_REG 0x38 /* Fault control register */ +#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */ +#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */ +#define DMAR_FEUADDR_REG 0x44 /* Upper address register */ +#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */ +#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */ +#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */ +#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ +#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ +#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ +#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */ +#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ +#define DMAR_IQ_SHIFT 4 /* Invalidation queue head/tail shift */ +#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ +#define DMAR_ICS_REG 0x9c /* Invalidation complete status register */ +#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ + +#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) +#define DMAR_VER_MINOR(v) ((v) & 0x0f) + +/* + * Decoding Capability Register + */ +#define iommu_cap_pi(c) (((c) >> 59) & 1) +#define iommu_cap_read_drain(c) (((c) >> 55) & 1) +#define iommu_cap_write_drain(c) (((c) >> 54) & 1) +#define iommu_cap_max_amask_val(c) (((c) >> 48) & 0x3f) +#define iommu_cap_num_fault_regs(c) ((((c) >> 40) & 0xff) + 1) +#define iommu_cap_pgsel_inv(c) (((c) >> 39) & 1) + +#define iommu_cap_super_page_val(c) (((c) >> 34) & 0xf) +#define iommu_cap_super_offset(c) \ + (((find_first_bit(&iommu_cap_super_page_val(c), 4)) \ + * OFFSET_STRIDE) + 21) + +#define iommu_cap_fault_reg_offset(c) ((((c) >> 24) & 0x3ff) * 16) +#define iommu_cap_max_fault_reg_offset(c) \ + (iommu_cap_fault_reg_offset(c) + iommu_cap_num_fault_regs(c) * 16) + +#define iommu_cap_zlr(c) (((c) >> 22) & 1) +#define iommu_cap_isoch(c) (((c) >> 23) & 1) +#define iommu_cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1) +#define iommu_cap_sagaw(c) (((c) >> 8) & 0x1f) +#define iommu_cap_caching_mode(c) (((c) >> 7) & 1) +#define iommu_cap_phmr(c) (((c) >> 6) & 1) +#define iommu_cap_plmr(c) (((c) >> 5) & 1) +#define iommu_cap_rwbf(c) (((c) >> 4) & 1) +#define iommu_cap_afl(c) (((c) >> 3) & 1) +#define iommu_cap_ndoms(c) (((unsigned long)1) << (4 + 2 * ((c) & 0x7))) + +/* + * Decoding Extended Capability Register + */ +#define iommu_ecap_c(c) (((c) >> 0) & 1) +#define iommu_ecap_qi(c) (((c) >> 1) & 1) +#define iommu_ecap_dt(c) (((c) >> 2) & 1) +#define iommu_ecap_ir(c) (((c) >> 3) & 1) +#define iommu_ecap_eim(c) (((c) >> 4) & 1) +#define iommu_ecap_pt(c) (((c) >> 6) & 1) +#define iommu_ecap_sc(c) (((c) >> 7) & 1) +#define iommu_ecap_iro(c) (((c) >> 8) & 0x3ff) +#define iommu_ecap_mhmv(c) (((c) >> 20) & 0xf) +#define iommu_ecap_ecs(c) (((c) >> 24) & 1) +#define iommu_ecap_mts(c) (((c) >> 25) & 1) +#define iommu_ecap_nest(c) (((c) >> 26) & 1) +#define iommu_ecap_dis(c) (((c) >> 27) & 1) +#define 
iommu_ecap_prs(c) (((c) >> 29) & 1) +#define iommu_ecap_ers(c) (((c) >> 30) & 1) +#define iommu_ecap_srs(c) (((c) >> 31) & 1) +#define iommu_ecap_nwfs(c) (((c) >> 33) & 1) +#define iommu_ecap_eafs(c) (((c) >> 34) & 1) +#define iommu_ecap_pss(c) (((c) >> 35) & 0x1f) +#define iommu_ecap_pasid(c) (((c) >> 40) & 1) +#define iommu_ecap_dit(c) (((c) >> 41) & 1) +#define iommu_ecap_pds(c) (((c) >> 42) & 1) + +/* PMEN_REG */ +#define DMA_PMEN_EPM (((uint32_t)1)<<31) +#define DMA_PMEN_PRS (((uint32_t)1)<<0) + +/* GCMD_REG */ +#define DMA_GCMD_TE (((uint32_t)1) << 31) +#define DMA_GCMD_SRTP (((uint32_t)1) << 30) +#define DMA_GCMD_SFL (((uint32_t)1) << 29) +#define DMA_GCMD_EAFL (((uint32_t)1) << 28) +#define DMA_GCMD_WBF (((uint32_t)1) << 27) +#define DMA_GCMD_QIE (((uint32_t)1) << 26) +#define DMA_GCMD_SIRTP (((uint32_t)1) << 24) +#define DMA_GCMD_IRE (((uint32_t) 1) << 25) +#define DMA_GCMD_CFI (((uint32_t) 1) << 23) + +/* GSTS_REG */ +#define DMA_GSTS_TES (((uint32_t)1) << 31) +#define DMA_GSTS_RTPS (((uint32_t)1) << 30) +#define DMA_GSTS_FLS (((uint32_t)1) << 29) +#define DMA_GSTS_AFLS (((uint32_t)1) << 28) +#define DMA_GSTS_WBFS (((uint32_t)1) << 27) +#define DMA_GSTS_QIES (((uint32_t)1) << 26) +#define DMA_GSTS_IRTPS (((uint32_t)1) << 24) +#define DMA_GSTS_IRES (((uint32_t)1) << 25) +#define DMA_GSTS_CFIS (((uint32_t)1) << 23) + +/* CCMD_REG */ +#define DMA_CCMD_ICC (((uint64_t)1) << 63) +#define DMA_CCMD_ICC_32 (((uint32_t)1) << 31) +#define DMA_CCMD_GLOBAL_INVL (((uint64_t)1) << 61) +#define DMA_CCMD_DOMAIN_INVL (((uint64_t)2) << 61) +#define DMA_CCMD_DEVICE_INVL (((uint64_t)3) << 61) +#define DMA_CCMD_FM(m) (((uint64_t)((m) & 0x3)) << 32) +#define DMA_CCMD_MASK_NOBIT 0 +#define DMA_CCMD_MASK_1BIT 1 +#define DMA_CCMD_MASK_2BIT 2 +#define DMA_CCMD_MASK_3BIT 3 +#define DMA_CCMD_SID(s) (((uint64_t)((s) & 0xffff)) << 16) +#define DMA_CCMD_DID(d) ((uint64_t)((d) & 0xffff)) +#define DMA_CCMD_GET_CAIG_32(v) (((uint32_t)(v) >> 27) & 0x3) + +/* IOTLB_REG */ +#define DMA_IOTLB_IVT (((uint64_t)1) << 63) +#define DMA_IOTLB_IVT_32 (((uint32_t)1) << 31) +#define DMA_IOTLB_GLOBAL_INVL (((uint64_t)1) << 60) +#define DMA_IOTLB_DOMAIN_INVL (((uint64_t)2) << 60) +#define DMA_IOTLB_PAGE_INVL (((uint64_t)3) << 60) +#define DMA_IOTLB_DR (((uint64_t)1) << 49) +#define DMA_IOTLB_DW (((uint64_t)1) << 48) +#define DMA_IOTLB_DID(d) \ + (((uint64_t)((d) & 0xffff)) << 32) +#define DMA_IOTLB_GET_IAIG_32(v) (((uint32_t)(v) >> 25) & 0x3) + +/* INVALIDATE_ADDRESS_REG */ +#define DMA_IOTLB_INVL_ADDR_AM(m) ((uint64_t)((m) & 0x3f)) +#define DMA_IOTLB_INVL_ADDR_IH_UNMODIFIED (((uint64_t)1) << 6) + +/* FECTL_REG */ +#define DMA_FECTL_IM (((uint32_t)1) << 31) + +/* FSTS_REG */ +#define DMA_FSTS_PFO(s) (((s) >> 0) & 1) +#define DMA_FSTS_PPF(s) (((s) >> 1) & 1) +#define DMA_FSTS_AFO(s) (((s) >> 2) & 1) +#define DMA_FSTS_APF(s) (((s) >> 3) & 1) +#define DMA_FSTS_IQE(s) (((s) >> 4) & 1) +#define DMA_FSTS_ICE(s) (((s) >> 5) & 1) +#define DMA_FSTS_ITE(s) (((s) >> 6) & 1) +#define DMA_FSTS_PRO(s) (((s) >> 7) & 1) +#define DMA_FSTS_FRI(s) (((s) >> 8) & 0xFF) + +/* FRCD_REGs: upper 64 bits*/ +#define DMA_FRCD_UP_F(r) (((r) >> 63) & 1) +#define DMA_FRCD_UP_T(r) (((r) >> 62) & 1) +#define DMA_FRCD_UP_AT(r) (((r) >> 60) & 3) +#define DMA_FRCD_UP_PASID(r) (((r) >> 40) & 0xfffff) +#define DMA_FRCD_UP_FR(r) (((r) >> 32) & 0xff) +#define DMA_FRCD_UP_PP(r) (((r) >> 31) & 1) +#define DMA_FRCD_UP_EXE(r) (((r) >> 30) & 1) +#define DMA_FRCD_UP_PRIV(r) (((r) >> 29) & 1) +#define DMA_FRCD_UP_SID(r) (((r) >> 0) & 0xffff) + +#define 
DMAR_CONTEXT_TRANSLATION_TYPE_TRANSLATED 0x00 +#define DMAR_CONTEXT_TRANSLATION_TYPE_RESERVED 0x01 +#define DMAR_CONTEXT_TRANSLATION_TYPE_PASSED_THROUGH 0x02 + +#define DRHD_FLAG_INCLUDE_PCI_ALL_MASK (1) + +#define DEVFUN(dev, fun) (((dev & 0x1F) << 3) | ((fun & 0x7))) + +struct dmar_dev_scope { + uint8_t bus; + uint8_t devfun; +}; + +struct dmar_drhd { + uint32_t dev_cnt; + uint16_t segment; + uint8_t flags; + bool ignore; + uint64_t reg_base_addr; + /* assume no pci device hotplug support */ + struct dmar_dev_scope *devices; +}; + +struct dmar_info { + uint32_t drhd_count; + struct dmar_drhd *drhd_units; +}; + +extern struct dmar_info *get_dmar_info(void); + +struct iommu_domain; + +/* Assign a device specified by bus & devfun to a iommu domain */ +int assign_iommu_device(struct iommu_domain *domain, + uint8_t bus, uint8_t devfun); + +/* Unassign a device specified by bus & devfun to a iommu domain */ +int unassign_iommu_device(struct iommu_domain *domain, + uint8_t bus, uint8_t devfun); + +/* Create a iommu domain for a VM specified by vm_id */ +struct iommu_domain *create_iommu_domain(int vm_id, + void *translation_table, int addr_width); + +/* Destroy the iommu domain */ +int destroy_iommu_domain(struct iommu_domain *domain); + +/* Enable translation of iommu*/ +void enable_iommu(void); + +/* Disable translation of iommu*/ +void disable_iommu(void); + +/* iommu initialization */ +int init_iommu(void); +#endif diff --git a/hypervisor/include/common/acrn_efi.h b/hypervisor/include/common/acrn_efi.h new file mode 100644 index 000000000..3939614e2 --- /dev/null +++ b/hypervisor/include/common/acrn_efi.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef UEFI_H +#define UEFI_H + +typedef struct { + uint16_t limit; + uint64_t *base; +} __attribute__((packed)) dt_addr_t; + +struct efi_ctx { + void* entry; + void* handle; + void* table; + dt_addr_t gdt; + dt_addr_t idt; + uint16_t tr_sel; + uint16_t ldt_sel; + uint64_t cr0; + uint64_t cr3; + uint64_t cr4; + uint64_t rflags; + uint16_t cs_sel; + uint32_t cs_ar; + uint16_t es_sel; + uint16_t ss_sel; + uint16_t ds_sel; + uint16_t fs_sel; + uint16_t gs_sel; + uint64_t rsp; + uint64_t efer; +}__attribute__((packed)); + +#endif /* UEFI_H*/ diff --git a/hypervisor/include/common/hypercall.h b/hypervisor/include/common/hypercall.h new file mode 100644 index 000000000..5444bd296 --- /dev/null +++ b/hypervisor/include/common/hypercall.h @@ -0,0 +1,361 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file hypercall.h + * + * @brief public APIs for hypercall + */ + +#ifndef HYPERCALL_H +#define HYPERCALL_H + +struct vhm_request; + +int acrn_insert_request_wait(struct vcpu *vcpu, struct vhm_request *req); +int acrn_insert_request_nowait(struct vcpu *vcpu, struct vhm_request *req); +int get_req_info(char *str, int str_max); + +int acrn_vpic_inject_irq(struct vm *vm, int irq, enum irq_mode mode); + +/** + * @brief Hypercall + * + * @addtogroup acrn_hypercall ACRN Hypercall + * @{ + */ + +/** + * @brief Get hypervisor api version + * + * The function only return api version information when VM is VM0. + * + * @param VM Pointer to VM data structure + * @param param guest physical memory address. The api version returned + * will be copied to this gpa + * + * @return 0 on success, non-zero on error. + */ +int64_t hcall_get_api_version(struct vm *vm, uint64_t param); + +/** + * @brief create virtual machine + * + * Create a virtual machine based on parameter, currently there is no + * limitation for calling times of this function, will add MAX_VM_NUM + * support later. + * + * @param VM Pointer to VM data structure + * @param param guest physical memory address. 
This gpa points to
+ * struct acrn_create_vm
+ *
+ * @return 0 on success, non-zero on error.
+ */
+int64_t hcall_create_vm(struct vm *vm, uint64_t param);
+
+/**
+ * @brief destroy virtual machine
+ *
+ * Destroy a virtual machine; it will pause the target VM and then shut it
+ * down.
+ * The function will return -1 if the target VM does not exist.
+ *
+ * @param vmid ID of the VM
+ *
+ * @return 0 on success, non-zero on error.
+ */
+int64_t hcall_destroy_vm(uint64_t vmid);
+
+/**
+ * @brief resume virtual machine
+ *
+ * Resume a virtual machine; it will schedule the target VM's vcpus to run.
+ * The function will return -1 if the target VM does not exist or the
+ * IOReq buffer page for the VM is not ready.
+ *
+ * @param vmid ID of the VM
+ *
+ * @return 0 on success, non-zero on error.
+ */
+int64_t hcall_resume_vm(uint64_t vmid);
+
+/**
+ * @brief pause virtual machine
+ *
+ * Pause a virtual machine. If the VM is already paused, the function
+ * will return 0 directly for success.
+ * The function will return -1 if the target VM does not exist.
+ *
+ * @param vmid ID of the VM
+ *
+ * @return 0 on success, non-zero on error.
+ */
+int64_t hcall_pause_vm(uint64_t vmid);
+
+/**
+ * @brief create vcpu
+ *
+ * Create a vcpu for a VM based on the given parameter. The vcpu is
+ * allocated from the free physical cpus; if no pcpu is available, the
+ * function will return -1.
+ *
+ * @param vm Pointer to VM data structure
+ * @param vmid ID of the VM
+ * @param param guest physical address. This gpa points to
+ * struct acrn_create_vcpu
+ *
+ * @return 0 on success, non-zero on error.
+ */
+int64_t hcall_create_vcpu(struct vm *vm, uint64_t vmid, uint64_t param);
+
+/**
+ * @brief assert IRQ line
+ *
+ * Assert a virtual IRQ line for a VM, which could be from ISA or IOAPIC;
+ * normally it will activate a level IRQ.
+ * The function will return -1 if the target VM does not exist.
+ *
+ * @param vm Pointer to VM data structure
+ * @param vmid ID of the VM
+ * @param param guest physical address. This gpa points to struct acrn_irqline
+ *
+ * @return 0 on success, non-zero on error.
+ */
+int64_t hcall_assert_irqline(struct vm *vm, uint64_t vmid, uint64_t param);
+
+/**
+ * @brief deassert IRQ line
+ *
+ * Deassert a virtual IRQ line for a VM, which could be from ISA or IOAPIC;
+ * normally it will deactivate a level IRQ.
+ * The function will return -1 if the target VM does not exist.
+ *
+ * @param vm Pointer to VM data structure
+ * @param vmid ID of the VM
+ * @param param guest physical address. This gpa points to struct acrn_irqline
+ *
+ * @return 0 on success, non-zero on error.
+ */
+int64_t hcall_deassert_irqline(struct vm *vm, uint64_t vmid, uint64_t param);
+
+/**
+ * @brief trigger a pulse on IRQ line
+ *
+ * Trigger a pulse on a virtual IRQ line for a VM, which could be from ISA
+ * or IOAPIC; normally it triggers an edge IRQ.
+ * The function will return -1 if the target VM does not exist.
+ *
+ * @param vm Pointer to VM data structure
+ * @param vmid ID of the VM
+ * @param param guest physical address. This gpa points to struct acrn_irqline
+ *
+ * @return 0 on success, non-zero on error.
+ */
+int64_t hcall_pulse_irqline(struct vm *vm, uint64_t vmid, uint64_t param);
+
+/**
+ * @brief inject MSI interrupt
+ *
+ * Inject an MSI interrupt for a VM.
+ * The function will return -1 if the target VM does not exist.
+ *
+ * @param vm Pointer to VM data structure
+ * @param vmid ID of the VM
+ * @param param guest physical address.
This gpa points to struct acrn_msi_entry
+ *
+ * @return 0 on success, non-zero on error.
+ */
+int64_t hcall_inject_msi(struct vm *vm, uint64_t vmid, uint64_t param);
+
+/**
+ * @brief set ioreq shared buffer
+ *
+ * Set the ioreq shared buffer for a VM.
+ * The function will return -1 if the target VM does not exist.
+ *
+ * @param vm Pointer to VM data structure
+ * @param vmid ID of the VM
+ * @param param guest physical address. This gpa points to
+ * struct acrn_set_ioreq_buffer
+ *
+ * @return 0 on success, non-zero on error.
+ */
+int64_t hcall_set_ioreq_buffer(struct vm *vm, uint64_t vmid, uint64_t param);
+
+/**
+ * @brief notify request done
+ *
+ * Notify the requestor vcpu of the completion of an ioreq.
+ * The function will return -1 if the target VM does not exist.
+ *
+ * @param vmid ID of the VM
+ * @param param vcpu ID of the requestor
+ *
+ * @return 0 on success, non-zero on error.
+ */
+int64_t hcall_notify_req_finish(uint64_t vmid, uint64_t param);
+
+/**
+ * @brief setup ept memory mapping
+ *
+ * Set the EPT memory mapping for a VM.
+ * The function will return -1 if the target VM does not exist.
+ *
+ * @param vm Pointer to VM data structure
+ * @param vmid ID of the VM
+ * @param param guest physical address. This gpa points to
+ * struct vm_set_memmap
+ *
+ * @return 0 on success, non-zero on error.
+ */
+int64_t hcall_set_vm_memmap(struct vm *vm, uint64_t vmid, uint64_t param);
+
+/**
+ * @brief remap PCI MSI interrupt
+ *
+ * Remap a PCI MSI interrupt from a VM's virtual vector to a native vector.
+ * The function will return -1 if the target VM does not exist.
+ *
+ * @param vm Pointer to VM data structure
+ * @param vmid ID of the VM
+ * @param param guest physical address. This gpa points to
+ * struct acrn_vm_pci_msix_remap
+ *
+ * @return 0 on success, non-zero on error.
+ */
+int64_t hcall_remap_pci_msix(struct vm *vm, uint64_t vmid, uint64_t param);
+
+/**
+ * @brief translate guest physical address to host physical address
+ *
+ * Translate a guest physical address to a host physical address for a VM.
+ * The function will return -1 if the target VM does not exist.
+ *
+ * @param vm Pointer to VM data structure
+ * @param vmid ID of the VM
+ * @param param guest physical address. This gpa points to struct vm_gpa2hpa
+ *
+ * @return 0 on success, non-zero on error.
+ */
+int64_t hcall_gpa_to_hpa(struct vm *vm, uint64_t vmid, uint64_t param);
+
+/**
+ * @brief Assign one passthrough dev to VM.
+ *
+ * @param vm Pointer to VM data structure
+ * @param vmid ID of the VM
+ * @param param guest physical address. This gpa points to
+ * the physical BDF of the ptdev to assign
+ *
+ * @return 0 on success, non-zero on error.
+ */
+int64_t hcall_assign_ptdev(struct vm *vm, uint64_t vmid, uint64_t param);
+
+/**
+ * @brief Deassign one passthrough dev from VM.
+ *
+ * @param vm Pointer to VM data structure
+ * @param vmid ID of the VM
+ * @param param guest physical address. This gpa points to
+ * the physical BDF of the ptdev to deassign
+ *
+ * @return 0 on success, non-zero on error.
+ */
+int64_t hcall_deassign_ptdev(struct vm *vm, uint64_t vmid, uint64_t param);
+
+/**
+ * @brief Set interrupt mapping info of ptdev.
+ *
+ * @param vm Pointer to VM data structure
+ * @param vmid ID of the VM
+ * @param param guest physical address. This gpa points to a
+ * struct hc_ptdev_irq containing the interrupt remapping info
+ *
+ * @return 0 on success, non-zero on error.
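+ *
+ * Usage sketch (illustrative only; the local variable name is just an
+ * example): a handler can fetch the guest structure with the copy_from_vm()
+ * helper defined later in this header, e.g.
+ *
+ *   struct hc_ptdev_irq irq_info;
+ *
+ *   if (copy_from_vm(vm, &irq_info, param) < 0)
+ *           return -1;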
+ */ +int64_t hcall_set_ptdev_intr_info(struct vm *vm, uint64_t vmid, uint64_t param); + +/** + * @brief Clear interrupt mapping info of ptdev. + * + * @param VM Pointer to VM data structure + * @param vmid ID of the VM + * @param param guest physical address. This gpa points to data structure of + * hc_ptdev_irq including intr remapping info + * + * @return 0 on success, non-zero on error. + */ +int64_t hcall_reset_ptdev_intr_info(struct vm *vm, uint64_t vmid, + uint64_t param); + +/** + * @brief Setup a share buffer for a VM. + * + * @param VM Pointer to VM data structure + * @param param guest physical address. This gpa points to + * struct sbuf_setup_param + * + * @return 0 on success, non-zero on error. + */ +int64_t hcall_setup_sbuf(struct vm *vm, uint64_t param); + +/** + * @} + */ + +static inline int check_result(int found) +{ + return found ? 0 : -1; +} + +#define copy_from_vm(vm, ptr, gpa) ({ \ + int found = 0; \ + typeof(*(ptr)) *h_ptr = (ptr); \ + typeof(*(ptr)) *g_ptr = \ + (void *)gpa2hpa_check(vm, gpa, \ + sizeof(*h_ptr), &found, true); \ + if (found) { \ + *h_ptr = *g_ptr; \ + } \ + check_result(found); \ +}) + +#define copy_to_vm(vm, ptr, gpa) ({ \ + int found = 0; \ + typeof(*(ptr)) *h_ptr = (ptr); \ + typeof(*(ptr)) *g_ptr = \ + (void *)gpa2hpa_check(vm, gpa, \ + sizeof(*h_ptr), &found, true); \ + if (found) { \ + *g_ptr = *h_ptr; \ + } \ + check_result(found); \ +}) + +#endif /* HYPERCALL_H*/ diff --git a/hypervisor/include/common/schedule.h b/hypervisor/include/common/schedule.h new file mode 100644 index 000000000..71f3c11bc --- /dev/null +++ b/hypervisor/include/common/schedule.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _HV_CORE_SCHEDULE_ +#define _HV_CORE_SCHEDULE_ + +#define NEED_RESCHEDULED (1) + +void init_scheduler(void); +void get_schedule_lock(int pcpu_id); +void release_schedule_lock(int pcpu_id); + +void set_pcpu_used(int pcpu_id); +int allocate_pcpu(void); +void free_pcpu(int pcpu_id); + +void add_vcpu_to_runqueue(struct vcpu *vcpu); +void remove_vcpu_from_runqueue(struct vcpu *vcpu); + +void default_idle(void); + +void make_reschedule_request(struct vcpu *vcpu); +int need_rescheduled(int pcpu_id); +void schedule(void); + +void vcpu_thread(struct vcpu *vcpu); +#endif + diff --git a/hypervisor/include/debug/assert.h b/hypervisor/include/debug/assert.h new file mode 100644 index 000000000..83b26c1b5 --- /dev/null +++ b/hypervisor/include/debug/assert.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ASSERT_H +#define ASSERT_H + +#ifdef HV_DEBUG +void __assert(uint32_t line, const char *file, char *txt); + +#define ASSERT(x, ...) \ + if (!(x)) {\ + pr_fatal(__VA_ARGS__);\ + __assert(__LINE__, __FILE__, "fatal error");\ + } +#else +#define ASSERT(x, ...) \ + if (!(x)) { \ + do { \ + asm volatile ("pause" ::: "memory"); \ + } while (1); \ + } +#endif + +/* Force a compilation error if condition is false */ +#define STATIC_ASSERT(condition) ((void)sizeof(char[(condition) ? 1 : -1])) + +#endif /* ASSERT_H */ diff --git a/hypervisor/include/debug/console.h b/hypervisor/include/debug/console.h new file mode 100644 index 000000000..1ef2306d3 --- /dev/null +++ b/hypervisor/include/debug/console.h @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CONSOLE_H +#define CONSOLE_H + +#ifdef HV_DEBUG +/** Initializes the console module. + * + * @param cdev A pointer to the character device to use for the console. + * + * @return '0' on success. Any other value indicates an error. + */ + +int console_init(void); + +/** Writes a NUL terminated string to the console. + * + * @param str A pointer to the NUL terminated string to write. + * + * @return The number of characters written or -1 if an error occurred + * and no character was written. + */ + +int console_puts(const char *str); + +/** Writes a given number of characters to the console. + * + * @param str A pointer to character array to write. + * @param len The number of characters to write. + * + * @return The number of characters written or -1 if an error occurred + * and no character was written. + */ + +int console_write(const char *str, size_t len); + +/** Writes a single character to the console. + * + * @param ch The character to write. + * + * @preturn The number of characters written or -1 if an error + * occurred before any character was written. + */ + +int console_putc(int ch); + +/** Dumps an array to the console. + * + * This function dumps an array of bytes to the console + * in a hexadecimal format. + * + * @param p A pointer to the byte array to dump. + * @param len The number of bytes to dump. + */ + +void console_dump_bytes(const void *p, unsigned int len); + +void console_setup_timer(void); + +uint32_t get_serial_handle(void); +#else +static inline int console_init(void) +{ + return 0; +} +static inline int console_puts(__unused const char *str) +{ + return 0; +} +static inline int console_write(__unused const char *str, + __unused size_t len) +{ + return 0; +} +static inline int console_putc(__unused int ch) +{ + return 0; +} +static inline void console_dump_bytes(__unused const void *p, + __unused unsigned int len) +{ +} +static inline void console_setup_timer(void) {} +static inline uint32_t get_serial_handle(void) { return 0; } +#endif + +#endif /* CONSOLE_H */ diff --git a/hypervisor/include/debug/dump.h b/hypervisor/include/debug/dump.h new file mode 100644 index 000000000..1d3ebf30b --- /dev/null +++ b/hypervisor/include/debug/dump.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DUMP_H +#define DUMP_H + +struct intr_ctx; + +#ifdef HV_DEBUG +#define CALL_TRACE_HIERARCHY_MAX 20 +#define DUMP_STACK_SIZE 0x200 + +void dump_exception(struct intr_ctx *ctx, uint32_t cpu_id); +void dump_interrupt(struct intr_ctx *ctx); + +#else + +static inline void dump_exception(__unused struct intr_ctx *ctx, + __unused uint32_t cpu_id) +{ +} + +static inline void dump_interrupt(__unused struct intr_ctx *ctx) +{ +} +#endif + +#endif /* DUMP_H */ diff --git a/hypervisor/include/debug/logmsg.h b/hypervisor/include/debug/logmsg.h new file mode 100644 index 000000000..80a5f314b --- /dev/null +++ b/hypervisor/include/debug/logmsg.h @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef LOGMSG_H +#define LOGMSG_H + +/* Logging severity levels */ +#define LOG_FATAL 1 +#define LOG_ERROR 2 +#define LOG_WARNING 3 +#define LOG_INFO 4 +#define LOG_DEBUG 5 + +/* Logging flags */ +#define LOG_FLAG_STDOUT 0x00000001 +#define LOG_FLAG_MEMORY 0x00000002 + +#if defined(HV_DEBUG) + +extern uint32_t console_loglevel; +extern uint32_t mem_loglevel; +void init_logmsg(uint32_t mem_size, uint32_t flags); +void print_logmsg_buffer(uint32_t cpu_id); +void do_logmsg(uint32_t severity, const char *fmt, ...); + +#else /* HV_DEBUG */ + +static inline void init_logmsg(__unused uint32_t mem_size, + __unused uint32_t flags) +{ +} + +static inline void do_logmsg(__unused uint32_t severity, + __unused const char *fmt, ...) +{ +} + +static inline void print_logmsg_buffer(__unused uint32_t cpu_id) +{ +} + +#endif /* HV_DEBUG */ + +#ifndef pr_fmt +#define pr_fmt(fmt) fmt +#endif + +#define pr_fatal(fmt, ...) \ + do { \ + do_logmsg(LOG_FATAL, pr_fmt(fmt), ##__VA_ARGS__); \ + } while (0) + +#define pr_err(fmt, ...) \ + do { \ + do_logmsg(LOG_ERROR, pr_fmt(fmt), ##__VA_ARGS__); \ + } while (0) + +#define pr_warn(fmt, ...) \ + do { \ + do_logmsg(LOG_WARNING, pr_fmt(fmt), ##__VA_ARGS__); \ + } while (0) + +#define pr_info(fmt, ...) \ + do { \ + do_logmsg(LOG_INFO, pr_fmt(fmt), ##__VA_ARGS__); \ + } while (0) + +#define pr_dbg(fmt, ...) \ + do { \ + do_logmsg(LOG_DEBUG, pr_fmt(fmt), ##__VA_ARGS__); \ + } while (0) + +#define dev_dbg(lvl, fmt, ...) \ + do { \ + do_logmsg(lvl, pr_fmt(fmt), ##__VA_ARGS__); \ + } while (0) + +#define panic(...) \ + do { pr_fatal("Instruction Decode PANIC: " __VA_ARGS__); \ + while (1) { asm volatile ("pause" ::: "memory"); }; } while (0) + +#endif /* LOGMSG_H */ diff --git a/hypervisor/include/debug/printf.h b/hypervisor/include/debug/printf.h new file mode 100644 index 000000000..2bd98943a --- /dev/null +++ b/hypervisor/include/debug/printf.h @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef PRINTF_H +#define PRINTF_H + +/** The well known printf() function. + * + * Formats a string and writes it to the console output. + * + * @param fmt A pointer to the NUL terminated format string. + * + * @return The number of characters actually written or a negative + * number if an error occurred. + */ + +int printf(const char *fmt, ...); + +/** The well known vprintf() function. + * + * Formats a string and writes it to the console output. + * + * @param fmt A pointer to the NUL terminated format string. + * @param args The variable long argument list as va_list. + * @return The number of characters actually written or a negative + * number if an error occurred. + */ + +int vprintf(const char *fmt, va_list args); + +/** The well known vsnprintf() function. + * + * Formats and writes a string with a max. size to memory. + * + * @param dst A pointer to the destination memory. + * @param sz The size of the destination memory. + * @param fmt A pointer to the NUL terminated format string. + * @param args The variable long argument list as va_list. + * @return The number of bytes which would be written, even if the destination + * is smaller. On error a negative number is returned. + */ + +int vsnprintf(char *dst, int sz, const char *fmt, va_list args); + +/** The well known snprintf() function. + * + * Formats a string and writes it to the console output. + * + * @param dest Pointer to the destination memory. + * @param sz Max. size of dest. + * @param fmt A pointer to the NUL terminated format string. + * + * @return The number of characters would by written or a negative + * number if an error occurred. + * + * @bug sz == 0 doesn't work + */ + +int snprintf(char *dest, int sz, const char *fmt, ...); + +#endif /* PRINTF_H */ diff --git a/hypervisor/include/debug/sbuf.h b/hypervisor/include/debug/sbuf.h new file mode 100644 index 000000000..45f7ac097 --- /dev/null +++ b/hypervisor/include/debug/sbuf.h @@ -0,0 +1,155 @@ +/* + * SHARED BUFFER + * + * Copyright (C) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Li Fei + * + */ + +#ifndef SHARED_BUFFER_H +#define SHARED_BUFFER_H + +#define SBUF_MAGIC 0x5aa57aa71aa13aa3 +#define SBUF_MAX_SIZE (1 << 22) +#define SBUF_HEAD_SIZE 64 + +/* sbuf flags */ +#define OVERRUN_CNT_EN (1 << 0) /* whether overrun counting is enabled */ +#define OVERWRITE_EN (1 << 1) /* whether overwrite is enabled */ + +/** + * (sbuf) head + buf (store (ele_num - 1) elements at most) + * buffer empty: tail == head + * buffer full: (tail + ele_size) % size == head + * + * Base of memory for elements + * | + * | + * ---------------------------------------------------------------------- + * | struct shared_buf | raw data (ele_size)| ... | raw data (ele_size) | + * ---------------------------------------------------------------------- + * | + * | + * struct shared_buf *buf + */ + +enum { + ACRN_TRACE, + ACRN_HVLOG, + ACRN_SBUF_ID_MAX, +}; + +/* Make sure sizeof(struct shared_buf) == SBUF_HEAD_SIZE */ +struct shared_buf { + uint64_t magic; + uint32_t ele_num; /* number of elements */ + uint32_t ele_size; /* sizeof of elements */ + uint32_t head; /* offset from base, to read */ + uint32_t tail; /* offset from base, to write */ + uint64_t flags; + uint32_t overrun_cnt; /* count of overrun */ + uint32_t size; /* ele_num * ele_size */ + uint32_t padding[6]; +}; + +#ifdef HV_DEBUG + +EXTERN_CPU_DATA(uint64_t * [ACRN_SBUF_ID_MAX], sbuf); + +static inline void sbuf_clear_flags(struct shared_buf *sbuf, uint64_t flags) +{ + sbuf->flags &= ~flags; +} + +static inline void sbuf_set_flags(struct shared_buf *sbuf, uint64_t flags) +{ + sbuf->flags = flags; +} + +static inline void sbuf_add_flags(struct shared_buf *sbuf, uint64_t flags) +{ + sbuf->flags |= flags; +} + +struct shared_buf *sbuf_allocate(uint32_t ele_num, uint32_t ele_size); +void sbuf_free(struct shared_buf *sbuf); +int sbuf_get(struct shared_buf *sbuf, uint8_t *data); +int sbuf_put(struct shared_buf *sbuf, uint8_t *data); +int sbuf_share_setup(uint32_t pcpu_id, uint32_t sbuf_id, uint64_t *hva); + +#else /* HV_DEBUG */ + +static inline void sbuf_clear_flags( + __unused struct shared_buf *sbuf, + __unused uint64_t flags) +{ +} + +static inline void sbuf_set_flags( + __unused struct shared_buf *sbuf, + __unused uint64_t flags) +{ +} + +static inline void sbuf_add_flags( + __unused struct shared_buf *sbuf, + __unused uint64_t flags) +{ +} + +static inline struct shared_buf *sbuf_allocate( + __unused uint32_t ele_num, + __unused uint32_t ele_size) +{ + return NULL; +} + +static inline void sbuf_free( + __unused struct shared_buf *sbuf) +{ +} + +static inline int sbuf_get( + __unused struct shared_buf *sbuf, + __unused uint8_t *data) +{ + return 0; +} + +static inline int sbuf_put( + __unused struct shared_buf *sbuf, + __unused uint8_t *data) 
+{ + return 0; +} + +#endif /* HV_DEBUG */ + +#endif /* SHARED_BUFFER_H */ diff --git a/hypervisor/include/debug/serial.h b/hypervisor/include/debug/serial.h new file mode 100644 index 000000000..8408d7429 --- /dev/null +++ b/hypervisor/include/debug/serial.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SERIAL_H +#define SERIAL_H + +#ifdef HV_DEBUG +int serial_init(void); +#else +static inline int serial_init(void) { return 0; } +#endif + +void uart16550_set_property(int enabled, int port_mapped, uint64_t base_addr); + +#endif diff --git a/hypervisor/include/debug/shell.h b/hypervisor/include/debug/shell.h new file mode 100644 index 000000000..43e6f656d --- /dev/null +++ b/hypervisor/include/debug/shell.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SHELL_H +#define SHELL_H + +/* Switching key combinations for shell and uart console */ +#define GUEST_CONSOLE_TO_HV_SWITCH_KEY 0 /* CTRL + SPACE */ + +#ifdef HV_DEBUG +int shell_init(void); +void shell_kick_session(void); +int shell_switch_console(void); +#else +static inline int shell_init(void) { return 0; } +static inline void shell_kick_session(void) {} +static inline int shell_switch_console(void) { return 0; } +#endif + +#endif /* SHELL_H */ diff --git a/hypervisor/include/debug/trace.h b/hypervisor/include/debug/trace.h new file mode 100644 index 000000000..38e4c18c2 --- /dev/null +++ b/hypervisor/include/debug/trace.h @@ -0,0 +1,280 @@ +/* + * ACRN TRACE + * + * Copyright (C) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Li Fei + * + */ + +#ifndef TRACE_H +#define TRACE_H + + /* TIMER EVENT */ +#define TRACE_TIMER_ACTION_ADDED 0x1 +#define TRACE_TIMER_ACTION_PCKUP 0x2 +#define TRACE_TIMER_ACTION_UPDAT 0x3 +#define TRACE_TIMER_IRQ 0x4 + +#define TRACE_VM_EXIT 0x10 +#define TRACE_VM_ENTER 0X11 +#define TRC_VMEXIT_ENTRY 0x10000 + +#define TRC_VMEXIT_EXCEPTION_OR_NMI (TRC_VMEXIT_ENTRY + 0x00000000) +#define TRC_VMEXIT_EXTERNAL_INTERRUPT (TRC_VMEXIT_ENTRY + 0x00000001) +#define TRC_VMEXIT_INTERRUPT_WINDOW (TRC_VMEXIT_ENTRY + 0x00000002) +#define TRC_VMEXIT_CPUID (TRC_VMEXIT_ENTRY + 0x00000004) +#define TRC_VMEXIT_RDTSC (TRC_VMEXIT_ENTRY + 0x00000010) +#define TRC_VMEXIT_VMCALL (TRC_VMEXIT_ENTRY + 0x00000012) +#define TRC_VMEXIT_CR_ACCESS (TRC_VMEXIT_ENTRY + 0x0000001C) +#define TRC_VMEXIT_IO_INSTRUCTION (TRC_VMEXIT_ENTRY + 0x0000001E) +#define TRC_VMEXIT_RDMSR (TRC_VMEXIT_ENTRY + 0x0000001F) +#define TRC_VMEXIT_WRMSR (TRC_VMEXIT_ENTRY + 0x00000020) +#define TRC_VMEXIT_EPT_VIOLATION (TRC_VMEXIT_ENTRY + 0x00000030) +#define TRC_VMEXIT_EPT_MISCONFIGURATION (TRC_VMEXIT_ENTRY + 0x00000031) +#define TRC_VMEXIT_RDTSCP (TRC_VMEXIT_ENTRY + 0x00000033) +#define TRC_VMEXIT_APICV_WRITE (TRC_VMEXIT_ENTRY + 0x00000038) +#define TRC_VMEXIT_APICV_ACCESS (TRC_VMEXIT_ENTRY + 0x00000039) +#define TRC_VMEXIT_APICV_VIRT_EOI (TRC_VMEXIT_ENTRY + 0x0000003A) + +#define TRC_VMEXIT_UNHANDLED 0x20000 + +#ifdef HV_DEBUG + +#include + +#define GEN_CASE(id) case (id): { id##_FMT; break; } + +#define TRACE_CUSTOM 0xFC +#define TRACE_FUNC_ENTER 0xFD +#define TRACE_FUNC_EXIT 0xFE +#define TRACE_STR 0xFF + +#define TRACE_TIMER_ACTION_ADDED_FMT \ +{PR("TIMER_ACTION ADDED: ID %d, deadline %llx total: %d\n", \ + (p)->a, ((uint64_t)((p)->c)<<32)|(p)->b, (p)->d); } + +#define TRACE_TIMER_ACTION_PCKUP_FMT \ +{PR("TIMER_ACTION PCKUP: ID %d, deadline %llx total: %d\n", \ + (p)->a, ((uint64_t)((p)->c)<<32)|(p)->b, (p)->d); } + +#define TRACE_TIMER_ACTION_UPDAT_FMT \ +{PR("TIMER_ACTION UPDAT: ID %d, deadline %llx total: %d\n", \ + (p)->a, ((unsigned long)((p)->c)<<32)|(p)->b, (p)->d); } + +#define TRACE_TIMER_IRQ_FMT \ +PR("TIMER_IRQ total: %llx\n", (p)->e) + +#define TRACE_CUSTOM_FMT \ +PR("CUSTOM: 0x%llx 0x%llx\n", (p)->e, (p)->f) + +#define TRACE_FUNC_ENTER_FMT \ +PR("ENTER: %s\n", (p)->str) + +#define TRACE_FUNC_EXIT_FMT \ +PR("EXIT : %s\n", (p)->str) + +#define TRACE_STR_FMT \ +PR("STR: %s\n", (p)->str) + +#define ALL_CASES \ + GEN_CASE(TRACE_TIMER_ACTION_ADDED); \ + GEN_CASE(TRACE_TIMER_ACTION_PCKUP); \ + GEN_CASE(TRACE_TIMER_ACTION_UPDAT); \ + GEN_CASE(TRACE_TIMER_IRQ); \ + GEN_CASE(TRACE_CUSTOM); \ + GEN_CASE(TRACE_STR); \ + GEN_CASE(TRACE_FUNC_ENTER); \ + GEN_CASE(TRACE_FUNC_EXIT); + +/* sizeof(trace_entry) == 3 x 64bit */ +struct trace_entry { + uint64_t tsc; /* TSC */ + uint64_t id; + union { + struct { + uint32_t a, b, c, d; + }; + struct { + uint8_t a1, a2, a3, a4; + uint8_t b1, b2, b3, b4; + uint8_t c1, c2, c3, c4; + uint8_t d1, d2, d3, d4; + }; + struct { + uint64_t e; + uint64_t f; + }; + char str[16]; + }; +} __attribute__((aligned(8))); + +static inline bool +trace_check(int cpu_id, __unused int evid) +{ + if (cpu_id >= phy_cpu_num) + return false; + + if (!per_cpu(sbuf, cpu_id)[ACRN_TRACE]) + return false; + + return true; +} + +static inline void +_trace_put(int cpu_id, int evid, struct trace_entry *entry) +{ + struct shared_buf *sbuf = (struct shared_buf *) + per_cpu(sbuf, cpu_id)[ACRN_TRACE]; + + entry->tsc = rdtsc(); + entry->id = evid; + sbuf_put(sbuf, (uint8_t *)entry); +} + +static inline void +TRACE_2L(int evid, 
uint64_t e, uint64_t f) +{ + struct trace_entry entry; + int cpu_id = get_cpu_id(); + + if (!trace_check(cpu_id, evid)) + return; + + entry.e = e; + entry.f = f; + _trace_put(cpu_id, evid, &entry); +} + +static inline void +TRACE_4I(int evid, uint32_t a, uint32_t b, uint32_t c, + uint32_t d) +{ + struct trace_entry entry; + int cpu_id = get_cpu_id(); + + if (!trace_check(cpu_id, evid)) + return; + + entry.a = a; + entry.b = b; + entry.c = c; + entry.d = d; + _trace_put(cpu_id, evid, &entry); +} + +static inline void +TRACE_6C(int evid, uint8_t a1, uint8_t a2, uint8_t a3, + uint8_t a4, uint8_t b1, uint8_t b2) +{ + struct trace_entry entry; + int cpu_id = get_cpu_id(); + + if (!trace_check(cpu_id, evid)) + return; + + entry.a1 = a1; + entry.a2 = a2; + entry.a3 = a3; + entry.a4 = a4; + entry.b1 = b1; + entry.b2 = b2; + _trace_put(cpu_id, evid, &entry); +} + +#define TRACE_ENTER TRACE_16STR(TRACE_FUNC_ENTER, __func__) +#define TRACE_EXIT TRACE_16STR(TRACE_FUNC_EXIT, __func__) + +static inline void +TRACE_16STR(int evid, const char name[]) +{ + struct trace_entry entry; + int cpu_id = get_cpu_id(); + int len; + int i; + + if (!trace_check(cpu_id, evid)) + return; + + entry.e = 0; + entry.f = 0; + + len = strnlen_s(name, 20); + len = (len > 16) ? 16 : len; + for (i = 0; i < len; i++) + entry.str[i] = name[i]; + + entry.str[15] = 0; + _trace_put(cpu_id, evid, &entry); +} + +#else /* HV_DEBUG */ + +#define TRACE_TIMER_ACTION_ADDED_FMT +#define TRACE_TIMER_ACTION_PCKUP_FMT +#define TRACE_TIMER_ACTION_UPDAT_FMT +#define TRACE_TIMER_IRQ_FMT +#define TRACE_CUSTOM_FMT +#define TRACE_FUNC_ENTER_FMT +#define TRACE_FUNC_EXIT_FMT +#define TRACE_STR_FMT + +#define TRACE_ENTER +#define TRACE_EXIT + +static inline void +TRACE_2L(__unused int evid, + __unused uint64_t e, + __unused uint64_t f) +{ +} + +static inline void +TRACE_4I(__unused int evid, + __unused uint32_t a, + __unused uint32_t b, + __unused uint32_t c, + __unused uint32_t d) +{ +} + +static inline void +TRACE_6C(__unused int evid, + __unused uint8_t a1, + __unused uint8_t a2, + __unused uint8_t a3, + __unused uint8_t a4, + __unused uint8_t b1, + __unused uint8_t b2) +{ +} + +#endif /* HV_DEBUG */ + +#endif /* TRACE_H */ diff --git a/hypervisor/include/debug/vuart.h b/hypervisor/include/debug/vuart.h new file mode 100644 index 000000000..41f04d46a --- /dev/null +++ b/hypervisor/include/debug/vuart.h @@ -0,0 +1,84 @@ +/*- + * Copyright (c) 2013 Neel Natu + * Copyright (c) 2018 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VUART_H_ +#define _VUART_H_ + +struct fifo { + char *buf; + int rindex; /* index to read from */ + int windex; /* index to write to */ + int num; /* number of characters in the fifo */ + int size; /* size of the fifo */ +}; + +struct vuart { + char data; /* Data register (R/W) */ + char ier; /* Interrupt enable register (R/W) */ + char lcr; /* Line control register (R/W) */ + char mcr; /* Modem control register (R/W) */ + char lsr; /* Line status register (R/W) */ + char msr; /* Modem status register (R/W) */ + char fcr; /* FIFO control register (W) */ + char scr; /* Scratch register (R/W) */ + char dll; /* Baudrate divisor latch LSB */ + char dlh; /* Baudrate divisor latch MSB */ + + struct fifo rxfifo; + struct fifo txfifo; + int base; + + bool thre_int_pending; /* THRE interrupt pending */ + bool active; + struct vm *vm; + spinlock_t lock; /* protects all softc elements */ +}; + +#ifdef HV_DEBUG +void *vuart_init(struct vm *vm); +struct vuart *vuart_console_active(void); +void vuart_console_tx_chars(void); +void vuart_console_rx_chars(uint32_t serial_handle); +#else +static inline void *vuart_init(__unused struct vm *vm) +{ + return NULL; +} +static inline struct vuart *vuart_console_active(void) +{ + return NULL; +} +static inline void vuart_console_tx_chars(void) {} +static inline void vuart_console_rx_chars( + __unused uint32_t serial_handle) +{ +} +#endif /*HV_DEBUG*/ + +#endif diff --git a/hypervisor/include/hv_debug.h b/hypervisor/include/hv_debug.h new file mode 100644 index 000000000..6dc6ba366 --- /dev/null +++ b/hypervisor/include/hv_debug.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef HV_DEBUG_H +#define HV_DEBUG_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif /* HV_DEBUG_H */ diff --git a/hypervisor/include/hv_lib.h b/hypervisor/include/hv_lib.h new file mode 100644 index 000000000..7f8e7c667 --- /dev/null +++ b/hypervisor/include/hv_lib.h @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef HV_LIB_H +#define HV_LIB_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif /* HV_LIB_H */ diff --git a/hypervisor/include/hypervisor.h b/hypervisor/include/hypervisor.h new file mode 100644 index 000000000..0b533bc94 --- /dev/null +++ b/hypervisor/include/hypervisor.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/************************************************************************ + * + * FILE NAME + * + * hypervisor.h + * + * DESCRIPTION + * + * This file includes config header file "bsp_cfg.h" and other + * hypervisor used header files. + * It should be included in all the source files. + * + * + ************************************************************************/ +#ifndef HYPERVISOR_H +#define HYPERVISOR_H + +/* Include config header file containing config options */ +#include "bsp_cfg.h" + +#ifndef ASSEMBLER +/* hpa <--> hva, now it is 1:1 mapping */ +#define HPA2HVA(x) ((void *)(x)) +#define HVA2HPA(x) ((uint64_t)(x)) +/* gpa --> hpa -->hva */ +#define GPA2HVA(vm, x) HPA2HVA(gpa2hpa(vm, x)) +#endif /* !ASSEMBLER */ + +#endif /* HYPERVISOR_H */ diff --git a/hypervisor/include/lib/bits.h b/hypervisor/include/lib/bits.h new file mode 100644 index 000000000..dbe73bec8 --- /dev/null +++ b/hypervisor/include/lib/bits.h @@ -0,0 +1,575 @@ +/*- + * Copyright (c) 1998 Doug Rabson + * Copyright (c) 2017 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
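The comments above describe the address chain gpa -> hpa -> hva, with hpa and hva currently identity mapped. A minimal sketch of a GPA2HVA() caller; gpa2hpa() and struct vm are assumed from elsewhere in the hypervisor sources, and the helper name here is invented.

/* Sketch: read a 32-bit value out of guest memory through the 1:1 mapping. */
static uint32_t example_read_guest_u32(struct vm *vm, uint64_t gpa)
{
	uint32_t *hva = (uint32_t *)GPA2HVA(vm, gpa);	/* gpa -> hpa -> hva */

	return *hva;
}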
+ * + * $FreeBSD$ + */ + +#ifndef BITS_H +#define BITS_H + +#define BUS_LOCK "lock ; " +/* + * #define atomic_set_char(P, V) (*(unsigned char *)(P) |= (V)) + */ +static inline void atomic_set_char(unsigned char *p, unsigned char v) +{ + __asm __volatile(BUS_LOCK "orb %b1,%0" + : "+m" (*p) + : "iq" (v) + : "cc", "memory"); +} + +/* + * #define atomic_clear_char(P, V) (*(unsigned char *)(P) &= ~(V)) + */ +static inline void atomic_clear_char(unsigned char *p, unsigned char v) +{ + __asm __volatile(BUS_LOCK "andb %b1,%0" + : "+m" (*p) + : "iq" (~v) + : "cc", "memory"); +} + +/* + * #define atomic_add_char(P, V) (*(unsigned char *)(P) += (V)) + */ +static inline void atomic_add_char(unsigned char *p, unsigned char v) +{ + __asm __volatile(BUS_LOCK "addb %b1,%0" + : "+m" (*p) + : "iq" (v) + : "cc", "memory"); +} + +/* + * #define atomic_subtract_char(P, V) (*(unsigned char *)(P) -= (V)) + */ +static inline void atomic_subtract_char(unsigned char *p, unsigned char v) +{ + __asm __volatile(BUS_LOCK "subb %b1,%0" + : "+m" (*p) + : "iq" (v) + : "cc", "memory"); +} + +/* + * #define atomic_set_short(P, V) (*(unsigned short *)(P) |= (V)) + */ +static inline void atomic_set_short(unsigned short *p, unsigned short v) +{ + __asm __volatile(BUS_LOCK "orw %w1,%0" + : "+m" (*p) + : "ir" (v) + : "cc", "memory"); +} + +/* + * #define atomic_clear_short(P, V) (*(unsigned short *)(P) &= ~(V)) + */ +static inline void atomic_clear_short(unsigned short *p, unsigned short v) +{ + __asm __volatile(BUS_LOCK "andw %w1,%0" + : "+m" (*p) + : "ir" (~v) + : "cc", "memory"); +} + +/* + * #define atomic_add_short(P, V) (*(unsigned short *)(P) += (V)) + */ +static inline void atomic_add_short(unsigned short *p, unsigned short v) +{ + __asm __volatile(BUS_LOCK "addw %w1,%0" + : "+m" (*p) + : "ir" (v) + : "cc", "memory"); +} + +/* + * #define atomic_subtract_short(P, V) (*(unsigned short *)(P) -= (V)) + */ +static inline void atomic_subtract_short(unsigned short *p, unsigned short v) +{ + __asm __volatile(BUS_LOCK "subw %w1,%0" + : "+m" (*p) + : "ir" (v) + : "cc", "memory"); +} + +/* + * #define atomic_set_int(P, V) (*(unsigned int *)(P) |= (V)) + */ +static inline void atomic_set_int(unsigned int *p, unsigned int v) +{ + __asm __volatile(BUS_LOCK "orl %1,%0" + : "+m" (*p) + : "ir" (v) + : "cc", "memory"); +} + +/* + * #define atomic_clear_int(P, V) (*(unsigned int *)(P) &= ~(V)) + */ +static inline void atomic_clear_int(unsigned int *p, unsigned int v) +{ + __asm __volatile(BUS_LOCK "andl %1,%0" + : "+m" (*p) + : "ir" (~v) + : "cc", "memory"); +} + +/* + * #define atomic_add_int(P, V) (*(unsigned int *)(P) += (V)) + */ +static inline void atomic_add_int(unsigned int *p, unsigned int v) +{ + __asm __volatile(BUS_LOCK "addl %1,%0" + : "+m" (*p) + : "ir" (v) + : "cc", "memory"); +} + +/* + * #define atomic_subtract_int(P, V) (*(unsigned int *)(P) -= (V)) + */ +static inline void atomic_subtract_int(unsigned int *p, unsigned int v) +{ + __asm __volatile(BUS_LOCK "subl %1,%0" + : "+m" (*p) + : "ir" (v) + : "cc", "memory"); +} + +/* + * #define atomic_swap_int(P, V) \ + * (return (*(unsigned int *)(P)); *(unsigned int *)(P) = (V);) + */ +static inline int atomic_swap_int(unsigned int *p, unsigned int v) +{ + __asm __volatile(BUS_LOCK "xchgl %1,%0" + : "+m" (*p), "+r" (v) + : + : "cc", "memory"); + return v; +} + +/* + * #define atomic_readandclear_int(P) \ + * (return (*(unsigned int *)(P)); *(unsigned int *)(P) = 0;) + */ +#define atomic_readandclear_int(p) atomic_swap_int(p, 0) + +/* + * #define atomic_set_long(P, V) 
(*(unsigned long *)(P) |= (V)) + */ +static inline void atomic_set_long(unsigned long *p, unsigned long v) +{ + __asm __volatile(BUS_LOCK "orq %1,%0" + : "+m" (*p) + : "ir" (v) + : "cc", "memory"); +} + +/* + * #define atomic_clear_long(P, V) (*(u_long *)(P) &= ~(V)) + */ +static inline void atomic_clear_long(unsigned long *p, unsigned long v) +{ + __asm __volatile(BUS_LOCK "andq %1,%0" + : "+m" (*p) + : "ir" (~v) + : "cc", "memory"); +} + +/* + * #define atomic_add_long(P, V) (*(unsigned long *)(P) += (V)) + */ +static inline void atomic_add_long(unsigned long *p, unsigned long v) +{ + __asm __volatile(BUS_LOCK "addq %1,%0" + : "+m" (*p) + : "ir" (v) + : "cc", "memory"); +} + +/* + * #define atomic_subtract_long(P, V) (*(unsigned long *)(P) -= (V)) + */ +static inline void atomic_subtract_long(unsigned long *p, unsigned long v) +{ + __asm __volatile(BUS_LOCK "subq %1,%0" + : "+m" (*p) + : "ir" (v) + : "cc", "memory"); +} + +/* + * #define atomic_swap_long(P, V) \ + * (return (*(unsigned long *)(P)); *(unsigned long *)(P) = (V);) + */ +static inline long atomic_swap_long(unsigned long *p, unsigned long v) +{ + __asm __volatile(BUS_LOCK "xchgq %1,%0" + : "+m" (*p), "+r" (v) + : + : "cc", "memory"); + return v; +} + +/* + * #define atomic_readandclear_long(P) \ + * (return (*(unsigned long *)(P)); *(unsigned long *)(P) = 0;) + */ +#define atomic_readandclear_long(p) atomic_swap_long(p, 0) + +/* + * #define atomic_load_acq_int(P) (*(unsigned int*)(P)) + */ +static inline int atomic_load_acq_int(unsigned int *p) +{ + int ret; + + __asm __volatile("movl %1,%0" + : "=r"(ret) + : "m" (*p) + : "cc", "memory"); + return ret; +} + +/* + * #define atomic_store_rel_int(P, V) (*(unsigned int *)(P) = (V)) + */ +static inline void atomic_store_rel_int(unsigned int *p, unsigned int v) +{ + __asm __volatile("movl %1,%0" + : "=m" (*p) + : "r" (v) + : "cc", "memory"); +} + +/* + * #define atomic_load_acq_long(P) (*(unsigned long*)(P)) + */ +static inline long atomic_load_acq_long(unsigned long *p) +{ + long ret; + + __asm __volatile("movq %1,%0" + : "=r"(ret) + : "m" (*p) + : "cc", "memory"); + return ret; +} + +/* + * #define atomic_store_rel_long(P, V) (*(unsigned long *)(P) = (V)) + */ +static inline void atomic_store_rel_long(unsigned long *p, unsigned long v) +{ + __asm __volatile("movq %1,%0" + : "=m" (*p) + : "r" (v) + : "cc", "memory"); +} + +static inline int atomic_cmpxchg_int(unsigned int *p, + int old, int new) +{ + int ret; + + __asm __volatile(BUS_LOCK "cmpxchgl %2,%1" + : "=a" (ret), "+m" (*p) + : "r" (new), "0" (old) + : "memory"); + + return ret; +} + +#define atomic_load_acq_32 atomic_load_acq_int +#define atomic_store_rel_32 atomic_store_rel_int +#define atomic_load_acq_64 atomic_load_acq_long +#define atomic_store_rel_64 atomic_store_rel_long + +/* + * #define atomic_xadd_int(P, V) \ + * (return (*(unsigned long *)(P)); *(unsigned long *)(P) += (V);) + */ +static inline int atomic_xadd_int(unsigned int *p, unsigned int v) +{ + __asm __volatile(BUS_LOCK "xaddl %0,%1" + : "+r" (v), "+m" (*p) + : + : "cc", "memory"); + return v; +} + +static inline int atomic_add_return(int v, unsigned int *p) +{ + return v + atomic_xadd_int(p, v); +} + +static inline int atomic_sub_return(int v, unsigned int *p) +{ + return atomic_xadd_int(p, -v) - v; +} + +#define atomic_inc_return(v) atomic_add_return(1, (v)) +#define atomic_dec_return(v) atomic_sub_return(1, (v)) + +static inline unsigned int +bsrl(unsigned int mask) +{ + unsigned int result; + + __asm __volatile("bsrl %1,%0" + : "=r" (result) + : "rm" 
(mask)); + return result; +} + +static inline unsigned long +bsrq(unsigned long mask) +{ + unsigned long result; + + __asm __volatile("bsrq %1,%0" + : "=r" (result) + : "rm" (mask)); + return result; +} + +/** + * + * fls - Find the Last (most significant) bit Set in value and + * return the bit index of that bit. + * + * Bits are numbered starting at 0,the least significant bit. + * A return value of -1 means that the argument was zero. + * + * Examples: + * fls (0x0) = -1 + * fls (0x01) = 0 + * fls (0xf0) = 7 + * ... + * fls (0x80000001) = 31 + * + * @param mask: 'int' type value + * + * @return value: zero-based bit index, -1 means 'mask' was zero. + * + * **/ +static inline int +fls(int mask) +{ + return (mask == 0 ? -1 : (int)bsrl((unsigned int)mask)); +} + +/* 64bit version of fls(). */ +static inline int +flsl(long mask) +{ + return (mask == 0 ? -1 : (int)bsrq((unsigned long)mask)); +} + +static inline unsigned long +bsfq(unsigned long mask) +{ + unsigned long result; + + __asm __volatile("bsfq %1,%0" + : "=r" (result) + : "rm" (mask)); + return result; +} + +/** + * + * ffsl - Find the First (least significant) bit Set in value(Long type) + * and return the index of that bit. + * + * Bits are numbered starting at 0,the least significant bit. + * A return value of -1 means that the argument was zero. + * + * Examples: + * ffsl (0x0) = -1 + * ffsl (0x01) = 0 + * ffsl (0xf0) = 4 + * ffsl (0xf00) = 8 + * ... + * ffsl (0x8000000000000001) = 0 + * ffsl (0xf000000000000000) = 60 + * + * @param mask: 'long' type value + * + * @return value: zero-based bit index, -1 means 'mask' was zero. + * + * **/ +static inline int +ffsl(long mask) +{ + return (mask == 0 ? -1 : (int)bsfq((unsigned long)mask)); +} + +static inline void +bitmap_set(int mask, unsigned long *bits) +{ + /* (*bits) |= (1UL< + +/* + * FUNCTION + * hkdf_sha256 + * + * Description + * HMAC-based Extract-and-Expand Key Derivation Function. + * + * Parameters: + * out_key Pointer to key buffer which is used to save + * hkdf_sha256 result + * out_len The length of out_key + * secret Pointer to input keying material + * secret_len The length of secret + * salt Pointer to salt buffer, it is optional + * if not provided (salt == NULL), it is set internally + * to a string of hashlen(32) zeros + * salt_len The length of the salt value + * Ignored if salt is NULL + * info Pointer to application specific information, it is + * optional + * Ignored if info == NULL or a zero-length string + * info_len: The length of the info, ignored if info is NULL + * + * OUTPUTS + * 1 - Success + * 0 - Failure + */ +int hkdf_sha256(uint8_t *out_key, size_t out_len, + const uint8_t *secret, size_t secret_len, + const uint8_t *salt, size_t salt_len, + const uint8_t *info, size_t info_len); + +#endif /* HKDF_H */ diff --git a/hypervisor/include/lib/errno.h b/hypervisor/include/lib/errno.h new file mode 100644 index 000000000..b06635e89 --- /dev/null +++ b/hypervisor/include/lib/errno.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
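Based on the hkdf_sha256() contract documented above (returns 1 on success, 0 on failure; salt and info are optional), a hypothetical caller deriving a 32-byte key could look like the sketch below. The "example seed" label is illustrative only and not defined anywhere in this patch.

/* Sketch: derive a 32-byte key from an input secret with HKDF-SHA256. */
static int example_derive_key(const uint8_t *secret, size_t secret_len,
			      uint8_t out_key[32])
{
	static const uint8_t info[] = "example seed";

	/* salt == NULL: hkdf_sha256 substitutes 32 zero bytes internally */
	return hkdf_sha256(out_key, 32,
			   secret, secret_len,
			   NULL, 0,
			   info, sizeof(info) - 1);
}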
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ERRNO_H +#define ERRNO_H + +/** Indicates that not enough memory. */ +#define ENOMEM 1 +/** Indicates that argument is not valid. */ +#define EINVAL 2 +/** Indicates that no such dev. */ +#define ENODEV 3 +/** Indicates that there is IO error. */ +#define EIO 4 +/** Indicates that target is busy. */ +#define EBUSY 5 + +#endif /* ERRNO_H */ diff --git a/hypervisor/include/lib/list.h b/hypervisor/include/lib/list.h new file mode 100644 index 000000000..f7e42bbf0 --- /dev/null +++ b/hypervisor/include/lib/list.h @@ -0,0 +1,123 @@ +/*- + * Copyright (C) 2005-2011 HighPoint Technologies, Inc. + * Copyright (c) 2017 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ +#ifndef LIST_H_ +#define LIST_H_ + +struct list_head { + struct list_head *next, *prev; +}; + +#define INIT_LIST_HEAD(ptr) do { (ptr)->next = (ptr); (ptr)->prev = (ptr); } \ + while (0) + +static inline void __list_add(struct list_head *_new, struct list_head *prev, + struct list_head *next) +{ + next->prev = _new; + _new->next = next; + _new->prev = prev; + prev->next = _new; +} + +static inline void list_add(struct list_head *_new, struct list_head *head) +{ + __list_add(_new, head, head->next); +} + +static inline void list_add_tail(struct list_head *_new, + struct list_head *head) +{ + __list_add(_new, head->prev, head); +} + +static inline void __list_del(struct list_head *prev, struct list_head *next) +{ + next->prev = prev; + prev->next = next; +} + +static inline void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); +} + +static inline void list_del_init(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + INIT_LIST_HEAD(entry); +} + +static inline int list_empty(struct list_head *head) +{ + return head->next == head; +} + +static inline void __list_splice(struct list_head *list, + struct list_head *head) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + struct list_head *at = head->next; + + first->prev = head; + head->next = first; + + last->next = at; + at->prev = last; +} + +static inline void list_splice(struct list_head *list, struct list_head *head) +{ + if (!list_empty(list)) + __list_splice(list, head); +} + +static inline void list_splice_init(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) { + __list_splice(list, head); + INIT_LIST_HEAD(list); + } +} + +#define list_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(uint64_t)(&((type *)0)->member))) + +#define list_for_each(pos, head) \ + for (pos = (head)->next; pos != (head); pos = pos->next) + +#define list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) + +#define get_first_item(attached, type, member) \ + ((type *)((char *)((attached)->next)-(uint64_t)(&((type *)0)->member))) + +#endif /* LIST_H_ */ diff --git a/hypervisor/include/lib/macros.h b/hypervisor/include/lib/macros.h new file mode 100644 index 000000000..7e30ba7bd --- /dev/null +++ b/hypervisor/include/lib/macros.h @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
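The list macros above recover the containing structure from an embedded struct list_head by subtracting the member offset. The intended pattern, sketched with an invented payload type (struct timer_item is not part of the patch):

/* Sketch: embed a list_head in a payload struct, then walk the queue and
 * recover each payload with list_entry().
 */
struct timer_item {
	uint64_t deadline;
	struct list_head node;	/* linkage used by the list macros */
};

static void example_walk_queue(struct list_head *queue)
{
	struct list_head *pos;

	list_for_each(pos, queue) {
		struct timer_item *item =
			list_entry(pos, struct timer_item, node);
		/* ... use item->deadline ... */
		(void)item;
	}
}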
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MACROS_H +#define MACROS_H + +/** Replaces 'x' by the string "x". */ +#define __CPP_STRING(x) #x +/** Replaces 'x' by its value. */ +#define CPP_STRING(x) __CPP_STRING(x) + +/** Creates a bitfield mask. + * + * @param pos The position of the LSB within the mask. + * @param width The width of the bitfield in bits. + * + * @return The bitfield mask. + */ + +#define BITFIELD_MASK(pos, width) (((1<<(width))-1)<<(pos)) +#define BITFIELD_VALUE(v, pos, width) (((v)<<(pos)) & (((1<<(width))-1)<<(pos))) + +#define MAKE_BITFIELD_MASK(id) BITFIELD_MASK(id ## _POS, id ## _WIDTH) +#define MAKE_BITFIELD_VALUE(v, id) BITFIELD_VALUE(v, id ## _POS, id ## _WIDTH) + +/** Defines a register within a register block. */ +#define REGISTER(base, off) (base ## _BASE + (off)) + +#define MAKE_MMIO_REGISTER_ADDRESS(chip, module, register) \ + (chip ## _ ## module ## _BASE + \ + (chip ## _ ## module ## _ ## register ## _REGISTER)) + +/* Macro used to check if a value is aligned to the required boundary. + * Returns TRUE if aligned; FALSE if not aligned + * NOTE: The required alignment must be a power of 2 (2, 4, 8, 16, 32, etc) + */ +#define MEM_ALIGNED_CHECK(value, req_align) \ + (((uint64_t)(value) & ((uint64_t)(req_align) - (uint64_t)1)) == 0) + +#if !defined(ASSEMBLER) && !defined(LINKER_SCRIPT) + +#define ARRAY_LENGTH(x) (sizeof(x)/sizeof((x)[0])) + +#endif + +#endif /* INCLUDE_MACROS_H defined */ diff --git a/hypervisor/include/lib/mem_mgt.h b/hypervisor/include/lib/mem_mgt.h new file mode 100644 index 000000000..6c9807942 --- /dev/null +++ b/hypervisor/include/lib/mem_mgt.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
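The bitfield helpers above are easiest to read with a concrete expansion. Assuming a hypothetical field FOO that starts at bit 4 and is 3 bits wide:

/* Worked expansion (FOO is illustrative, not defined by this patch):
 *   BITFIELD_MASK(4, 3)     == ((1 << 3) - 1) << 4  == 0x70
 *   BITFIELD_VALUE(5, 4, 3) == (5 << 4) & 0x70      == 0x50
 */
#define FOO_POS		4
#define FOO_WIDTH	3
#define FOO_MASK	MAKE_BITFIELD_MASK(FOO)		/* 0x70 */
#define FOO_VALUE(v)	MAKE_BITFIELD_VALUE((v), FOO)	/* ((v) << 4) & 0x70 */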
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __MEM_MGT_H__ +#define __MEM_MGT_H__ + +/* Macros */ +#define BITMAP_WORD_SIZE 32 + +struct mem_pool { + void *start_addr; /* Start Address of Memory Pool */ + spinlock_t spinlock; /* To protect Memory Allocation */ + uint32_t size; /* Size of Memory Pool in Bytes */ + uint32_t buff_size; /* Size of one Buffer in Bytes */ + uint32_t total_buffs; /* Total Buffers in Memory Pool */ + uint32_t bmp_size; /* Size of Bitmap Array */ + uint32_t *bitmap; /* Pointer to allocation bitmap */ + uint32_t *contiguity_bitmap; /* Pointer to contiguity bitmap */ +}; + +/* APIs exposing memory allocation/deallocation abstractions */ +void *malloc(unsigned int num_bytes); +void *calloc(unsigned int num_elements, unsigned int element_size); +void *alloc_page(); +void *alloc_pages(unsigned int page_num); +void free(void *ptr); + +#endif /* MEM_MGT_H_ */ diff --git a/hypervisor/include/lib/rtl.h b/hypervisor/include/lib/rtl.h new file mode 100644 index 000000000..2bba8361c --- /dev/null +++ b/hypervisor/include/lib/rtl.h @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
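The allocator interface above is libc-like but backed by the bitmap-managed mem_pool. A minimal usage sketch (the counters buffer is illustrative; callers are expected to check for a NULL return and to pair every allocation with free()):

/* Sketch: allocate a zeroed array from the hypervisor heap and release it. */
static void example_pool_usage(void)
{
	uint32_t *counters = calloc(8, sizeof(*counters));	/* zeroed, like libc calloc */

	if (counters == NULL)
		return;		/* pool exhausted */

	counters[0] = 1U;
	/* ... */
	free(counters);
}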
+ */ + +#ifndef RTL_H +#define RTL_H + +#include + +union u_qword { + struct { + uint32_t low; + uint32_t high; + } dwords; + + uint64_t qword; + +}; + +struct udiv_result { + union u_qword q; + union u_qword r; + +}; + +/* Function prototypes */ +void udelay(int us); +void *memchr(const void *void_s, int c, size_t n); +void *memmove(void *s1, const void *s2, size_t n); +int strcmp(const char *s1, const char *s2); +int strncmp(const char *s1, const char *s2, size_t n); +char *strcpy_s(char *d, size_t dmax, const char *s); +char *strncpy_s(char *d, size_t dmax, const char *s, size_t slen); +char *strchr(const char *s, int ch); +void mdelay(unsigned int ms); +size_t strnlen_s(const char *str, size_t maxlen); +void *memset(void *base, uint8_t v, size_t n); +void *memcpy_s(void *d, size_t dmax, const void *s, size_t slen); +int udiv64(uint64_t dividend, uint64_t divisor, struct udiv_result *res); +int udiv32(uint32_t dividend, uint32_t divisor, struct udiv_result *res); + +extern uint64_t tsc_clock_freq; +#define US_TO_TICKS(x) ((x)*tsc_clock_freq/1000000UL) +#define TIME_MS_DELTA US_TO_TICKS(1000UL) + +#define TICKS_TO_US(x) ((((x) * (1000000UL >> 8)) / tsc_clock_freq) << 8) +#define TICKS_TO_MS(x) (((x) * 1000UL) / tsc_clock_freq) + +static inline uint64_t rdtsc(void) +{ + uint32_t lo, hi; + + asm volatile("rdtsc" : "=a" (lo), "=d" (hi)); + return ((uint64_t)hi << 32) | lo; +} +#endif /* RTL_H */ diff --git a/hypervisor/include/lib/spinlock.h b/hypervisor/include/lib/spinlock.h new file mode 100644 index 000000000..f234a82d9 --- /dev/null +++ b/hypervisor/include/lib/spinlock.h @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPINLOCK_H +#define SPINLOCK_H + +#include + +#ifndef ASSEMBLER + +#include + +/** The architecture dependent spinlock type. 
*/ +typedef struct _spinlock { + uint32_t head; + uint32_t tail; + +} spinlock_t; + +/* Function prototypes */ +int spinlock_init(spinlock_t *lock); +int spinlock_obtain(spinlock_t *lock); + +static inline int spinlock_release(spinlock_t *lock) +{ + /* Increment tail of queue */ + asm volatile (" lock incl %[tail]\n" + : + : [tail] "m" (lock->tail) + : "cc", "memory"); + + return 0; +} + +#else /* ASSEMBLER */ + +/** The offset of the head element. */ +#define SYNC_SPINLOCK_HEAD_OFFSET 0 + +/** The offset of the tail element. */ +#define SYNC_SPINLOCK_TAIL_OFFSET 4 + +.macro spinlock_obtain lock + movl $1, % eax + lea \lock, % ebx + lock xaddl % eax, SYNC_SPINLOCK_HEAD_OFFSET(%ebx) + cmpl % eax, SYNC_SPINLOCK_TAIL_OFFSET(%ebx) + jz 1f +2 : + pause + cmpl % eax, SYNC_SPINLOCK_TAIL_OFFSET(%ebx) + jnz 2b +1 : +.endm + +#define spinlock_obtain(x) spinlock_obtain lock = (x) + +.macro spinlock_release lock + lea \lock, % ebx + lock incl SYNC_SPINLOCK_TAIL_OFFSET(%ebx) +.endm + +#define spinlock_release(x) spinlock_release lock = (x) + +#endif /* ASSEMBLER */ + +#define spinlock_rflags unsigned long cpu_int_value + +#define spinlock_irq_obtain(l) (CPU_IRQ_DISABLE(), spinlock_obtain((l))) +#define spinlock_irqsave_obtain(l) \ + do { \ + CPU_INT_ALL_DISABLE(); \ + spinlock_obtain((l)); \ + } while (0) + +#define spinlock_irq_release(l) (spinlock_release((l)), CPU_IRQ_ENABLE()) +#define spinlock_irqrestore_release(l) \ + do { \ + spinlock_release((l)); \ + CPU_INT_ALL_RESTORE(); \ + } while (0) + +#endif /* SPINLOCK_H */ diff --git a/hypervisor/include/lib/stdarg.h b/hypervisor/include/lib/stdarg.h new file mode 100644 index 000000000..a58619c3a --- /dev/null +++ b/hypervisor/include/lib/stdarg.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
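spinlock_obtain()/spinlock_release() above implement a ticket lock: head is the next ticket to hand out, tail is the ticket currently being served, and release advances tail with a locked increment. A usage sketch for a plain critical section follows; the irqsave/irqrestore variants additionally rely on the CPU_INT_ALL_DISABLE/RESTORE macros defined elsewhere in the tree, and counter_lock/shared_counter are illustrative names.

/* Sketch: protect a shared counter.  spinlock_init() is assumed to have
 * been called once to reset head and tail.
 */
static spinlock_t counter_lock;
static uint32_t shared_counter;

static void example_locked_increment(void)
{
	spinlock_obtain(&counter_lock);		/* spin until our ticket == tail */
	shared_counter++;
	spinlock_release(&counter_lock);	/* lock incl tail: next waiter proceeds */
}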
+ */ + +#ifndef STDARG_H +#define STDARG_H + +#include + +#define va_start(x, y) __builtin_va_start((x), (y)) +#define va_end(x) __builtin_va_end(x) + +#endif /* STDARG_H */ diff --git a/hypervisor/include/lib/strtol.h b/hypervisor/include/lib/strtol.h new file mode 100644 index 000000000..854cf72d3 --- /dev/null +++ b/hypervisor/include/lib/strtol.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef STRTOL_H +#define STRTOL_H + +int atoi(const char *str); +long strtol(const char *nptr, char **endptr, register int base); +uint64_t strtoul(const char *nptr, char **endptr, register int base); + +#endif /* !STRTOL_H */ diff --git a/hypervisor/include/lib/types.h b/hypervisor/include/lib/types.h new file mode 100644 index 000000000..546ec70dc --- /dev/null +++ b/hypervisor/include/lib/types.h @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef TYPES_H +#define TYPES_H + +/* Define NULL value */ +#define HV_NULL 0 + +/* Defines for TRUE / FALSE conditions */ +#define HV_FALSE 0 +#define HV_TRUE 1 + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#define __aligned(x) __attribute__((aligned(x))) +#define __packed __attribute__((packed)) +#define __unused __attribute__((unused)) + +#ifndef ASSEMBLER + +/* Define standard data types. These definitions allow software components + * to perform in the same manner on different target platforms. + */ +typedef unsigned int uint32_t; +typedef signed char int8_t; +typedef unsigned char uint8_t; +typedef signed short int16_t; +typedef unsigned short uint16_t; +typedef signed int int32_t; +typedef unsigned int uint32_t; +typedef unsigned long uint64_t; +typedef signed long int64_t; +typedef unsigned int size_t; +typedef unsigned long mmio_addr_t; +typedef unsigned long vaddr_t; +typedef unsigned long paddr_t; +typedef unsigned long ioport_t; +typedef __builtin_va_list va_list; + +typedef uint8_t bool; + +#ifndef NULL +#define NULL ((void *) 0) +#endif + +#ifndef true +#define true 1 +#define false 0 +#endif + +#ifndef UINT64_MAX +#define UINT64_MAX (-1UL) +#endif + +#endif /* ASSEMBLER */ + +#endif /* INCLUDE_TYPES_H defined */ diff --git a/hypervisor/include/lib/util.h b/hypervisor/include/lib/util.h new file mode 100644 index 000000000..86e9b930c --- /dev/null +++ b/hypervisor/include/lib/util.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UTIL_H +#define UTIL_H + +/** Add an offset (in bytes) to an (base)address. + * + * @param addr Baseaddress + * @param off Offset + * @return Returns baseaddress + offset in bytes. + */ +#define ADD_OFFSET(addr, off) (void *)(((uint8_t *)(addr))+(off)) + +#define offsetof(st, m) __builtin_offsetof(st, m) + +/** Round an integer (x) up to a multiple of y */ +#define INT_ROUNDUP(x, y) (((x)+((y)-1))&-(y)) + +/** Round an integer up to a multiple of 4 */ +#define INT_ROUNDUP4(x) INT_ROUNDUP(x, 4) + +/** Round an integer up to a multiple of 8 */ +#define INT_ROUNDUP8(x) INT_ROUNDUP(x, 8) + +/** Round an integer up to a multiple of 8 */ +#define INT_ROUNDUP16(x) INT_ROUNDUP(x, 16) + +/** Roundup (x/y) to ( x/y + (x%y) ? 1 : 0) **/ +#define INT_DIV_ROUNDUP(x, y) (((x)+(y)-1)/(y)) + +#endif /* UTIL_H */ diff --git a/hypervisor/include/public/acrn_common.h b/hypervisor/include/public/acrn_common.h new file mode 100644 index 000000000..057e92233 --- /dev/null +++ b/hypervisor/include/public/acrn_common.h @@ -0,0 +1,289 @@ +/* + * common definition + * + * Copyright (C) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
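The rounding helpers above depend on two's-complement masking, so INT_ROUNDUP() only works when y is a power of two, while INT_DIV_ROUNDUP() works for any positive y. A few worked values:

/* Worked examples:
 *   INT_ROUNDUP(13, 8)     == (13 + 7) & ~7     == 16
 *   INT_ROUNDUP16(33)      == (33 + 15) & ~15   == 48
 *   INT_DIV_ROUNDUP(13, 8) == (13 + 8 - 1) / 8  == 2
 */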
+ */ + +/** + * @file acrn_common.h + * + * @brief acrn common data structure for hypercall or ioctl + */ + +#ifndef ACRN_COMMON_H +#define ACRN_COMMON_H + +#include + +/* + * Common structures for ACRN/VHM/DM + */ + +/* + * IO request + */ +#define VHM_REQUEST_MAX 16 + +#define REQ_STATE_PENDING 0 +#define REQ_STATE_SUCCESS 1 +#define REQ_STATE_PROCESSING 2 +#define REQ_STATE_FAILED -1 + +#define REQ_PORTIO 0 +#define REQ_MMIO 1 +#define REQ_PCICFG 2 +#define REQ_WP 3 + +#define REQUEST_READ 0 +#define REQUEST_WRITE 1 + +/** + * @brief Hypercall + * + * @addtogroup acrn_hypercall ACRN Hypercall + * @{ + */ + +struct mmio_request { + uint32_t direction; + uint32_t reserved; + int64_t address; + int64_t size; + int64_t value; +} __aligned(8); + +struct pio_request { + uint32_t direction; + uint32_t reserved; + int64_t address; + int64_t size; + int32_t value; +} __aligned(8); + +struct pci_request { + uint32_t direction; + uint32_t reserved[3];/* need keep same header fields with pio_request */ + int64_t size; + int32_t value; + int32_t bus; + int32_t dev; + int32_t func; + int32_t reg; +} __aligned(8); + +/* vhm_request are 256Bytes aligned */ +struct vhm_request { + /* offset: 0bytes - 63bytes */ + union { + uint32_t type; + int32_t reserved0[16]; + }; + /* offset: 64bytes-127bytes */ + union { + struct pio_request pio_request; + struct pci_request pci_request; + struct mmio_request mmio_request; + int64_t reserved1[8]; + } reqs; + + /* True: valid req which need VHM to process. + * ACRN write, VHM read only + **/ + int32_t valid; + + /* the client which is distributed to handle this request */ + int32_t client; + + /* 1: VHM had processed and success + * 0: VHM had not yet processed + * -1: VHM failed to process. Invalid request + * VHM write, ACRN read only + */ + int32_t processed; +} __aligned(256); + +struct vhm_request_buffer { + union { + struct vhm_request req_queue[VHM_REQUEST_MAX]; + int8_t reserved[4096]; + }; +} __aligned(4096); + +/** + * @brief Info to create a VM, the parameter for HC_CREATE_VM hypercall + */ +struct acrn_create_vm { + /** created vmid return to VHM. 
Keep it first field */ + int32_t vmid; + + /** VCPU numbers this VM want to create */ + uint32_t vcpu_num; + + /** the GUID of this VM */ + uint8_t GUID[16]; + + /** whether Secure World is enabled for this VM */ + uint8_t secure_world_enabled; + + /** Reserved for future use*/ + uint8_t reserved[31]; +} __aligned(8); + +/** + * @brief Info to create a VCPU + * + * the parameter for HC_CREATE_VCPU hypercall + */ +struct acrn_create_vcpu { + /** the virtual CPU ID for the VCPU created */ + uint32_t vcpu_id; + + /** the physical CPU ID for the VCPU created */ + uint32_t pcpu_id; +} __aligned(8); + +/** + * @brief Info to set ioreq buffer for a created VM + * + * the parameter for HC_SET_IOREQ_BUFFER hypercall + */ +struct acrn_set_ioreq_buffer { + /** guest physical address of VM request_buffer */ + uint64_t req_buf; +} __aligned(8); + +/** Interrupt type for acrn_irqline: inject interrupt to IOAPIC */ +#define ACRN_INTR_TYPE_ISA 0 + +/** Interrupt type for acrn_irqline: inject interrupt to both PIC and IOAPIC */ +#define ACRN_INTR_TYPE_IOAPIC 1 + +/** + * @brief Info to assert/deassert/pulse a virtual IRQ line for a VM + * + * the parameter for HC_ASSERT_IRQLINE/HC_DEASSERT_IRQLINE/HC_PULSE_IRQLINE + * hypercall + */ +struct acrn_irqline { + /** interrupt type which could be IOAPIC or ISA */ + uint32_t intr_type; + + /** reserved for alignment padding */ + uint32_t reserved; + + /** pic IRQ for ISA type */ + uint64_t pic_irq; + + /** ioapic IRQ for IOAPIC & ISA TYPE, + * if -1 then this IRQ will not be injected + */ + uint64_t ioapic_irq; +} __aligned(8); + +/** + * @brief Info to inject a MSI interrupt to VM + * + * the parameter for HC_INJECT_MSI hypercall + */ +struct acrn_msi_entry { + /** MSI addr[19:12] with dest VCPU ID */ + uint64_t msi_addr; + + /** MSI data[7:0] with vector */ + uint64_t msi_data; +} __aligned(8); + +/** + * @brief Info to inject a NMI interrupt for a VM + */ +struct acrn_nmi_entry { + /** virtual CPU ID to inject */ + int64_t vcpu_id; +} __aligned(8); + +/** + * @brief Info to remap pass-through PCI MSI for a VM + * + * the parameter for HC_VM_PCI_MSIX_REMAP hypercall + */ +struct acrn_vm_pci_msix_remap { + /** pass-through PCI device virtual BDF# */ + uint16_t virt_bdf; + + /** pass-through PCI device physical BDF# */ + uint16_t phys_bdf; + + /** pass-through PCI device MSI/MSI-X cap control data */ + uint16_t msi_ctl; + + /** reserved for alignment padding */ + uint16_t reserved; + + /** pass-through PCI device MSI address to remap, which will + * return the caller after remapping + */ + uint64_t msi_addr; /* IN/OUT: msi address to fix */ + + /** pass-through PCI device MSI data to remap, which will + * return the caller after remapping + */ + uint32_t msi_data; + + /** pass-through PCI device is MSI or MSI-X + * 0 - MSI, 1 - MSI-X + */ + int32_t msix; + + /** if the pass-through PCI device is MSI-X, this field contains + * the MSI-X entry table index + */ + int32_t msix_entry_index; + + /** if the pass-through PCI device is MSI-X, this field contains + * Vector Control for MSI-X Entry, field defined in MSI-X spec + */ + uint32_t vector_ctl; +} __aligned(8); + +/** + * @brief The guest config pointer offset. + * + * It's designed to support passing DM config data pointer, based on it, + * hypervisor would parse then pass DM defined configuration to GUEST VCPU + * when booting guest VM. + * the address 0xd0000 here is designed by DM, as it arranged all memory + * layout below 1M, DM should make sure there is no overlap for the address + * 0xd0000 usage. 
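The valid and processed fields of struct vhm_request earlier in this header define a simple handshake: ACRN fills a slot and sets valid, the VHM/DM side consumes it and writes the result into processed, and ACRN reads it back to complete the guest access. A service-side sketch of that loop follows; handle_one_request() is an invented stand-in for the device-model logic, and real clients would also filter on the client field.

/* Sketch: walk the shared ioreq page and serve pending requests. */
int handle_one_request(struct vhm_request *req);	/* hypothetical handler */

static void example_serve_ioreqs(struct vhm_request_buffer *buf)
{
	uint32_t i;

	for (i = 0; i < VHM_REQUEST_MAX; i++) {
		struct vhm_request *req = &buf->req_queue[i];

		if (req->valid && (req->processed == REQ_STATE_PENDING)) {
			int ok = handle_one_request(req);

			/* VHM writes the result; ACRN only reads this field */
			req->processed = ok ? REQ_STATE_SUCCESS : REQ_STATE_FAILED;
		}
	}
}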
+ */ +#define GUEST_CFG_OFFSET 0xd0000 + +/** + * @} + */ +#endif /* ACRN_COMMON_H */ diff --git a/hypervisor/include/public/acrn_hv_defs.h b/hypervisor/include/public/acrn_hv_defs.h new file mode 100644 index 000000000..4620be594 --- /dev/null +++ b/hypervisor/include/public/acrn_hv_defs.h @@ -0,0 +1,232 @@ +/* + * hypercall definition + * + * Copyright (C) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** + * @file acrn_hv_defs.h + * + * @brief acrn data structure for hypercall + */ + +#ifndef ACRN_HV_DEFS_H +#define ACRN_HV_DEFS_H + +/* + * Common structures for ACRN/VHM/DM + */ +#include "acrn_common.h" + +/* + * Common structures for HV/VHM + */ + +#define _HC_ID(x, y) (((x)<<24)|(y)) + +#define HC_ID 0x80UL + +/* general */ +#define HC_ID_GEN_BASE 0x0UL +#define HC_GET_API_VERSION _HC_ID(HC_ID, HC_ID_GEN_BASE + 0x00) + +/* VM management */ +#define HC_ID_VM_BASE 0x10UL +#define HC_CREATE_VM _HC_ID(HC_ID, HC_ID_VM_BASE + 0x00) +#define HC_DESTROY_VM _HC_ID(HC_ID, HC_ID_VM_BASE + 0x01) +#define HC_START_VM _HC_ID(HC_ID, HC_ID_VM_BASE + 0x02) +#define HC_PAUSE_VM _HC_ID(HC_ID, HC_ID_VM_BASE + 0x03) +#define HC_CREATE_VCPU _HC_ID(HC_ID, HC_ID_VM_BASE + 0x04) + +/* IRQ and Interrupts */ +#define HC_ID_IRQ_BASE 0x20UL +#define HC_ASSERT_IRQLINE _HC_ID(HC_ID, HC_ID_IRQ_BASE + 0x00) +#define HC_DEASSERT_IRQLINE _HC_ID(HC_ID, HC_ID_IRQ_BASE + 0x01) +#define HC_PULSE_IRQLINE _HC_ID(HC_ID, HC_ID_IRQ_BASE + 0x02) +#define HC_INJECT_MSI _HC_ID(HC_ID, HC_ID_IRQ_BASE + 0x03) + +/* DM ioreq management */ +#define HC_ID_IOREQ_BASE 0x30UL +#define HC_SET_IOREQ_BUFFER _HC_ID(HC_ID, HC_ID_IOREQ_BASE + 0x00) +#define HC_NOTIFY_REQUEST_FINISH _HC_ID(HC_ID, HC_ID_IOREQ_BASE + 0x01) + +/* Guest memory management */ +#define HC_ID_MEM_BASE 0x40UL +#define HC_VM_SET_MEMMAP _HC_ID(HC_ID, HC_ID_MEM_BASE + 0x00) +#define HC_VM_GPA2HPA _HC_ID(HC_ID, HC_ID_MEM_BASE + 0x01) + +/* PCI assignment*/ +#define HC_ID_PCI_BASE 0x50UL +#define HC_ASSIGN_PTDEV _HC_ID(HC_ID, HC_ID_PCI_BASE + 0x00) +#define HC_DEASSIGN_PTDEV _HC_ID(HC_ID, HC_ID_PCI_BASE + 0x01) +#define HC_VM_PCI_MSIX_REMAP _HC_ID(HC_ID, HC_ID_PCI_BASE + 0x02) +#define HC_SET_PTDEV_INTR_INFO _HC_ID(HC_ID, HC_ID_PCI_BASE + 0x03) +#define HC_RESET_PTDEV_INTR_INFO _HC_ID(HC_ID, HC_ID_PCI_BASE + 0x04) + +/* DEBUG */ +#define HC_ID_DBG_BASE 0x60UL +#define HC_SETUP_SBUF _HC_ID(HC_ID, HC_ID_DBG_BASE + 0x00) + +/* Trusty */ +#define HC_ID_TRUSTY_BASE 0x70UL +#define HC_LAUNCH_TRUSTY _HC_ID(HC_ID, HC_ID_TRUSTY_BASE + 0x00) +#define HC_WORLD_SWITCH _HC_ID(HC_ID, HC_ID_TRUSTY_BASE + 0x01) +#define HC_GET_SEC_INFO _HC_ID(HC_ID, HC_ID_TRUSTY_BASE + 0x02) + +#define ACRN_DOM0_VMID (0UL) +#define ACRN_INVALID_VMID (-1) +#define ACRN_INVALID_HPA (-1UL) + +/* Generic memory attributes */ +#define MEM_ACCESS_READ 0x00000001 +#define MEM_ACCESS_WRITE 0x00000002 +#define MEM_ACCESS_EXEC 0x00000004 +#define MEM_ACCESS_RWX (MEM_ACCESS_READ | MEM_ACCESS_WRITE | \ + MEM_ACCESS_EXEC) +#define MEM_ACCESS_RIGHT_MASK 0x00000007 +#define MEM_TYPE_WB 0x00000040 +#define MEM_TYPE_WT 0x00000080 +#define MEM_TYPE_UC 0x00000100 +#define MEM_TYPE_WC 0x00000200 +#define MEM_TYPE_WP 0x00000400 +#define MEM_TYPE_MASK 0x000007C0 + +/** + * @brief Hypercall + * + * @defgroup acrn_hypercall ACRN Hypercall + * @{ + */ + +/** + * @brief Info to set ept mapping + * + * the parameter for HC_VM_SET_MEMMAP hypercall + */ +struct vm_set_memmap { +#define MAP_MEM 0 +#define MAP_MMIO 1 +#define MAP_UNMAP 2 + /** map type: MAP_MEM, MAP_MMIO or MAP_UNMAP */ + uint32_t type; + + /** reserved for alignment padding */ + uint32_t reserved; + + /** guest physical address to map */ + uint64_t remote_gpa; + + /** VM0's guest physcial address which remote gpa will be mapped to */ + uint64_t vm0_gpa; + + /** length of the map range */ + uint64_t length; + + /** memory attributes: memory type + RWX access right */ + uint32_t prot; +} __aligned(8); + +/** + * Setup parameter for share buffer, used for 
HC_SETUP_SBUF hypercall + */ +struct sbuf_setup_param { + /** sbuf physical cpu id */ + uint32_t pcpu_id; + + /** sbuf id */ + uint32_t sbuf_id; + + /** sbuf's guest physical address */ + uint64_t gpa; +} __aligned(8); + +/** + * Gpa to hpa translation parameter, used for HC_VM_GPA2HPA hypercall + */ +struct vm_gpa2hpa { + /** gpa to do translation */ + uint64_t gpa; + + /** hpa to return after translation */ + uint64_t hpa; +} __aligned(8); + +/** + * Intr mapping info per ptdev, the parameter for HC_SET_PTDEV_INTR_INFO + * hypercall + */ +struct hc_ptdev_irq { +#define IRQ_INTX 0 +#define IRQ_MSI 1 +#define IRQ_MSIX 2 + /** irq mapping type: INTX or MSI */ + uint32_t type; + + /** virtual BDF of the ptdev */ + uint16_t virt_bdf; + + /** physical BDF of the ptdev */ + uint16_t phys_bdf; + + union { + /** INTX remapping info */ + struct { + /** virtual IOAPIC/PIC pin */ + uint32_t virt_pin; + + /** physical IOAPIC pin */ + uint32_t phys_pin; + + /** is virtual pin from PIC */ + uint32_t pic_pin; + } intx; + + /** MSIx remapping info */ + struct { + /** vector count of MSI/MSIX */ + uint32_t vector_cnt; + } msix; + } is; /* irq source */ +} __aligned(8); + +/** + * Hypervisor api version info, return it for HC_GET_API_VERSION hyercall + */ +struct hc_api_version { + /** hypervisor api major version */ + uint32_t major_version; + + /** hypervisor api minor version */ + uint32_t minor_version; +} __aligned(8); + +/** + * @} + */ +#endif /* ACRN_HV_DEFS_H */ diff --git a/hypervisor/lib/crypto/hkdf.c b/hypervisor/lib/crypto/hkdf.c new file mode 100644 index 000000000..4b20f3841 --- /dev/null +++ b/hypervisor/lib/crypto/hkdf.c @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "tinycrypt/hmac.h" +#include "tinycrypt/sha256.h" + +#define SHA256_HASH_SIZE 32 /* SHA-256 length */ + +static uint8_t *hmac_sha256(uint8_t *out, unsigned int out_len, + const void *key, size_t key_len, + const uint8_t *data, size_t data_len) +{ + struct tc_hmac_state h; + + memset(&h, 0x0, sizeof(h)); + + if (!tc_hmac_set_key(&h, key, key_len) || + !tc_hmac_init(&h) || + !tc_hmac_update(&h, data, data_len) || + !tc_hmac_final(out, out_len, &h)) { + out = NULL; + } + + memset(&h, 0x0, sizeof(h)); + + return out; +} + +/* This function implements HKDF extract + * https://tools.ietf.org/html/rfc5869#section-2.2 + */ +static int hkdf_sha256_extract(uint8_t *out_key, size_t out_len, + const uint8_t *secret, size_t secret_len, + const uint8_t *salt, size_t salt_len) +{ + uint8_t salt0[SHA256_HASH_SIZE]; + + /* salt is optional for hkdf_sha256, it can be NULL. + * The implement of tc_hmac_set_key in tinycrypt can't + * accept NULL pointer, so salt0 is used here and set + * to all 0s + */ + if (!salt || salt_len == 0) { + memset(salt0, 0, SHA256_HASH_SIZE); + salt = salt0; + salt_len = SHA256_HASH_SIZE; + } + + if (!hmac_sha256(out_key, out_len, + salt, salt_len, + secret, secret_len)) + return 0; + + return 1; +} + +/* This function implements HKDF expand + * https://tools.ietf.org/html/rfc5869#section-2.3 + */ +static int hkdf_sha256_expand(uint8_t *out_key, size_t out_len, + const uint8_t *prk, size_t prk_len, + const uint8_t *info, size_t info_len) +{ + const size_t digest_len = SHA256_HASH_SIZE; + uint8_t T[SHA256_HASH_SIZE]; + size_t n, done = 0; + unsigned int i; + int ret = 0; + struct tc_hmac_state h; + + n = (out_len + digest_len - 1) / digest_len; + if (n > 255) + return 0; + + memset(&h, 0x0, sizeof(h)); + + for (i = 0; i < n; i++) { + uint8_t ctr = i + 1; + size_t todo; + + tc_hmac_set_key(&h, prk, prk_len); + tc_hmac_init(&h); + if (i != 0 && (!tc_hmac_update(&h, T, digest_len))) + goto out; + + if (!tc_hmac_update(&h, info, info_len) || + !tc_hmac_update(&h, &ctr, 1) || + !tc_hmac_final(T, digest_len, &h)) { + goto out; + } + + todo = digest_len; + /* Check if the length of left buffer is smaller than + * 32 to make sure no buffer overflow in below memcpy + */ + if (done + todo > out_len) + todo = out_len - done; + + memcpy_s(out_key + done, todo, T, todo); + done += todo; + } + + ret = 1; + +out: + memset(&h, 0x0, sizeof(h)); + memset(T, 0x0, SHA256_HASH_SIZE); + + return ret; +} + +/* https://tools.ietf.org/html/rfc5869#section-2 */ +int hkdf_sha256(uint8_t *out_key, size_t out_len, + const uint8_t *secret, size_t secret_len, + const uint8_t *salt, size_t salt_len, + const uint8_t *info, size_t info_len) +{ + uint8_t prk[SHA256_HASH_SIZE]; + size_t prk_len = SHA256_HASH_SIZE; + + if (!hkdf_sha256_extract(prk, prk_len, + secret, secret_len, + salt, salt_len)) { + return 0; + } + + if (!hkdf_sha256_expand(out_key, out_len, + prk, prk_len, + info, info_len)) { + return 0; + } + + return 1; +} diff --git a/hypervisor/lib/crypto/tinycrypt/COPYRIGHT b/hypervisor/lib/crypto/tinycrypt/COPYRIGHT new file mode 100644 index 000000000..4bec36363 --- /dev/null +++ b/hypervisor/lib/crypto/tinycrypt/COPYRIGHT @@ -0,0 +1,36 @@ + +================================================================================ + + TinyCrypt Cryptographic Library + +================================================================================ + + Copyright (c) 2017, Intel Corporation. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + - Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + - Neither the name of the Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +================================================================================ diff --git a/hypervisor/lib/crypto/tinycrypt/hmac.c b/hypervisor/lib/crypto/tinycrypt/hmac.c new file mode 100644 index 000000000..0a787d49b --- /dev/null +++ b/hypervisor/lib/crypto/tinycrypt/hmac.c @@ -0,0 +1,147 @@ +/* hmac.c - TinyCrypt implementation of the HMAC algorithm */ + +/* + * Copyright (C) 2017 by Intel Corporation, All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "hmac.h" + +static void rekey(uint8_t *key, const uint8_t *new_key, unsigned int key_size) +{ + const uint8_t inner_pad = (uint8_t) 0x36; + const uint8_t outer_pad = (uint8_t) 0x5c; + unsigned int i; + + for (i = 0; i < key_size; ++i) { + key[i] = inner_pad ^ new_key[i]; + key[i + TC_SHA256_BLOCK_SIZE] = outer_pad ^ new_key[i]; + } + for (; i < TC_SHA256_BLOCK_SIZE; ++i) { + key[i] = inner_pad; key[i + TC_SHA256_BLOCK_SIZE] = outer_pad; + } +} + +int tc_hmac_set_key(struct tc_hmac_state *ctx, const uint8_t *key, + unsigned int key_size) +{ + + /* input sanity check: */ + if (ctx == (struct tc_hmac_state *) 0 || + key == (const uint8_t *) 0 || + key_size == 0) { + return TC_CRYPTO_FAIL; + } + + const uint8_t dummy_key[key_size]; + struct tc_hmac_state dummy_state; + + if (key_size <= TC_SHA256_BLOCK_SIZE) { + /* + * The next three lines consist of dummy calls just to avoid + * certain timing attacks. Without these dummy calls, + * adversaries would be able to learn whether the key_size is + * greater than TC_SHA256_BLOCK_SIZE by measuring the time + * consumed in this process. + */ + (void)tc_sha256_init(&dummy_state.hash_state); + (void)tc_sha256_update(&dummy_state.hash_state, + dummy_key, + key_size); + (void)tc_sha256_final(&dummy_state.key[TC_SHA256_DIGEST_SIZE], + &dummy_state.hash_state); + + /* Actual code for when key_size <= TC_SHA256_BLOCK_SIZE: */ + rekey(ctx->key, key, key_size); + } else { + (void)tc_sha256_init(&ctx->hash_state); + (void)tc_sha256_update(&ctx->hash_state, key, key_size); + (void)tc_sha256_final(&ctx->key[TC_SHA256_DIGEST_SIZE], + &ctx->hash_state); + rekey(ctx->key, + &ctx->key[TC_SHA256_DIGEST_SIZE], + TC_SHA256_DIGEST_SIZE); + } + + return TC_CRYPTO_SUCCESS; +} + +int tc_hmac_init(struct tc_hmac_state *ctx) +{ + + /* input sanity check: */ + if (ctx == (struct tc_hmac_state *) 0) + return TC_CRYPTO_FAIL; + + (void) tc_sha256_init(&ctx->hash_state); + (void) tc_sha256_update(&ctx->hash_state, ctx->key, + TC_SHA256_BLOCK_SIZE); + + return TC_CRYPTO_SUCCESS; +} + +int tc_hmac_update(struct tc_hmac_state *ctx, + const void *data, unsigned int data_length) +{ + + /* input sanity check: */ + if (ctx == (struct tc_hmac_state *) 0) + return TC_CRYPTO_FAIL; + + (void)tc_sha256_update(&ctx->hash_state, data, data_length); + + return TC_CRYPTO_SUCCESS; +} + +int tc_hmac_final(uint8_t *tag, unsigned int taglen, + struct tc_hmac_state *ctx) +{ + + /* input sanity check: */ + if (tag == (uint8_t *) 0 || + taglen != TC_SHA256_DIGEST_SIZE || + ctx == (struct tc_hmac_state *) 0) { + return TC_CRYPTO_FAIL; + } + + (void)tc_sha256_final(tag, &ctx->hash_state); + + (void)tc_sha256_init(&ctx->hash_state); + (void)tc_sha256_update(&ctx->hash_state, + &ctx->key[TC_SHA256_BLOCK_SIZE], + TC_SHA256_BLOCK_SIZE); + (void)tc_sha256_update(&ctx->hash_state, tag, TC_SHA256_DIGEST_SIZE); + (void)tc_sha256_final(tag, &ctx->hash_state); + + /* destroy the current state */ + memset(ctx, 0, sizeof(*ctx)); + + return TC_CRYPTO_SUCCESS; +} diff --git a/hypervisor/lib/crypto/tinycrypt/hmac.h b/hypervisor/lib/crypto/tinycrypt/hmac.h new file mode 100644 index 000000000..e23b14c8a --- /dev/null +++ b/hypervisor/lib/crypto/tinycrypt/hmac.h @@ -0,0 +1,139 @@ +/* hmac.h - TinyCrypt interface to an HMAC implementation */ + +/* + * Copyright (C) 2017 by Intel Corporation, All Rights Reserved. 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * @brief Interface to an HMAC implementation.
+ *
+ * Overview: HMAC is a message authentication code based on hash functions.
+ * TinyCrypt hard codes SHA-256 as the hash function. A message
+ * authentication code based on hash functions is also called a
+ * keyed cryptographic hash function since it performs a
+ * transformation, specified by a key, of an arbitrary length data
+ * set into a fixed length data set (also called tag).
+ *
+ * Security: The security of the HMAC depends on the length of the key and
+ * on the security of the hash function. Note that HMAC primitives
+ * are much less affected by collision attacks than their
+ * corresponding hash functions.
+ *
+ * Requires: SHA-256
+ *
+ * Usage: 1) call tc_hmac_set_key to set the HMAC key.
+ *
+ * 2) call tc_hmac_init to initialize a struct hash_state before
+ * processing the data.
+ *
+ * 3) call tc_hmac_update to process the next input segment;
+ * tc_hmac_update can be called as many times as needed to process
+ * all of the segments of the input; the order is important.
+ *
+ * 4) call tc_hmac_final to output the tag; an illustrative example
+ * follows.
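+ *
+ * Example: a minimal sketch of the flow above (illustrative only; the
+ * "key", "key_len", "msg" and "msg_len" values are hypothetical
+ * caller-provided buffers and lengths, not part of this interface):
+ *
+ *     struct tc_hmac_state h;
+ *     uint8_t tag[TC_SHA256_DIGEST_SIZE];
+ *
+ *     (void)tc_hmac_set_key(&h, key, key_len);
+ *     (void)tc_hmac_init(&h);
+ *     (void)tc_hmac_update(&h, msg, msg_len);
+ *     (void)tc_hmac_final(tag, TC_SHA256_DIGEST_SIZE, &h);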
+ */ + +#ifndef __TC_HMAC_H__ +#define __TC_HMAC_H__ + +#include "sha256.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct tc_hmac_state { + /* the internal state required by h */ + struct tc_sha256_state hash_state; + /* HMAC key schedule */ + uint8_t key[2*TC_SHA256_BLOCK_SIZE]; +}; + +/** + * @brief HMAC set key procedure + * Configures ctx to use key + * @return returns TC_CRYPTO_SUCCESS (1) + * returns TC_CRYPTO_FAIL (0) if + * ctx == NULL or + * key == NULL or + * key_size == 0 + * @param ctx IN/OUT -- the struct tc_hmac_state to initial + * @param key IN -- the HMAC key to configure + * @param key_size IN -- the HMAC key size + */ +int tc_hmac_set_key(struct tc_hmac_state *ctx, const uint8_t *key, + unsigned int key_size); + +/** + * @brief HMAC init procedure + * Initializes ctx to begin the next HMAC operation + * @return returns TC_CRYPTO_SUCCESS (1) + * returns TC_CRYPTO_FAIL (0) if: ctx == NULL or key == NULL + * @param ctx IN/OUT -- struct tc_hmac_state buffer to init + */ +int tc_hmac_init(struct tc_hmac_state *ctx); + +/** + * @brief HMAC update procedure + * Mixes data_length bytes addressed by data into state + * @return returns TC_CRYPTO_SUCCCESS (1) + * returns TC_CRYPTO_FAIL (0) if: ctx == NULL or key == NULL + * @note Assumes state has been initialized by tc_hmac_init + * @param ctx IN/OUT -- state of HMAC computation so far + * @param data IN -- data to incorporate into state + * @param data_length IN -- size of data in bytes + */ +int tc_hmac_update(struct tc_hmac_state *ctx, const void *data, + unsigned int data_length); + +/** + * @brief HMAC final procedure + * Writes the HMAC tag into the tag buffer + * @return returns TC_CRYPTO_SUCCESS (1) + * returns TC_CRYPTO_FAIL (0) if: + * tag == NULL or + * ctx == NULL or + * key == NULL or + * taglen != TC_SHA256_DIGEST_SIZE + * @note ctx is erased before exiting. This should never be changed/removed. + * @note Assumes the tag bufer is at least sizeof(hmac_tag_size(state)) bytes + * state has been initialized by tc_hmac_init + * @param tag IN/OUT -- buffer to receive computed HMAC tag + * @param taglen IN -- size of tag in bytes + * @param ctx IN/OUT -- the HMAC state for computing tag + */ +int tc_hmac_final(uint8_t *tag, unsigned int taglen, + struct tc_hmac_state *ctx); + +#ifdef __cplusplus +} +#endif + +#endif /*__TC_HMAC_H__*/ diff --git a/hypervisor/lib/crypto/tinycrypt/sha256.c b/hypervisor/lib/crypto/tinycrypt/sha256.c new file mode 100644 index 000000000..429a134b0 --- /dev/null +++ b/hypervisor/lib/crypto/tinycrypt/sha256.c @@ -0,0 +1,217 @@ +/* sha256.c - TinyCrypt SHA-256 crypto hash algorithm implementation */ + +/* + * Copyright (C) 2017 by Intel Corporation, All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "sha256.h" + +static void compress(unsigned int *iv, const uint8_t *data); + +int tc_sha256_init(struct tc_sha256_state *s) +{ + /* input sanity check: */ + if (s == (struct tc_sha256_state *) 0) + return TC_CRYPTO_FAIL; + + /* + * Setting the initial state values. + * These values correspond to the first 32 bits of the fractional parts + * of the square roots of the first 8 primes: 2, 3, 5, 7, 11, 13, 17 + * and 19. + */ + memset((uint8_t *) s, 0x00, sizeof(*s)); + s->iv[0] = 0x6a09e667; + s->iv[1] = 0xbb67ae85; + s->iv[2] = 0x3c6ef372; + s->iv[3] = 0xa54ff53a; + s->iv[4] = 0x510e527f; + s->iv[5] = 0x9b05688c; + s->iv[6] = 0x1f83d9ab; + s->iv[7] = 0x5be0cd19; + + return TC_CRYPTO_SUCCESS; +} + +int tc_sha256_update(struct tc_sha256_state *s, const uint8_t *data, + size_t datalen) +{ + /* input sanity check: */ + if (s == (struct tc_sha256_state *) 0 || + data == (void *) 0) { + return TC_CRYPTO_FAIL; + } else if (datalen == 0) { + return TC_CRYPTO_SUCCESS; + } + + while (datalen-- > 0) { + s->leftover[s->leftover_offset++] = *(data++); + if (s->leftover_offset >= TC_SHA256_BLOCK_SIZE) { + compress(s->iv, s->leftover); + s->leftover_offset = 0; + s->bits_hashed += (TC_SHA256_BLOCK_SIZE << 3); + } + } + + return TC_CRYPTO_SUCCESS; +} + +int tc_sha256_final(uint8_t *digest, struct tc_sha256_state *s) +{ + unsigned int i; + + /* input sanity check: */ + if (digest == (uint8_t *) 0 || + s == (struct tc_sha256_state *) 0) { + return TC_CRYPTO_FAIL; + } + + s->bits_hashed += (s->leftover_offset << 3); + + s->leftover[s->leftover_offset++] = 0x80; /* always room for one byte */ + if (s->leftover_offset > (sizeof(s->leftover) - 8)) { + /* there is not room for all the padding in this block */ + memset(s->leftover + s->leftover_offset, 0x00, + sizeof(s->leftover) - s->leftover_offset); + compress(s->iv, s->leftover); + s->leftover_offset = 0; + } + + /* add the padding and the length in big-Endian format */ + memset(s->leftover + s->leftover_offset, 0x00, + sizeof(s->leftover) - 8 - s->leftover_offset); + s->leftover[sizeof(s->leftover) - 1] = (uint8_t)(s->bits_hashed); + s->leftover[sizeof(s->leftover) - 2] = (uint8_t)(s->bits_hashed >> 8); + s->leftover[sizeof(s->leftover) - 3] = (uint8_t)(s->bits_hashed >> 16); + s->leftover[sizeof(s->leftover) - 4] = (uint8_t)(s->bits_hashed >> 24); + s->leftover[sizeof(s->leftover) - 5] = (uint8_t)(s->bits_hashed >> 32); + s->leftover[sizeof(s->leftover) - 6] = (uint8_t)(s->bits_hashed >> 40); + s->leftover[sizeof(s->leftover) - 7] = (uint8_t)(s->bits_hashed >> 48); + s->leftover[sizeof(s->leftover) - 8] = (uint8_t)(s->bits_hashed >> 56); + + /* hash the padding and length */ + compress(s->iv, s->leftover); + + /* copy the iv out to digest */ + 
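+	/* Each 32-bit word of the internal state is serialized
+	 * most-significant byte first (big endian), as SHA-256 requires.
+	 */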
for (i = 0; i < TC_SHA256_STATE_BLOCKS; ++i) { + unsigned int t = *((unsigned int *) &s->iv[i]); + *digest++ = (uint8_t)(t >> 24); + *digest++ = (uint8_t)(t >> 16); + *digest++ = (uint8_t)(t >> 8); + *digest++ = (uint8_t)(t); + } + + /* destroy the current state */ + memset(s, 0, sizeof(*s)); + + return TC_CRYPTO_SUCCESS; +} + +/* + * Initializing SHA-256 Hash constant words K. + * These values correspond to the first 32 bits of the fractional parts of the + * cube roots of the first 64 primes between 2 and 311. + */ +static const unsigned int k256[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, + 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, + 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, + 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, + 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, + 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +static inline unsigned int ROTR(unsigned int a, unsigned int n) +{ + return (((a) >> n) | ((a) << (32 - n))); +} + +#define Sigma0(a)(ROTR((a), 2) ^ ROTR((a), 13) ^ ROTR((a), 22)) +#define Sigma1(a)(ROTR((a), 6) ^ ROTR((a), 11) ^ ROTR((a), 25)) +#define sigma0(a)(ROTR((a), 7) ^ ROTR((a), 18) ^ ((a) >> 3)) +#define sigma1(a)(ROTR((a), 17) ^ ROTR((a), 19) ^ ((a) >> 10)) + +#define Ch(a, b, c)(((a) & (b)) ^ ((~(a)) & (c))) +#define Maj(a, b, c)(((a) & (b)) ^ ((a) & (c)) ^ ((b) & (c))) + +static inline unsigned int BigEndian(const uint8_t **c) +{ + unsigned int n = 0; + + n = (((unsigned int)(*((*c)++))) << 24); + n |= ((unsigned int)(*((*c)++)) << 16); + n |= ((unsigned int)(*((*c)++)) << 8); + n |= ((unsigned int)(*((*c)++))); + return n; +} + +static void compress(unsigned int *iv, const uint8_t *data) +{ + unsigned int a, b, c, d, e, f, g, h; + unsigned int s0, s1; + unsigned int t1, t2; + unsigned int work_space[16]; + unsigned int n; + unsigned int i; + + a = iv[0]; b = iv[1]; c = iv[2]; d = iv[3]; + e = iv[4]; f = iv[5]; g = iv[6]; h = iv[7]; + + for (i = 0; i < 16; ++i) { + n = BigEndian(&data); + t1 = work_space[i] = n; + t1 += h + Sigma1(e) + Ch(e, f, g) + k256[i]; + t2 = Sigma0(a) + Maj(a, b, c); + h = g; g = f; f = e; e = d + t1; + d = c; c = b; b = a; a = t1 + t2; + } + + for ( ; i < 64; ++i) { + s0 = work_space[(i+1)&0x0f]; + s0 = sigma0(s0); + s1 = work_space[(i+14)&0x0f]; + s1 = sigma1(s1); + + t1 = work_space[i&0xf] += s0 + s1 + work_space[(i+9)&0xf]; + t1 += h + Sigma1(e) + Ch(e, f, g) + k256[i]; + t2 = Sigma0(a) + Maj(a, b, c); + h = g; g = f; f = e; e = d + t1; + d = c; c = b; b = a; a = t1 + t2; + } + + iv[0] += a; iv[1] += b; iv[2] += c; iv[3] += d; + iv[4] += e; iv[5] += f; iv[6] += g; iv[7] += h; +} diff --git a/hypervisor/lib/crypto/tinycrypt/sha256.h b/hypervisor/lib/crypto/tinycrypt/sha256.h new file mode 100644 index 000000000..9d0fb468e --- /dev/null +++ b/hypervisor/lib/crypto/tinycrypt/sha256.h @@ -0,0 +1,130 @@ +/* sha256.h - TinyCrypt interface to a SHA-256 implementation */ + +/* + * Copyright (C) 2017 by Intel Corporation, All Rights Reserved. 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * @brief Interface to a SHA-256 implementation.
+ *
+ * Overview: SHA-256 is a NIST approved cryptographic hashing algorithm
+ * specified in FIPS 180. A hash algorithm maps data of arbitrary
+ * size to data of fixed length.
+ *
+ * Security: SHA-256 provides 128 bits of security against collision attacks
+ * and 256 bits of security against pre-image attacks. SHA-256 does
+ * NOT behave like a random oracle, but it can be used as one if
+ * the string being hashed is prefix-free encoded before hashing.
+ *
+ * Usage: 1) call tc_sha256_init to initialize a struct
+ * tc_sha256_state before hashing a new string.
+ *
+ * 2) call tc_sha256_update to hash the next string segment;
+ * tc_sha256_update can be called as many times as needed to hash
+ * all of the segments of a string; the order is important.
+ *
+ * 3) call tc_sha256_final to output the digest from a hashing
+ * operation; an illustrative example follows.
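+ *
+ * Example: a minimal sketch of the flow above (illustrative only; "data"
+ * and "len" are hypothetical caller-provided values, not part of this
+ * interface):
+ *
+ *     struct tc_sha256_state s;
+ *     uint8_t digest[TC_SHA256_DIGEST_SIZE];
+ *
+ *     (void)tc_sha256_init(&s);
+ *     (void)tc_sha256_update(&s, data, len);
+ *     (void)tc_sha256_final(digest, &s);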
+ */ + +#ifndef __TC_SHA256_H__ +#define __TC_SHA256_H__ + +#include + +#ifdef __cplusplus + +extern "C" { +#endif + +#define TC_CRYPTO_SUCCESS 1 +#define TC_CRYPTO_FAIL 0 +#define TC_SHA256_BLOCK_SIZE (64) +#define TC_SHA256_DIGEST_SIZE (32) +#define TC_SHA256_STATE_BLOCKS (TC_SHA256_DIGEST_SIZE/4) + +struct tc_sha256_state { + unsigned int iv[TC_SHA256_STATE_BLOCKS]; + uint64_t bits_hashed; + uint8_t leftover[TC_SHA256_BLOCK_SIZE]; + size_t leftover_offset; +}; + +/** + * @brief SHA256 initialization procedure + * Initializes s + * @return returns TC_CRYPTO_SUCCESS (1) + * returns TC_CRYPTO_FAIL (0) if s == NULL + * @param s Sha256 state struct + */ +int tc_sha256_init(struct tc_sha256_state *s); + +/** + * @brief SHA256 update procedure + * Hashes data_length bytes addressed by data into state s + * @return returns TC_CRYPTO_SUCCESS (1) + * returns TC_CRYPTO_FAIL (0) if: + * s == NULL, + * s->iv == NULL, + * data == NULL + * @note Assumes s has been initialized by tc_sha256_init + * @warning The state buffer 'leftover' is left in memory after processing + * If your application intends to have sensitive data in this + * buffer, remind to erase it after the data has been processed + * @param s Sha256 state struct + * @param data message to hash + * @param datalen length of message to hash + */ +int tc_sha256_update(struct tc_sha256_state *s, const uint8_t *data, + size_t datalen); + +/** + * @brief SHA256 final procedure + * Inserts the completed hash computation into digest + * @return returns TC_CRYPTO_SUCCESS (1) + * returns TC_CRYPTO_FAIL (0) if: + * s == NULL, + * s->iv == NULL, + * digest == NULL + * @note Assumes: s has been initialized by tc_sha256_init + * digest points to at least TC_SHA256_DIGEST_SIZE bytes + * @warning The state buffer 'leftover' is left in memory after processing + * If your application intends to have sensitive data in this + * buffer, remind to erase it after the data has been processed + * @param digest unsigned eight bit integer + * @param Sha256 state struct + */ +int tc_sha256_final(uint8_t *digest, struct tc_sha256_state *s); + +#ifdef __cplusplus +} +#endif + +#endif /* __TC_SHA256_H__ */ diff --git a/hypervisor/lib/div.c b/hypervisor/lib/div.c new file mode 100644 index 000000000..c3601d6e0 --- /dev/null +++ b/hypervisor/lib/div.c @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +static int do_udiv32(uint32_t dividend, uint32_t divisor, + struct udiv_result *res) +{ + + uint32_t mask; + /* dividend is always greater than or equal to the divisor. Neither + * divisor nor dividend are 0. Thus: * clz(dividend) and clz(divisor) + * are valid * clz(dividend)<=clz(divisor) + */ + + mask = clz(divisor) - clz(dividend); + /* align divisor and dividend */ + divisor <<= mask; + mask = 1U << mask; + /* division loop */ + do { + if (dividend >= divisor) { + dividend -= divisor; + res->q.dwords.low |= mask; + } + divisor >>= 1; + } while (((mask >>= 1) != 0) && (dividend != 0)); + /* dividend now contains the reminder */ + res->r.dwords.low = dividend; + return 0; +} + +int udiv32(uint32_t dividend, uint32_t divisor, struct udiv_result *res) +{ + + /* initialize the result */ + res->q.dwords.low = res->r.dwords.low = 0; + /* test for "division by 0" condition */ + if (divisor == 0) { + res->q.dwords.low = 0xffffffff; + return !0; + } + /* trivial case: divisor==dividend */ + if (divisor == dividend) { + res->q.dwords.low = 1; + return 0; + } + /* trivial case: divisor>dividend */ + if (divisor > dividend) { + res->r.dwords.low = dividend; + return 0; + } + /* now that the trivial cases are eliminated we can call the generic + * function. + */ + return do_udiv32(dividend, divisor, res); +} + +int udiv64(uint64_t dividend, uint64_t divisor, struct udiv_result *res) +{ + + uint64_t mask; + uint64_t bits; + + /* initialize the result */ + res->q.qword = res->r.qword = 0; + /* test for "division by 0" condition */ + if (divisor == 0) { + res->q.qword = 0xffffffffffffffffull; + return -1; + } + /* trivial case: divisor==dividend */ + if (divisor == dividend) { + res->q.qword = 1; + return 0; + } + /* trivial case: divisor>dividend */ + if (divisor > dividend) { + res->r.qword = dividend; + return 0; + } + /* simplified case: only 32 bit operands Note that the preconditions + * for do_udiv32() are fulfilled, since the tests were made above. + */ + if (((divisor >> 32) == 0) && ((dividend >> 32) == 0)) + return do_udiv32((uint32_t) dividend, (uint32_t) divisor, res); + + /* dividend is always greater than or equal to the divisor. Neither + * divisor nor dividend are 0. Thus: * clz(dividend) and clz(divisor) + * are valid * clz(dividend)<=clz(divisor) + */ + + /* align divisor and dividend. */ + bits = clz64(divisor) - clz64(dividend); + divisor <<= bits; + mask = 1ULL << bits; + /* division loop */ + do { + if (dividend >= divisor) { + dividend -= divisor; + res->q.qword |= mask; + } + divisor >>= 1; + mask >>= 1; + } while ((bits-- != 0) && (dividend != 0)); + + res->r.qword = dividend; + return 0; +} diff --git a/hypervisor/lib/mdelay.c b/hypervisor/lib/mdelay.c new file mode 100644 index 000000000..ac1a3660d --- /dev/null +++ b/hypervisor/lib/mdelay.c @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +void mdelay(uint32_t loop_count) +{ + /* Loop until done */ + while (loop_count-- != 0) { + /* Delay for 1 ms */ + udelay(1000); + } +} diff --git a/hypervisor/lib/mem_mgt.c b/hypervisor/lib/mem_mgt.c new file mode 100644 index 000000000..21841b234 --- /dev/null +++ b/hypervisor/lib/mem_mgt.c @@ -0,0 +1,324 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include + +/************************************************************************/ +/* Memory pool declaration (block size = MALLOC_ALIGN) */ +/************************************************************************/ +#define __bss_noinit __attribute__((__section__(".bss_noinit"))) + +static uint8_t __bss_noinit Malloc_Heap[HEAP_SIZE] __aligned(MALLOC_ALIGN); + +#define MALLOC_HEAP_BUFF_SIZE MALLOC_ALIGN +#define MALLOC_HEAP_TOTAL_BUFF (HEAP_SIZE/MALLOC_HEAP_BUFF_SIZE) +#define MALLOC_HEAP_BITMAP_SIZE \ + INT_DIV_ROUNDUP(MALLOC_HEAP_TOTAL_BUFF, BITMAP_WORD_SIZE) +static uint32_t Malloc_Heap_Bitmap[MALLOC_HEAP_BITMAP_SIZE]; +static uint32_t Malloc_Heap_Contiguity_Bitmap[MALLOC_HEAP_BITMAP_SIZE]; + +struct mem_pool Memory_Pool = { + .start_addr = Malloc_Heap, + .spinlock = {.head = 0, .tail = 0}, + .size = HEAP_SIZE, + .buff_size = MALLOC_HEAP_BUFF_SIZE, + .total_buffs = MALLOC_HEAP_TOTAL_BUFF, + .bmp_size = MALLOC_HEAP_BITMAP_SIZE, + .bitmap = Malloc_Heap_Bitmap, + .contiguity_bitmap = Malloc_Heap_Contiguity_Bitmap +}; + +/************************************************************************/ +/* Memory pool declaration (block size = CPU_PAGE_SIZE) */ +/************************************************************************/ +static uint8_t __bss_noinit +Paging_Heap[NUM_ALLOC_PAGES][CPU_PAGE_SIZE] __aligned(CPU_PAGE_SIZE); + +#define PAGING_HEAP_BUFF_SIZE CPU_PAGE_SIZE +#define PAGING_HEAP_TOTAL_BUFF NUM_ALLOC_PAGES +#define PAGING_HEAP_BITMAP_SIZE \ + INT_DIV_ROUNDUP(PAGING_HEAP_TOTAL_BUFF, BITMAP_WORD_SIZE) +static uint32_t Paging_Heap_Bitmap[PAGING_HEAP_BITMAP_SIZE]; +static uint32_t Paging_Heap_Contiguity_Bitmap[MALLOC_HEAP_BITMAP_SIZE]; + +struct mem_pool Paging_Memory_Pool = { + .start_addr = Paging_Heap, + .spinlock = {.head = 0, .tail = 0}, + .size = NUM_ALLOC_PAGES * CPU_PAGE_SIZE, + .buff_size = PAGING_HEAP_BUFF_SIZE, + .total_buffs = PAGING_HEAP_TOTAL_BUFF, + .bmp_size = PAGING_HEAP_BITMAP_SIZE, + .bitmap = Paging_Heap_Bitmap, + .contiguity_bitmap = Paging_Heap_Contiguity_Bitmap +}; + +static void *allocate_mem(struct mem_pool *pool, unsigned int num_bytes) +{ + + void *memory = NULL; + uint32_t idx, bit_idx; + uint32_t requested_buffs; + + /* Check if provided memory pool exists */ + if (pool == NULL) + return NULL; + + /* Acquire the pool lock */ + spinlock_obtain(&pool->spinlock); + + /* Calculate number of buffers to be allocated from memory pool */ + requested_buffs = INT_DIV_ROUNDUP(num_bytes, pool->buff_size); + + for (idx = 0; idx < pool->bmp_size; idx++) { + /* Find the first occurrence of requested_buffs number of free + * buffers. The 0th bit in bitmap represents a free buffer. 
+ */ + for (bit_idx = get_first_zero_bit(pool->bitmap[idx]); + bit_idx < BITMAP_WORD_SIZE; bit_idx++) { + /* Check if selected buffer is free */ + if (pool->bitmap[idx] & (1 << bit_idx)) + continue; + + /* Declare temporary variables to be used locally in + * this block + */ + uint32_t i; + uint32_t tmp_bit_idx = bit_idx; + uint32_t tmp_idx = idx; + + /* Check requested_buffs number of buffers availability + * in memory-pool right after selected buffer + */ + for (i = 1; i < requested_buffs; i++) { + /* Check if tmp_bit_idx is out-of-range */ + if (++tmp_bit_idx == BITMAP_WORD_SIZE) { + /* Break the loop if tmp_idx is + * out-of-range + */ + if (++tmp_idx == pool->bmp_size) + break; + /* Reset tmp_bit_idx */ + tmp_bit_idx = 0; + } + + /* Break if selected buffer is not free */ + if (pool->bitmap[tmp_idx] & (1 << tmp_bit_idx)) + break; + } + + /* Check if requested_buffs number of free contiguous + * buffers are found in memory pool + */ + if (i == requested_buffs) { + /* Get start address of first buffer among + * selected free contiguous buffer in the + * memory pool + */ + memory = (char *)pool->start_addr + + pool->buff_size * (idx * BITMAP_WORD_SIZE + + bit_idx); + + /* Update allocation bitmaps information for + * selected buffers + */ + for (i = 0; i < requested_buffs; i++) { + /* Set allocation bit in bitmap for + * this buffer + */ + pool->bitmap[idx] |= (1 << bit_idx); + + /* Set contiguity information for this + * buffer in contiguity-bitmap + */ + if (i < (requested_buffs - 1)) { + /* Set contiguity bit to 1 if + * this buffer is not the last + * of selected contiguous + * buffers array + */ + pool->contiguity_bitmap[idx] |= + (1 << bit_idx); + } else { + /* Set contiguity bit to 0 if + * this buffer is not the last + * of selected contiguous + * buffers array + */ + pool->contiguity_bitmap[idx] &= + ~(1 << bit_idx); + } + + /* Check if bit_idx is out-of-range */ + if (++bit_idx == BITMAP_WORD_SIZE) { + /* Increment idx */ + idx++; + /* Reset bit_idx */ + bit_idx = 0; + } + } + + /* Release the pool lock. */ + spinlock_release(&pool->spinlock); + + return memory; + } + /* Update bit_idx and idx */ + bit_idx = tmp_bit_idx; + idx = tmp_idx; + } + } + + /* Release the pool lock. */ + spinlock_release(&pool->spinlock); + + return (void *)NULL; +} + +static void deallocate_mem(struct mem_pool *pool, void *ptr) +{ + uint32_t *bitmask, *contiguity_bitmask; + uint32_t bmp_idx, bit_idx, buff_idx; + + if ((pool != NULL) && (ptr != NULL)) { + /* Acquire the pool lock */ + spinlock_obtain(&pool->spinlock); + + /* Map the buffer address to its index. */ + buff_idx = ((char *)ptr - (char *)pool->start_addr) / + pool->buff_size; + + /* De-allocate all allocated contiguous memory buffers */ + while (buff_idx < pool->total_buffs) { + /* Translate the buffer index to bitmap index. */ + bmp_idx = buff_idx / BITMAP_WORD_SIZE; + bit_idx = buff_idx % BITMAP_WORD_SIZE; + + /* Get bitmap's reference for this buffer */ + bitmask = &pool->bitmap[bmp_idx]; + contiguity_bitmask = &pool->contiguity_bitmap[bmp_idx]; + + /* Mark the buffer as free */ + if (*bitmask & (1 << bit_idx)) + *bitmask ^= (1 << bit_idx); + else + break; + + /* Reset the Contiguity bit of buffer */ + if (*contiguity_bitmask & (1 << bit_idx)) + *contiguity_bitmask ^= (1 << bit_idx); + else + break; + + /* Increment buff_idx */ + buff_idx++; + } + + /* Release the pool lock. 
*/ + spinlock_release(&pool->spinlock); + } +} + +void *malloc(unsigned int num_bytes) +{ + void *memory = NULL; + + /* Check if bytes requested extend page-size */ + if (num_bytes < CPU_PAGE_SIZE) { + /* Request memory allocation from smaller segmented memory pool + */ + memory = allocate_mem(&Memory_Pool, num_bytes); + } else { + int page_num = + (num_bytes + CPU_PAGE_SIZE - 1) >> CPU_PAGE_SHIFT; + /* Request memory allocation through alloc_page */ + memory = alloc_pages(page_num); + } + + /* Check if memory allocation is successful */ + ASSERT(memory != NULL, ""); + + /* Return memory pointer to caller */ + return memory; +} + +void *alloc_pages(unsigned int page_num) +{ + void *memory = NULL; + + /* Request memory allocation from Page-aligned memory pool */ + memory = allocate_mem(&Paging_Memory_Pool, page_num * CPU_PAGE_SIZE); + + /* Check if memory allocation is successful */ + ASSERT(memory != NULL, ""); + + return memory; +} + +void *alloc_page(void) +{ + return alloc_pages(1); +} + +void *calloc(unsigned int num_elements, unsigned int element_size) +{ + void *memory = malloc(num_elements * element_size); + + /* Determine if memory was allocated */ + if (memory != NULL) { + /* Zero all the memory */ + memset(memory, 0, num_elements * element_size); + } + + /* Return pointer to memory */ + return memory; +} + +void free(void *ptr) +{ + /* Check if ptr belongs to 16-Bytes aligned Memory Pool */ + if ((Memory_Pool.start_addr < ptr) && + (ptr < (Memory_Pool.start_addr + + (Memory_Pool.total_buffs * Memory_Pool.buff_size)))) { + /* Free buffer in 16-Bytes aligned Memory Pool */ + deallocate_mem(&Memory_Pool, ptr); + } + /* Check if ptr belongs to page aligned Memory Pool */ + else if ((Paging_Memory_Pool.start_addr < ptr) && + (ptr < (Paging_Memory_Pool.start_addr + + (Paging_Memory_Pool.total_buffs * + Paging_Memory_Pool.buff_size)))) { + /* Free buffer in page aligned Memory Pool */ + deallocate_mem(&Paging_Memory_Pool, ptr); + } +} diff --git a/hypervisor/lib/memchr.c b/hypervisor/lib/memchr.c new file mode 100644 index 000000000..76a30d3ca --- /dev/null +++ b/hypervisor/lib/memchr.c @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +void *memchr(const void *void_s, int c, size_t n) +{ + unsigned char val = (unsigned char)c; + unsigned char *ptr = (unsigned char *)void_s; + unsigned char *end = ptr + n; + + while (ptr < end) { + + if (*ptr++ == val) + return ((void *)(ptr - 1)); + } + + return NULL; +} diff --git a/hypervisor/lib/memcpy.c b/hypervisor/lib/memcpy.c new file mode 100644 index 000000000..1fa00bd5a --- /dev/null +++ b/hypervisor/lib/memcpy.c @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + + +/*********************************************************************** + * + * FUNCTION + * + * memcpy_s + * + * DESCRIPTION + * + * Copies at most slen bytes from src address to dest address, + * up to dmax. 
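+ *
+ * EXAMPLE
+ *
+ * A minimal sketch (illustrative only; "dst" and "src" are
+ * hypothetical caller-provided buffers):
+ *
+ *     char dst[16];
+ *     (void)memcpy_s(dst, sizeof(dst), src, 8U);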
+ * + * INPUTS + * + * d pointer to Destination address + * dmax maximum length of dest + * s pointer to Source address + * slen maximum number of bytes of src to copy + * + * OUTPUTS + * + * void * pointer to destination address + * + ***********************************************************************/ +void *memcpy_s(void *d, size_t dmax, const void *s, size_t slen) +{ + + uint8_t *dest8; + uint8_t *src8; + + /*same memory block, no need to copy*/ + if (d == s) + return d; + + ASSERT((slen != 0) && (dmax != 0) && (dmax >= slen), + "invalid slen or dmax."); + + ASSERT(((d > s) && (d > s + slen - 1)) + || ((d < s) && (s > d + dmax - 1)), + "overlap happened."); + + dest8 = (uint8_t *)d; + src8 = (uint8_t *)s; + + /*small data block*/ + if (slen < 8) { + while (slen) { + *dest8++ = *src8++; + slen--; + } + + return d; + } + + /*make sure 8bytes-aligned for at least one addr.*/ + if ((!MEM_ALIGNED_CHECK(src8, 8)) && (!MEM_ALIGNED_CHECK(dest8, 8))) { + for (; slen && (((uint64_t)src8) & 7); slen--) + *dest8++ = *src8++; + } + + /*copy main data blocks, with rep prefix*/ + if (slen > 8) { + uint32_t ecx; + + asm volatile ("cld; rep; movsq" + : "=&c"(ecx), "=&D"(dest8), "=&S"(src8) + : "0" (slen / 8), "1" (dest8), "2" (src8) + : "memory"); + + slen = slen % 8; + } + + /*tail bytes*/ + while (slen) { + *dest8++ = *src8++; + slen--; + } + + return d; +} diff --git a/hypervisor/lib/memset.c b/hypervisor/lib/memset.c new file mode 100644 index 000000000..c6aef79ec --- /dev/null +++ b/hypervisor/lib/memset.c @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include + +void *memset(void *base, uint8_t v, size_t n) +{ + uint8_t *dest_p; + size_t n_q; + size_t count; + + dest_p = (uint8_t *)base; + + if ((dest_p == NULL) || (n == 0)) + return NULL; + + /*do the few bytes to get uint64_t alignment*/ + count = n; + for (; count && ((uint64_t)dest_p & 7); count--) + *dest_p++ = v; + + /*64-bit mode*/ + n_q = count >> 3; + asm volatile("cld ; rep ; stosq ; movl %3,%%ecx ; rep ; stosb" + : "+c"(n_q), "+D"(dest_p) + : "a" (v * 0x0101010101010101U), + "r"((unsigned int)count & 7)); + + return (void *)dest_p; +} diff --git a/hypervisor/lib/spinlock.c b/hypervisor/lib/spinlock.c new file mode 100644 index 000000000..9c8d5d6fe --- /dev/null +++ b/hypervisor/lib/spinlock.c @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +inline int spinlock_init(spinlock_t *lock) +{ + memset(lock, 0, sizeof(spinlock_t)); + return 0; +} +int spinlock_obtain(spinlock_t *lock) +{ + + /* The lock function atomically increments and exchanges the head + * counter of the queue. If the old head of the queue is equal to the + * tail, we have locked the spinlock. Otherwise we have to wait. + */ + + asm volatile (" lock xaddl %%eax,%[head]\n" + " cmpl %%eax,%[tail]\n" + " jz 1f\n" + "2: pause\n" + " cmpl %%eax,%[tail]\n" + " jnz 2b\n" + "1:\n" + : + : "a" (1), + [head] "m"(lock->head), + [tail] "m"(lock->tail) + : "cc", "memory"); + return 0; +} diff --git a/hypervisor/lib/stdlib.c b/hypervisor/lib/stdlib.c new file mode 100644 index 000000000..942ec94cf --- /dev/null +++ b/hypervisor/lib/stdlib.c @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +char hexdigit(int decimal_val) +{ + static const char hexdigits[] = { '0', '1', '2', '3', '4', '5', '6', + '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + + /* Return hex character */ + return hexdigits[decimal_val & 0x0F]; +} + +int strcmp(const char *s1, const char *s2) +{ + while (*s1 && *s2 && *s1 == *s2) { + s1++; + s2++; + } + + return *s1 - *s2; +} + +int strncmp(const char *s1, const char *s2, size_t n) +{ + while (n - 1 && *s1 && *s2 && *s1 == *s2) { + s1++; + s2++; + n--; + } + + return *s1 - *s2; +} diff --git a/hypervisor/lib/strchr.c b/hypervisor/lib/strchr.c new file mode 100644 index 000000000..0d0164dca --- /dev/null +++ b/hypervisor/lib/strchr.c @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +char *strchr(const char *s, int ch) +{ + while (*s && (*s != ch)) + ++s; + + return (*s) ? ((char *)s) : 0; +} diff --git a/hypervisor/lib/strcpy.c b/hypervisor/lib/strcpy.c new file mode 100644 index 000000000..b14f6ef9e --- /dev/null +++ b/hypervisor/lib/strcpy.c @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +/** + *strcpy_s + * + * description: + * This function copies the string pointed to by s to a buffer + * pointed by d. + * + * + * input: + * d pointer to dest buffer. + * + * dmax maximum length of dest buffer + * + * s pointer to the source string + * + * return value: + * dest pointer to dest string if string is copied + * successfully,or else return null. + * + * notes: + * 1) both d and s shall not be null pointers. + * 2) dmax shall not 0. + */ +char *strcpy_s(char *d, size_t dmax, const char *s) +{ + + char *dest_base; + size_t dest_avail; + uint64_t overlap_guard; + + ASSERT(s != NULL, "invalid input s."); + ASSERT((d != NULL) && (dmax != 0), "invalid input d or dmax."); + + if (s == d) + return d; + + overlap_guard = (uint64_t)((d > s) ? (d - s - 1) : (s - d - 1)); + + dest_avail = dmax; + dest_base = d; + + while (dest_avail > 0) { + ASSERT(overlap_guard != 0, "overlap happened."); + + *d = *s; + if (*d == '\0') + return dest_base; + + d++; + s++; + dest_avail--; + overlap_guard--; + } + + ASSERT(false, "dest buffer has no enough space."); + + /* + * to avoid a string that is not + * null-terminated in dest buffer + */ + dest_base[dmax - 1] = '\0'; + return NULL; +} diff --git a/hypervisor/lib/strncpy.c b/hypervisor/lib/strncpy.c new file mode 100644 index 000000000..30b69e326 --- /dev/null +++ b/hypervisor/lib/strncpy.c @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+
+/*
+ * strncpy_s
+ *
+ * description:
+ *    This function copies at most 'slen' characters from the string
+ *    pointed to by s to a buffer pointed to by d.
+ *
+ * input:
+ *    d         pointer to dest buffer.
+ *
+ *    dmax      maximum length of dest buffer.
+ *
+ *    s         pointer to the source string.
+ *
+ *    slen      the maximum number of characters to copy from source
+ *              string.
+ *
+ * return value:
+ *    dest      pointer to dest string if source string is copied
+ *              successfully, or else return null.
+ *
+ * notes:
+ *    1) neither dmax nor slen may be 0.
+ *    2) both d and s shall not be null pointers.
+ *    3) will assert() if the buffers overlap or the dest buffer
+ *       does not have enough space.
+ */
+char *strncpy_s(char *d, size_t dmax, const char *s, size_t slen)
+{
+	char *dest_base;
+	size_t dest_avail;
+	uint64_t overlap_guard;
+
+	ASSERT((d != NULL) && (s != NULL), "invalid input d or s");
+	ASSERT((dmax != 0) && (slen != 0), "invalid input dmax or slen");
+
+	if (d == s)
+		return d;
+
+	overlap_guard = (uint64_t)((d > s) ? (d - s - 1) : (s - d - 1));
+
+	dest_base = d;
+	dest_avail = dmax;
+
+	while (dest_avail > 0) {
+		ASSERT(overlap_guard != 0, "overlap happened.");
+
+		if (slen == 0) {
+			*d = '\0';
+			return dest_base;
+		}
+
+		*d = *s;
+		if (*d == '\0')
+			return dest_base;
+
+		d++;
+		s++;
+		slen--;
+		dest_avail--;
+		overlap_guard--;
+	}
+
+	ASSERT(false, "dest buffer does not have enough space.");
+
+	/*
+	 * to avoid a string that is not
+	 * null-terminated in dest buffer
+	 */
+	dest_base[dmax - 1] = '\0';
+	return NULL;
+}
diff --git a/hypervisor/lib/strnlen.c b/hypervisor/lib/strnlen.c
new file mode 100644
index 000000000..0b4ad625f
--- /dev/null
+++ b/hypervisor/lib/strnlen.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2018 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+
+
+/**
+ *
+ * strnlen_s
+ *
+ * description:
+ *    The function calculates the length of the string pointed
+ *    to by str, examining at most maxlen characters.
+ *
+ *
+ * input:
+ *    str      pointer to the null-terminated string to be examined.
+ *
+ *    maxlen   maximum number of characters to examine.
+ *
+ *
+ * return value:
+ *    string length, excluding the null character.
+ *    returns 0 if str is NULL.
+ */
+size_t strnlen_s(const char *str, size_t maxlen)
+{
+	size_t count;
+
+	if (str == NULL)
+		return 0;
+
+	count = 0;
+	while (*str) {
+		if (maxlen == 0)
+			break;
+
+		count++;
+		maxlen--;
+		str++;
+	}
+
+	return count;
+}
diff --git a/hypervisor/lib/strtol.c b/hypervisor/lib/strtol.c
new file mode 100644
index 000000000..5da9c148d
--- /dev/null
+++ b/hypervisor/lib/strtol.c
@@ -0,0 +1,377 @@
+/*
+ * Copyright (c) 1990 The Regents of the University of California.
+ * Copyright (c) 2017 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. [rescinded 22 July 1999]
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* FIXME: It'd be nice to configure around these, but the include files are too + * painful. These macros should at least be more portable than hardwired hex + * constants. + */ + +#include + +/* Categories. */ + +enum { + /* In C99 */ + _sch_isblank = 0x0001, /* space \t */ + _sch_iscntrl = 0x0002, /* nonprinting characters */ + _sch_isdigit = 0x0004, /* 0-9 */ + _sch_islower = 0x0008, /* a-z */ + _sch_isprint = 0x0010, /* any printing character including ' ' */ + _sch_ispunct = 0x0020, /* all punctuation */ + _sch_isspace = 0x0040, /* space \t \n \r \f \v */ + _sch_isupper = 0x0080, /* A-Z */ + _sch_isxdigit = 0x0100, /* 0-9A-Fa-f */ + + /* Extra categories useful to cpplib. */ + _sch_isidst = 0x0200, /* A-Za-z_ */ + _sch_isvsp = 0x0400, /* \n \r */ + _sch_isnvsp = 0x0800, /* space \t \f \v \0 */ + + /* Combinations of the above. */ + _sch_isalpha = _sch_isupper|_sch_islower, /* A-Za-z */ + _sch_isalnum = _sch_isalpha|_sch_isdigit, /* A-Za-z0-9 */ + _sch_isidnum = _sch_isidst|_sch_isdigit, /* A-Za-z0-9_ */ + _sch_isgraph = _sch_isalnum|_sch_ispunct, /* isprint and not space */ + _sch_iscppsp = _sch_isvsp|_sch_isnvsp, /* isspace + \0 */ + /* basic charset of ISO C (plus ` and @) */ + _sch_isbasic = _sch_isprint|_sch_iscppsp +}; + +/* Shorthand */ +#define bl _sch_isblank +#define cn _sch_iscntrl +#define di _sch_isdigit +#define is _sch_isidst +#define lo _sch_islower +#define nv _sch_isnvsp +#define pn _sch_ispunct +#define pr _sch_isprint +#define sp _sch_isspace +#define up _sch_isupper +#define vs _sch_isvsp +#define xd _sch_isxdigit + +/* Masks. */ +#define L ((const uint16_t)(lo | is | pr)) /* lower case letter */ +#define XL ((const uint16_t)(lo | is | xd | pr))/* lowercase hex digit */ +#define U ((const uint16_t)(up | is | pr)) /* upper case letter */ +#define XU ((const uint16_t)(up | is | xd | pr))/* uppercase hex digit */ +#define D ((const uint16_t)(di | xd | pr)) /* decimal digit */ +#define P ((const uint16_t)(pn | pr)) /* punctuation */ +#define _ ((const uint16_t)(pn | is | pr)) /* underscore */ + +#define C ((const uint16_t)(cn)) /* control character */ +#define Z ((const uint16_t)(nv | cn)) /* NUL */ +#define M ((const uint16_t)(nv | sp | cn)) /* cursor movement: \f \v */ +#define V ((const uint16_t)(vs | sp | cn)) /* vertical space: \r \n */ +#define T ((const uint16_t)(nv | sp | bl | cn))/* tab */ +#define S ((const uint16_t)(nv | sp | bl | pr))/* space */ + +/* Character classification. */ +const uint16_t _sch_istable[256] = { + Z, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */ + C, T, V, M, M, V, C, C, /* BS HT LF VT FF CR SO SI */ + C, C, C, C, C, C, C, C, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */ + C, C, C, C, C, C, C, C, /* CAN EM SUB ESC FS GS RS US */ + S, P, P, P, P, P, P, P, /* SP ! " # $ % & ' */ + P, P, P, P, P, P, P, P, /* ( ) * + , - . / */ + D, D, D, D, D, D, D, D, /* 0 1 2 3 4 5 6 7 */ + D, D, P, P, P, P, P, P, /* 8 9 : ; < = > ? 
*/ + P, XU, XU, XU, XU, XU, XU, U, /* @ A B C D E F G */ + U, U, U, U, U, U, U, U, /* H I J K L M N O */ + U, U, U, U, U, U, U, U, /* P Q R S T U V W */ + U, U, U, P, P, P, P, _, /* X Y Z [ \ ] ^ _ */ + P, XL, XL, XL, XL, XL, XL, L, /* ` a b c d e f g */ + L, L, L, L, L, L, L, L, /* h i j k l m n o */ + L, L, L, L, L, L, L, L, /* p q r s t u v w */ + L, L, L, P, P, P, P, C, /* x y z { | } ~ DEL */ + + /* high half of unsigned char is locale-specific, so all tests are + * false in "C" locale + */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +#define _sch_test(c, bit) (_sch_istable[(c) & 0xff] & (uint16_t)(bit)) + +#define ISALPHA(c) _sch_test(c, _sch_isalpha) +#define ISALNUM(c) _sch_test(c, _sch_isalnum) +#define ISBLANK(c) _sch_test(c, _sch_isblank) +#define ISCNTRL(c) _sch_test(c, _sch_iscntrl) +#define ISDIGIT(c) _sch_test(c, _sch_isdigit) +#define ISGRAPH(c) _sch_test(c, _sch_isgraph) +#define ISLOWER(c) _sch_test(c, _sch_islower) +#define ISPRINT(c) _sch_test(c, _sch_isprint) +#define ISPUNCT(c) _sch_test(c, _sch_ispunct) +#define ISSPACE(c) _sch_test(c, _sch_isspace) +#define ISUPPER(c) _sch_test(c, _sch_isupper) +#define ISXDIGIT(c) _sch_test(c, _sch_isxdigit) + +#define ISIDNUM(c) _sch_test(c, _sch_isidnum) +#define ISIDST(c) _sch_test(c, _sch_isidst) +#define IS_ISOBASIC(c) _sch_test(c, _sch_isbasic) +#define IS_VSPACE(c) _sch_test(c, _sch_isvsp) +#define IS_NVSPACE(c) _sch_test(c, _sch_isnvsp) +#define IS_SPACE_OR_NUL(c) _sch_test(c, _sch_iscppsp) + +/* Character transformation. 
*/ +const uint8_t _sch_tolower[256] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, + + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + + 91, 92, 93, 94, 95, 96, + + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + + 123, 124, 125, 126, 127, + + 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, + 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, + 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, + 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, + 184, 185, 186, 187, 188, 189, 190, 191, + + 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, + 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, + 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, + 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, + 248, 249, 250, 251, 252, 253, 254, 255, +}; + +const uint8_t _sch_toupper[256] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, + + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + + 91, 92, 93, 94, 95, 96, + + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + + 123, 124, 125, 126, 127, + + 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, + 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, + 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, + 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, + 184, 185, 186, 187, 188, 189, 190, 191, + + 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, + 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, + 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, + 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, + 248, 249, 250, 251, 252, 253, 254, 255, +}; +#define TOUPPER(c) _sch_toupper[(c) & 0xff] +#define TOLOWER(c) _sch_tolower[(c) & 0xff] + +#ifndef ULONG_MAX +#define ULONG_MAX ((uint64_t)(~0L)) /* 0xFFFFFFFF */ +#endif + +#ifndef LONG_MAX +#define LONG_MAX ((long)(ULONG_MAX >> 1)) /* 0x7FFFFFFF */ +#endif + +#ifndef LONG_MIN +#define LONG_MIN ((long)(~LONG_MAX)) /* 0x80000000 */ +#endif + +/* + * Convert a string to a long integer. + * + * Ignores `locale' stuff. Assumes that the upper and lower case + * alphabets and digits are each contiguous. + */ +long +strtol(const char *nptr, char **endptr, register int base) +{ + register const char *s = nptr; + register uint64_t acc; + register int c; + register uint64_t cutoff; + register int neg = 0, any, cutlim; + + /* + * Skip white space and pick up leading +/- sign if any. + * If base is 0, allow 0x for hex and 0 for octal, else + * assume decimal; if base is already 16, allow 0x. 
+ */ + do { + c = *s++; + } while (ISSPACE(c)); + if (c == '-') { + neg = 1; + c = *s++; + } else if (c == '+') + c = *s++; + if ((base == 0 || base == 16) && + c == '0' && (*s == 'x' || *s == 'X')) { + c = s[1]; + s += 2; + base = 16; + } + if (base == 0) + base = c == '0' ? 8 : 10; + + /* + * Compute the cutoff value between legal numbers and illegal + * numbers. That is the largest legal value, divided by the + * base. An input number that is greater than this value, if + * followed by a legal input character, is too big. One that + * is equal to this value may be valid or not; the limit + * between valid and invalid numbers is then based on the last + * digit. For instance, if the range for longs is + * [-2147483648..2147483647] and the input base is 10, + * cutoff will be set to 214748364 and cutlim to either + * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated + * a value > 214748364, or equal but the next digit is > 7 (or 8), + * the number is too big, and we will return a range error. + * + * Set any if any `digits' consumed; make it negative to indicate + * overflow. + */ + cutoff = neg ? -(uint64_t)LONG_MIN : LONG_MAX; + cutlim = cutoff % (uint64_t)base; + cutoff /= (uint64_t)base; + for (acc = 0, any = 0;; c = *s++) { + if (ISDIGIT(c)) + c -= '0'; + else if (ISALPHA(c)) + c -= ISUPPER(c) ? 'A' - 10 : 'a' - 10; + else + break; + if (c >= base) + break; + if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) + any = -1; + else { + any = 1; + acc *= base; + acc += c; + } + } + if (any < 0) + acc = neg ? LONG_MIN : LONG_MAX; + else if (neg) + acc = -acc; + if (endptr != 0) + *endptr = (char *) (any ? s - 1 : nptr); + return acc; +} + +/* + * Convert a string to an uint64_t integer. + * + * Ignores `locale' stuff. Assumes that the upper and lower case + * alphabets and digits are each contiguous. + */ +uint64_t +strtoul(const char *nptr, char **endptr, register int base) +{ + register const char *s = nptr; + register uint64_t acc; + register int c; + register uint64_t cutoff; + register int neg = 0, any, cutlim; + + /* + * See strtol for comments as to the logic used. + */ + do { + c = *s++; + } while (ISSPACE(c)); + if (c == '-') { + neg = 1; + c = *s++; + } else if (c == '+') + c = *s++; + if ((base == 0 || base == 16) && + c == '0' && (*s == 'x' || *s == 'X')) { + c = s[1]; + s += 2; + base = 16; + } + if (base == 0) + base = c == '0' ? 8 : 10; + cutoff = (uint64_t)ULONG_MAX / (uint64_t)base; + cutlim = (uint64_t)ULONG_MAX % (uint64_t)base; + for (acc = 0, any = 0;; c = *s++) { + if (ISDIGIT(c)) + c -= '0'; + else if (ISALPHA(c)) + c -= ISUPPER(c) ? 'A' - 10 : 'a' - 10; + else + break; + if (c >= base) + break; + if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) + any = -1; + else { + any = 1; + acc *= base; + acc += c; + } + } + if (any < 0) + acc = ULONG_MAX; + else if (neg) + acc = -acc; + if (endptr != 0) + *endptr = (char *) (any ? s - 1 : nptr); + return acc; +} + +int +atoi(const char *str) +{ + return (int)strtol(str, (char **)NULL, 10); +} diff --git a/hypervisor/lib/udelay.c b/hypervisor/lib/udelay.c new file mode 100644 index 000000000..1a0576ece --- /dev/null +++ b/hypervisor/lib/udelay.c @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +void udelay(int loop_count) +{ + uint64_t dest_tsc, delta_tsc; + + /* Calculate number of ticks to wait */ + delta_tsc = TIME_MS_DELTA * loop_count; + dest_tsc = rdtsc() + delta_tsc; + + /* Loop until time expired */ + while + (rdtsc() < dest_tsc); +} diff --git a/hypervisor/license_header b/hypervisor/license_header new file mode 100644 index 000000000..ec77d82e4 --- /dev/null +++ b/hypervisor/license_header @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2018 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */
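
The bounded memory routines introduced above (memcpy_s in hypervisor/lib/memcpy.c, memset in hypervisor/lib/memset.c) are easiest to read alongside a short usage sketch. The fragment below is illustrative only: it assumes the prototypes are made visible through the hypervisor's lib headers, and the buffer names are invented for the example.

/* Usage sketch for memcpy_s/memset; buffers are illustrative. */
static void fill_and_copy_example(void)
{
	uint8_t src[32];
	uint8_t dst[64];

	/* memset fills byte-by-byte until 8-byte alignment, then uses
	 * rep stosq; it returns NULL if base is NULL or n is 0.
	 */
	(void)memset(src, 0xA5U, sizeof(src));

	/* memcpy_s copies slen bytes into a buffer of capacity dmax and
	 * ASSERTs if slen is 0, dmax is 0, dmax < slen, or the regions
	 * overlap.
	 */
	(void)memcpy_s(dst, sizeof(dst), src, sizeof(src));
}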
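
The bounded string helpers (strcpy_s, strncpy_s, strnlen_s) follow the same convention: the destination capacity dmax is always passed explicitly, and overflow or overlap triggers an ASSERT. A minimal sketch, again assuming the prototypes are exported by the lib headers and using an invented buffer:

/* Usage sketch for strcpy_s/strncpy_s/strnlen_s. */
static void copy_name_example(const char *name)
{
	char buf[16];
	size_t len;

	if (name == NULL)
		return;

	/* strnlen_s never walks past maxlen and returns 0 for NULL input */
	len = strnlen_s(name, sizeof(buf));

	if (len < sizeof(buf)) {
		(void)strcpy_s(buf, sizeof(buf), name);
	} else {
		/* copy at most sizeof(buf) - 1 characters; the helper
		 * null-terminates within dmax
		 */
		(void)strncpy_s(buf, sizeof(buf), name, sizeof(buf) - 1U);
	}
}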
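
The search and compare helpers (memchr, strchr, strcmp, strncmp) mirror their standard C counterparts, except that strchr returns NULL when the character is absent rather than a pointer to the terminator. A sketch with an invented option string:

/* Usage sketch for memchr/strchr/strncmp; "vuart=" is illustrative. */
static int has_option_example(const char *opts, size_t len)
{
	/* memchr scans a fixed-size region; strchr stops at the NUL */
	if ((memchr(opts, '=', len) == NULL) && (strchr(opts, '=') == NULL))
		return 0;

	/* strncmp compares at most n characters */
	return (strncmp(opts, "vuart=", 6U) == 0);
}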
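
The conversion helpers in hypervisor/lib/strtol.c and hypervisor/lib/stdlib.c (strtol, strtoul, atoi, hexdigit) behave like the classic BSD versions minus locale handling. A brief sketch, with input strings chosen purely for illustration:

/* Usage sketch for strtol/strtoul/atoi/hexdigit. */
static void parse_example(void)
{
	char *end;
	long cpu;
	uint64_t addr;

	/* base 0 auto-detects a leading "0x" (hex) or "0" (octal) */
	cpu = strtol("2", &end, 0);

	/* strtoul clamps to ULONG_MAX on overflow */
	addr = strtoul("0x7f000000", &end, 16);

	/* atoi is a thin wrapper around strtol(str, NULL, 10) */
	cpu += atoi("16");

	/* hexdigit maps a value 0..15 to its ASCII hex character */
	(void)hexdigit((int)(addr & 0xFUL));
	(void)cpu;
}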
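
spinlock_obtain implements a ticket lock: the lock xaddl atomically takes a ticket from the head counter, and the caller spins (pause) until the tail counter reaches that ticket. The matching release primitive is not part of this hunk; the sketch below assumes it is provided by the spinlock header and names it only as a placeholder.

/* Usage sketch for spinlock_init/spinlock_obtain. */
static spinlock_t example_lock;

static void example_lock_init(void)
{
	(void)spinlock_init(&example_lock);
}

static void locked_update_example(void)
{
	(void)spinlock_obtain(&example_lock);
	/* ... critical section ... */
	/* a matching release that advances lock->tail is assumed to be
	 * declared in the spinlock header; it is not shown in this patch
	 */
}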
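
udelay is a TSC-based busy wait: it computes a target TSC value from TIME_MS_DELTA * loop_count and spins until rdtsc() passes it, so the effective unit of loop_count depends on how TIME_MS_DELTA is defined. A one-line sketch:

/* Usage sketch for udelay; suitable only for short waits on the current pCPU. */
static void settle_device_example(void)
{
	udelay(10);
}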