diff --git a/hypervisor/arch/x86/guest/nested.c b/hypervisor/arch/x86/guest/nested.c
index a9b061f01..af1b2720b 100644
--- a/hypervisor/arch/x86/guest/nested.c
+++ b/hypervisor/arch/x86/guest/nested.c
@@ -228,9 +228,157 @@ int32_t read_vmx_msr(struct acrn_vcpu *vcpu, uint32_t msr, uint64_t *val)
 	return err;
 }
 
+/* make it 1 to be able to build; correctly initialize it in the next patch */
+#define MAX_SHADOW_VMCS_FIELDS	1
+/*
+ * VMCS fields included in the dual-purpose VMCS: as shadow for L1 and
+ * as hardware VMCS for the nested guest (L2).
+ *
+ * TODO: This list is for TGL and CFL machines, and the fields
+ * for advanced APICv features such as Posted Interrupt and Virtual
+ * Interrupt Delivery are not included, as these are not available
+ * on those platforms.
+ *
+ * Certain fields, e.g. VMX_TSC_MULTIPLIER_FULL, are available only if
+ * "use TSC scaling" is supported. Thus a static array may not work
+ * for all platforms.
+ */
+static const uint32_t vmcs_shadowing_fields[MAX_SHADOW_VMCS_FIELDS] = {
+};
+
 /* to be shared by all vCPUs for all nested guests */
 static uint64_t vmcs_shadowing_bitmap[PAGE_SIZE / sizeof(uint64_t)] __aligned(PAGE_SIZE);
 
+/*
+ * This is an array of offsets into a structure of type "struct acrn_vmcs12":
+ * 16 offsets for a total of 16 GROUPs, i.e. 4 "field widths" by 4 "field types".
+ * "Field type" is either Control, Read-Only Data, Guest State or Host State.
+ * Refer to the definition of "struct acrn_vmcs12" for how the fields are
+ * grouped together for these offsets to work in tandem.
+ * Refer to Intel SDM Appendix B Field Encoding in VMCS for info on how
+ * fields are grouped and indexed within a group.
+ */
+static const uint16_t vmcs12_group_offset_table[16] = {
+	offsetof(struct acrn_vmcs12, vpid),			/* 16-bit Control Fields */
+	offsetof(struct acrn_vmcs12, padding),			/* 16-bit Read-Only Fields */
+	offsetof(struct acrn_vmcs12, guest_es),			/* 16-bit Guest-State Fields */
+	offsetof(struct acrn_vmcs12, host_es),			/* 16-bit Host-State Fields */
+	offsetof(struct acrn_vmcs12, io_bitmap_a),		/* 64-bit Control Fields */
+	offsetof(struct acrn_vmcs12, guest_phys_addr),		/* 64-bit Read-Only Data Fields */
+	offsetof(struct acrn_vmcs12, vmcs_link_ptr),		/* 64-bit Guest-State Fields */
+	offsetof(struct acrn_vmcs12, host_ia32_pat),		/* 64-bit Host-State Fields */
+	offsetof(struct acrn_vmcs12, pin_based_exec_ctrl),	/* 32-bit Control Fields */
+	offsetof(struct acrn_vmcs12, vm_instr_error),		/* 32-bit Read-Only Data Fields */
+	offsetof(struct acrn_vmcs12, guest_es_limit),		/* 32-bit Guest-State Fields */
+	offsetof(struct acrn_vmcs12, host_ia32_sysenter_cs),	/* 32-bit Host-State Fields */
+	offsetof(struct acrn_vmcs12, cr0_guest_host_mask),	/* Natural-width Control Fields */
+	offsetof(struct acrn_vmcs12, exit_qual),		/* Natural-width Read-Only Data Fields */
+	offsetof(struct acrn_vmcs12, guest_cr0),		/* Natural-width Guest-State Fields */
+	offsetof(struct acrn_vmcs12, host_cr0),			/* Natural-width Host-State Fields */
+};
+
+/*
+ * field_idx is the index of the field within the group.
+ *
+ * Access-type is 0 for all widths except 64-bit.
+ * For 64-bit fields, if Access-type is 1, the offset is moved to the
+ * high 4 bytes of the field.
+ */
+#define OFFSET_INTO_VMCS12(group_idx, field_idx, width_in_bytes, access_type) \
+	(vmcs12_group_offset_table[group_idx] + \
+	field_idx * width_in_bytes + \
+	access_type * sizeof(uint32_t))
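
As a quick check on the arithmetic above, here is a self-contained sketch (illustrative only, not part of the patch) that decodes the guest CS selector. It assumes the standard SDM Appendix B encoding of 0x802 for that field (VMX_GUEST_CS_SEL in ACRN's vmx.h); the shifts and masks mirror the VMX_VMCS_FIELD_* macros added to nested.h later in this patch.

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t field = 0x802U;                    /* guest CS selector encoding */
		uint32_t width = (field >> 13U) & 0x3U;     /* 0 -> 16-bit */
		uint32_t type  = (field >> 10U) & 0x3U;     /* 2 -> Guest-State */
		uint32_t index = (field >> 1U)  & 0x1ffU;   /* 1 -> second 16-bit guest field */
		uint32_t group_idx = (width << 2U) | type;  /* 2 -> the guest_es group */

		/*
		 * OFFSET_INTO_VMCS12(2, 1, 2, 0) = offsetof(guest_es) + 1 * 2 + 0,
		 * which lands exactly on offsetof(struct acrn_vmcs12, guest_cs).
		 */
		assert((width == 0U) && (type == 2U) && (index == 1U) && (group_idx == 2U));
		return 0;
	}
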
+
+/* Given a vmcs field, this API returns the offset into "struct acrn_vmcs12" */
+static uint16_t vmcs_field_to_vmcs12_offset(uint32_t vmcs_field)
+{
+	/*
+	 * Refer to Appendix B Field Encoding in VMCS in the SDM.
+	 * A value of group index 0001b is not valid because there are no 16-bit
+	 * Read-Only fields.
+	 *
+	 * TODO: check invalid VMCS field
+	 */
+	uint16_t group_idx = (VMX_VMCS_FIELD_WIDTH(vmcs_field) << 2U) | VMX_VMCS_FIELD_TYPE(vmcs_field);
+	uint8_t field_width = VMX_VMCS_FIELD_WIDTH(vmcs_field);
+	uint8_t width_in_bytes;
+
+	if (field_width == VMX_VMCS_FIELD_WIDTH_16) {
+		width_in_bytes = 2U;
+	} else if (field_width == VMX_VMCS_FIELD_WIDTH_32) {
+		width_in_bytes = 4U;
+	} else {
+		/*
+		 * Natural-width or 64-bit
+		 */
+		width_in_bytes = 8U;
+	}
+
+	return OFFSET_INTO_VMCS12(group_idx,
+		VMX_VMCS_FIELD_INDEX(vmcs_field), width_in_bytes, /* field index within the group */
+		VMX_VMCS_FIELD_ACCESS_HIGH(vmcs_field));
+}
+
+/*
+ * Given a vmcs field and the pointer to the vmcs12, this API returns the
+ * corresponding value from the vmcs12.
+ */
+static uint64_t vmcs12_read_field(uint64_t vmcs_hva, uint32_t field)
+{
+	uint64_t *ptr = (uint64_t *)(vmcs_hva + vmcs_field_to_vmcs12_offset(field));
+	uint64_t val64 = 0UL;
+
+	switch (VMX_VMCS_FIELD_WIDTH(field)) {
+	case VMX_VMCS_FIELD_WIDTH_16:
+		val64 = *(uint16_t *)ptr;
+		break;
+	case VMX_VMCS_FIELD_WIDTH_32:
+		val64 = *(uint32_t *)ptr;
+		break;
+	case VMX_VMCS_FIELD_WIDTH_64:
+		if (!!VMX_VMCS_FIELD_ACCESS_HIGH(field)) {
+			val64 = *(uint32_t *)ptr;
+		} else {
+			val64 = *ptr;
+		}
+		break;
+	case VMX_VMCS_FIELD_WIDTH_NATURAL:
+	default:
+		val64 = *ptr;
+		break;
+	}
+
+	return val64;
+}
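
To illustrate how the width cases interact with the HIGH access type, here is a hedged sketch of how a VMREAD-emulation path might use vmcs12_read_field(); the handler name is a placeholder, not an ACRN API introduced by this patch, but the vcpu->arch.nested.vmcs12 access matches the sync helpers added below.

	static uint64_t emulate_guest_vmread(struct acrn_vcpu *vcpu, uint32_t encoding)
	{
		uint64_t vmcs12 = (uint64_t)&vcpu->arch.nested.vmcs12;

		/*
		 * With encoding 0x2000 (I/O bitmap A address, full) this returns the
		 * whole 64-bit value; with 0x2001 (its HIGH counterpart) the computed
		 * offset already points 4 bytes in, so only the upper dword is read.
		 */
		return vmcs12_read_field(vmcs12, encoding);
	}
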
+
+/*
+ * Write the given VMCS field to the given vmcs12 data structure.
+ */
+static void vmcs12_write_field(uint64_t vmcs_hva, uint32_t field, uint64_t val64)
+{
+	uint64_t *ptr = (uint64_t *)(vmcs_hva + vmcs_field_to_vmcs12_offset(field));
+
+	switch (VMX_VMCS_FIELD_WIDTH(field)) {
+	case VMX_VMCS_FIELD_WIDTH_16:
+		*(uint16_t *)ptr = (uint16_t)val64;
+		break;
+	case VMX_VMCS_FIELD_WIDTH_32:
+		*(uint32_t *)ptr = (uint32_t)val64;
+		break;
+	case VMX_VMCS_FIELD_WIDTH_64:
+		if (!!VMX_VMCS_FIELD_ACCESS_HIGH(field)) {
+			*(uint32_t *)ptr = (uint32_t)val64;
+		} else {
+			*ptr = val64;
+		}
+		break;
+	case VMX_VMCS_FIELD_WIDTH_NATURAL:
+	default:
+		*ptr = val64;
+		break;
+	}
+}
+
 void nested_vmx_result(enum VMXResult result, int error_number)
 {
 	uint64_t rflags = exec_vmread(VMX_GUEST_RFLAGS);
@@ -495,10 +643,16 @@ int32_t vmxoff_vmexit_handler(struct acrn_vcpu *vcpu)
  * @pre vcpu != NULL
  * @pre vmcs02 is current
  */
-static void sync_vmcs02_to_vmcs12(__unused struct acrn_vcpu *vcpu)
+static void sync_vmcs02_to_vmcs12(struct acrn_vcpu *vcpu)
 {
-	/* Implemented in next patch */
-	return;
+	uint64_t vmcs12 = (uint64_t)&vcpu->arch.nested.vmcs12;
+	uint64_t val64;
+	uint32_t idx;
+
+	for (idx = 0; idx < MAX_SHADOW_VMCS_FIELDS; idx++) {
+		val64 = exec_vmread(vmcs_shadowing_fields[idx]);
+		vmcs12_write_field(vmcs12, vmcs_shadowing_fields[idx], val64);
+	}
 }
 
 /**
@@ -507,10 +661,20 @@ static void sync_vmcs02_to_vmcs12(__unused struct acrn_vcpu *vcpu)
  * @pre vcpu != NULL
  * @pre vmcs02 is current
  */
-static void sync_vmcs12_to_vmcs02(__unused struct acrn_vcpu *vcpu)
+static void sync_vmcs12_to_vmcs02(struct acrn_vcpu *vcpu)
 {
-	/* Implemented in next patch */
-	return;
+	uint64_t vmcs12 = (uint64_t)&vcpu->arch.nested.vmcs12;
+	uint64_t val64;
+	uint32_t idx;
+
+	for (idx = 0; idx < MAX_SHADOW_VMCS_FIELDS; idx++) {
+		val64 = vmcs12_read_field(vmcs12, vmcs_shadowing_fields[idx]);
+		exec_vmwrite(vmcs_shadowing_fields[idx], val64);
+	}
+
+	/* Sync VMCS fields that are not shadowed */
+	val64 = vmcs12_read_field(vmcs12, VMX_MSR_BITMAP_FULL);
+	exec_vmwrite(VMX_MSR_BITMAP_FULL, gpa2hpa(vcpu->vm, val64));
 }
 
 /*
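
For context on where these loops are headed: they iterate over vmcs_shadowing_fields, which this patch deliberately leaves empty (MAX_SHADOW_VMCS_FIELDS is only a build placeholder). A speculative sketch of how the follow-up patch might populate the array is below; the exact list is platform-dependent, as the TODO near the top of nested.c notes, and these particular entries and their comments are only an assumption.

	/* MAX_SHADOW_VMCS_FIELDS would be raised to match the number of entries. */
	static const uint32_t vmcs_shadowing_fields[MAX_SHADOW_VMCS_FIELDS] = {
		VMX_GUEST_ES_SEL,	/* 0x800, 16-bit guest-state */
		VMX_GUEST_CR0,		/* 0x6800, natural-width guest-state */
		VMX_GUEST_RIP,		/* 0x681e, natural-width guest-state */
		/* ... remaining shadowed encodings ... */
	};
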
diff --git a/hypervisor/include/arch/x86/asm/guest/nested.h b/hypervisor/include/arch/x86/asm/guest/nested.h
index d5a2bbe4a..973853574 100644
--- a/hypervisor/include/arch/x86/asm/guest/nested.h
+++ b/hypervisor/include/arch/x86/asm/guest/nested.h
@@ -45,6 +45,20 @@ union value_64 {
 	MSR_IA32_VMX_VMFUNC,			\
 	MSR_IA32_VMX_PROCBASED_CTLS3
 
+/* refer to ISDM APPENDIX B: FIELD ENCODING IN VMCS */
+#define VMX_VMCS_FIELD_ACCESS_HIGH(v)	(((v) >> 0U) & 0x1U)
+#define VMX_VMCS_FIELD_INDEX(v)		(((v) >> 1U) & 0x1ffU)
+#define VMX_VMCS_FIELD_TYPE(v)		(((v) >> 10U) & 0x3U)
+#define VMX_VMCS_FIELD_TYPE_CTL		(0U)
+#define VMX_VMCS_FIELD_TYPE_VMEXIT	(1U)
+#define VMX_VMCS_FIELD_TYPE_GUEST	(2U)
+#define VMX_VMCS_FIELD_TYPE_HOST	(3U)
+#define VMX_VMCS_FIELD_WIDTH(v)		(((v) >> 13U) & 0x3U)
+#define VMX_VMCS_FIELD_WIDTH_16		(0U)
+#define VMX_VMCS_FIELD_WIDTH_64		(1U)
+#define VMX_VMCS_FIELD_WIDTH_32		(2U)
+#define VMX_VMCS_FIELD_WIDTH_NATURAL	(3U)
+
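
One observation these encodings make possible, tied to the "TODO: check invalid VMCS field" in vmcs_field_to_vmcs12_offset() above: the group (width 0, type 1) has no architectural fields, since there are no 16-bit read-only encodings. A hypothetical helper along these lines (not part of this patch, and assuming bool is available as in ACRN's types.h) could reject such encodings up front:

	static inline bool vmcs_field_is_supported(uint32_t field)
	{
		uint16_t group_idx = (uint16_t)((VMX_VMCS_FIELD_WIDTH(field) << 2U) |
				VMX_VMCS_FIELD_TYPE(field));

		/* group 1 corresponds to 16-bit Read-Only fields, which do not exist */
		return (group_idx != 1U);
	}
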
 /*
  * VM-Exit Instruction-Information Field
  *
@@ -80,8 +94,210 @@ union value_64 {
  */
 #define VMCS12_REVISION_ID	0x15407E12U
 
-/* Implemented in next patch */
-struct acrn_vmcs12 {};
+/*
+ * struct acrn_vmcs12 describes the emulated VMCS for the nested guest (L2).
+ */
+struct acrn_vmcs12 {
+	uint8_t vmcs_hdr[4];
+	uint32_t abort;
+
+	/*
+	 * The rest of the memory is used for "VMCS Data".
+	 * The layout of VMCS Data is non-architectural and processor
+	 * implementation specific.
+	 */
+	uint32_t launch_state;
+
+	/* 16-bit Control Fields */
+	uint16_t vpid;
+	uint16_t posted_intr_nv;
+	uint16_t eptp_index;
+
+	/* 16-bit Read-only Fields */
+	uint16_t padding;
+
+	/* 16-bit Guest-State Fields */
+	uint16_t guest_es;
+	uint16_t guest_cs;
+	uint16_t guest_ss;
+	uint16_t guest_ds;
+	uint16_t guest_fs;
+	uint16_t guest_gs;
+	uint16_t guest_ldtr;
+	uint16_t guest_tr;
+	uint16_t guest_intr_status;
+	uint16_t pml_index;
+
+	/* 16-bit Host-State Fields */
+	uint16_t host_es;
+	uint16_t host_cs;
+	uint16_t host_ss;
+	uint16_t host_ds;
+	uint16_t host_fs;
+	uint16_t host_gs;
+	uint16_t host_tr;
+
+	/* 64-bit Control Fields */
+	uint64_t io_bitmap_a;
+	uint64_t io_bitmap_b;
+	uint64_t msr_bitmap;
+	uint64_t vm_exit_msr_store_addr;
+	uint64_t vm_exit_msr_load_addr;
+	uint64_t vm_entry_load_addr;
+	uint64_t executive_vmcs_ptr;
+	uint64_t pml_addr;
+	uint64_t tsc_offset;
+	uint64_t virtual_apic_addr;
+	uint64_t apic_access_addr;
+	uint64_t posted_interrupt_desc_addr;
+	uint64_t vm_func_controls;
+	uint64_t ept_pointer;
+	uint64_t eoi_exit_bitmap0;
+	uint64_t eoi_exit_bitmap1;
+	uint64_t eoi_exit_bitmap2;
+	uint64_t eoi_exit_bitmap3;
+	uint64_t eptp_list_addr;
+	uint64_t vmread_bitmap_addr;
+	uint64_t vmwrite_bitmap_addr;
+	uint64_t virt_exception_info_addr;
+	uint64_t xss_exiting_bitmap;
+	uint64_t encls_exiting_bitmap;
+	uint64_t sub_page_permission_ptr;
+	uint64_t tsc_multiplier;
+
+	/* 64-bit Read-Only Data Fields */
+	uint64_t guest_phys_addr;
+
+	/* 64-bit Guest-State Fields */
+	uint64_t vmcs_link_ptr;
+	uint64_t guest_ia32_debugctl;
+	uint64_t guest_ia32_pat;
+	uint64_t guest_ia32_efer;
+	uint64_t ia32_perf_global_ctrl;
+	uint64_t guest_pdpte0;
+	uint64_t guest_pdpte1;
+	uint64_t guest_pdpte2;
+	uint64_t guest_pdpte3;
+	uint64_t guest_ia32_bndcfgs;
+	uint64_t guest_ia32_rtit_ctl;
+
+	/* 64-bit Host-State Fields */
+	uint64_t host_ia32_pat;
+	uint64_t host_ia32_efer;
+	uint64_t host_ia32_perf_global_ctrl;
+
+	/* 32-bit Control Fields */
+	uint32_t pin_based_exec_ctrl;
+	uint32_t proc_based_exec_ctrl;
+	uint32_t exception_bitmap;
+	uint32_t page_fault_error_code_mask;
+	uint32_t page_fault_error_code_match;
+	uint32_t cr3_target_count;
+	uint32_t vm_exit_controls;
+	uint32_t vm_exit_msr_store_count;
+	uint32_t vm_exit_msr_load_count;
+	uint32_t vm_entry_controls;
+	uint32_t vm_entry_msr_load_count;
+	uint32_t vm_entry_intr_info_field;
+	uint32_t vm_entry_exception_err_code;
+	uint32_t vm_entry_instr_len;
+	uint32_t tpr_threshold;
+	uint32_t proc_based_exec_ctrl2;
+	uint32_t ple_gap;
+	uint32_t ple_window;
+
+	/* 32-bit Read-Only Data Fields */
+	uint32_t vm_instr_error;
+	uint32_t exit_reason;
+	uint32_t vm_exit_intr_info;
+	uint32_t vm_exit_intr_error_code;
+	uint32_t idt_vectoring_info_field;
+	uint32_t idt_vectoring_error_code;
+	uint32_t vm_exit_instr_len;
+	uint32_t vm_exit_instr_info;
+
+	/* 32-bit Guest-State Fields */
+	uint32_t guest_es_limit;
+	uint32_t guest_cs_limit;
+	uint32_t guest_ss_limit;
+	uint32_t guest_ds_limit;
+	uint32_t guest_fs_limit;
+	uint32_t guest_gs_limit;
+	uint32_t guest_ldtr_limit;
+	uint32_t guest_tr_limit;
+	uint32_t guest_gdtr_limit;
+	uint32_t guest_idtr_limit;
+	uint32_t guest_es_ar;
+	uint32_t guest_cs_ar;
+	uint32_t guest_ss_ar;
+	uint32_t guest_ds_ar;
+	uint32_t guest_fs_ar;
+	uint32_t guest_gs_ar;
+	uint32_t guest_ldtr_ar;
+	uint32_t guest_tr_ar;
+	uint32_t guest_intr_state;
+	uint32_t guest_activity_state;
+	uint32_t guest_smbase;
+	uint32_t guest_ia32_sysenter_cs;
+	uint32_t vmx_preempt_timer_val;
+
+	/* 32-bit Host-State Fields */
+	uint32_t host_ia32_sysenter_cs;
+
+	/* Natural-width Control Fields */
+	uint64_t cr0_guest_host_mask;
+	uint64_t cr4_guest_host_mask;
+	uint64_t cr0_read_shadow;
+	uint64_t cr4_read_shadow;
+	uint64_t cr3_target_val0;
+	uint64_t cr3_target_val1;
+	uint64_t cr3_target_val2;
+	uint64_t cr3_target_val3;
+
+	/* Natural-width Read-Only Data Fields */
+	uint64_t exit_qual;
+	uint64_t io_rcx;
+	uint64_t io_rsi;
+	uint64_t io_rdi;
+	uint64_t io_rip;
+	uint64_t guest_linear_addr;
+
+	/* Natural-width Guest-State Fields */
+	uint64_t guest_cr0;
+	uint64_t guest_cr3;
+	uint64_t guest_cr4;
+	uint64_t guest_es_base;
+	uint64_t guest_cs_base;
+	uint64_t guest_ss_base;
+	uint64_t guest_ds_base;
+	uint64_t guest_fs_base;
+	uint64_t guest_gs_base;
+	uint64_t guest_ldtr_base;
+	uint64_t guest_tr_base;
+	uint64_t guest_gdtr_base;
+	uint64_t guest_idtr_base;
+	uint64_t guest_dr7;
+	uint64_t guest_rsp;
+	uint64_t guest_rip;
+	uint64_t guest_rflags;
+	uint64_t guest_pending_debug_excp;
+	uint64_t guest_ia32_sysenter_esp;
+	uint64_t guest_ia32_sysenter_eip;
+
+	/* Natural-width Host-State Fields */
+	uint64_t host_cr0;
+	uint64_t host_cr3;
+	uint64_t host_cr4;
+	uint64_t host_fs_base;
+	uint64_t host_gs_base;
+	uint64_t host_tr_base;
+	uint64_t host_gdtr_base;
+	uint64_t host_idtr_base;
+	uint64_t host_ia32_sysenter_esp;
+	uint64_t host_ia32_sysenter_eip;
+	uint64_t host_rsp;
+	uint64_t host_rip;
+};
 
 enum VMXResult {
 	VMsucceed,
@@ -96,6 +312,8 @@ int32_t vmptrld_vmexit_handler(struct acrn_vcpu *vcpu);
 #ifdef CONFIG_NVMX_ENABLED
 struct acrn_nested {
 	uint8_t vmcs02[PAGE_SIZE];	/* VMCS to run L2 and as Link Pointer in VMCS01 */
+
+	/* TODO: change this to uint8_t vmcs12[PAGE_SIZE] */
 	struct acrn_vmcs12 vmcs12;	/* To cache L1's VMCS12*/
 	uint64_t current_vmcs12_ptr;	/* GPA */
 	uint64_t vmxon_ptr;		/* GPA */
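
Because vmcs_field_to_vmcs12_offset() computes offsets as group base + field index * width, the members inside each group of struct acrn_vmcs12 must stay contiguous and in encoding order. A build-time check along these lines (illustrative only, not part of the patch, and assuming nested.h is in scope; static_assert stands in for whatever compile-time assert the tree provides) would catch accidental reordering:

	#include <assert.h>
	#include <stddef.h>
	#include <stdint.h>

	/* 16-bit guest-state group: index 1 (guest CS) sits 2 bytes after index 0 (guest ES) */
	static_assert(offsetof(struct acrn_vmcs12, guest_cs) ==
		offsetof(struct acrn_vmcs12, guest_es) + sizeof(uint16_t),
		"16-bit guest-state fields must be contiguous and in encoding order");

	/* 64-bit control group: index 1 (I/O bitmap B) sits 8 bytes after index 0 (I/O bitmap A) */
	static_assert(offsetof(struct acrn_vmcs12, io_bitmap_b) ==
		offsetof(struct acrn_vmcs12, io_bitmap_a) + sizeof(uint64_t),
		"64-bit control fields must be contiguous and in encoding order");
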