hv: drop the temporary stack for AP startup

By switching AP wakeup from broadcast to one-by-one, we can set
the correct stack for each AP and drop the temporary stack used
during AP boot.

Tracked-On: #2034
Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
Acked-by: Anthony Xu <anthony.xu@intel.com>
Yin Fengwei 2018-12-11 17:46:02 +08:00 committed by wenlingz
parent 74849cd983
commit 69dc939243
9 changed files with 48 additions and 85 deletions
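In outline, the new bring-up path patches a per-AP stack pointer into the trampoline image, wakes that single AP, and waits for it to mark itself active. A condensed sketch of the start_cpu() path this commit introduces (taken from the cpu.c hunks below; not a drop-in implementation):

    /* Condensed from the start_cpu()/start_cpus() changes below. */
    static void start_cpu(uint16_t pcpu_id)
    {
            /* Patch secondary_cpu_stack in the trampoline image so that
             * trampoline_start64 loads this AP's own stack into %rsp.
             */
            stac();
            write_trampoline_stack_sym(pcpu_id);
            clac();

            /* Wake exactly this AP instead of broadcasting to all APs */
            send_startup_ipi(INTR_CPU_STARTUP_USE_DEST, pcpu_id, startup_paddr);

            /* ...then poll pcpu_active_bitmap with a timeout, as in the diff. */
    }

Because only one AP is in the trampoline at a time, the trampoline_spinlock that used to serialize APs on the shared temporary stack becomes unnecessary and is removed throughout.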


@@ -33,7 +33,7 @@
  * the macros involved are changed.
  */
-.extern cpu_secondary_init
+.extern init_secondary_cpu

 .section .trampoline_reset,"ax"
@@ -149,20 +149,7 @@ trampoline_start64:
 	mov %eax, %fs
 	mov %eax, %gs

-	/* Obtain CPU spin-lock to serialize trampoline for different APs */
-	movq trampoline_spinlock_ptr(%rip), %rdi
-	spinlock_obtain(%rdi)
-
-	/* Initialize temporary stack pointer
-	   NOTE: Using the PML4 memory (PDPT address is top of memory
-	   for the PML4 page) for the temporary stack
-	   as we are only using the very first entry in
-	   this page and the stack is growing down from
-	   the top of this page. This stack is only
-	   used for a VERY short period of time, so
-	   this reuse of PML4 memory should be acceptable. */
-	lea trampoline_pdpt_addr(%rip), %rsp
+	movq secondary_cpu_stack(%rip), %rsp

 	/* Jump to C entry */
 	movq main_entry(%rip), %rax
@@ -173,11 +160,11 @@ trampoline_start64:
 	.align 8
 	.global main_entry
 main_entry:
-	.quad cpu_secondary_init /* default entry is AP start entry */
+	.quad init_secondary_cpu /* default entry is AP start entry */

-	.global trampoline_spinlock_ptr
-trampoline_spinlock_ptr:
-	.quad trampoline_spinlock
+	.global secondary_cpu_stack
+secondary_cpu_stack:
+	.quad 0

 /* GDT table */
 	.align 4


@@ -11,11 +11,6 @@
 #include <trampoline.h>
 #include <e820.h>

-spinlock_t trampoline_spinlock = {
-	.head = 0U,
-	.tail = 0U
-};
-
 struct per_cpu_region per_cpu_data[CONFIG_MAX_PCPU_NUM] __aligned(PAGE_SIZE);
 uint16_t phys_cpu_num = 0U;
 static uint64_t pcpu_sync = 0UL;
@@ -499,34 +494,22 @@ static uint16_t get_cpu_id_from_lapic_id(uint32_t lapic_id)
 	return INVALID_CPU_ID;
 }

-/*
- * Start all secondary CPUs.
- */
-void start_cpus(void)
+static void start_cpu(uint16_t pcpu_id)
 {
 	uint32_t timeout;
-	uint16_t expected_up;

-	/* secondary cpu start up will wait for pcpu_sync -> 0UL */
-	atomic_store64(&pcpu_sync, 1UL);
+	/* Update the stack for pcpu */
+	stac();
+	write_trampoline_stack_sym(pcpu_id);
+	clac();

-	/* Set flag showing number of CPUs expected to be up to all
-	 * cpus
-	 */
-	expected_up = phys_cpu_num;
+	send_startup_ipi(INTR_CPU_STARTUP_USE_DEST, pcpu_id, startup_paddr);

-	/* Broadcast IPIs to all other CPUs,
-	 * In this case, INTR_CPU_STARTUP_ALL_EX_SELF decides broadcasting
-	 * IPIs, INVALID_CPU_ID is parameter value to destination pcpu_id.
-	 */
-	send_startup_ipi(INTR_CPU_STARTUP_ALL_EX_SELF,
-			INVALID_CPU_ID, startup_paddr);
-
-	/* Wait until global count is equal to expected CPU up count or
+	/* Wait until the pcpu with pcpu_id is running and set the active bitmap or
 	 * configured time-out has expired
 	 */
 	timeout = (uint32_t)CONFIG_CPU_UP_TIMEOUT * 1000U;
-	while ((atomic_load16(&up_count) != expected_up) && (timeout != 0U)) {
+	while ((bitmap_test(pcpu_id, &pcpu_active_bitmap) == false) && (timeout != 0U)) {
 		/* Delay 10us */
 		udelay(10U);
@@ -534,8 +517,8 @@ void start_cpus(void)
 		timeout -= 10U;
 	}

-	/* Check to see if all expected CPUs are actually up */
-	if (atomic_load16(&up_count) != expected_up) {
+	/* Check to see if expected CPU is actually up */
+	if (bitmap_test(pcpu_id, &pcpu_active_bitmap) == false) {
 		/* Print error */
 		pr_fatal("Secondary CPUs failed to come up");
@@ -543,6 +526,22 @@ void start_cpus(void)
 		do {
 		} while (1);
 	}
+}
+
+void start_cpus(void)
+{
+	uint16_t i;
+
+	/* secondary cpu start up will wait for pcpu_sync -> 0UL */
+	atomic_store64(&pcpu_sync, 1UL);
+
+	for (i = 0U; i < phys_cpu_num; i++) {
+		if (get_cpu_id() == i) {
+			continue;
+		}
+
+		start_cpu(i);
+	}
+
 	/* Trigger event to allow secondary CPUs to continue */
 	atomic_store64(&pcpu_sync, 0UL);
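For context (the AP side of this handshake is outside the diff): once an AP reaches its own stack and the C entry point, it registers itself in pcpu_active_bitmap, which is what start_cpu() polls, and then spins until the BSP clears pcpu_sync. A minimal sketch of that AP-side wait, with illustrative helper names that are assumptions rather than code from this commit:

    /* Illustrative sketch only; the actual AP-side wait lives in the
     * common init path, not in this diff. Helper names are assumed.
     */
    static void mark_up_and_wait(uint16_t pcpu_id)
    {
            /* Report this AP as up so start_cpu()'s bitmap_test() succeeds */
            bitmap_set(pcpu_id, &pcpu_active_bitmap);

            /* Park until the BSP releases all APs by storing 0 to pcpu_sync */
            while (atomic_load64(&pcpu_sync) != 0UL) {
                    asm volatile ("pause" ::: "memory");
            }
    }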


@@ -108,14 +108,11 @@ void bsp_boot_init(void)
 	SWITCH_TO(rsp, bsp_boot_post);
 }

-static void cpu_secondary_post(void)
+void init_secondary_cpu(void)
 {
 	uint16_t pcpu_id;

-	/* Release secondary boot spin-lock to allow one of the next CPU(s) to
-	 * perform this common initialization
-	 */
-	spinlock_release(&trampoline_spinlock);
+	init_cpu_pre(INVALID_CPU_ID);

 	pcpu_id = get_cpu_id();
@@ -125,18 +122,3 @@ static void cpu_secondary_post(void)
 	enter_guest_mode(pcpu_id);
 }

-/* NOTE: this function is using temp stack, and after SWITCH_TO(runtime_sp, to)
- * it will switch to runtime stack.
- */
-void cpu_secondary_init(void)
-{
-	uint64_t rsp;
-
-	init_cpu_pre(INVALID_CPU_ID);
-
-	/* Switch to run-time stack */
-	rsp = (uint64_t)(&get_cpu_var(stack)[CONFIG_STACK_SIZE - 1]);
-	rsp &= ~(CPU_STACK_ALIGN - 1UL);
-	SWITCH_TO(rsp, cpu_secondary_post);
-}


@@ -168,9 +168,6 @@ void enter_s3(struct acrn_vm *vm, uint32_t pm1a_cnt_val, uint32_t pm1b_cnt_val)
 	asm_enter_s3(vm, pm1a_cnt_val, pm1b_cnt_val);

-	/* release the lock aquired in trampoline code */
-	spinlock_release(&trampoline_spinlock);
-
 	resume_lapic();
 	resume_iommu();
 	resume_ioapic();


@@ -39,6 +39,17 @@ void write_trampoline_sym(const void *sym, uint64_t val)
 	clflush(hva);
 }

+void write_trampoline_stack_sym(uint16_t pcpu_id)
+{
+	uint64_t *hva, stack_sym_addr;
+
+	hva = (uint64_t *)(hpa2hva(trampoline_start16_paddr) + trampoline_relo_addr(secondary_cpu_stack));
+
+	stack_sym_addr = (uint64_t)&per_cpu(stack, pcpu_id)[CONFIG_STACK_SIZE - 1];
+	*hva = stack_sym_addr + get_hv_image_delta();
+
+	clflush(hva);
+}
+
 static void update_trampoline_code_refs(uint64_t dest_pa)
 {
 	void *ptr;
@@ -83,10 +94,6 @@ static void update_trampoline_code_refs(uint64_t dest_pa)
 	/* update trampoline's main entry pointer */
 	ptr = hpa2hva(dest_pa + trampoline_relo_addr(main_entry));
 	*(uint64_t *)ptr += get_hv_image_delta();
-
-	/* update trampoline's spinlock pointer */
-	ptr = hpa2hva(dest_pa + trampoline_relo_addr(&trampoline_spinlock_ptr));
-	*(uint64_t *)ptr += get_hv_image_delta();
 }

 uint64_t prepare_trampoline(void)


@@ -28,7 +28,6 @@
 	.extern cpu_ctx
 	.extern load_gdtr_and_tr
 	.extern do_acpi_s3
-	.extern trampoline_spinlock

 	.global asm_enter_s3
 asm_enter_s3:
@@ -99,14 +98,6 @@ asm_enter_s3:
 	call do_acpi_s3

-	/* if do_acpi_s3 returns, which means ACRN can't enter S3 state.
-	 * Then trampoline will not be executed and we need to acquire
-	 * trampoline_spinlock here to match release in enter_sleep
-	 */
-	mov $trampoline_spinlock, %rdi
-	spinlock_obtain(%rdi)
-
 	/*
 	 * When system resume from S3, trampoline_start64 will
 	 * jump to restore_s3_context after setup temporary stack.


@@ -207,9 +207,9 @@ extern uint8_t ld_bss_end;

 /* In trampoline range, hold the jump target which trampline will jump to */
 extern uint64_t main_entry[1];
+extern uint64_t secondary_cpu_stack[1];

 extern int32_t ibrs_type;
-extern spinlock_t trampoline_spinlock;

 /*
  * To support per_cpu access, we use a special struct "per_cpu_region" to hold


@@ -9,6 +9,6 @@
 #define SP_BOTTOM_MAGIC 0x696e746cUL

 void bsp_boot_init(void);
-void cpu_secondary_init(void);
+void init_secondary_cpu(void);

 #endif /* INIT_H*/


@@ -8,6 +8,7 @@
 extern uint64_t read_trampoline_sym(const void *sym);
 extern void write_trampoline_sym(const void *sym, uint64_t val);
+extern void write_trampoline_stack_sym(uint16_t pcpu_id);
 extern uint64_t prepare_trampoline(void);

 /* external symbols that are helpful for relocation */
@@ -22,7 +23,6 @@ extern uint8_t cpu_boot_page_tables_ptr;
 extern uint8_t trampoline_pdpt_addr;
 extern uint8_t trampoline_gdt_ptr;
 extern uint8_t trampoline_start64_fixup;
-extern uint8_t trampoline_spinlock_ptr;

 extern uint64_t trampoline_start16_paddr;