From 69dc939243961c45960aa7f65f0b4515cc36540a Mon Sep 17 00:00:00 2001
From: Yin Fengwei
Date: Tue, 11 Dec 2018 17:46:02 +0800
Subject: [PATCH] hv: drop the temporary stack for AP startup

By switching AP wakeup from broadcast to one-by-one, we can set the
correct stack for each AP before it starts and drop the temporary
stack used during AP boot.

Tracked-On: #2034
Signed-off-by: Yin Fengwei
Acked-by: Anthony Xu
---
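Note: the one-by-one bring-up boils down to the rough C sketch below. It is
illustrative only; start_cpus_sketch() and wait_pcpu_active() are hypothetical
names, with wait_pcpu_active() standing in for the udelay() polling loop on
pcpu_active_bitmap in the real start_cpu():

	static void start_cpus_sketch(void)	/* hypothetical summary of the cpu.c hunks */
	{
		uint16_t i;

		for (i = 0U; i < phys_cpu_num; i++) {
			if (get_cpu_id() == i) {
				continue;	/* never send a startup IPI to ourselves */
			}
			/* patch this AP's stack top into the relocated trampoline */
			write_trampoline_stack_sym(i);
			/* wake exactly one AP ... */
			send_startup_ipi(INTR_CPU_STARTUP_USE_DEST, i, startup_paddr);
			/* ... and wait until it sets its bit in pcpu_active_bitmap */
			wait_pcpu_active(i);
		}
	}
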
 hypervisor/arch/x86/boot/trampoline.S    | 25 +++--------
 hypervisor/arch/x86/cpu.c                | 53 ++++++++++++------------
 hypervisor/arch/x86/init.c               | 22 +---------
 hypervisor/arch/x86/pm.c                 |  3 --
 hypervisor/arch/x86/trampoline.c         | 15 +++++--
 hypervisor/arch/x86/wakeup.S             |  9 ----
 hypervisor/include/arch/x86/cpu.h        |  2 +-
 hypervisor/include/arch/x86/init.h       |  2 +-
 hypervisor/include/arch/x86/trampoline.h |  2 +-
 9 files changed, 48 insertions(+), 85 deletions(-)

diff --git a/hypervisor/arch/x86/boot/trampoline.S b/hypervisor/arch/x86/boot/trampoline.S
index 192670aee..d53840a0a 100644
--- a/hypervisor/arch/x86/boot/trampoline.S
+++ b/hypervisor/arch/x86/boot/trampoline.S
@@ -33,7 +33,7 @@
  * the macros involved are changed.
  */
 
-	.extern cpu_secondary_init
+	.extern init_secondary_cpu
 
 	.section .trampoline_reset,"ax"
 
@@ -149,20 +149,7 @@ trampoline_start64:
 	mov %eax, %fs
 	mov %eax, %gs
 
-	/* Obtain CPU spin-lock to serialize trampoline for different APs */
-	movq trampoline_spinlock_ptr(%rip), %rdi
-	spinlock_obtain(%rdi)
-
-	/* Initialize temporary stack pointer
-	   NOTE: Using the PML4 memory (PDPT address is top of memory
-	   for the PML4 page) for the temporary stack
-	   as we are only using the very first entry in
-	   this page and the stack is growing down from
-	   the top of this page. This stack is only
-	   used for a VERY short period of time, so
-	   this reuse of PML4 memory should be acceptable. */
-
-	lea trampoline_pdpt_addr(%rip), %rsp
+	movq secondary_cpu_stack(%rip), %rsp
 
 	/* Jump to C entry */
 	movq main_entry(%rip), %rax
@@ -173,11 +160,11 @@ trampoline_start64:
 	.align 8
 	.global main_entry
main_entry:
-	.quad cpu_secondary_init /* default entry is AP start entry */
+	.quad init_secondary_cpu /* default entry is AP start entry */
 
-	.global trampoline_spinlock_ptr
-trampoline_spinlock_ptr:
-	.quad trampoline_spinlock
+	.global secondary_cpu_stack
+secondary_cpu_stack:
+	.quad 0
 
 /* GDT table */
 .align 4
diff --git a/hypervisor/arch/x86/cpu.c b/hypervisor/arch/x86/cpu.c
index 65b18a4b1..76b3e338f 100644
--- a/hypervisor/arch/x86/cpu.c
+++ b/hypervisor/arch/x86/cpu.c
@@ -11,11 +11,6 @@
 #include
 #include
 
-spinlock_t trampoline_spinlock = {
-	.head = 0U,
-	.tail = 0U
-};
-
 struct per_cpu_region per_cpu_data[CONFIG_MAX_PCPU_NUM] __aligned(PAGE_SIZE);
 uint16_t phys_cpu_num = 0U;
 static uint64_t pcpu_sync = 0UL;
@@ -499,34 +494,22 @@ static uint16_t get_cpu_id_from_lapic_id(uint32_t lapic_id)
 	return INVALID_CPU_ID;
 }
 
-/*
- * Start all secondary CPUs.
- */
-void start_cpus(void)
+static void start_cpu(uint16_t pcpu_id)
 {
 	uint32_t timeout;
-	uint16_t expected_up;
 
-	/* secondary cpu start up will wait for pcpu_sync -> 0UL */
-	atomic_store64(&pcpu_sync, 1UL);
+	/* Update the trampoline stack pointer for this pcpu */
+	stac();
+	write_trampoline_stack_sym(pcpu_id);
+	clac();
 
-	/* Set flag showing number of CPUs expected to be up to all
-	 * cpus
-	 */
-	expected_up = phys_cpu_num;
+	send_startup_ipi(INTR_CPU_STARTUP_USE_DEST, pcpu_id, startup_paddr);
 
-	/* Broadcast IPIs to all other CPUs,
-	 * In this case, INTR_CPU_STARTUP_ALL_EX_SELF decides broadcasting
-	 * IPIs, INVALID_CPU_ID is parameter value to destination pcpu_id.
-	 */
-	send_startup_ipi(INTR_CPU_STARTUP_ALL_EX_SELF,
-			INVALID_CPU_ID, startup_paddr);
-
-	/* Wait until global count is equal to expected CPU up count or
+	/* Wait until the pcpu with pcpu_id is running and has set its active bitmap bit, or
 	 * configured time-out has expired
 	 */
 	timeout = (uint32_t)CONFIG_CPU_UP_TIMEOUT * 1000U;
-	while ((atomic_load16(&up_count) != expected_up) && (timeout != 0U)) {
+	while ((bitmap_test(pcpu_id, &pcpu_active_bitmap) == false) && (timeout != 0U)) {
 		/* Delay 10us */
 		udelay(10U);
 
@@ -534,8 +517,8 @@ void start_cpus(void)
 		timeout -= 10U;
 	}
 
-	/* Check to see if all expected CPUs are actually up */
-	if (atomic_load16(&up_count) != expected_up) {
+	/* Check to see if the expected CPU is actually up */
+	if (bitmap_test(pcpu_id, &pcpu_active_bitmap) == false) {
 		/* Print error */
 		pr_fatal("Secondary CPUs failed to come up");
 
@@ -543,6 +526,22 @@ void start_cpus(void)
 		do {
 		} while (1);
 	}
+}
+
+void start_cpus(void)
+{
+	uint16_t i;
+
+	/* Secondary CPU start-up will wait until pcpu_sync is cleared to 0UL */
+	atomic_store64(&pcpu_sync, 1UL);
+
+	for (i = 0U; i < phys_cpu_num; i++) {
+		if (get_cpu_id() == i) {
+			continue;
+		}
+
+		start_cpu(i);
+	}
 
 	/* Trigger event to allow secondary CPUs to continue */
 	atomic_store64(&pcpu_sync, 0UL);
diff --git a/hypervisor/arch/x86/init.c b/hypervisor/arch/x86/init.c
index b9ab22254..f2a47f9a7 100644
--- a/hypervisor/arch/x86/init.c
+++ b/hypervisor/arch/x86/init.c
@@ -108,14 +108,11 @@ void bsp_boot_init(void)
 	SWITCH_TO(rsp, bsp_boot_post);
 }
 
-static void cpu_secondary_post(void)
+void init_secondary_cpu(void)
 {
 	uint16_t pcpu_id;
 
-	/* Release secondary boot spin-lock to allow one of the next CPU(s) to
-	 * perform this common initialization
-	 */
-	spinlock_release(&trampoline_spinlock);
+	init_cpu_pre(INVALID_CPU_ID);
 
 	pcpu_id = get_cpu_id();
 
@@ -125,18 +122,3 @@ static void cpu_secondary_post(void)
 
 	enter_guest_mode(pcpu_id);
 }
-
-/* NOTE: this function is using temp stack, and after SWITCH_TO(runtime_sp, to)
- * it will switch to runtime stack.
- */
-void cpu_secondary_init(void)
-{
-	uint64_t rsp;
-
-	init_cpu_pre(INVALID_CPU_ID);
-
-	/* Switch to run-time stack */
-	rsp = (uint64_t)(&get_cpu_var(stack)[CONFIG_STACK_SIZE - 1]);
-	rsp &= ~(CPU_STACK_ALIGN - 1UL);
-	SWITCH_TO(rsp, cpu_secondary_post);
-}
diff --git a/hypervisor/arch/x86/pm.c b/hypervisor/arch/x86/pm.c
index 54f50c360..ed3568625 100644
--- a/hypervisor/arch/x86/pm.c
+++ b/hypervisor/arch/x86/pm.c
@@ -168,9 +168,6 @@ void enter_s3(struct acrn_vm *vm, uint32_t pm1a_cnt_val, uint32_t pm1b_cnt_val)
 
 	asm_enter_s3(vm, pm1a_cnt_val, pm1b_cnt_val);
 
-	/* release the lock aquired in trampoline code */
-	spinlock_release(&trampoline_spinlock);
-
 	resume_lapic();
 	resume_iommu();
 	resume_ioapic();
diff --git a/hypervisor/arch/x86/trampoline.c b/hypervisor/arch/x86/trampoline.c
index 81b5fe51d..c827e73d2 100644
--- a/hypervisor/arch/x86/trampoline.c
+++ b/hypervisor/arch/x86/trampoline.c
@@ -39,6 +39,17 @@ void write_trampoline_sym(const void *sym, uint64_t val)
 	clflush(hva);
 }
 
+void write_trampoline_stack_sym(uint16_t pcpu_id)
+{
+	uint64_t *hva, stack_sym_addr;
+	hva = (uint64_t *)(hpa2hva(trampoline_start16_paddr) + trampoline_relo_addr(secondary_cpu_stack));
+
+	stack_sym_addr = (uint64_t)&per_cpu(stack, pcpu_id)[CONFIG_STACK_SIZE - 1];
+	*hva = stack_sym_addr + get_hv_image_delta();
+
+	clflush(hva);
+}
+
 static void update_trampoline_code_refs(uint64_t dest_pa)
 {
 	void *ptr;
@@ -83,10 +94,6 @@ static void update_trampoline_code_refs(uint64_t dest_pa)
 	/* update trampoline's main entry pointer */
 	ptr = hpa2hva(dest_pa + trampoline_relo_addr(main_entry));
 	*(uint64_t *)ptr += get_hv_image_delta();
-
-	/* update trampoline's spinlock pointer */
-	ptr = hpa2hva(dest_pa + trampoline_relo_addr(&trampoline_spinlock_ptr));
-	*(uint64_t *)ptr += get_hv_image_delta();
 }
 
 uint64_t prepare_trampoline(void)
diff --git a/hypervisor/arch/x86/wakeup.S b/hypervisor/arch/x86/wakeup.S
index 14d64c70f..7451fe5c0 100644
--- a/hypervisor/arch/x86/wakeup.S
+++ b/hypervisor/arch/x86/wakeup.S
@@ -28,7 +28,6 @@
 	.extern cpu_ctx
 	.extern load_gdtr_and_tr
 	.extern do_acpi_s3
-	.extern trampoline_spinlock
 
 	.global asm_enter_s3
asm_enter_s3:
@@ -99,14 +98,6 @@ asm_enter_s3:
 
 	call do_acpi_s3
 
-	/* if do_acpi_s3 returns, which means ACRN can't enter S3 state.
-	 * Then trampoline will not be executed and we need to acquire
-	 * trampoline_spinlock here to match release in enter_sleep
-	 */
-	mov $trampoline_spinlock, %rdi
-	spinlock_obtain(%rdi)
-
-
 	/*
 	 * When system resume from S3, trampoline_start64 will
 	 * jump to restore_s3_context after setup temporary stack.
diff --git a/hypervisor/include/arch/x86/cpu.h b/hypervisor/include/arch/x86/cpu.h
index 4853870d7..e22818374 100644
--- a/hypervisor/include/arch/x86/cpu.h
+++ b/hypervisor/include/arch/x86/cpu.h
@@ -207,9 +207,9 @@ extern uint8_t ld_bss_end;
 
 /* In trampoline range, hold the jump target which trampline will jump to */
 extern uint64_t main_entry[1];
+extern uint64_t secondary_cpu_stack[1];
 
 extern int32_t ibrs_type;
-extern spinlock_t trampoline_spinlock;
 
 /*
  * To support per_cpu access, we use a special struct "per_cpu_region" to hold
diff --git a/hypervisor/include/arch/x86/init.h b/hypervisor/include/arch/x86/init.h
index dadafefc6..b8825a0c7 100644
--- a/hypervisor/include/arch/x86/init.h
+++ b/hypervisor/include/arch/x86/init.h
@@ -9,6 +9,6 @@
 #define SP_BOTTOM_MAGIC 0x696e746cUL
 
 void bsp_boot_init(void);
-void cpu_secondary_init(void);
+void init_secondary_cpu(void);
 
 #endif /* INIT_H*/
diff --git a/hypervisor/include/arch/x86/trampoline.h b/hypervisor/include/arch/x86/trampoline.h
index a28f427a7..cdc52ad3c 100644
--- a/hypervisor/include/arch/x86/trampoline.h
+++ b/hypervisor/include/arch/x86/trampoline.h
@@ -8,6 +8,7 @@
 extern uint64_t read_trampoline_sym(const void *sym);
 extern void write_trampoline_sym(const void *sym, uint64_t val);
+extern void write_trampoline_stack_sym(uint16_t pcpu_id);
 extern uint64_t prepare_trampoline(void);
 
 /* external symbols that are helpful for relocation */
@@ -22,7 +23,6 @@ extern uint8_t cpu_boot_page_tables_ptr;
 extern uint8_t trampoline_pdpt_addr;
 extern uint8_t trampoline_gdt_ptr;
 extern uint8_t trampoline_start64_fixup;
-extern uint8_t trampoline_spinlock_ptr;
 
 extern uint64_t trampoline_start16_paddr;
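
Note (a sketch, not part of the applied diff): on the AP side,
init_secondary_cpu() now starts directly on the per-cpu stack that
start_cpu() patched into secondary_cpu_stack, and it pairs with the BSP
roughly as below. pcpu_active_bitmap and pcpu_sync are the real variables
from cpu.c; the names marked hypothetical are illustrative stand-ins for
the per-cpu init path elided from the init.c hunk:

	void init_secondary_cpu_sketch(void)	/* hypothetical name */
	{
		uint16_t pcpu_id = get_cpu_id();

		/* make bitmap_test(pcpu_id, &pcpu_active_bitmap) on the BSP succeed */
		mark_pcpu_active(pcpu_id);	/* hypothetical helper */

		/* park until start_cpus() clears pcpu_sync, releasing all APs together */
		while (atomic_load64(&pcpu_sync) != 0UL) {
			asm volatile ("pause");
		}

		/* ... per-cpu init continues, ending in enter_guest_mode(pcpu_id) ... */
	}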