From 10963b04d127c3a321b30467b986f13cbd09db66 Mon Sep 17 00:00:00 2001
From: Yifan Liu
Date: Fri, 27 Aug 2021 13:59:27 +0800
Subject: [PATCH] hv: Fix vcpu signaling racing problem in lock instruction
 emulation

In lock instruction emulation, we use vcpu_make_request and signal_event
pairs to shoot down/release other vcpus. However, vcpu_make_request is
asynchronous and does not guarantee that the target vcpu executes
wait_event, while we want wait_event to be consistent with signal_event.

Consider the following scenarios:

1. When the target vcpu's state has not yet turned to VCPU_RUNNING,
   vcpu_make_request on ACRN_REQUEST_SPLIT_LOCK does not make sense and
   will not result in a wait_event call.

2. When the target vcpu already has ACRN_REQUEST_SPLIT_LOCK requested
   (i.e., the corresponding bit in pending_req is set) but has not yet
   handled it, the vcpu_make_request call does not result in a wait_event
   call, as one bit is not enough to cache multiple requests.

This patch adds checks in vcpu_kick_lock_instr_emulation and
vcpu_complete_lock_instr_emulation to resolve these issues.

Tracked-On: #6502
Signed-off-by: Yifan Liu
Reviewed-by: Jason Chen CJ
---
 hypervisor/arch/x86/guest/lock_instr_emul.c  | 24 +++++++++++++++++---
 hypervisor/arch/x86/guest/virq.c             |  6 +++++
 hypervisor/include/arch/x86/asm/guest/virq.h |  1 +
 3 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/hypervisor/arch/x86/guest/lock_instr_emul.c b/hypervisor/arch/x86/guest/lock_instr_emul.c
index dd47b3bee..0f12f5eb8 100644
--- a/hypervisor/arch/x86/guest/lock_instr_emul.c
+++ b/hypervisor/arch/x86/guest/lock_instr_emul.c
@@ -45,7 +45,7 @@ void vcpu_kick_lock_instr_emulation(struct acrn_vcpu *cur_vcpu)
 		get_vm_lock(cur_vcpu->vm);
 
 		foreach_vcpu(i, cur_vcpu->vm, other) {
-			if (other != cur_vcpu) {
+			if ((other != cur_vcpu) && (other->state == VCPU_RUNNING)) {
 				vcpu_make_request(other, ACRN_REQUEST_SPLIT_LOCK);
 			}
 		}
@@ -59,8 +59,26 @@ void vcpu_complete_lock_instr_emulation(struct acrn_vcpu *cur_vcpu)
 
 	if (cur_vcpu->vm->hw.created_vcpus > 1U) {
 		foreach_vcpu(i, cur_vcpu->vm, other) {
-			if (other != cur_vcpu) {
-				signal_event(&other->events[VCPU_EVENT_SPLIT_LOCK]);
+			if ((other != cur_vcpu) && (other->state == VCPU_RUNNING)) {
+				/*
+				 * After the vcpu_make_request above, there is a time window between kick_vcpu and the
+				 * point where the target vcpu actually runs acrn_handle_pending_request (and eventually
+				 * wait_event). It is possible that the issuing vcpu has already completed lock emulation
+				 * and calls signal_event, while the target vcpu has not yet reached
+				 * acrn_handle_pending_request.
+				 *
+				 * This causes a problem. Say vcpu A makes a request to vcpu B. During this time window,
+				 * if A runs the kick_lock/complete_lock pair multiple times, or some other vcpu C makes
+				 * a request to B after A releases the vm lock below, then the bit in B's pending_req is
+				 * cleared only once and B calls wait_event only once, while the other vcpus may call
+				 * signal_event on B multiple times. That is, one bit is not enough to cache multiple
+				 * requests.
+				 *
+				 * To work around this, try to cancel the request (clear the bit in pending_req) if it is
+				 * still unhandled, and call signal_event only when the request has already been handled.
+				 */
+				if (!vcpu_try_cancel_request(other, ACRN_REQUEST_SPLIT_LOCK)) {
+					signal_event(&other->events[VCPU_EVENT_SPLIT_LOCK]);
+				}
 			}
 		}
 
diff --git a/hypervisor/arch/x86/guest/virq.c b/hypervisor/arch/x86/guest/virq.c
index 2b8388729..11e9afab0 100644
--- a/hypervisor/arch/x86/guest/virq.c
+++ b/hypervisor/arch/x86/guest/virq.c
@@ -133,6 +133,12 @@ void vcpu_make_request(struct acrn_vcpu *vcpu, uint16_t eventid)
 	kick_vcpu(vcpu);
 }
 
+/* Return true if an unhandled request is cancelled, false otherwise. */
+bool vcpu_try_cancel_request(struct acrn_vcpu *vcpu, uint16_t eventid)
+{
+	return bitmap_test_and_clear_lock(eventid, &vcpu->arch.pending_req);
+}
+
 /*
  * @retval true when INT is injected to guest.
  * @retval false when otherwise
diff --git a/hypervisor/include/arch/x86/asm/guest/virq.h b/hypervisor/include/arch/x86/asm/guest/virq.h
index eaf4231f3..fda5c67f3 100644
--- a/hypervisor/include/arch/x86/asm/guest/virq.h
+++ b/hypervisor/include/arch/x86/asm/guest/virq.h
@@ -103,6 +103,7 @@ void vcpu_inject_ud(struct acrn_vcpu *vcpu);
 */
 void vcpu_inject_ss(struct acrn_vcpu *vcpu);
 void vcpu_make_request(struct acrn_vcpu *vcpu, uint16_t eventid);
+bool vcpu_try_cancel_request(struct acrn_vcpu *vcpu, uint16_t eventid);
 
 /*
  * @pre vcpu != NULL
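
For illustration only, a minimal standalone C sketch (not ACRN code; make_request, handle_pending, and try_cancel are hypothetical stand-ins for vcpu_make_request, acrn_handle_pending_request, and vcpu_try_cancel_request) that models why a single pending bit cannot pair several signal_event calls with one wait_event, and how the cancel-or-signal logic keeps them balanced:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_uint pending_req;            /* stands in for vcpu->arch.pending_req */
static int waits, signals;                 /* counts wait_event/signal_event pairing */

static void make_request(unsigned bit)     /* stands in for vcpu_make_request() */
{
	atomic_fetch_or(&pending_req, 1u << bit);
}

static void handle_pending(unsigned bit)   /* target vcpu handling its pending bit once */
{
	if ((atomic_fetch_and(&pending_req, ~(1u << bit)) & (1u << bit)) != 0u) {
		waits++;                   /* would block in wait_event() here */
	}
}

static bool try_cancel(unsigned bit)       /* stands in for vcpu_try_cancel_request() */
{
	return (atomic_fetch_and(&pending_req, ~(1u << bit)) & (1u << bit)) != 0u;
}

int main(void)
{
	/* Old behaviour: two issuers kick before the target handles anything. */
	make_request(0);
	make_request(0);                   /* second request collapses into the same bit */
	handle_pending(0);                 /* target sees the bit once -> one wait */
	signals += 2;                      /* both issuers signal unconditionally */
	printf("unconditional signal: waits=%d signals=%d\n", waits, signals);

	/* New behaviour: each issuer cancels if unhandled, signals only if handled. */
	waits = 0;
	signals = 0;
	make_request(0);
	if (!try_cancel(0)) {              /* still pending -> cancelled, no signal */
		signals++;
	}
	make_request(0);
	handle_pending(0);                 /* target handles this one -> one wait */
	if (!try_cancel(0)) {              /* already handled -> signal the waiter */
		signals++;
	}
	printf("cancel-or-signal:     waits=%d signals=%d\n", waits, signals);
	return 0;
}

The first run prints one wait against two signals (the imbalance described in the comment above); the second prints one wait against one signal, which is the pairing the patch restores.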