[PATCH 3/3] x86/spec-ctrl: Fix NMI race condition with VT-x MSR_SPEC_CTRL handling
The logic was based on a mistaken understanding of how NMI blocking on vmexit
works. NMIs are only blocked for EXIT_REASON_NMI, and not for general exits.
Therefore, an NMI can in general hit early in the vmx_asm_vmexit_handler path,
and the guest's value will be clobbered before it is saved.
Switch to using MSR load/save lists. This causes the guest value to be saved
atomically with respect to NMIs/MCEs/etc.
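(For reference only, not part of the patch: each element of the VMX MSR
load/save area is a 16-byte entry which the processor consumes atomically as
part of VMENTRY/VMEXIT. A minimal sketch of the layout, matching the
struct vmx_msr_entry definition in asm/hvm/vmx/vmcs.h - illustrative, not
authoritative:

    struct vmx_msr_entry {
        uint32_t index;   /* MSR number, e.g. MSR_SPEC_CTRL (0x48) */
        uint32_t mbz;     /* Must be zero, per the hardware format */
        uint64_t data;    /* Value loaded on entry / stored on exit */
    };

Entries are added to the guest list with vmx_add_guest_msr(v, msr, val).)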
First, update vmx_cpuid_policy_changed() to configure the load/save lists at
the same time as configuring the intercepts. This function is always used in
remote context, so extend the vmx_vmcs_{enter,exit}() block to cover the whole
function, rather than having multiple remote acquisitions of the same VMCS.
vmx_add_guest_msr() can fail, but only in ways which are entirely fatal to the
guest, so handle failures using domain_crash(). vmx_del_msr() can fail with
-ESRCH, but we don't care, as this path will be taken during domain create
on a system lacking IBRSB.
Second, update vmx_msr_{read,write}_intercept() to use the load/save lists
rather than vcpu_msrs, and update the comment to describe the new state
location.
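(Again for context rather than as part of the patch: the accessors used above
are thin wrappers over the guest load/save list. A sketch of their shape,
assuming the helpers declared in asm/hvm/vmx/vmcs.h:

    /* Sketch only - vmx_find_msr() does the real list lookup in vmcs.c. */
    static inline int vmx_read_guest_msr(const struct vcpu *v, uint32_t msr,
                                         uint64_t *val)
    {
        const struct vmx_msr_entry *ent = vmx_find_msr(v, msr, VMX_MSR_GUEST);

        if ( !ent )
            return -ESRCH;

        *val = ent->data;

        return 0;
    }

vmx_write_guest_msr() is the analogous setter, updating ent->data in place.)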
Finally, adjust the entry/exit asm. Drop DO_SPEC_CTRL_ENTRY_FROM_HVM
entirely, and use a shorter code sequence to simply reload Xen's setting from
the top-of-stack block.
Because the guest values are loaded/saved atomically, we do not need to use
the shadowing logic to cope with late NMIs/etc, so we can omit
DO_SPEC_CTRL_EXIT_TO_GUEST entirely and VMRESUME/VMLAUNCH with Xen's value in
context. Furthermore, we can drop the SPEC_CTRL_ENTRY_FROM_PV too, as there's
no need to switch back to Xen's context on an early failure.
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
CC: Jan Beulich <JBeulich@xxxxxxxx>
CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>
CC: Wei Liu <wl@xxxxxxx>
CC: Jun Nakajima <jun.nakajima@xxxxxxxxx>
CC: Kevin Tian <kevin.tian@xxxxxxxxx>
Needs backporting as far as people can tolerate.
If the entry/exit logic were in C, I'd ASSERT() that shadow tracking is off,
but this is awkward to arrange in asm.
---
xen/arch/x86/hvm/vmx/entry.S | 19 ++++++++++-------
xen/arch/x86/hvm/vmx/vmx.c | 36 +++++++++++++++++++++++++++-----
xen/arch/x86/include/asm/msr.h | 10 ++++++++-
xen/arch/x86/include/asm/spec_ctrl_asm.h | 32 ++++------------------------
4 files changed, 56 insertions(+), 41 deletions(-)
diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
index 30139ae58e9d..297ed8685126 100644
--- a/xen/arch/x86/hvm/vmx/entry.S
+++ b/xen/arch/x86/hvm/vmx/entry.S
@@ -35,7 +35,14 @@ ENTRY(vmx_asm_vmexit_handler)
/* SPEC_CTRL_ENTRY_FROM_VMX Req: b=curr %rsp=regs/cpuinfo, Clob: acd */
ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM
- ALTERNATIVE "", DO_SPEC_CTRL_ENTRY_FROM_HVM, X86_FEATURE_SC_MSR_HVM
+
+ .macro restore_spec_ctrl
+ mov $MSR_SPEC_CTRL, %ecx
+ movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax
+ xor %edx, %edx
+ wrmsr
+ .endm
+ ALTERNATIVE "", restore_spec_ctrl, X86_FEATURE_SC_MSR_HVM
/* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
/* Hardware clears MSR_DEBUGCTL on VMExit. Reinstate it if debugging Xen. */
@@ -83,7 +90,6 @@ UNLIKELY_END(realmode)
/* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
/* SPEC_CTRL_EXIT_TO_VMX Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
- ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_HVM
ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)),
X86_FEATURE_SC_VERW_HVM
mov VCPU_hvm_guest_cr2(%rbx),%rax
@@ -119,12 +125,11 @@ UNLIKELY_END(realmode)
SAVE_ALL
/*
- * PV variant needed here as no guest code has executed (so
- * MSR_SPEC_CTRL can't have changed value), and NMIs/MCEs are liable
- * to hit (in which case the HVM variant might corrupt things).
+ * SPEC_CTRL_ENTRY notes
+ *
+ * If we end up here, no guest code has executed. We still have Xen's
+ * choice of MSR_SPEC_CTRL in context, and the RSB is safe.
*/
- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo Clob: acd */
- /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
call vmx_vmentry_failure
jmp .Lvmx_process_softirqs
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 28ee6393f11e..d7feb5f9c455 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -592,6 +592,7 @@ void vmx_update_exception_bitmap(struct vcpu *v)
static void vmx_cpuid_policy_changed(struct vcpu *v)
{
const struct cpuid_policy *cp = v->domain->arch.cpuid;
+ int rc = 0;
if ( opt_hvm_fep ||
(v->domain->arch.cpuid->x86_vendor != boot_cpu_data.x86_vendor) )
@@ -601,17 +602,27 @@ static void vmx_cpuid_policy_changed(struct vcpu *v)
vmx_vmcs_enter(v);
vmx_update_exception_bitmap(v);
- vmx_vmcs_exit(v);
/*
* We can safely pass MSR_SPEC_CTRL through to the guest, even if STIBP
* isn't enumerated in hardware, as SPEC_CTRL_STIBP is ignored.
*/
if ( cp->feat.ibrsb )
+ {
vmx_clear_msr_intercept(v, MSR_SPEC_CTRL, VMX_MSR_RW);
+
+ rc = vmx_add_guest_msr(v, MSR_SPEC_CTRL, 0);
+ if ( rc )
+ goto err;
+ }
else
+ {
vmx_set_msr_intercept(v, MSR_SPEC_CTRL, VMX_MSR_RW);
+ /* Can only fail with -ESRCH, and we don't care. */
+ vmx_del_msr(v, MSR_SPEC_CTRL, VMX_MSR_GUEST);
+ }
+
/* MSR_PRED_CMD is safe to pass through if the guest knows about it. */
if ( cp->feat.ibrsb || cp->extd.ibpb )
vmx_clear_msr_intercept(v, MSR_PRED_CMD, VMX_MSR_RW);
@@ -623,6 +634,15 @@ static void vmx_cpuid_policy_changed(struct vcpu *v)
vmx_clear_msr_intercept(v, MSR_FLUSH_CMD, VMX_MSR_RW);
else
vmx_set_msr_intercept(v, MSR_FLUSH_CMD, VMX_MSR_RW);
+
+ err:
+ vmx_vmcs_exit(v);
+
+ if ( rc )
+ {
+ printk(XENLOG_G_ERR "MSR load/save list error: %d", rc);
+ domain_crash(v->domain);
+ }
}
int vmx_guest_x86_mode(struct vcpu *v)
@@ -3128,7 +3148,6 @@ static int is_last_branch_msr(u32 ecx)
static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
{
struct vcpu *curr = current;
- const struct vcpu_msrs *msrs = curr->arch.msrs;
uint64_t tmp;
HVM_DBG_LOG(DBG_LEVEL_MSR, "ecx=%#x", msr);
@@ -3136,7 +3155,11 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
switch ( msr )
{
case MSR_SPEC_CTRL: /* guest_rdmsr() has already performed #GP checks. */
- *msr_content = msrs->spec_ctrl.raw;
+ if ( vmx_read_guest_msr(curr, msr, msr_content) )
+ {
+ ASSERT_UNREACHABLE();
+ goto gp_fault;
+ }
break;
case MSR_IA32_SYSENTER_CS:
@@ -3336,7 +3359,6 @@ void vmx_vlapic_msr_changed(struct vcpu *v)
static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
{
struct vcpu *v = current;
- struct vcpu_msrs *msrs = v->arch.msrs;
const struct cpuid_policy *cp = v->domain->arch.cpuid;
HVM_DBG_LOG(DBG_LEVEL_MSR, "ecx=%#x, msr_value=%#"PRIx64, msr,
msr_content);
@@ -3346,7 +3368,11 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
uint64_t rsvd, tmp;
case MSR_SPEC_CTRL: /* guest_wrmsr() has already performed #GP checks. */
- msrs->spec_ctrl.raw = msr_content;
+ if ( vmx_write_guest_msr(v, msr, msr_content) )
+ {
+ ASSERT_UNREACHABLE();
+ goto gp_fault;
+ }
return X86EMUL_OKAY;
case MSR_IA32_SYSENTER_CS:
diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h
index 0b2176a9bc53..5f851958992b 100644
--- a/xen/arch/x86/include/asm/msr.h
+++ b/xen/arch/x86/include/asm/msr.h
@@ -287,7 +287,15 @@ extern struct msr_policy raw_msr_policy,
/* Container object for per-vCPU MSRs */
struct vcpu_msrs
{
- /* 0x00000048 - MSR_SPEC_CTRL */
+ /*
+ * 0x00000048 - MSR_SPEC_CTRL
+ *
+ * For PV guests, this holds the guest kernel value. It is accessed on
+ * every entry/exit path.
+ *
+ * For VT-x guests, the guest value is held in the MSR guest load/save
+ * list.
+ */
struct {
uint32_t raw;
} spec_ctrl;
diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
index 18ecfcd70375..ddce8b33fd17 100644
--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
@@ -42,9 +42,10 @@
* path, or late in the exit path after restoring the guest value. This
* will corrupt the guest value.
*
- * Factor 1 is dealt with by relying on NMIs/MCEs being blocked immediately
- * after VMEXIT. The VMEXIT-specific code reads MSR_SPEC_CTRL and updates
- * current before loading Xen's MSR_SPEC_CTRL setting.
+ * Factor 1 is dealt with:
+ * - On VMX by using MSR load/save lists to have vmentry/exit atomically
+ * load/save the guest value. Xen's value is loaded in regular code, and
+ * there is no need to use the shadow logic (below).
*
* Factor 2 is harder. We maintain a shadow_spec_ctrl value, and a use_shadow
* boolean in the per cpu spec_ctrl_flags. The synchronous use is:
@@ -126,31 +127,6 @@
#endif
.endm
-.macro DO_SPEC_CTRL_ENTRY_FROM_HVM
-/*
- * Requires %rbx=current, %rsp=regs/cpuinfo
- * Clobbers %rax, %rcx, %rdx
- *
- * The common case is that a guest has direct access to MSR_SPEC_CTRL, at
- * which point we need to save the guest value before setting IBRS for Xen.
- * Unilaterally saving the guest value is shorter and faster than checking.
- */
- mov $MSR_SPEC_CTRL, %ecx
- rdmsr
-
- /* Stash the value from hardware. */
- mov VCPU_arch_msrs(%rbx), %rdx
- mov %eax, VCPUMSR_spec_ctrl_raw(%rdx)
- xor %edx, %edx
-
- /* Clear SPEC_CTRL shadowing *before* loading Xen's value. */
- andb $~SCF_use_shadow, CPUINFO_spec_ctrl_flags(%rsp)
-
- /* Load Xen's intended value. */
- movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax
- wrmsr
-.endm
-
.macro DO_SPEC_CTRL_ENTRY maybexen:req
/*
* Requires %rsp=regs (also cpuinfo if !maybexen)
--
2.11.0