
Re: [Xen-devel] [PATCH v2 for-4.7 5/5] x86/hvm: Fix invalidation for emulated invlpg instructions



On 09/05/16 19:27, Andrew Cooper wrote:
> hap_invlpg() is reachable from the instruction emulator, which means
> introspection and tests using hvm_fep can end up here.  As such, crashing the
> domain is not an appropriate action to take.
> 
> Fixing this involves rearranging the callgraph.
> 
> paging_invlpg() is now the central entry point.  It first checks for the
> non-canonical NOP case, and calls into the paging subsystem.  If a real flush
> is needed, it will call the appropriate handler for the vcpu.  This allows the
> PV callsites of paging_invlpg() to be simplified.
> 
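For context, the resulting central dispatcher (condensed from the
xen/arch/x86/mm.c hunk below, with comments added; the hunk itself is
authoritative) looks like:

    void paging_invlpg(struct vcpu *v, unsigned long va)
    {
        if ( !is_canonical_address(va) )
            return;                      /* architecturally a NOP */

        if ( paging_mode_enabled(v->domain) &&
             !paging_get_hostmode(v)->invlpg(v, va) )
            return;                      /* paging code: no flush needed */

        if ( is_pv_vcpu(v) )
            flush_tlb_one_local(va);     /* PV: perform the TLB flush */
        else
            hvm_funcs.invlpg(v, va);     /* HVM: vendor ASID/VPID handling */
    }
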
> The sole user of hvm_funcs.invlpg_intercept() is altered to use
> paging_invlpg() instead, allowing the .invlpg_intercept() hook to be removed.
> 
> For both VMX and SVM, the existing $VENDOR_invlpg_intercept() is split in
> half.  $VENDOR_invlpg_intercept() stays as the intercept handler only (which
> just calls paging_invlpg()), and new $VENDOR_invlpg() functions do the
> ASID/VPID management.  These latter functions are made available in hvm_funcs
> for paging_invlpg() to use.
> 
> As a result, correct ASID/VPID management occurs for the hvmemul path, even if
> it did not originate from a real hardware intercept.
> 
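Schematically, the emulated path then becomes (a sketch assembled from
the hunks below, not a verbatim trace):

    hvmemul_invlpg()
      -> paging_invlpg(current, addr)
           -> paging_get_hostmode(v)->invlpg()  /* e.g. hap_invlpg() */
           -> hvm_funcs.invlpg(v, va)           /* vmx_invlpg()/svm_invlpg() */
                -> vpid_sync_vcpu_gva() / svm_asid_g_invlpg()

i.e. the same VPID/ASID maintenance as the hardware intercept path.
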
> Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>

I haven't reviewed it in detail, but it looks like a good idea; and based
on the other Reviewed-bys (and with or without Jan's suggested clean-up):

Acked-by: George Dunlap <george.dunlap@xxxxxxxxxx>

> ---
> CC: Jan Beulich <JBeulich@xxxxxxxx>
> CC: Paul Durrant <paul.durrant@xxxxxxxxxx>
> CC: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
> CC: Tim Deegan <tim@xxxxxxx>
> CC: Jun Nakajima <jun.nakajima@xxxxxxxxx>
> CC: Kevin Tian <kevin.tian@xxxxxxxxx>
> CC: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
> CC: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
> CC: Wei Liu <wei.liu2@xxxxxxxxxx>
> 
> v2:
>  * Split performance improvement for AMD into previous patch
>  * Add vcpu parameter to new invlpg() function, and avoid assuming 'current'
>  * Modify paging_invlpg() to be void, and issue the PV TLB flush as well
> ---
>  xen/arch/x86/hvm/emulate.c    |  4 ++--
>  xen/arch/x86/hvm/svm/svm.c    | 11 +++++++----
>  xen/arch/x86/hvm/vmx/vmx.c    | 14 +++++++++-----
>  xen/arch/x86/mm.c             | 24 ++++++++++++++++++------
>  xen/arch/x86/mm/hap/hap.c     | 23 ++++++++++-------------
>  xen/include/asm-x86/hvm/hvm.h |  2 +-
>  xen/include/asm-x86/paging.h  | 11 ++---------
>  7 files changed, 49 insertions(+), 40 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
> index e6316be..b9cac8e 100644
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -1623,8 +1623,8 @@ static int hvmemul_invlpg(
>          rc = X86EMUL_OKAY;
>      }
>  
> -    if ( rc == X86EMUL_OKAY && is_canonical_address(addr) )
> -        hvm_funcs.invlpg_intercept(addr);
> +    if ( rc == X86EMUL_OKAY )
> +        paging_invlpg(current, addr);
>  
>      return rc;
>  }
> diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
> index 081a5d8..679e615 100644
> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -2222,10 +2222,13 @@ static void svm_invlpga_intercept(
>  
>  static void svm_invlpg_intercept(unsigned long vaddr)
>  {
> -    struct vcpu *curr = current;
>      HVMTRACE_LONG_2D(INVLPG, 0, TRC_PAR_LONG(vaddr));
> -    if ( paging_invlpg(curr, vaddr) )
> -        svm_asid_g_invlpg(curr, vaddr);
> +    paging_invlpg(current, vaddr);
> +}
> +
> +static void svm_invlpg(struct vcpu *v, unsigned long vaddr)
> +{
> +    svm_asid_g_invlpg(v, vaddr);
>  }
>  
>  static struct hvm_function_table __initdata svm_function_table = {
> @@ -2258,12 +2261,12 @@ static struct hvm_function_table __initdata svm_function_table = {
>      .inject_trap          = svm_inject_trap,
>      .init_hypercall_page  = svm_init_hypercall_page,
>      .event_pending        = svm_event_pending,
> +    .invlpg               = svm_invlpg,
>      .cpuid_intercept      = svm_cpuid_intercept,
>      .wbinvd_intercept     = svm_wbinvd_intercept,
>      .fpu_dirty_intercept  = svm_fpu_dirty_intercept,
>      .msr_read_intercept   = svm_msr_read_intercept,
>      .msr_write_intercept  = svm_msr_write_intercept,
> -    .invlpg_intercept     = svm_invlpg_intercept,
>      .set_rdtsc_exiting    = svm_set_rdtsc_exiting,
>      .get_insn_bytes       = svm_get_insn_bytes,
>  
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index bc4410f..3acf1ab 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -81,7 +81,7 @@ static void vmx_wbinvd_intercept(void);
>  static void vmx_fpu_dirty_intercept(void);
>  static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content);
>  static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content);
> -static void vmx_invlpg_intercept(unsigned long vaddr);
> +static void vmx_invlpg(struct vcpu *v, unsigned long vaddr);
>  static int vmx_vmfunc_intercept(struct cpu_user_regs *regs);
>  
>  struct vmx_pi_blocking_vcpu {
> @@ -2137,6 +2137,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
>      .inject_trap          = vmx_inject_trap,
>      .init_hypercall_page  = vmx_init_hypercall_page,
>      .event_pending        = vmx_event_pending,
> +    .invlpg               = vmx_invlpg,
>      .cpu_up               = vmx_cpu_up,
>      .cpu_down             = vmx_cpu_down,
>      .cpuid_intercept      = vmx_cpuid_intercept,
> @@ -2144,7 +2145,6 @@ static struct hvm_function_table __initdata vmx_function_table = {
>      .fpu_dirty_intercept  = vmx_fpu_dirty_intercept,
>      .msr_read_intercept   = vmx_msr_read_intercept,
>      .msr_write_intercept  = vmx_msr_write_intercept,
> -    .invlpg_intercept     = vmx_invlpg_intercept,
>      .vmfunc_intercept     = vmx_vmfunc_intercept,
>      .handle_cd            = vmx_handle_cd,
>      .set_info_guest       = vmx_set_info_guest,
> @@ -2432,10 +2432,14 @@ static void vmx_dr_access(unsigned long exit_qualification,
>  
>  static void vmx_invlpg_intercept(unsigned long vaddr)
>  {
> -    struct vcpu *curr = current;
>      HVMTRACE_LONG_2D(INVLPG, /*invlpga=*/ 0, TRC_PAR_LONG(vaddr));
> -    if ( paging_invlpg(curr, vaddr) && cpu_has_vmx_vpid )
> -        vpid_sync_vcpu_gva(curr, vaddr);
> +    paging_invlpg(current, vaddr);
> +}
> +
> +static void vmx_invlpg(struct vcpu *v, unsigned long vaddr)
> +{
> +    if ( cpu_has_vmx_vpid )
> +        vpid_sync_vcpu_gva(v, vaddr);
>  }
>  
>  static int vmx_vmfunc_intercept(struct cpu_user_regs *regs)
> diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
> index 2bb920b..03a4d5b 100644
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -3386,10 +3386,8 @@ long do_mmuext_op(
>          case MMUEXT_INVLPG_LOCAL:
>              if ( unlikely(d != pg_owner) )
>                  rc = -EPERM;
> -            else if ( !paging_mode_enabled(d)
> -                      ? __addr_ok(op.arg1.linear_addr)
> -                      : paging_invlpg(curr, op.arg1.linear_addr) )
> -                flush_tlb_one_local(op.arg1.linear_addr);
> +            else
> +                paging_invlpg(curr, op.arg1.linear_addr);
>              break;
>  
>          case MMUEXT_TLB_FLUSH_MULTI:
> @@ -4510,8 +4508,7 @@ static int __do_update_va_mapping(
>          switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
>          {
>          case UVMF_LOCAL:
> -            if ( !paging_mode_enabled(d) || paging_invlpg(v, va) )
> -                flush_tlb_one_local(va);
> +            paging_invlpg(v, va);
>              break;
>          case UVMF_ALL:
>              flush_tlb_one_mask(d->domain_dirty_cpumask, va);
> @@ -6478,6 +6475,21 @@ const unsigned long *__init get_platform_badpages(unsigned int *array_size)
>      return bad_pages;
>  }
>  
> +void paging_invlpg(struct vcpu *v, unsigned long va)
> +{
> +    if ( !is_canonical_address(va) )
> +        return;
> +
> +    if ( paging_mode_enabled(v->domain) &&
> +         !paging_get_hostmode(v)->invlpg(v, va) )
> +        return;
> +
> +    if ( is_pv_vcpu(v) )
> +        flush_tlb_one_local(va);
> +    else
> +        hvm_funcs.invlpg(v, va);
> +}
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
> index 4ab99bb..9c2cd49 100644
> --- a/xen/arch/x86/mm/hap/hap.c
> +++ b/xen/arch/x86/mm/hap/hap.c
> @@ -680,23 +680,20 @@ static int hap_page_fault(struct vcpu *v, unsigned long va,
>  
>  /*
>   * HAP guests can handle invlpg without needing any action from Xen, so
> - * should not be intercepting it.
> + * should not be intercepting it.  However, we need to correctly handle
> + * getting here from instruction emulation.
>   */
>  static bool_t hap_invlpg(struct vcpu *v, unsigned long va)
>  {
> -    if (nestedhvm_enabled(v->domain)) {
> -        /* Emulate INVLPGA:
> -         * Must perform the flush right now or an other vcpu may
> -         * use it when we use the next VMRUN emulation, otherwise.
> -         */
> -        if ( vcpu_nestedhvm(v).nv_p2m )
> -            p2m_flush(v, vcpu_nestedhvm(v).nv_p2m);
> -        return 1;
> -    }
> +    /*
> +     * Emulate INVLPGA:
> +     * Must perform the flush right now, or another vcpu may
> +     * otherwise use the stale nv_p2m at the next VMRUN emulation.
> +     */
> +    if ( nestedhvm_enabled(v->domain) && vcpu_nestedhvm(v).nv_p2m )
> +        p2m_flush(v, vcpu_nestedhvm(v).nv_p2m);
>  
> -    HAP_ERROR("Intercepted a guest INVLPG (%pv) with HAP enabled\n", v);
> -    domain_crash(v->domain);
> -    return 0;
> +    return 1;
>  }
>  
>  static void hap_update_cr3(struct vcpu *v, int do_locking)
> diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
> index add6ee8..ddbcbe8 100644
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -154,6 +154,7 @@ struct hvm_function_table {
>      void (*init_hypercall_page)(struct domain *d, void *hypercall_page);
>  
>      int  (*event_pending)(struct vcpu *v);
> +    void (*invlpg)(struct vcpu *v, unsigned long vaddr);
>  
>      int  (*cpu_up_prepare)(unsigned int cpu);
>      void (*cpu_dead)(unsigned int cpu);
> @@ -172,7 +173,6 @@ struct hvm_function_table {
>      void (*fpu_dirty_intercept)(void);
>      int (*msr_read_intercept)(unsigned int msr, uint64_t *msr_content);
>      int (*msr_write_intercept)(unsigned int msr, uint64_t msr_content);
> -    void (*invlpg_intercept)(unsigned long vaddr);
>      int (*vmfunc_intercept)(struct cpu_user_regs *regs);
>      void (*handle_cd)(struct vcpu *v, unsigned long value);
>      void (*set_info_guest)(struct vcpu *v);
> diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
> index ab131cc..a1401ab 100644
> --- a/xen/include/asm-x86/paging.h
> +++ b/xen/include/asm-x86/paging.h
> @@ -237,15 +237,8 @@ paging_fault(unsigned long va, struct cpu_user_regs *regs)
>      return paging_get_hostmode(v)->page_fault(v, va, regs);
>  }
>  
> -/* Handle invlpg requests on vcpus.
> - * Returns 1 if the invlpg instruction should be issued on the hardware,
> - * or 0 if it's safe not to do so. */
> -static inline bool_t paging_invlpg(struct vcpu *v, unsigned long va)
> -{
> -    return (paging_mode_external(v->domain) ? is_canonical_address(va)
> -                                            : __addr_ok(va)) &&
> -           paging_get_hostmode(v)->invlpg(v, va);
> -}
> +/* Handle invlpg requests on vcpus. */
> +void paging_invlpg(struct vcpu *v, unsigned long va);
>  
>  /* Translate a guest virtual address to the frame number that the
>   * *guest* pagetables would map it to.  Returns INVALID_GFN if the guest
> 

