
Re: [Xen-devel] [PATCH v2 for-4.7 5/5] x86/hvm: Fix invalidation for emulated invlpg instructions



On 09/05/16 19:27, Andrew Cooper wrote:
> hap_invlpg() is reachable from the instruction emulator, which means
> introspection and tests using hvm_fep can end up here.  As such, crashing the
> domain is not an appropriate action to take.
> 
> Fixing this involves rearranging the callgraph.
> 
> paging_invlpg() is now the central entry point.  It first checks for the
> non-canonical NOP case, and calls into the paging subsystem.  If a real flush
> is needed, it will call the appropriate handler for the vcpu.  This allows the
> PV callsites of paging_invlpg() to be simplified.
> 
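For context, the resulting central dispatcher (condensed from the
xen/arch/x86/mm.c hunk below, with comments added; the hunk itself is
authoritative) looks like:

    void paging_invlpg(struct vcpu *v, unsigned long va)
    {
        if ( !is_canonical_address(va) )
            return;                      /* architecturally a NOP */

        if ( paging_mode_enabled(v->domain) &&
             !paging_get_hostmode(v)->invlpg(v, va) )
            return;                      /* paging code: no flush needed */

        if ( is_pv_vcpu(v) )
            flush_tlb_one_local(va);     /* PV: perform the TLB flush */
        else
            hvm_funcs.invlpg(v, va);     /* HVM: vendor ASID/VPID handling */
    }
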
> The sole user of hvm_funcs.invlpg_intercept() is altered to use
> paging_invlpg() instead, allowing the .invlpg_intercept() hook to be removed.
> 
> For both VMX and SVM, the existing $VENDOR_invlpg_intercept() is split in
> half.  $VENDOR_invlpg_intercept() stays as the intercept handler only (which
> just calls paging_invlpg()), and new $VENDOR_invlpg() functions do the
> ASID/VPID management.  These latter functions are made available in hvm_funcs
> for paging_invlpg() to use.
> 
> As a result, correct ASID/VPID management occurs for the hvmemul path, even if
> it did not originate from a real hardware intercept.
> 
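Schematically, the emulated path then becomes (a sketch assembled from
the hunks below, not a verbatim trace):

    hvmemul_invlpg()
      -> paging_invlpg(current, addr)
           -> paging_get_hostmode(v)->invlpg()  /* e.g. hap_invlpg() */
           -> hvm_funcs.invlpg(v, va)           /* vmx_invlpg()/svm_invlpg() */
                -> vpid_sync_vcpu_gva() / svm_asid_g_invlpg()

i.e. the same VPID/ASID maintenance as the hardware intercept path.
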
> Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>

I haven't reviewed it in detail, but it looks like a good idea; and based
on the other Reviewed-bys (and with or without Jan's suggested clean-up):

Acked-by: George Dunlap <george.dunlap@xxxxxxxxxx>

> ---
> CC: Jan Beulich <JBeulich@xxxxxxxx>
> CC: Paul Durrant <paul.durrant@xxxxxxxxxx>
> CC: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
> CC: Tim Deegan <tim@xxxxxxx>
> CC: Jun Nakajima <jun.nakajima@xxxxxxxxx>
> CC: Kevin Tian <kevin.tian@xxxxxxxxx>
> CC: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
> CC: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
> CC: Wei Liu <wei.liu2@xxxxxxxxxx>
> 
> v2:
>  * Split performance improvement for AMD into previous patch
>  * Add vcpu parameter to new invlpg() function, and avoid assuming 'current'
>  * Modify paging_invlpg() to be void, and issue the PV TLB flush as well
> ---
>  xen/arch/x86/hvm/emulate.c    |  4 ++--
>  xen/arch/x86/hvm/svm/svm.c    | 11 +++++++----
>  xen/arch/x86/hvm/vmx/vmx.c    | 14 +++++++++-----
>  xen/arch/x86/mm.c             | 24 ++++++++++++++++++------
>  xen/arch/x86/mm/hap/hap.c     | 23 ++++++++++-------------
>  xen/include/asm-x86/hvm/hvm.h |  2 +-
>  xen/include/asm-x86/paging.h  | 11 ++---------
>  7 files changed, 49 insertions(+), 40 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
> index e6316be..b9cac8e 100644
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -1623,8 +1623,8 @@ static int hvmemul_invlpg(
>          rc = X86EMUL_OKAY;
>      }
>  
> -    if ( rc == X86EMUL_OKAY && is_canonical_address(addr) )
> -        hvm_funcs.invlpg_intercept(addr);
> +    if ( rc == X86EMUL_OKAY )
> +        paging_invlpg(current, addr);
>  
>      return rc;
>  }
> diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
> index 081a5d8..679e615 100644
> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -2222,10 +2222,13 @@ static void svm_invlpga_intercept(
>  
>  static void svm_invlpg_intercept(unsigned long vaddr)
>  {
> -    struct vcpu *curr = current;
>      HVMTRACE_LONG_2D(INVLPG, 0, TRC_PAR_LONG(vaddr));
> -    if ( paging_invlpg(curr, vaddr) )
> -        svm_asid_g_invlpg(curr, vaddr);
> +    paging_invlpg(current, vaddr);
> +}
> +
> +static void svm_invlpg(struct vcpu *v, unsigned long vaddr)
> +{
> +    svm_asid_g_invlpg(v, vaddr);
>  }
>  
>  static struct hvm_function_table __initdata svm_function_table = {
> @@ -2258,12 +2261,12 @@ static struct hvm_function_table __initdata svm_function_table = {
>      .inject_trap          = svm_inject_trap,
>      .init_hypercall_page  = svm_init_hypercall_page,
>      .event_pending        = svm_event_pending,
> +    .invlpg               = svm_invlpg,
>      .cpuid_intercept      = svm_cpuid_intercept,
>      .wbinvd_intercept     = svm_wbinvd_intercept,
>      .fpu_dirty_intercept  = svm_fpu_dirty_intercept,
>      .msr_read_intercept   = svm_msr_read_intercept,
>      .msr_write_intercept  = svm_msr_write_intercept,
> -    .invlpg_intercept     = svm_invlpg_intercept,
>      .set_rdtsc_exiting    = svm_set_rdtsc_exiting,
>      .get_insn_bytes       = svm_get_insn_bytes,
>  
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index bc4410f..3acf1ab 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -81,7 +81,7 @@ static void vmx_wbinvd_intercept(void);
>  static void vmx_fpu_dirty_intercept(void);
>  static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content);
>  static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content);
> -static void vmx_invlpg_intercept(unsigned long vaddr);
> +static void vmx_invlpg(struct vcpu *v, unsigned long vaddr);
>  static int vmx_vmfunc_intercept(struct cpu_user_regs *regs);
>  
>  struct vmx_pi_blocking_vcpu {
> @@ -2137,6 +2137,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
>      .inject_trap          = vmx_inject_trap,
>      .init_hypercall_page  = vmx_init_hypercall_page,
>      .event_pending        = vmx_event_pending,
> +    .invlpg               = vmx_invlpg,
>      .cpu_up               = vmx_cpu_up,
>      .cpu_down             = vmx_cpu_down,
>      .cpuid_intercept      = vmx_cpuid_intercept,
> @@ -2144,7 +2145,6 @@ static struct hvm_function_table __initdata vmx_function_table = {
>      .fpu_dirty_intercept  = vmx_fpu_dirty_intercept,
>      .msr_read_intercept   = vmx_msr_read_intercept,
>      .msr_write_intercept  = vmx_msr_write_intercept,
> -    .invlpg_intercept     = vmx_invlpg_intercept,
>      .vmfunc_intercept     = vmx_vmfunc_intercept,
>      .handle_cd            = vmx_handle_cd,
>      .set_info_guest       = vmx_set_info_guest,
> @@ -2432,10 +2432,14 @@ static void vmx_dr_access(unsigned long exit_qualification,
>  
>  static void vmx_invlpg_intercept(unsigned long vaddr)
>  {
> -    struct vcpu *curr = current;
>      HVMTRACE_LONG_2D(INVLPG, /*invlpga=*/ 0, TRC_PAR_LONG(vaddr));
> -    if ( paging_invlpg(curr, vaddr) && cpu_has_vmx_vpid )
> -        vpid_sync_vcpu_gva(curr, vaddr);
> +    paging_invlpg(current, vaddr);
> +}
> +
> +static void vmx_invlpg(struct vcpu *v, unsigned long vaddr)
> +{
> +    if ( cpu_has_vmx_vpid )
> +        vpid_sync_vcpu_gva(v, vaddr);
>  }
>  
>  static int vmx_vmfunc_intercept(struct cpu_user_regs *regs)
> diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
> index 2bb920b..03a4d5b 100644
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -3386,10 +3386,8 @@ long do_mmuext_op(
>          case MMUEXT_INVLPG_LOCAL:
>              if ( unlikely(d != pg_owner) )
>                  rc = -EPERM;
> -            else if ( !paging_mode_enabled(d)
> -                      ? __addr_ok(op.arg1.linear_addr)
> -                      : paging_invlpg(curr, op.arg1.linear_addr) )
> -                flush_tlb_one_local(op.arg1.linear_addr);
> +            else
> +                paging_invlpg(curr, op.arg1.linear_addr);
>              break;
>  
>          case MMUEXT_TLB_FLUSH_MULTI:
> @@ -4510,8 +4508,7 @@ static int __do_update_va_mapping(
>          switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
>          {
>          case UVMF_LOCAL:
> -            if ( !paging_mode_enabled(d) || paging_invlpg(v, va) )
> -                flush_tlb_one_local(va);
> +            paging_invlpg(v, va);
>              break;
>          case UVMF_ALL:
>              flush_tlb_one_mask(d->domain_dirty_cpumask, va);
> @@ -6478,6 +6475,21 @@ const unsigned long *__init get_platform_badpages(unsigned int *array_size)
>      return bad_pages;
>  }
>  
> +void paging_invlpg(struct vcpu *v, unsigned long va)
> +{
> +    if ( !is_canonical_address(va) )
> +        return;
> +
> +    if ( paging_mode_enabled(v->domain) &&
> +         !paging_get_hostmode(v)->invlpg(v, va) )
> +        return;
> +
> +    if ( is_pv_vcpu(v) )
> +        flush_tlb_one_local(va);
> +    else
> +        hvm_funcs.invlpg(v, va);
> +}
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
> index 4ab99bb..9c2cd49 100644
> --- a/xen/arch/x86/mm/hap/hap.c
> +++ b/xen/arch/x86/mm/hap/hap.c
> @@ -680,23 +680,20 @@ static int hap_page_fault(struct vcpu *v, unsigned long va,
>  
>  /*
>   * HAP guests can handle invlpg without needing any action from Xen, so
> - * should not be intercepting it.
> + * should not be intercepting it.  However, we need to correctly handle
> + * getting here from instruction emulation.
>   */
>  static bool_t hap_invlpg(struct vcpu *v, unsigned long va)
>  {
> -    if (nestedhvm_enabled(v->domain)) {
> -        /* Emulate INVLPGA:
> -         * Must perform the flush right now or an other vcpu may
> -         * use it when we use the next VMRUN emulation, otherwise.
> -         */
> -        if ( vcpu_nestedhvm(v).nv_p2m )
> -            p2m_flush(v, vcpu_nestedhvm(v).nv_p2m);
> -        return 1;
> -    }
> +    /*
> +     * Emulate INVLPGA:
> +     * Must perform the flush right now, or another vcpu may
> +     * otherwise use the stale nv_p2m at the next VMRUN emulation.
> +     */
> +    if ( nestedhvm_enabled(v->domain) && vcpu_nestedhvm(v).nv_p2m )
> +        p2m_flush(v, vcpu_nestedhvm(v).nv_p2m);
>  
> -    HAP_ERROR("Intercepted a guest INVLPG (%pv) with HAP enabled\n", v);
> -    domain_crash(v->domain);
> -    return 0;
> +    return 1;
>  }
>  
>  static void hap_update_cr3(struct vcpu *v, int do_locking)
> diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
> index add6ee8..ddbcbe8 100644
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -154,6 +154,7 @@ struct hvm_function_table {
>      void (*init_hypercall_page)(struct domain *d, void *hypercall_page);
>  
>      int  (*event_pending)(struct vcpu *v);
> +    void (*invlpg)(struct vcpu *v, unsigned long vaddr);
>  
>      int  (*cpu_up_prepare)(unsigned int cpu);
>      void (*cpu_dead)(unsigned int cpu);
> @@ -172,7 +173,6 @@ struct hvm_function_table {
>      void (*fpu_dirty_intercept)(void);
>      int (*msr_read_intercept)(unsigned int msr, uint64_t *msr_content);
>      int (*msr_write_intercept)(unsigned int msr, uint64_t msr_content);
> -    void (*invlpg_intercept)(unsigned long vaddr);
>      int (*vmfunc_intercept)(struct cpu_user_regs *regs);
>      void (*handle_cd)(struct vcpu *v, unsigned long value);
>      void (*set_info_guest)(struct vcpu *v);
> diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
> index ab131cc..a1401ab 100644
> --- a/xen/include/asm-x86/paging.h
> +++ b/xen/include/asm-x86/paging.h
> @@ -237,15 +237,8 @@ paging_fault(unsigned long va, struct cpu_user_regs *regs)
>      return paging_get_hostmode(v)->page_fault(v, va, regs);
>  }
>  
> -/* Handle invlpg requests on vcpus.
> - * Returns 1 if the invlpg instruction should be issued on the hardware,
> - * or 0 if it's safe not to do so. */
> -static inline bool_t paging_invlpg(struct vcpu *v, unsigned long va)
> -{
> -    return (paging_mode_external(v->domain) ? is_canonical_address(va)
> -                                            : __addr_ok(va)) &&
> -           paging_get_hostmode(v)->invlpg(v, va);
> -}
> +/* Handle invlpg requests on vcpus. */
> +void paging_invlpg(struct vcpu *v, unsigned long va);
>  
>  /* Translate a guest virtual address to the frame number that the
>   * *guest* pagetables would map it to.  Returns INVALID_GFN if the guest
> 

