
Re: [Xen-devel] [PATCHv3] x86/xen: avoid m2p lookup when setting early page table entries



On 23/06/16 11:51, David Vrabel wrote:
> When page table entries are set using xen_set_pte_init() during early
> boot there is no page fault handler that could handle a fault when
> performing an M2P lookup.
> 
> In 64-bit guests (usually dom0), early_ioremap() would fault in
> xen_set_pte_init() because the M2P lookup faults: the MFN is in
> MMIO space and not mapped in the M2P.  This lookup is done to see if
> the PFN is in the range used for the initial page table pages, so
> that the PTE may be set as read-only.
> 
> The M2P lookup can be avoided by moving the check (and clear of RW)
> earlier when the PFN is still available.
> 
> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
> ---
> Cc: Kevin Moraga <kmoragas@xxxxxxxxxx>
> 
> v3:
> - fold mask_rw_pte()/mask_rw_pteval() into their callers.
> 
> v2:
> - Remove __init annotation from xen_make_pte_init() since
>   PV_CALLEE_SAVE_REGS_THUNK always puts the thunk in .text.
> 
> - mask_rw_pte() -> mask_rw_pteval() for x86-64.
> ---
>  arch/x86/xen/mmu.c | 76 +++++++++++++++++++++++++-----------------------------
>  1 file changed, 35 insertions(+), 41 deletions(-)
> 
> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> index 478a2de..64d8f0b 100644
> --- a/arch/x86/xen/mmu.c
> +++ b/arch/x86/xen/mmu.c
> @@ -1551,41 +1551,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
>  #endif
>  }
>  
> -#ifdef CONFIG_X86_32
> -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
> -{
> -     /* If there's an existing pte, then don't allow _PAGE_RW to be set */
> -     if (pte_val_ma(*ptep) & _PAGE_PRESENT)
> -             pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
> -                            pte_val_ma(pte));
> -
> -     return pte;
> -}
> -#else /* CONFIG_X86_64 */
> -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
> -{
> -     unsigned long pfn;
> -
> -     if (xen_feature(XENFEAT_writable_page_tables) ||
> -         xen_feature(XENFEAT_auto_translated_physmap) ||
> -         xen_start_info->mfn_list >= __START_KERNEL_map)
> -             return pte;
> -
> -     /*
> -      * Pages belonging to the initial p2m list mapped outside the default
> -      * address range must be mapped read-only. This region contains the
> -      * page tables for mapping the p2m list, too, and page tables MUST be
> -      * mapped read-only.
> -      */
> -     pfn = pte_pfn(pte);
> -     if (pfn >= xen_start_info->first_p2m_pfn &&
> -         pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
> -             pte = __pte_ma(pte_val_ma(pte) & ~_PAGE_RW);
> -
> -     return pte;
> -}
> -#endif /* CONFIG_X86_64 */
> -
>  /*
>   * Init-time set_pte while constructing initial pagetables, which
>   * doesn't allow RO page table pages to be remapped RW.
> @@ -1600,13 +1565,41 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
>   * so always write the PTE directly and rely on Xen trapping and
>   * emulating any updates as necessary.
>   */
> -static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
> +__visible pte_t xen_make_pte_init(pteval_t pte)
>  {
> -     if (pte_mfn(pte) != INVALID_P2M_ENTRY)
> -             pte = mask_rw_pte(ptep, pte);
> -     else
> -             pte = __pte_ma(0);
> +#ifdef CONFIG_X86_64
> +     unsigned long pfn;
> +
> +     /*
> +      * Pages belonging to the initial p2m list mapped outside the default
> +      * address range must be mapped read-only. This region contains the
> +      * page tables for mapping the p2m list, too, and page tables MUST be
> +      * mapped read-only.
> +      */
> +     pfn = (pte & PTE_PFN_MASK) >> PAGE_SHIFT;
> +     if (xen_start_info->mfn_list < __START_KERNEL_map &&
> +         pfn >= xen_start_info->first_p2m_pfn &&
> +         pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
> +             pte &= ~_PAGE_RW;
> +#endif
> +     pte = pte_pfn_to_mfn(pte);
>  
> +     if ((pte & PTE_PFN_MASK) >> PAGE_SHIFT == INVALID_P2M_ENTRY)

How can this ever be true? I know this is just the open-coded
variant of the test from the original xen_set_pte_init(), but either
the if isn't needed at all or it should be corrected.
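For reference, pte_pfn_to_mfn() in the same file already turns an
entry without a valid MFN into an empty non-present pte, which is why
I can't see how the test below can ever fire. Roughly (a simplified
paraphrase from memory, omitting the auto-translated and
foreign/identity frame handling, not the verbatim source):

static pteval_t pte_pfn_to_mfn(pteval_t val)
{
	if (val & _PAGE_PRESENT) {
		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
		pteval_t flags = val & PTE_FLAGS_MASK;
		unsigned long mfn = __pfn_to_mfn(pfn);

		/*
		 * No MFN for this PFN: produce an empty non-present
		 * pte instead, losing the original PFN.
		 */
		if (unlikely(mfn == INVALID_P2M_ENTRY)) {
			mfn = 0;
			flags = 0;
		}
		val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
	}
	return val;
}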

> +             pte = 0;
> +
> +     return native_make_pte(pte);
> +}
> +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init);
> +
> +static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
> +{
> +#ifdef CONFIG_X86_32
> +     /* If there's an existing pte, then don't allow _PAGE_RW to be set */
> +     if (pte_mfn(pte) != INVALID_P2M_ENTRY
> +         && pte_val_ma(*ptep) & _PAGE_PRESENT)

Even more obvious: do we really create ptes with INVALID_P2M_ENTRY and
_PAGE_PRESENT? I think the first part of the if can be dropped again.
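If so, the 32-bit path collapses to something like this (untested
sketch; the masking expression leaves _PAGE_RW set only if both the
existing and the new pte have it):

static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
{
#ifdef CONFIG_X86_32
	/* Don't allow an existing, present pte to gain _PAGE_RW. */
	if (pte_val_ma(*ptep) & _PAGE_PRESENT)
		pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
			       pte_val_ma(pte));
#endif
	native_set_pte(ptep, pte);
}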


Juergen

> +             pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
> +                            pte_val_ma(pte));
> +#endif
>       native_set_pte(ptep, pte);
>  }
>  
> @@ -2407,6 +2400,7 @@ static void __init xen_post_allocator_init(void)
>       pv_mmu_ops.alloc_pud = xen_alloc_pud;
>       pv_mmu_ops.release_pud = xen_release_pud;
>  #endif
> +     pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte);
>  
>  #ifdef CONFIG_X86_64
>       pv_mmu_ops.write_cr3 = &xen_write_cr3;
> @@ -2455,7 +2449,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
>       .pte_val = PV_CALLEE_SAVE(xen_pte_val),
>       .pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
>  
> -     .make_pte = PV_CALLEE_SAVE(xen_make_pte),
> +     .make_pte = PV_CALLEE_SAVE(xen_make_pte_init),
>       .make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
>  
>  #ifdef CONFIG_X86_PAE
> 


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel