[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCHv2] x86/xen: avoid m2p lookup when setting early page table entries



On 06/22/2016 07:19 AM, David Vrabel wrote:
> On 22/06/16 11:54, David Vrabel wrote:
>> On 21/06/16 20:31, Boris Ostrovsky wrote:
>>> On 06/21/2016 12:09 PM, David Vrabel wrote:
>>>> When page table entries are set using xen_set_pte_init() during early
>>>> boot there is no page fault handler that could handle a fault when
>>>> performing an M2P lookup.
>>>>
>>>> In a 64-bit guest (usually dom0) early_ioremap() would fault in
>>>> xen_set_pte_init() because an M2P lookup faults because the MFN is in
>>>> MMIO space and not mapped in the M2P.  This lookup is done to see if
>>>> the PFN is in the range used for the initial page table pages, so that
>>>> the PTE may be set as read-only.
>>>>
>>>> The M2P lookup can be avoided by moving the check (and clear of RW)
>>>> earlier when the PFN is still available.
>> [...]
>>>> --- a/arch/x86/xen/mmu.c
>>>> +++ b/arch/x86/xen/mmu.c
>>>> @@ -1562,7 +1562,7 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
>>>>    return pte;
>>>>  }
>>>>  #else /* CONFIG_X86_64 */
>>>> -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
>>>> +static pteval_t __init mask_rw_pte(pteval_t pte)
>>>>  {
>>>>    unsigned long pfn;
>>>>  
>>>> @@ -1577,10 +1577,10 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
>>>>     * page tables for mapping the p2m list, too, and page tables MUST be
>>>>     * mapped read-only.
>>>>     */
>>>> -  pfn = pte_pfn(pte);
>>>> +  pfn = (pte & PTE_PFN_MASK) >> PAGE_SHIFT;
>>> Is it obvious that we are holding valid PFN at this point? It wasn't
>>> immediately obvious to me so I wonder whether a comment stating this
>>> would be useful here (yes, you mention it in the commit messages).
>> I don't understand what you mean by a "valid PFN"?
>>
>> This is only called from xen_make_pte_init() which is for converting
>> ptevals containing PFNs to MFNs.  Did that answer your question?

Yes.

>>
>> Would it be clearer if I just inlined the two functions like so:

Yes, I think it's much better this way. Otherwise just by looking at
mask_rw_pte() it may not be clear that we can get PFN directly from the
pte (which is what I meant when I said "valid PFN").

Not to mention that now we won't have two versions of mask_rw_pte().

-boris


> It would help if I included the right diff.
>
> 8<---------------------
> x86/xen: avoid m2p lookup when setting early page table entries
>
> When page table entries are set using xen_set_pte_init() during early
> boot there is no page fault handler that could handle a fault when
> performing an M2P lookup.
>
> In a 64-bit guest (usually dom0) early_ioremap() would fault in
> xen_set_pte_init() because an M2P lookup faults because the MFN is in
> MMIO space and not mapped in the M2P.  This lookup is done to see if
> the PFN is in the range used for the initial page table pages, so that
> the PTE may be set as read-only.
>
> The M2P lookup can be avoided by moving the check (and clear of RW)
> earlier when the PFN is still available.
>
> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
> Tested-by: Keven Moraga <kmoragas@xxxxxxxxxx>
> ---
> v3:
> - fold mask_rw_pte()/mask_rw_pteval() into their callers.
>
> v2:
> - Remove __init annotation from xen_make_pte_init() since
>   PV_CALLEE_SAVE_REGS_THUNK always puts the thunk in .text.
>
> - mask_rw_pte() -> mask_rw_pteval() for x86-64.
> ---
>  arch/x86/xen/mmu.c | 76 +++++++++++++++++++++++++-----------------------------
>  1 file changed, 35 insertions(+), 41 deletions(-)
>
> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> index 478a2de..64d8f0b 100644
> --- a/arch/x86/xen/mmu.c
> +++ b/arch/x86/xen/mmu.c
> @@ -1551,41 +1551,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
>  #endif
>  }
>
> -#ifdef CONFIG_X86_32
> -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
> -{
> -     /* If there's an existing pte, then don't allow _PAGE_RW to be set */
> -     if (pte_val_ma(*ptep) & _PAGE_PRESENT)
> -             pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
> -                            pte_val_ma(pte));
> -
> -     return pte;
> -}
> -#else /* CONFIG_X86_64 */
> -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
> -{
> -     unsigned long pfn;
> -
> -     if (xen_feature(XENFEAT_writable_page_tables) ||
> -         xen_feature(XENFEAT_auto_translated_physmap) ||
> -         xen_start_info->mfn_list >= __START_KERNEL_map)
> -             return pte;
> -
> -     /*
> -      * Pages belonging to the initial p2m list mapped outside the default
> -      * address range must be mapped read-only. This region contains the
> -      * page tables for mapping the p2m list, too, and page tables MUST be
> -      * mapped read-only.
> -      */
> -     pfn = pte_pfn(pte);
> -     if (pfn >= xen_start_info->first_p2m_pfn &&
> -         pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
> -             pte = __pte_ma(pte_val_ma(pte) & ~_PAGE_RW);
> -
> -     return pte;
> -}
> -#endif /* CONFIG_X86_64 */
> -
>  /*
>   * Init-time set_pte while constructing initial pagetables, which
>   * doesn't allow RO page table pages to be remapped RW.
> @@ -1600,13 +1565,41 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
>   * so always write the PTE directly and rely on Xen trapping and
>   * emulating any updates as necessary.
>   */
> -static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
> +__visible pte_t xen_make_pte_init(pteval_t pte)
>  {
> -     if (pte_mfn(pte) != INVALID_P2M_ENTRY)
> -             pte = mask_rw_pte(ptep, pte);
> -     else
> -             pte = __pte_ma(0);
> +#ifdef CONFIG_X86_64
> +     unsigned long pfn;
> +
> +     /*
> +      * Pages belonging to the initial p2m list mapped outside the default
> +      * address range must be mapped read-only. This region contains the
> +      * page tables for mapping the p2m list, too, and page tables MUST be
> +      * mapped read-only.
> +      */
> +     pfn = (pte & PTE_PFN_MASK) >> PAGE_SHIFT;
> +     if (xen_start_info->mfn_list < __START_KERNEL_map &&
> +         pfn >= xen_start_info->first_p2m_pfn &&
> +         pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
> +             pte &= ~_PAGE_RW;
> +#endif
> +     pte = pte_pfn_to_mfn(pte);
>
> +     if ((pte & PTE_PFN_MASK) >> PAGE_SHIFT == INVALID_P2M_ENTRY)
> +             pte = 0;
> +
> +     return native_make_pte(pte);
> +}
> +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init);
> +
> +static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
> +{
> +#ifdef CONFIG_X86_32
> +     /* If there's an existing pte, then don't allow _PAGE_RW to be set */
> +     if (pte_mfn(pte) != INVALID_P2M_ENTRY
> +         && pte_val_ma(*ptep) & _PAGE_PRESENT)
> +             pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
> +                            pte_val_ma(pte));
> +#endif
>       native_set_pte(ptep, pte);
>  }
>
> @@ -2407,6 +2400,7 @@ static void __init xen_post_allocator_init(void)
>       pv_mmu_ops.alloc_pud = xen_alloc_pud;
>       pv_mmu_ops.release_pud = xen_release_pud;
>  #endif
> +     pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte);
>
>  #ifdef CONFIG_X86_64
>       pv_mmu_ops.write_cr3 = &xen_write_cr3;
> @@ -2455,7 +2449,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
>       .pte_val = PV_CALLEE_SAVE(xen_pte_val),
>       .pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
>
> -     .make_pte = PV_CALLEE_SAVE(xen_make_pte),
> +     .make_pte = PV_CALLEE_SAVE(xen_make_pte_init),
>       .make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
>
>  #ifdef CONFIG_X86_PAE



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.