[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] Re: [PATCH] x86: hold mm->page_table_lock while doing vmalloc_sync



 Ping?  Have you had any thoughts about possible x86-64 problems with this?

Thanks,
    J

On 10/14/2010 01:56 PM, Jeremy Fitzhardinge wrote:
>
> Take mm->page_table_lock while syncing the vmalloc region.  This prevents
> a race with the Xen pagetable pin/unpin code, which expects that the
> page_table_lock is already held.  If this race occurs, then Xen can see
> an inconsistent page type (a page can either be read/write or a pagetable
> page, and pin/unpin converts it between them), which will cause either
> the pin or the set_p[gm]d to fail; either will crash the kernel.
>
> vmalloc_sync_all() should be called rarely, so this extra use of
> page_table_lock should not interfere with its normal users.
>
> The mm pointer is stashed in the pgd page's index field, as that won't
> be otherwise used for pgd pages.
>
> Bug reported by Ian Campbell <ian.campbell@xxxxxxxxxxxxx>
> Derived from a patch by Jan Beulich <jbeulich@xxxxxxxxxx>
>
> Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@xxxxxxxxxx>
>
> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
> index a34c785..422b363 100644
> --- a/arch/x86/include/asm/pgtable.h
> +++ b/arch/x86/include/asm/pgtable.h
> @@ -28,6 +28,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
>  extern spinlock_t pgd_lock;
>  extern struct list_head pgd_list;
>  
> +extern struct mm_struct *pgd_page_get_mm(struct page *page);
> +
>  #ifdef CONFIG_PARAVIRT
>  #include <asm/paravirt.h>
>  #else  /* !CONFIG_PARAVIRT */
> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> index 4c4508e..b7f9ae1 100644
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -229,7 +229,16 @@ void vmalloc_sync_all(void)
>  
>               spin_lock_irqsave(&pgd_lock, flags);
>               list_for_each_entry(page, &pgd_list, lru) {
> -                     if (!vmalloc_sync_one(page_address(page), address))
> +                     spinlock_t *pgt_lock;
> +                     int ret;
> +
> +                     pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
> +
> +                     spin_lock(pgt_lock);
> +                     ret = vmalloc_sync_one(page_address(page), address);
> +                     spin_unlock(pgt_lock);
> +
> +                     if (!ret)
>                               break;
>               }
>               spin_unlock_irqrestore(&pgd_lock, flags);
> @@ -341,11 +350,19 @@ void vmalloc_sync_all(void)
>               spin_lock_irqsave(&pgd_lock, flags);
>               list_for_each_entry(page, &pgd_list, lru) {
>                       pgd_t *pgd;
> +                     spinlock_t *pgt_lock;
> +
>                       pgd = (pgd_t *)page_address(page) + pgd_index(address);
> +
> +                     pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
> +                     spin_lock(pgt_lock);
> +
>                       if (pgd_none(*pgd))
>                               set_pgd(pgd, *pgd_ref);
>                       else
>                               BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
> +
> +                     spin_unlock(pgt_lock);
>               }
>               spin_unlock_irqrestore(&pgd_lock, flags);
>       }
> diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
> index 5c4ee42..c70e57d 100644
> --- a/arch/x86/mm/pgtable.c
> +++ b/arch/x86/mm/pgtable.c
> @@ -87,7 +87,19 @@ static inline void pgd_list_del(pgd_t *pgd)
>  #define UNSHARED_PTRS_PER_PGD                                \
>       (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
>  
> -static void pgd_ctor(pgd_t *pgd)
> +
> +static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
> +{
> +     BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
> +     virt_to_page(pgd)->index = (pgoff_t)mm;
> +}
> +
> +struct mm_struct *pgd_page_get_mm(struct page *page)
> +{
> +     return (struct mm_struct *)page->index;
> +}
> +
> +static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
>  {
>       /* If the pgd points to a shared pagetable level (either the
>          ptes in non-PAE, or shared PMD in PAE), then just copy the
> @@ -105,8 +117,10 @@ static void pgd_ctor(pgd_t *pgd)
>       }
>  
>       /* list required to sync kernel mapping updates */
> -     if (!SHARED_KERNEL_PMD)
> +     if (!SHARED_KERNEL_PMD) {
> +             pgd_set_mm(pgd, mm);
>               pgd_list_add(pgd);
> +     }
>  }
>  
>  static void pgd_dtor(pgd_t *pgd)
> @@ -272,7 +286,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
>        */
>       spin_lock_irqsave(&pgd_lock, flags);
>  
> -     pgd_ctor(pgd);
> +     pgd_ctor(mm, pgd);
>       pgd_prepopulate_pmd(mm, pgd, pmds);
>  
>       spin_unlock_irqrestore(&pgd_lock, flags);
>
>


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.