
Re: [Xen-devel] [PATCHv5 04/14] xen: remove scratch frames for ballooned pages and m2p override



On Tue, 27 Jan 2015, David Vrabel wrote:
> The scratch frame mappings for ballooned pages and the m2p override
> are broken.  Remove them in preparation for replacing them with
> simpler mechanisms that work.
> 
> The scratch pages did not ensure that the page was not in use.  In
> particular, the foreign page could still be in use by hardware.  If
> the guest reused the frame, the hardware could read or write that
> frame.
> 
> The m2p override did not handle the same frame being granted by two
> different grant references.  An M2P override lookup cannot return a
> correct result in this case.
> 
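
To make that concrete: the override table was a hash keyed only by the
mfn, so two outstanding grants of the same frame land in the same bucket
and a reverse lookup cannot tell them apart.  A simplified sketch of the
removed lookup (condensed from m2p_find_override() in the diff below):

    list_for_each_entry(p, &m2p_overrides[mfn_hash(mfn)], lru)
            if (page_private(p) == mfn)
                    return p;  /* first match wins; with two grants of
                                * the same mfn this may be the wrong
                                * page */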
> With the m2p override removed, the grant map/unmap for the kernel
> mappings (for x86 PV) can be easily batched in
> set_foreign_p2m_mapping() and clear_foreign_p2m_mapping().
> 
> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>

Reviewed-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
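
With the m2p override gone, the kernel mappings for x86 PV are handled
with one batched grant-table hypercall in each direction, as the hunks
below show.  In outline (error handling trimmed):

    /* map: all kernel mappings in the batch go down in one hypercall */
    ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
                                    kmap_ops, count);

    /* unmap: likewise, after the p2m entries have been restored */
    ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
                                    kunmap_ops, count);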


>  arch/x86/include/asm/xen/page.h |   18 +--
>  arch/x86/xen/p2m.c              |  254 ++-------------------------------------
>  drivers/xen/balloon.c           |   86 +------------
>  3 files changed, 14 insertions(+), 344 deletions(-)
> 
> diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
> index e9f52fe..358dcd3 100644
> --- a/arch/x86/include/asm/xen/page.h
> +++ b/arch/x86/include/asm/xen/page.h
> @@ -57,7 +57,6 @@ extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
>  extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
>                                    struct gnttab_unmap_grant_ref *kunmap_ops,
>                                    struct page **pages, unsigned int count);
> -extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
>  
>  /*
>   * Helper functions to write or read unsigned long values to/from
> @@ -154,21 +153,12 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
>               return mfn;
>  
>       pfn = mfn_to_pfn_no_overrides(mfn);
> -     if (__pfn_to_mfn(pfn) != mfn) {
> -             /*
> -              * If this appears to be a foreign mfn (because the pfn
> -              * doesn't map back to the mfn), then check the local override
> -              * table to see if there's a better pfn to use.
> -              *
> -              * m2p_find_override_pfn returns ~0 if it doesn't find anything.
> -              */
> -             pfn = m2p_find_override_pfn(mfn, ~0);
> -     }
> +     if (__pfn_to_mfn(pfn) != mfn)
> +             pfn = ~0;
>  
>       /*
> -      * pfn is ~0 if there are no entries in the m2p for mfn or if the
> -      * entry doesn't map back to the mfn and m2p_override doesn't have a
> -      * valid entry for it.
> +      * pfn is ~0 if there are no entries in the m2p for mfn or the
> +      * entry doesn't map back to the mfn.
>        */
>       if (pfn == ~0 && __pfn_to_mfn(mfn) == IDENTITY_FRAME(mfn))
>               pfn = mfn;
> diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
> index df40b28..c9bc53f 100644
> --- a/arch/x86/xen/p2m.c
> +++ b/arch/x86/xen/p2m.c
> @@ -84,8 +84,6 @@
>  
>  #define PMDS_PER_MID_PAGE    (P2M_MID_PER_PAGE / PTRS_PER_PTE)
>  
> -static void __init m2p_override_init(void);
> -
>  unsigned long *xen_p2m_addr __read_mostly;
>  EXPORT_SYMBOL_GPL(xen_p2m_addr);
>  unsigned long xen_p2m_size __read_mostly;
> @@ -402,8 +400,6 @@ void __init xen_vmalloc_p2m_tree(void)
>       xen_p2m_size = xen_max_p2m_pfn;
>  
>       xen_inv_extra_mem();
> -
> -     m2p_override_init();
>  }
>  
>  unsigned long get_phys_to_machine(unsigned long pfn)
> @@ -652,100 +648,21 @@ bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
>       return true;
>  }
>  
> -#define M2P_OVERRIDE_HASH_SHIFT      10
> -#define M2P_OVERRIDE_HASH    (1 << M2P_OVERRIDE_HASH_SHIFT)
> -
> -static struct list_head *m2p_overrides;
> -static DEFINE_SPINLOCK(m2p_override_lock);
> -
> -static void __init m2p_override_init(void)
> -{
> -     unsigned i;
> -
> -     m2p_overrides = alloc_bootmem_align(
> -                             sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH,
> -                             sizeof(unsigned long));
> -
> -     for (i = 0; i < M2P_OVERRIDE_HASH; i++)
> -             INIT_LIST_HEAD(&m2p_overrides[i]);
> -}
> -
> -static unsigned long mfn_hash(unsigned long mfn)
> -{
> -     return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT);
> -}
> -
> -/* Add an MFN override for a particular page */
> -static int m2p_add_override(unsigned long mfn, struct page *page,
> -                         struct gnttab_map_grant_ref *kmap_op)
> -{
> -     unsigned long flags;
> -     unsigned long pfn;
> -     unsigned long uninitialized_var(address);
> -     unsigned level;
> -     pte_t *ptep = NULL;
> -
> -     pfn = page_to_pfn(page);
> -     if (!PageHighMem(page)) {
> -             address = (unsigned long)__va(pfn << PAGE_SHIFT);
> -             ptep = lookup_address(address, &level);
> -             if (WARN(ptep == NULL || level != PG_LEVEL_4K,
> -                      "m2p_add_override: pfn %lx not mapped", pfn))
> -                     return -EINVAL;
> -     }
> -
> -     if (kmap_op != NULL) {
> -             if (!PageHighMem(page)) {
> -                     struct multicall_space mcs =
> -                             xen_mc_entry(sizeof(*kmap_op));
> -
> -                     MULTI_grant_table_op(mcs.mc,
> -                                     GNTTABOP_map_grant_ref, kmap_op, 1);
> -
> -                     xen_mc_issue(PARAVIRT_LAZY_MMU);
> -             }
> -     }
> -     spin_lock_irqsave(&m2p_override_lock, flags);
> -     list_add(&page->lru,  &m2p_overrides[mfn_hash(mfn)]);
> -     spin_unlock_irqrestore(&m2p_override_lock, flags);
> -
> -     /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in
> -      * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other
> -      * pfn so that the following mfn_to_pfn(mfn) calls will return the
> -      * pfn from the m2p_override (the backend pfn) instead.
> -      * We need to do this because the pages shared by the frontend
> -      * (xen-blkfront) can be already locked (lock_page, called by
> -      * do_read_cache_page); when the userspace backend tries to use them
> -      * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so
> -      * do_blockdev_direct_IO is going to try to lock the same pages
> -      * again resulting in a deadlock.
> -      * As a side effect get_user_pages_fast might not be safe on the
> -      * frontend pages while they are being shared with the backend,
> -      * because mfn_to_pfn (that ends up being called by GUPF) will
> -      * return the backend pfn rather than the frontend pfn. */
> -     pfn = mfn_to_pfn_no_overrides(mfn);
> -     if (__pfn_to_mfn(pfn) == mfn)
> -             set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
> -
> -     return 0;
> -}
> -
>  int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
>                           struct gnttab_map_grant_ref *kmap_ops,
>                           struct page **pages, unsigned int count)
>  {
>       int i, ret = 0;
> -     bool lazy = false;
>       pte_t *pte;
>  
>       if (xen_feature(XENFEAT_auto_translated_physmap))
>               return 0;
>  
> -     if (kmap_ops &&
> -         !in_interrupt() &&
> -         paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
> -             arch_enter_lazy_mmu_mode();
> -             lazy = true;
> +     if (kmap_ops) {
> +             ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
> +                                             kmap_ops, count);
> +             if (ret)
> +                     goto out;
>       }
>  
>       for (i = 0; i < count; i++) {
> @@ -773,160 +690,22 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
>                       ret = -ENOMEM;
>                       goto out;
>               }
> -
> -             if (kmap_ops) {
> -                     ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
> -                     if (ret)
> -                             goto out;
> -             }
>       }
>  
>  out:
> -     if (lazy)
> -             arch_leave_lazy_mmu_mode();
> -
>       return ret;
>  }
>  EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);
>  
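
For reference, kernel callers normally reach set_foreign_p2m_mapping()
via gnttab_map_refs().  A minimal, hypothetical caller mapping a single
grant with an additional kernel mapping might look roughly like this
(vaddr, kaddr, gref, otherend_id and page are placeholders):

    struct gnttab_map_grant_ref map_op, kmap_op;
    int err;

    gnttab_set_map_op(&map_op, vaddr, GNTMAP_host_map, gref, otherend_id);
    gnttab_set_map_op(&kmap_op, kaddr, GNTMAP_host_map, gref, otherend_id);
    err = gnttab_map_refs(&map_op, &kmap_op, &page, 1);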
> -static struct page *m2p_find_override(unsigned long mfn)
> -{
> -     unsigned long flags;
> -     struct list_head *bucket;
> -     struct page *p, *ret;
> -
> -     if (unlikely(!m2p_overrides))
> -             return NULL;
> -
> -     ret = NULL;
> -     bucket = &m2p_overrides[mfn_hash(mfn)];
> -
> -     spin_lock_irqsave(&m2p_override_lock, flags);
> -
> -     list_for_each_entry(p, bucket, lru) {
> -             if (page_private(p) == mfn) {
> -                     ret = p;
> -                     break;
> -             }
> -     }
> -
> -     spin_unlock_irqrestore(&m2p_override_lock, flags);
> -
> -     return ret;
> -}
> -
> -static int m2p_remove_override(struct page *page,
> -                            struct gnttab_unmap_grant_ref *kunmap_op,
> -                            unsigned long mfn)
> -{
> -     unsigned long flags;
> -     unsigned long pfn;
> -     unsigned long uninitialized_var(address);
> -     unsigned level;
> -     pte_t *ptep = NULL;
> -
> -     pfn = page_to_pfn(page);
> -
> -     if (!PageHighMem(page)) {
> -             address = (unsigned long)__va(pfn << PAGE_SHIFT);
> -             ptep = lookup_address(address, &level);
> -
> -             if (WARN(ptep == NULL || level != PG_LEVEL_4K,
> -                      "m2p_remove_override: pfn %lx not mapped", pfn))
> -                     return -EINVAL;
> -     }
> -
> -     spin_lock_irqsave(&m2p_override_lock, flags);
> -     list_del(&page->lru);
> -     spin_unlock_irqrestore(&m2p_override_lock, flags);
> -
> -     if (kunmap_op != NULL) {
> -             if (!PageHighMem(page)) {
> -                     struct multicall_space mcs;
> -                     struct gnttab_unmap_and_replace *unmap_op;
> -                     struct page *scratch_page = get_balloon_scratch_page();
> -                     unsigned long scratch_page_address = (unsigned long)
> -                             __va(page_to_pfn(scratch_page) << PAGE_SHIFT);
> -
> -                     /*
> -                      * It might be that we queued all the m2p grant table
> -                      * hypercalls in a multicall, then m2p_remove_override
> -                      * get called before the multicall has actually been
> -                      * issued. In this case handle is going to -1 because
> -                      * it hasn't been modified yet.
> -                      */
> -                     if (kunmap_op->handle == -1)
> -                             xen_mc_flush();
> -                     /*
> -                      * Now if kmap_op->handle is negative it means that the
> -                      * hypercall actually returned an error.
> -                      */
> -                     if (kunmap_op->handle == GNTST_general_error) {
> -                             pr_warn("m2p_remove_override: pfn %lx mfn %lx, failed to modify kernel mappings",
> -                                     pfn, mfn);
> -                             put_balloon_scratch_page();
> -                             return -1;
> -                     }
> -
> -                     xen_mc_batch();
> -
> -                     mcs = __xen_mc_entry(
> -                             sizeof(struct gnttab_unmap_and_replace));
> -                     unmap_op = mcs.args;
> -                     unmap_op->host_addr = kunmap_op->host_addr;
> -                     unmap_op->new_addr = scratch_page_address;
> -                     unmap_op->handle = kunmap_op->handle;
> -
> -                     MULTI_grant_table_op(mcs.mc,
> -                             GNTTABOP_unmap_and_replace, unmap_op, 1);
> -
> -                     mcs = __xen_mc_entry(0);
> -                     MULTI_update_va_mapping(mcs.mc, scratch_page_address,
> -                                     pfn_pte(page_to_pfn(scratch_page),
> -                                     PAGE_KERNEL_RO), 0);
> -
> -                     xen_mc_issue(PARAVIRT_LAZY_MMU);
> -
> -                     put_balloon_scratch_page();
> -             }
> -     }
> -
> -     /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
> -      * somewhere in this domain, even before being added to the
> -      * m2p_override (see comment above in m2p_add_override).
> -      * If there are no other entries in the m2p_override corresponding
> -      * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for
> -      * the original pfn (the one shared by the frontend): the backend
> -      * cannot do any IO on this page anymore because it has been
> -      * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of
> -      * the original pfn causes mfn_to_pfn(mfn) to return the frontend
> -      * pfn again. */
> -     mfn &= ~FOREIGN_FRAME_BIT;
> -     pfn = mfn_to_pfn_no_overrides(mfn);
> -     if (__pfn_to_mfn(pfn) == FOREIGN_FRAME(mfn) &&
> -                     m2p_find_override(mfn) == NULL)
> -             set_phys_to_machine(pfn, mfn);
> -
> -     return 0;
> -}
> -
>  int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
>                             struct gnttab_unmap_grant_ref *kunmap_ops,
>                             struct page **pages, unsigned int count)
>  {
>       int i, ret = 0;
> -     bool lazy = false;
>  
>       if (xen_feature(XENFEAT_auto_translated_physmap))
>               return 0;
>  
> -     if (kunmap_ops &&
> -         !in_interrupt() &&
> -         paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
> -             arch_enter_lazy_mmu_mode();
> -             lazy = true;
> -     }
> -
>       for (i = 0; i < count; i++) {
>               unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i]));
>               unsigned long pfn = page_to_pfn(pages[i]);
> @@ -940,32 +719,15 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
>               WARN_ON(!PagePrivate(pages[i]));
>               ClearPagePrivate(pages[i]);
>               set_phys_to_machine(pfn, pages[i]->index);
> -
> -             if (kunmap_ops)
> -                     ret = m2p_remove_override(pages[i], &kunmap_ops[i], mfn);
> -             if (ret)
> -                     goto out;
>       }
> -
> +     if (kunmap_ops)
> +             ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
> +                                             kunmap_ops, count);
>  out:
> -     if (lazy)
> -             arch_leave_lazy_mmu_mode();
>       return ret;
>  }
>  EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);
>  
> -unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
> -{
> -     struct page *p = m2p_find_override(mfn);
> -     unsigned long ret = pfn;
> -
> -     if (p)
> -             ret = page_to_pfn(p);
> -
> -     return ret;
> -}
> -EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
> -
>  #ifdef CONFIG_XEN_DEBUG_FS
>  #include <linux/debugfs.h>
>  #include "debugfs.h"
> diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
> index 3860d02..0b52d92 100644
> --- a/drivers/xen/balloon.c
> +++ b/drivers/xen/balloon.c
> @@ -92,7 +92,6 @@ EXPORT_SYMBOL_GPL(balloon_stats);
>  
>  /* We increase/decrease in batches which fit in a page */
>  static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];
> -static DEFINE_PER_CPU(struct page *, balloon_scratch_page);
>  
>  
>  /* List of ballooned pages, threaded through the mem_map array. */
> @@ -423,22 +422,12 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>               page = pfn_to_page(pfn);
>  
>  #ifdef CONFIG_XEN_HAVE_PVMMU
> -             /*
> -              * Ballooned out frames are effectively replaced with
> -              * a scratch frame.  Ensure direct mappings and the
> -              * p2m are consistent.
> -              */
>               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
>                       if (!PageHighMem(page)) {
> -                             struct page *scratch_page = get_balloon_scratch_page();
> -
>                               ret = HYPERVISOR_update_va_mapping(
>                                               (unsigned long)__va(pfn << PAGE_SHIFT),
> -                                             pfn_pte(page_to_pfn(scratch_page),
> -                                                     PAGE_KERNEL_RO), 0);
> +                                             __pte_ma(0), 0);
>                               BUG_ON(ret);
> -
> -                             put_balloon_scratch_page();
>                       }
>                       __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
>               }
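
The effect of this hunk: the direct mapping of a ballooned-out frame is
now simply cleared instead of being redirected to a per-cpu scratch
frame, so a stray access faults immediately rather than silently
touching a frame that may have been reused.  Condensed from the lines
above:

    /* Zap the linear-map PTE for the ballooned pfn ... */
    HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
                                 __pte_ma(0), 0);
    /* ... and invalidate its p2m entry. */
    __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);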
> @@ -500,18 +489,6 @@ static void balloon_process(struct work_struct *work)
>       mutex_unlock(&balloon_mutex);
>  }
>  
> -struct page *get_balloon_scratch_page(void)
> -{
> -     struct page *ret = get_cpu_var(balloon_scratch_page);
> -     BUG_ON(ret == NULL);
> -     return ret;
> -}
> -
> -void put_balloon_scratch_page(void)
> -{
> -     put_cpu_var(balloon_scratch_page);
> -}
> -
>  /* Resets the Xen limit, sets new target, and kicks off processing. */
>  void balloon_set_new_target(unsigned long target)
>  {
> @@ -605,61 +582,13 @@ static void __init balloon_add_region(unsigned long start_pfn,
>       }
>  }
>  
> -static int alloc_balloon_scratch_page(int cpu)
> -{
> -     if (per_cpu(balloon_scratch_page, cpu) != NULL)
> -             return 0;
> -
> -     per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
> -     if (per_cpu(balloon_scratch_page, cpu) == NULL) {
> -             pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu);
> -             return -ENOMEM;
> -     }
> -
> -     return 0;
> -}
> -
> -
> -static int balloon_cpu_notify(struct notifier_block *self,
> -                                 unsigned long action, void *hcpu)
> -{
> -     int cpu = (long)hcpu;
> -     switch (action) {
> -     case CPU_UP_PREPARE:
> -             if (alloc_balloon_scratch_page(cpu))
> -                     return NOTIFY_BAD;
> -             break;
> -     default:
> -             break;
> -     }
> -     return NOTIFY_OK;
> -}
> -
> -static struct notifier_block balloon_cpu_notifier = {
> -     .notifier_call  = balloon_cpu_notify,
> -};
> -
>  static int __init balloon_init(void)
>  {
> -     int i, cpu;
> +     int i;
>  
>       if (!xen_domain())
>               return -ENODEV;
>  
> -     if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> -             register_cpu_notifier(&balloon_cpu_notifier);
> -
> -             get_online_cpus();
> -             for_each_online_cpu(cpu) {
> -                     if (alloc_balloon_scratch_page(cpu)) {
> -                             put_online_cpus();
> -                             unregister_cpu_notifier(&balloon_cpu_notifier);
> -                             return -ENOMEM;
> -                     }
> -             }
> -             put_online_cpus();
> -     }
> -
>       pr_info("Initialising balloon driver\n");
>  
>       balloon_stats.current_pages = xen_pv_domain()
> @@ -696,15 +625,4 @@ static int __init balloon_init(void)
>  
>  subsys_initcall(balloon_init);
>  
> -static int __init balloon_clear(void)
> -{
> -     int cpu;
> -
> -     for_each_possible_cpu(cpu)
> -             per_cpu(balloon_scratch_page, cpu) = NULL;
> -
> -     return 0;
> -}
> -early_initcall(balloon_clear);
> -
>  MODULE_LICENSE("GPL");
> -- 
> 1.7.10.4

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel