Re: [Xen-devel] [PATCHv5 04/14] xen: remove scratch frames for ballooned pages and m2p override
On Tue, 27 Jan 2015, David Vrabel wrote:
> The scratch frame mappings for ballooned pages and the m2p override
> are broken.  Remove them in preparation for replacing them with
> simpler mechanisms that work.
>
> The scratch pages did not ensure that the page was not in use.  In
> particular, the foreign page could still be in use by hardware.  If
> the guest reused the frame the hardware could read or write that
> frame.
>
> The m2p override did not handle the same frame being granted by two
> different grant references: an m2p override lookup in that case
> cannot return a single correct pfn.
>
> With the m2p override removed, the grant map/unmap for the kernel
> mappings (for x86 PV) can be easily batched in
> set_foreign_p2m_mapping() and clear_foreign_p2m_mapping().
>
> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>

Reviewed-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>

>  arch/x86/include/asm/xen/page.h |   18 +--
>  arch/x86/xen/p2m.c              |  254 ++-----------------------------------
>  drivers/xen/balloon.c           |   86 +------------
>  3 files changed, 14 insertions(+), 344 deletions(-)
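The batching the commit message refers to is visible in set_foreign_p2m_mapping() in the patch below: the per-page multicalls issued from m2p_add_override() are replaced by one GNTTABOP_map_grant_ref hypercall covering the whole kmap_ops array. The following standalone C sketch models only that calling pattern; grant_table_op() and struct map_grant_ref are simplified stand-ins for HYPERVISOR_grant_table_op and struct gnttab_map_grant_ref, not the real kernel API.

    /*
     * Standalone sketch of the batching idea (plain C99, not kernel code).
     * grant_table_op() is a stand-in for the HYPERVISOR_grant_table_op
     * hypercall; struct map_grant_ref is a cut-down stand-in for
     * struct gnttab_map_grant_ref.
     */
    #include <stdio.h>

    struct map_grant_ref {
            unsigned long host_addr;  /* where the foreign frame is mapped */
            int status;               /* per-op result, filled by the callee */
    };

    /* Stand-in for HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, ...):
     * one trap into the "hypervisor" handles the whole array. */
    static int grant_table_op(struct map_grant_ref *ops, unsigned int count)
    {
            for (unsigned int i = 0; i < count; i++)
                    ops[i].status = 0;  /* pretend each mapping succeeded */
            return 0;                   /* one round trip regardless of count */
    }

    int main(void)
    {
            struct map_grant_ref ops[16] = { { 0 } };
            unsigned int i;

            /* Old scheme: m2p_add_override() issued (at least) one
             * hypercall per page -> 16 round trips for 16 pages. */
            for (i = 0; i < 16; i++)
                    grant_table_op(&ops[i], 1);

            /* New scheme: set_foreign_p2m_mapping() hands over the whole
             * kmap_ops array at once -> a single round trip. */
            printf("batched map returned %d\n", grant_table_op(ops, 16));
            return 0;
    }

The per-op status fields still let individual mappings report failure, while the guest pays for only one entry into the hypervisor per batch.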
> diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
> index e9f52fe..358dcd3 100644
> --- a/arch/x86/include/asm/xen/page.h
> +++ b/arch/x86/include/asm/xen/page.h
> @@ -57,7 +57,6 @@ extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
>  extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
>  				     struct gnttab_unmap_grant_ref *kunmap_ops,
>  				     struct page **pages, unsigned int count);
> -extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
>
>  /*
>   * Helper functions to write or read unsigned long values to/from
> @@ -154,21 +153,12 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
>  		return mfn;
>
>  	pfn = mfn_to_pfn_no_overrides(mfn);
> -	if (__pfn_to_mfn(pfn) != mfn) {
> -		/*
> -		 * If this appears to be a foreign mfn (because the pfn
> -		 * doesn't map back to the mfn), then check the local override
> -		 * table to see if there's a better pfn to use.
> -		 *
> -		 * m2p_find_override_pfn returns ~0 if it doesn't find anything.
> -		 */
> -		pfn = m2p_find_override_pfn(mfn, ~0);
> -	}
> +	if (__pfn_to_mfn(pfn) != mfn)
> +		pfn = ~0;
>
>  	/*
> -	 * pfn is ~0 if there are no entries in the m2p for mfn or if the
> -	 * entry doesn't map back to the mfn and m2p_override doesn't have a
> -	 * valid entry for it.
> +	 * pfn is ~0 if there are no entries in the m2p for mfn or the
> +	 * entry doesn't map back to the mfn.
>  	 */
>  	if (pfn == ~0 && __pfn_to_mfn(mfn) == IDENTITY_FRAME(mfn))
>  		pfn = mfn;
> diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
> index df40b28..c9bc53f 100644
> --- a/arch/x86/xen/p2m.c
> +++ b/arch/x86/xen/p2m.c
> @@ -84,8 +84,6 @@
>
>  #define PMDS_PER_MID_PAGE	(P2M_MID_PER_PAGE / PTRS_PER_PTE)
>
> -static void __init m2p_override_init(void);
> -
>  unsigned long *xen_p2m_addr __read_mostly;
>  EXPORT_SYMBOL_GPL(xen_p2m_addr);
>  unsigned long xen_p2m_size __read_mostly;
> @@ -402,8 +400,6 @@ void __init xen_vmalloc_p2m_tree(void)
>  	xen_p2m_size = xen_max_p2m_pfn;
>
>  	xen_inv_extra_mem();
> -
> -	m2p_override_init();
>  }
>
>  unsigned long get_phys_to_machine(unsigned long pfn)
> @@ -652,100 +648,21 @@ bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
>  	return true;
>  }
>
> -#define M2P_OVERRIDE_HASH_SHIFT	10
> -#define M2P_OVERRIDE_HASH	(1 << M2P_OVERRIDE_HASH_SHIFT)
> -
> -static struct list_head *m2p_overrides;
> -static DEFINE_SPINLOCK(m2p_override_lock);
> -
> -static void __init m2p_override_init(void)
> -{
> -	unsigned i;
> -
> -	m2p_overrides = alloc_bootmem_align(
> -				sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH,
> -				sizeof(unsigned long));
> -
> -	for (i = 0; i < M2P_OVERRIDE_HASH; i++)
> -		INIT_LIST_HEAD(&m2p_overrides[i]);
> -}
> -
> -static unsigned long mfn_hash(unsigned long mfn)
> -{
> -	return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT);
> -}
> -
> -/* Add an MFN override for a particular page */
> -static int m2p_add_override(unsigned long mfn, struct page *page,
> -			    struct gnttab_map_grant_ref *kmap_op)
> -{
> -	unsigned long flags;
> -	unsigned long pfn;
> -	unsigned long uninitialized_var(address);
> -	unsigned level;
> -	pte_t *ptep = NULL;
> -
> -	pfn = page_to_pfn(page);
> -	if (!PageHighMem(page)) {
> -		address = (unsigned long)__va(pfn << PAGE_SHIFT);
> -		ptep = lookup_address(address, &level);
> -		if (WARN(ptep == NULL || level != PG_LEVEL_4K,
> -			 "m2p_add_override: pfn %lx not mapped", pfn))
> -			return -EINVAL;
> -	}
> -
> -	if (kmap_op != NULL) {
> -		if (!PageHighMem(page)) {
> -			struct multicall_space mcs =
> -				xen_mc_entry(sizeof(*kmap_op));
> -
> -			MULTI_grant_table_op(mcs.mc,
> -					GNTTABOP_map_grant_ref, kmap_op, 1);
> -
> -			xen_mc_issue(PARAVIRT_LAZY_MMU);
> -		}
> -	}
> -	spin_lock_irqsave(&m2p_override_lock, flags);
> -	list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
> -	spin_unlock_irqrestore(&m2p_override_lock, flags);
> -
> -	/* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in
> -	 * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other
> -	 * pfn so that the following mfn_to_pfn(mfn) calls will return the
> -	 * pfn from the m2p_override (the backend pfn) instead.
> -	 * We need to do this because the pages shared by the frontend
> -	 * (xen-blkfront) can be already locked (lock_page, called by
> -	 * do_read_cache_page); when the userspace backend tries to use them
> -	 * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so
> -	 * do_blockdev_direct_IO is going to try to lock the same pages
> -	 * again resulting in a deadlock.
> -	 * As a side effect get_user_pages_fast might not be safe on the
> -	 * frontend pages while they are being shared with the backend,
> -	 * because mfn_to_pfn (that ends up being called by GUPF) will
> -	 * return the backend pfn rather than the frontend pfn. */
> -	pfn = mfn_to_pfn_no_overrides(mfn);
> -	if (__pfn_to_mfn(pfn) == mfn)
> -		set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
> -
> -	return 0;
> -}
> -
>  int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
>  			    struct gnttab_map_grant_ref *kmap_ops,
>  			    struct page **pages, unsigned int count)
>  {
>  	int i, ret = 0;
> -	bool lazy = false;
>  	pte_t *pte;
>
>  	if (xen_feature(XENFEAT_auto_translated_physmap))
>  		return 0;
>
> -	if (kmap_ops &&
> -	    !in_interrupt() &&
> -	    paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
> -		arch_enter_lazy_mmu_mode();
> -		lazy = true;
> +	if (kmap_ops) {
> +		ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
> +						kmap_ops, count);
> +		if (ret)
> +			goto out;
>  	}
>
>  	for (i = 0; i < count; i++) {
> @@ -773,160 +690,22 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
>  			ret = -ENOMEM;
>  			goto out;
>  		}
> -
> -		if (kmap_ops) {
> -			ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
> -			if (ret)
> -				goto out;
> -		}
>  	}
>
>  out:
> -	if (lazy)
> -		arch_leave_lazy_mmu_mode();
> -
>  	return ret;
>  }
>  EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);
>
> -static struct page *m2p_find_override(unsigned long mfn)
> -{
> -	unsigned long flags;
> -	struct list_head *bucket;
> -	struct page *p, *ret;
> -
> -	if (unlikely(!m2p_overrides))
> -		return NULL;
> -
> -	ret = NULL;
> -	bucket = &m2p_overrides[mfn_hash(mfn)];
> -
> -	spin_lock_irqsave(&m2p_override_lock, flags);
> -
> -	list_for_each_entry(p, bucket, lru) {
> -		if (page_private(p) == mfn) {
> -			ret = p;
> -			break;
> -		}
> -	}
> -
> -	spin_unlock_irqrestore(&m2p_override_lock, flags);
> -
> -	return ret;
> -}
> -
> -static int m2p_remove_override(struct page *page,
> -			       struct gnttab_unmap_grant_ref *kunmap_op,
> -			       unsigned long mfn)
> -{
> -	unsigned long flags;
> -	unsigned long pfn;
> -	unsigned long uninitialized_var(address);
> -	unsigned level;
> -	pte_t *ptep = NULL;
> -
> -	pfn = page_to_pfn(page);
> -
> -	if (!PageHighMem(page)) {
> -		address = (unsigned long)__va(pfn << PAGE_SHIFT);
> -		ptep = lookup_address(address, &level);
> -
> -		if (WARN(ptep == NULL || level != PG_LEVEL_4K,
> -			 "m2p_remove_override: pfn %lx not mapped", pfn))
> -			return -EINVAL;
> -	}
> -
> -	spin_lock_irqsave(&m2p_override_lock, flags);
> -	list_del(&page->lru);
> -	spin_unlock_irqrestore(&m2p_override_lock, flags);
> -
> -	if (kunmap_op != NULL) {
> -		if (!PageHighMem(page)) {
> -			struct multicall_space mcs;
> -			struct gnttab_unmap_and_replace *unmap_op;
> -			struct page *scratch_page = get_balloon_scratch_page();
> -			unsigned long scratch_page_address = (unsigned long)
> -				__va(page_to_pfn(scratch_page) << PAGE_SHIFT);
> -
> -			/*
> -			 * It might be that we queued all the m2p grant table
> -			 * hypercalls in a multicall, then m2p_remove_override
> -			 * get called before the multicall has actually been
> -			 * issued. In this case handle is going to -1 because
> -			 * it hasn't been modified yet.
> -			 */
> -			if (kunmap_op->handle == -1)
> -				xen_mc_flush();
> -			/*
> -			 * Now if kmap_op->handle is negative it means that the
> -			 * hypercall actually returned an error.
> -			 */
> -			if (kunmap_op->handle == GNTST_general_error) {
> -				pr_warn("m2p_remove_override: pfn %lx mfn %lx, failed to modify kernel mappings",
> -					pfn, mfn);
> -				put_balloon_scratch_page();
> -				return -1;
> -			}
> -
> -			xen_mc_batch();
> -
> -			mcs = __xen_mc_entry(
> -				sizeof(struct gnttab_unmap_and_replace));
> -			unmap_op = mcs.args;
> -			unmap_op->host_addr = kunmap_op->host_addr;
> -			unmap_op->new_addr = scratch_page_address;
> -			unmap_op->handle = kunmap_op->handle;
> -
> -			MULTI_grant_table_op(mcs.mc,
> -				GNTTABOP_unmap_and_replace, unmap_op, 1);
> -
> -			mcs = __xen_mc_entry(0);
> -			MULTI_update_va_mapping(mcs.mc, scratch_page_address,
> -				pfn_pte(page_to_pfn(scratch_page),
> -					PAGE_KERNEL_RO), 0);
> -
> -			xen_mc_issue(PARAVIRT_LAZY_MMU);
> -
> -			put_balloon_scratch_page();
> -		}
> -	}
> -
> -	/* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
> -	 * somewhere in this domain, even before being added to the
> -	 * m2p_override (see comment above in m2p_add_override).
> -	 * If there are no other entries in the m2p_override corresponding
> -	 * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for
> -	 * the original pfn (the one shared by the frontend): the backend
> -	 * cannot do any IO on this page anymore because it has been
> -	 * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of
> -	 * the original pfn causes mfn_to_pfn(mfn) to return the frontend
> -	 * pfn again. */
> -	mfn &= ~FOREIGN_FRAME_BIT;
> -	pfn = mfn_to_pfn_no_overrides(mfn);
> -	if (__pfn_to_mfn(pfn) == FOREIGN_FRAME(mfn) &&
> -	    m2p_find_override(mfn) == NULL)
> -		set_phys_to_machine(pfn, mfn);
> -
> -	return 0;
> -}
> -
>  int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
>  			      struct gnttab_unmap_grant_ref *kunmap_ops,
>  			      struct page **pages, unsigned int count)
>  {
>  	int i, ret = 0;
> -	bool lazy = false;
>
>  	if (xen_feature(XENFEAT_auto_translated_physmap))
>  		return 0;
>
> -	if (kunmap_ops &&
> -	    !in_interrupt() &&
> -	    paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
> -		arch_enter_lazy_mmu_mode();
> -		lazy = true;
> -	}
> -
>  	for (i = 0; i < count; i++) {
>  		unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i]));
>  		unsigned long pfn = page_to_pfn(pages[i]);
> @@ -940,32 +719,15 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
>  		WARN_ON(!PagePrivate(pages[i]));
>  		ClearPagePrivate(pages[i]);
>  		set_phys_to_machine(pfn, pages[i]->index);
> -
> -		if (kunmap_ops)
> -			ret = m2p_remove_override(pages[i], &kunmap_ops[i], mfn);
> -		if (ret)
> -			goto out;
>  	}
> -
> +	if (kunmap_ops)
> +		ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
> +						kunmap_ops, count);
>  out:
> -	if (lazy)
> -		arch_leave_lazy_mmu_mode();
>  	return ret;
>  }
>  EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);
>
> -unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
> -{
> -	struct page *p = m2p_find_override(mfn);
> -	unsigned long ret = pfn;
> -
> -	if (p)
> -		ret = page_to_pfn(p);
> -
> -	return ret;
> -}
> -EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
> -
>  #ifdef CONFIG_XEN_DEBUG_FS
>  #include <linux/debugfs.h>
>  #include "debugfs.h"
> diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
> index 3860d02..0b52d92 100644
> --- a/drivers/xen/balloon.c
> +++ b/drivers/xen/balloon.c
> @@ -92,7 +92,6 @@ EXPORT_SYMBOL_GPL(balloon_stats);
>
>  /* We increase/decrease in batches which fit in a page */
>  static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];
> -static DEFINE_PER_CPU(struct page *, balloon_scratch_page);
>
>
>  /* List of ballooned pages, threaded through the mem_map array. */
> @@ -423,22 +422,12 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>  		page = pfn_to_page(pfn);
>
>  #ifdef CONFIG_XEN_HAVE_PVMMU
> -		/*
> -		 * Ballooned out frames are effectively replaced with
> -		 * a scratch frame.  Ensure direct mappings and the
> -		 * p2m are consistent.
> -		 */
>  		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
>  			if (!PageHighMem(page)) {
> -				struct page *scratch_page = get_balloon_scratch_page();
> -
>  				ret = HYPERVISOR_update_va_mapping(
>  						(unsigned long)__va(pfn << PAGE_SHIFT),
> -						pfn_pte(page_to_pfn(scratch_page),
> -							PAGE_KERNEL_RO), 0);
> +						__pte_ma(0), 0);
>  				BUG_ON(ret);
> -
> -				put_balloon_scratch_page();
>  			}
>  			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
>  		}
> @@ -500,18 +489,6 @@ static void balloon_process(struct work_struct *work)
>  	mutex_unlock(&balloon_mutex);
>  }
>
> -struct page *get_balloon_scratch_page(void)
> -{
> -	struct page *ret = get_cpu_var(balloon_scratch_page);
> -	BUG_ON(ret == NULL);
> -	return ret;
> -}
> -
> -void put_balloon_scratch_page(void)
> -{
> -	put_cpu_var(balloon_scratch_page);
> -}
> -
>  /* Resets the Xen limit, sets new target, and kicks off processing. */
>  void balloon_set_new_target(unsigned long target)
>  {
> @@ -605,61 +582,13 @@ static void __init balloon_add_region(unsigned long start_pfn,
>  	}
>  }
>
> -static int alloc_balloon_scratch_page(int cpu)
> -{
> -	if (per_cpu(balloon_scratch_page, cpu) != NULL)
> -		return 0;
> -
> -	per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
> -	if (per_cpu(balloon_scratch_page, cpu) == NULL) {
> -		pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu);
> -		return -ENOMEM;
> -	}
> -
> -	return 0;
> -}
> -
> -
> -static int balloon_cpu_notify(struct notifier_block *self,
> -			      unsigned long action, void *hcpu)
> -{
> -	int cpu = (long)hcpu;
> -	switch (action) {
> -	case CPU_UP_PREPARE:
> -		if (alloc_balloon_scratch_page(cpu))
> -			return NOTIFY_BAD;
> -		break;
> -	default:
> -		break;
> -	}
> -	return NOTIFY_OK;
> -}
> -
> -static struct notifier_block balloon_cpu_notifier = {
> -	.notifier_call = balloon_cpu_notify,
> -};
> -
>  static int __init balloon_init(void)
>  {
> -	int i, cpu;
> +	int i;
>
>  	if (!xen_domain())
>  		return -ENODEV;
>
> -	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> -		register_cpu_notifier(&balloon_cpu_notifier);
> -
> -		get_online_cpus();
> -		for_each_online_cpu(cpu) {
> -			if (alloc_balloon_scratch_page(cpu)) {
> -				put_online_cpus();
> -				unregister_cpu_notifier(&balloon_cpu_notifier);
> -				return -ENOMEM;
> -			}
> -		}
> -		put_online_cpus();
> -	}
> -
>  	pr_info("Initialising balloon driver\n");
>
>  	balloon_stats.current_pages = xen_pv_domain()
> @@ -696,15 +625,4 @@ static int __init balloon_init(void)
>
>  subsys_initcall(balloon_init);
>
> -static int __init balloon_clear(void)
> -{
> -	int cpu;
> -
> -	for_each_possible_cpu(cpu)
> -		per_cpu(balloon_scratch_page, cpu) = NULL;
> -
> -	return 0;
> -}
> -early_initcall(balloon_clear);
> -
>  MODULE_LICENSE("GPL");
> --
> 1.7.10.4
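To see why the removed m2p override could not cope with one machine frame granted through two grant references, consider this standalone model (an assumed, simplified sketch; the real code used a hash table of struct page keyed by mfn via m2p_overrides/mfn_hash(), not this flat array): the table is keyed on the mfn alone, so a reverse lookup has no way to choose between the two local pfns.

    /*
     * Standalone model of the reverse-lookup ambiguity (plain C, not
     * kernel code).  add_override() and find_override_pfn() are
     * hypothetical stand-ins that keep only the essence of the removed
     * m2p override.
     */
    #include <stdio.h>

    struct override {
            unsigned long mfn;  /* machine frame number (the key) */
            unsigned long pfn;  /* local pseudo-physical frame it maps to */
    };

    static struct override table[4];
    static int nused;

    static void add_override(unsigned long mfn, unsigned long pfn)
    {
            table[nused].mfn = mfn;
            table[nused].pfn = pfn;
            nused++;
    }

    /* Reverse m2p lookup: returns the first pfn recorded for this mfn. */
    static unsigned long find_override_pfn(unsigned long mfn)
    {
            for (int i = 0; i < nused; i++)
                    if (table[i].mfn == mfn)
                            return table[i].pfn;
            return ~0UL;  /* no override known */
    }

    int main(void)
    {
            /* The same foreign frame granted through two grant references,
             * mapped at two different local pfns. */
            add_override(0x1234, 100);
            add_override(0x1234, 200);

            /* The lookup silently returns one of the two candidates; there
             * is no single correct answer, so the mechanism cannot work. */
            printf("m2p(0x1234) -> pfn %lu\n", find_override_pfn(0x1234));
            return 0;
    }

With the override gone, mfn_to_pfn() in the patch above simply yields ~0 when the p2m entry does not map back to the mfn, avoiding the ambiguous reverse lookup entirely.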
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel