
Re: [Xen-devel] [PATCH 8 of 9] Modify all internal p2m functions to use the new fine-grained locking



On Thu, Oct 27, 2011 at 1:33 PM, Andres Lagar-Cavilla
<andres@xxxxxxxxxxxxxxxx> wrote:
>  xen/arch/x86/mm/hap/hap.c        |    2 +-
>  xen/arch/x86/mm/hap/nested_hap.c |   21 ++-
>  xen/arch/x86/mm/p2m-ept.c        |   26 +----
>  xen/arch/x86/mm/p2m-pod.c        |   42 +++++--
>  xen/arch/x86/mm/p2m-pt.c         |   20 +---
>  xen/arch/x86/mm/p2m.c            |  185 ++++++++++++++++++++++++--------------
>  xen/include/asm-ia64/mm.h        |    5 +
>  xen/include/asm-x86/p2m.h        |   45 +++++++++-
>  8 files changed, 217 insertions(+), 129 deletions(-)
>
>
> This patch only modifies code internal to the p2m, adding convenience
> macros, etc. It will yield a compiling code base but an incorrect
> hypervisor (external callers of queries into the p2m will not unlock).
> Next patch takes care of external callers, split done for the benefit
> of conciseness.

It's not obvious to me where in this patch to find a description of
what the new locking regime is.  What does _unlocked() mean?  When do
I have to call that rather than one of the other variants, and when do
I have to take or release locks myself?

I think that should ideally be described both in the commit message
(at least as a summary) and in a comment in a header somewhere.
Perhaps it is already in the patch somewhere, but a quick glance
through didn't find it...

>
> Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
>
> diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/hap/hap.c
> --- a/xen/arch/x86/mm/hap/hap.c
> +++ b/xen/arch/x86/mm/hap/hap.c
> @@ -861,7 +861,7 @@ hap_write_p2m_entry(struct vcpu *v, unsi
>     old_flags = l1e_get_flags(*p);
>
>     if ( nestedhvm_enabled(d) && (old_flags & _PAGE_PRESENT)
> -         && !p2m_get_hostp2m(d)->defer_nested_flush ) {
> +         && !atomic_read(&(p2m_get_hostp2m(d)->defer_nested_flush)) ) {
>         /* We are replacing a valid entry so we need to flush nested p2ms,
>          * unless the only change is an increase in access rights. */
>         mfn_t omfn = _mfn(l1e_get_pfn(*p));
> diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/hap/nested_hap.c
> --- a/xen/arch/x86/mm/hap/nested_hap.c
> +++ b/xen/arch/x86/mm/hap/nested_hap.c
> @@ -105,8 +105,6 @@ nestedhap_fix_p2m(struct vcpu *v, struct
>     ASSERT(p2m);
>     ASSERT(p2m->set_entry);
>
> -    p2m_lock(p2m);
> -
>     /* If this p2m table has been flushed or recycled under our feet,
>      * leave it alone.  We'll pick up the right one as we try to
>      * vmenter the guest. */
> @@ -122,11 +120,13 @@ nestedhap_fix_p2m(struct vcpu *v, struct
>         gfn = (L2_gpa >> PAGE_SHIFT) & mask;
>         mfn = _mfn((L0_gpa >> PAGE_SHIFT) & mask);
>
> +        /* Not bumping refcount of pages underneath because we're getting
> +         * rid of whatever was there */
> +        get_p2m(p2m, gfn, page_order);
>         rv = set_p2m_entry(p2m, gfn, mfn, page_order, p2mt, p2ma);
> +        put_p2m(p2m, gfn, page_order);
>     }
>
> -    p2m_unlock(p2m);
> -
>     if (rv == 0) {
>         gdprintk(XENLOG_ERR,
>                "failed to set entry for 0x%"PRIx64" -> 0x%"PRIx64"\n",
> @@ -146,19 +146,26 @@ nestedhap_walk_L0_p2m(struct p2m_domain
>     mfn_t mfn;
>     p2m_type_t p2mt;
>     p2m_access_t p2ma;
> +    int rc;
>
>     /* walk L0 P2M table */
>     mfn = gfn_to_mfn_type_p2m(p2m, L1_gpa >> PAGE_SHIFT, &p2mt, &p2ma,
>                               p2m_query, page_order);
>
> +    rc = NESTEDHVM_PAGEFAULT_ERROR;
>     if ( p2m_is_paging(p2mt) || p2m_is_shared(p2mt) || !p2m_is_ram(p2mt) )
> -        return NESTEDHVM_PAGEFAULT_ERROR;
> +        goto out;
>
> +    rc = NESTEDHVM_PAGEFAULT_ERROR;
>     if ( !mfn_valid(mfn) )
> -        return NESTEDHVM_PAGEFAULT_ERROR;
> +        goto out;
>
>     *L0_gpa = (mfn_x(mfn) << PAGE_SHIFT) + (L1_gpa & ~PAGE_MASK);
> -    return NESTEDHVM_PAGEFAULT_DONE;
> +    rc = NESTEDHVM_PAGEFAULT_DONE;
> +
> +out:
> +    drop_p2m_gfn(p2m, L1_gpa >> PAGE_SHIFT, mfn_x(mfn));
> +    return rc;
>  }
>
>  /* This function uses L2_gpa to walk the P2M page table in L1. If the
> diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/p2m-ept.c
> --- a/xen/arch/x86/mm/p2m-ept.c
> +++ b/xen/arch/x86/mm/p2m-ept.c
> @@ -43,29 +43,16 @@
>  #define is_epte_present(ept_entry)      ((ept_entry)->epte & 0x7)
>  #define is_epte_superpage(ept_entry)    ((ept_entry)->sp)
>
> -/* Non-ept "lock-and-check" wrapper */
> +/* Ept-specific check wrapper */
>  static int ept_pod_check_and_populate(struct p2m_domain *p2m, unsigned long gfn,
>                                       ept_entry_t *entry, int order,
>                                       p2m_query_t q)
>  {
> -    int r;
> -
> -    /* This is called from the p2m lookups, which can happen with or
> -     * without the lock hed. */
> -    p2m_lock_recursive(p2m);
> -
>     /* Check to make sure this is still PoD */
>     if ( entry->sa_p2mt != p2m_populate_on_demand )
> -    {
> -        p2m_unlock(p2m);
>         return 0;
> -    }
>
> -    r = p2m_pod_demand_populate(p2m, gfn, order, q);
> -
> -    p2m_unlock(p2m);
> -
> -    return r;
> +    return p2m_pod_demand_populate(p2m, gfn, order, q);
>  }
>
>  static void ept_p2m_type_to_flags(ept_entry_t *entry, p2m_type_t type, p2m_access_t access)
> @@ -265,9 +252,9 @@ static int ept_next_level(struct p2m_dom
>
>     ept_entry = (*table) + index;
>
> -    /* ept_next_level() is called (sometimes) without a lock.  Read
> +    /* ept_next_level() is called (never) without a lock.  Read
>      * the entry once, and act on the "cached" entry after that to
> -     * avoid races. */
> +     * avoid races. AAA */
>     e = atomic_read_ept_entry(ept_entry);
>
>     if ( !is_epte_present(&e) )
> @@ -733,7 +720,8 @@ void ept_change_entry_emt_with_range(str
>     int order = 0;
>     struct p2m_domain *p2m = p2m_get_hostp2m(d);
>
> -    p2m_lock(p2m);
> +    /* This is a global operation, essentially */
> +    get_p2m_global(p2m);
>     for ( gfn = start_gfn; gfn <= end_gfn; gfn++ )
>     {
>         int level = 0;
> @@ -773,7 +761,7 @@ void ept_change_entry_emt_with_range(str
>                 ept_set_entry(p2m, gfn, mfn, order, e.sa_p2mt, e.access);
>         }
>     }
> -    p2m_unlock(p2m);
> +    put_p2m_global(p2m);
>  }
>
>  /*
> diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/p2m-pod.c
> --- a/xen/arch/x86/mm/p2m-pod.c
> +++ b/xen/arch/x86/mm/p2m-pod.c
> @@ -102,8 +102,6 @@ p2m_pod_cache_add(struct p2m_domain *p2m
>     }
>  #endif
>
> -    ASSERT(p2m_locked_by_me(p2m));
> -
>     /*
>      * Pages from domain_alloc and returned by the balloon driver aren't
>      * guaranteed to be zero; but by reclaiming zero pages, we implicitly
> @@ -536,7 +534,7 @@ p2m_pod_decrease_reservation(struct doma
>     {
>         p2m_type_t t;
>
> -        gfn_to_mfn_query(d, gpfn + i, &t);
> +        gfn_to_mfn_query_unlocked(d, gpfn + i, &t);
>
>         if ( t == p2m_populate_on_demand )
>             pod++;

The rest of the code makes it look like gfn_to_mfn_query() will take
the p2m lock for the range, but the _unlocked() version will not.  Is
that correct?

Shouldn't this remain as it is, then?
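
At least, that's how I read the new helpers in asm-x86/p2m.h:
gfn_to_mfn_type_unlocked() is just the locked lookup followed
immediately by drop_p2m_gfn_domain(), so by the time it returns the
lock and the page reference are already gone.  Roughly (hypothetical
snippet, not code from the patch):

    p2m_type_t t;
    mfn_t mfn;

    /* Locked variant: the entry is stable until we drop it ourselves. */
    mfn = gfn_to_mfn_query(d, gpfn + i, &t);
    /* ... safe to act on t and mfn here ... */
    drop_p2m_gfn_domain(d, gpfn + i, mfn_x(mfn));

    /* Unlocked variant: lock and reference are dropped again before it
     * returns, so t is only a snapshot of the entry -- whether that is
     * good enough for this counting loop is what I'm asking above. */
    gfn_to_mfn_query_unlocked(d, gpfn + i, &t);
    if ( t == p2m_populate_on_demand )
        pod++;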

> @@ -602,6 +600,7 @@ p2m_pod_decrease_reservation(struct doma
>             nonpod--;
>             ram--;
>         }
> +        drop_p2m_gfn(p2m, gpfn + i, mfn_x(mfn));
>     }

And how does this fit with the _query() call above?
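
To spell out what I mean: I'd expect drop_p2m_gfn() to pair with a
locked lookup earlier in the same loop body, i.e. something like the
following (my guess at the shape of the loop -- the lookup itself isn't
visible in this hunk):

    /* Guessed context, not code from the patch: */
    mfn = gfn_to_mfn_query(d, gpfn + i, &t);   /* takes lock + page ref */
    /* ... adjust the pod/nonpod/ram accounting ... */
    drop_p2m_gfn(p2m, gpfn + i, mfn_x(mfn));   /* added by this patch */

If it's instead meant to balance the (now _unlocked()) query in the
previous hunk, then I don't see what's left for it to release.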

>
>     /* If there are no more non-PoD entries, tell decrease_reservation() that
> @@ -661,12 +660,15 @@ p2m_pod_zero_check_superpage(struct p2m_
>     for ( i=0; i<SUPERPAGE_PAGES; i++ )
>     {
>
> -        mfn = gfn_to_mfn_query(d, gfn + i, &type);
> -
>         if ( i == 0 )
>         {
> +            /* Only lock the p2m entry the first time, that will lock
> +             * server for the whole superpage */
> +            mfn = gfn_to_mfn_query(d, gfn + i, &type);
>             mfn0 = mfn;
>             type0 = type;
> +        } else {
> +            mfn = gfn_to_mfn_query_unlocked(d, gfn + i, &type);
>         }
>
>         /* Conditions that must be met for superpage-superpage:
> @@ -773,6 +775,10 @@ out:
>         p2m_pod_cache_add(p2m, mfn_to_page(mfn0), PAGE_ORDER_2M);
>         p2m->pod.entry_count += SUPERPAGE_PAGES;
>     }
> +
> +    /* We got p2m locks once for the whole superpage, with the original
> +     * mfn0. We drop it here. */
> +    drop_p2m_gfn(p2m, gfn, mfn_x(mfn0));
>  }
>
>  /* On entry, PoD lock is held */
> @@ -894,6 +900,12 @@ p2m_pod_zero_check(struct p2m_domain *p2
>             p2m->pod.entry_count++;
>         }
>     }
> +
> +    /* Drop all p2m locks and references */
> +    for ( i=0; i<count; i++ )
> +    {
> +        drop_p2m_gfn(p2m, gfns[i], mfn_x(mfns[i]));
> +    }
>
>  }
>
> @@ -928,7 +940,9 @@ p2m_pod_emergency_sweep_super(struct p2m
>     p2m->pod.reclaim_super = i ? i - SUPERPAGE_PAGES : 0;
>  }
>
> -#define POD_SWEEP_STRIDE  16
> +/* Note that spinlock recursion counters have 4 bits, so 16 or higher
> + * will overflow a single 2M spinlock in a zero check. */
> +#define POD_SWEEP_STRIDE  15
>  static void
>  p2m_pod_emergency_sweep(struct p2m_domain *p2m)
>  {
> @@ -946,7 +960,7 @@ p2m_pod_emergency_sweep(struct p2m_domai
>     /* FIXME: Figure out how to avoid superpages */
>     for ( i=p2m->pod.reclaim_single; i > 0 ; i-- )
>     {
> -        gfn_to_mfn_query(p2m->domain, i, &t );
> +        gfn_to_mfn_query_unlocked(p2m->domain, i, &t );
>         if ( p2m_is_ram(t) )
>         {
>             gfns[j] = i;
> @@ -974,6 +988,7 @@ p2m_pod_emergency_sweep(struct p2m_domai
>
>  }
>
> +/* The gfn and order need to be locked in the p2m before you walk in here */
>  int
>  p2m_pod_demand_populate(struct p2m_domain *p2m, unsigned long gfn,
>                         unsigned int order,
> @@ -985,8 +1000,6 @@ p2m_pod_demand_populate(struct p2m_domai
>     mfn_t mfn;
>     int i;
>
> -    ASSERT(p2m_locked_by_me(p2m));
> -
>     pod_lock(p2m);
>     /* This check is done with the pod lock held.  This will make sure that
>      * even if d->is_dying changes under our feet, p2m_pod_empty_cache()
> @@ -1008,8 +1021,6 @@ p2m_pod_demand_populate(struct p2m_domai
>         set_p2m_entry(p2m, gfn_aligned, _mfn(0), PAGE_ORDER_2M,
>                       p2m_populate_on_demand, p2m->default_access);
>         audit_p2m(p2m, 1);
> -        /* This is because the ept/pt caller locks the p2m recursively */
> -        p2m_unlock(p2m);
>         return 0;
>     }
>
> @@ -1132,7 +1143,9 @@ guest_physmap_mark_populate_on_demand(st
>     if ( rc != 0 )
>         return rc;
>
> -    p2m_lock(p2m);
> +    /* Pre-lock all the p2m entries. We don't take refs to the
> +     * pages, because there shouldn't be any pages underneath. */
> +    get_p2m(p2m, gfn, order);
>     audit_p2m(p2m, 1);
>
>     P2M_DEBUG("mark pod gfn=%#lx\n", gfn);
> @@ -1140,7 +1153,8 @@ guest_physmap_mark_populate_on_demand(st
>     /* Make sure all gpfns are unused */
>     for ( i = 0; i < (1UL << order); i++ )
>     {
> -        omfn = gfn_to_mfn_query(d, gfn + i, &ot);
> +        p2m_access_t a;
> +        omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, p2m_query, NULL);
>         if ( p2m_is_ram(ot) )
>         {
>             printk("%s: gfn_to_mfn returned type %d!\n",
> @@ -1169,9 +1183,9 @@ guest_physmap_mark_populate_on_demand(st
>     }
>
>     audit_p2m(p2m, 1);
> -    p2m_unlock(p2m);
>
>  out:
> +    put_p2m(p2m, gfn, order);
>     return rc;
>  }
>
> diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/p2m-pt.c
> --- a/xen/arch/x86/mm/p2m-pt.c
> +++ b/xen/arch/x86/mm/p2m-pt.c
> @@ -487,31 +487,16 @@ out:
>  }
>
>
> -/* Non-ept "lock-and-check" wrapper */
> +/* PT-specific check wrapper */
>  static int p2m_pod_check_and_populate(struct p2m_domain *p2m, unsigned long gfn,
>                                       l1_pgentry_t *p2m_entry, int order,
>                                       p2m_query_t q)
>  {
> -    int r;
> -
> -    /* This is called from the p2m lookups, which can happen with or
> -     * without the lock hed. */
> -    p2m_lock_recursive(p2m);
> -    audit_p2m(p2m, 1);
> -
>     /* Check to make sure this is still PoD */
>     if ( p2m_flags_to_type(l1e_get_flags(*p2m_entry)) != p2m_populate_on_demand )
> -    {
> -        p2m_unlock(p2m);
>         return 0;
> -    }
>
> -    r = p2m_pod_demand_populate(p2m, gfn, order, q);
> -
> -    audit_p2m(p2m, 1);
> -    p2m_unlock(p2m);
> -
> -    return r;
> +    return p2m_pod_demand_populate(p2m, gfn, order, q);
>  }
>
>  /* Read the current domain's p2m table (through the linear mapping). */
> @@ -894,6 +879,7 @@ static void p2m_change_type_global(struc
>     if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) == 0 )
>         return;
>
> +    /* Checks for exclusive lock */
>     ASSERT(p2m_locked_by_me(p2m));
>
>  #if CONFIG_PAGING_LEVELS == 4
> diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/p2m.c
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -143,9 +143,9 @@ void p2m_change_entry_type_global(struct
>                                   p2m_type_t ot, p2m_type_t nt)
>  {
>     struct p2m_domain *p2m = p2m_get_hostp2m(d);
> -    p2m_lock(p2m);
> +    get_p2m_global(p2m);
>     p2m->change_entry_type_global(p2m, ot, nt);
> -    p2m_unlock(p2m);
> +    put_p2m_global(p2m);
>  }
>
>  mfn_t gfn_to_mfn_type_p2m(struct p2m_domain *p2m, unsigned long gfn,
> @@ -162,12 +162,17 @@ mfn_t gfn_to_mfn_type_p2m(struct p2m_dom
>         return _mfn(gfn);
>     }
>
> +    /* We take the lock for this single gfn. The caller has to put this lock */
> +    get_p2m_gfn(p2m, gfn);
> +
>     mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order);
>
>  #ifdef __x86_64__
>     if ( q == p2m_unshare && p2m_is_shared(*t) )
>     {
>         ASSERT(!p2m_is_nestedp2m(p2m));
> +        /* p2m locking is recursive, so we won't deadlock going
> +         * into the sharing code */
>         mem_sharing_unshare_page(p2m->domain, gfn, 0);
>         mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order);
>     }
> @@ -179,13 +184,28 @@ mfn_t gfn_to_mfn_type_p2m(struct p2m_dom
>         /* Return invalid_mfn to avoid caller's access */
>         mfn = _mfn(INVALID_MFN);
>         if (q == p2m_guest)
> +        {
> +            put_p2m_gfn(p2m, gfn);
>             domain_crash(p2m->domain);
> +        }
>     }
>  #endif
>
> +    /* Take an extra reference to the page. It won't disappear beneath us */
> +    if ( mfn_valid(mfn) )
> +    {
> +        /* Use this because we don't necessarily know who owns the page */
> +        if ( !page_get_owner_and_reference(mfn_to_page(mfn)) )
> +        {
> +            mfn = _mfn(INVALID_MFN);
> +        }
> +    }
> +
> +    /* We leave holding the p2m lock for this gfn */
>     return mfn;
>  }
>
> +/* Appropriate locks held on entry */
>  int set_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
>                   unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma)
>  {
> @@ -194,8 +214,6 @@ int set_p2m_entry(struct p2m_domain *p2m
>     unsigned int order;
>     int rc = 1;
>
> -    ASSERT(p2m_locked_by_me(p2m));
> -
>     while ( todo )
>     {
>         if ( hap_enabled(d) )
> @@ -217,6 +235,18 @@ int set_p2m_entry(struct p2m_domain *p2m
>     return rc;
>  }
>
> +void drop_p2m_gfn(struct p2m_domain *p2m, unsigned long gfn,
> +                    unsigned long frame)
> +{
> +    mfn_t mfn = _mfn(frame);
> +    /* For non-translated domains, locks are never taken */
> +    if ( !p2m || !paging_mode_translate(p2m->domain) )
> +        return;
> +    if ( mfn_valid(mfn) )
> +        put_page(mfn_to_page(mfn));
> +    put_p2m_gfn(p2m, gfn);
> +}
> +
>  struct page_info *p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type)
>  {
>     struct page_info *pg;
> @@ -262,12 +292,12 @@ int p2m_alloc_table(struct p2m_domain *p
>     unsigned long gfn = -1UL;
>     struct domain *d = p2m->domain;
>
> -    p2m_lock(p2m);
> +    get_p2m_global(p2m);
>
>     if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 )
>     {
>         P2M_ERROR("p2m already allocated for this domain\n");
> -        p2m_unlock(p2m);
> +        put_p2m_global(p2m);
>         return -EINVAL;
>     }
>
> @@ -283,7 +313,7 @@ int p2m_alloc_table(struct p2m_domain *p
>
>     if ( p2m_top == NULL )
>     {
> -        p2m_unlock(p2m);
> +        put_p2m_global(p2m);
>         return -ENOMEM;
>     }
>
> @@ -295,7 +325,7 @@ int p2m_alloc_table(struct p2m_domain *p
>     P2M_PRINTK("populating p2m table\n");
>
>     /* Initialise physmap tables for slot zero. Other code assumes this. */
> -    p2m->defer_nested_flush = 1;
> +    atomic_set(&p2m->defer_nested_flush, 1);
>     if ( !set_p2m_entry(p2m, 0, _mfn(INVALID_MFN), 0,
>                         p2m_invalid, p2m->default_access) )
>         goto error;
> @@ -323,10 +353,10 @@ int p2m_alloc_table(struct p2m_domain *p
>         }
>         spin_unlock(&p2m->domain->page_alloc_lock);
>     }
> -    p2m->defer_nested_flush = 0;
> +    atomic_set(&p2m->defer_nested_flush, 0);
>
>     P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);
> -    p2m_unlock(p2m);
> +    put_p2m_global(p2m);
>     return 0;
>
>  error_unlock:
> @@ -334,7 +364,7 @@ error_unlock:
>  error:
>     P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%"
>                PRI_mfn "\n", gfn, mfn_x(mfn));
> -    p2m_unlock(p2m);
> +    put_p2m_global(p2m);
>     return -ENOMEM;
>  }
>
> @@ -354,26 +384,28 @@ void p2m_teardown(struct p2m_domain *p2m
>     if (p2m == NULL)
>         return;
>
> +    get_p2m_global(p2m);
> +
>  #ifdef __x86_64__
>     for ( gfn=0; gfn < p2m->max_mapped_pfn; gfn++ )
>     {
> -        mfn = gfn_to_mfn_type_p2m(p2m, gfn, &t, &a, p2m_query, NULL);
> +        mfn = p2m->get_entry(p2m, gfn, &t, &a, p2m_query, NULL);
>         if ( mfn_valid(mfn) && (t == p2m_ram_shared) )
>         {
>             ASSERT(!p2m_is_nestedp2m(p2m));
> +            /* The p2m allows an exclusive global holder to recursively
> +             * lock sub-ranges. For this. */
>             BUG_ON(mem_sharing_unshare_page(d, gfn, MEM_SHARING_DESTROY_GFN));
>         }
>
>     }
>  #endif
>
> -    p2m_lock(p2m);
> -
>     p2m->phys_table = pagetable_null();
>
>     while ( (pg = page_list_remove_head(&p2m->pages)) )
>         d->arch.paging.free_page(d, pg);
> -    p2m_unlock(p2m);
> +    put_p2m_global(p2m);
>  }
>
>  static void p2m_teardown_nestedp2m(struct domain *d)
> @@ -401,6 +433,7 @@ void p2m_final_teardown(struct domain *d
>  }
>
>
> +/* Locks held on entry */
>  static void
>  p2m_remove_page(struct p2m_domain *p2m, unsigned long gfn, unsigned long mfn,
>                 unsigned int page_order)
> @@ -438,11 +471,11 @@ guest_physmap_remove_page(struct domain
>                           unsigned long mfn, unsigned int page_order)
>  {
>     struct p2m_domain *p2m = p2m_get_hostp2m(d);
> -    p2m_lock(p2m);
> +    get_p2m(p2m, gfn, page_order);
>     audit_p2m(p2m, 1);
>     p2m_remove_page(p2m, gfn, mfn, page_order);
>     audit_p2m(p2m, 1);
> -    p2m_unlock(p2m);
> +    put_p2m(p2m, gfn, page_order);
>  }
>
>  int
> @@ -480,7 +513,7 @@ guest_physmap_add_entry(struct domain *d
>     if ( rc != 0 )
>         return rc;
>
> -    p2m_lock(p2m);
> +    get_p2m(p2m, gfn, page_order);
>     audit_p2m(p2m, 0);
>
>     P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
> @@ -488,12 +521,13 @@ guest_physmap_add_entry(struct domain *d
>     /* First, remove m->p mappings for existing p->m mappings */
>     for ( i = 0; i < (1UL << page_order); i++ )
>     {
> -        omfn = gfn_to_mfn_query(d, gfn + i, &ot);
> +        p2m_access_t a;
> +        omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, p2m_query, NULL);
>         if ( p2m_is_grant(ot) )
>         {
>             /* Really shouldn't be unmapping grant maps this way */
> +            put_p2m(p2m, gfn, page_order);
>             domain_crash(d);
> -            p2m_unlock(p2m);
>             return -EINVAL;
>         }
>         else if ( p2m_is_ram(ot) )
> @@ -523,11 +557,12 @@ guest_physmap_add_entry(struct domain *d
>             && (ogfn != INVALID_M2P_ENTRY)
>             && (ogfn != gfn + i) )
>         {
> +            p2m_access_t a;
>             /* This machine frame is already mapped at another physical
>              * address */
>             P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
>                       mfn + i, ogfn, gfn + i);
> -            omfn = gfn_to_mfn_query(d, ogfn, &ot);
> +            omfn = p2m->get_entry(p2m, ogfn, &ot, &a, p2m_query, NULL);
>             if ( p2m_is_ram(ot) )
>             {
>                 ASSERT(mfn_valid(omfn));
> @@ -567,7 +602,7 @@ guest_physmap_add_entry(struct domain *d
>     }
>
>     audit_p2m(p2m, 1);
> -    p2m_unlock(p2m);
> +    put_p2m(p2m, gfn, page_order);
>
>     return rc;
>  }
> @@ -579,18 +614,19 @@ p2m_type_t p2m_change_type(struct domain
>                            p2m_type_t ot, p2m_type_t nt)
>  {
>     p2m_type_t pt;
> +    p2m_access_t a;
>     mfn_t mfn;
>     struct p2m_domain *p2m = p2m_get_hostp2m(d);
>
>     BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
>
> -    p2m_lock(p2m);
> +    get_p2m_gfn(p2m, gfn);
>
> -    mfn = gfn_to_mfn_query(d, gfn, &pt);
> +    mfn = p2m->get_entry(p2m, gfn, &pt, &a, p2m_query, NULL);
>     if ( pt == ot )
>         set_p2m_entry(p2m, gfn, mfn, 0, nt, p2m->default_access);
>
> -    p2m_unlock(p2m);
> +    put_p2m_gfn(p2m, gfn);
>
>     return pt;
>  }
> @@ -608,20 +644,23 @@ void p2m_change_type_range(struct domain
>
>     BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
>
> -    p2m_lock(p2m);
> -    p2m->defer_nested_flush = 1;
> +    atomic_set(&p2m->defer_nested_flush, 1);
>
> +    /* We've been given a number instead of an order, so lock each
> +     * gfn individually */
>     for ( gfn = start; gfn < end; gfn++ )
>     {
> -        mfn = gfn_to_mfn_query(d, gfn, &pt);
> +        p2m_access_t a;
> +        get_p2m_gfn(p2m, gfn);
> +        mfn = p2m->get_entry(p2m, gfn, &pt, &a, p2m_query, NULL);
>         if ( pt == ot )
>             set_p2m_entry(p2m, gfn, mfn, 0, nt, p2m->default_access);
> +        put_p2m_gfn(p2m, gfn);
>     }
>
> -    p2m->defer_nested_flush = 0;
> +    atomic_set(&p2m->defer_nested_flush, 0);
>     if ( nestedhvm_enabled(d) )
>         p2m_flush_nestedp2m(d);
> -    p2m_unlock(p2m);
>  }
>
>
> @@ -631,17 +670,18 @@ set_mmio_p2m_entry(struct domain *d, uns
>  {
>     int rc = 0;
>     p2m_type_t ot;
> +    p2m_access_t a;
>     mfn_t omfn;
>     struct p2m_domain *p2m = p2m_get_hostp2m(d);
>
>     if ( !paging_mode_translate(d) )
>         return 0;
>
> -    p2m_lock(p2m);
> -    omfn = gfn_to_mfn_query(d, gfn, &ot);
> +    get_p2m_gfn(p2m, gfn);
> +    omfn = p2m->get_entry(p2m, gfn, &ot, &a, p2m_query, NULL);
>     if ( p2m_is_grant(ot) )
>     {
> -        p2m_unlock(p2m);
> +        put_p2m_gfn(p2m, gfn);
>         domain_crash(d);
>         return 0;
>     }
> @@ -654,11 +694,11 @@ set_mmio_p2m_entry(struct domain *d, uns
>     P2M_DEBUG("set mmio %lx %lx\n", gfn, mfn_x(mfn));
>     rc = set_p2m_entry(p2m, gfn, mfn, 0, p2m_mmio_direct, p2m->default_access);
>     audit_p2m(p2m, 1);
> -    p2m_unlock(p2m);
> +    put_p2m_gfn(p2m, gfn);
>     if ( 0 == rc )
>         gdprintk(XENLOG_ERR,
>             "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
> -            mfn_x(gfn_to_mfn_query(d, gfn, &ot)));
> +            mfn_x(gfn_to_mfn_query_unlocked(d, gfn, &ot)));
>     return rc;
>  }
>
> @@ -668,13 +708,14 @@ clear_mmio_p2m_entry(struct domain *d, u
>     int rc = 0;
>     mfn_t mfn;
>     p2m_type_t t;
> +    p2m_access_t a;
>     struct p2m_domain *p2m = p2m_get_hostp2m(d);
>
>     if ( !paging_mode_translate(d) )
>         return 0;
>
> -    p2m_lock(p2m);
> -    mfn = gfn_to_mfn_query(d, gfn, &t);
> +    get_p2m_gfn(p2m, gfn);
> +    mfn = p2m->get_entry(p2m, gfn, &t, &a, p2m_query, NULL);
>
>     /* Do not use mfn_valid() here as it will usually fail for MMIO pages. */
>     if ( (INVALID_MFN == mfn_x(mfn)) || (t != p2m_mmio_direct) )
> @@ -687,8 +728,7 @@ clear_mmio_p2m_entry(struct domain *d, u
>     audit_p2m(p2m, 1);
>
>  out:
> -    p2m_unlock(p2m);
> -
> +    put_p2m_gfn(p2m, gfn);
>     return rc;
>  }
>
> @@ -698,13 +738,14 @@ set_shared_p2m_entry(struct domain *d, u
>     struct p2m_domain *p2m = p2m_get_hostp2m(d);
>     int rc = 0;
>     p2m_type_t ot;
> +    p2m_access_t a;
>     mfn_t omfn;
>
>     if ( !paging_mode_translate(p2m->domain) )
>         return 0;
>
> -    p2m_lock(p2m);
> -    omfn = gfn_to_mfn_query(p2m->domain, gfn, &ot);
> +    get_p2m_gfn(p2m, gfn);
> +    omfn = p2m->get_entry(p2m, gfn, &ot, &a, p2m_query, NULL);
>     /* At the moment we only allow p2m change if gfn has already been made
>      * sharable first */
>     ASSERT(p2m_is_shared(ot));
> @@ -714,11 +755,11 @@ set_shared_p2m_entry(struct domain *d, u
>
>     P2M_DEBUG("set shared %lx %lx\n", gfn, mfn_x(mfn));
>     rc = set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_shared, p2m->default_access);
> -    p2m_unlock(p2m);
> +    put_p2m_gfn(p2m, gfn);
>     if ( 0 == rc )
>         gdprintk(XENLOG_ERR,
>             "set_shared_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
> -            mfn_x(gfn_to_mfn_query(d, gfn, &ot)));
> +            mfn_x(gfn_to_mfn_query_unlocked(d, gfn, &ot)));
>     return rc;
>  }
>
> @@ -732,7 +773,7 @@ int p2m_mem_paging_nominate(struct domai
>     mfn_t mfn;
>     int ret;
>
> -    p2m_lock(p2m);
> +    get_p2m_gfn(p2m, gfn);
>
>     mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query, NULL);
>
> @@ -765,7 +806,7 @@ int p2m_mem_paging_nominate(struct domai
>     ret = 0;
>
>  out:
> -    p2m_unlock(p2m);
> +    put_p2m_gfn(p2m, gfn);
>     return ret;
>  }
>
> @@ -778,7 +819,7 @@ int p2m_mem_paging_evict(struct domain *
>     struct p2m_domain *p2m = p2m_get_hostp2m(d);
>     int ret = -EINVAL;
>
> -    p2m_lock(p2m);
> +    get_p2m_gfn(p2m, gfn);
>
>     /* Get mfn */
>     mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query, NULL);
> @@ -824,7 +865,7 @@ int p2m_mem_paging_evict(struct domain *
>     put_page(page);
>
>  out:
> -    p2m_unlock(p2m);
> +    put_p2m_gfn(p2m, gfn);
>     return ret;
>  }
>
> @@ -863,7 +904,7 @@ void p2m_mem_paging_populate(struct doma
>     req.type = MEM_EVENT_TYPE_PAGING;
>
>     /* Fix p2m mapping */
> -    p2m_lock(p2m);
> +    get_p2m_gfn(p2m, gfn);
>     mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query, NULL);
>     /* Allow only nominated or evicted pages to enter page-in path */
>     if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged )
> @@ -875,7 +916,7 @@ void p2m_mem_paging_populate(struct doma
>         set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_in_start, a);
>         audit_p2m(p2m, 1);
>     }
> -    p2m_unlock(p2m);
> +    put_p2m_gfn(p2m, gfn);
>
>     /* Pause domain if request came from guest and gfn has paging type */
>     if (  p2m_is_paging(p2mt) && v->domain->domain_id == d->domain_id )
> @@ -908,7 +949,7 @@ int p2m_mem_paging_prep(struct domain *d
>     struct p2m_domain *p2m = p2m_get_hostp2m(d);
>     int ret = -ENOMEM;
>
> -    p2m_lock(p2m);
> +    get_p2m_gfn(p2m, gfn);
>
>     mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query, NULL);
>
> @@ -931,7 +972,7 @@ int p2m_mem_paging_prep(struct domain *d
>     ret = 0;
>
>  out:
> -    p2m_unlock(p2m);
> +    put_p2m_gfn(p2m, gfn);
>     return ret;
>  }
>
> @@ -949,12 +990,12 @@ void p2m_mem_paging_resume(struct domain
>     /* Fix p2m entry if the page was not dropped */
>     if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
>     {
> -        p2m_lock(p2m);
> +        get_p2m_gfn(p2m, rsp.gfn);
>         mfn = p2m->get_entry(p2m, rsp.gfn, &p2mt, &a, p2m_query, NULL);
>         set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw, a);
>         set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
>         audit_p2m(p2m, 1);
> -        p2m_unlock(p2m);
> +        put_p2m_gfn(p2m, rsp.gfn);
>     }
>
>     /* Unpause domain */
> @@ -979,16 +1020,16 @@ void p2m_mem_access_check(unsigned long
>     p2m_access_t p2ma;
>
>     /* First, handle rx2rw conversion automatically */
> -    p2m_lock(p2m);
> +    get_p2m_gfn(p2m, gfn);
>     mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, p2m_query, NULL);
>
>     if ( access_w && p2ma == p2m_access_rx2rw )
>     {
>         p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt, p2m_access_rw);
> -        p2m_unlock(p2m);
> +        put_p2m_gfn(p2m, gfn);
>         return;
>     }
> -    p2m_unlock(p2m);
> +    put_p2m_gfn(p2m, gfn);
>
>     /* Otherwise, check if there is a memory event listener, and send the message along */
>     res = mem_event_check_ring(d, &d->mem_access);
> @@ -1006,9 +1047,9 @@ void p2m_mem_access_check(unsigned long
>         else
>         {
>             /* A listener is not required, so clear the access restrictions */
> -            p2m_lock(p2m);
> +            get_p2m_gfn(p2m, gfn);
>             p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt, p2m_access_rwx);
> -            p2m_unlock(p2m);
> +            put_p2m_gfn(p2m, gfn);
>         }
>
>         return;
> @@ -1064,7 +1105,7 @@ int p2m_set_mem_access(struct domain *d,
>  {
>     struct p2m_domain *p2m = p2m_get_hostp2m(d);
>     unsigned long pfn;
> -    p2m_access_t a;
> +    p2m_access_t a, _a;
>     p2m_type_t t;
>     mfn_t mfn;
>     int rc = 0;
> @@ -1095,17 +1136,20 @@ int p2m_set_mem_access(struct domain *d,
>         return 0;
>     }
>
> -    p2m_lock(p2m);
> +    /* Because we don't get an order, rather a number, we need to lock each
> +     * entry individually */
>     for ( pfn = start_pfn; pfn < start_pfn + nr; pfn++ )
>     {
> -        mfn = gfn_to_mfn_query(d, pfn, &t);
> +        get_p2m_gfn(p2m, pfn);
> +        mfn = p2m->get_entry(p2m, pfn, &t, &_a, p2m_query, NULL);
>         if ( p2m->set_entry(p2m, pfn, mfn, PAGE_ORDER_4K, t, a) == 0 )
>         {
> +            put_p2m_gfn(p2m, pfn);
>             rc = -ENOMEM;
>             break;
>         }
> +        put_p2m_gfn(p2m, pfn);
>     }
> -    p2m_unlock(p2m);
>     return rc;
>  }
>
> @@ -1138,7 +1182,10 @@ int p2m_get_mem_access(struct domain *d,
>         return 0;
>     }
>
> +    get_p2m_gfn(p2m, pfn);
>     mfn = p2m->get_entry(p2m, pfn, &t, &a, p2m_query, NULL);
> +    put_p2m_gfn(p2m, pfn);
> +
>     if ( mfn_x(mfn) == INVALID_MFN )
>         return -ESRCH;
>
> @@ -1175,7 +1222,7 @@ p2m_flush_table(struct p2m_domain *p2m)
>     struct domain *d = p2m->domain;
>     void *p;
>
> -    p2m_lock(p2m);
> +    get_p2m_global(p2m);
>
>     /* "Host" p2m tables can have shared entries &c that need a bit more
>      * care when discarding them */
> @@ -1203,7 +1250,7 @@ p2m_flush_table(struct p2m_domain *p2m)
>             d->arch.paging.free_page(d, pg);
>     page_list_add(top, &p2m->pages);
>
> -    p2m_unlock(p2m);
> +    put_p2m_global(p2m);
>  }
>
>  void
> @@ -1245,7 +1292,7 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
>     p2m = nv->nv_p2m;
>     if ( p2m )
>     {
> -        p2m_lock(p2m);
> +        get_p2m_global(p2m);
>         if ( p2m->cr3 == cr3 || p2m->cr3 == CR3_EADDR )
>         {
>             nv->nv_flushp2m = 0;
> @@ -1255,24 +1302,24 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
>                 hvm_asid_flush_vcpu(v);
>             p2m->cr3 = cr3;
>             cpu_set(v->processor, p2m->p2m_dirty_cpumask);
> -            p2m_unlock(p2m);
> +            put_p2m_global(p2m);
>             nestedp2m_unlock(d);
>             return p2m;
>         }
> -        p2m_unlock(p2m);
> +        put_p2m_global(p2m);
>     }
>
>     /* All p2m's are or were in use. Take the least recent used one,
>      * flush it and reuse. */
>     p2m = p2m_getlru_nestedp2m(d, NULL);
>     p2m_flush_table(p2m);
> -    p2m_lock(p2m);
> +    get_p2m_global(p2m);
>     nv->nv_p2m = p2m;
>     p2m->cr3 = cr3;
>     nv->nv_flushp2m = 0;
>     hvm_asid_flush_vcpu(v);
>     cpu_set(v->processor, p2m->p2m_dirty_cpumask);
> -    p2m_unlock(p2m);
> +    put_p2m_global(p2m);
>     nestedp2m_unlock(d);
>
>     return p2m;
> diff -r 8a98179666de -r 471d4f2754d6 xen/include/asm-ia64/mm.h
> --- a/xen/include/asm-ia64/mm.h
> +++ b/xen/include/asm-ia64/mm.h
> @@ -561,6 +561,11 @@ extern u64 translate_domain_pte(u64 ptev
>     ((get_gpfn_from_mfn((madr) >> PAGE_SHIFT) << PAGE_SHIFT) | \
>     ((madr) & ~PAGE_MASK))
>
> +/* Because x86-specific p2m fine-grained lock releases are called from common
> + * code, we put a dummy placeholder here */
> +#define drop_p2m_gfn(p, g, m)           ((void)0)
> +#define drop_p2m_gfn_domain(p, g, m)    ((void)0)
> +
>  /* Internal use only: returns 0 in case of bad address.  */
>  extern unsigned long paddr_to_maddr(unsigned long paddr);
>
> diff -r 8a98179666de -r 471d4f2754d6 xen/include/asm-x86/p2m.h
> --- a/xen/include/asm-x86/p2m.h
> +++ b/xen/include/asm-x86/p2m.h
> @@ -220,7 +220,7 @@ struct p2m_domain {
>      * tables on every host-p2m change.  The setter of this flag
>      * is responsible for performing the full flush before releasing the
>      * host p2m's lock. */
> -    int                defer_nested_flush;
> +    atomic_t           defer_nested_flush;
>
>     /* Pages used to construct the p2m */
>     struct page_list_head pages;
> @@ -298,6 +298,15 @@ struct p2m_domain *p2m_get_p2m(struct vc
>  #define p2m_get_pagetable(p2m)  ((p2m)->phys_table)
>
>
> +/* No matter what value you get out of a query, the p2m has been locked for
> + * that range. No matter what you do, you need to drop those locks.
> + * You need to pass back the mfn obtained when locking, not the new one,
> + * as the refcount of the original mfn was bumped. */
> +void drop_p2m_gfn(struct p2m_domain *p2m, unsigned long gfn,
> +                        unsigned long mfn);
> +#define drop_p2m_gfn_domain(d, g, m)    \
> +        drop_p2m_gfn(p2m_get_hostp2m((d)), (g), (m))
> +
>  /* Read a particular P2M table, mapping pages as we go.  Most callers
>  * should _not_ call this directly; use the other gfn_to_mfn_* functions
>  * below unless you know you want to walk a p2m that isn't a domain's
> @@ -327,6 +336,28 @@ static inline mfn_t gfn_to_mfn_type(stru
>  #define gfn_to_mfn_guest(d, g, t)   gfn_to_mfn_type((d), (g), (t), p2m_guest)
>  #define gfn_to_mfn_unshare(d, g, t) gfn_to_mfn_type((d), (g), (t), p2m_unshare)
>
> +/* This one applies to very specific situations in which you're querying
> + * a p2m entry and will be done "immediately" (such as a printk or computing a
> + * return value). Use this only if there are no expectations of the p2m entry
> + * holding steady. */
> +static inline mfn_t gfn_to_mfn_type_unlocked(struct domain *d,
> +                                        unsigned long gfn, p2m_type_t *t,
> +                                        p2m_query_t q)
> +{
> +    mfn_t mfn = gfn_to_mfn_type(d, gfn, t, q);
> +    drop_p2m_gfn_domain(d, gfn, mfn_x(mfn));
> +    return mfn;
> +}
> +
> +#define gfn_to_mfn_unlocked(d, g, t)            \
> +    gfn_to_mfn_type_unlocked((d), (g), (t), p2m_alloc)
> +#define gfn_to_mfn_query_unlocked(d, g, t)    \
> +    gfn_to_mfn_type_unlocked((d), (g), (t), p2m_query)
> +#define gfn_to_mfn_guest_unlocked(d, g, t)    \
> +    gfn_to_mfn_type_unlocked((d), (g), (t), p2m_guest)
> +#define gfn_to_mfn_unshare_unlocked(d, g, t)    \
> +    gfn_to_mfn_type_unlocked((d), (g), (t), p2m_unshare)
> +
>  /* Compatibility function exporting the old untyped interface */
>  static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
>  {
> @@ -338,6 +369,15 @@ static inline unsigned long gmfn_to_mfn(
>     return INVALID_MFN;
>  }
>
> +/* Same comments apply re unlocking */
> +static inline unsigned long gmfn_to_mfn_unlocked(struct domain *d,
> +                                                 unsigned long gpfn)
> +{
> +    unsigned long mfn = gmfn_to_mfn(d, gpfn);
> +    drop_p2m_gfn_domain(d, gpfn, mfn);
> +    return mfn;
> +}
> +
>  /* General conversion function from mfn to gfn */
>  static inline unsigned long mfn_to_gfn(struct domain *d, mfn_t mfn)
>  {
> @@ -521,7 +561,8 @@ static inline int p2m_gfn_check_limit(
>  #define p2m_gfn_check_limit(d, g, o) 0
>  #endif
>
> -/* Directly set a p2m entry: only for use by p2m code */
> +/* Directly set a p2m entry: only for use by p2m code. It expects locks to
> + * be held on entry */
>  int set_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
>                   unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma);
>
>

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
