
[Xen-devel] [PATCH 2/3] x86/mem_sharing: replace use of page_lock/unlock with our own lock



The page_lock/unlock functions were designed for PV pagetable updates.
Recycling them in mem_sharing is somewhat odd and results in unnecessary
complications. Replace their use with a separate per-page lock.

Signed-off-by: Tamas K Lengyel <tamas@xxxxxxxxxxxxx>
Cc: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Cc: Wei Liu <wei.liu2@xxxxxxxxxx>
Cc: Roger Pau Monne <roger.pau@xxxxxxxxxx>
---
 xen/arch/x86/mm/mem_sharing.c | 138 ++++++++++++++--------------------
 xen/include/asm-x86/mm.h      |   5 +-
 2 files changed, 60 insertions(+), 83 deletions(-)
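
For reviewers, a condensed sketch of the locking pattern this patch ends up
with (names taken from the hunks below; the pg_lock_data_t/preempt bookkeeping
kept by the real helpers is elided, so treat this as illustrative rather than
a drop-in):

    /* asm-x86/mm.h: the per-page lock now lives next to the sharing info. */
    struct {
        struct page_sharing_info *info;
        rwlock_t lock;
    } sharing;

    /* mem_sharing.c: the lock helpers can no longer fail. */
    static inline void mem_sharing_page_lock(struct page_info *pg)
    {
        write_lock(&pg->sharing.lock);      /* was: page_lock(pg) */
    }

    static inline void mem_sharing_page_unlock(struct page_info *pg)
    {
        write_unlock(&pg->sharing.lock);    /* was: page_unlock(pg) */
    }

The lock is initialized in page_make_sharable() via rwlock_init(), so callers
such as nominate_page() and audit() take it unconditionally instead of
treating a failed page_lock() as "not a shared page".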

diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index dfc279d371..f63fe9a415 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -57,7 +57,7 @@ static DEFINE_PER_CPU(pg_lock_data_t, __pld);
 #define RMAP_HASHTAB_SIZE   \
         ((PAGE_SIZE << RMAP_HASHTAB_ORDER) / sizeof(struct list_head))
 #define RMAP_USES_HASHTAB(page) \
-        ((page)->sharing->hash_table.flag == NULL)
+        ((page)->sharing.info->hash_table.flag == NULL)
 #define RMAP_HEAVY_SHARED_PAGE   RMAP_HASHTAB_SIZE
 /* A bit of hysteresis. We don't want to be mutating between list and hash
  * table constantly. */
@@ -77,9 +77,9 @@ static void _free_pg_shared_info(struct rcu_head *head)
 
 static inline void audit_add_list(struct page_info *page)
 {
-    INIT_LIST_HEAD(&page->sharing->entry);
+    INIT_LIST_HEAD(&page->sharing.info->entry);
     spin_lock(&shr_audit_lock);
-    list_add_rcu(&page->sharing->entry, &shr_audit_list);
+    list_add_rcu(&page->sharing.info->entry, &shr_audit_list);
     spin_unlock(&shr_audit_lock);
 }
 
@@ -88,14 +88,14 @@ static inline void page_sharing_dispose(struct page_info *page)
 {
     /* Unlikely given our thresholds, but we should be careful. */
     if ( unlikely(RMAP_USES_HASHTAB(page)) )
-        free_xenheap_pages(page->sharing->hash_table.bucket, 
-                            RMAP_HASHTAB_ORDER);
+        free_xenheap_pages(page->sharing.info->hash_table.bucket,
+                           RMAP_HASHTAB_ORDER);
 
     spin_lock(&shr_audit_lock);
-    list_del_rcu(&page->sharing->entry);
+    list_del_rcu(&page->sharing.info->entry);
     spin_unlock(&shr_audit_lock);
-    INIT_RCU_HEAD(&page->sharing->rcu_head);
-    call_rcu(&page->sharing->rcu_head, _free_pg_shared_info);
+    INIT_RCU_HEAD(&page->sharing.info->rcu_head);
+    call_rcu(&page->sharing.info->rcu_head, _free_pg_shared_info);
 }
 
 #else
@@ -105,27 +105,22 @@ static inline void page_sharing_dispose(struct page_info *page)
 {
     /* Unlikely given our thresholds, but we should be careful. */
     if ( unlikely(RMAP_USES_HASHTAB(page)) )
-        free_xenheap_pages(page->sharing->hash_table.bucket, 
-                            RMAP_HASHTAB_ORDER);
-    xfree(page->sharing);
+        free_xenheap_pages(page->sharing.info->hash_table.bucket,
+                           RMAP_HASHTAB_ORDER);
+    xfree(page->sharing.info);
 }
 
 #endif /* MEM_SHARING_AUDIT */
 
-static inline int mem_sharing_page_lock(struct page_info *pg)
+static inline void mem_sharing_page_lock(struct page_info *pg)
 {
-    int rc;
     pg_lock_data_t *pld = &(this_cpu(__pld));
 
     page_sharing_mm_pre_lock();
-    rc = page_lock(pg);
-    if ( rc )
-    {
-        preempt_disable();
-        page_sharing_mm_post_lock(&pld->mm_unlock_level, 
-                                  &pld->recurse_count);
-    }
-    return rc;
+    write_lock(&pg->sharing.lock);
+    preempt_disable();
+    page_sharing_mm_post_lock(&pld->mm_unlock_level,
+                              &pld->recurse_count);
 }
 
 static inline void mem_sharing_page_unlock(struct page_info *pg)
@@ -135,7 +130,7 @@ static inline void mem_sharing_page_unlock(struct page_info *pg)
     page_sharing_mm_unlock(pld->mm_unlock_level, 
                            &pld->recurse_count);
     preempt_enable();
-    page_unlock(pg);
+    write_unlock(&pg->sharing.lock);
 }
 
 static inline shr_handle_t get_next_handle(void)
@@ -172,7 +167,7 @@ static inline void
 rmap_init(struct page_info *page)
 {
     /* We always start off as a doubly linked list. */
-    INIT_LIST_HEAD(&page->sharing->gfns);
+    INIT_LIST_HEAD(&page->sharing.info->gfns);
 }
 
 /* Exceedingly simple "hash function" */
@@ -194,7 +189,7 @@ rmap_list_to_hash_table(struct page_info *page)
     for ( i = 0; i < RMAP_HASHTAB_SIZE; i++ )
         INIT_LIST_HEAD(b + i);
 
-    list_for_each_safe(pos, tmp, &page->sharing->gfns)
+    list_for_each_safe(pos, tmp, &page->sharing.info->gfns)
     {
         gfn_info_t *gfn_info = list_entry(pos, gfn_info_t, list);
         struct list_head *bucket = b + HASH(gfn_info->domain, gfn_info->gfn);
@@ -202,8 +197,8 @@ rmap_list_to_hash_table(struct page_info *page)
         list_add(pos, bucket);
     }
 
-    page->sharing->hash_table.bucket = b;
-    page->sharing->hash_table.flag   = NULL;
+    page->sharing.info->hash_table.bucket = b;
+    page->sharing.info->hash_table.flag   = NULL;
 
     return 0;
 }
@@ -212,9 +207,9 @@ static inline void
 rmap_hash_table_to_list(struct page_info *page)
 {
     unsigned int i;
-    struct list_head *bucket = page->sharing->hash_table.bucket;
+    struct list_head *bucket = page->sharing.info->hash_table.bucket;
 
-    INIT_LIST_HEAD(&page->sharing->gfns);
+    INIT_LIST_HEAD(&page->sharing.info->gfns);
 
     for ( i = 0; i < RMAP_HASHTAB_SIZE; i++ )
     {
@@ -222,7 +217,7 @@ rmap_hash_table_to_list(struct page_info *page)
         list_for_each_safe(pos, tmp, head)
         {
             list_del(pos);
-            list_add(pos, &page->sharing->gfns);
+            list_add(pos, &page->sharing.info->gfns);
         }
     }
 
@@ -268,9 +263,9 @@ rmap_add(gfn_info_t *gfn_info, struct page_info *page)
         (void)rmap_list_to_hash_table(page);
 
     head = (RMAP_USES_HASHTAB(page)) ?
-        page->sharing->hash_table.bucket + 
+        page->sharing.info->hash_table.bucket +
                             HASH(gfn_info->domain, gfn_info->gfn) :
-        &page->sharing->gfns;
+        &page->sharing.info->gfns;
 
     INIT_LIST_HEAD(&gfn_info->list);
     list_add(&gfn_info->list, head);
@@ -284,8 +279,8 @@ rmap_retrieve(uint16_t domain_id, unsigned long gfn,
     struct list_head *le, *head;
 
     head = (RMAP_USES_HASHTAB(page)) ?
-        page->sharing->hash_table.bucket + HASH(domain_id, gfn) :
-        &page->sharing->gfns;
+        page->sharing.info->hash_table.bucket + HASH(domain_id, gfn) :
+        &page->sharing.info->gfns;
 
     list_for_each(le, head)
     {
@@ -322,8 +317,8 @@ static inline void
 rmap_seed_iterator(struct page_info *page, struct rmap_iterator *ri)
 {
     ri->curr = (RMAP_USES_HASHTAB(page)) ?
-                page->sharing->hash_table.bucket :
-                &page->sharing->gfns;
+                page->sharing.info->hash_table.bucket :
+                &page->sharing.info->gfns;
     ri->next = ri->curr->next; 
     ri->bucket = 0;
 }
@@ -332,8 +327,8 @@ static inline gfn_info_t *
 rmap_iterate(struct page_info *page, struct rmap_iterator *ri)
 {
     struct list_head *head = (RMAP_USES_HASHTAB(page)) ?
-                page->sharing->hash_table.bucket + ri->bucket :
-                &page->sharing->gfns;
+                page->sharing.info->hash_table.bucket + ri->bucket :
+                &page->sharing.info->gfns;
 
 retry:
     if ( ri->next == head)
@@ -344,7 +339,7 @@ retry:
             if ( ri->bucket >= RMAP_HASHTAB_SIZE )
                 /* No more hash table buckets */
                 return NULL;
-            head = page->sharing->hash_table.bucket + ri->bucket;
+            head = page->sharing.info->hash_table.bucket + ri->bucket;
             ri->curr = head;
             ri->next = ri->curr->next;
             goto retry;
@@ -398,12 +393,8 @@ static struct page_info* mem_sharing_lookup(unsigned long mfn)
         struct page_info* page = mfn_to_page(_mfn(mfn));
         if ( page_get_owner(page) == dom_cow )
         {
-            /* Count has to be at least two, because we're called
-             * with the mfn locked (1) and this is supposed to be 
-             * a shared page (1). */
             unsigned long t = read_atomic(&page->u.inuse.type_info);
             ASSERT((t & PGT_type_mask) == PGT_shared_page);
-            ASSERT((t & PGT_count_mask) >= 2);
             ASSERT(SHARED_M2P(get_gpfn_from_mfn(mfn)));
             return page;
         }
@@ -437,14 +428,7 @@ static int audit(void)
         pg = pg_shared_info->pg;
         mfn = page_to_mfn(pg);
 
-        /* If we can't lock it, it's definitely not a shared page */
-        if ( !mem_sharing_page_lock(pg) )
-        {
-           MEM_SHARING_DEBUG("mfn %lx in audit list, but cannot be locked (%lx)!\n",
-                              mfn_x(mfn), pg->u.inuse.type_info);
-           errors++;
-           continue;
-        }
+        mem_sharing_page_lock(pg);
 
         /* Check if the MFN has correct type, owner and handle. */ 
         if ( (pg->u.inuse.type_info & PGT_type_mask) != PGT_shared_page )
@@ -472,7 +456,7 @@ static int audit(void)
         }
 
         /* Check we have a list */
-        if ( (!pg->sharing) || !rmap_has_entries(pg) )
+        if ( (!pg->sharing.info) || !rmap_has_entries(pg) )
         {
            MEM_SHARING_DEBUG("mfn %lx shared, but empty gfn list!\n",
                              mfn_x(mfn));
@@ -517,8 +501,8 @@ static int audit(void)
             put_domain(d);
             nr_gfns++;
         }
-        /* The type count has an extra ref because we have locked the page */
-        if ( (nr_gfns + 1) != (pg->u.inuse.type_info & PGT_count_mask) )
+
+        if ( nr_gfns != (pg->u.inuse.type_info & PGT_count_mask) )
         {
             MEM_SHARING_DEBUG("Mismatched counts for MFN=%lx."
                               "nr_gfns in list %lu, in type_info %lx\n",
@@ -622,6 +606,7 @@ static int page_make_sharable(struct domain *d,
         return -E2BIG;
     }
 
+    rwlock_init(&page->sharing.lock);
     page_set_owner(page, dom_cow);
     drop_dom_ref = !domain_adjust_tot_pages(d, -1);
     page_list_del(page, &d->page_list);
@@ -648,11 +633,7 @@ static int page_make_private(struct domain *d, struct page_info *page)
         return -EBUSY;
     }
 
-    /* We can only change the type if count is one */
-    /* Because we are locking pages individually, we need to drop
-     * the lock here, while the page is typed. We cannot risk the 
-     * race of page_unlock and then put_page_type. */
-    expected_type = (PGT_shared_page | PGT_validated | PGT_locked | 2);
+    expected_type = (PGT_shared_page | PGT_validated | 1);
     if ( page->u.inuse.type_info != expected_type )
     {
         spin_unlock(&d->page_alloc_lock);
@@ -688,10 +669,7 @@ static inline struct page_info *__grab_shared_page(mfn_t mfn)
         return NULL;
     pg = mfn_to_page(mfn);
 
-    /* If the page is not validated we can't lock it, and if it's  
-     * not validated it's obviously not shared. */
-    if ( !mem_sharing_page_lock(pg) )
-        return NULL;
+    mem_sharing_page_lock(pg);
 
     if ( mem_sharing_lookup(mfn_x(mfn)) == NULL )
     {
@@ -720,8 +698,7 @@ static int debug_mfn(mfn_t mfn)
             page->u.inuse.type_info,
             page_get_owner(page)->domain_id);
 
-    /* -1 because the page is locked and that's an additional type ref */
-    num_refs = ((int) (page->u.inuse.type_info & PGT_count_mask)) - 1;
+    num_refs = (int) (page->u.inuse.type_info & PGT_count_mask) ;
     mem_sharing_page_unlock(page);
     return num_refs;
 }
@@ -792,7 +769,7 @@ static int nominate_page(struct domain *d, gfn_t gfn,
                     gfn_x(gfn), mfn_x(mfn), d->domain_id);
             BUG();
         }
-        *phandle = pg->sharing->handle;
+        *phandle = pg->sharing.info->handle;
         ret = 0;
         mem_sharing_page_unlock(pg);
         goto out;
@@ -839,33 +816,30 @@ static int nominate_page(struct domain *d, gfn_t gfn,
     if ( ret ) 
         goto out;
 
-    /* Now that the page is validated, we can lock it. There is no 
-     * race because we're holding the p2m entry, so no one else 
+    /* There is no race because we're holding the p2m entry, so no one else
      * could be nominating this gfn */
-    ret = -ENOENT;
-    if ( !mem_sharing_page_lock(page) )
-        goto out;
+    mem_sharing_page_lock(page);
 
     /* Initialize the shared state */
     ret = -ENOMEM;
-    if ( (page->sharing = 
+    if ( (page->sharing.info =
             xmalloc(struct page_sharing_info)) == NULL )
     {
         /* Making a page private atomically unlocks it */
         BUG_ON(page_make_private(d, page) != 0);
         goto out;
     }
-    page->sharing->pg = page;
+    page->sharing.info->pg = page;
     rmap_init(page);
 
     /* Create the handle */
-    page->sharing->handle = get_next_handle();  
+    page->sharing.info->handle = get_next_handle();
 
     /* Create the local gfn info */
     if ( mem_sharing_gfn_alloc(page, d, gfn_x(gfn)) == NULL )
     {
-        xfree(page->sharing);
-        page->sharing = NULL;
+        xfree(page->sharing.info);
+        page->sharing.info = NULL;
         BUG_ON(page_make_private(d, page) != 0);
         goto out;
     }
@@ -879,7 +853,7 @@ static int nominate_page(struct domain *d, gfn_t gfn,
     /* Update m2p entry to SHARED_M2P_ENTRY */
     set_gpfn_from_mfn(mfn_x(mfn), SHARED_M2P_ENTRY);
 
-    *phandle = page->sharing->handle;
+    *phandle = page->sharing.info->handle;
     audit_add_list(page);
     mem_sharing_page_unlock(page);
     ret = 0;
@@ -949,14 +923,14 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
     ASSERT(cmfn_type == p2m_ram_shared);
 
     /* Check that the handles match */
-    if ( spage->sharing->handle != sh )
+    if ( spage->sharing.info->handle != sh )
     {
         ret = XENMEM_SHARING_OP_S_HANDLE_INVALID;
         mem_sharing_page_unlock(secondpg);
         mem_sharing_page_unlock(firstpg);
         goto err_out;
     }
-    if ( cpage->sharing->handle != ch )
+    if ( cpage->sharing.info->handle != ch )
     {
         ret = XENMEM_SHARING_OP_C_HANDLE_INVALID;
         mem_sharing_page_unlock(secondpg);
@@ -990,11 +964,11 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
         BUG_ON(set_shared_p2m_entry(d, gfn->gfn, smfn));
         put_domain(d);
     }
-    ASSERT(list_empty(&cpage->sharing->gfns));
+    ASSERT(list_empty(&cpage->sharing.info->gfns));
 
     /* Clear the rest of the shared state */
     page_sharing_dispose(cpage);
-    cpage->sharing = NULL;
+    cpage->sharing.info = NULL;
 
     mem_sharing_page_unlock(secondpg);
     mem_sharing_page_unlock(firstpg);
@@ -1037,7 +1011,7 @@ int mem_sharing_add_to_physmap(struct domain *sd, unsigned long sgfn, shr_handle
     ASSERT(smfn_type == p2m_ram_shared);
 
     /* Check that the handles match */
-    if ( spage->sharing->handle != sh )
+    if ( spage->sharing.info->handle != sh )
         goto err_unlock;
 
     /* Make sure the target page is a hole in the physmap. These are typically
@@ -1155,7 +1129,7 @@ int __mem_sharing_unshare_page(struct domain *d,
          * before destroying the rmap. */
         mem_sharing_gfn_destroy(page, d, gfn_info);
         page_sharing_dispose(page);
-        page->sharing = NULL;
+        page->sharing.info = NULL;
         atomic_dec(&nr_shared_mfns);
     }
     else
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 6faa563167..594de6148f 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -133,7 +133,10 @@ struct page_info
          * of sharing share the version they expect to.
          * This list is allocated and freed when a page is shared/unshared.
          */
-        struct page_sharing_info *sharing;
+        struct {
+            struct page_sharing_info *info;
+            rwlock_t lock;
+        } sharing;
     };
 
     /* Reference count and various PGC_xxx flags and fields. */
-- 
2.20.1

