
[Xen-devel] [PATCH 2/3] x86/mem_sharing: replace use of page_lock/unlock with our own lock



The page_lock/unlock functions were designed for PV pagetable updates.
Recycling them in mem_sharing is somewhat odd and results in unnecessary
complications. Replace their use with a separate per-page lock.

Signed-off-by: Tamas K Lengyel <tamas@xxxxxxxxxxxxx>
Cc: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Cc: Wei Liu <wei.liu2@xxxxxxxxxx>
Cc: Roger Pau Monne <roger.pau@xxxxxxxxxx>
---
 xen/arch/x86/mm/mem_sharing.c | 138 ++++++++++++++--------------------
 xen/include/asm-x86/mm.h      |   5 +-
 2 files changed, 60 insertions(+), 83 deletions(-)
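
For reviewers, a condensed sketch of the locking pattern this patch ends up
with (names taken from the hunks below; the pg_lock_data_t/preempt bookkeeping
kept by the real helpers is elided, so treat this as illustrative rather than
a drop-in):

    /* asm-x86/mm.h: the per-page lock now lives next to the sharing info. */
    struct {
        struct page_sharing_info *info;
        rwlock_t lock;
    } sharing;

    /* mem_sharing.c: the lock helpers can no longer fail. */
    static inline void mem_sharing_page_lock(struct page_info *pg)
    {
        write_lock(&pg->sharing.lock);      /* was: page_lock(pg) */
    }

    static inline void mem_sharing_page_unlock(struct page_info *pg)
    {
        write_unlock(&pg->sharing.lock);    /* was: page_unlock(pg) */
    }

The lock is initialized in page_make_sharable() via rwlock_init(), so callers
such as nominate_page() and audit() take it unconditionally instead of
treating a failed page_lock() as "not a shared page".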

diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index dfc279d371..f63fe9a415 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -57,7 +57,7 @@ static DEFINE_PER_CPU(pg_lock_data_t, __pld);
 #define RMAP_HASHTAB_SIZE   \
         ((PAGE_SIZE << RMAP_HASHTAB_ORDER) / sizeof(struct list_head))
 #define RMAP_USES_HASHTAB(page) \
-        ((page)->sharing->hash_table.flag == NULL)
+        ((page)->sharing.info->hash_table.flag == NULL)
 #define RMAP_HEAVY_SHARED_PAGE   RMAP_HASHTAB_SIZE
 /* A bit of hysteresis. We don't want to be mutating between list and hash
  * table constantly. */
@@ -77,9 +77,9 @@ static void _free_pg_shared_info(struct rcu_head *head)
 
 static inline void audit_add_list(struct page_info *page)
 {
-    INIT_LIST_HEAD(&page->sharing->entry);
+    INIT_LIST_HEAD(&page->sharing.info->entry);
     spin_lock(&shr_audit_lock);
-    list_add_rcu(&page->sharing->entry, &shr_audit_list);
+    list_add_rcu(&page->sharing.info->entry, &shr_audit_list);
     spin_unlock(&shr_audit_lock);
 }
 
@@ -88,14 +88,14 @@ static inline void page_sharing_dispose(struct page_info *page)
 {
     /* Unlikely given our thresholds, but we should be careful. */
     if ( unlikely(RMAP_USES_HASHTAB(page)) )
-        free_xenheap_pages(page->sharing->hash_table.bucket, 
-                            RMAP_HASHTAB_ORDER);
+        free_xenheap_pages(page->sharing.info->hash_table.bucket,
+                           RMAP_HASHTAB_ORDER);
 
     spin_lock(&shr_audit_lock);
-    list_del_rcu(&page->sharing->entry);
+    list_del_rcu(&page->sharing.info->entry);
     spin_unlock(&shr_audit_lock);
-    INIT_RCU_HEAD(&page->sharing->rcu_head);
-    call_rcu(&page->sharing->rcu_head, _free_pg_shared_info);
+    INIT_RCU_HEAD(&page->sharing.info->rcu_head);
+    call_rcu(&page->sharing.info->rcu_head, _free_pg_shared_info);
 }
 
 #else
@@ -105,27 +105,22 @@ static inline void page_sharing_dispose(struct page_info *page)
 {
     /* Unlikely given our thresholds, but we should be careful. */
     if ( unlikely(RMAP_USES_HASHTAB(page)) )
-        free_xenheap_pages(page->sharing->hash_table.bucket, 
-                            RMAP_HASHTAB_ORDER);
-    xfree(page->sharing);
+        free_xenheap_pages(page->sharing.info->hash_table.bucket,
+                           RMAP_HASHTAB_ORDER);
+    xfree(page->sharing.info);
 }
 
 #endif /* MEM_SHARING_AUDIT */
 
-static inline int mem_sharing_page_lock(struct page_info *pg)
+static inline void mem_sharing_page_lock(struct page_info *pg)
 {
-    int rc;
     pg_lock_data_t *pld = &(this_cpu(__pld));
 
     page_sharing_mm_pre_lock();
-    rc = page_lock(pg);
-    if ( rc )
-    {
-        preempt_disable();
-        page_sharing_mm_post_lock(&pld->mm_unlock_level, 
-                                  &pld->recurse_count);
-    }
-    return rc;
+    write_lock(&pg->sharing.lock);
+    preempt_disable();
+    page_sharing_mm_post_lock(&pld->mm_unlock_level,
+                              &pld->recurse_count);
 }
 
 static inline void mem_sharing_page_unlock(struct page_info *pg)
@@ -135,7 +130,7 @@ static inline void mem_sharing_page_unlock(struct page_info *pg)
     page_sharing_mm_unlock(pld->mm_unlock_level, 
                            &pld->recurse_count);
     preempt_enable();
-    page_unlock(pg);
+    write_unlock(&pg->sharing.lock);
 }
 
 static inline shr_handle_t get_next_handle(void)
@@ -172,7 +167,7 @@ static inline void
 rmap_init(struct page_info *page)
 {
     /* We always start off as a doubly linked list. */
-    INIT_LIST_HEAD(&page->sharing->gfns);
+    INIT_LIST_HEAD(&page->sharing.info->gfns);
 }
 
 /* Exceedingly simple "hash function" */
@@ -194,7 +189,7 @@ rmap_list_to_hash_table(struct page_info *page)
     for ( i = 0; i < RMAP_HASHTAB_SIZE; i++ )
         INIT_LIST_HEAD(b + i);
 
-    list_for_each_safe(pos, tmp, &page->sharing->gfns)
+    list_for_each_safe(pos, tmp, &page->sharing.info->gfns)
     {
         gfn_info_t *gfn_info = list_entry(pos, gfn_info_t, list);
         struct list_head *bucket = b + HASH(gfn_info->domain, gfn_info->gfn);
@@ -202,8 +197,8 @@ rmap_list_to_hash_table(struct page_info *page)
         list_add(pos, bucket);
     }
 
-    page->sharing->hash_table.bucket = b;
-    page->sharing->hash_table.flag   = NULL;
+    page->sharing.info->hash_table.bucket = b;
+    page->sharing.info->hash_table.flag   = NULL;
 
     return 0;
 }
@@ -212,9 +207,9 @@ static inline void
 rmap_hash_table_to_list(struct page_info *page)
 {
     unsigned int i;
-    struct list_head *bucket = page->sharing->hash_table.bucket;
+    struct list_head *bucket = page->sharing.info->hash_table.bucket;
 
-    INIT_LIST_HEAD(&page->sharing->gfns);
+    INIT_LIST_HEAD(&page->sharing.info->gfns);
 
     for ( i = 0; i < RMAP_HASHTAB_SIZE; i++ )
     {
@@ -222,7 +217,7 @@ rmap_hash_table_to_list(struct page_info *page)
         list_for_each_safe(pos, tmp, head)
         {
             list_del(pos);
-            list_add(pos, &page->sharing->gfns);
+            list_add(pos, &page->sharing.info->gfns);
         }
     }
 
@@ -268,9 +263,9 @@ rmap_add(gfn_info_t *gfn_info, struct page_info *page)
         (void)rmap_list_to_hash_table(page);
 
     head = (RMAP_USES_HASHTAB(page)) ?
-        page->sharing->hash_table.bucket + 
+        page->sharing.info->hash_table.bucket +
                             HASH(gfn_info->domain, gfn_info->gfn) :
-        &page->sharing->gfns;
+        &page->sharing.info->gfns;
 
     INIT_LIST_HEAD(&gfn_info->list);
     list_add(&gfn_info->list, head);
@@ -284,8 +279,8 @@ rmap_retrieve(uint16_t domain_id, unsigned long gfn,
     struct list_head *le, *head;
 
     head = (RMAP_USES_HASHTAB(page)) ?
-        page->sharing->hash_table.bucket + HASH(domain_id, gfn) :
-        &page->sharing->gfns;
+        page->sharing.info->hash_table.bucket + HASH(domain_id, gfn) :
+        &page->sharing.info->gfns;
 
     list_for_each(le, head)
     {
@@ -322,8 +317,8 @@ static inline void
 rmap_seed_iterator(struct page_info *page, struct rmap_iterator *ri)
 {
     ri->curr = (RMAP_USES_HASHTAB(page)) ?
-                page->sharing->hash_table.bucket :
-                &page->sharing->gfns;
+                page->sharing.info->hash_table.bucket :
+                &page->sharing.info->gfns;
     ri->next = ri->curr->next; 
     ri->bucket = 0;
 }
@@ -332,8 +327,8 @@ static inline gfn_info_t *
 rmap_iterate(struct page_info *page, struct rmap_iterator *ri)
 {
     struct list_head *head = (RMAP_USES_HASHTAB(page)) ?
-                page->sharing->hash_table.bucket + ri->bucket :
-                &page->sharing->gfns;
+                page->sharing.info->hash_table.bucket + ri->bucket :
+                &page->sharing.info->gfns;
 
 retry:
     if ( ri->next == head)
@@ -344,7 +339,7 @@ retry:
             if ( ri->bucket >= RMAP_HASHTAB_SIZE )
                 /* No more hash table buckets */
                 return NULL;
-            head = page->sharing->hash_table.bucket + ri->bucket;
+            head = page->sharing.info->hash_table.bucket + ri->bucket;
             ri->curr = head;
             ri->next = ri->curr->next;
             goto retry;
@@ -398,12 +393,8 @@ static struct page_info* mem_sharing_lookup(unsigned long mfn)
         struct page_info* page = mfn_to_page(_mfn(mfn));
         if ( page_get_owner(page) == dom_cow )
         {
-            /* Count has to be at least two, because we're called
-             * with the mfn locked (1) and this is supposed to be 
-             * a shared page (1). */
             unsigned long t = read_atomic(&page->u.inuse.type_info);
             ASSERT((t & PGT_type_mask) == PGT_shared_page);
-            ASSERT((t & PGT_count_mask) >= 2);
             ASSERT(SHARED_M2P(get_gpfn_from_mfn(mfn)));
             return page;
         }
@@ -437,14 +428,7 @@ static int audit(void)
         pg = pg_shared_info->pg;
         mfn = page_to_mfn(pg);
 
-        /* If we can't lock it, it's definitely not a shared page */
-        if ( !mem_sharing_page_lock(pg) )
-        {
-           MEM_SHARING_DEBUG("mfn %lx in audit list, but cannot be locked (%lx)!\n",
-                              mfn_x(mfn), pg->u.inuse.type_info);
-           errors++;
-           continue;
-        }
+        mem_sharing_page_lock(pg);
 
         /* Check if the MFN has correct type, owner and handle. */ 
         if ( (pg->u.inuse.type_info & PGT_type_mask) != PGT_shared_page )
@@ -472,7 +456,7 @@ static int audit(void)
         }
 
         /* Check we have a list */
-        if ( (!pg->sharing) || !rmap_has_entries(pg) )
+        if ( (!pg->sharing.info) || !rmap_has_entries(pg) )
         {
            MEM_SHARING_DEBUG("mfn %lx shared, but empty gfn list!\n",
                              mfn_x(mfn));
@@ -517,8 +501,8 @@ static int audit(void)
             put_domain(d);
             nr_gfns++;
         }
-        /* The type count has an extra ref because we have locked the page */
-        if ( (nr_gfns + 1) != (pg->u.inuse.type_info & PGT_count_mask) )
+
+        if ( nr_gfns != (pg->u.inuse.type_info & PGT_count_mask) )
         {
             MEM_SHARING_DEBUG("Mismatched counts for MFN=%lx."
                               "nr_gfns in list %lu, in type_info %lx\n",
@@ -622,6 +606,7 @@ static int page_make_sharable(struct domain *d,
         return -E2BIG;
     }
 
+    rwlock_init(&page->sharing.lock);
     page_set_owner(page, dom_cow);
     drop_dom_ref = !domain_adjust_tot_pages(d, -1);
     page_list_del(page, &d->page_list);
@@ -648,11 +633,7 @@ static int page_make_private(struct domain *d, struct page_info *page)
         return -EBUSY;
     }
 
-    /* We can only change the type if count is one */
-    /* Because we are locking pages individually, we need to drop
-     * the lock here, while the page is typed. We cannot risk the 
-     * race of page_unlock and then put_page_type. */
-    expected_type = (PGT_shared_page | PGT_validated | PGT_locked | 2);
+    expected_type = (PGT_shared_page | PGT_validated | 1);
     if ( page->u.inuse.type_info != expected_type )
     {
         spin_unlock(&d->page_alloc_lock);
@@ -688,10 +669,7 @@ static inline struct page_info *__grab_shared_page(mfn_t mfn)
         return NULL;
     pg = mfn_to_page(mfn);
 
-    /* If the page is not validated we can't lock it, and if it's  
-     * not validated it's obviously not shared. */
-    if ( !mem_sharing_page_lock(pg) )
-        return NULL;
+    mem_sharing_page_lock(pg);
 
     if ( mem_sharing_lookup(mfn_x(mfn)) == NULL )
     {
@@ -720,8 +698,7 @@ static int debug_mfn(mfn_t mfn)
             page->u.inuse.type_info,
             page_get_owner(page)->domain_id);
 
-    /* -1 because the page is locked and that's an additional type ref */
-    num_refs = ((int) (page->u.inuse.type_info & PGT_count_mask)) - 1;
+    num_refs = (int) (page->u.inuse.type_info & PGT_count_mask) ;
     mem_sharing_page_unlock(page);
     return num_refs;
 }
@@ -792,7 +769,7 @@ static int nominate_page(struct domain *d, gfn_t gfn,
                     gfn_x(gfn), mfn_x(mfn), d->domain_id);
             BUG();
         }
-        *phandle = pg->sharing->handle;
+        *phandle = pg->sharing.info->handle;
         ret = 0;
         mem_sharing_page_unlock(pg);
         goto out;
@@ -839,33 +816,30 @@ static int nominate_page(struct domain *d, gfn_t gfn,
     if ( ret ) 
         goto out;
 
-    /* Now that the page is validated, we can lock it. There is no 
-     * race because we're holding the p2m entry, so no one else 
+    /* There is no race because we're holding the p2m entry, so no one else
      * could be nominating this gfn */
-    ret = -ENOENT;
-    if ( !mem_sharing_page_lock(page) )
-        goto out;
+    mem_sharing_page_lock(page);
 
     /* Initialize the shared state */
     ret = -ENOMEM;
-    if ( (page->sharing = 
+    if ( (page->sharing.info =
             xmalloc(struct page_sharing_info)) == NULL )
     {
         /* Making a page private atomically unlocks it */
         BUG_ON(page_make_private(d, page) != 0);
         goto out;
     }
-    page->sharing->pg = page;
+    page->sharing.info->pg = page;
     rmap_init(page);
 
     /* Create the handle */
-    page->sharing->handle = get_next_handle();  
+    page->sharing.info->handle = get_next_handle();
 
     /* Create the local gfn info */
     if ( mem_sharing_gfn_alloc(page, d, gfn_x(gfn)) == NULL )
     {
-        xfree(page->sharing);
-        page->sharing = NULL;
+        xfree(page->sharing.info);
+        page->sharing.info = NULL;
         BUG_ON(page_make_private(d, page) != 0);
         goto out;
     }
@@ -879,7 +853,7 @@ static int nominate_page(struct domain *d, gfn_t gfn,
     /* Update m2p entry to SHARED_M2P_ENTRY */
     set_gpfn_from_mfn(mfn_x(mfn), SHARED_M2P_ENTRY);
 
-    *phandle = page->sharing->handle;
+    *phandle = page->sharing.info->handle;
     audit_add_list(page);
     mem_sharing_page_unlock(page);
     ret = 0;
@@ -949,14 +923,14 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
     ASSERT(cmfn_type == p2m_ram_shared);
 
     /* Check that the handles match */
-    if ( spage->sharing->handle != sh )
+    if ( spage->sharing.info->handle != sh )
     {
         ret = XENMEM_SHARING_OP_S_HANDLE_INVALID;
         mem_sharing_page_unlock(secondpg);
         mem_sharing_page_unlock(firstpg);
         goto err_out;
     }
-    if ( cpage->sharing->handle != ch )
+    if ( cpage->sharing.info->handle != ch )
     {
         ret = XENMEM_SHARING_OP_C_HANDLE_INVALID;
         mem_sharing_page_unlock(secondpg);
@@ -990,11 +964,11 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
         BUG_ON(set_shared_p2m_entry(d, gfn->gfn, smfn));
         put_domain(d);
     }
-    ASSERT(list_empty(&cpage->sharing->gfns));
+    ASSERT(list_empty(&cpage->sharing.info->gfns));
 
     /* Clear the rest of the shared state */
     page_sharing_dispose(cpage);
-    cpage->sharing = NULL;
+    cpage->sharing.info = NULL;
 
     mem_sharing_page_unlock(secondpg);
     mem_sharing_page_unlock(firstpg);
@@ -1037,7 +1011,7 @@ int mem_sharing_add_to_physmap(struct domain *sd, unsigned long sgfn, shr_handle
     ASSERT(smfn_type == p2m_ram_shared);
 
     /* Check that the handles match */
-    if ( spage->sharing->handle != sh )
+    if ( spage->sharing.info->handle != sh )
         goto err_unlock;
 
     /* Make sure the target page is a hole in the physmap. These are typically
@@ -1155,7 +1129,7 @@ int __mem_sharing_unshare_page(struct domain *d,
          * before destroying the rmap. */
         mem_sharing_gfn_destroy(page, d, gfn_info);
         page_sharing_dispose(page);
-        page->sharing = NULL;
+        page->sharing.info = NULL;
         atomic_dec(&nr_shared_mfns);
     }
     else
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 6faa563167..594de6148f 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -133,7 +133,10 @@ struct page_info
          * of sharing share the version they expect to.
          * This list is allocated and freed when a page is shared/unshared.
          */
-        struct page_sharing_info *sharing;
+        struct {
+            struct page_sharing_info *info;
+            rwlock_t lock;
+        } sharing;
     };
 
     /* Reference count and various PGC_xxx flags and fields. */
-- 
2.20.1

