[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 1/2] gnttab: Introduce rwlock to protect updates to grant table state



Rename lock to maptrack_lock and use it to protect maptrack state.

The new rwlock is used to prevent readers from accessing
inconsistent grant table state such as current
version, partially initialized active table pages,
etc.

Signed-off-by: Matt Wilson <msw@xxxxxxxxxx>
[chegger: ported to xen-staging, split into multiple commits]
Signed-off-by: Christoph Egger <chegger@xxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Keir Fraser <keir@xxxxxxx>
---
 docs/misc/grant-tables.txt    |   28 ++++++++-
 xen/arch/x86/mm.c             |    4 +-
 xen/common/grant_table.c      |  135 ++++++++++++++++++++++++-----------------
 xen/include/xen/grant_table.h |    9 ++-
 4 files changed, 115 insertions(+), 61 deletions(-)

diff --git a/docs/misc/grant-tables.txt b/docs/misc/grant-tables.txt
index 19db4ec..c9ae8f2 100644
--- a/docs/misc/grant-tables.txt
+++ b/docs/misc/grant-tables.txt
@@ -74,7 +74,33 @@ is complete.
  matching map track entry is then removed, as if unmap had been invoked.
  These are not used by the transfer mechanism.
   map->domid         : owner of the mapped frame
-  map->ref_and_flags : grant reference, ro/rw, mapped for host or device access
+  map->ref           : grant reference
+  map->flags         : ro/rw, mapped for host or device access
+
+********************************************************************************
+ Locking
+ ~~~~~~~
+ Xen uses several locks to serialize access to the internal grant table state.
+
+  grant_table->lock          : rwlock used to prevent readers from accessing
+                               inconsistent grant table state such as current
+                               version, partially initialized active table 
pages,
+                               etc.
+  grant_table->maptrack_lock : spinlock used to protect the maptrack state
+
+ The primary lock for the grant table is a read/write spinlock. All
+ functions that access members of struct grant_table must acquire a
+ read lock around critical sections. Any modification to the members
+ of struct grant_table (e.g., nr_status_frames, nr_grant_frames,
+ active frames, etc.) must only be made if the write lock is
+ held. These elements are read-mostly, and read critical sections can
+ be large, which makes a rwlock a good choice.
+
+ The maptrack state is protected by its own spinlock. Any access (read
+ or write) of struct grant_table members that have a "maptrack_"
+ prefix must be made while holding the maptrack lock. The maptrack
+ state can be rapidly modified under some workloads, and the critical
+ sections are very small, thus we use a spinlock to protect them.
 
 
********************************************************************************
 
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 522c43d..37c13b1 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4565,7 +4565,7 @@ int xenmem_add_to_physmap_one(
                 mfn = virt_to_mfn(d->shared_info);
             break;
         case XENMAPSPACE_grant_table:
-            spin_lock(&d->grant_table->lock);
+            write_lock(&d->grant_table->lock);
 
             if ( d->grant_table->gt_version == 0 )
                 d->grant_table->gt_version = 1;
@@ -4587,7 +4587,7 @@ int xenmem_add_to_physmap_one(
                     mfn = virt_to_mfn(d->grant_table->shared_raw[idx]);
             }
 
-            spin_unlock(&d->grant_table->lock);
+            write_unlock(&d->grant_table->lock);
             break;
         case XENMAPSPACE_gmfn_range:
         case XENMAPSPACE_gmfn:
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index 8fba923..018b5b6 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -227,23 +227,23 @@ double_gt_lock(struct grant_table *lgt, struct 
grant_table *rgt)
 {
     if ( lgt < rgt )
     {
-        spin_lock(&lgt->lock);
-        spin_lock(&rgt->lock);
+        spin_lock(&lgt->maptrack_lock);
+        spin_lock(&rgt->maptrack_lock);
     }
     else
     {
         if ( lgt != rgt )
-            spin_lock(&rgt->lock);
-        spin_lock(&lgt->lock);
+            spin_lock(&rgt->maptrack_lock);
+        spin_lock(&lgt->maptrack_lock);
     }
 }
 
 static inline void
 double_gt_unlock(struct grant_table *lgt, struct grant_table *rgt)
 {
-    spin_unlock(&lgt->lock);
+    spin_unlock(&lgt->maptrack_lock);
     if ( lgt != rgt )
-        spin_unlock(&rgt->lock);
+        spin_unlock(&rgt->maptrack_lock);
 }
 
 static inline int
@@ -261,10 +261,10 @@ static inline void
 put_maptrack_handle(
     struct grant_table *t, int handle)
 {
-    spin_lock(&t->lock);
+    spin_lock(&t->maptrack_lock);
     maptrack_entry(t, handle).ref = t->maptrack_head;
     t->maptrack_head = handle;
-    spin_unlock(&t->lock);
+    spin_unlock(&t->maptrack_lock);
 }
 
 static inline int
@@ -276,7 +276,7 @@ get_maptrack_handle(
     struct grant_mapping *new_mt;
     unsigned int          new_mt_limit, nr_frames;
 
-    spin_lock(&lgt->lock);
+    spin_lock(&lgt->maptrack_lock);
 
     while ( unlikely((handle = __get_maptrack_handle(lgt)) == -1) )
     {
@@ -305,7 +305,7 @@ get_maptrack_handle(
                  nr_frames + 1);
     }
 
-    spin_unlock(&lgt->lock);
+    spin_unlock(&lgt->maptrack_lock);
 
     return handle;
 }
@@ -502,7 +502,7 @@ static int grant_map_exists(const struct domain *ld,
     const struct active_grant_entry *act;
     unsigned int ref, max_iter;
     
-    ASSERT(spin_is_locked(&rgt->lock));
+    ASSERT(rw_is_locked(&rgt->lock));
 
     max_iter = min(*ref_count + (1 << GNTTABOP_CONTINUATION_ARG_SHIFT),
                    nr_grant_entries(rgt));
@@ -623,7 +623,7 @@ __gnttab_map_grant_ref(
     }
 
     rgt = rd->grant_table;
-    spin_lock(&rgt->lock);
+    read_lock(&rgt->lock);
 
     if ( rgt->gt_version == 0 )
         PIN_FAIL(unlock_out, GNTST_general_error,
@@ -696,7 +696,7 @@ __gnttab_map_grant_ref(
 
     cache_flags = (shah->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );
 
-    spin_unlock(&rgt->lock);
+    read_unlock(&rgt->lock);
 
     /* pg may be set, with a refcount included, from __get_paged_frame */
     if ( !pg )
@@ -831,7 +831,7 @@ __gnttab_map_grant_ref(
         put_page(pg);
     }
 
-    spin_lock(&rgt->lock);
+    read_lock(&rgt->lock);
 
     act = &active_entry(rgt, op->ref);
 
@@ -851,7 +851,7 @@ __gnttab_map_grant_ref(
         gnttab_clear_flag(_GTF_reading, status);
 
  unlock_out:
-    spin_unlock(&rgt->lock);
+    read_unlock(&rgt->lock);
     op->status = rc;
     put_maptrack_handle(lgt, handle);
     rcu_unlock_domain(rd);
@@ -891,28 +891,28 @@ __gnttab_unmap_common(
     ld = current->domain;
     lgt = ld->grant_table;
 
+    read_lock(&lgt->lock);
     op->frame = (unsigned long)(op->dev_bus_addr >> PAGE_SHIFT);
 
     if ( unlikely(op->handle >= lgt->maptrack_limit) )
     {
+        read_unlock(&lgt->lock);
         gdprintk(XENLOG_INFO, "Bad handle (%d).\n", op->handle);
         op->status = GNTST_bad_handle;
         return;
     }
 
+    read_unlock(&lgt->lock);
     op->map = &maptrack_entry(lgt, op->handle);
-    spin_lock(&lgt->lock);
 
     if ( unlikely(!op->map->flags) )
     {
-        spin_unlock(&lgt->lock);
         gdprintk(XENLOG_INFO, "Zero flags for handle (%d).\n", op->handle);
         op->status = GNTST_bad_handle;
         return;
     }
 
     dom = op->map->domid;
-    spin_unlock(&lgt->lock);
 
     if ( unlikely((rd = rcu_lock_domain_by_id(dom)) == NULL) )
     {
@@ -944,6 +944,7 @@ __gnttab_unmap_common(
     }
 
     op->rd = rd;
+    read_lock(&rgt->lock);
     act = &active_entry(rgt, op->map->ref);
 
     if ( op->frame == 0 )
@@ -1004,6 +1005,7 @@ __gnttab_unmap_common(
 
  unmap_out:
     double_gt_unlock(lgt, rgt);
+    read_unlock(&rgt->lock);
     op->status = rc;
     rcu_unlock_domain(rd);
 }
@@ -1033,8 +1035,8 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common 
*op)
 
     rcu_lock_domain(rd);
     rgt = rd->grant_table;
-    spin_lock(&rgt->lock);
 
+    read_lock(&rgt->lock);
     if ( rgt->gt_version == 0 )
         goto unmap_out;
 
@@ -1098,7 +1100,7 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common 
*op)
         gnttab_clear_flag(_GTF_reading, status);
 
  unmap_out:
-    spin_unlock(&rgt->lock);
+    read_unlock(&rgt->lock);
     if ( put_handle )
     {
         op->map->flags = 0;
@@ -1284,10 +1286,13 @@ gnttab_unpopulate_status_frames(struct domain *d, 
struct grant_table *gt)
     gt->nr_status_frames = 0;
 }
 
+/* Grow the grant table. The caller must hold the grant table's
+ * write lock before calling this function.
+ */
 int
 gnttab_grow_table(struct domain *d, unsigned int req_nr_frames)
 {
-    /* d's grant table lock must be held by the caller */
+    /* d's grant table write lock must be held by the caller */
 
     struct grant_table *gt = d->grant_table;
     unsigned int i;
@@ -1392,7 +1397,7 @@ gnttab_setup_table(
     }
 
     gt = d->grant_table;
-    spin_lock(&gt->lock);
+    write_lock(&gt->lock);
 
     if ( gt->gt_version == 0 )
         gt->gt_version = 1;
@@ -1420,7 +1425,7 @@ gnttab_setup_table(
     }
 
  out3:
-    spin_unlock(&gt->lock);
+    write_unlock(&gt->lock);
  out2:
     rcu_unlock_domain(d);
  out1:
@@ -1462,13 +1467,13 @@ gnttab_query_size(
         goto query_out_unlock;
     }
 
-    spin_lock(&d->grant_table->lock);
+    read_lock(&d->grant_table->lock);
 
     op.nr_frames     = nr_grant_frames(d->grant_table);
     op.max_nr_frames = max_grant_frames;
     op.status        = GNTST_okay;
 
-    spin_unlock(&d->grant_table->lock);
+    read_unlock(&d->grant_table->lock);
 
  
  query_out_unlock:
@@ -1494,7 +1499,7 @@ gnttab_prepare_for_transfer(
     union grant_combo   scombo, prev_scombo, new_scombo;
     int                 retries = 0;
 
-    spin_lock(&rgt->lock);
+    read_lock(&rgt->lock);
 
     if ( rgt->gt_version == 0 )
     {
@@ -1545,11 +1550,11 @@ gnttab_prepare_for_transfer(
         scombo = prev_scombo;
     }
 
-    spin_unlock(&rgt->lock);
+    read_unlock(&rgt->lock);
     return 1;
 
  fail:
-    spin_unlock(&rgt->lock);
+    read_unlock(&rgt->lock);
     return 0;
 }
 
@@ -1741,7 +1746,7 @@ gnttab_transfer(
         TRACE_1D(TRC_MEM_PAGE_GRANT_TRANSFER, e->domain_id);
 
         /* Tell the guest about its new page frame. */
-        spin_lock(&e->grant_table->lock);
+        read_lock(&e->grant_table->lock);
 
         if ( e->grant_table->gt_version == 1 )
         {
@@ -1759,7 +1764,7 @@ gnttab_transfer(
         shared_entry_header(e->grant_table, gop.ref)->flags |=
             GTF_transfer_completed;
 
-        spin_unlock(&e->grant_table->lock);
+        read_unlock(&e->grant_table->lock);
 
         rcu_unlock_domain(e);
 
@@ -1797,7 +1802,7 @@ __release_grant_for_copy(
     released_read = 0;
     released_write = 0;
 
-    spin_lock(&rgt->lock);
+    read_lock(&rgt->lock);
 
     act = &active_entry(rgt, gref);
     sha = shared_entry_header(rgt, gref);
@@ -1838,7 +1843,7 @@ __release_grant_for_copy(
         released_read = 1;
     }
 
-    spin_unlock(&rgt->lock);
+    read_unlock(&rgt->lock);
 
     if ( td != rd )
     {
@@ -1896,7 +1901,7 @@ __acquire_grant_for_copy(
 
     *page = NULL;
 
-    spin_lock(&rgt->lock);
+    read_lock(&rgt->lock);
 
     if ( rgt->gt_version == 0 )
         PIN_FAIL(unlock_out, GNTST_general_error,
@@ -1965,17 +1970,21 @@ __acquire_grant_for_copy(
                 PIN_FAIL(unlock_out_clear, GNTST_general_error,
                          "transitive grant referenced bad domain %d\n",
                          trans_domid);
-            spin_unlock(&rgt->lock);
+
+            /* __acquire_grant_for_copy() could take the read lock on
+             * the right table (if rd == td), so we have to drop the
+             * lock here and reacquire */
+            read_unlock(&rgt->lock);
 
             rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
                                           readonly, &grant_frame, page,
                                           &trans_page_off, &trans_length, 0);
 
-            spin_lock(&rgt->lock);
+            read_lock(&rgt->lock);
             if ( rc != GNTST_okay ) {
                 __fixup_status_for_copy_pin(act, status);
+                read_unlock(&rgt->lock);
                 rcu_unlock_domain(td);
-                spin_unlock(&rgt->lock);
                 return rc;
             }
 
@@ -1987,7 +1996,7 @@ __acquire_grant_for_copy(
             {
                 __fixup_status_for_copy_pin(act, status);
                 rcu_unlock_domain(td);
-                spin_unlock(&rgt->lock);
+                read_unlock(&rgt->lock);
                 put_page(*page);
                 return __acquire_grant_for_copy(rd, gref, ldom, readonly,
                                                 frame, page, page_off, length,
@@ -2055,7 +2064,7 @@ __acquire_grant_for_copy(
     *length = act->length;
     *frame = act->frame;
 
-    spin_unlock(&rgt->lock);
+    read_unlock(&rgt->lock);
     return rc;
  
  unlock_out_clear:
@@ -2067,7 +2076,7 @@ __acquire_grant_for_copy(
         gnttab_clear_flag(_GTF_reading, status);
 
  unlock_out:
-    spin_unlock(&rgt->lock);
+    read_unlock(&rgt->lock);
     return rc;
 }
 
@@ -2241,7 +2250,7 @@ 
gnttab_set_version(XEN_GUEST_HANDLE_PARAM(gnttab_set_version_t) uop)
     if ( gt->gt_version == op.version )
         goto out;
 
-    spin_lock(&gt->lock);
+    write_lock(&gt->lock);
     /* Make sure that the grant table isn't currently in use when we
        change the version number, except for the first 8 entries which
        are allowed to be in use (xenstore/xenconsole keeps them mapped).
@@ -2327,7 +2336,7 @@ 
gnttab_set_version(XEN_GUEST_HANDLE_PARAM(gnttab_set_version_t) uop)
     gt->gt_version = op.version;
 
 out_unlock:
-    spin_unlock(&gt->lock);
+    write_unlock(&gt->lock);
 
 out:
     op.version = gt->gt_version;
@@ -2383,7 +2392,7 @@ 
gnttab_get_status_frames(XEN_GUEST_HANDLE_PARAM(gnttab_get_status_frames_t) uop,
 
     op.status = GNTST_okay;
 
-    spin_lock(&gt->lock);
+    read_lock(&gt->lock);
 
     for ( i = 0; i < op.nr_frames; i++ )
     {
@@ -2392,7 +2401,7 @@ 
gnttab_get_status_frames(XEN_GUEST_HANDLE_PARAM(gnttab_get_status_frames_t) uop,
             op.status = GNTST_bad_virt_addr;
     }
 
-    spin_unlock(&gt->lock);
+    read_unlock(&gt->lock);
 out2:
     rcu_unlock_domain(d);
 out1:
@@ -2441,7 +2450,7 @@ __gnttab_swap_grant_ref(grant_ref_t ref_a, grant_ref_t 
ref_b)
     struct active_grant_entry *act;
     s16 rc = GNTST_okay;
 
-    spin_lock(&gt->lock);
+    read_lock(&gt->lock);
 
     /* Bounds check on the grant refs */
     if ( unlikely(ref_a >= nr_grant_entries(d->grant_table)))
@@ -2481,7 +2490,7 @@ __gnttab_swap_grant_ref(grant_ref_t ref_a, grant_ref_t 
ref_b)
     }
 
 out:
-    spin_unlock(&gt->lock);
+    read_unlock(&gt->lock);
 
     rcu_unlock_domain(d);
 
@@ -2501,8 +2510,19 @@ 
gnttab_swap_grant_ref(XEN_GUEST_HANDLE_PARAM(gnttab_swap_grant_ref_t) uop,
             return i;
         if ( unlikely(__copy_from_guest(&op, uop, 1)) )
             return -EFAULT;
-        op.status = __gnttab_swap_grant_ref(op.ref_a, op.ref_b);
-        if ( unlikely(__copy_field_to_guest(uop, &op, status)) )
+        if ( unlikely(op.ref_a == op.ref_b) )
+        {
+            /* noop, so avoid acquiring the same active entry
+             * twice in __gnttab_swap_grant_ref(), which would
+             * case a deadlock.
+             */
+            op.status = GNTST_okay;
+        }
+        else
+        {
+            op.status = __gnttab_swap_grant_ref(op.ref_a, op.ref_b);
+        }
+        if ( unlikely(__copy_to_guest_offset(uop, i, &op, 1)) )
             return -EFAULT;
         guest_handle_add_offset(uop, 1);
     }
@@ -2552,12 +2572,12 @@ static int __gnttab_cache_flush(gnttab_cache_flush_t 
*cflush,
 
     if ( d != owner )
     {
-        spin_lock(&owner->grant_table->lock);
+        read_lock(&owner->grant_table->lock);
 
         ret = grant_map_exists(d, owner->grant_table, mfn, ref_count);
         if ( ret != 0 )
         {
-            spin_unlock(&owner->grant_table->lock);
+            read_unlock(&owner->grant_table->lock);
             rcu_unlock_domain(d);
             put_page(page);
             return ret;
@@ -2577,7 +2597,7 @@ static int __gnttab_cache_flush(gnttab_cache_flush_t 
*cflush,
         ret = 0;
 
     if ( d != owner )
-        spin_unlock(&owner->grant_table->lock);
+        read_unlock(&owner->grant_table->lock);
     unmap_domain_page(v);
     put_page(page);
 
@@ -2793,7 +2813,8 @@ grant_table_create(
         goto no_mem_0;
 
     /* Simple stuff. */
-    spin_lock_init(&t->lock);
+    rwlock_init(&t->lock);
+    spin_lock_init(&t->maptrack_lock);
     t->nr_grant_frames = INITIAL_NR_GRANT_FRAMES;
 
     /* Active grant table. */
@@ -2900,7 +2921,7 @@ gnttab_release_mappings(
         }
 
         rgt = rd->grant_table;
-        spin_lock(&rgt->lock);
+        read_lock(&rgt->lock);
 
         act = &active_entry(rgt, ref);
         sha = shared_entry_header(rgt, ref);
@@ -2960,7 +2981,7 @@ gnttab_release_mappings(
         if ( act->pin == 0 )
             gnttab_clear_flag(_GTF_reading, status);
 
-        spin_unlock(&rgt->lock);
+        read_unlock(&rgt->lock);
 
         rcu_unlock_domain(rd);
 
@@ -2978,6 +2999,8 @@ grant_table_destroy(
 
     if ( t == NULL )
         return;
+
+    write_lock(&t->lock);
     
     for ( i = 0; i < nr_grant_frames(t); i++ )
         free_xenheap_page(t->shared_raw[i]);
@@ -2995,6 +3018,8 @@ grant_table_destroy(
         free_xenheap_page(t->status[i]);
     xfree(t->status);
 
+    write_unlock(&t->lock);
+
     xfree(t);
     d->grant_table = NULL;
 }
@@ -3008,7 +3033,7 @@ static void gnttab_usage_print(struct domain *rd)
     printk("      -------- active --------       -------- shared --------\n");
     printk("[ref] localdom mfn      pin          localdom gmfn     flags\n");
 
-    spin_lock(&gt->lock);
+    read_lock(&gt->lock);
 
     if ( gt->gt_version == 0 )
         goto out;
@@ -3057,7 +3082,7 @@ static void gnttab_usage_print(struct domain *rd)
     }
 
  out:
-    spin_unlock(&gt->lock);
+    read_unlock(&gt->lock);
 
     if ( first )
         printk("grant-table for remote domain:%5d ... "
diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h
index 32f5786..ca49b41 100644
--- a/xen/include/xen/grant_table.h
+++ b/xen/include/xen/grant_table.h
@@ -82,8 +82,11 @@ struct grant_table {
     struct grant_mapping **maptrack;
     unsigned int          maptrack_head;
     unsigned int          maptrack_limit;
-    /* Lock protecting updates to active and shared grant tables. */
-    spinlock_t            lock;
+    /* Lock protecting the maptrack page list, head, and limit */
+    spinlock_t            maptrack_lock;
+    /* Lock protecting updates to grant table state (version, active
+       entry list, etc.) */
+    rwlock_t              lock;
     /* The defined versions are 1 and 2.  Set to 0 if we don't know
        what version to use yet. */
     unsigned              gt_version;
@@ -101,7 +104,7 @@ gnttab_release_mappings(
     struct domain *d);
 
 /* Increase the size of a domain's grant table.
- * Caller must hold d's grant table lock.
+ * Caller must hold d's grant table write lock.
  */
 int
 gnttab_grow_table(struct domain *d, unsigned int req_nr_frames);
-- 
1.7.9.5


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.