
To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH 5 of 9] Fine-grained concurrency control structure for the p2m
From: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Date: Thu, 27 Oct 2011 00:33:50 -0400
Cc: andres@xxxxxxxxxxxxxx, keir.xen@xxxxxxxxx, tim@xxxxxxx, olaf@xxxxxxxxx, adin@xxxxxxxxxxxxxx
Delivery-date: Thu, 27 Oct 2011 05:32:13 -0700
In-reply-to: <patchbomb.1319690025@xxxxxxxxxxxxxxxxxxx>
References: <patchbomb.1319690025@xxxxxxxxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mercurial-patchbomb/1.8.4
 xen/arch/x86/mm/hap/private.h |    1 +
 xen/arch/x86/mm/mm-locks.h    |   20 +-
 xen/arch/x86/mm/p2m-ept.c     |    1 +
 xen/arch/x86/mm/p2m-lock.h    |  613 ++++++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/mm/p2m-pod.c     |    1 +
 xen/arch/x86/mm/p2m-pt.c      |    1 +
 xen/arch/x86/mm/p2m.c         |   24 +-
 xen/include/asm-x86/p2m.h     |    3 +-
 8 files changed, 652 insertions(+), 12 deletions(-)


Introduce a fine-grained concurrency control structure for the p2m. This
allows for locking 2M-aligned chunks of the p2m at a time, exclusively.
Recursive locking is allowed. Global locking of the whole p2m is also
allowed for certain operations. Simple deadlock detection heuristics are
put in place.

Note that the patch creates backwards-compatible shortcuts that lock the p2m
globally, so it should remain functionally identical to what is currently in
place.
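
As a rough usage sketch (the accessors are those introduced in p2m-lock.h
below; the example callers and the entry manipulation they stand in for are
hypothetical), the intended pattern is to bracket p2m queries and updates with
the new get/put range accessors:

static void example_update_gfn(struct p2m_domain *p2m, unsigned long gfn)
{
    /* Lock only the 2M-aligned chunk containing gfn (an order-0 request). */
    get_p2m_gfn(p2m, gfn);

    /* ... query and/or modify the p2m entry for gfn here; leaf locks are
     * recursive, so a lookup followed by an update on the same range is fine ... */

    /* Callers must undo their own locking once done. */
    put_p2m_gfn(p2m, gfn);
}

static void example_global_op(struct p2m_domain *p2m)
{
    /* Big-hammer operations (e.g. log-dirty reset) take the whole p2m
     * exclusively; sub-range locking by this CPU then becomes a no-op. */
    get_p2m_global(p2m);
    /* ... walk or modify arbitrary ranges ... */
    put_p2m_global(p2m);
}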

Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>

diff -r 981073d78f7f -r a23e1262b124 xen/arch/x86/mm/hap/private.h
--- a/xen/arch/x86/mm/hap/private.h
+++ b/xen/arch/x86/mm/hap/private.h
@@ -21,6 +21,7 @@
 #define __HAP_PRIVATE_H__
 
 #include "../mm-locks.h"
+#include "../p2m-lock.h"
 
 /********************************************/
 /*          GUEST TRANSLATION FUNCS         */
diff -r 981073d78f7f -r a23e1262b124 xen/arch/x86/mm/mm-locks.h
--- a/xen/arch/x86/mm/mm-locks.h
+++ b/xen/arch/x86/mm/mm-locks.h
@@ -146,14 +146,22 @@ declare_mm_lock(nestedp2m)
 
 /* P2M lock (per-p2m-table)
  * 
- * This protects all updates to the p2m table.  Updates are expected to
- * be safe against concurrent reads, which do *not* require the lock. */
+ * This protects all updates to the p2m table.
+ * 
+ * In 64 bit mode we disable this because the lock becomes fine-grained,
+ * and several code paths cause inversion/deadlock:
+ *   -- PoD sweeps
+ *   -- mem_sharing_unshare_page
+ *   -- generally widespread recursive locking, which we don't support
+ *      (yet, I guess) on an "external" mm lock. */
 
+#ifndef __x86_64__
 declare_mm_lock(p2m)
-#define p2m_lock(p)           mm_lock(p2m, &(p)->lock)
-#define p2m_lock_recursive(p) mm_lock_recursive(p2m, &(p)->lock)
-#define p2m_unlock(p)         mm_unlock(&(p)->lock)
-#define p2m_locked_by_me(p)   mm_locked_by_me(&(p)->lock)
+#define _p2m_lock(p)           mm_lock(p2m, &(p)->lock)
+#define _p2m_lock_recursive(p) mm_lock_recursive(p2m, &(p)->lock)
+#define _p2m_unlock(p)         mm_unlock(&(p)->lock)
+#define _p2m_locked_by_me(p)   mm_locked_by_me(&(p)->lock)
+#endif /* __x86_64__ */
 
 /* PoD lock (per-p2m-table)
  * 
diff -r 981073d78f7f -r a23e1262b124 xen/arch/x86/mm/p2m-ept.c
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -33,6 +33,7 @@
 #include <xen/softirq.h>
 
 #include "mm-locks.h"
+#include "p2m-lock.h"
 
 #define atomic_read_ept_entry(__pepte)                              \
     ( (ept_entry_t) { .epte = atomic_read64(&(__pepte)->epte) } )
diff -r 981073d78f7f -r a23e1262b124 xen/arch/x86/mm/p2m-lock.h
--- /dev/null
+++ b/xen/arch/x86/mm/p2m-lock.h
@@ -0,0 +1,613 @@
+/******************************************************************************
+ * arch/x86/mm/p2m-lock.h
+ *
+ * Fine-grained locking of the p2m. Allow for concurrent updates to different
+ * regions of the p2m. Serially synchronize updates and lookups. Mutex 
+ * access on p2m entries while a CPU is using them.
+ *
+ * Copyright (c) 2011 Andres Lagar-Cavilla, GridCentric Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _XEN_P2M_LOCK_H
+#define _XEN_P2M_LOCK_H
+
+#include <xen/config.h>
+#include <xen/lib.h>
+/* See comment about space consideration for spinlocks below */
+#define NDEBUG
+#undef LOCK_PROFILE
+#include <xen/spinlock.h>
+#include <asm/atomic.h>
+#include <xen/xmalloc.h>
+#include <xen/paging.h>
+#include <asm/page.h>
+#include <asm/p2m.h>
+#include "mm-locks.h"
+
+/* Rationale:
+ *
+ * The motivating scenario is one in which you have at least three CPUs 
+ * operating on likely disjoint regions of the p2m: a paging utility, a sharing
+ * utility, and the domU vcpu. With yet another p2m-heavy utility (mem 
+ * access?), and/or a migrate/remus utility, the number of CPUs operating
+ * on disjoint regions increases. Not to mention multi-vcpu domUs.
+ *
+ * Therefore, p2m concurrency control is achieved through a hierarchical 
+ * tree of locks, to allow all these CPUs to work without bothering each other.
+ * (Without disallowing any other cases such as single-vcpu domU)
+ *
+ * Leaves in the tree of locks are represented by spinlocks.
+ *
+ * Inner nodes (or upper levels) are represented by a spinlock and a count.
+ * The count indicates how many CPUs are locking a node beneath.
+ *
+ * A cpu holds a leaf by grabbing the spinlock, and not letting go of it. On its
+ * way to the leaf, for each inner node, it grabs the spinlock, increases the
+ * count, and releases the spinlock.
+ *
+ * Leaf levels are recursive: the same CPU can lock them again.
+ *
+ * A cpu holds an inner node in exclusive mode by busy-waiting until the count
+ * is zero, grabbing the spinlock, and not letting go of it.
+ *
+ * Unlocks work by releasing the current spinlock, and working your way up:
+ * grab spinlock, decrease count, release.
+ *
+ * No locker can be preempted. For that reason, there are no atomic promotions:
+ * you would end up with promoters deadlocking on their way up the tree.
+ *
+ * Today, there are effectively two levels: the global lock (an inner node), and
+ * 2M locks, leaf locks for contiguous, aligned, 2M extents (akin to superpages).
+ *
+ * The global level can be held exclusively for big hammer operations such as
+ * log dirty (re)set.
+ *
+ * For non-global locking, the global lock is grabbed non-exclusively. At each
+ * 1G boundary we allocate, if we hadn't before, the corresponding set of 512
+ * 2M locks. Allocation of 2M locks is itself protected by a regular
+ * spinlock (this is rare enough). Allocation happens on demand because
+ * we can't really know a priori the "total" size of the p2m.
+ *
+ * It is expected that every query or modification to the p2m will lock the
+ * appropriate range. Leaves are recursive for this reason: commonly you query a
+ * range and then you modify it.
+ *
+ * Conversely, all callers of queries and modifications, once done, need to undo
+ * their locking.
+ *
+ * Because we mimic the page table structure of a 512-radix tree, we run into
+ * space considerations with the spinlocks in this tree. So we need to be careful
+ * about space.
+ *
+ * For 32bit code, we currently bail out and default to one big lock. Sorry Atom :(
+ *
+ * Also note that the p2m tree of locks is included in the ordering constraints
+ * enforced by mm-locks.h. It is treated as an "external" lock in that code.
+ *
+ */
+
+#define P2M_ORDER_GLOBAL    ~0U
+
+/* The 32 bit case serves as a concise summary of the external API */
+#ifndef __x86_64__
+/* For 32 bits we default to one big lock */
+typedef struct __p2m_lock {
+    mm_lock_t lock;
+} p2m_lock_t;
+
+static inline int p2m_lock_init(struct p2m_domain *p2m)
+{
+    p2m_lock_t *p2ml = xmalloc(p2m_lock_t);
+    if ( !p2ml )
+        return -ENOMEM;
+    mm_lock_init(&p2ml->lock);
+    p2m->lock = p2ml;
+    return 0;
+}
+
+static inline void get_p2m(struct p2m_domain *p2m, unsigned long gfn, unsigned int order)
+{
+    _p2m_lock(p2m->lock);
+}
+
+static inline void put_p2m(struct p2m_domain *p2m, unsigned long gfn, unsigned int order)
+{
+    _p2m_unlock(p2m->lock);
+}
+
+static inline void p2m_lock_destroy(struct p2m_domain *p2m)
+{
+    xfree(p2m->lock);
+    p2m->lock = NULL;
+}
+
+/* Backwards compatibility */
+#define p2m_lock(p)             _p2m_lock((p)->lock)
+#define p2m_lock_recursive(p)   _p2m_lock_recursive((p)->lock)
+#define p2m_locked_by_me(p)     _p2m_locked_by_me((p)->lock)
+#define p2m_unlock(p)           _p2m_unlock((p)->lock)
+
+#else /* __x86_64__ */
+/* If we were to have inner locks (say 1G locks), then the space considerations
+ * outlined below for leaf locks would also apply here. */
+typedef struct p2m_inner_lock {
+    spinlock_t lock;
+    atomic_t   count;
+} p2m_inner_lock_t;
+
+static inline void init_p2m_inner_lock(p2m_inner_lock_t *inner)
+{
+    spin_lock_init(&inner->lock);
+    _atomic_set(inner->count, 0);
+}
+
+/* We cannot risk reusing the code in common/spinlock.c, because it may
+ * have been compiled with LOCK_DEBUG or LOCK_PROFILE. This is unfortunate. */
+static inline void lock_p2m_inner(p2m_inner_lock_t *inner)
+{
+    spin_lock(&inner->lock);
+}
+
+static inline void unlock_p2m_inner(p2m_inner_lock_t *inner)
+{
+    spin_unlock(&inner->lock);
+}
+
+static inline void get_p2m_inner(p2m_inner_lock_t *inner)
+{
+    lock_p2m_inner(inner);
+    atomic_inc(&inner->count);
+    unlock_p2m_inner(inner);
+}
+
+static inline void put_p2m_inner(p2m_inner_lock_t *inner)
+{
+    lock_p2m_inner(inner);
+    atomic_dec(&inner->count);
+    unlock_p2m_inner(inner);
+}
+
+/* XXX Consider starvation here */
+static inline void get_p2m_inner_exclusive(p2m_inner_lock_t *inner)
+{
+    int count;
+retry:
+    while (1)
+    {
+        mb();
+        count = atomic_read(&inner->count);
+        if ( count == 0 )
+            break;
+        cpu_relax();
+    }
+
+    spin_lock(&inner->lock);
+    mb();
+    count = atomic_read(&inner->count);
+    if ( count )
+    {
+        spin_unlock(&inner->lock);
+        goto retry;
+    }
+    /* We leave holding the spinlock */
+}
+
+static inline void put_p2m_inner_exclusive(p2m_inner_lock_t *inner)
+{
+    spin_unlock(&inner->lock);
+}
+
+/* Because we operate under page-table sizing constraints, we need to be 
+ * extremely conscious about the space we're taking up. So we become somewhat 
+ * re-inventors of the wheel, and we disable many things. */
+typedef struct p2m_leaf_lock {
+    raw_spinlock_t raw;
+    u16 recurse_cpu:12;
+    u16 recurse_cnt:4;
+/* Padding to confine each inner lock to its own word */
+#define LEAF_PAD   4
+    uint8_t             pad[LEAF_PAD];
+} __attribute__((packed)) p2m_leaf_lock_t;
+
+/* BUILD_BUG_ON(sizeof(p2m_leaf_lock_t) != sizeof(unsigned long)); */
+
+static inline void init_p2m_leaf_lock(p2m_leaf_lock_t *lock)
+{
+    *lock = (p2m_leaf_lock_t) { _RAW_SPIN_LOCK_UNLOCKED, 0xfffu, 0, { } };
+}
+
+static inline int __p2m_spin_trylock_recursive(p2m_leaf_lock_t *lock)
+{
+    int cpu = smp_processor_id();
+
+    if ( likely(lock->recurse_cpu != cpu) )
+    {
+        if ( !_raw_spin_trylock(&lock->raw) )
+            return 0;
+        preempt_disable();
+        lock->recurse_cpu = cpu;
+    }
+
+    lock->recurse_cnt++;
+    return 1;
+}
+
+static inline void lock_p2m_leaf(p2m_leaf_lock_t *lock)
+{
+    while ( !__p2m_spin_trylock_recursive(lock) )
+        cpu_relax();
+}
+
+static inline void unlock_p2m_leaf(p2m_leaf_lock_t *lock)
+{
+    if ( likely(--lock->recurse_cnt == 0) )
+    {
+        lock->recurse_cpu = 0xfffu;
+        preempt_enable();
+        _raw_spin_unlock(&lock->raw);
+    }
+}
+
+/* Deadlock book-keeping, see below */
+#define MAX_LOCK_DEPTH  16
+
+/* The lock structure */
+typedef struct __p2m_lock 
+{
+    /* To enforce ordering in mm-locks */
+    int unlock_level;
+    /* To protect on-demand allocation of locks 
+     * (yeah you heard that right) */
+    spinlock_t alloc_lock;
+    /* Global lock */
+    p2m_inner_lock_t global;
+    /* 2M locks. Allocate on demand: fun */
+    p2m_leaf_lock_t  **locks_2m;
+    /* Book-keeping for deadlock detection. Could be a per-cpu. */
+    unsigned long deadlock_guard[NR_CPUS][MAX_LOCK_DEPTH + 1];
+    uint8_t lock_depth[NR_CPUS];
+    /* Is anybody holding this exclusively */
+    unsigned int exclusive_holder;
+    /* Order of pages allocated for the first level of locks_2m */
+    uint8_t order;
+} p2m_lock_t;
+
+#define EXCLUSIVE_CPU_NULL  ~0U
+
+/* Some deadlock book-keeping. Say CPU A holds a lock on range A, CPU B holds a
+ * lock on range B. Now, CPU A wants to lock range B and vice-versa. Deadlock.
+ * We detect this by remembering the start of the current locked range.
+ * We keep a fairly small stack of guards (MAX_LOCK_DEPTH), because we don't
+ * anticipate a great deal of recursive locking: (a) recursive locking is rare,
+ * (b) it is evil, (c) only PoD seems to do it (is PoD therefore evil?) */
+
+#define DEADLOCK_NULL   ~0UL
+
+#define CURRENT_GUARD(l)    ((l)->deadlock_guard[current->processor] \
+                                [(l)->lock_depth[current->processor]])
+
+#define DEADLOCK_CHECK(cond, action, _f, _a...) \
+do {                                            \
+    if ( (cond) )                               \
+    {                                           \
+        printk(_f, ##_a);                       \
+        action;                                 \
+    }                                           \
+} while(0)
+
+static inline void push_guard(p2m_lock_t *p2ml, unsigned long gfn)
+{
+    int cpu = current->processor;
+
+    DEADLOCK_CHECK(((p2ml->lock_depth[cpu] + 1) > MAX_LOCK_DEPTH), 
+                    BUG(), "CPU %u exceeded deadlock depth\n", cpu);
+
+    p2ml->lock_depth[cpu]++;
+    p2ml->deadlock_guard[cpu][p2ml->lock_depth[cpu]] = gfn;
+}
+
+static inline void pop_guard(p2m_lock_t *p2ml)
+{
+    int cpu = current->processor;
+
+    DEADLOCK_CHECK((p2ml->lock_depth[cpu] == 0), BUG(),
+                    "CPU %u underflow deadlock depth\n", cpu);
+
+    p2ml->lock_depth[cpu]--;
+}
+
+static inline int p2m_lock_init(struct p2m_domain *p2m)
+{
+    unsigned int i;
+    p2m_lock_t *p2ml;
+
+    p2ml = xmalloc(p2m_lock_t);
+    if ( !p2ml ) 
+        return -ENOMEM;
+
+    memset(p2ml, 0, sizeof(p2m_lock_t));
+
+    spin_lock_init(&p2ml->alloc_lock);
+    init_p2m_inner_lock(&p2ml->global);
+
+    p2ml->locks_2m = alloc_xenheap_page();
+    if ( !p2ml->locks_2m )
+    {
+        xfree(p2ml);
+        return -ENOMEM;
+    }
+    memset(p2ml->locks_2m, 0, PAGE_SIZE);
+
+    for (i = 0; i < NR_CPUS; i++)
+        p2ml->deadlock_guard[i][0] = DEADLOCK_NULL;
+
+    p2ml->exclusive_holder = EXCLUSIVE_CPU_NULL;
+
+    p2m->lock = p2ml;
+    return 0;    
+}
+
+/* Conversion macros for aligned boundaries */
+#define gfn_to_superpage(g, o)      (((g) & (~((1 << (o)) - 1))) >> (o))
+#define gfn_to_1g_sp(gfn)           gfn_to_superpage(gfn, PAGE_ORDER_1G)
+#define gfn_to_2m_sp(gfn)           gfn_to_superpage(gfn, PAGE_ORDER_2M)
+#define gfn_1g_to_2m(gfn_1g)        ((gfn_1g) << (PAGE_ORDER_1G - PAGE_ORDER_2M))
+#define gfn_1g_to_last_2m(gfn_1g)   (gfn_1g_to_2m(gfn_1g) + \
+                                        ((1 << (PAGE_ORDER_1G - PAGE_ORDER_2M)) - 1))
+#define gfn_1g_to_4k(gfn_1g)        ((gfn_1g) << PAGE_ORDER_1G)
+#define gfn_1g_to_last_4k(gfn_1g)   (gfn_1g_to_4k(gfn_1g) + ((1 << PAGE_ORDER_1G) - 1))
+
+/* Global lock accessors. Global lock is our only "inner" node. */
+#define p2m_exclusive_locked_by_me(p)    \
+     ((p)->lock->exclusive_holder == current->processor)
+
+static inline void get_p2m_global_exclusive(struct p2m_domain *p2m)
+{
+    p2m_lock_t *p2ml = p2m->lock;
+    DEADLOCK_CHECK((CURRENT_GUARD(p2ml) != DEADLOCK_NULL), BUG(),
+                    "P2M DEADLOCK: cpu %u prev range start %lx trying global\n",
+                    (unsigned) current->processor, CURRENT_GUARD(p2ml)); 
+
+    get_p2m_inner_exclusive(&p2ml->global);
+    p2ml->exclusive_holder = current->processor;
+}
+
+static inline void put_p2m_global_exclusive(struct p2m_domain *p2m)
+{
+    p2m_lock_t *p2ml = p2m->lock;
+    p2ml->exclusive_holder = EXCLUSIVE_CPU_NULL;
+    put_p2m_inner_exclusive(&p2ml->global);
+}
+
+/* Not to be confused with shortcut for external use */
+static inline void __get_p2m_global(struct p2m_domain *p2m)
+{
+    get_p2m_inner(&p2m->lock->global);
+}
+
+/* Not to be confused with shortcut for external use */
+static inline void __put_p2m_global(struct p2m_domain *p2m)
+{
+    put_p2m_inner(&p2m->lock->global);
+}
+
+/* 2M lock accessors */
+static inline p2m_leaf_lock_t *__get_2m_lock(p2m_lock_t *p2ml,
+                            unsigned long gfn_1g, unsigned long gfn_2m)
+{
+    p2m_leaf_lock_t *lock_2m_l1;
+    BUG_ON(gfn_1g >= (1 << PAGETABLE_ORDER));
+    BUG_ON(gfn_2m >= (1 << PAGETABLE_ORDER));
+    lock_2m_l1 = p2ml->locks_2m[gfn_1g];
+    BUG_ON(lock_2m_l1 == NULL);
+    return (lock_2m_l1 + gfn_2m);
+}
+
+static inline void get_p2m_2m(struct p2m_domain *p2m, unsigned long gfn_1g,
+                                unsigned long gfn_2m)
+{
+    lock_p2m_leaf(__get_2m_lock(p2m->lock, gfn_1g, gfn_2m));
+}
+
+static inline void put_p2m_2m(struct p2m_domain *p2m, unsigned long gfn_1g,
+                                unsigned long gfn_2m)
+{
+    unlock_p2m_leaf(__get_2m_lock(p2m->lock, gfn_1g, gfn_2m));
+}
+
+/* Allocate 2M locks we may not have allocated yet for this 1G superpage */
+static inline int alloc_locks_2m(struct p2m_domain *p2m, unsigned long gfn_1g)
+{
+    p2m_lock_t *p2ml = p2m->lock;
+
+    /* With a single page for l1, we cover a gfn space of 512GB (39 bits)
+     * Given that current x86_64 processors physically address 40 bits,
+     * we're in no immediate danger of overflowing this table for a domU.
+     * If necessary, the l1 itself can grow subject to proper locking 
+     * on the p2ml->alloc_lock */
+
+    /* Quick test for common case */
+    if ( likely(p2ml->locks_2m[gfn_1g] != NULL) ) 
+        return 0;
+
+    spin_lock(&(p2ml->alloc_lock));
+
+    if ( likely(p2ml->locks_2m[gfn_1g] == NULL) )
+    {
+        unsigned long j;
+        p2m_leaf_lock_t *p = alloc_xenheap_page();
+        if ( !p ) 
+        {
+            spin_unlock(&(p2ml->alloc_lock));
+            return -ENOMEM;
+        }
+
+        for (j = 0; j < (1 << PAGETABLE_ORDER); j++)
+            init_p2m_leaf_lock(&p[j]);
+
+        p2ml->locks_2m[gfn_1g] = p;
+    }
+
+    spin_unlock(&(p2ml->alloc_lock));
+    return 0;
+}
+
+static inline unsigned long __get_last_gfn(unsigned long gfn, unsigned int order)
+{
+    /* Guard against wraparound of the gfn range */
+    unsigned long last_gfn = gfn + (1 << order) - 1;
+    BUG_ON(last_gfn < gfn);
+    return last_gfn;
+}
+
+static inline void get_p2m(struct p2m_domain *p2m, unsigned long gfn, unsigned int order)
+{
+    unsigned long last_gfn, first_1g, last_1g, first_2m, last_2m, i, j;
+    p2m_lock_t *p2ml = p2m->lock;
+
+    /* Holders of the p2m in exclusive mode can lock sub ranges. We make that a no-op.
+     * However, locking exclusively again is considered rude and tasteless. */
+    if ( (p2m_exclusive_locked_by_me(p2m)) && (order != P2M_ORDER_GLOBAL) )
+        return;
+        
+    DEADLOCK_CHECK(((CURRENT_GUARD(p2ml) != DEADLOCK_NULL) &&
+                    (CURRENT_GUARD(p2ml) > gfn)), WARN(),
+                    "P2M DEADLOCK: cpu %d prev range start %lx new range start %lx",
+                    current->processor, CURRENT_GUARD(p2ml), gfn);
+
+    preempt_disable();
+
+    if ( order == P2M_ORDER_GLOBAL ) {
+        get_p2m_global_exclusive(p2m);
+        goto get_p2m_out;
+    } 
+
+    __get_p2m_global(p2m);
+    /* We're non-preemptible. We've blocked exclusive (global) p2m locking. We
+     * will now (allocate and) lock all relevant 2M leaves */
+
+    last_gfn    = __get_last_gfn(gfn, order);
+    first_1g    = gfn_to_1g_sp(gfn);
+    last_1g     = gfn_to_1g_sp(last_gfn);
+
+    for (i = first_1g; i <= last_1g; i++) 
+    {
+        first_2m    = (gfn_1g_to_4k(i) > gfn) ? gfn_1g_to_2m(i) : gfn_to_2m_sp(gfn);
+        last_2m     = min(gfn_to_2m_sp(last_gfn), gfn_1g_to_last_2m(i));
+
+        if ( alloc_locks_2m(p2m, i) )
+        {
+            /* There really isn't much we can do at this point */
+            panic("Fine-grained p2m locking failed to alloc 2M locks"
+                  " for 1G page %lx, domain %hu\n", i, p2m->domain->domain_id);
+        }
+
+        for (j = first_2m; j <= last_2m; j++)
+        {
+            get_p2m_2m(p2m, i, j & ((1 << PAGETABLE_ORDER) - 1));
+        }
+    }
+
+get_p2m_out:
+    push_guard(p2ml, gfn);
+}
+
+/* Converse of the get method: we proactively unlock all relevant leaves here */
+static inline void put_p2m(struct p2m_domain *p2m, unsigned long gfn, unsigned int order)
+{
+    unsigned long last_gfn, first_1g, last_1g, first_2m, last_2m, i, j;
+    p2m_lock_t *p2ml = p2m->lock;
+
+    /* See comment about exclusive holders recursively locking sub-ranges in get_p2m */
+    if ( (p2m_exclusive_locked_by_me(p2m)) && (order != P2M_ORDER_GLOBAL) )
+        return;
+
+    if ( order == P2M_ORDER_GLOBAL )
+    {
+        put_p2m_global_exclusive(p2m);
+        goto cleanup;
+    }
+
+    last_gfn    = __get_last_gfn(gfn, order);
+
+    first_1g    = gfn_to_1g_sp(gfn);
+    last_1g     = gfn_to_1g_sp(last_gfn);
+
+    for (i = first_1g; i <= last_1g; i++) 
+    {
+        first_2m    = (gfn_1g_to_4k(i) > gfn) ? gfn_1g_to_2m(i) : gfn_to_2m_sp(gfn);
+        last_2m     = min(gfn_to_2m_sp(last_gfn), gfn_1g_to_last_2m(i));
+
+        for (j = first_2m; j <= last_2m; j++)
+        {
+            put_p2m_2m(p2m, i, j & ((1 << PAGETABLE_ORDER) - 1));
+        }
+    }
+
+    __put_p2m_global(p2m);
+    
+cleanup:
+    pop_guard(p2ml);
+    preempt_enable();
+}
+
+static inline void p2m_lock_destroy(struct p2m_domain *p2m)
+{
+    unsigned int i;
+    p2m_lock_t *p2ml = p2m->lock;
+
+    get_p2m_global_exclusive(p2m);
+
+    for (i = 0; i < (1 << PAGETABLE_ORDER); i++)
+        if ( p2ml->locks_2m[i] )
+            free_xenheap_page(p2ml->locks_2m[i]);
+
+    free_xenheap_page(p2ml->locks_2m);
+
+    put_p2m_global_exclusive(p2m);
+
+    xfree(p2ml);
+    p2m->lock = NULL;
+}
+
+/* Backwards compatibility */
+#define p2m_lock(p)             get_p2m((p), 0, P2M_ORDER_GLOBAL)
+#define p2m_unlock(p)           put_p2m((p), 0, P2M_ORDER_GLOBAL)
+#define p2m_locked_by_me(p)     p2m_exclusive_locked_by_me((p))
+/* There is no backwards compatibility for this, unless we make the 
+ * global lock recursive */
+#define p2m_lock_recursive(p)   ((void)0) 
+
+#endif /* __x86_64__ */
+
+/* Commonly-used shortcuts */
+#define get_p2m_global(p2m)     get_p2m((p2m), 0, P2M_ORDER_GLOBAL)
+#define put_p2m_global(p2m)     put_p2m((p2m), 0, P2M_ORDER_GLOBAL)
+
+#define get_p2m_gfn(p2m, gfn)   get_p2m((p2m), (gfn), 0)
+#define put_p2m_gfn(p2m, gfn)   put_p2m((p2m), (gfn), 0)
+
+#endif /* _XEN_P2M_LOCK_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 981073d78f7f -r a23e1262b124 xen/arch/x86/mm/p2m-pod.c
--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -34,6 +34,7 @@
 #include <asm/hvm/svm/amd-iommu-proto.h>
 
 #include "mm-locks.h"
+#include "p2m-lock.h"
 
 /* Override macros from asm/page.h to make them work with mfn_t */
 #undef mfn_to_page
diff -r 981073d78f7f -r a23e1262b124 xen/arch/x86/mm/p2m-pt.c
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -39,6 +39,7 @@
 #include <asm/hvm/svm/amd-iommu-proto.h>
 
 #include "mm-locks.h"
+#include "p2m-lock.h"
 
 /* Override macros from asm/page.h to make them work with mfn_t */
 #undef mfn_to_page
diff -r 981073d78f7f -r a23e1262b124 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -38,6 +38,7 @@
 #include <asm/hvm/svm/amd-iommu-proto.h>
 
 #include "mm-locks.h"
+#include "p2m-lock.h"
 
 /* turn on/off 1GB host page table support for hap, default on */
 static bool_t __read_mostly opt_hap_1gb = 1;
@@ -69,9 +70,12 @@ boolean_param("hap_2mb", opt_hap_2mb);
 
 
 /* Init the datastructures for later use by the p2m code */
-static void p2m_initialise(struct domain *d, struct p2m_domain *p2m)
+static int p2m_initialise(struct domain *d, struct p2m_domain *p2m)
 {
-    mm_lock_init(&p2m->lock);
+    if (p2m_lock_init(p2m))
+    {
+        return -ENOMEM;
+    }
     mm_lock_init(&p2m->pod.lock);
     INIT_LIST_HEAD(&p2m->np2m_list);
     INIT_PAGE_LIST_HEAD(&p2m->pages);
@@ -89,7 +93,7 @@ static void p2m_initialise(struct domain
     else
         p2m_pt_init(p2m);
 
-    return;
+    return 0;
 }
 
 static int
@@ -103,7 +107,11 @@ p2m_init_nestedp2m(struct domain *d)
         d->arch.nested_p2m[i] = p2m = xzalloc(struct p2m_domain);
         if (p2m == NULL)
             return -ENOMEM;
-        p2m_initialise(d, p2m);
+        if (p2m_initialise(d, p2m))
+        {
+            xfree(p2m);
+            return -ENOMEM;
+        }
         p2m->write_p2m_entry = nestedp2m_write_p2m_entry;
         list_add(&p2m->np2m_list, &p2m_get_hostp2m(d)->np2m_list);
     }
@@ -118,7 +126,11 @@ int p2m_init(struct domain *d)
     p2m_get_hostp2m(d) = p2m = xzalloc(struct p2m_domain);
     if ( p2m == NULL )
         return -ENOMEM;
-    p2m_initialise(d, p2m);
+    if (p2m_initialise(d, p2m))
+    {
+        xfree(p2m);
+        return -ENOMEM;
+    }
 
     /* Must initialise nestedp2m unconditionally
      * since nestedhvm_enabled(d) returns false here.
@@ -331,6 +343,7 @@ static void p2m_teardown_nestedp2m(struc
     uint8_t i;
 
     for (i = 0; i < MAX_NESTEDP2M; i++) {
+        p2m_lock_destroy(d->arch.nested_p2m[i]);
         xfree(d->arch.nested_p2m[i]);
         d->arch.nested_p2m[i] = NULL;
     }
@@ -338,6 +351,7 @@ static void p2m_teardown_nestedp2m(struc
 
 void p2m_final_teardown(struct domain *d)
 {
+    p2m_lock_destroy(d->arch.p2m); 
     /* Iterate over all p2m tables per domain */
     xfree(d->arch.p2m);
     d->arch.p2m = NULL;
diff -r 981073d78f7f -r a23e1262b124 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -187,9 +187,10 @@ typedef enum {
 #define p2m_is_broken(_t)   (p2m_to_mask(_t) & P2M_BROKEN_TYPES)
 
 /* Per-p2m-table state */
+struct __p2m_lock;
 struct p2m_domain {
     /* Lock that protects updates to the p2m */
-    mm_lock_t          lock;
+    struct __p2m_lock *lock;
 
     /* Shadow translated domain: p2m mapping */
     pagetable_t        phys_table;

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
