
[Xen-devel] [PATCH] x86/mm: Improve ring management for memory events. Do not lose guest events


  • To: xen-devel@xxxxxxxxxxxxxxxxxxx
  • From: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
  • Date: Wed, 11 Jan 2012 13:28:24 -0500
  • Cc: andres@xxxxxxxxxxxxxx, tim@xxxxxxx, olaf@xxxxxxxxx, adin@xxxxxxxxxxxxxx
  • Delivery-date: Wed, 11 Jan 2012 18:24:47 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>

 xen/arch/x86/hvm/hvm.c          |   18 +-
 xen/arch/x86/mm/mem_event.c     |  298 +++++++++++++++++++++++++++++++++------
 xen/arch/x86/mm/mem_sharing.c   |   30 +--
 xen/arch/x86/mm/p2m.c           |   81 +++++-----
 xen/common/memory.c             |    7 +-
 xen/include/asm-x86/mem_event.h |   22 +-
 xen/include/asm-x86/p2m.h       |   12 +-
 xen/include/xen/mm.h            |    2 +
 xen/include/xen/sched.h         |   22 ++-
 9 files changed, 359 insertions(+), 133 deletions(-)


This patch is an amalgamation of work done by Olaf Hering <olaf@xxxxxxxxx>
and our own work.

It combines logic changes that simplify the memory event API with the use of
wait queues to deal with extreme conditions in which a guest vcpu generates
too many events.

To generate a new event, a slot in the ring must first be claimed. If a guest
vcpu is generating the event and there is no space, it is put on a wait queue.
If a foreign vcpu is generating the event and there is no space, the vcpu is
expected to retry its operation. If an error occurs later, the claimed slot is
given back via a cancel operation.

Thus, the API has only four calls: claim slot, cancel claimed slot, put request
in the ring, consume the response.
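
For illustration, here is roughly how a producer and a consumer are expected to
drive these four calls. This is a hypothetical sketch, not code from the patch:
example_send_paging_event(), example_drain_responses() and error_detected() are
made-up names, and a real caller also fills in fields such as vcpu_id; the
mem_event_*() calls and the paging ring are the ones used below in p2m.c.

    static int example_send_paging_event(struct domain *d, unsigned long gfn)
    {
        mem_event_request_t req;
        int rc = mem_event_claim_slot(d, &d->mem_event->paging);

        if ( rc == -ENOSYS )
            return 0;           /* no ring configured for this domain */
        else if ( rc < 0 )
            return rc;          /* foreign vcpu and ring full: caller retries */

        if ( error_detected(d, gfn) )
        {
            /* Hand the claimed slot back instead of posting a request. */
            mem_event_cancel_slot(d, &d->mem_event->paging);
            return 0;
        }

        memset(&req, 0, sizeof(req));
        req.type = MEM_EVENT_TYPE_PAGING;
        req.gfn = gfn;

        /* Guaranteed to succeed after a successful claim_slot(). */
        mem_event_put_request(d, &d->mem_event->paging, &req);
        return 0;
    }

    static void example_drain_responses(struct domain *d)
    {
        mem_event_response_t rsp;

        /* Pull every response the listener has placed on the ring. */
        while ( mem_event_get_response(d, &d->mem_event->paging, &rsp) )
        {
            /* ... act on rsp, e.g. unpause the vcpu it names ... */
        }
    }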

With all these mechanisms, no guest events are lost.
Our testing includes 1. ballooning down by 512 MiB; 2. using mem access n2rwx, in
which every page access in a four-vCPU guest results in an event, with no vCPU
pausing, and the four vCPUs touching all RAM. No guest events were lost in
either case, and qemu-dm had no mapping problems.

For full disclosure, here is Olaf's log of changes:

v7:
  - add ring accounting so that each vcpu can place at least one request

v6:
  - take foreign requests into account before calling wake_up_nr()
  - call wake_up_nr() outside of ring lock
 - rename ->bit to ->pause_flag
 - update comment in mem_event_enable()

v5:
  - rename mem_event_check_ring() to mem_event_claim_slot()
  - rename mem_event_put_req_producers() to mem_event_release_slot()
  - add local/foreign request accounting
  - keep room for at least one guest request

v4:
 - fix off-by-one bug in _mem_event_put_request
 - add mem_event_wake_requesters() and use wake_up_nr()
 - rename mem_event_mark_and_pause() and mem_event_mark_and_pause() functions
 - req_producers counts foreign request producers, rename member

v3:
 - rename ->mem_event_bit to ->bit
 - remove me_ from new VPF_ defines

v2:
 - p2m_mem_paging_populate: move vcpu_pause after put_request, otherwise the
   vcpu will not wake_up after a wait_event because the pause_count was
   increased twice. Fixes guest hangs.
 - update free space check in _mem_event_put_request()
 - simplify mem_event_put_request()


Signed-off-by: Adin Scannell <adin@xxxxxxxxxxx>
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>

diff -r d3342b370b6f -r a85e7d46401b xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4188,25 +4188,31 @@ static int hvm_memory_event_traps(long p
                                   unsigned long value, unsigned long old, 
                                   bool_t gla_valid, unsigned long gla) 
 {
+    int rc;
     struct vcpu* v = current;
     struct domain *d = v->domain;
     mem_event_request_t req;
-    int rc;
 
     if ( !(p & HVMPME_MODE_MASK) ) 
         return 0;
 
     if ( (p & HVMPME_onchangeonly) && (value == old) )
         return 1;
-    
-    rc = mem_event_check_ring(d, &d->mem_event->access);
-    if ( rc )
+
+    rc = mem_event_claim_slot(d, &d->mem_event->access);
+    if ( rc == -ENOSYS )
+    {
+        /* If there was no ring to handle the event, then
+         * simply continue executing normally. */
+        return 1;
+    }
+    else if ( rc < 0 )
         return rc;
-    
+
     memset(&req, 0, sizeof(req));
     req.type = MEM_EVENT_TYPE_ACCESS;
     req.reason = reason;
-    
+
     if ( (p & HVMPME_MODE_MASK) == HVMPME_mode_sync ) 
     {
         req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;    
diff -r d3342b370b6f -r a85e7d46401b xen/arch/x86/mm/mem_event.c
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -23,6 +23,7 @@
 
 #include <asm/domain.h>
 #include <xen/event.h>
+#include <xen/wait.h>
 #include <asm/p2m.h>
 #include <asm/mem_event.h>
 #include <asm/mem_paging.h>
@@ -41,6 +42,7 @@ static int mem_event_enable(
     struct domain *d,
     xen_domctl_mem_event_op_t *mec,
     struct mem_event_domain *med,
+    int pause_flag,
     xen_event_channel_notification_t notification_fn)
 {
     int rc;
@@ -93,6 +95,9 @@ static int mem_event_enable(
     put_gfn(dom_mem_event, ring_gfn);
     put_gfn(dom_mem_event, shared_gfn); 
 
+    /* Set the number of currently blocked vCPUs to 0. */
+    med->blocked = 0;
+
     /* Allocate event channel */
     rc = alloc_unbound_xen_event_channel(d->vcpu[0],
                                          current->domain->domain_id,
@@ -110,8 +115,11 @@ static int mem_event_enable(
 
     mem_event_ring_lock_init(med);
 
-    /* Wake any VCPUs paused for memory events */
-    mem_event_unpause_vcpus(d);
+    /* Save the pause flag for this particular ring. */
+    med->pause_flag = pause_flag;
+
+    /* Initialize the last-chance wait queue. */
+    init_waitqueue_head(&med->wq);
 
     return 0;
 
@@ -125,48 +133,218 @@ static int mem_event_enable(
     return rc;
 }
 
-static int mem_event_disable(struct mem_event_domain *med)
+static int mem_event_ring_available(struct mem_event_domain *med)
 {
-    unmap_domain_page(med->ring_page);
-    med->ring_page = NULL;
+    int avail_req = RING_FREE_REQUESTS(&med->front_ring);
+    avail_req -= med->target_producers;
+    avail_req -= med->foreign_producers;
 
-    unmap_domain_page(med->shared_page);
-    med->shared_page = NULL;
+    BUG_ON(avail_req < 0);
+
+    return avail_req;
+}
+
+/*
+ * mem_event_wake_blocked() will wake up vcpus waiting for room in the
+ * ring. These vCPUs were paused on their way out after placing an event,
+ * but need to be resumed once the ring is capable of processing at least
+ * one event from them.
+ */
+static void mem_event_wake_blocked(struct domain *d, struct mem_event_domain *med)
+{
+    struct vcpu *v;
+    int online = d->max_vcpus;
+    int avail_req = mem_event_ring_available(med);
+
+    if ( avail_req <= 0 || med->blocked == 0 )
+        return;
+
+    /*
+     * We only keep vCPUs online while there are enough free slots for their
+     * memory events to be processed.  This guarantees that no memory events
+     * are lost: certain types of events cannot be replayed, so there must be
+     * room in the ring for when they are hit.
+     * See comment below in mem_event_put_request().
+     */
+    for_each_vcpu ( d, v )
+        if ( test_bit(med->pause_flag, &v->pause_flags) )
+            online--;
+
+    ASSERT(online == (d->max_vcpus - med->blocked));
+
+    /* We remember which vcpu last woke up, to avoid always scanning linearly
+     * from zero and starving higher-numbered vcpus under high load. */
+    if ( d->vcpu )
+    {
+        int i, j, k;
+
+        for (i = med->last_vcpu_wake_up + 1, j = 0; j < d->max_vcpus; i++, j++)
+        {
+            k = i % d->max_vcpus;
+            v = d->vcpu[k];
+            if ( !v )
+                continue;
+
+            if ( !(med->blocked) || online >= avail_req )
+               break;
+
+            if ( test_and_clear_bit(med->pause_flag, &v->pause_flags) )
+            {
+                vcpu_unpause(v);
+                online++;
+                med->blocked--;
+                med->last_vcpu_wake_up = k;
+            }
+        }
+    }
+}
+
+/*
+ * If a vCPU attempted to place an event in the ring and was unable to do
+ * so, it is put on a wait queue.  Queued vCPUs are woken as needed, and
+ * take precedence over the blocked vCPUs.
+ */
+static void mem_event_wake_queued(struct domain *d, struct mem_event_domain *med)
+{
+    int avail_req = mem_event_ring_available(med);
+
+    if ( avail_req > 0 )
+        wake_up_nr(&med->wq, avail_req);
+}
+
+/*
+ * mem_event_wake() will wake up all vcpus waiting for the ring to
+ * become available.  If we have queued vCPUs, they get top priority. We
+ * are guaranteed that they will go through code paths that will eventually
+ * call mem_event_wake() again, ensuring that any blocked vCPUs will get
+ * unpaused once all the queued vCPUs have made it through.
+ */
+void mem_event_wake(struct domain *d, struct mem_event_domain *med)
+{
+    if (!list_empty(&med->wq.list))
+        mem_event_wake_queued(d, med);
+    else
+        mem_event_wake_blocked(d, med);
+}
+
+static int mem_event_disable(struct domain *d, struct mem_event_domain *med)
+{
+    if ( med->ring_page )
+    {
+        struct vcpu *v;
+
+        mem_event_ring_lock(med);
+
+        if (!list_empty(&med->wq.list))
+        {
+            mem_event_ring_unlock(med);
+            return -EBUSY;
+        }
+
+        unmap_domain_page(med->ring_page);
+        med->ring_page = NULL;
+
+        unmap_domain_page(med->shared_page);
+        med->shared_page = NULL;
+
+        /* Wake up everybody */
+        wake_up_all(&med->wq);
+
+        /* Unblock all vCPUs */ 
+        for_each_vcpu ( d, v )
+        {
+            if ( test_and_clear_bit(med->pause_flag, &v->pause_flags) )
+            {
+                vcpu_unpause(v);
+                med->blocked--;
+            }
+        }
+
+        mem_event_ring_unlock(med);
+    }
 
     return 0;
 }
 
-void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req)
+static inline void mem_event_release_slot(struct domain *d,
+                                          struct mem_event_domain *med)
+{
+    /* Update the accounting */
+    if ( current->domain == d )
+        med->target_producers--;
+    else
+        med->foreign_producers--;
+
+    /* Kick any waiters */
+    mem_event_wake(d, med);
+}
+
+/*
+ * mem_event_mark_and_pause() tags the vcpu and puts it to sleep.
+ * The vcpu will resume execution in mem_event_wake_blocked().
+ */
+void mem_event_mark_and_pause(struct vcpu *v, struct mem_event_domain *med)
+{
+    if ( !test_and_set_bit(med->pause_flag, &v->pause_flags) )
+    {
+        vcpu_pause_nosync(v);
+        med->blocked++;
+    }
+}
+
+/*
+ * This must be preceded by a call to claim_slot(), and is guaranteed to
+ * succeed.  As a side effect, however, the vCPU may be paused if the ring is
+ * overly full and its continued execution would cause stalling and excessive
+ * waiting.  The vCPU will be automatically unpaused when the ring clears.
+ */
+void mem_event_put_request(struct domain *d,
+                           struct mem_event_domain *med,
+                           mem_event_request_t *req)
 {
     mem_event_front_ring_t *front_ring;
+    int free_req, avail_req;
     RING_IDX req_prod;
 
-    mem_event_ring_lock(med);
-
-    front_ring = &med->front_ring;
-    req_prod = front_ring->req_prod_pvt;
-
     if ( current->domain != d )
     {
         req->flags |= MEM_EVENT_FLAG_FOREIGN;
         ASSERT( !(req->flags & MEM_EVENT_FLAG_VCPU_PAUSED) );
     }
 
+    mem_event_ring_lock(med);
+
+    /* Due to the reservations, this step must succeed. */
+    front_ring = &med->front_ring;
+    free_req = RING_FREE_REQUESTS(front_ring);
+    ASSERT(free_req > 0);
+
     /* Copy request */
+    req_prod = front_ring->req_prod_pvt;
     memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
     req_prod++;
 
     /* Update ring */
-    med->req_producers--;
     front_ring->req_prod_pvt = req_prod;
     RING_PUSH_REQUESTS(front_ring);
 
+    /* We've actually *used* our reservation, so release the slot. */
+    mem_event_release_slot(d, med);
+
+    /* Give this vCPU a black eye if necessary, on the way out.
+     * See the comments above wake_blocked() for more information
+     * on how this mechanism works to avoid waiting. */
+    avail_req = mem_event_ring_available(med);
+    if ( current->domain == d && avail_req < d->max_vcpus )
+        mem_event_mark_and_pause(current, med);
+
     mem_event_ring_unlock(med);
 
     notify_via_xen_event_channel(d, med->xen_port);
 }
 
-int mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp)
+int mem_event_get_response(struct domain *d, struct mem_event_domain *med, mem_event_response_t *rsp)
 {
     mem_event_front_ring_t *front_ring;
     RING_IDX rsp_cons;
@@ -190,57 +368,81 @@ int mem_event_get_response(struct mem_ev
     front_ring->rsp_cons = rsp_cons;
     front_ring->sring->rsp_event = rsp_cons + 1;
 
+    /* Kick any waiters -- since we've just consumed an event,
+     * there may be additional space available in the ring. */
+    mem_event_wake(d, med);
+
     mem_event_ring_unlock(med);
 
     return 1;
 }
 
-void mem_event_unpause_vcpus(struct domain *d)
-{
-    struct vcpu *v;
-
-    for_each_vcpu ( d, v )
-        if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
-            vcpu_wake(v);
-}
-
-void mem_event_mark_and_pause(struct vcpu *v)
-{
-    set_bit(_VPF_mem_event, &v->pause_flags);
-    vcpu_sleep_nosync(v);
-}
-
-void mem_event_put_req_producers(struct mem_event_domain *med)
+void mem_event_cancel_slot(struct domain *d, struct mem_event_domain *med)
 {
     mem_event_ring_lock(med);
-    med->req_producers--;
+    mem_event_release_slot(d, med);
     mem_event_ring_unlock(med);
 }
 
-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
+static int mem_event_grab_slot(struct mem_event_domain *med, int foreign)
 {
-    struct vcpu *curr = current;
-    int free_requests;
-    int ring_full = 1;
+    int avail_req;
 
     if ( !med->ring_page )
-        return -1;
+        return -ENOSYS;
 
     mem_event_ring_lock(med);
 
-    free_requests = RING_FREE_REQUESTS(&med->front_ring);
-    if ( med->req_producers < free_requests )
+    avail_req = mem_event_ring_available(med);
+    if ( avail_req <= 0 )
     {
-        med->req_producers++;
-        ring_full = 0;
+        mem_event_ring_unlock(med);
+        return -EBUSY;
     }
 
-    if ( ring_full && (curr->domain == d) )
-        mem_event_mark_and_pause(curr);
+    if ( !foreign )
+        med->target_producers++;
+    else
+        med->foreign_producers++;
 
     mem_event_ring_unlock(med);
 
-    return ring_full;
+    return 0;
+}
+
+/* Simple try_grab wrapper for use in the wait_event() macro. */
+static int mem_event_wait_try_grab(struct mem_event_domain *med, int *rc)
+{
+    *rc = mem_event_grab_slot(med, 0);
+    return *rc;
+}
+
+/* Call mem_event_grab_slot() repeatedly until a slot is grabbed or the ring no longer exists. */
+static int mem_event_wait_slot(struct mem_event_domain *med)
+{
+    int rc = -EBUSY;
+    wait_event(med->wq, mem_event_wait_try_grab(med, &rc) != -EBUSY);
+    return rc;
+}
+
+/*
+ * Determines whether or not the current vCPU belongs to the target domain,
+ * and calls the appropriate wait function.  If it is a guest vCPU, then we
+ * use mem_event_wait_slot() to reserve a slot.  As long as there is a ring,
+ * this function will always return 0 for a guest.  For a non-guest, we check
+ * for space and return -EBUSY if the ring is not available.
+ *
+ * Return codes: -ENOSYS: the ring is not yet configured
+ *               -EBUSY: the ring is busy
+ *               0: a spot has been reserved
+ *
+ */
+int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med)
+{
+    if ( current->domain == d )
+        return mem_event_wait_slot(med);
+    else
+        return mem_event_grab_slot(med, 1);
 }
 
 /* Registered with Xen-bound event channel for incoming notifications. */
@@ -316,14 +518,14 @@ int mem_event_domctl(struct domain *d, x
             if ( p2m->pod.entry_count )
                 break;
 
-            rc = mem_event_enable(d, mec, med, mem_paging_notification);
+            rc = mem_event_enable(d, mec, med, _VPF_mem_paging, mem_paging_notification);
         }
         break;
 
         case XEN_DOMCTL_MEM_EVENT_OP_PAGING_DISABLE:
         {
             if ( med->ring_page )
-                rc = mem_event_disable(med);
+                rc = mem_event_disable(d, med);
         }
         break;
 
@@ -355,14 +557,14 @@ int mem_event_domctl(struct domain *d, x
             if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
                 break;
 
-            rc = mem_event_enable(d, mec, med, mem_access_notification);
+            rc = mem_event_enable(d, mec, med, _VPF_mem_access, mem_access_notification);
         }
         break;
 
         case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE:
         {
             if ( med->ring_page )
-                rc = mem_event_disable(med);
+                rc = mem_event_disable(d, med);
         }
         break;
 
diff -r d3342b370b6f -r a85e7d46401b xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -253,18 +253,10 @@ static void mem_sharing_audit(void)
 #endif
 
 
-static struct page_info* mem_sharing_alloc_page(struct domain *d, 
-                                                unsigned long gfn)
+static void mem_sharing_notify_helper(struct domain *d, unsigned long gfn)
 {
-    struct page_info* page;
     struct vcpu *v = current;
-    mem_event_request_t req;
-
-    page = alloc_domheap_page(d, 0); 
-    if(page != NULL) return page;
-
-    memset(&req, 0, sizeof(req));
-    req.type = MEM_EVENT_TYPE_SHARED;
+    mem_event_request_t req = { .type = MEM_EVENT_TYPE_SHARED };
 
     if ( v->domain != d )
     {
@@ -275,20 +267,21 @@ static struct page_info* mem_sharing_all
         gdprintk(XENLOG_ERR, 
                  "Failed alloc on unshare path for foreign (%d) lookup\n",
                  d->domain_id);
-        return page;
+        return;
     }
 
+    if (mem_event_claim_slot(d, &d->mem_event->share) < 0)
+    {
+        return;
+    }
+
+    req.flags = MEM_EVENT_FLAG_VCPU_PAUSED;
     vcpu_pause_nosync(v);
-    req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
-
-    if(mem_event_check_ring(d, &d->mem_event->share)) return page;
 
     req.gfn = gfn;
     req.p2mt = p2m_ram_shared;
     req.vcpu_id = v->vcpu_id;
     mem_event_put_request(d, &d->mem_event->share, &req);
-
-    return page;
 }
 
 unsigned int mem_sharing_get_nr_saved_mfns(void)
@@ -301,7 +294,7 @@ int mem_sharing_sharing_resume(struct do
     mem_event_response_t rsp;
 
     /* Get all requests off the ring */
-    while ( mem_event_get_response(&d->mem_event->share, &rsp) )
+    while ( mem_event_get_response(d, &d->mem_event->share, &rsp) )
     {
         if ( rsp.flags & MEM_EVENT_FLAG_DUMMY )
             continue;
@@ -658,13 +651,14 @@ gfn_found:
     if(ret == 0) goto private_page_found;
         
     old_page = page;
-    page = mem_sharing_alloc_page(d, gfn);
+    page = alloc_domheap_page(d, 0);
     if(!page) 
     {
         /* We've failed to obtain memory for private page. Need to re-add the
          * gfn_info to relevant list */
         list_add(&gfn_info->list, &hash_entry->gfns);
         put_gfn(d, gfn);
+        mem_sharing_notify_helper(d, gfn);
         shr_unlock();
         return -ENOMEM;
     }
diff -r d3342b370b6f -r a85e7d46401b xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -859,22 +859,26 @@ int p2m_mem_paging_evict(struct domain *
  * released by the guest. The pager is supposed to drop its reference of the
  * gfn.
  */
-void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
+int p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
 {
-    struct vcpu *v = current;
     mem_event_request_t req;
 
-    /* Check that there's space on the ring for this request */
-    if ( mem_event_check_ring(d, &d->mem_event->paging) == 0)
-    {
-        /* Send release notification to pager */
-        memset(&req, 0, sizeof(req));
-        req.flags |= MEM_EVENT_FLAG_DROP_PAGE;
-        req.gfn = gfn;
-        req.vcpu_id = v->vcpu_id;
+    /* We allow the absence of a ring in this unique case, because it won't
+     * affect the correctness of guest execution at this point.  If this is
+     * the only page that happens to be paged out, we'll be okay, but it's
+     * likely the guest will crash shortly anyway. */
+    int rc = mem_event_claim_slot(d, &d->mem_event->paging);
+    if ( rc < 0 )
+        return rc;
 
-        mem_event_put_request(d, &d->mem_event->paging, &req);
-    }
+    /* Send release notification to pager */
+    memset(&req, 0, sizeof(req));
+    req.type = MEM_EVENT_TYPE_PAGING;
+    req.gfn = gfn;
+    req.flags = MEM_EVENT_FLAG_DROP_PAGE;
+
+    mem_event_put_request(d, &d->mem_event->paging, &req);
+    return 0;
 }
 
 /**
@@ -898,7 +902,7 @@ void p2m_mem_paging_drop_page(struct dom
  * already sent to the pager. In this case the caller has to try again until the
  * gfn is fully paged in again.
  */
-void p2m_mem_paging_populate(struct domain *d, unsigned long gfn)
+int p2m_mem_paging_populate(struct domain *d, unsigned long gfn)
 {
     struct vcpu *v = current;
     mem_event_request_t req;
@@ -907,9 +911,17 @@ void p2m_mem_paging_populate(struct doma
     mfn_t mfn;
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
-    /* Check that there's space on the ring for this request */
-    if ( mem_event_check_ring(d, &d->mem_event->paging) )
-        return;
+    /* We're paging. There should be a ring */
+    int rc = mem_event_claim_slot(d, &d->mem_event->paging);
+    if ( rc == -ENOSYS )
+    {
+        gdprintk(XENLOG_ERR, "Domain %hu paging gfn %lx yet no ring "
+                             "in place\n", d->domain_id, gfn);
+        domain_crash(d);
+        return 0;
+    }
+    else if ( rc < 0 )
+        return rc;
 
     memset(&req, 0, sizeof(req));
     req.type = MEM_EVENT_TYPE_PAGING;
@@ -929,7 +941,7 @@ void p2m_mem_paging_populate(struct doma
     p2m_unlock(p2m);
 
     /* Pause domain if request came from guest and gfn has paging type */
-    if (  p2m_is_paging(p2mt) && v->domain == d )
+    if ( p2m_is_paging(p2mt) && v->domain == d )
     {
         vcpu_pause_nosync(v);
         req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
@@ -938,8 +950,8 @@ void p2m_mem_paging_populate(struct doma
     else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
     {
         /* gfn is already on its way back and vcpu is not paused */
-        mem_event_put_req_producers(&d->mem_event->paging);
-        return;
+        mem_event_cancel_slot(d, &d->mem_event->paging);
+        return 0;
     }
 
     /* Send request to pager */
@@ -948,6 +960,7 @@ void p2m_mem_paging_populate(struct doma
     req.vcpu_id = v->vcpu_id;
 
     mem_event_put_request(d, &d->mem_event->paging, &req);
+    return 0;
 }
 
 /**
@@ -1065,7 +1078,7 @@ void p2m_mem_paging_resume(struct domain
     mfn_t mfn;
 
     /* Pull all responses off the ring */
-    while( mem_event_get_response(&d->mem_event->paging, &rsp) )
+    while( mem_event_get_response(d, &d->mem_event->paging, &rsp) )
     {
         if ( rsp.flags & MEM_EVENT_FLAG_DUMMY )
             continue;
@@ -1090,9 +1103,6 @@ void p2m_mem_paging_resume(struct domain
         if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
             vcpu_unpause(d->vcpu[rsp.vcpu_id]);
     }
-
-    /* Unpause any domains that were paused because the ring was full */
-    mem_event_unpause_vcpus(d);
 }
 
 bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla, 
@@ -1103,7 +1113,6 @@ bool_t p2m_mem_access_check(unsigned lon
     unsigned long gfn = gpa >> PAGE_SHIFT;
     struct domain *d = v->domain;    
     struct p2m_domain* p2m = p2m_get_hostp2m(d);
-    int res;
     mfn_t mfn;
     p2m_type_t p2mt;
     p2m_access_t p2ma;
@@ -1126,17 +1135,16 @@ bool_t p2m_mem_access_check(unsigned lon
     p2m_unlock(p2m);
 
     /* Otherwise, check if there is a memory event listener, and send the message along */
-    res = mem_event_check_ring(d, &d->mem_event->access);
-    if ( res < 0 ) 
+    if ( mem_event_claim_slot(d, &d->mem_event->access) == -ENOSYS ) 
     {
         /* No listener */
         if ( p2m->access_required ) 
         {
-            printk(XENLOG_INFO 
-                   "Memory access permissions failure, no mem_event listener: pausing VCPU %d, dom %d\n",
-                   v->vcpu_id, d->domain_id);
-
-            mem_event_mark_and_pause(v);
+            gdprintk(XENLOG_INFO, "Memory access permissions failure, "
+                                  "no mem_event listener VCPU %d, dom %d\n",
+                                  v->vcpu_id, d->domain_id);
+            domain_crash(v->domain);
+            return 0;
         }
         else
         {
@@ -1149,11 +1157,7 @@ bool_t p2m_mem_access_check(unsigned lon
             }
             return 1;
         }
-
-        return 0;
     }
-    else if ( res > 0 )
-        return 0;  /* No space in buffer; VCPU paused */
 
     memset(&req, 0, sizeof(req));
     req.type = MEM_EVENT_TYPE_ACCESS;
@@ -1188,7 +1192,7 @@ void p2m_mem_access_resume(struct domain
     mem_event_response_t rsp;
 
     /* Pull all responses off the ring */
-    while( mem_event_get_response(&d->mem_event->access, &rsp) )
+    while( mem_event_get_response(d, &d->mem_event->access, &rsp) )
     {
         if ( rsp.flags & MEM_EVENT_FLAG_DUMMY )
             continue;
@@ -1196,13 +1200,8 @@ void p2m_mem_access_resume(struct domain
         if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
             vcpu_unpause(d->vcpu[rsp.vcpu_id]);
     }
-
-    /* Unpause any domains that were paused because the ring was full or no listener 
-     * was available */
-    mem_event_unpause_vcpus(d);
 }
 
-
 /* Set access type for a region of pfns.
  * If start_pfn == -1ul, sets the default access type */
 int p2m_set_mem_access(struct domain *d, unsigned long start_pfn, 
diff -r d3342b370b6f -r a85e7d46401b xen/common/memory.c
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -165,10 +165,13 @@ int guest_remove_page(struct domain *d, 
     mfn = mfn_x(get_gfn(d, gmfn, &p2mt)); 
     if ( unlikely(p2m_is_paging(p2mt)) )
     {
+        int rc; 
         guest_physmap_remove_page(d, gmfn, mfn, 0);
-        p2m_mem_paging_drop_page(d, gmfn);
+        rc = p2m_mem_paging_drop_page(d, gmfn);
         put_gfn(d, gmfn);
-        return 1;
+        /* drop_page returns zero on success, we return 1 on success.
+         * drop_page returns negative on error, never returns 1. */
+        return rc ? rc : 1;
     }
 #else
     mfn = gmfn_to_mfn(d, gmfn);
diff -r d3342b370b6f -r a85e7d46401b xen/include/asm-x86/mem_event.h
--- a/xen/include/asm-x86/mem_event.h
+++ b/xen/include/asm-x86/mem_event.h
@@ -24,18 +24,24 @@
 #ifndef __MEM_EVENT_H__
 #define __MEM_EVENT_H__
 
-/* Pauses VCPU while marking pause flag for mem event */
-void mem_event_mark_and_pause(struct vcpu *v);
-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med);
-void mem_event_put_req_producers(struct mem_event_domain *med);
-void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req);
-int mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp);
-void mem_event_unpause_vcpus(struct domain *d);
+/* Returns 0 on success, -ENOSYS if there is no ring, -EBUSY if there is no
+ * available space. For success or -EBUSY, the vCPU may be left blocked
+ * temporarily to ensure that the ring does not lose future events.  In
+ * general, you must follow a claim_slot() call with either put_request() or
+ * cancel_slot(), both of which are guaranteed to succeed. */
+int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med);
+
+void mem_event_cancel_slot(struct domain *d, struct mem_event_domain *med);
+
+void mem_event_put_request(struct domain *d, struct mem_event_domain *med,
+                            mem_event_request_t *req);
+
+int mem_event_get_response(struct domain *d, struct mem_event_domain *med,
+                           mem_event_response_t *rsp);
 
 int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
                      XEN_GUEST_HANDLE(void) u_domctl);
 
-
 #endif /* __MEM_EVENT_H__ */
 
 
diff -r d3342b370b6f -r a85e7d46401b xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -478,18 +478,18 @@ int p2m_mem_paging_nominate(struct domai
 /* Evict a frame */
 int p2m_mem_paging_evict(struct domain *d, unsigned long gfn);
 /* Tell xenpaging to drop a paged out frame */
-void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn);
+int p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn);
 /* Start populating a paged out frame */
-void p2m_mem_paging_populate(struct domain *d, unsigned long gfn);
+int p2m_mem_paging_populate(struct domain *d, unsigned long gfn);
 /* Prepare the p2m for paging a frame in */
 int p2m_mem_paging_prep(struct domain *d, unsigned long gfn, uint64_t buffer);
 /* Resume normal operation (in case a domain was paused) */
 void p2m_mem_paging_resume(struct domain *d);
 #else
-static inline void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
-{ }
-static inline void p2m_mem_paging_populate(struct domain *d, unsigned long gfn)
-{ }
+static inline int p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
+{ return 0; }
+static inline int p2m_mem_paging_populate(struct domain *d, unsigned long gfn)
+{ return 0; }
 #endif
 
 #ifdef __x86_64__
diff -r d3342b370b6f -r a85e7d46401b xen/include/xen/mm.h
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -318,6 +318,8 @@ page_list_splice(struct page_list_head *
 
 void scrub_one_page(struct page_info *);
 
+/* Returns 1 on success, 0 on error, negative if the ring
+ * for event propagation is full in the presence of paging */
 int guest_remove_page(struct domain *d, unsigned long gmfn);
 
 #define RAM_TYPE_CONVENTIONAL 0x00000001
diff -r d3342b370b6f -r a85e7d46401b xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -14,6 +14,7 @@
 #include <xen/nodemask.h>
 #include <xen/radix-tree.h>
 #include <xen/multicall.h>
+#include <xen/wait.h>
 #include <public/xen.h>
 #include <public/domctl.h>
 #include <public/sysctl.h>
@@ -183,7 +184,9 @@ struct mem_event_domain
 {
     /* ring lock */
     spinlock_t ring_lock;
-    unsigned int req_producers;
+    /* The ring has 64 entries */
+    unsigned char foreign_producers;
+    unsigned char target_producers;
     /* shared page */
     mem_event_shared_page_t *shared_page;
     /* shared ring page */
@@ -192,6 +195,14 @@ struct mem_event_domain
     mem_event_front_ring_t front_ring;
     /* event channel port (vcpu0 only) */
     int xen_port;
+    /* mem_event bit for vcpu->pause_flags */
+    int pause_flag;
+    /* list of vcpus waiting for room in the ring */
+    struct waitqueue_head wq;
+    /* the number of vCPUs blocked */
+    unsigned int blocked;
+    /* The last vcpu woken up */
+    unsigned int last_vcpu_wake_up;
 };
 
 struct mem_event_per_domain
@@ -615,9 +626,12 @@ static inline struct domain *next_domain
  /* VCPU affinity has changed: migrating to a new CPU. */
 #define _VPF_migrating       3
 #define VPF_migrating        (1UL<<_VPF_migrating)
- /* VCPU is blocked on memory-event ring. */
-#define _VPF_mem_event       4
-#define VPF_mem_event        (1UL<<_VPF_mem_event)
+ /* VCPU is blocked due to missing mem_paging ring. */
+#define _VPF_mem_paging      4
+#define VPF_mem_paging       (1UL<<_VPF_mem_paging)
+ /* VCPU is blocked due to missing mem_access ring. */
+#define _VPF_mem_access      5
+#define VPF_mem_access       (1UL<<_VPF_mem_access)
 
 static inline int vcpu_runnable(struct vcpu *v)
 {

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 

