
[Xen-devel] [PATCH v8 4/5] x86/ioreq server: Asynchronously reset outstanding p2m_ioreq_server entries.



After an ioreq server has been unmapped, the remaining p2m_ioreq_server
entries need to be reset back to p2m_ram_rw. This patch does so
asynchronously, using the existing p2m_change_entry_type_global()
interface.
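
For illustration, a minimal sketch of the trigger (mirroring the ioreq.c
hunk below; flags == 0 indicates the type is being unmapped from the
server):

    if ( rc == 0 && flags == 0 )
    {
        struct p2m_domain *p2m = p2m_get_hostp2m(d);

        /* Lazily reset leftover p2m_ioreq_server entries; each entry is
         * actually changed back to p2m_ram_rw on its next recalc. */
        if ( read_atomic(&p2m->ioreq.entry_count) )
            p2m_change_entry_type_global(d, p2m_ioreq_server, p2m_ram_rw);
    }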

This patch also disallows live migration while there are still
outstanding p2m_ioreq_server entries. The core reason is that the
current implementation of p2m_change_entry_type_global() cannot
tell the state of p2m_ioreq_server entries (i.e. it cannot decide
whether an entry is to be emulated or to be resynced).
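
Concretely, this is done by refusing to enable global log-dirty mode (a
prerequisite for live migration) while the counter of outstanding entries
is non-zero, as sketched below (see the hap.c hunk):

    /* Refuse global log-dirty (and hence live migration) while any
     * p2m_ioreq_server entry is still outstanding. */
    if ( log_global && read_atomic(&p2m->ioreq.entry_count) )
        return -EBUSY;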

Note: a new field, entry_count, is introduced in struct p2m_domain
to record the number of p2m_ioreq_server p2m page table entries.
These entries only ever point to 4K sized page frames, because all
p2m_ioreq_server entries originate from p2m_ram_rw ones in
p2m_change_type_one(), so we do not need to worry about counting
2M/1G sized pages.
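
A sketch of the accounting done in p2m_change_type_one() (the recalc paths
in p2m-ept.c/p2m-pt.c only ever decrement, when an entry is reset):

    switch ( nt )
    {
    case p2m_ram_rw:
        if ( ot == p2m_ioreq_server )
            p2m->ioreq.entry_count--;     /* entry reset back to ram_rw */
        break;
    case p2m_ioreq_server:
        ASSERT(ot == p2m_ram_rw);         /* only ram_rw entries are claimed */
        p2m->ioreq.entry_count++;
        break;
    default:
        break;
    }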

Signed-off-by: Yu Zhang <yu.c.zhang@xxxxxxxxxxxxxxx>
---
Cc: Paul Durrant <paul.durrant@xxxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Cc: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
Cc: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Cc: Kevin Tian <kevin.tian@xxxxxxxxx>

changes in v4: 
  - According to comments from Jan: use ASSERT() instead of 'if'
    condition in p2m_change_type_one().
  - According to comments from Jan: commit message changes to mention
    that p2m_ioreq_server entries are all based on 4K sized pages.

changes in v3: 
  - Move the synchronously resetting logic into patch 5.
  - According to comments from Jan: introduce p2m_check_changeable()
    to clarify the p2m type change code.
  - According to comments from George: use locks in the same order
    to avoid deadlock, and call p2m_change_entry_type_global() after
    the unmap of the ioreq server has finished.

changes in v2: 
  - Move the calculation of the ioreq server page entry_count into
    p2m_change_type_one() so that we do not need a separate lock.
    Note: entry_count is also updated in resolve_misconfig()/
    do_recalc(); fortunately the callers of both routines already
    hold the p2m lock.
  - Simplify logic in hvmop_set_mem_type().
  - Introduce routine p2m_finish_type_change() to walk the p2m 
    table and do the p2m reset.
---
 xen/arch/x86/hvm/ioreq.c  |  8 ++++++++
 xen/arch/x86/mm/hap/hap.c |  9 +++++++++
 xen/arch/x86/mm/p2m-ept.c |  8 +++++++-
 xen/arch/x86/mm/p2m-pt.c  | 13 +++++++++++--
 xen/arch/x86/mm/p2m.c     | 20 ++++++++++++++++++++
 xen/include/asm-x86/p2m.h |  9 ++++++++-
 6 files changed, 63 insertions(+), 4 deletions(-)

diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c
index fcb9f38..c129eb4 100644
--- a/xen/arch/x86/hvm/ioreq.c
+++ b/xen/arch/x86/hvm/ioreq.c
@@ -949,6 +949,14 @@ int hvm_map_mem_type_to_ioreq_server(struct domain *d, ioservid_t id,
 
     spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock);
 
+    if ( rc == 0 && flags == 0 )
+    {
+        struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+        if ( read_atomic(&p2m->ioreq.entry_count) )
+            p2m_change_entry_type_global(d, p2m_ioreq_server, p2m_ram_rw);
+    }
+
     return rc;
 }
 
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index a57b385..6ec950a 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -187,6 +187,15 @@ out:
  */
 static int hap_enable_log_dirty(struct domain *d, bool_t log_global)
 {
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+    /*
+     * Refuse to turn on global log-dirty mode if
+     * there's outstanding p2m_ioreq_server pages.
+     */
+    if ( log_global && read_atomic(&p2m->ioreq.entry_count) )
+        return -EBUSY;
+
     /* turn on PG_log_dirty bit in paging mode */
     paging_lock(d);
     d->arch.paging.mode |= PG_log_dirty;
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index cc1eb21..1df3d09 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -544,6 +544,12 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
                     e.ipat = ipat;
                     if ( e.recalc && p2m_is_changeable(e.sa_p2mt) )
                     {
+                         if ( e.sa_p2mt == p2m_ioreq_server )
+                         {
+                             p2m->ioreq.entry_count--;
+                             ASSERT(p2m->ioreq.entry_count >= 0);
+                         }
+
                          e.sa_p2mt = p2m_is_logdirty_range(p2m, gfn + i, gfn + i)
                                      ? p2m_ram_logdirty : p2m_ram_rw;
                          ept_p2m_type_to_flags(p2m, &e, e.sa_p2mt, e.access);
@@ -965,7 +971,7 @@ static mfn_t ept_get_entry(struct p2m_domain *p2m,
     if ( is_epte_valid(ept_entry) )
     {
         if ( (recalc || ept_entry->recalc) &&
-             p2m_is_changeable(ept_entry->sa_p2mt) )
+             p2m_check_changeable(ept_entry->sa_p2mt) )
             *t = p2m_is_logdirty_range(p2m, gfn, gfn) ? p2m_ram_logdirty
                                                       : p2m_ram_rw;
         else
diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c
index 7e11b99..13c7014 100644
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -436,11 +436,13 @@ static int do_recalc(struct p2m_domain *p2m, unsigned long gfn)
          needs_recalc(l1, *pent) )
     {
         l1_pgentry_t e = *pent;
+        p2m_type_t p2mt_old;
 
         if ( !valid_recalc(l1, e) )
             P2M_DEBUG("bogus recalc leaf at d%d:%lx:%u\n",
                       p2m->domain->domain_id, gfn, level);
-        if ( p2m_is_changeable(p2m_flags_to_type(l1e_get_flags(e))) )
+        p2mt_old = p2m_flags_to_type(l1e_get_flags(e));
+        if ( p2m_is_changeable(p2mt_old) )
         {
             unsigned long mask = ~0UL << (level * PAGETABLE_ORDER);
             p2m_type_t p2mt = p2m_is_logdirty_range(p2m, gfn & mask, gfn | ~mask)
@@ -460,6 +462,13 @@ static int do_recalc(struct p2m_domain *p2m, unsigned long gfn)
                      mfn &= ~(_PAGE_PSE_PAT >> PAGE_SHIFT);
                 flags |= _PAGE_PSE;
             }
+
+            if ( p2mt_old == p2m_ioreq_server )
+            {
+                p2m->ioreq.entry_count--;
+                ASSERT(p2m->ioreq.entry_count >= 0);
+            }
+
             e = l1e_from_pfn(mfn, flags);
             p2m_add_iommu_flags(&e, level,
                                 (p2mt == p2m_ram_rw)
@@ -729,7 +738,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
 static inline p2m_type_t recalc_type(bool_t recalc, p2m_type_t t,
                                      struct p2m_domain *p2m, unsigned long gfn)
 {
-    if ( !recalc || !p2m_is_changeable(t) )
+    if ( !recalc || !p2m_check_changeable(t) )
         return t;
     return p2m_is_logdirty_range(p2m, gfn, gfn) ? p2m_ram_logdirty
                                                 : p2m_ram_rw;
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 0edfc61..87f274d 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -954,6 +954,26 @@ int p2m_change_type_one(struct domain *d, unsigned long gfn,
                          p2m->default_access)
          : -EBUSY;
 
+    if ( !rc )
+    {
+        switch ( nt )
+        {
+        case p2m_ram_rw:
+            if ( ot == p2m_ioreq_server )
+            {
+                p2m->ioreq.entry_count--;
+                ASSERT(p2m->ioreq.entry_count >= 0);
+            }
+            break;
+        case p2m_ioreq_server:
+            ASSERT(ot == p2m_ram_rw);
+            p2m->ioreq.entry_count++;
+            break;
+        default:
+            break;
+        }
+    }
+
     gfn_unlock(p2m, gfn, 0);
 
     return rc;
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 3786680..395f125 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -120,7 +120,10 @@ typedef unsigned int p2m_query_t;
 
 /* Types that can be subject to bulk transitions. */
 #define P2M_CHANGEABLE_TYPES (p2m_to_mask(p2m_ram_rw) \
-                              | p2m_to_mask(p2m_ram_logdirty) )
+                              | p2m_to_mask(p2m_ram_logdirty) \
+                              | p2m_to_mask(p2m_ioreq_server) )
+
+#define P2M_IOREQ_TYPES (p2m_to_mask(p2m_ioreq_server))
 
 #define P2M_POD_TYPES (p2m_to_mask(p2m_populate_on_demand))
 
@@ -157,6 +160,7 @@ typedef unsigned int p2m_query_t;
 #define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
 #define p2m_is_discard_write(_t) (p2m_to_mask(_t) & P2M_DISCARD_WRITE_TYPES)
 #define p2m_is_changeable(_t) (p2m_to_mask(_t) & P2M_CHANGEABLE_TYPES)
+#define p2m_is_ioreq(_t) (p2m_to_mask(_t) & P2M_IOREQ_TYPES)
 #define p2m_is_pod(_t) (p2m_to_mask(_t) & P2M_POD_TYPES)
 #define p2m_is_grant(_t) (p2m_to_mask(_t) & P2M_GRANT_TYPES)
 /* Grant types are *not* considered valid, because they can be
@@ -178,6 +182,8 @@ typedef unsigned int p2m_query_t;
 
 #define p2m_allows_invalid_mfn(t) (p2m_to_mask(t) & P2M_INVALID_MFN_TYPES)
 
+#define p2m_check_changeable(t) (p2m_is_changeable(t) && !p2m_is_ioreq(t))
+
 typedef enum {
     p2m_host,
     p2m_nested,
@@ -349,6 +355,7 @@ struct p2m_domain {
           * are to be emulated by an ioreq server.
           */
          unsigned int flags;
+         long entry_count;
      } ioreq;
 };
 
-- 
1.9.1

