
[Xen-devel] [PATCH RFC v2 1/4] x86/mm: Shadow and p2m changes for PV mem_access



Changes to shadow code
----------------------
If the shadow pagefault handler detects that a mem_access listener is
present, it checks whether a violation occurred. If one did, the vCPU
is paused and an event is sent to the listener. The only case in which
this does not happen is when a listener has registered for write
violations and Xen itself writes to a guest page.
Similarly, if the propagation code detects that a mem_access listener
is present, it applies the access permissions before creating the PTE.
We do not police Xen writes to guest memory, which keeps PV on par with
HVM. The method used to do this, toggling the CR0.WP bit, was suggested
by Jan Beulich.
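
As a reference, here is a condensed sketch of that CR0.WP handling,
adapted from the sh_page_fault() hunk in the patch below (the violation
classification that precedes it is elided):

    /*
     * The faulting write came from Xen itself (eip points into the
     * hypervisor range), so do not treat it as a violation.  Clear
     * CR0.WP so the write goes through if the guest marked the page
     * writable; __copy_to_user_ll() / __put_user_size() set it back.
     */
    if ( violation && access_w &&
         regs->eip >= XEN_VIRT_START && regs->eip <= XEN_VIRT_END )
    {
        unsigned long cr0 = read_cr0();

        violation = 0;
        if ( (cr0 & X86_CR0_WP) &&
             (guest_l1e_get_flags(gw.l1e) & _PAGE_RW) )
        {
            write_cr0(cr0 & ~X86_CR0_WP);
            v->arch.pv_vcpu.need_cr0_wp_set = 1;
        }
    }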

P2M changes
-----------
Add a new p2m implementation for mem_access. The access permissions are
stashed in the shadow_flags field of the page_info structure, as
suggested by Tim Deegan. p2m_mem_access_set_entry() sets the access
value of the given mfn and blows away the shadow entries for it.
p2m_mem_access_get_entry() returns the access value of the given mfn.
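
The access value occupies a 4-bit field of shadow_flags (bits 14-17; see
the SHF_access_* defines in the shadow/private.h hunk below). The
accessors, lifted from the shadow/common.c hunk with the shift
parenthesised for clarity, are:

    void shadow_set_access(struct page_info *page, p2m_access_t a)
    {
        page->shadow_flags = (page->shadow_flags & ~SHF_access_mask) |
                             (a << SHF_access_shift);
    }

    p2m_access_t shadow_get_access(struct page_info *page)
    {
        return (page->shadow_flags & SHF_access_mask) >> SHF_access_shift;
    }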

Signed-off-by: Aravindh Puthiyaparambil <aravindp@xxxxxxxxx>
Cc: Ian Campbell <ian.campbell@xxxxxxxxxx>
Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Keir Fraser <keir@xxxxxxx>
Cc: Tim Deegan <tim@xxxxxxx>

---
Changes from RFC v1:
Removed shadow mem_access mode.
Removed the access lookup table and instead use the shadow_flags in the
page_info structure to stash the access permissions.
Modify p2m_access_to_flags() to only set restrictive permissions.
Replace if with case statement in p2m_mem_access_set_default().
Fix setting of default access value.
Do not police Xen writes to guest memory, keeping PV on par with HVM.

NOTES
-----
Including sched.h in x86_64/uaccess.h caused a circular dependency.

make[3]: Entering directory `/kumo/knc-xen/xen/arch/x86'
gcc -o asm-offsets.s x86_64/asm-offsets.c <truncated>
In file included from /kumo/knc-xen/xen/include/asm/mm.h:9:0,
                 from /kumo/knc-xen/xen/include/xen/mm.h:115,
                 from /kumo/knc-xen/xen/include/asm/domain.h:5,
                 from /kumo/knc-xen/xen/include/xen/domain.h:6,
                 from /kumo/knc-xen/xen/include/xen/sched.h:10,
                 from x86_64/asm-offsets.c:10:
/kumo/knc-xen/xen/include/asm/uaccess.h: In function '__copy_to_user':
/kumo/knc-xen/xen/include/asm/uaccess.h:197:13: error: dereferencing pointer to incomplete type
/kumo/knc-xen/xen/include/asm/uaccess.h:197:13: error: dereferencing pointer to incomplete type
/kumo/knc-xen/xen/include/asm/uaccess.h:200:13: error: dereferencing pointer to incomplete type
/kumo/knc-xen/xen/include/asm/uaccess.h:200:13: error: dereferencing pointer to incomplete type
/kumo/knc-xen/xen/include/asm/uaccess.h:203:13: error: dereferencing pointer to incomplete type
/kumo/knc-xen/xen/include/asm/uaccess.h:203:13: error: dereferencing pointer to incomplete type
/kumo/knc-xen/xen/include/asm/uaccess.h:206:13: error: dereferencing pointer to incomplete type
/kumo/knc-xen/xen/include/asm/uaccess.h:206:13: error: dereferencing pointer to incomplete type
make[3]: *** [asm-offsets.s] Error 1

The fix for this is the reason for the include changes to mm.h, paging.h,
srat.c and vesa.c.
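
Roughly, the cycle (reconstructed from the error trace above) would have
been:

    xen/sched.h -> xen/domain.h -> asm/domain.h -> xen/mm.h -> asm/mm.h
        -> asm/uaccess.h -> asm/x86_64/uaccess.h -> xen/sched.h

Dropping asm/uaccess.h from asm/mm.h breaks the loop; paging.h now
includes it directly, and srat.c and vesa.c gain xen/errno.h, which they
presumably picked up transitively through asm/uaccess.h before.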
 
 xen/arch/x86/mm/Makefile             |   2 +-
 xen/arch/x86/mm/p2m-ma.c             | 148 +++++++++++++++++++++++++++++++++++
 xen/arch/x86/mm/p2m.c                |  52 ++++++++----
 xen/arch/x86/mm/paging.c             |   7 ++
 xen/arch/x86/mm/shadow/common.c      |  75 ++++++++++++++++--
 xen/arch/x86/mm/shadow/multi.c       | 101 +++++++++++++++++++++++-
 xen/arch/x86/mm/shadow/private.h     |   7 ++
 xen/arch/x86/srat.c                  |   1 +
 xen/arch/x86/usercopy.c              |  12 +++
 xen/common/page_alloc.c              |   3 +
 xen/drivers/video/vesa.c             |   1 +
 xen/include/asm-x86/domain.h         |   6 ++
 xen/include/asm-x86/mm.h             |   1 -
 xen/include/asm-x86/p2m.h            |  17 ++++
 xen/include/asm-x86/paging.h         |   1 +
 xen/include/asm-x86/shadow.h         |  15 ++++
 xen/include/asm-x86/x86_64/uaccess.h |   7 ++
 17 files changed, 430 insertions(+), 26 deletions(-)
 create mode 100644 xen/arch/x86/mm/p2m-ma.c

diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
index 73dcdf4..41128a4 100644
--- a/xen/arch/x86/mm/Makefile
+++ b/xen/arch/x86/mm/Makefile
@@ -2,7 +2,7 @@ subdir-y += shadow
 subdir-y += hap
 
 obj-y += paging.o
-obj-y += p2m.o p2m-pt.o p2m-ept.o p2m-pod.o
+obj-y += p2m.o p2m-pt.o p2m-ept.o p2m-pod.o p2m-ma.o
 obj-y += guest_walk_2.o
 obj-y += guest_walk_3.o
 obj-$(x86_64) += guest_walk_4.o
diff --git a/xen/arch/x86/mm/p2m-ma.c b/xen/arch/x86/mm/p2m-ma.c
new file mode 100644
index 0000000..d8ad12c
--- /dev/null
+++ b/xen/arch/x86/mm/p2m-ma.c
@@ -0,0 +1,148 @@
+/******************************************************************************
+ * arch/x86/mm/p2m-ma.c
+ *
+ * Implementation of p2m data structures, for use by PV mem_access code.
+ *
+ * Copyright (c) 2014 Cisco Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <xen/hypercall.h>
+#include <xen/sched.h>
+#include <asm/p2m.h>
+#include <asm/shadow.h>
+#include "mm-locks.h"
+
+/* Override macros from asm/page.h to make them work with mfn_t */
+#undef mfn_to_page
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
+
+/* Convert access permissions to page table flags */
+void p2m_access_to_flags(u32 *flags, p2m_access_t access)
+{
+    /*
+     * Restrict with access permissions while propagating more restrictive
+     * guest permissions.
+     */
+    switch ( access )
+    {
+    case p2m_access_r:
+        *flags &= ~_PAGE_RW;
+        *flags |= _PAGE_NX_BIT;
+        break;
+    case p2m_access_rx:
+    case p2m_access_rx2rw:
+        *flags &= ~_PAGE_RW;
+        break;
+    case p2m_access_rw:
+        *flags |= _PAGE_NX_BIT;
+        break;
+    case p2m_access_rwx:
+    default:
+        break;
+    }
+}
+
+/*
+ * Set the page permission of the mfn. This in effect removes all shadow
+ * mappings of that mfn. The access type of that mfn is stored in the
+ * shadow_flags field of its page_info structure.
+ */
+static int
+p2m_mem_access_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
+                         unsigned int page_order, p2m_type_t p2mt,
+                         p2m_access_t p2ma)
+{
+    struct domain *d = p2m->domain;
+    struct page_info *page = mfn_to_page(mfn);
+
+    ASSERT(shadow_mode_enabled(d));
+
+    /*
+     * For PV domains we only support r, rw, rx, rx2rw and rwx access
+     * permissions
+     */
+    switch ( p2ma )
+    {
+    case p2m_access_n:
+    case p2m_access_w:
+    case p2m_access_x:
+    case p2m_access_wx:
+    case p2m_access_n2rwx:
+        return -EINVAL;
+    default:
+        break;
+    }
+
+    if ( page_get_owner(page) != d )
+        return -ENOENT;
+
+    paging_lock(d);
+
+    shadow_set_access(page, p2ma);
+
+    ASSERT(d->vcpu && d->vcpu[0]);
+    if ( sh_remove_all_mappings(d->vcpu[0], mfn) )
+        flush_tlb_mask(d->domain_dirty_cpumask);
+
+    paging_unlock(d);
+
+    return 0;
+}
+
+/* Get the page permission of the mfn from page_info->shadow_flags */
+static mfn_t
+p2m_mem_access_get_entry(struct p2m_domain *p2m, unsigned long gfn,
+                         p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
+                         unsigned int *page_order)
+{
+    struct domain *d = p2m->domain;
+    /* For PV guests mfn == gfn */
+    mfn_t mfn = _mfn(gfn);
+    struct page_info *page = mfn_to_page(mfn);
+
+    ASSERT(shadow_mode_enabled(d));
+
+    *t = p2m_ram_rw;
+
+    if ( page_get_owner(page) != d )
+        return _mfn(INVALID_MFN);
+
+    *a = shadow_get_access(page);
+    return mfn;
+}
+
+/* Reset the set_entry and get_entry function pointers */
+void p2m_mem_access_reset(struct p2m_domain *p2m)
+{
+    p2m_pt_init(p2m);
+}
+
+/* Set the set_entry and get_entry function pointers */
+void p2m_mem_access_init(struct p2m_domain *p2m)
+{
+    p2m->set_entry = p2m_mem_access_set_entry;
+    p2m->get_entry = p2m_mem_access_get_entry;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 642ec28..b275bfc 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -33,6 +33,7 @@
 #include <asm/mem_event.h>
 #include <public/mem_event.h>
 #include <asm/mem_sharing.h>
+#include <asm/shadow.h>
 #include <xen/event.h>
 #include <asm/hvm/nestedhvm.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
@@ -247,7 +248,9 @@ mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn,
     if ( q & P2M_UNSHARE )
         q |= P2M_ALLOC;
 
-    if ( !p2m || !paging_mode_translate(p2m->domain) )
+    if ( !p2m ||
+         (!paging_mode_translate(p2m->domain) &&
+         !mem_event_check_ring(&p2m->domain->mem_event->access)) )
     {
         /* Not necessarily true, but for non-translated guests, we claim
          * it's the most generic kind of memory */
@@ -284,7 +287,9 @@ mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn,
 
 void __put_gfn(struct p2m_domain *p2m, unsigned long gfn)
 {
-    if ( !p2m || !paging_mode_translate(p2m->domain) )
+    if ( !p2m ||
+         (!paging_mode_translate(p2m->domain) &&
+         !mem_event_check_ring(&p2m->domain->mem_event->access)) )
         /* Nothing to do in this case */
         return;
 
@@ -1426,18 +1431,10 @@ void p2m_mem_access_resume(struct domain *d)
     }
 }
 
-/* Set access type for a region of pfns.
- * If start_pfn == -1ul, sets the default access type */
-long p2m_set_mem_access(struct domain *d, unsigned long pfn, uint32_t nr,
-                        uint32_t start, uint32_t mask, xenmem_access_t access)
+int p2m_convert_xenmem_access(struct p2m_domain *p2m,
+                              xenmem_access_t mem_access, p2m_access_t *a)
 {
-    struct p2m_domain *p2m = p2m_get_hostp2m(d);
-    p2m_access_t a, _a;
-    p2m_type_t t;
-    mfn_t mfn;
-    long rc = 0;
-
-    static const p2m_access_t memaccess[] = {
+    static const p2m_access_t p2ma[] = {
 #define ACCESS(ac) [XENMEM_access_##ac] = p2m_access_##ac
         ACCESS(n),
         ACCESS(r),
@@ -1452,21 +1449,42 @@ long p2m_set_mem_access(struct domain *d, unsigned long pfn, uint32_t nr,
 #undef ACCESS
     };
 
-    switch ( access )
+    switch ( mem_access )
     {
-    case 0 ... ARRAY_SIZE(memaccess) - 1:
-        a = memaccess[access];
+    case 0 ... ARRAY_SIZE(p2ma) - 1:
+        *a = p2ma[mem_access];
         break;
     case XENMEM_access_default:
-        a = p2m->default_access;
+        *a = p2m->default_access;
         break;
     default:
         return -EINVAL;
     }
+    return 0;
+}
+
+/*
+ * Set access type for a region of pfns.
+ * If start_pfn == -1ul, sets the default access type for HVM domains
+ */
+long p2m_set_mem_access(struct domain *d, unsigned long pfn, uint32_t nr,
+                        uint32_t start, uint32_t mask, xenmem_access_t access)
+{
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    p2m_access_t a, _a;
+    p2m_type_t t;
+    mfn_t mfn;
+    long rc = 0;
+
+    rc = p2m_convert_xenmem_access(p2m, access, &a);
+    if ( rc != 0 )
+        return rc;
 
     /* If request to set default access */
     if ( pfn == ~0ul )
     {
+        if ( is_pv_domain(d) )
+            return -ENOSYS;
         p2m->default_access = a;
         return 0;
     }
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index 32764ba..47397f1 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -627,6 +627,13 @@ void paging_teardown(struct domain *d)
     /* clean up log dirty resources. */
     paging_log_dirty_teardown(d);
 
+    /*
+     * Reset p2m setup in the case where a mem_access listener is present while
+     * the domain is being destroyed or it crashed without cleaning up.
+     */
+    if ( is_pv_domain(d) )
+        p2m_mem_access_reset(p2m_get_hostp2m(d));
+
     /* Move populate-on-demand cache back to domain_list for destruction */
     p2m_pod_empty_cache(d);
 }
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 3c803b6..9aacd8e 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -36,6 +36,7 @@
 #include <asm/current.h>
 #include <asm/flushtlb.h>
 #include <asm/shadow.h>
+#include <asm/mem_event.h>
 #include <xen/numa.h>
 #include "private.h"
 
@@ -1356,7 +1357,7 @@ void shadow_prealloc(struct domain *d, u32 type, unsigned int count)
 
 /* Deliberately free all the memory we can: this will tear down all of
  * this domain's shadows */
-static void shadow_blow_tables(struct domain *d) 
+void shadow_blow_tables(struct domain *d)
 {
     struct page_info *sp, *t;
     struct vcpu *v = d->vcpu[0];
@@ -2435,15 +2436,20 @@ int sh_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
     /* If that didn't catch the mapping, something is very wrong */
     if ( !sh_check_page_has_no_refs(page) )
     {
-        /* Don't complain if we're in HVM and there are some extra mappings: 
+        /*
+         * Don't complain if we're in HVM and there are some extra mappings:
          * The qemu helper process has an untyped mapping of this dom's RAM 
          * and the HVM restore program takes another.
          * Also allow one typed refcount for xenheap pages, to match
-         * share_xen_page_with_guest(). */
+         * share_xen_page_with_guest().
+         * PV domains that have a mem_access listener run in shadow mode
+         * without refcounts.
+         */
         if ( !(shadow_mode_external(v->domain)
                && (page->count_info & PGC_count_mask) <= 3
                && ((page->u.inuse.type_info & PGT_count_mask)
-                   == !!is_xen_heap_page(page))) )
+                   == !!is_xen_heap_page(page))) &&
+             !mem_event_check_ring(&v->domain->mem_event->access) )
         {
             SHADOW_ERROR("can't find all mappings of mfn %lx: "
                           "c=%08lx t=%08lx\n", mfn_x(gmfn), 
@@ -2953,7 +2959,7 @@ int shadow_enable(struct domain *d, u32 mode)
         paging_unlock(d);
     }
 
-    /* Allow p2m and log-dirty code to borrow shadow memory */
+    /* Allow p2m, log-dirty and mem_access code to borrow shadow memory */
     d->arch.paging.alloc_page = shadow_alloc_p2m_page;
     d->arch.paging.free_page = shadow_free_p2m_page;
 
@@ -3197,7 +3203,7 @@ static int shadow_one_bit_enable(struct domain *d, u32 mode)
         }
     }
 
-    /* Allow p2m and log-dirty code to borrow shadow memory */
+    /* Allow p2m, log-dirty and mem_access code to borrow shadow memory */
     d->arch.paging.alloc_page = shadow_alloc_p2m_page;
     d->arch.paging.free_page = shadow_free_p2m_page;
 
@@ -3661,6 +3667,63 @@ out:
 }
 
 /**************************************************************************/
+/* mem_access support */
+
+/*
+ * Shadow specific code which is called in XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE
+ * for PV guests.
+ * Return 0 on success.
+ */
+int shadow_enable_mem_access(struct domain *d)
+{
+    int ret;
+
+    paging_lock(d);
+
+#if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
+    /*
+     * 32bit PV guests on 64bit xen behave like older 64bit linux: they
+     * change an l4e instead of cr3 to switch tables.  Give them the
+     * same optimization
+     */
+    if ( is_pv_32on64_domain(d) )
+        d->arch.paging.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL;
+#endif
+
+    ret = shadow_one_bit_enable(d, PG_SH_enable);
+    paging_unlock(d);
+
+    return ret;
+}
+
+/*
+ * Shadow specific code which is called in
+ * XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE for PV guests
+ */
+int shadow_disable_mem_access(struct domain *d)
+{
+    int ret;
+
+    paging_lock(d);
+    ret = shadow_one_bit_disable(d, PG_SH_enable);
+    paging_unlock(d);
+
+    return ret;
+}
+
+void shadow_set_access(struct page_info *page, p2m_access_t a)
+{
+    page->shadow_flags = (page->shadow_flags & ~SHF_access_mask) |
+                         a << SHF_access_shift;
+
+}
+
+p2m_access_t shadow_get_access(struct page_info *page)
+{
+    return (page->shadow_flags & SHF_access_mask) >> SHF_access_shift;
+}
+
+/**************************************************************************/
 /* Shadow-control XEN_DOMCTL dispatcher */
 
 int shadow_domctl(struct domain *d, 
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index c6c9d10..db30396 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -38,6 +38,8 @@
 #include <asm/hvm/cacheattr.h>
 #include <asm/mtrr.h>
 #include <asm/guest_pt.h>
+#include <asm/mem_event.h>
+#include <asm/mem_access.h>
 #include <public/sched.h>
 #include "private.h"
 #include "types.h"
@@ -625,6 +627,14 @@ _sh_propagate(struct vcpu *v,
             }
     }
 
+    /* Propagate access permissions */
+    if ( unlikely(mem_event_check_ring(&d->mem_event->access)) &&
+         level == 1 && !sh_mfn_is_a_page_table(target_mfn) )
+    {
+        p2m_access_t a = shadow_get_access(mfn_to_page(target_mfn));
+        p2m_access_to_flags(&sflags, a);
+    }
+
     // Set the A&D bits for higher level shadows.
     // Higher level entries do not, strictly speaking, have dirty bits, but
     // since we use shadow linear tables, each of these entries may, at some
@@ -2822,6 +2832,7 @@ static int sh_page_fault(struct vcpu *v,
     int r;
     fetch_type_t ft = 0;
     p2m_type_t p2mt;
+    mem_event_request_t *req_ptr = NULL;
     uint32_t rc;
     int version;
 #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
@@ -3009,7 +3020,84 @@ static int sh_page_fault(struct vcpu *v,
 
     /* What mfn is the guest trying to access? */
     gfn = guest_l1e_get_gfn(gw.l1e);
-    gmfn = get_gfn(d, gfn, &p2mt);
+    if ( likely(!mem_event_check_ring(&d->mem_event->access)) )
+        gmfn = get_gfn(d, gfn, &p2mt);
+    /*
+     * A mem_access listener is present, so we will first check if a violation
+     * has occurred.
+     */
+    else
+    {
+        struct p2m_domain *p2m = p2m_get_hostp2m(v->domain);
+        p2m_access_t p2ma;
+
+        gmfn = get_gfn_type_access(p2m, gfn_x(gfn), &p2mt, &p2ma, 0, NULL);
+        if ( mfn_valid(gmfn) && !sh_mfn_is_a_page_table(gmfn)
+             && regs->error_code & PFEC_page_present
+             && !(regs->error_code & PFEC_reserved_bit) )
+        {
+            int violation = 0;
+            bool_t access_w = !!(regs->error_code & PFEC_write_access);
+            bool_t access_x = !!(regs->error_code & PFEC_insn_fetch);
+            bool_t access_r = access_x ? 0 : !access_w;
+
+            /* If the access is against the permissions, then send to mem_event */
+            switch ( p2ma )
+            {
+            case p2m_access_r:
+                violation = access_w || access_x;
+                break;
+            case p2m_access_rx:
+            case p2m_access_rx2rw:
+                violation = access_w;
+                break;
+            case p2m_access_rw:
+                violation = access_x;
+                break;
+            case p2m_access_rwx:
+            default:
+                break;
+            }
+
+            /*
+             * Do not police writes to guest memory from the Xen hypervisor.
+             * This keeps PV mem_access on par with HVM. Turn off CR0.WP here to
+             * allow the write to go through if the guest has marked the page as
+             * writable. Turn it back on in the guest access functions
+             * __copy_to_user / __put_user_size() after the write is completed.
+             */
+            if ( violation && access_w &&
+                 regs->eip >= XEN_VIRT_START && regs->eip <= XEN_VIRT_END )
+            {
+                unsigned long cr0 = read_cr0();
+
+                violation = 0;
+                if ( cr0 & X86_CR0_WP &&
+                     guest_l1e_get_flags(gw.l1e) & _PAGE_RW )
+                {
+                    cr0 &= ~X86_CR0_WP;
+                    write_cr0(cr0);
+                    v->arch.pv_vcpu.need_cr0_wp_set = 1;
+                }
+            }
+
+            if ( violation )
+            {
+                paddr_t gpa = (mfn_x(gmfn) << PAGE_SHIFT) +
+                              (va & ((1 << PAGE_SHIFT) - 1));
+                if ( !p2m_mem_access_check(gpa, 1, va, access_r, access_w,
+                                           access_x, &req_ptr) )
+                {
+                    SHADOW_PRINTK("Page access %c%c%c for gmfn=%"PRI_mfn" 
p2ma: %d\n",
+                                  (access_r ? 'r' : '-'),
+                                  (access_w ? 'w' : '-'),
+                                  (access_x ? 'x' : '-'), mfn_x(gmfn), p2ma);
+                    /* Rights not promoted, vcpu paused, work here is done */
+                    goto out_put_gfn;
+                }
+            }
+        }
+    }
 
     if ( shadow_mode_refcounts(d) && 
          ((!p2m_is_valid(p2mt) && !p2m_is_grant(p2mt)) ||
@@ -3214,7 +3302,18 @@ static int sh_page_fault(struct vcpu *v,
     SHADOW_PRINTK("fixed\n");
     shadow_audit_tables(v);
     paging_unlock(d);
+ out_put_gfn:
     put_gfn(d, gfn_x(gfn));
+
+    /* Send access violation to mem_access listener */
+    if ( unlikely(req_ptr != NULL) )
+    {
+        SHADOW_PRINTK("mem_access SEND violation mfn: 0x%"PRI_mfn"\n",
+                      mfn_x(gmfn));
+        mem_access_send_req(d, req_ptr);
+        xfree(req_ptr);
+    }
+
     return EXCRET_fault_fixed;
 
  emulate:
diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h
index b778fcf..eddb3db 100644
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -260,6 +260,13 @@ static inline int sh_type_has_up_pointer(struct vcpu *v, unsigned int t)
 
 #define SHF_L1_ANY  (SHF_L1_32|SHF_L1_PAE|SHF_L1_64)
 
+/*
+ * Bits 14-17 of page_info->shadow_flags are used to store the p2m_access_t
+ * values of PV shadow domain pages.
+ */
+#define SHF_access_shift (SH_type_max_shadow + 1u)
+#define SHF_access_mask (0xfu << SHF_access_shift)
+
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
 /* Marks a guest L1 page table which is shadowed but not write-protected.
  * If set, then *only* L1 shadows (SHF_L1_*) are allowed. 
diff --git a/xen/arch/x86/srat.c b/xen/arch/x86/srat.c
index 2b05272..83d46bc 100644
--- a/xen/arch/x86/srat.c
+++ b/xen/arch/x86/srat.c
@@ -18,6 +18,7 @@
 #include <xen/acpi.h>
 #include <xen/numa.h>
 #include <xen/pfn.h>
+#include <xen/errno.h>
 #include <asm/e820.h>
 #include <asm/page.h>
 
diff --git a/xen/arch/x86/usercopy.c b/xen/arch/x86/usercopy.c
index 4cc78f5..eecf429 100644
--- a/xen/arch/x86/usercopy.c
+++ b/xen/arch/x86/usercopy.c
@@ -45,6 +45,18 @@ unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned n)
         : "memory" );
     clac();
 
+    /*
+     * A mem_access listener was present and Xen tried to write to guest memory.
+     * To allow this write to go through without an event being sent to the
+     * listener or the pagetable entry being modified, we disabled CR0.WP in the
+     * shadow pagefault handler. We re-enable it here.
+     */
+    if ( unlikely(current->arch.pv_vcpu.need_cr0_wp_set) )
+    {
+        write_cr0(read_cr0() | X86_CR0_WP);
+        current->arch.pv_vcpu.need_cr0_wp_set = 0;
+    }
+
     return __n;
 }
 
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 7b4092d..5b6f747 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -43,6 +43,7 @@
 #include <asm/page.h>
 #include <asm/numa.h>
 #include <asm/flushtlb.h>
+#include <asm/shadow.h>
 #ifdef CONFIG_X86
 #include <asm/p2m.h>
 #include <asm/setup.h> /* for highmem_start only */
@@ -1660,6 +1661,8 @@ int assign_pages(
         page_set_owner(&pg[i], d);
         smp_wmb(); /* Domain pointer must be visible before updating refcnt. */
         pg[i].count_info = PGC_allocated | 1;
+        if ( is_pv_domain(d) )
+            shadow_set_access(&pg[i], p2m_get_hostp2m(d)->default_access);
         page_list_add_tail(&pg[i], &d->page_list);
     }
 
diff --git a/xen/drivers/video/vesa.c b/xen/drivers/video/vesa.c
index 575db62..e7aa54a 100644
--- a/xen/drivers/video/vesa.c
+++ b/xen/drivers/video/vesa.c
@@ -10,6 +10,7 @@
 #include <xen/xmalloc.h>
 #include <xen/kernel.h>
 #include <xen/vga.h>
+#include <xen/errno.h>
 #include <asm/io.h>
 #include <asm/page.h>
 #include "font.h"
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index abf55fb..f7b0262 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -380,6 +380,12 @@ struct pv_vcpu
     /* Deferred VA-based update state. */
     bool_t need_update_runstate_area;
     struct vcpu_time_info pending_system_time;
+
+    /*
+     * Flag that tracks if CR0.WP needs to be set after a Xen write to guest
+     * memory when a PV domain has a mem_access listener attached to it.
+     */
+    bool_t need_cr0_wp_set;
 };
 
 struct arch_vcpu
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index d253117..ec95feb 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -6,7 +6,6 @@
 #include <xen/list.h>
 #include <xen/spinlock.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
 
 /*
  * Per-page-frame information.
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 0ddbadb..029eea8 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -603,6 +603,10 @@ bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla,
 /* Resumes the running of the VCPU, restarting the last instruction */
 void p2m_mem_access_resume(struct domain *d);
 
+/* Convert xenmem_access_t to p2m_access_t */
+int p2m_convert_xenmem_access(struct p2m_domain *p2m,
+                              xenmem_access_t mem_access, p2m_access_t *a);
+
 /* Set access type for a region of pfns.
  * If start_pfn == -1ul, sets the default access type */
 long p2m_set_mem_access(struct domain *d, unsigned long start_pfn, uint32_t nr,
@@ -613,6 +617,19 @@ long p2m_set_mem_access(struct domain *d, unsigned long start_pfn, uint32_t nr,
 int p2m_get_mem_access(struct domain *d, unsigned long pfn,
                        xenmem_access_t *access);
 
+/*
+ * Functions specific to the p2m-ma implementation
+ */
+
+/* Set up the p2m function pointers for the mem_access implementation */
+void p2m_mem_access_init(struct p2m_domain *p2m);
+
+/* Reset p2m function pointers */
+void p2m_mem_access_reset(struct p2m_domain *p2m);
+
+/* Convert access permissions to page table flags */
+void p2m_access_to_flags(u32 *flags, p2m_access_t access);
+
 /* 
  * Internal functions, only called by other p2m code
  */
diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
index 9b8f8de..d30c569 100644
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -32,6 +32,7 @@
 #include <xen/domain_page.h>
 #include <asm/flushtlb.h>
 #include <asm/domain.h>
+#include <asm/uaccess.h>
 
 /*****************************************************************************
  * Macros to tell which paging mode a domain is in */
diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index f40cab4..0420dd8 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -86,6 +86,18 @@ int shadow_disable_log_dirty(struct domain *d);
 /* shadow code to call when bitmap is being cleaned */
 void shadow_clean_dirty_bitmap(struct domain *d);
 
+/* shadow code to call when mem_access is enabled */
+int shadow_enable_mem_access(struct domain *d);
+
+/* shadow code to call when mem access is disabled */
+int shadow_disable_mem_access(struct domain *d);
+
+/* Set the access value in shadow_flags */
+void shadow_set_access(struct page_info *page, p2m_access_t a);
+
+/* Get the access value from shadow_flags */
+p2m_access_t shadow_get_access(struct page_info *page);
+
 /* Update all the things that are derived from the guest's CR0/CR3/CR4.
  * Called to initialize paging structures if the paging mode
  * has changed, and when bringing up a VCPU for the first time. */
@@ -114,6 +126,9 @@ static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
 /* Discard _all_ mappings from the domain's shadows. */
 void shadow_blow_tables_per_domain(struct domain *d);
 
+/* Tear down all of this domain's shadows */
+void shadow_blow_tables(struct domain *d);
+
 #endif /* _XEN_SHADOW_H */
 
 /*
diff --git a/xen/include/asm-x86/x86_64/uaccess.h b/xen/include/asm-x86/x86_64/uaccess.h
index 953abe7..6d13ec6 100644
--- a/xen/include/asm-x86/x86_64/uaccess.h
+++ b/xen/include/asm-x86/x86_64/uaccess.h
@@ -1,6 +1,8 @@
 #ifndef __X86_64_UACCESS_H
 #define __X86_64_UACCESS_H
 
+#include <xen/sched.h>
+
 #define COMPAT_ARG_XLAT_VIRT_BASE ((void *)ARG_XLAT_START(current))
 #define COMPAT_ARG_XLAT_SIZE      (2*PAGE_SIZE)
 struct vcpu;
@@ -65,6 +67,11 @@ do {                                                                \
        case 8: __put_user_asm(x,ptr,retval,"q","","ir",errret);break;  \
        default: __put_user_bad();                                      \
        }                                                               \
+    if ( unlikely(current->arch.pv_vcpu.need_cr0_wp_set) ) \
+    { \
+        write_cr0(read_cr0() | X86_CR0_WP); \
+        current->arch.pv_vcpu.need_cr0_wp_set = 0; \
+    } \
 } while (0)
 
 #define __get_user_size(x,ptr,size,retval,errret)                      \
-- 
1.9.1

