
[Xen-devel] [PATCH v5 20/23] x86/mm: split out PV mm code to pv/mm.c



The following groups of functions are moved to pv/mm.c:

1. {get,put}_page_from_l{2,3,4}e
2. pv_{alloc,free}_page_type
3. all helpers for the above

The l1e functions can't be moved because they are also needed by the
shadow code.

Fix coding style issues while moving.
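
As an example of the style cleanup (the before/after below is taken
verbatim from the hunks in this patch), put_data_page() loses its stray
parameter line break and its writeable flag becomes a bool:

    /* before, in xen/arch/x86/mm.c */
    static void put_data_page(
        struct page_info *page, int writeable)
    {
        if ( writeable )
            put_page_and_type(page);
        else
            put_page(page);
    }

    /* after, in xen/arch/x86/pv/mm.c */
    static void put_data_page(struct page_info *page, bool writeable)
    {
        if ( writeable )
            put_page_and_type(page);
        else
            put_page(page);
    }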

Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
---
 xen/arch/x86/mm.c    | 797 ---------------------------------------------------
 xen/arch/x86/pv/mm.c | 790 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 790 insertions(+), 797 deletions(-)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index c3a26fe03d..3f8d22650d 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -523,108 +523,6 @@ static void invalidate_shadow_ldt(struct vcpu *v, int flush)
 }
 
 
-static int alloc_segdesc_page(struct page_info *page)
-{
-    const struct domain *owner = page_get_owner(page);
-    struct desc_struct *descs = __map_domain_page(page);
-    unsigned i;
-
-    for ( i = 0; i < 512; i++ )
-        if ( unlikely(!check_descriptor(owner, &descs[i])) )
-            break;
-
-    unmap_domain_page(descs);
-
-    return i == 512 ? 0 : -EINVAL;
-}
-
-int get_page_and_type_from_mfn(mfn_t mfn, unsigned long type, struct domain *d,
-                               int partial, int preemptible)
-{
-    struct page_info *page = mfn_to_page(mfn);
-    int rc;
-
-    if ( likely(partial >= 0) &&
-         unlikely(!get_page_from_mfn(mfn, d)) )
-        return -EINVAL;
-
-    rc = (preemptible ?
-          get_page_type_preemptible(page, type) :
-          (get_page_type(page, type) ? 0 : -EINVAL));
-
-    if ( unlikely(rc) && partial >= 0 &&
-         (!preemptible || page != current->arch.old_guest_table) )
-        put_page(page);
-
-    return rc;
-}
-
-static void put_data_page(
-    struct page_info *page, int writeable)
-{
-    if ( writeable )
-        put_page_and_type(page);
-    else
-        put_page(page);
-}
-
-/*
- * We allow root tables to map each other (a.k.a. linear page tables). It
- * needs some special care with reference counts and access permissions:
- *  1. The mapping entry must be read-only, or the guest may get write access
- *     to its own PTEs.
- *  2. We must only bump the reference counts for an *already validated*
- *     L2 table, or we can end up in a deadlock in get_page_type() by waiting
- *     on a validation that is required to complete that validation.
- *  3. We only need to increment the reference counts for the mapped page
- *     frame if it is mapped by a different root table. This is sufficient and
- *     also necessary to allow validation of a root table mapping itself.
- */
-#define define_get_linear_pagetable(level)                                  \
-static int                                                                  \
-get_##level##_linear_pagetable(                                             \
-    level##_pgentry_t pde, unsigned long pde_pfn, struct domain *d)         \
-{                                                                           \
-    unsigned long x, y;                                                     \
-    struct page_info *page;                                                 \
-    unsigned long pfn;                                                      \
-                                                                            \
-    if ( (level##e_get_flags(pde) & _PAGE_RW) )                             \
-    {                                                                       \
-        gdprintk(XENLOG_WARNING,                                            \
-                 "Attempt to create linear p.t. with write perms\n");       \
-        return 0;                                                           \
-    }                                                                       \
-                                                                            \
-    if ( (pfn = level##e_get_pfn(pde)) != pde_pfn )                         \
-    {                                                                       \
-        /* Make sure the mapped frame belongs to the correct domain. */     \
-        if ( unlikely(!get_page_from_mfn(_mfn(pfn), d)) )                   \
-            return 0;                                                       \
-                                                                            \
-        /*                                                                  \
-         * Ensure that the mapped frame is an already-validated page table. \
-         * If so, atomically increment the count (checking for overflow).   \
-         */                                                                 \
-        page = mfn_to_page(_mfn(pfn));                                      \
-        y = page->u.inuse.type_info;                                        \
-        do {                                                                \
-            x = y;                                                          \
-            if ( unlikely((x & PGT_count_mask) == PGT_count_mask) ||        \
-                 unlikely((x & (PGT_type_mask|PGT_validated)) !=            \
-                          (PGT_##level##_page_table|PGT_validated)) )       \
-            {                                                               \
-                put_page(page);                                             \
-                return 0;                                                   \
-            }                                                               \
-        }                                                                   \
-        while ( (y = cmpxchg(&page->u.inuse.type_info, x, x + 1)) != x );   \
-    }                                                                       \
-                                                                            \
-    return 1;                                                               \
-}
-
-
 bool is_iomem_page(mfn_t mfn)
 {
     struct page_info *page;
@@ -913,108 +811,6 @@ get_page_from_l1e(
     return -EBUSY;
 }
 
-
-/* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */
-/*
- * get_page_from_l2e returns:
- *   1 => page not present
- *   0 => success
- *  <0 => error code
- */
-define_get_linear_pagetable(l2);
-int
-get_page_from_l2e(
-    l2_pgentry_t l2e, unsigned long pfn, struct domain *d)
-{
-    unsigned long mfn = l2e_get_pfn(l2e);
-    int rc;
-
-    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
-        return 1;
-
-    if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
-    {
-        gdprintk(XENLOG_WARNING, "Bad L2 flags %x\n",
-                 l2e_get_flags(l2e) & L2_DISALLOW_MASK);
-        return -EINVAL;
-    }
-
-    if ( !(l2e_get_flags(l2e) & _PAGE_PSE) )
-    {
-        rc = get_page_and_type_from_mfn(_mfn(mfn), PGT_l1_page_table, d, 0, 0);
-        if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
-            rc = 0;
-        return rc;
-    }
-
-    return -EINVAL;
-}
-
-
-/*
- * get_page_from_l3e returns:
- *   1 => page not present
- *   0 => success
- *  <0 => error code
- */
-define_get_linear_pagetable(l3);
-int
-get_page_from_l3e(
-    l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial)
-{
-    int rc;
-
-    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
-        return 1;
-
-    if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) )
-    {
-        gdprintk(XENLOG_WARNING, "Bad L3 flags %x\n",
-                 l3e_get_flags(l3e) & l3_disallow_mask(d));
-        return -EINVAL;
-    }
-
-    rc = get_page_and_type_from_mfn(
-        l3e_get_mfn(l3e), PGT_l2_page_table, d, partial, 1);
-    if ( unlikely(rc == -EINVAL) &&
-         !is_pv_32bit_domain(d) &&
-         get_l3_linear_pagetable(l3e, pfn, d) )
-        rc = 0;
-
-    return rc;
-}
-
-/*
- * get_page_from_l4e returns:
- *   1 => page not present
- *   0 => success
- *  <0 => error code
- */
-define_get_linear_pagetable(l4);
-int
-get_page_from_l4e(
-    l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int partial)
-{
-    int rc;
-
-    if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
-        return 1;
-
-    if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
-    {
-        gdprintk(XENLOG_WARNING, "Bad L4 flags %x\n",
-                 l4e_get_flags(l4e) & L4_DISALLOW_MASK);
-        return -EINVAL;
-    }
-
-    rc = get_page_and_type_from_mfn(
-        l4e_get_mfn(l4e), PGT_l3_page_table, d, partial, 1);
-    if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) )
-        rc = 0;
-
-    return rc;
-}
-
 void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner)
 {
     unsigned long     pfn = l1e_get_pfn(l1e);
@@ -1074,292 +870,6 @@ void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner)
     }
 }
 
-
-/*
- * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
- * Note also that this automatically deals correctly with linear p.t.'s.
- */
-int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
-{
-    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || (l2e_get_pfn(l2e) == pfn) )
-        return 1;
-
-    if ( l2e_get_flags(l2e) & _PAGE_PSE )
-    {
-        struct page_info *page = l2e_get_page(l2e);
-        unsigned int i;
-
-        for ( i = 0; i < (1u << PAGETABLE_ORDER); i++, page++ )
-            put_page_and_type(page);
-    } else
-        put_page_and_type(l2e_get_page(l2e));
-
-    return 0;
-}
-
-int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, int partial,
-                      bool defer)
-{
-    struct page_info *pg;
-
-    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) )
-        return 1;
-
-    if ( unlikely(l3e_get_flags(l3e) & _PAGE_PSE) )
-    {
-        unsigned long mfn = l3e_get_pfn(l3e);
-        int writeable = l3e_get_flags(l3e) & _PAGE_RW;
-
-        ASSERT(!(mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)));
-        do {
-            put_data_page(mfn_to_page(_mfn(mfn)), writeable);
-        } while ( ++mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1) );
-
-        return 0;
-    }
-
-    pg = l3e_get_page(l3e);
-
-    if ( unlikely(partial > 0) )
-    {
-        ASSERT(!defer);
-        return put_page_type_preemptible(pg);
-    }
-
-    if ( defer )
-    {
-        current->arch.old_guest_table = pg;
-        return 0;
-    }
-
-    return put_page_and_type_preemptible(pg);
-}
-
-int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, int partial,
-                      bool defer)
-{
-    if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
-         (l4e_get_pfn(l4e) != pfn) )
-    {
-        struct page_info *pg = l4e_get_page(l4e);
-
-        if ( unlikely(partial > 0) )
-        {
-            ASSERT(!defer);
-            return put_page_type_preemptible(pg);
-        }
-
-        if ( defer )
-        {
-            current->arch.old_guest_table = pg;
-            return 0;
-        }
-
-        return put_page_and_type_preemptible(pg);
-    }
-    return 1;
-}
-
-static int alloc_l1_table(struct page_info *page)
-{
-    struct domain *d = page_get_owner(page);
-    l1_pgentry_t  *pl1e;
-    unsigned int   i;
-    int            ret = 0;
-
-    pl1e = __map_domain_page(page);
-
-    for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
-    {
-        switch ( ret = get_page_from_l1e(pl1e[i], d, d) )
-        {
-        default:
-            goto fail;
-        case 0:
-            break;
-        case _PAGE_RW ... _PAGE_RW | PAGE_CACHE_ATTRS:
-            ASSERT(!(ret & ~(_PAGE_RW | PAGE_CACHE_ATTRS)));
-            l1e_flip_flags(pl1e[i], ret);
-            break;
-        }
-
-        pl1e[i] = adjust_guest_l1e(pl1e[i], d);
-    }
-
-    unmap_domain_page(pl1e);
-    return 0;
-
- fail:
-    gdprintk(XENLOG_WARNING, "Failure in alloc_l1_table: slot %#x\n", i);
-    while ( i-- > 0 )
-        put_page_from_l1e(pl1e[i], d);
-
-    unmap_domain_page(pl1e);
-    return ret;
-}
-
-int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e)
-{
-    struct page_info *page;
-    l3_pgentry_t     l3e3;
-
-    if ( !is_pv_32bit_domain(d) )
-        return 1;
-
-    pl3e = (l3_pgentry_t *)((unsigned long)pl3e & PAGE_MASK);
-
-    /* 3rd L3 slot contains L2 with Xen-private mappings. It *must* exist. */
-    l3e3 = pl3e[3];
-    if ( !(l3e_get_flags(l3e3) & _PAGE_PRESENT) )
-    {
-        gdprintk(XENLOG_WARNING, "PAE L3 3rd slot is empty\n");
-        return 0;
-    }
-
-    /*
-     * The Xen-private mappings include linear mappings. The L2 thus cannot
-     * be shared by multiple L3 tables. The test here is adequate because:
-     *  1. Cannot appear in slots != 3 because get_page_type() checks the
-     *     PGT_pae_xen_l2 flag, which is asserted iff the L2 appears in slot 3
-     *  2. Cannot appear in another page table's L3:
-     *     a. alloc_l3_table() calls this function and this check will fail
-     *     b. mod_l3_entry() disallows updates to slot 3 in an existing table
-     */
-    page = l3e_get_page(l3e3);
-    BUG_ON(page->u.inuse.type_info & PGT_pinned);
-    BUG_ON((page->u.inuse.type_info & PGT_count_mask) == 0);
-    BUG_ON(!(page->u.inuse.type_info & PGT_pae_xen_l2));
-    if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
-    {
-        gdprintk(XENLOG_WARNING, "PAE L3 3rd slot is shared\n");
-        return 0;
-    }
-
-    return 1;
-}
-
-static int alloc_l2_table(struct page_info *page, unsigned long type,
-                          int preemptible)
-{
-    struct domain *d = page_get_owner(page);
-    unsigned long  pfn = mfn_x(page_to_mfn(page));
-    l2_pgentry_t  *pl2e;
-    unsigned int   i;
-    int            rc = 0;
-
-    pl2e = map_domain_page(_mfn(pfn));
-
-    for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; i++ )
-    {
-        if ( preemptible && i > page->nr_validated_ptes
-             && hypercall_preempt_check() )
-        {
-            page->nr_validated_ptes = i;
-            rc = -ERESTART;
-            break;
-        }
-
-        if ( !is_guest_l2_slot(d, type, i) ||
-             (rc = get_page_from_l2e(pl2e[i], pfn, d)) > 0 )
-            continue;
-
-        if ( rc < 0 )
-        {
-            gdprintk(XENLOG_WARNING, "Failure in alloc_l2_table: slot %#x\n", i);
-            while ( i-- > 0 )
-                if ( is_guest_l2_slot(d, type, i) )
-                    put_page_from_l2e(pl2e[i], pfn);
-            break;
-        }
-
-        pl2e[i] = adjust_guest_l2e(pl2e[i], d);
-    }
-
-    if ( rc >= 0 && (type & PGT_pae_xen_l2) )
-    {
-        /* Xen private mappings. */
-        memcpy(&pl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)],
-               &compat_idle_pg_table_l2[
-                   l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
-               COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*pl2e));
-    }
-
-    unmap_domain_page(pl2e);
-    return rc > 0 ? 0 : rc;
-}
-
-static int alloc_l3_table(struct page_info *page)
-{
-    struct domain *d = page_get_owner(page);
-    unsigned long  pfn = mfn_x(page_to_mfn(page));
-    l3_pgentry_t  *pl3e;
-    unsigned int   i;
-    int            rc = 0, partial = page->partial_pte;
-
-    pl3e = map_domain_page(_mfn(pfn));
-
-    /*
-     * PAE guests allocate full pages, but aren't required to initialize
-     * more than the first four entries; when running in compatibility
-     * mode, however, the full page is visible to the MMU, and hence all
-     * 512 entries must be valid/verified, which is most easily achieved
-     * by clearing them out.
-     */
-    if ( is_pv_32bit_domain(d) )
-        memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
-
-    for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES;
-          i++, partial = 0 )
-    {
-        if ( is_pv_32bit_domain(d) && (i == 3) )
-        {
-            if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ||
-                 (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) )
-                rc = -EINVAL;
-            else
-                rc = get_page_and_type_from_mfn(
-                    l3e_get_mfn(pl3e[i]),
-                    PGT_l2_page_table | PGT_pae_xen_l2, d, partial, 1);
-        }
-        else if ( (rc = get_page_from_l3e(pl3e[i], pfn, d, partial)) > 0 )
-            continue;
-
-        if ( rc == -ERESTART )
-        {
-            page->nr_validated_ptes = i;
-            page->partial_pte = partial ?: 1;
-        }
-        else if ( rc == -EINTR && i )
-        {
-            page->nr_validated_ptes = i;
-            page->partial_pte = 0;
-            rc = -ERESTART;
-        }
-        if ( rc < 0 )
-            break;
-
-        pl3e[i] = adjust_guest_l3e(pl3e[i], d);
-    }
-
-    if ( rc >= 0 && !create_pae_xen_mappings(d, pl3e) )
-        rc = -EINVAL;
-    if ( rc < 0 && rc != -ERESTART && rc != -EINTR )
-    {
-        gdprintk(XENLOG_WARNING, "Failure in alloc_l3_table: slot %#x\n", i);
-        if ( i )
-        {
-            page->nr_validated_ptes = i;
-            page->partial_pte = 0;
-            current->arch.old_guest_table = page;
-        }
-        while ( i-- > 0 )
-            pl3e[i] = unadjust_guest_l3e(pl3e[i], d);
-    }
-
-    unmap_domain_page(pl3e);
-    return rc > 0 ? 0 : rc;
-}
-
 bool fill_ro_mpt(mfn_t mfn)
 {
     l4_pgentry_t *l4tab = map_domain_page(mfn);
@@ -1384,184 +894,6 @@ void zap_ro_mpt(mfn_t mfn)
     unmap_domain_page(l4tab);
 }
 
-static int alloc_l4_table(struct page_info *page)
-{
-    struct domain *d = page_get_owner(page);
-    unsigned long  pfn = mfn_x(page_to_mfn(page));
-    l4_pgentry_t  *pl4e = map_domain_page(_mfn(pfn));
-    unsigned int   i;
-    int            rc = 0, partial = page->partial_pte;
-
-    for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES;
-          i++, partial = 0 )
-    {
-        if ( !is_guest_l4_slot(d, i) ||
-             (rc = get_page_from_l4e(pl4e[i], pfn, d, partial)) > 0 )
-            continue;
-
-        if ( rc == -ERESTART )
-        {
-            page->nr_validated_ptes = i;
-            page->partial_pte = partial ?: 1;
-        }
-        else if ( rc < 0 )
-        {
-            if ( rc != -EINTR )
-                gdprintk(XENLOG_WARNING,
-                         "Failure in alloc_l4_table: slot %#x\n", i);
-            if ( i )
-            {
-                page->nr_validated_ptes = i;
-                page->partial_pte = 0;
-                if ( rc == -EINTR )
-                    rc = -ERESTART;
-                else
-                {
-                    if ( current->arch.old_guest_table )
-                        page->nr_validated_ptes++;
-                    current->arch.old_guest_table = page;
-                }
-            }
-        }
-        if ( rc < 0 )
-        {
-            unmap_domain_page(pl4e);
-            return rc;
-        }
-
-        pl4e[i] = adjust_guest_l4e(pl4e[i], d);
-    }
-
-    if ( rc >= 0 )
-    {
-        init_guest_l4_table(pl4e, d, !VM_ASSIST(d, m2p_strict));
-        atomic_inc(&d->arch.pv_domain.nr_l4_pages);
-        rc = 0;
-    }
-    unmap_domain_page(pl4e);
-
-    return rc;
-}
-
-static void free_l1_table(struct page_info *page)
-{
-    struct domain *d = page_get_owner(page);
-    l1_pgentry_t *pl1e;
-    unsigned int  i;
-
-    pl1e = __map_domain_page(page);
-
-    for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
-        put_page_from_l1e(pl1e[i], d);
-
-    unmap_domain_page(pl1e);
-}
-
-
-static int free_l2_table(struct page_info *page, int preemptible)
-{
-    struct domain *d = page_get_owner(page);
-    unsigned long pfn = mfn_x(page_to_mfn(page));
-    l2_pgentry_t *pl2e;
-    unsigned int  i = page->nr_validated_ptes - 1;
-    int err = 0;
-
-    pl2e = map_domain_page(_mfn(pfn));
-
-    ASSERT(page->nr_validated_ptes);
-    do {
-        if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) &&
-             put_page_from_l2e(pl2e[i], pfn) == 0 &&
-             preemptible && i && hypercall_preempt_check() )
-        {
-           page->nr_validated_ptes = i;
-           err = -ERESTART;
-        }
-    } while ( !err && i-- );
-
-    unmap_domain_page(pl2e);
-
-    if ( !err )
-        page->u.inuse.type_info &= ~PGT_pae_xen_l2;
-
-    return err;
-}
-
-static int free_l3_table(struct page_info *page)
-{
-    struct domain *d = page_get_owner(page);
-    unsigned long pfn = mfn_x(page_to_mfn(page));
-    l3_pgentry_t *pl3e;
-    int rc = 0, partial = page->partial_pte;
-    unsigned int  i = page->nr_validated_ptes - !partial;
-
-    pl3e = map_domain_page(_mfn(pfn));
-
-    do {
-        rc = put_page_from_l3e(pl3e[i], pfn, partial, 0);
-        if ( rc < 0 )
-            break;
-        partial = 0;
-        if ( rc > 0 )
-            continue;
-        pl3e[i] = unadjust_guest_l3e(pl3e[i], d);
-    } while ( i-- );
-
-    unmap_domain_page(pl3e);
-
-    if ( rc == -ERESTART )
-    {
-        page->nr_validated_ptes = i;
-        page->partial_pte = partial ?: -1;
-    }
-    else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
-    {
-        page->nr_validated_ptes = i + 1;
-        page->partial_pte = 0;
-        rc = -ERESTART;
-    }
-    return rc > 0 ? 0 : rc;
-}
-
-static int free_l4_table(struct page_info *page)
-{
-    struct domain *d = page_get_owner(page);
-    unsigned long pfn = mfn_x(page_to_mfn(page));
-    l4_pgentry_t *pl4e = map_domain_page(_mfn(pfn));
-    int rc = 0, partial = page->partial_pte;
-    unsigned int  i = page->nr_validated_ptes - !partial;
-
-    do {
-        if ( is_guest_l4_slot(d, i) )
-            rc = put_page_from_l4e(pl4e[i], pfn, partial, 0);
-        if ( rc < 0 )
-            break;
-        partial = 0;
-    } while ( i-- );
-
-    if ( rc == -ERESTART )
-    {
-        page->nr_validated_ptes = i;
-        page->partial_pte = partial ?: -1;
-    }
-    else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
-    {
-        page->nr_validated_ptes = i + 1;
-        page->partial_pte = 0;
-        rc = -ERESTART;
-    }
-
-    unmap_domain_page(pl4e);
-
-    if ( rc >= 0 )
-    {
-        atomic_dec(&d->arch.pv_domain.nr_l4_pages);
-        rc = 0;
-    }
-
-    return rc;
-}
-
 int page_lock(struct page_info *page)
 {
     unsigned long x, nx;
@@ -1969,135 +1301,6 @@ void get_page_light(struct page_info *page)
     while ( unlikely(y != x) );
 }
 
-int pv_alloc_page_type(struct page_info *page, unsigned long type,
-                       int preemptible)
-{
-    struct domain *owner = page_get_owner(page);
-    int rc;
-
-    /* A page table is dirtied when its type count becomes non-zero. */
-    if ( likely(owner != NULL) )
-        paging_mark_dirty(owner, page_to_mfn(page));
-
-    switch ( type & PGT_type_mask )
-    {
-    case PGT_l1_page_table:
-        rc = alloc_l1_table(page);
-        break;
-    case PGT_l2_page_table:
-        rc = alloc_l2_table(page, type, preemptible);
-        break;
-    case PGT_l3_page_table:
-        ASSERT(preemptible);
-        rc = alloc_l3_table(page);
-        break;
-    case PGT_l4_page_table:
-        ASSERT(preemptible);
-        rc = alloc_l4_table(page);
-        break;
-    case PGT_seg_desc_page:
-        rc = alloc_segdesc_page(page);
-        break;
-    default:
-        printk("Bad type in %s %lx t=%" PRtype_info " c=%lx\n", __func__,
-               type, page->u.inuse.type_info,
-               page->count_info);
-        rc = -EINVAL;
-        BUG();
-    }
-
-    /* No need for atomic update of type_info here: noone else updates it. */
-    smp_wmb();
-    switch ( rc )
-    {
-    case 0:
-        page->u.inuse.type_info |= PGT_validated;
-        break;
-    case -EINTR:
-        ASSERT((page->u.inuse.type_info &
-                (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
-        page->u.inuse.type_info &= ~PGT_count_mask;
-        break;
-    default:
-        ASSERT(rc < 0);
-        gdprintk(XENLOG_WARNING, "Error while validating mfn %" PRI_mfn
-                 " (pfn %" PRI_pfn ") for type %" PRtype_info
-                 ": caf=%08lx taf=%" PRtype_info "\n",
-                 mfn_x(page_to_mfn(page)),
-                 get_gpfn_from_mfn(mfn_x(page_to_mfn(page))),
-                 type, page->count_info, page->u.inuse.type_info);
-        if ( page != current->arch.old_guest_table )
-            page->u.inuse.type_info = 0;
-        else
-        {
-            ASSERT((page->u.inuse.type_info &
-                    (PGT_count_mask | PGT_validated)) == 1);
-    case -ERESTART:
-            get_page_light(page);
-            page->u.inuse.type_info |= PGT_partial;
-        }
-        break;
-    }
-
-    return rc;
-}
-
-
-int pv_free_page_type(struct page_info *page, unsigned long type,
-                      int preemptible)
-{
-    struct domain *owner = page_get_owner(page);
-    unsigned long gmfn;
-    int rc;
-
-    if ( likely(owner != NULL) && unlikely(paging_mode_enabled(owner)) )
-    {
-        /* A page table is dirtied when its type count becomes zero. */
-        paging_mark_dirty(owner, page_to_mfn(page));
-
-        ASSERT(!shadow_mode_refcounts(owner));
-
-        gmfn = mfn_to_gmfn(owner, mfn_x(page_to_mfn(page)));
-        ASSERT(VALID_M2P(gmfn));
-        /* Page sharing not supported for shadowed domains */
-        if(!SHARED_M2P(gmfn))
-            shadow_remove_all_shadows(owner, _mfn(gmfn));
-    }
-
-    if ( !(type & PGT_partial) )
-    {
-        page->nr_validated_ptes = 1U << PAGETABLE_ORDER;
-        page->partial_pte = 0;
-    }
-
-    switch ( type & PGT_type_mask )
-    {
-    case PGT_l1_page_table:
-        free_l1_table(page);
-        rc = 0;
-        break;
-    case PGT_l2_page_table:
-        rc = free_l2_table(page, preemptible);
-        break;
-    case PGT_l3_page_table:
-        ASSERT(preemptible);
-        rc = free_l3_table(page);
-        break;
-    case PGT_l4_page_table:
-        ASSERT(preemptible);
-        rc = free_l4_table(page);
-        break;
-    default:
-        gdprintk(XENLOG_WARNING, "type %" PRtype_info " mfn %" PRI_mfn "\n",
-                 type, mfn_x(page_to_mfn(page)));
-        rc = -EINVAL;
-        BUG();
-    }
-
-    return rc;
-}
-
-
 static int __put_final_page_type(
     struct page_info *page, unsigned long type, int preemptible)
 {
diff --git a/xen/arch/x86/pv/mm.c b/xen/arch/x86/pv/mm.c
index d0fc14dfa6..47cdf58dcf 100644
--- a/xen/arch/x86/pv/mm.c
+++ b/xen/arch/x86/pv/mm.c
@@ -19,11 +19,14 @@
  * License along with this program; If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <xen/event.h>
 #include <xen/guest_access.h>
 
 #include <asm/current.h>
+#include <asm/mm.h>
 #include <asm/p2m.h>
 #include <asm/setup.h>
+#include <asm/shadow.h>
 
 #include "mm.h"
 
@@ -41,6 +44,273 @@ static l4_pgentry_t __read_mostly split_l4e;
 #define root_pgt_pv_xen_slots ROOT_PAGETABLE_PV_XEN_SLOTS
 #endif
 
+/*
+ * We allow root tables to map each other (a.k.a. linear page tables). It
+ * needs some special care with reference counts and access permissions:
+ *  1. The mapping entry must be read-only, or the guest may get write access
+ *     to its own PTEs.
+ *  2. We must only bump the reference counts for an *already validated*
+ *     L2 table, or we can end up in a deadlock in get_page_type() by waiting
+ *     on a validation that is required to complete that validation.
+ *  3. We only need to increment the reference counts for the mapped page
+ *     frame if it is mapped by a different root table. This is sufficient and
+ *     also necessary to allow validation of a root table mapping itself.
+ */
+#define define_get_linear_pagetable(level)                                  \
+static int                                                                  \
+get_##level##_linear_pagetable(                                             \
+    level##_pgentry_t pde, unsigned long pde_pfn, struct domain *d)         \
+{                                                                           \
+    unsigned long x, y;                                                     \
+    struct page_info *page;                                                 \
+    unsigned long pfn;                                                      \
+                                                                            \
+    if ( (level##e_get_flags(pde) & _PAGE_RW) )                             \
+    {                                                                       \
+        gdprintk(XENLOG_WARNING,                                            \
+                 "Attempt to create linear p.t. with write perms\n");       \
+        return 0;                                                           \
+    }                                                                       \
+                                                                            \
+    if ( (pfn = level##e_get_pfn(pde)) != pde_pfn )                         \
+    {                                                                       \
+        /* Make sure the mapped frame belongs to the correct domain. */     \
+        if ( unlikely(!get_page_from_mfn(_mfn(pfn), d)) )                   \
+            return 0;                                                       \
+                                                                            \
+        /*                                                                  \
+         * Ensure that the mapped frame is an already-validated page table. \
+         * If so, atomically increment the count (checking for overflow).   \
+         */                                                                 \
+        page = mfn_to_page(_mfn(pfn));                                      \
+        y = page->u.inuse.type_info;                                        \
+        do {                                                                \
+            x = y;                                                          \
+            if ( unlikely((x & PGT_count_mask) == PGT_count_mask) ||        \
+                 unlikely((x & (PGT_type_mask|PGT_validated)) !=            \
+                          (PGT_##level##_page_table|PGT_validated)) )       \
+            {                                                               \
+                put_page(page);                                             \
+                return 0;                                                   \
+            }                                                               \
+        }                                                                   \
+        while ( (y = cmpxchg(&page->u.inuse.type_info, x, x + 1)) != x );   \
+    }                                                                       \
+                                                                            \
+    return 1;                                                               \
+}
+
+int get_page_and_type_from_mfn(mfn_t mfn, unsigned long type, struct domain *d,
+                               int partial, int preemptible)
+{
+    struct page_info *page = mfn_to_page(mfn);
+    int rc;
+
+    if ( likely(partial >= 0) &&
+         unlikely(!get_page_from_mfn(mfn, d)) )
+        return -EINVAL;
+
+    rc = (preemptible ?
+          get_page_type_preemptible(page, type) :
+          (get_page_type(page, type) ? 0 : -EINVAL));
+
+    if ( unlikely(rc) && partial >= 0 &&
+         (!preemptible || page != current->arch.old_guest_table) )
+        put_page(page);
+
+    return rc;
+}
+
+/* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */
+/*
+ * get_page_from_l2e returns:
+ *   1 => page not present
+ *   0 => success
+ *  <0 => error code
+ */
+define_get_linear_pagetable(l2);
+int get_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn, struct domain *d)
+{
+    unsigned long mfn = l2e_get_pfn(l2e);
+    int rc;
+
+    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+        return 1;
+
+    if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
+    {
+        gdprintk(XENLOG_WARNING, "Bad L2 flags %x\n",
+                 l2e_get_flags(l2e) & L2_DISALLOW_MASK);
+        return -EINVAL;
+    }
+
+    if ( !(l2e_get_flags(l2e) & _PAGE_PSE) )
+    {
+        rc = get_page_and_type_from_mfn(_mfn(mfn), PGT_l1_page_table, d, 0, 0);
+        if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
+            rc = 0;
+        return rc;
+    }
+
+    return -EINVAL;
+}
+
+
+/*
+ * get_page_from_l3e returns:
+ *   1 => page not present
+ *   0 => success
+ *  <0 => error code
+ */
+define_get_linear_pagetable(l3);
+int get_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, struct domain *d,
+                      int partial)
+{
+    int rc;
+
+    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+        return 1;
+
+    if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) )
+    {
+        gdprintk(XENLOG_WARNING, "Bad L3 flags %x\n",
+                 l3e_get_flags(l3e) & l3_disallow_mask(d));
+        return -EINVAL;
+    }
+
+    rc = get_page_and_type_from_mfn(
+        l3e_get_mfn(l3e), PGT_l2_page_table, d, partial, 1);
+    if ( unlikely(rc == -EINVAL) &&
+         !is_pv_32bit_domain(d) &&
+         get_l3_linear_pagetable(l3e, pfn, d) )
+        rc = 0;
+
+    return rc;
+}
+
+/*
+ * get_page_from_l4e returns:
+ *   1 => page not present
+ *   0 => success
+ *  <0 => error code
+ */
+define_get_linear_pagetable(l4);
+int get_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, struct domain *d,
+                      int partial)
+{
+    int rc;
+
+    if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
+        return 1;
+
+    if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
+    {
+        gdprintk(XENLOG_WARNING, "Bad L4 flags %x\n",
+                 l4e_get_flags(l4e) & L4_DISALLOW_MASK);
+        return -EINVAL;
+    }
+
+    rc = get_page_and_type_from_mfn(
+        l4e_get_mfn(l4e), PGT_l3_page_table, d, partial, 1);
+    if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) )
+        rc = 0;
+
+    return rc;
+}
+
+/*
+ * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
+ * Note also that this automatically deals correctly with linear p.t.'s.
+ */
+int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
+{
+    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || (l2e_get_pfn(l2e) == pfn) )
+        return 1;
+
+    if ( l2e_get_flags(l2e) & _PAGE_PSE )
+    {
+        struct page_info *page = l2e_get_page(l2e);
+        unsigned int i;
+
+        for ( i = 0; i < (1u << PAGETABLE_ORDER); i++, page++ )
+            put_page_and_type(page);
+    } else
+        put_page_and_type(l2e_get_page(l2e));
+
+    return 0;
+}
+
+static void put_data_page(struct page_info *page, bool writeable)
+{
+    if ( writeable )
+        put_page_and_type(page);
+    else
+        put_page(page);
+}
+
+int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, int partial,
+                      bool defer)
+{
+    struct page_info *pg;
+
+    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) )
+        return 1;
+
+    if ( unlikely(l3e_get_flags(l3e) & _PAGE_PSE) )
+    {
+        unsigned long mfn = l3e_get_pfn(l3e);
+        bool writeable = l3e_get_flags(l3e) & _PAGE_RW;
+
+        ASSERT(!(mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)));
+        do {
+            put_data_page(mfn_to_page(_mfn(mfn)), writeable);
+        } while ( ++mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1) );
+
+        return 0;
+    }
+
+    pg = l3e_get_page(l3e);
+
+    if ( unlikely(partial > 0) )
+    {
+        ASSERT(!defer);
+        return put_page_type_preemptible(pg);
+    }
+
+    if ( defer )
+    {
+        current->arch.old_guest_table = pg;
+        return 0;
+    }
+
+    return put_page_and_type_preemptible(pg);
+}
+
+int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, int partial,
+                      bool defer)
+{
+    if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
+         (l4e_get_pfn(l4e) != pfn) )
+    {
+        struct page_info *pg = l4e_get_page(l4e);
+
+        if ( unlikely(partial > 0) )
+        {
+            ASSERT(!defer);
+            return put_page_type_preemptible(pg);
+        }
+
+        if ( defer )
+        {
+            current->arch.old_guest_table = pg;
+            return 0;
+        }
+
+        return put_page_and_type_preemptible(pg);
+    }
+    return 1;
+}
+
 /*
  * Get a mapping of a PV guest's l1e for this linear address.  The return
  * pointer should be unmapped using unmap_domain_page().
@@ -215,6 +485,526 @@ void pv_arch_init_memory(void)
 #endif
 }
 
+static int alloc_segdesc_page(struct page_info *page)
+{
+    const struct domain *owner = page_get_owner(page);
+    struct desc_struct *descs = __map_domain_page(page);
+    unsigned i;
+
+    for ( i = 0; i < 512; i++ )
+        if ( unlikely(!check_descriptor(owner, &descs[i])) )
+            break;
+
+    unmap_domain_page(descs);
+
+    return i == 512 ? 0 : -EINVAL;
+}
+
+static int alloc_l1_table(struct page_info *page)
+{
+    struct domain *d = page_get_owner(page);
+    l1_pgentry_t  *pl1e;
+    unsigned int   i;
+    int            ret = 0;
+
+    pl1e = __map_domain_page(page);
+
+    for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+    {
+        switch ( ret = get_page_from_l1e(pl1e[i], d, d) )
+        {
+        default:
+            goto fail;
+        case 0:
+            break;
+        case _PAGE_RW ... _PAGE_RW | PAGE_CACHE_ATTRS:
+            ASSERT(!(ret & ~(_PAGE_RW | PAGE_CACHE_ATTRS)));
+            l1e_flip_flags(pl1e[i], ret);
+            break;
+        }
+
+        pl1e[i] = adjust_guest_l1e(pl1e[i], d);
+    }
+
+    unmap_domain_page(pl1e);
+    return 0;
+
+ fail:
+    gdprintk(XENLOG_WARNING, "Failure in alloc_l1_table: slot %#x\n", i);
+    while ( i-- > 0 )
+        put_page_from_l1e(pl1e[i], d);
+
+    unmap_domain_page(pl1e);
+    return ret;
+}
+
+int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e)
+{
+    struct page_info *page;
+    l3_pgentry_t     l3e3;
+
+    if ( !is_pv_32bit_domain(d) )
+        return 1;
+
+    pl3e = (l3_pgentry_t *)((unsigned long)pl3e & PAGE_MASK);
+
+    /* 3rd L3 slot contains L2 with Xen-private mappings. It *must* exist. */
+    l3e3 = pl3e[3];
+    if ( !(l3e_get_flags(l3e3) & _PAGE_PRESENT) )
+    {
+        gdprintk(XENLOG_WARNING, "PAE L3 3rd slot is empty\n");
+        return 0;
+    }
+
+    /*
+     * The Xen-private mappings include linear mappings. The L2 thus cannot
+     * be shared by multiple L3 tables. The test here is adequate because:
+     *  1. Cannot appear in slots != 3 because get_page_type() checks the
+     *     PGT_pae_xen_l2 flag, which is asserted iff the L2 appears in slot 3
+     *  2. Cannot appear in another page table's L3:
+     *     a. alloc_l3_table() calls this function and this check will fail
+     *     b. mod_l3_entry() disallows updates to slot 3 in an existing table
+     */
+    page = l3e_get_page(l3e3);
+    BUG_ON(page->u.inuse.type_info & PGT_pinned);
+    BUG_ON((page->u.inuse.type_info & PGT_count_mask) == 0);
+    BUG_ON(!(page->u.inuse.type_info & PGT_pae_xen_l2));
+    if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
+    {
+        gdprintk(XENLOG_WARNING, "PAE L3 3rd slot is shared\n");
+        return 0;
+    }
+
+    return 1;
+}
+
+static int alloc_l2_table(struct page_info *page, unsigned long type,
+                          int preemptible)
+{
+    struct domain *d = page_get_owner(page);
+    unsigned long  pfn = mfn_x(page_to_mfn(page));
+    l2_pgentry_t  *pl2e;
+    unsigned int   i;
+    int            rc = 0;
+
+    pl2e = map_domain_page(_mfn(pfn));
+
+    for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; i++ )
+    {
+        if ( preemptible && i > page->nr_validated_ptes
+             && hypercall_preempt_check() )
+        {
+            page->nr_validated_ptes = i;
+            rc = -ERESTART;
+            break;
+        }
+
+        if ( !is_guest_l2_slot(d, type, i) ||
+             (rc = get_page_from_l2e(pl2e[i], pfn, d)) > 0 )
+            continue;
+
+        if ( rc < 0 )
+        {
+            gdprintk(XENLOG_WARNING, "Failure in alloc_l2_table: slot %#x\n", i);
+            while ( i-- > 0 )
+                if ( is_guest_l2_slot(d, type, i) )
+                    put_page_from_l2e(pl2e[i], pfn);
+            break;
+        }
+
+        pl2e[i] = adjust_guest_l2e(pl2e[i], d);
+    }
+
+    if ( rc >= 0 && (type & PGT_pae_xen_l2) )
+    {
+        /* Xen private mappings. */
+        memcpy(&pl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)],
+               &compat_idle_pg_table_l2[
+                   l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
+               COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*pl2e));
+    }
+
+    unmap_domain_page(pl2e);
+    return rc > 0 ? 0 : rc;
+}
+
+static int alloc_l3_table(struct page_info *page)
+{
+    struct domain *d = page_get_owner(page);
+    unsigned long  pfn = mfn_x(page_to_mfn(page));
+    l3_pgentry_t  *pl3e;
+    unsigned int   i;
+    int            rc = 0, partial = page->partial_pte;
+
+    pl3e = map_domain_page(_mfn(pfn));
+
+    /*
+     * PAE guests allocate full pages, but aren't required to initialize
+     * more than the first four entries; when running in compatibility
+     * mode, however, the full page is visible to the MMU, and hence all
+     * 512 entries must be valid/verified, which is most easily achieved
+     * by clearing them out.
+     */
+    if ( is_pv_32bit_domain(d) )
+        memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
+
+    for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES;
+          i++, partial = 0 )
+    {
+        if ( is_pv_32bit_domain(d) && (i == 3) )
+        {
+            if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ||
+                 (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) )
+                rc = -EINVAL;
+            else
+                rc = get_page_and_type_from_mfn(
+                    l3e_get_mfn(pl3e[i]),
+                    PGT_l2_page_table | PGT_pae_xen_l2, d, partial, 1);
+        }
+        else if ( (rc = get_page_from_l3e(pl3e[i], pfn, d, partial)) > 0 )
+            continue;
+
+        if ( rc == -ERESTART )
+        {
+            page->nr_validated_ptes = i;
+            page->partial_pte = partial ?: 1;
+        }
+        else if ( rc == -EINTR && i )
+        {
+            page->nr_validated_ptes = i;
+            page->partial_pte = 0;
+            rc = -ERESTART;
+        }
+        if ( rc < 0 )
+            break;
+
+        pl3e[i] = adjust_guest_l3e(pl3e[i], d);
+    }
+
+    if ( rc >= 0 && !create_pae_xen_mappings(d, pl3e) )
+        rc = -EINVAL;
+    if ( rc < 0 && rc != -ERESTART && rc != -EINTR )
+    {
+        gdprintk(XENLOG_WARNING, "Failure in alloc_l3_table: slot %#x\n", i);
+        if ( i )
+        {
+            page->nr_validated_ptes = i;
+            page->partial_pte = 0;
+            current->arch.old_guest_table = page;
+        }
+        while ( i-- > 0 )
+            pl3e[i] = unadjust_guest_l3e(pl3e[i], d);
+    }
+
+    unmap_domain_page(pl3e);
+    return rc > 0 ? 0 : rc;
+}
+
+static int alloc_l4_table(struct page_info *page)
+{
+    struct domain *d = page_get_owner(page);
+    unsigned long  pfn = mfn_x(page_to_mfn(page));
+    l4_pgentry_t  *pl4e = map_domain_page(_mfn(pfn));
+    unsigned int   i;
+    int            rc = 0, partial = page->partial_pte;
+
+    for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES;
+          i++, partial = 0 )
+    {
+        if ( !is_guest_l4_slot(d, i) ||
+             (rc = get_page_from_l4e(pl4e[i], pfn, d, partial)) > 0 )
+            continue;
+
+        if ( rc == -ERESTART )
+        {
+            page->nr_validated_ptes = i;
+            page->partial_pte = partial ?: 1;
+        }
+        else if ( rc < 0 )
+        {
+            if ( rc != -EINTR )
+                gdprintk(XENLOG_WARNING,
+                         "Failure in alloc_l4_table: slot %#x\n", i);
+            if ( i )
+            {
+                page->nr_validated_ptes = i;
+                page->partial_pte = 0;
+                if ( rc == -EINTR )
+                    rc = -ERESTART;
+                else
+                {
+                    if ( current->arch.old_guest_table )
+                        page->nr_validated_ptes++;
+                    current->arch.old_guest_table = page;
+                }
+            }
+        }
+        if ( rc < 0 )
+        {
+            unmap_domain_page(pl4e);
+            return rc;
+        }
+
+        pl4e[i] = adjust_guest_l4e(pl4e[i], d);
+    }
+
+    if ( rc >= 0 )
+    {
+        init_guest_l4_table(pl4e, d, !VM_ASSIST(d, m2p_strict));
+        atomic_inc(&d->arch.pv_domain.nr_l4_pages);
+        rc = 0;
+    }
+    unmap_domain_page(pl4e);
+
+    return rc;
+}
+
+int pv_alloc_page_type(struct page_info *page, unsigned long type,
+                       int preemptible)
+{
+    struct domain *owner = page_get_owner(page);
+    int rc;
+
+    /* A page table is dirtied when its type count becomes non-zero. */
+    if ( likely(owner != NULL) )
+        paging_mark_dirty(owner, page_to_mfn(page));
+
+    switch ( type & PGT_type_mask )
+    {
+    case PGT_l1_page_table:
+        rc = alloc_l1_table(page);
+        break;
+    case PGT_l2_page_table:
+        rc = alloc_l2_table(page, type, preemptible);
+        break;
+    case PGT_l3_page_table:
+        ASSERT(preemptible);
+        rc = alloc_l3_table(page);
+        break;
+    case PGT_l4_page_table:
+        ASSERT(preemptible);
+        rc = alloc_l4_table(page);
+        break;
+    case PGT_seg_desc_page:
+        rc = alloc_segdesc_page(page);
+        break;
+    default:
+        printk("Bad type in %s %lx t=%" PRtype_info " c=%lx\n", __func__,
+               type, page->u.inuse.type_info,
+               page->count_info);
+        rc = -EINVAL;
+        BUG();
+    }
+
+    /* No need for atomic update of type_info here: noone else updates it. */
+    smp_wmb();
+    switch ( rc )
+    {
+    case 0:
+        page->u.inuse.type_info |= PGT_validated;
+        break;
+    case -EINTR:
+        ASSERT((page->u.inuse.type_info &
+                (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
+        page->u.inuse.type_info &= ~PGT_count_mask;
+        break;
+    default:
+        ASSERT(rc < 0);
+        gdprintk(XENLOG_WARNING, "Error while validating mfn %" PRI_mfn
+                 " (pfn %" PRI_pfn ") for type %" PRtype_info
+                 ": caf=%08lx taf=%" PRtype_info "\n",
+                 mfn_x(page_to_mfn(page)),
+                 get_gpfn_from_mfn(mfn_x(page_to_mfn(page))),
+                 type, page->count_info, page->u.inuse.type_info);
+        if ( page != current->arch.old_guest_table )
+            page->u.inuse.type_info = 0;
+        else
+        {
+            ASSERT((page->u.inuse.type_info &
+                    (PGT_count_mask | PGT_validated)) == 1);
+    case -ERESTART:
+            get_page_light(page);
+            page->u.inuse.type_info |= PGT_partial;
+        }
+        break;
+    }
+
+    return rc;
+}
+
+static void free_l1_table(struct page_info *page)
+{
+    struct domain *d = page_get_owner(page);
+    l1_pgentry_t *pl1e;
+    unsigned int  i;
+
+    pl1e = __map_domain_page(page);
+
+    for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+        put_page_from_l1e(pl1e[i], d);
+
+    unmap_domain_page(pl1e);
+}
+
+
+static int free_l2_table(struct page_info *page, int preemptible)
+{
+    struct domain *d = page_get_owner(page);
+    unsigned long pfn = mfn_x(page_to_mfn(page));
+    l2_pgentry_t *pl2e;
+    unsigned int  i = page->nr_validated_ptes - 1;
+    int err = 0;
+
+    pl2e = map_domain_page(_mfn(pfn));
+
+    ASSERT(page->nr_validated_ptes);
+    do {
+        if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) &&
+             put_page_from_l2e(pl2e[i], pfn) == 0 &&
+             preemptible && i && hypercall_preempt_check() )
+        {
+           page->nr_validated_ptes = i;
+           err = -ERESTART;
+        }
+    } while ( !err && i-- );
+
+    unmap_domain_page(pl2e);
+
+    if ( !err )
+        page->u.inuse.type_info &= ~PGT_pae_xen_l2;
+
+    return err;
+}
+
+static int free_l3_table(struct page_info *page)
+{
+    struct domain *d = page_get_owner(page);
+    unsigned long pfn = mfn_x(page_to_mfn(page));
+    l3_pgentry_t *pl3e;
+    int rc = 0, partial = page->partial_pte;
+    unsigned int  i = page->nr_validated_ptes - !partial;
+
+    pl3e = map_domain_page(_mfn(pfn));
+
+    do {
+        rc = put_page_from_l3e(pl3e[i], pfn, partial, 0);
+        if ( rc < 0 )
+            break;
+        partial = 0;
+        if ( rc > 0 )
+            continue;
+        pl3e[i] = unadjust_guest_l3e(pl3e[i], d);
+    } while ( i-- );
+
+    unmap_domain_page(pl3e);
+
+    if ( rc == -ERESTART )
+    {
+        page->nr_validated_ptes = i;
+        page->partial_pte = partial ?: -1;
+    }
+    else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
+    {
+        page->nr_validated_ptes = i + 1;
+        page->partial_pte = 0;
+        rc = -ERESTART;
+    }
+    return rc > 0 ? 0 : rc;
+}
+
+static int free_l4_table(struct page_info *page)
+{
+    struct domain *d = page_get_owner(page);
+    unsigned long pfn = mfn_x(page_to_mfn(page));
+    l4_pgentry_t *pl4e = map_domain_page(_mfn(pfn));
+    int rc = 0, partial = page->partial_pte;
+    unsigned int  i = page->nr_validated_ptes - !partial;
+
+    do {
+        if ( is_guest_l4_slot(d, i) )
+            rc = put_page_from_l4e(pl4e[i], pfn, partial, 0);
+        if ( rc < 0 )
+            break;
+        partial = 0;
+    } while ( i-- );
+
+    if ( rc == -ERESTART )
+    {
+        page->nr_validated_ptes = i;
+        page->partial_pte = partial ?: -1;
+    }
+    else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
+    {
+        page->nr_validated_ptes = i + 1;
+        page->partial_pte = 0;
+        rc = -ERESTART;
+    }
+
+    unmap_domain_page(pl4e);
+
+    if ( rc >= 0 )
+    {
+        atomic_dec(&d->arch.pv_domain.nr_l4_pages);
+        rc = 0;
+    }
+
+    return rc;
+}
+
+int pv_free_page_type(struct page_info *page, unsigned long type,
+                      int preemptible)
+{
+    struct domain *owner = page_get_owner(page);
+    unsigned long gmfn;
+    int rc;
+
+    if ( likely(owner != NULL) && unlikely(paging_mode_enabled(owner)) )
+    {
+        /* A page table is dirtied when its type count becomes zero. */
+        paging_mark_dirty(owner, page_to_mfn(page));
+
+        ASSERT(!shadow_mode_refcounts(owner));
+
+        gmfn = mfn_to_gmfn(owner, mfn_x(page_to_mfn(page)));
+        ASSERT(VALID_M2P(gmfn));
+        /* Page sharing not supported for shadowed domains */
+        if(!SHARED_M2P(gmfn))
+            shadow_remove_all_shadows(owner, _mfn(gmfn));
+    }
+
+    if ( !(type & PGT_partial) )
+    {
+        page->nr_validated_ptes = 1U << PAGETABLE_ORDER;
+        page->partial_pte = 0;
+    }
+
+    switch ( type & PGT_type_mask )
+    {
+    case PGT_l1_page_table:
+        free_l1_table(page);
+        rc = 0;
+        break;
+    case PGT_l2_page_table:
+        rc = free_l2_table(page, preemptible);
+        break;
+    case PGT_l3_page_table:
+        ASSERT(preemptible);
+        rc = free_l3_table(page);
+        break;
+    case PGT_l4_page_table:
+        ASSERT(preemptible);
+        rc = free_l4_table(page);
+        break;
+    default:
+        gdprintk(XENLOG_WARNING, "type %" PRtype_info " mfn %" PRI_mfn "\n",
+                 type, mfn_x(page_to_mfn(page)));
+        rc = -EINVAL;
+        BUG();
+    }
+
+    return rc;
+}
+
 /*
  * Local variables:
  * mode: C
-- 
2.11.0

