
[Xen-devel] [PATCH,RFC 9/17] 32-on-64 memory ops



Index: 2006-10-04/xen/arch/x86/domain.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/domain.c       2006-10-04 15:18:36.000000000 +0200
+++ 2006-10-04/xen/arch/x86/domain.c    2006-10-04 15:18:51.000000000 +0200
@@ -114,6 +114,58 @@ void dump_pageframe_info(struct domain *
     }
 }
 
+#ifdef CONFIG_COMPAT
+int setup_arg_xlat_area(struct domain *d, unsigned int vcpu_id, l4_pgentry_t *l4tab)
+{
+    unsigned i;
+    struct page_info *pg;
+
+    if ( !d->arch.mm_arg_xlat_l3 )
+    {
+        pg = alloc_domheap_page(NULL);
+        if ( !pg )
+            return -ENOMEM;
+        d->arch.mm_arg_xlat_l3 = clear_page(page_to_virt(pg));
+    }
+
+    l4tab[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
+        l4e_from_paddr(__pa(d->arch.mm_arg_xlat_l3), __PAGE_HYPERVISOR);
+
+    for ( i = 0; i < COMPAT_ARG_XLAT_PAGES; ++i )
+    {
+        unsigned long va = COMPAT_ARG_XLAT_VIRT_START(vcpu_id) + i * PAGE_SIZE;
+        l2_pgentry_t *l2tab;
+        l1_pgentry_t *l1tab;
+
+        if ( !l3e_get_intpte(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]) )
+        {
+            pg = alloc_domheap_page(NULL);
+            if ( !pg )
+                return -ENOMEM;
+            clear_page(page_to_virt(pg));
+            d->arch.mm_arg_xlat_l3[l3_table_offset(va)] = l3e_from_page(pg, PAGE_HYPERVISOR);
+        }
+        l2tab = l3e_to_l2e(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]);
+        if ( !l2e_get_intpte(l2tab[l2_table_offset(va)]) )
+        {
+            pg = alloc_domheap_page(NULL);
+            if ( !pg )
+                return -ENOMEM;
+            clear_page(page_to_virt(pg));
+            l2tab[l2_table_offset(va)] = l2e_from_page(pg, PAGE_HYPERVISOR);
+        }
+        l1tab = l2e_to_l1e(l2tab[l2_table_offset(va)]);
+        BUG_ON(l1e_get_intpte(l1tab[l1_table_offset(va)]));
+        pg = alloc_domheap_page(NULL);
+        if ( !pg )
+            return -ENOMEM;
+        l1tab[l1_table_offset(va)] = l1e_from_page(pg, PAGE_HYPERVISOR);
+    }
+
+    return 0;
+}
+#endif
+
 struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id)
 {
     struct vcpu *v;
@@ -161,6 +213,13 @@ struct vcpu *alloc_vcpu_struct(struct do
             l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
         v->arch.guest_table = pagetable_from_page(pg);
         v->arch.guest_table_user = v->arch.guest_table;
+
+        if ( setup_arg_xlat_area(d, vcpu_id, l4tab) < 0 )
+        {
+            free_xenheap_page(l4tab);
+            xfree(v);
+            return NULL;
+        }
     }
 #endif
 
@@ -273,6 +332,46 @@ void arch_domain_destroy(struct domain *
     free_domheap_page(virt_to_page(d->arch.mm_perdomain_l3));
 #endif
 
+#ifdef CONFIG_COMPAT
+    if ( d->arch.mm_arg_xlat_l3 )
+    {
+        struct page_info *pg;
+        unsigned l3;
+
+        for ( l3 = 0; l3 < L3_PAGETABLE_ENTRIES; ++l3 )
+        {
+            if ( l3e_get_intpte(d->arch.mm_arg_xlat_l3[l3]) )
+            {
+                l2_pgentry_t *l2tab = l3e_to_l2e(d->arch.mm_arg_xlat_l3[l3]);
+                unsigned l2;
+
+                for ( l2 = 0; l2 < L2_PAGETABLE_ENTRIES; ++l2 )
+                {
+                    if ( l2e_get_intpte(l2tab[l2]) )
+                    {
+                        l1_pgentry_t *l1tab = l2e_to_l1e(l2tab[l2]);
+                        unsigned l1;
+
+                        for ( l1 = 0; l1 < L1_PAGETABLE_ENTRIES; ++l1 )
+                        {
+                            if ( l1e_get_intpte(l1tab[l1]) )
+                            {
+                                pg = l1e_get_page(l1tab[l1]);
+                                free_domheap_page(pg);
+                            }
+                        }
+                        pg = l2e_get_page(l2tab[l2]);
+                        free_domheap_page(pg);
+                    }
+                }
+                pg = l3e_get_page(d->arch.mm_arg_xlat_l3[l3]);
+                free_domheap_page(pg);
+            }
+        }
+        free_domheap_page(virt_to_page(d->arch.mm_arg_xlat_l3));
+    }
+#endif
+
     free_xenheap_page(d->shared_info);
 }
 
@@ -931,55 +1030,131 @@ unsigned long hypercall_create_continuat
 
         for ( i = 0; *p != '\0'; i++ )
             mcs->call.args[i] = next_arg(p, args);
+        if ( IS_COMPAT(current->domain) )
+        {
+            for ( ; i < 6; i++ )
+                mcs->call.args[i] = 0;
+        }
     }
     else
     {
         regs       = guest_cpu_user_regs();
-#if defined(__i386__)
         regs->eax  = op;
+        regs->eip -= 2;  /* re-execute 'syscall' / 'int 0x82' */
 
-        if ( supervisor_mode_kernel || hvm_guest(current) )
-            regs->eip &= ~31; /* re-execute entire hypercall entry stub */
+#if defined(__x86_64__)
+        if ( !IS_COMPAT(current->domain) )
+        {
+            for ( i = 0; *p != '\0'; i++ )
+            {
+                arg = next_arg(p, args);
+                switch ( i )
+                {
+                case 0: regs->rdi = arg; break;
+                case 1: regs->rsi = arg; break;
+                case 2: regs->rdx = arg; break;
+                case 3: regs->r10 = arg; break;
+                case 4: regs->r8  = arg; break;
+                case 5: regs->r9  = arg; break;
+                }
+            }
+        }
         else
-            regs->eip -= 2;   /* re-execute 'int 0x82' */
-
-        for ( i = 0; *p != '\0'; i++ )
+#endif
         {
-            arg = next_arg(p, args);
-            switch ( i )
+            if ( supervisor_mode_kernel || hvm_guest(current) )
+                regs->eip &= ~31; /* re-execute entire hypercall entry stub */
+
+            for ( i = 0; *p != '\0'; i++ )
             {
-            case 0: regs->ebx = arg; break;
-            case 1: regs->ecx = arg; break;
-            case 2: regs->edx = arg; break;
-            case 3: regs->esi = arg; break;
-            case 4: regs->edi = arg; break;
-            case 5: regs->ebp = arg; break;
+                arg = next_arg(p, args);
+                switch ( i )
+                {
+                case 0: regs->ebx = arg; break;
+                case 1: regs->ecx = arg; break;
+                case 2: regs->edx = arg; break;
+                case 3: regs->esi = arg; break;
+                case 4: regs->edi = arg; break;
+                case 5: regs->ebp = arg; break;
+                }
             }
         }
-#elif defined(__x86_64__)
-        regs->rax  = op;
-        regs->rip -= 2;  /* re-execute 'syscall' */
+    }
 
-        for ( i = 0; *p != '\0'; i++ )
+    va_end(args);
+
+    return op;
+}
+
+#ifdef CONFIG_COMPAT
+int hypercall_xlat_continuation(unsigned int mask, ...)
+{
+    int rc = 0;
+    struct mc_state *mcs = &this_cpu(mc_state);
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
+    unsigned int i, cval = 0;
+    unsigned long nval = 0;
+    va_list args;
+
+    va_start(args, mask);
+
+    if ( test_bit(_MCSF_in_multicall, &mcs->flags) )
+    {
+        for ( i = 0; i < 6; ++i, mask >>= 1 )
         {
-            arg = next_arg(p, args);
+            if ( (mask & 1) )
+            {
+                nval = va_arg(args, unsigned long);
+                cval = va_arg(args, unsigned int);
+            }
+            if ( (mask & 1) && mcs->call.args[i] == nval )
+            {
+                ++rc;
+            }
+            else
+            {
+                cval = mcs->call.args[i];
+                BUG_ON(mcs->call.args[i] != cval);
+            }
+            mcs->compat_call.args[i] = cval;
+        }
+    }
+    else
+    {
+        for ( i = 0; i < 6; ++i, mask >>= 1 )
+        {
+            unsigned long *reg;
+
             switch ( i )
             {
-            case 0: regs->rdi = arg; break;
-            case 1: regs->rsi = arg; break;
-            case 2: regs->rdx = arg; break;
-            case 3: regs->r10 = arg; break;
-            case 4: regs->r8  = arg; break;
-            case 5: regs->r9  = arg; break;
+            case 0: reg = &regs->ebx; break;
+            case 1: reg = &regs->ecx; break;
+            case 2: reg = &regs->edx; break;
+            case 3: reg = &regs->esi; break;
+            case 4: reg = &regs->edi; break;
+            case 5: reg = &regs->ebp; break;
+            default: BUG(); reg = NULL; break;
+            }
+            if ( (mask & 1) )
+            {
+                nval = va_arg(args, unsigned long);
+                cval = va_arg(args, unsigned int);
+            }
+            if ( (mask & 1) && *reg == nval )
+            {
+                *reg = cval;
+                ++rc;
             }
+            else
+                BUG_ON(*reg != (unsigned int)*reg);
         }
-#endif
     }
 
     va_end(args);
 
-    return op;
+    return rc;
 }
+#endif
 
 static void relinquish_memory(struct domain *d, struct list_head *list)
 {
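
hypercall_xlat_continuation() takes one (native value, compat value) pair of
trailing arguments per bit set in 'mask', where bit i corresponds to
hypercall argument slot i; a non-zero return means a continuation was
pending and the marked slots have been rewritten. A minimal caller sketch
(illustration only, assuming 'cmd' and 'compat' are the wrapper's own
parameters):

    /* The per-vCPU translation area holds the native copy of the argument. */
    void *nat = (void *)COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id);
    int rc = do_memory_op(cmd, guest_handle_from_ptr(nat, void));

    /* Slot 1 (mask 0x02) held 'nat'; if do_memory_op() created a
     * continuation, retarget that slot at the guest's own 32-bit handle. */
    if ( hypercall_xlat_continuation(0x02, nat, compat) )
        /* a continuation is pending; arrange to redo the remainder */;
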
Index: 2006-10-04/xen/arch/x86/domain_build.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/domain_build.c 2006-10-04 15:16:05.000000000 +0200
+++ 2006-10-04/xen/arch/x86/domain_build.c      2006-10-04 15:18:51.000000000 +0200
@@ -665,7 +665,11 @@ int construct_dom0(struct domain *d,
         l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
     v->arch.guest_table = pagetable_from_paddr(__pa(l4start));
     if ( IS_COMPAT(d) )
+    {
         v->arch.guest_table_user = v->arch.guest_table;
+        if ( setup_arg_xlat_area(d, 0, l4start) < 0 )
+            panic("Not enough RAM for domain 0 hypercall argument 
translation.\n");
+    }
 
     l4tab += l4_table_offset(dsi.v_start);
     mfn = alloc_spfn;
Index: 2006-10-04/xen/arch/x86/mm.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/mm.c   2006-10-04 15:18:45.000000000 +0200
+++ 2006-10-04/xen/arch/x86/mm.c        2006-10-04 15:18:51.000000000 +0200
@@ -1106,9 +1106,12 @@ static int alloc_l4_table(struct page_in
     pl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
         l4e_from_pfn(pfn, __PAGE_HYPERVISOR);
     pl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
-        l4e_from_page(
-            virt_to_page(page_get_owner(page)->arch.mm_perdomain_l3),
-            __PAGE_HYPERVISOR);
+        l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
+                      __PAGE_HYPERVISOR);
+    if ( IS_COMPAT(d) )
+        pl4e[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
+            l4e_from_page(virt_to_page(d->arch.mm_arg_xlat_l3),
+                          __PAGE_HYPERVISOR);
 
     return 1;
 
@@ -2732,7 +2735,9 @@ int do_update_va_mapping(unsigned long v
             flush_tlb_mask(d->domain_dirty_cpumask);
             break;
         default:
-            if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
+            if ( unlikely(!IS_COMPAT(d) ?
+                          get_user(vmask, (unsigned long *)bmap_ptr) :
+                          get_user(vmask, (unsigned int *)bmap_ptr)) )
                 rc = -EFAULT;
             pmask = vcpumask_to_pcpumask(d, vmask);
             flush_tlb_mask(pmask);
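
The conditional get_user() above matters because the guest hands over a
pointer to a vcpu bitmap sized per its own ABI; reading a full unsigned long
at a compat guest's 32-bit buffer would pull in four unrelated bytes. A
standalone illustration (plain C, not Xen code; little-endian assumed):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
        unsigned char buf[8] = { 0xff, 0, 0, 0,            /* 32-bit mask */
                                 0xde, 0xad, 0xbe, 0xef }; /* unrelated data */
        uint32_t m32;
        uint64_t m64;

        memcpy(&m32, buf, sizeof(m32)); /* what the compat path reads */
        memcpy(&m64, buf, sizeof(m64)); /* what a native-width read would get */
        assert(m32 == 0xff);
        assert(m64 != m32);             /* stray bytes from beyond the mask */
        return 0;
    }
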
Index: 2006-10-04/xen/arch/x86/x86_64/Makefile
===================================================================
--- 2006-10-04.orig/xen/arch/x86/x86_64/Makefile        2006-10-04 15:06:22.000000000 +0200
+++ 2006-10-04/xen/arch/x86/x86_64/Makefile     2006-10-04 15:18:51.000000000 +0200
@@ -6,5 +6,6 @@ obj-y += traps.o
 ifeq ($(CONFIG_COMPAT),y)
 # extra dependencies
 entry.o:       compat/entry.S
+mm.o:          compat/mm.c
 traps.o:       compat/traps.c
 endif
Index: 2006-10-04/xen/arch/x86/x86_64/compat/entry.S
===================================================================
--- 2006-10-04.orig/xen/arch/x86/x86_64/compat/entry.S  2006-10-04 15:11:03.000000000 +0200
+++ 2006-10-04/xen/arch/x86/x86_64/compat/entry.S       2006-10-04 15:18:51.000000000 +0200
@@ -282,15 +282,11 @@ CFIX14:
 #define compat_mmu_update domain_crash_synchronous
 #define compat_set_gdt domain_crash_synchronous
 #define compat_platform_op domain_crash_synchronous
-#define compat_update_descriptor domain_crash_synchronous
-#define compat_memory_op domain_crash_synchronous
 #define compat_multicall domain_crash_synchronous
-#define compat_update_va_mapping domain_crash_synchronous
 #define compat_set_timer_op domain_crash_synchronous
 #define compat_event_channel_op_compat domain_crash_synchronous
 #define compat_physdev_op_compat domain_crash_synchronous
 #define compat_grant_table_op domain_crash_synchronous
-#define compat_update_va_mapping_otherdomain domain_crash_synchronous
 #define compat_vcpu_op domain_crash_synchronous
 #define compat_mmuext_op domain_crash_synchronous
 #define compat_acm_op domain_crash_synchronous
Index: 2006-10-04/xen/arch/x86/x86_64/compat/mm.c
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ 2006-10-04/xen/arch/x86/x86_64/compat/mm.c  2006-10-04 15:18:51.000000000 +0200
@@ -0,0 +1,128 @@
+#ifdef CONFIG_COMPAT
+
+#include <compat/memory.h>
+
+int compat_update_descriptor(u32 pa_lo, u32 pa_hi, u32 desc_lo, u32 desc_hi)
+{
+    return do_update_descriptor(pa_lo | ((u64)pa_hi << 32),
+                                desc_lo | ((u64)desc_hi << 32));
+}
+
+int compat_arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
+{
+    struct compat_machphys_mfn_list xmml;
+    l2_pgentry_t l2e;
+    unsigned long v;
+    compat_pfn_t mfn;
+    unsigned int i;
+    int rc = 0;
+
+    switch ( op )
+    {
+    case XENMEM_add_to_physmap:
+    {
+        struct compat_add_to_physmap cmp;
+        struct xen_add_to_physmap *nat = (void *)COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id);
+
+        if ( copy_from_guest(&cmp, arg, 1) )
+            return -EFAULT;
+
+        XLAT_add_to_physmap(nat, &cmp);
+        rc = arch_memory_op(op, guest_handle_from_ptr(nat, void));
+
+        break;
+    }
+
+    case XENMEM_memory_map:
+    case XENMEM_machine_memory_map:
+    {
+        struct compat_memory_map cmp;
+        struct xen_memory_map *nat = (void *)COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id);
+
+        if ( copy_from_guest(&cmp, arg, 1) )
+            return -EFAULT;
+#define XLAT_memory_map_HNDL_buffer(_d_, _s_) \
+        guest_from_compat_handle((_d_)->buffer, (_s_)->buffer)
+        XLAT_memory_map(nat, &cmp);
+#undef XLAT_memory_map_HNDL_buffer
+
+        rc = arch_memory_op(op, guest_handle_from_ptr(nat, void));
+        if ( rc < 0 )
+            break;
+
+#define XLAT_memory_map_HNDL_buffer(_d_, _s_) ((void)0)
+        XLAT_memory_map(&cmp, nat);
+#undef XLAT_memory_map_HNDL_buffer
+        if ( copy_to_guest(arg, &cmp, 1) )
+            rc = -EFAULT;
+
+        break;
+    }
+
+    case XENMEM_machphys_mapping:
+    {
+        static /*const*/ struct compat_machphys_mapping mapping = {
+            .v_start = MACH2PHYS_COMPAT_VIRT_START,
+            .v_end   = MACH2PHYS_COMPAT_VIRT_END,
+            .max_mfn = MACH2PHYS_COMPAT_NR_ENTRIES - 1
+        };
+
+        if ( copy_to_guest(arg, &mapping, 1) )
+            rc = -EFAULT;
+
+        break;
+    }
+
+    case XENMEM_machphys_mfn_list:
+        if ( copy_from_guest(&xmml, arg, 1) )
+            return -EFAULT;
+
+        for ( i = 0, v = RDWR_COMPAT_MPT_VIRT_START;
+              (i != xmml.max_extents) && (v != RDWR_COMPAT_MPT_VIRT_END);
+              i++, v += 1 << L2_PAGETABLE_SHIFT )
+        {
+            l2e = compat_idle_pg_table_l2[l2_table_offset(v)];
+            if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+                break;
+            mfn = l2e_get_pfn(l2e) + l1_table_offset(v);
+            if ( copy_to_compat_offset(xmml.extent_start, i, &mfn, 1) )
+                return -EFAULT;
+        }
+
+        xmml.nr_extents = i;
+        if ( copy_to_guest(arg, &xmml, 1) )
+            rc = -EFAULT;
+
+        break;
+
+    default:
+        rc = -ENOSYS;
+        break;
+    }
+
+    return rc;
+}
+
+int compat_update_va_mapping(unsigned int va, u32 lo, u32 hi,
+                             unsigned int flags)
+{
+    return do_update_va_mapping(va, lo | ((u64)hi << 32), flags);
+}
+
+int compat_update_va_mapping_otherdomain(unsigned long va, u32 lo, u32 hi,
+                                         unsigned long flags,
+                                         domid_t domid)
+{
+    return do_update_va_mapping_otherdomain(va, lo | ((u64)hi << 32), flags, domid);
+}
+#endif /* CONFIG_COMPAT */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
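
compat_update_descriptor() and the two VA-mapping wrappers above rejoin a
64-bit quantity that the 32-bit ABI passes as two registers. A standalone
check of the lo/hi recombination (plain C, not Xen code; the descriptor
value is an arbitrary example):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t desc = 0x00cf9a000000ffffULL;
        uint32_t lo = (uint32_t)desc;
        uint32_t hi = (uint32_t)(desc >> 32);

        assert((lo | ((uint64_t)hi << 32)) == desc);
        return 0;
    }
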
Index: 2006-10-04/xen/arch/x86/x86_64/mm.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/x86_64/mm.c    2006-10-04 15:18:45.000000000 +0200
+++ 2006-10-04/xen/arch/x86/x86_64/mm.c 2006-10-04 15:18:51.000000000 +0200
@@ -28,6 +28,7 @@
 #include <asm/page.h>
 #include <asm/flushtlb.h>
 #include <asm/fixmap.h>
+#include <asm/hypercall.h>
 #include <asm/msr.h>
 #include <public/memory.h>
 
@@ -383,6 +384,8 @@ int check_descriptor(const struct domain
     return 0;
 }
 
+#include "compat/mm.c"
+
 /*
  * Local variables:
  * mode: C
Index: 2006-10-04/xen/common/compat/Makefile
===================================================================
--- 2006-10-04.orig/xen/common/compat/Makefile  2006-10-04 15:10:46.000000000 +0200
+++ 2006-10-04/xen/common/compat/Makefile       2006-10-04 15:18:51.000000000 +0200
@@ -1,4 +1,5 @@
 obj-y += kernel.o
+obj-y += memory.o
 obj-y += xlat.o
 
 # extra dependencies
Index: 2006-10-04/xen/common/compat/memory.c
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ 2006-10-04/xen/common/compat/memory.c       2006-10-04 15:18:51.000000000 +0200
@@ -0,0 +1,358 @@
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/hypercall.h>
+#include <xen/guest_access.h>
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <asm/current.h>
+#include <compat/memory.h>
+
+int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE(void) compat)
+{
+    int rc, split, op = cmd & MEMOP_CMD_MASK;
+    unsigned int start_extent = cmd >> MEMOP_EXTENT_SHIFT;
+
+    do
+    {
+        unsigned int i, end_extent = 0;
+        union {
+            XEN_GUEST_HANDLE(void) hnd;
+            struct xen_memory_reservation *rsrv;
+            struct xen_memory_exchange *xchg;
+            struct xen_translate_gpfn_list *xlat;
+        } nat;
+        union {
+            struct compat_memory_reservation rsrv;
+            struct compat_memory_exchange xchg;
+            struct compat_translate_gpfn_list xlat;
+        } cmp;
+
+        set_xen_guest_handle(nat.hnd, (void *)COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id));
+        split = 0;
+        switch ( op )
+        {
+            xen_pfn_t *space;
+
+        case XENMEM_increase_reservation:
+        case XENMEM_decrease_reservation:
+        case XENMEM_populate_physmap:
+            if ( copy_from_guest(&cmp.rsrv, compat, 1) )
+                return start_extent;
+
+            /* Is size too large for us to encode a continuation? */
+            if ( cmp.rsrv.nr_extents > (UINT_MAX >> MEMOP_EXTENT_SHIFT) )
+                return start_extent;
+
+            if ( !compat_handle_is_null(cmp.rsrv.extent_start) &&
+                 !compat_handle_okay(cmp.rsrv.extent_start, cmp.rsrv.nr_extents) )
+                return start_extent;
+
+            end_extent = start_extent + (COMPAT_ARG_XLAT_SIZE - sizeof(*nat.rsrv)) /
+                                        sizeof(*space);
+            if ( end_extent > cmp.rsrv.nr_extents )
+                end_extent = cmp.rsrv.nr_extents;
+
+            space = (xen_pfn_t *)(nat.rsrv + 1);
+#define XLAT_memory_reservation_HNDL_extent_start(_d_, _s_) \
+            do \
+            { \
+                if ( !compat_handle_is_null((_s_)->extent_start) ) \
+                { \
+                    set_xen_guest_handle((_d_)->extent_start, space - start_extent); \
+                    if ( op != XENMEM_increase_reservation ) \
+                    { \
+                        for ( i = start_extent; i < end_extent; ++i ) \
+                        { \
+                            compat_pfn_t pfn; \
+                            if ( __copy_from_compat_offset(&pfn, (_s_)->extent_start, i, 1) ) \
+                            { \
+                                end_extent = i; \
+                                split = -1; \
+                                break; \
+                            } \
+                            *space++ = pfn; \
+                        } \
+                    } \
+                } \
+                else \
+                { \
+                    set_xen_guest_handle((_d_)->extent_start, NULL); \
+                    end_extent = cmp.rsrv.nr_extents; \
+                } \
+            } while (0)
+            XLAT_memory_reservation(nat.rsrv, &cmp.rsrv);
+#undef XLAT_memory_reservation_HNDL_extent_start
+
+            if ( end_extent < cmp.rsrv.nr_extents )
+            {
+                nat.rsrv->nr_extents = end_extent;
+                ++split;
+            }
+
+            break;
+
+        case XENMEM_exchange:
+        {
+            int order_delta;
+
+            if ( copy_from_guest(&cmp.xchg, compat, 1) )
+                return -EFAULT;
+
+            order_delta = cmp.xchg.out.extent_order - cmp.xchg.in.extent_order;
+            /* Various sanity checks. */
+            if ( (cmp.xchg.nr_exchanged > cmp.xchg.in.nr_extents) ||
+                 (order_delta > 0 && (cmp.xchg.nr_exchanged & ((1U << order_delta) - 1))) ||
+                 /* Sizes of input and output lists do not overflow an int? */
+                 ((~0U >> cmp.xchg.in.extent_order) < cmp.xchg.in.nr_extents) ||
+                 ((~0U >> cmp.xchg.out.extent_order) < cmp.xchg.out.nr_extents) ||
+                 /* Sizes of input and output lists match? */
+                 ((cmp.xchg.in.nr_extents << cmp.xchg.in.extent_order) !=
+                  (cmp.xchg.out.nr_extents << cmp.xchg.out.extent_order)) )
+                return -EINVAL;
+
+            start_extent = cmp.xchg.nr_exchanged;
+            end_extent = (COMPAT_ARG_XLAT_SIZE - sizeof(*nat.xchg)) /
+                         (((1U << __builtin_abs(order_delta)) + 1) *
+                          sizeof(*space));
+            if ( end_extent == 0 )
+            {
+                printk("Cannot translate compatibility mode XENMEM_exchange 
extents (%u,%u)\n",
+                       cmp.xchg.in.extent_order, cmp.xchg.out.extent_order);
+                return -E2BIG;
+            }
+            if ( order_delta > 0 )
+                end_extent <<= order_delta;
+            end_extent += start_extent;
+            if ( end_extent > cmp.xchg.in.nr_extents )
+                end_extent = cmp.xchg.in.nr_extents;
+
+            space = (xen_pfn_t *)(nat.xchg + 1);
+            /* Code below depends upon .in preceding .out. */
+            BUILD_BUG_ON(offsetof(xen_memory_exchange_t, in) > offsetof(xen_memory_exchange_t, out));
+#define XLAT_memory_reservation_HNDL_extent_start(_d_, _s_) \
+            do \
+            { \
+                set_xen_guest_handle((_d_)->extent_start, space - start_extent); \
+                for ( i = start_extent; i < end_extent; ++i ) \
+                { \
+                    compat_pfn_t pfn; \
+                    if ( __copy_from_compat_offset(&pfn, (_s_)->extent_start, i, 1) ) \
+                        return -EFAULT; \
+                    *space++ = pfn; \
+                } \
+                if ( order_delta > 0 ) \
+                { \
+                    start_extent >>= order_delta; \
+                    end_extent >>= order_delta; \
+                } \
+                else \
+                { \
+                    start_extent <<= -order_delta; \
+                    end_extent <<= -order_delta; \
+                } \
+                order_delta = -order_delta; \
+            } while (0)
+            XLAT_memory_exchange(nat.xchg, &cmp.xchg);
+#undef XLAT_memory_reservation_HNDL_extent_start
+
+            if ( end_extent < cmp.xchg.in.nr_extents )
+            {
+                nat.xchg->in.nr_extents = end_extent;
+                if ( order_delta >= 0 )
+                    nat.xchg->out.nr_extents = end_extent >> order_delta;
+                else
+                    nat.xchg->out.nr_extents = end_extent << order_delta;
+                ++split;
+            }
+
+            break;
+        }
+
+        case XENMEM_current_reservation:
+        case XENMEM_maximum_reservation:
+        {
+#define xen_domid_t domid_t
+#define compat_domid_t domid_compat_t
+            CHECK_TYPE(domid);
+#undef compat_domid_t
+#undef xen_domid_t
+        }
+        case XENMEM_maximum_ram_page:
+            nat.hnd = compat;
+            break;
+
+        case XENMEM_translate_gpfn_list:
+            if ( copy_from_guest(&cmp.xlat, compat, 1) )
+                return -EFAULT;
+
+            /* Is size too large for us to encode a continuation? */
+            if ( cmp.xlat.nr_gpfns > (UINT_MAX >> MEMOP_EXTENT_SHIFT) )
+                return -EINVAL;
+
+            if ( !compat_handle_okay(cmp.xlat.gpfn_list, cmp.xlat.nr_gpfns) ||
+                 !compat_handle_okay(cmp.xlat.mfn_list,  cmp.xlat.nr_gpfns) )
+                return -EFAULT;
+
+            end_extent = start_extent + (COMPAT_ARG_XLAT_SIZE - sizeof(*nat.xlat)) /
+                                        sizeof(*space);
+            if ( end_extent > cmp.xlat.nr_gpfns )
+                end_extent = cmp.xlat.nr_gpfns;
+
+            space = (xen_pfn_t *)(nat.xlat + 1);
+            /* Code below depends upon .gpfn_list preceding .mfn_list. */
+            BUILD_BUG_ON(offsetof(xen_translate_gpfn_list_t, gpfn_list) > offsetof(xen_translate_gpfn_list_t, mfn_list));
+#define XLAT_translate_gpfn_list_HNDL_gpfn_list(_d_, _s_) \
+            do \
+            { \
+                set_xen_guest_handle((_d_)->gpfn_list, space - start_extent); \
+                for ( i = start_extent; i < end_extent; ++i ) \
+                { \
+                    compat_pfn_t pfn; \
+                    if ( __copy_from_compat_offset(&pfn, (_s_)->gpfn_list, i, 1) ) \
+                        return -EFAULT; \
+                    *space++ = pfn; \
+                } \
+            } while (0)
+#define XLAT_translate_gpfn_list_HNDL_mfn_list(_d_, _s_) \
+            (_d_)->mfn_list = (_d_)->gpfn_list
+            XLAT_translate_gpfn_list(nat.xlat, &cmp.xlat);
+#undef XLAT_translate_gpfn_list_HNDL_mfn_list
+#undef XLAT_translate_gpfn_list_HNDL_gpfn_list
+
+            if ( end_extent < cmp.xlat.nr_gpfns )
+            {
+                nat.xlat->nr_gpfns = end_extent;
+                ++split;
+            }
+
+            break;
+
+        default:
+            return compat_arch_memory_op(cmd, compat);
+        }
+
+        rc = do_memory_op(cmd, nat.hnd);
+        if ( rc < 0 )
+            return rc;
+
+        if ( hypercall_xlat_continuation(0x02, nat.hnd, compat) )
+            split = -1;
+
+        switch ( op )
+        {
+        case XENMEM_increase_reservation:
+        case XENMEM_decrease_reservation:
+        case XENMEM_populate_physmap:
+            end_extent = split >= 0 ? rc : rc >> MEMOP_EXTENT_SHIFT;
+            if ( op != XENMEM_decrease_reservation &&
+                 !guest_handle_is_null(nat.rsrv->extent_start) )
+            {
+                for ( ; start_extent < end_extent; ++start_extent )
+                {
+                    compat_pfn_t pfn = nat.rsrv->extent_start.p[start_extent];
+
+                    BUG_ON(pfn != nat.rsrv->extent_start.p[start_extent]);
+                    if ( __copy_to_compat_offset(cmp.rsrv.extent_start, start_extent, &pfn, 1) )
+                    {
+                        if ( split >= 0 )
+                        {
+                            rc = start_extent;
+                            split = 0;
+                        }
+                        else
+                            /*
+                             * Short of being able to cancel the continuation,
+                             * force it to restart here; eventually we shall
+                             * get out of this state.
+                             */
+                            rc = (start_extent << MEMOP_EXTENT_SHIFT) | op;
+                        break;
+                    }
+                }
+            }
+            else
+                start_extent = end_extent;
+            break;
+
+        case XENMEM_exchange:
+        {
+            DEFINE_XEN_GUEST_HANDLE(compat_memory_exchange_t);
+            int order_delta;
+
+            BUG_ON(rc);
+            BUG_ON(end_extent < nat.xchg->nr_exchanged);
+            end_extent = nat.xchg->nr_exchanged;
+
+            order_delta = cmp.xchg.out.extent_order - cmp.xchg.in.extent_order;
+            if ( order_delta > 0 )
+            {
+                start_extent >>= order_delta;
+                BUG_ON(end_extent & ((1U << order_delta) - 1));
+                end_extent >>= order_delta;
+            }
+            else
+            {
+                start_extent <<= -order_delta;
+                end_extent <<= -order_delta;
+            }
+
+            for ( ; start_extent < end_extent; ++start_extent )
+            {
+                compat_pfn_t pfn = nat.xchg->out.extent_start.p[start_extent];
+
+                BUG_ON(pfn != nat.xchg->out.extent_start.p[start_extent]);
+                /* Note that we ignore errors accessing the output extent list. */
+                __copy_to_compat_offset(cmp.xchg.out.extent_start, start_extent, &pfn, 1);
+            }
+
+            cmp.xchg.nr_exchanged = nat.xchg->nr_exchanged;
+            if ( copy_field_to_guest(guest_handle_cast(compat, compat_memory_exchange_t),
+                                     &cmp.xchg, nr_exchanged) )
+            {
+                if ( split < 0 )
+                    /* Cannot cancel the continuation... */
+                    domain_crash_synchronous();
+                return -EFAULT;
+            }
+            break;
+        }
+
+        case XENMEM_maximum_ram_page:
+        case XENMEM_current_reservation:
+        case XENMEM_maximum_reservation:
+            break;
+
+        case XENMEM_translate_gpfn_list:
+            if ( split < 0 )
+                end_extent = rc >> MEMOP_EXTENT_SHIFT;
+            else
+                BUG_ON(rc);
+
+            for ( ; start_extent < end_extent; ++start_extent )
+            {
+                compat_pfn_t pfn = nat.xlat->mfn_list.p[start_extent];
+
+                BUG_ON(pfn != nat.xlat->mfn_list.p[start_extent]);
+                if ( __copy_to_compat_offset(cmp.xlat.mfn_list, start_extent, &pfn, 1) )
+                {
+                    if ( split < 0 )
+                        /* Cannot cancel the continuation... */
+                        domain_crash_synchronous();
+                    return -EFAULT;
+                }
+            }
+            break;
+
+        default:
+            domain_crash_synchronous();
+            break;
+        }
+
+        cmd = op | (start_extent << MEMOP_EXTENT_SHIFT);
+        if ( split > 0 && hypercall_preempt_check() )
+            return hypercall_create_continuation(
+                __HYPERVISOR_memory_op, "ih", cmd, compat);
+    } while ( split > 0 );
+
+    return rc;
+}
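
Each pass of the loop above translates only as many extents as fit in the
per-vCPU translation area, then either loops or encodes its progress into
'cmd' for a continuation. A rough capacity estimate (standalone C; the
sizes here are assumptions for illustration: one 4kB translation page,
8-byte xen_pfn_t, 32-byte reservation header):

    #include <stdio.h>

    int main(void)
    {
        unsigned int xlat_size = 4096, hdr = 32, pfn_size = 8;

        /* extents translatable per pass, e.g. for XENMEM_populate_physmap */
        printf("%u extents per pass\n", (xlat_size - hdr) / pfn_size); /* 508 */
        return 0;
    }
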
Index: 2006-10-04/xen/common/memory.c
===================================================================
--- 2006-10-04.orig/xen/common/memory.c 2006-08-21 18:02:24.000000000 +0200
+++ 2006-10-04/xen/common/memory.c      2006-10-04 15:18:51.000000000 +0200
@@ -17,18 +17,12 @@
 #include <xen/shadow.h>
 #include <xen/iocap.h>
 #include <xen/guest_access.h>
+#include <xen/hypercall.h>
 #include <xen/errno.h>
 #include <asm/current.h>
 #include <asm/hardirq.h>
 #include <public/memory.h>
 
-/*
- * To allow safe resume of do_memory_op() after preemption, we need to know 
- * at what point in the page list to resume. For this purpose I steal the 
- * high-order bits of the @cmd parameter, which are otherwise unused and zero.
- */
-#define START_EXTENT_SHIFT 4 /* cmd[:4] == start_extent */
-
 static long
 increase_reservation(
     struct domain *d, 
@@ -236,7 +230,7 @@ translate_gpfn_list(
         return -EFAULT;
 
     /* Is size too large for us to encode a continuation? */
-    if ( op.nr_gpfns > (ULONG_MAX >> START_EXTENT_SHIFT) )
+    if ( op.nr_gpfns > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
         return -EINVAL;
 
     if ( !guest_handle_okay(op.gpfn_list, op.nr_gpfns) ||
@@ -517,20 +511,20 @@ long do_memory_op(unsigned long cmd, XEN
     struct xen_memory_reservation reservation;
     domid_t domid;
 
-    op = cmd & ((1 << START_EXTENT_SHIFT) - 1);
+    op = cmd & MEMOP_CMD_MASK;
 
     switch ( op )
     {
     case XENMEM_increase_reservation:
     case XENMEM_decrease_reservation:
     case XENMEM_populate_physmap:
-        start_extent = cmd >> START_EXTENT_SHIFT;
+        start_extent = cmd >> MEMOP_EXTENT_SHIFT;
 
         if ( copy_from_guest(&reservation, arg, 1) )
             return start_extent;
 
         /* Is size too large for us to encode a continuation? */
-        if ( reservation.nr_extents > (ULONG_MAX >> START_EXTENT_SHIFT) )
+        if ( reservation.nr_extents > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
             return start_extent;
 
         if ( unlikely(start_extent > reservation.nr_extents) )
@@ -594,7 +588,7 @@ long do_memory_op(unsigned long cmd, XEN
         if ( preempted )
             return hypercall_create_continuation(
                 __HYPERVISOR_memory_op, "lh",
-                op | (rc << START_EXTENT_SHIFT), arg);
+                op | (rc << MEMOP_EXTENT_SHIFT), arg);
 
         break;
 
@@ -626,14 +620,14 @@ long do_memory_op(unsigned long cmd, XEN
         break;
 
     case XENMEM_translate_gpfn_list:
-        progress = cmd >> START_EXTENT_SHIFT;
+        progress = cmd >> MEMOP_EXTENT_SHIFT;
         rc = translate_gpfn_list(
             guest_handle_cast(arg, xen_translate_gpfn_list_t),
             &progress);
         if ( rc == -EAGAIN )
             return hypercall_create_continuation(
                 __HYPERVISOR_memory_op, "lh",
-                op | (progress << START_EXTENT_SHIFT), arg);
+                op | (progress << MEMOP_EXTENT_SHIFT), arg);
         break;
 
     default:
Index: 2006-10-04/xen/include/asm-x86/config.h
===================================================================
--- 2006-10-04.orig/xen/include/asm-x86/config.h        2006-10-04 15:16:05.000000000 +0200
+++ 2006-10-04/xen/include/asm-x86/config.h     2006-10-04 15:18:51.000000000 +0200
@@ -114,7 +114,7 @@ static inline void FORCE_CRASH(void) 
 /*
  * Memory layout:
  *  0x0000000000000000 - 0x00007fffffffffff [128TB, 2^47 bytes, PML4:0-255]
- *    Guest-defined use.
+ *    Guest-defined use (see below for compatibility mode guests).
  *  0x0000800000000000 - 0xffff7fffffffffff [16EB]
  *    Inaccessible: current arch only supports 48-bit sign-extended VAs.
  *  0xffff800000000000 - 0xffff803fffffffff [256GB, 2^38 bytes, PML4:256]
@@ -147,6 +147,18 @@ static inline void FORCE_CRASH(void) 
  *    Reserved for future use.
  *  0xffff880000000000 - 0xffffffffffffffff [120TB, PML4:272-511]
  *    Guest-defined use.
+ *
+ * Compatibility guest area layout:
+ *  0x0000000000000000 - 0x00000000f57fffff [3928MB,            PML4:0]
+ *    Guest-defined use.
+ *  0x00000000f5800000 - 0x00000000ffffffff [168MB,             PML4:0]
+ *    Read-only machine-to-phys translation table (GUEST ACCESSIBLE).
+ *  0x0000000100000000 - 0x0000007fffffffff [508GB,             PML4:0]
+ *    Unused.
+ *  0x0000008000000000 - 0x000000ffffffffff [512GB, 2^39 bytes, PML4:1]
+ *    Hypercall argument translation area.
+ *  0x0000010000000000 - 0x00007fffffffffff [127TB,             PML4:2-255]
+ *    Reserved for future use.
  */
 
 
@@ -216,6 +228,14 @@ static inline void FORCE_CRASH(void) 
 #define COMPAT_L2_PAGETABLE_XEN_SLOTS \
     (COMPAT_L2_PAGETABLE_LAST_XEN_SLOT - COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT + 1)
 
+#define COMPAT_ARG_XLAT_VIRT_BASE      (1UL << ROOT_PAGETABLE_SHIFT)
+#define COMPAT_ARG_XLAT_SHIFT          0
+#define COMPAT_ARG_XLAT_PAGES          (1U << COMPAT_ARG_XLAT_SHIFT)
+#define COMPAT_ARG_XLAT_SIZE           (COMPAT_ARG_XLAT_PAGES << PAGE_SHIFT)
+#define COMPAT_ARG_XLAT_VIRT_START(vcpu_id) \
+    (COMPAT_ARG_XLAT_VIRT_BASE + ((unsigned long)(vcpu_id) << \
+                                  (PAGE_SHIFT + COMPAT_ARG_XLAT_SHIFT + 1)))
+
 #define PGT_base_page_table     PGT_l4_page_table
 
 #define __HYPERVISOR_CS64 0xe008
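
With COMPAT_ARG_XLAT_SHIFT being 0 each vCPU gets a single argument
translation page, and the "+ 1" in the shift spaces the per-vCPU areas two
pages apart, leaving an unmapped page between them. A standalone check of
the resulting addresses (plain C, assuming PAGE_SHIFT == 12 and
ROOT_PAGETABLE_SHIFT == 39 as on x86-64):

    #include <stdio.h>

    #define PAGE_SHIFT                12
    #define ROOT_PAGETABLE_SHIFT      39
    #define COMPAT_ARG_XLAT_VIRT_BASE (1UL << ROOT_PAGETABLE_SHIFT)
    #define COMPAT_ARG_XLAT_SHIFT     0
    #define COMPAT_ARG_XLAT_VIRT_START(vcpu_id) \
        (COMPAT_ARG_XLAT_VIRT_BASE + ((unsigned long)(vcpu_id) << \
                                      (PAGE_SHIFT + COMPAT_ARG_XLAT_SHIFT + 1)))

    int main(void)
    {
        unsigned int id;

        for ( id = 0; id < 3; ++id )  /* 0x8000000000, 0x8000002000, ... */
            printf("vcpu %u: %#lx\n", id, COMPAT_ARG_XLAT_VIRT_START(id));
        return 0;
    }
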
Index: 2006-10-04/xen/include/asm-x86/domain.h
===================================================================
--- 2006-10-04.orig/xen/include/asm-x86/domain.h        2006-09-21 11:09:00.000000000 +0200
+++ 2006-10-04/xen/include/asm-x86/domain.h     2006-10-04 15:18:51.000000000 +0200
@@ -98,6 +98,10 @@ struct arch_domain
     struct mapcache mapcache;
 #endif
 
+#ifdef CONFIG_COMPAT
+    l3_pgentry_t *mm_arg_xlat_l3;
+#endif
+
     /* I/O-port admin-specified access capabilities. */
     struct rangeset *ioport_caps;
 
Index: 2006-10-04/xen/include/asm-x86/mm.h
===================================================================
--- 2006-10-04.orig/xen/include/asm-x86/mm.h    2006-10-04 15:18:45.000000000 +0200
+++ 2006-10-04/xen/include/asm-x86/mm.h 2006-10-04 15:18:51.000000000 +0200
@@ -394,8 +394,18 @@ int __sync_lazy_execstate(void);
 /* Arch-specific portion of memory_op hypercall. */
 long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
 long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
+#ifdef CONFIG_COMPAT
+int compat_arch_memory_op(int op, XEN_GUEST_HANDLE(void));
+int compat_subarch_memory_op(int op, XEN_GUEST_HANDLE(void));
+#endif
 
 int steal_page(
     struct domain *d, struct page_info *page, unsigned int memflags);
 
+#ifdef CONFIG_COMPAT
+int setup_arg_xlat_area(struct domain *, unsigned int vcpu_id, l4_pgentry_t *);
+#else
+# define setup_arg_xlat_area(dom, vcpu_id, l4tab) 0
+#endif
+
 #endif /* __ASM_X86_MM_H__ */
Index: 2006-10-04/xen/include/xen/hypercall.h
===================================================================
--- 2006-10-04.orig/xen/include/xen/hypercall.h 2006-08-28 08:32:38.000000000 +0200
+++ 2006-10-04/xen/include/xen/hypercall.h      2006-10-04 15:18:51.000000000 +0200
@@ -42,9 +42,17 @@ extern long
 do_platform_op(
     XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op);
 
+/*
+ * To allow safe resume of do_memory_op() after preemption, we need to know
+ * at what point in the page list to resume. For this purpose I steal the
+ * high-order bits of the @cmd parameter, which are otherwise unused and zero.
+ */
+#define MEMOP_EXTENT_SHIFT 4 /* cmd[:4] == start_extent */
+#define MEMOP_CMD_MASK     ((1 << MEMOP_EXTENT_SHIFT) - 1)
+
 extern long
 do_memory_op(
-    int cmd,
+    unsigned long cmd,
     XEN_GUEST_HANDLE(void) arg);
 
 extern long
@@ -102,4 +110,13 @@ do_hvm_op(
     unsigned long op,
     XEN_GUEST_HANDLE(void) arg);
 
+#ifdef CONFIG_COMPAT
+
+extern int
+compat_memory_op(
+    unsigned int cmd,
+    XEN_GUEST_HANDLE(void) arg);
+
+#endif
+
 #endif /* __XEN_HYPERCALL_H__ */
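
The low four bits of 'cmd' carry the sub-operation and the remaining bits
the resume point, so a continuation can re-enter do_memory_op() with its
progress intact. A standalone round-trip check of the encoding (plain C,
using XENMEM_populate_physmap == 6 from the public headers):

    #include <assert.h>

    #define MEMOP_EXTENT_SHIFT 4
    #define MEMOP_CMD_MASK     ((1 << MEMOP_EXTENT_SHIFT) - 1)

    int main(void)
    {
        unsigned long op = 6, start = 1000;
        unsigned long cmd = op | (start << MEMOP_EXTENT_SHIFT);

        assert((cmd & MEMOP_CMD_MASK) == op);
        assert((cmd >> MEMOP_EXTENT_SHIFT) == start);
        return 0;
    }
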
Index: 2006-10-04/xen/include/xen/compat.h
===================================================================
--- 2006-10-04.orig/xen/include/xen/compat.h    2006-10-04 15:11:04.000000000 +0200
+++ 2006-10-04/xen/include/xen/compat.h 2006-10-04 15:18:51.000000000 +0200
@@ -143,6 +143,8 @@
                    &((k compat_ ## n *)0)->f1.f2.f3) * 2]
 
 
+int hypercall_xlat_continuation(unsigned int mask, ...);
+
 /* In-place translation functions: */
 struct start_info;
 void xlat_start_info(struct start_info *, enum XLAT_start_info_console);
Index: 2006-10-04/xen/include/xlat.lst
===================================================================
--- 2006-10-04.orig/xen/include/xlat.lst        2006-10-04 15:11:04.000000000 +0200
+++ 2006-10-04/xen/include/xlat.lst     2006-10-04 15:18:51.000000000 +0200
@@ -4,3 +4,8 @@
 ?      dom0_vga_console_info           xen.h
 !      start_info                      xen.h
 ?      vcpu_time_info                  xen.h
+!      add_to_physmap                  memory.h
+!      memory_exchange                 memory.h
+!      memory_map                      memory.h
+!      memory_reservation              memory.h
+!      translate_gpfn_list             memory.h

