WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] Subject: PAE support

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] Subject: PAE support
From: BitKeeper Bot <riel@xxxxxxxxxxx>
Date: Tue, 31 May 2005 15:39:28 +0000
Delivery-date: Tue, 31 May 2005 16:03:11 +0000
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: Xen Development List <xen-devel@xxxxxxxxxxxxxxxxxxx>
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
ChangeSet 1.1616, 2005/05/31 16:39:28+01:00, kaf24@xxxxxxxxxxxxxxxxxxxx

        Subject: PAE support
        
        This patch adds initial support for PAE paging to xen.
        This patch does:
        
         * boot Xen itself with PAE paging enabled.
         * add PAE support to the dom0 domain builder.
        
        Some notes on the design and the changes:
        
         * There are two new config options: CONFIG_X86_PAE (boolean,
           same name Linux uses to simplify things) and
           CONFIG_PAGING_LEVELS (int, possible values are 2,3,4).  I've
           used #if CONFIG_PAGING_LEVELS for stuff which simply depends
           on the number of paging levels in the code common for
           x86-32/64, and CONFIG_X86_PAE for special PAE quirks or
           i386-only stuff.  I've tried to avoid ifdefs if possible
           though, often I rearranged code to make it work in both
           PAE and non-PAE case instead.
        
         * idle_pg_table:  3rd level is statically initialized, 2nd
           level is contiguous in physical and virtual memory, so it can
           be addressed linearly (the dom0 builder uses the same trick to
           simplify things a bit btw.).  There are two new symbols:
           idle_pg_table_l3 and idle_pg_table_l2 for the two tables.
           idle_pg_table is aliased to the toplevel page table, i.e.
           idle_pg_table_l3 in PAE mode and idle_pg_table_l2 in non-pae
           mode.  The idle l3 table is actually never ever touched after
           boot, the l2 table is accessed via idle_pg_table_l2 and
           addressed linearly in both PAE and non-PAE mode.
        
         * I've added an "intpte_t" type and a PRIpte define, modeled
           after the C99 inttypes.h header, for page table entries.
        
        Signed-off-by: Gerd Knorr <kraxel@xxxxxxxxxxx>



 arch/x86/audit.c                     |    4 
 arch/x86/boot/x86_32.S               |   43 ++++
 arch/x86/dom0_ops.c                  |    2 
 arch/x86/domain.c                    |    8 
 arch/x86/domain_build.c              |   92 +++++++--
 arch/x86/idle0_task.c                |    3 
 arch/x86/mm.c                        |  348 ++++++++++++++++++++++++-----------
 arch/x86/setup.c                     |    2 
 arch/x86/shadow.c                    |   51 +++--
 arch/x86/traps.c                     |    2 
 arch/x86/vmx.c                       |   12 -
 arch/x86/vmx_io.c                    |    6 
 arch/x86/vmx_vmcs.c                  |    4 
 arch/x86/x86_32/domain_page.c        |    2 
 arch/x86/x86_32/mm.c                 |  140 +++++++++-----
 arch/x86/x86_32/traps.c              |   17 +
 arch/x86/x86_64/mm.c                 |    2 
 include/asm-x86/config.h             |   33 ++-
 include/asm-x86/domain.h             |    6 
 include/asm-x86/mm.h                 |   14 -
 include/asm-x86/page.h               |  151 +++++++++++++--
 include/asm-x86/shadow.h             |    6 
 include/asm-x86/smp.h                |    7 
 include/asm-x86/types.h              |    6 
 include/asm-x86/x86_32/page-2level.h |   49 ++++
 include/asm-x86/x86_32/page-3level.h |   56 +++++
 include/asm-x86/x86_32/page.h        |  127 ------------
 include/asm-x86/x86_64/page.h        |  188 ++----------------
 include/public/arch-x86_32.h         |    6 
 29 files changed, 830 insertions(+), 557 deletions(-)


diff -Nru a/xen/arch/x86/audit.c b/xen/arch/x86/audit.c
--- a/xen/arch/x86/audit.c      2005-05-31 12:04:00 -04:00
+++ b/xen/arch/x86/audit.c      2005-05-31 12:04:00 -04:00
@@ -408,9 +408,9 @@
 
         for_each_exec_domain(d, ed)
         {
-            if ( pagetable_val(ed->arch.guest_table) )
+            if ( pagetable_get_phys(ed->arch.guest_table) )
                 adjust(&frame_table[pagetable_get_pfn(ed->arch.guest_table)], 
1);
-            if ( pagetable_val(ed->arch.shadow_table) )
+            if ( pagetable_get_phys(ed->arch.shadow_table) )
                 adjust(&frame_table[pagetable_get_pfn(ed->arch.shadow_table)], 
0);
             if ( ed->arch.monitor_shadow_ref )
                 adjust(&frame_table[ed->arch.monitor_shadow_ref], 0);
diff -Nru a/xen/arch/x86/boot/x86_32.S b/xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S        2005-05-31 12:04:00 -04:00
+++ b/xen/arch/x86/boot/x86_32.S        2005-05-31 12:04:00 -04:00
@@ -101,6 +101,22 @@
         xor     %eax,%eax
         rep     stosb
 
+#ifdef CONFIG_X86_PAE
+        /* Initialize low and high mappings of all memory with 2MB pages */
+        mov     $idle_pg_table_l2-__PAGE_OFFSET,%edi
+        mov     $0xe3,%eax                  /* PRESENT+RW+A+D+2MB */
+1:      mov     %eax,__PAGE_OFFSET>>18(%edi) /* high mapping */
+        stosl                                /* low mapping */
+        add     $4,%edi
+        add     $(1<<L2_PAGETABLE_SHIFT),%eax
+        cmp     $DIRECTMAP_PHYS_END+0xe3,%eax
+        jne     1b
+1:      stosl   /* low mappings cover as much physmem as possible */
+        add     $4,%edi
+        add     $(1<<L2_PAGETABLE_SHIFT),%eax
+        cmp     $__HYPERVISOR_VIRT_START+0xe3,%eax
+        jne     1b
+#else
         /* Initialize low and high mappings of all memory with 4MB pages */
         mov     $idle_pg_table-__PAGE_OFFSET,%edi
         mov     $0xe3,%eax                  /* PRESENT+RW+A+D+4MB */
@@ -113,6 +129,7 @@
         add     $(1<<L2_PAGETABLE_SHIFT),%eax
         cmp     $__HYPERVISOR_VIRT_START+0xe3,%eax
         jne     1b
+#endif
         
         /* Initialise IDT with simple error defaults. */
         lea     ignore_int,%edx
@@ -204,10 +221,17 @@
         .quad 0x0000000000000000     /* unused */
         .quad 0x00cf9a000000ffff     /* 0xe008 ring 0 4.00GB code at 0x0 */
         .quad 0x00cf92000000ffff     /* 0xe010 ring 0 4.00GB data at 0x0 */
+#ifdef CONFIG_X86_PAE
+        .quad 0x00cfba00000067ff
+        .quad 0x00cfb200000067ff
+        .quad 0x00cffa00000067ff
+        .quad 0x00cff200000067ff
+#else
         .quad 0x00cfba000000c3ff     /* 0xe019 ring 1 3.95GB code at 0x0 */
         .quad 0x00cfb2000000c3ff     /* 0xe021 ring 1 3.95GB data at 0x0 */
         .quad 0x00cffa000000c3ff     /* 0xe02b ring 3 3.95GB code at 0x0 */
         .quad 0x00cff2000000c3ff     /* 0xe033 ring 3 3.95GB data at 0x0 */
+#endif
         .quad 0x0000000000000000     /* unused                           */
         .fill 2*NR_CPUS,8,0          /* space for TSS and LDT per CPU    */
 
@@ -215,10 +239,27 @@
 /* Maximum STACK_ORDER for x86/32 is 1. We must therefore ensure that the */
 /* CPU0 stack is aligned on an even page boundary!                        */
 ENTRY(cpu0_stack)
-
         .org 0x2000 + STACK_SIZE
+
+#ifdef CONFIG_X86_PAE
+
 ENTRY(idle_pg_table)
+ENTRY(idle_pg_table_l3)
+        .quad 0x100000 + 0x2000 + STACK_SIZE + 1*PAGE_SIZE + 0x01
+        .quad 0x100000 + 0x2000 + STACK_SIZE + 2*PAGE_SIZE + 0x01
+        .quad 0x100000 + 0x2000 + STACK_SIZE + 3*PAGE_SIZE + 0x01
+        .quad 0x100000 + 0x2000 + STACK_SIZE + 4*PAGE_SIZE + 0x01
+        .org 0x2000 + STACK_SIZE + 1*PAGE_SIZE
+ENTRY(idle_pg_table_l2)
+        .org 0x2000 + STACK_SIZE + 5*PAGE_SIZE
+
+#else /* CONFIG_X86_PAE */
 
+ENTRY(idle_pg_table)
+ENTRY(idle_pg_table_l2) # Initial page directory is 4kB
         .org 0x2000 + STACK_SIZE + PAGE_SIZE
+
+#endif /* CONFIG_X86_PAE */
+
 ENTRY(stext)
 ENTRY(_stext)
diff -Nru a/xen/arch/x86/dom0_ops.c b/xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c   2005-05-31 12:04:00 -04:00
+++ b/xen/arch/x86/dom0_ops.c   2005-05-31 12:04:00 -04:00
@@ -405,7 +405,7 @@
         c->flags |= VGCF_VMX_GUEST;
 #endif
 
-    c->pt_base = pagetable_val(ed->arch.guest_table);
+    c->pt_base = pagetable_get_phys(ed->arch.guest_table);
 
     c->vm_assist = ed->domain->vm_assist;
 }
diff -Nru a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     2005-05-31 12:04:01 -04:00
+++ b/xen/arch/x86/domain.c     2005-05-31 12:04:01 -04:00
@@ -460,7 +460,7 @@
         //      trust the VMX domain builder.  Xen should validate this
         //      page table, and/or build the table itself, or ???
         //
-        if ( !pagetable_val(d->arch.phys_table) )
+        if ( !pagetable_get_phys(d->arch.phys_table) )
             d->arch.phys_table = ed->arch.guest_table;
 
         if ( (error = vmx_final_setup_guest(ed, c)) )
@@ -660,7 +660,7 @@
     struct exec_domain    *ed = current;
 
     if ( unlikely(copy_from_user(&stu, (void *)regs->rsp, sizeof(stu))) ||
-         unlikely(pagetable_val(ed->arch.guest_table_user) == 0) )
+         unlikely(pagetable_get_phys(ed->arch.guest_table_user) == 0) )
         return -EFAULT;
 
     toggle_guest_mode(ed);
@@ -978,7 +978,7 @@
     /* Drop the in-use references to page-table bases. */
     for_each_exec_domain ( d, ed )
     {
-        if ( pagetable_val(ed->arch.guest_table) != 0 )
+        if ( pagetable_get_phys(ed->arch.guest_table) != 0 )
         {
             if ( shadow_mode_refcounts(d) )
                 
put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table)]);
@@ -988,7 +988,7 @@
             ed->arch.guest_table = mk_pagetable(0);
         }
 
-        if ( pagetable_val(ed->arch.guest_table_user) != 0 )
+        if ( pagetable_get_phys(ed->arch.guest_table_user) != 0 )
         {
             if ( shadow_mode_refcounts(d) )
                 
put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table_user)]);
diff -Nru a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       2005-05-31 12:04:00 -04:00
+++ b/xen/arch/x86/domain_build.c       2005-05-31 12:04:00 -04:00
@@ -44,15 +44,15 @@
 #if defined(__i386__)
 /* No ring-3 access in initial leaf page tables. */
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L3_PROT (_PAGE_PRESENT)
 #elif defined(__x86_64__)
 /* Allow ring-3 access in long mode as guest cannot use ring 1. */
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#endif
-/* Don't change these: Linux expects just these bits to be set. */
-/* (And that includes the bogus _PAGE_DIRTY!) */
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
 #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
 #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#endif
 
 #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
 #define round_pgdown(_p)  ((_p)&PAGE_MASK)
@@ -91,7 +91,11 @@
 #elif defined(__x86_64__)
     char *image_start  = __va(_image_start);
     char *initrd_start = __va(_initrd_start);
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
     l4_pgentry_t *l4tab = NULL, *l4start = NULL;
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
     l3_pgentry_t *l3tab = NULL, *l3start = NULL;
 #endif
     l2_pgentry_t *l2tab = NULL, *l2start = NULL;
@@ -143,7 +147,7 @@
         panic("Not enough RAM for DOM0 reservation.\n");
     alloc_start = page_to_phys(page);
     alloc_end   = alloc_start + (d->tot_pages << PAGE_SHIFT);
-    
+
     if ( (rc = parseelfimage(&dsi)) != 0 )
         return rc;
 
@@ -172,10 +176,15 @@
         v_end            = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
         if ( (v_end - vstack_end) < (512UL << 10) )
             v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
-#if defined(__i386__)
+#if defined(__i386__) && !defined(CONFIG_X86_PAE)
         if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >> 
                L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
             break;
+#elif defined(__i386__) && defined(CONFIG_X86_PAE)
+        /* 5 pages: 1x 3rd + 4x 2nd level */
+        if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >> 
+               L2_PAGETABLE_SHIFT) + 5) <= nr_pt_pages )
+            break;
 #elif defined(__x86_64__)
 #define NR(_l,_h,_s) \
     (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
@@ -249,6 +258,24 @@
     }
 
     /* WARNING: The new domain must have its 'processor' field filled in! */
+#if CONFIG_PAGING_LEVELS == 3
+    l3start = l3tab = (l3_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
+    l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += 4*PAGE_SIZE;
+    memcpy(l2tab, idle_pg_table_l2, 4*PAGE_SIZE);
+    for (i = 0; i < 4; i++) {
+        l3tab[i] = l3e_create_phys((u32)l2tab + i*PAGE_SIZE, L3_PROT);
+        l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
+            l2e_create_phys((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
+    }
+    unsigned long v;
+    for (v = PERDOMAIN_VIRT_START; v < PERDOMAIN_VIRT_END;
+         v += (1 << L2_PAGETABLE_SHIFT)) {
+        l2tab[v >> L2_PAGETABLE_SHIFT] =
+            l2e_create_phys(__pa(d->arch.mm_perdomain_pt) + 
(v-PERDOMAIN_VIRT_START),
+                            __PAGE_HYPERVISOR);
+    }
+    ed->arch.guest_table = mk_pagetable((unsigned long)l3start);
+#else
     l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
     memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
     l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
@@ -256,8 +283,9 @@
     l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
         l2e_create_phys(__pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
     ed->arch.guest_table = mk_pagetable((unsigned long)l2start);
+#endif
 
-    l2tab += l2_table_offset(dsi.v_start);
+    l2tab += l2_linear_offset(dsi.v_start);
     mfn = alloc_start >> PAGE_SHIFT;
     for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
     {
@@ -282,8 +310,8 @@
     }
 
     /* Pages that are part of page tables must be read only. */
-    l2tab = l2start + l2_table_offset(vpt_start);
-    l1start = l1tab = (l1_pgentry_t *)l2e_get_phys(*l2tab);
+    l2tab = l2start + l2_linear_offset(vpt_start);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] Subject: PAE support, BitKeeper Bot <=