WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] Fix x86/64 pagetable initialisation to not waste several

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] Fix x86/64 pagetable initialisation to not waste several
From: Xen patchbot -unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Tue, 30 Aug 2005 16:20:12 +0000
Delivery-date: Tue, 30 Aug 2005 16:18:42 +0000
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID f0bf239844a66f1cd0b8bbf112e1acd73996cae1
# Parent  b043928b08738e714900eef21cd8932934d09c45
Fix x86/64 pagetable initialisation to not waste several
megabytes of memory.

Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>

diff -r b043928b0873 -r f0bf239844a6 linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c    Tue Aug 30 16:15:27 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c    Tue Aug 30 16:19:07 2005
@@ -62,14 +62,16 @@
  * avaialble in init_memory_mapping().
  */
 
-#define addr_to_page(addr, page)                                             \
-        (addr) &= PHYSICAL_PAGE_MASK;                                   \
-        (page) = ((unsigned long *) ((unsigned long)(((mfn_to_pfn((addr) >> PAGE_SHIFT)) << PAGE_SHIFT) + __START_KERNEL_map)))
+#define addr_to_page(addr, page)                               \
+       (addr) &= PHYSICAL_PAGE_MASK;                           \
+       (page) = ((unsigned long *) ((unsigned long)            \
+       (((mfn_to_pfn((addr) >> PAGE_SHIFT)) << PAGE_SHIFT) +   \
+       __START_KERNEL_map)))
 
 static void __make_page_readonly(unsigned long va)
 {
-        unsigned long addr;
-        pte_t pte, *ptep;
+       unsigned long addr;
+       pte_t pte, *ptep;
        unsigned long *page = (unsigned long *) init_level4_pgt;
 
        addr = (unsigned long) page[pgd_index(va)];
@@ -89,22 +91,22 @@
 
 static void __make_page_writable(unsigned long va)
 {
-        unsigned long addr;
-        pte_t pte, *ptep;
-        unsigned long *page = (unsigned long *) init_level4_pgt;
-
-        addr = (unsigned long) page[pgd_index(va)];
-        addr_to_page(addr, page);
-
-        addr = page[pud_index(va)];
-        addr_to_page(addr, page);
-        
-        addr = page[pmd_index(va)];
-        addr_to_page(addr, page);
-
-        ptep = (pte_t *) &page[pte_index(va)];
+       unsigned long addr;
+       pte_t pte, *ptep;
+       unsigned long *page = (unsigned long *) init_level4_pgt;
+
+       addr = (unsigned long) page[pgd_index(va)];
+       addr_to_page(addr, page);
+
+       addr = page[pud_index(va)];
+       addr_to_page(addr, page);
+ 
+       addr = page[pmd_index(va)];
+       addr_to_page(addr, page);
+
+       ptep = (pte_t *) &page[pte_index(va)];
        pte.pte = (ptep->pte | _PAGE_RW);
-        xen_l1_entry_update(ptep, pte);
+       xen_l1_entry_update(ptep, pte);
        __flush_tlb_one(addr);
 }
 
@@ -115,55 +117,55 @@
 void make_page_readonly(void *va)
 {
        pgd_t* pgd; pud_t *pud; pmd_t* pmd; pte_t pte, *ptep;
-        unsigned long addr = (unsigned long) va;
-
-        if (!init_mapping_done) {
-                __make_page_readonly(addr);
-                return;
-        }
-                
-        pgd = pgd_offset_k(addr);
-        pud = pud_offset(pgd, addr);
-        pmd = pmd_offset(pud, addr);
-        ptep = pte_offset_kernel(pmd, addr);
+       unsigned long addr = (unsigned long) va;
+
+       if (!init_mapping_done) {
+               __make_page_readonly(addr);
+               return;
+       }
+  
+       pgd = pgd_offset_k(addr);
+       pud = pud_offset(pgd, addr);
+       pmd = pmd_offset(pud, addr);
+       ptep = pte_offset_kernel(pmd, addr);
        pte.pte = (ptep->pte & ~_PAGE_RW);
-        xen_l1_entry_update(ptep, pte);
+       xen_l1_entry_update(ptep, pte);
        __flush_tlb_one(addr);
 }
 
 void make_page_writable(void *va)
 {
-        pgd_t* pgd; pud_t *pud; pmd_t* pmd; pte_t pte, *ptep;
-        unsigned long addr = (unsigned long) va;
-
-        if (!init_mapping_done) {
-                __make_page_writable(addr);
-                return;
-        }
-
-        pgd = pgd_offset_k(addr);
-        pud = pud_offset(pgd, addr);
-        pmd = pmd_offset(pud, addr);
-        ptep = pte_offset_kernel(pmd, addr);
+       pgd_t* pgd; pud_t *pud; pmd_t* pmd; pte_t pte, *ptep;
+       unsigned long addr = (unsigned long) va;
+
+       if (!init_mapping_done) {
+               __make_page_writable(addr);
+               return;
+       }
+
+       pgd = pgd_offset_k(addr);
+       pud = pud_offset(pgd, addr);
+       pmd = pmd_offset(pud, addr);
+       ptep = pte_offset_kernel(pmd, addr);
        pte.pte = (ptep->pte | _PAGE_RW);
-        xen_l1_entry_update(ptep, pte);
+       xen_l1_entry_update(ptep, pte);
        __flush_tlb_one(addr);
 }
 
 void make_pages_readonly(void* va, unsigned nr)
 {
-        while ( nr-- != 0 ) {
-                make_page_readonly(va);
-                va = (void*)((unsigned long)va + PAGE_SIZE);
-        }
+       while (nr-- != 0) {
+               make_page_readonly(va);
+               va = (void*)((unsigned long)va + PAGE_SIZE);
+       }
 }
 
 void make_pages_writable(void* va, unsigned nr)
 {
-        while ( nr-- != 0 ) {
-                make_page_writable(va);
-                va = (void*)((unsigned long)va + PAGE_SIZE);
-        }
+       while (nr-- != 0) {
+               make_page_writable(va);
+               va = (void*)((unsigned long)va + PAGE_SIZE);
+       }
 }
 
 /*
@@ -389,7 +391,7 @@
         set_pte_phys(address, phys, prot, SET_FIXMAP_USER); 
 }
 
-unsigned long __initdata table_start, table_end, tables_space; 
+unsigned long __initdata table_start, tables_space; 
 
 unsigned long get_machine_pfn(unsigned long addr)
 {
@@ -400,40 +402,15 @@
         return pte_mfn(*pte);
 } 
 
-#define ALIGN_TO_4K __attribute__((section(".data.page_aligned")))
-#define MAX_LOW_PAGES  0x20
-static unsigned long __init_pgt[MAX_LOW_PAGES][512]  ALIGN_TO_4K;
-static int __init_pgt_index;
-
-/*
- * We start using from start_pfn
- */
 static __init void *alloc_static_page(unsigned long *phys)
 {
-       int i = __init_pgt_index++;
-
-       if (__init_pgt_index >= MAX_LOW_PAGES) {
-               printk("Need to increase MAX_LOW_PAGES");
-               BUG();
-       }
-               
-       *phys = __pa(__init_pgt[i]);
-
-       return (void *) __init_pgt[i];
+       unsigned long va = (start_pfn << PAGE_SHIFT) + __START_KERNEL_map;
+       *phys = start_pfn << PAGE_SHIFT;
+       start_pfn++;
+       memset((void *)va, 0, PAGE_SIZE);
+       return (void *)va;
 } 
 
-/*
- * Get RO page
- */
-static void __init *alloc_low_page(unsigned long *phys)
-{ 
-        unsigned long pfn = table_end++;
-    
-        *phys = (pfn << PAGE_SHIFT);
-        memset((void *) ((pfn << PAGE_SHIFT) + __START_KERNEL_map), 0, PAGE_SIZE);
-        return (void *)((pfn << PAGE_SHIFT) + __START_KERNEL_map);
-} 
-
 #define PTE_SIZE PAGE_SIZE
 
 static inline void __set_pte(pte_t *dst, pte_t val)
@@ -443,27 +420,21 @@
 
 static inline int make_readonly(unsigned long paddr)
 {
-    int readonly = 0;
-
-    /* Make new page tables read-only. */
-    if ((paddr < ((table_start << PAGE_SHIFT) + tables_space)) &&
-        (paddr >= (table_start << PAGE_SHIFT)))
-        readonly = 1;
-
-    /* Make old page tables read-only. */
-    if ((paddr < ((xen_start_info.pt_base - __START_KERNEL_map) +
-                  (xen_start_info.nr_pt_frames << PAGE_SHIFT))) &&
-        (paddr >= (xen_start_info.pt_base - __START_KERNEL_map)))
-        readonly = 1;
-
-    /*
-     * No need for writable mapping of kernel image. This also ensures that
-     * page and descriptor tables embedded inside don't have writable mappings.
-     */
-    if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end)))
-        readonly = 1;
-
-    return readonly;
+       int readonly = 0;
+
+       /* Make old and new page tables read-only. */
+       if ((paddr >= (xen_start_info.pt_base - __START_KERNEL_map))
+           && (paddr < ((table_start << PAGE_SHIFT) + tables_space)))
+               readonly = 1;
+       /*
+        * No need for writable mapping of kernel image. This also ensures that
+        * page and descriptor tables embedded inside don't have writable
+        * mappings. 
+        */
+       if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end)))
+               readonly = 1;
+
+       return readonly;
 }
 
 static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
@@ -485,7 +456,7 @@
                        break;
                } 
 
-               pmd = alloc_low_page(&pmd_phys);
+               pmd = alloc_static_page(&pmd_phys);
                 make_page_readonly(pmd);
                 xen_pmd_pin(pmd_phys);
                set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
@@ -499,7 +470,7 @@
                                        set_pmd(pmd,  __pmd(0)); 
                                break;
                        }
-                        pte = alloc_low_page(&pte_phys);
+                        pte = alloc_static_page(&pte_phys);
                         pte_save = pte;
                         for (k = 0; k < PTRS_PER_PTE; pte++, k++, paddr += PTE_SIZE) {
                                 if ((paddr >= end) ||
@@ -526,15 +497,16 @@
 
 static void __init find_early_table_space(unsigned long end)
 {
-        unsigned long puds, pmds, ptes; 
+       unsigned long puds, pmds, ptes; 
 
        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
-        ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT;
-
-        tables_space = round_up(puds * 8, PAGE_SIZE) + 
-                         round_up(pmds * 8, PAGE_SIZE) + 
-                         round_up(ptes * 8, PAGE_SIZE); 
+       ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT;
+
+       tables_space =
+               round_up(puds * 8, PAGE_SIZE) + 
+               round_up(pmds * 8, PAGE_SIZE) + 
+               round_up(ptes * 8, PAGE_SIZE); 
 }
 
 void __init xen_init_pt(void)
@@ -580,65 +552,58 @@
                mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
 }
 
-/*
- * Extend kernel mapping to access pages for page tables.  The initial
- * mapping done by Xen is minimal (e.g. 8MB) and we need to extend the
- * mapping for early initialization.
- */
-static unsigned long current_size, extended_size;
-
 void __init extend_init_mapping(void) 
 {
        unsigned long va = __START_KERNEL_map;
        unsigned long phys, addr, *pte_page;
-        pmd_t *pmd;
+       pmd_t *pmd;
        pte_t *pte, new_pte;
-       unsigned long *page = (unsigned long *) init_level4_pgt;
-       int i;
+       unsigned long *page = (unsigned long *)init_level4_pgt;
 
        addr = page[pgd_index(va)];
        addr_to_page(addr, page);
        addr = page[pud_index(va)];
        addr_to_page(addr, page);
 
-       for (;;) {
+       /* Kill mapping of low 1MB. */
+       while (va < (unsigned long)&_text) {
+               HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0);
+               va += PAGE_SIZE;
+       }
+
+       /* Ensure init mappings cover kernel text/data and initial tables. */
+       while (va < (__START_KERNEL_map
+                    + (start_pfn << PAGE_SHIFT)
+                    + tables_space)) {
                pmd = (pmd_t *)&page[pmd_index(va)];
-               if (!pmd_present(*pmd))
-                       break;
-               addr = page[pmd_index(va)];
-               addr_to_page(addr, pte_page);
-               for (i = 0; i < PTRS_PER_PTE; i++) {
-                       pte = (pte_t *) &pte_page[pte_index(va)];
-                       if (!pte_present(*pte))
-                               break;
-                       va += PAGE_SIZE;
-                       current_size += PAGE_SIZE;
+               if (pmd_none(*pmd)) {
+                       pte_page = alloc_static_page(&phys);
+                       make_page_readonly(pte_page);
+                       xen_pte_pin(phys);
+                       set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));
+               } else {
+                       addr = page[pmd_index(va)];
+                       addr_to_page(addr, pte_page);
                }
-       }
-
-       while (va < __START_KERNEL_map + current_size + tables_space) {
-               pmd = (pmd_t *) &page[pmd_index(va)];
-               if (!pmd_none(*pmd))
-                       continue;
-               pte_page = (unsigned long *) alloc_static_page(&phys);
-               make_page_readonly(pte_page);
-               xen_pte_pin(phys);
-               set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));
-               for (i = 0; i < PTRS_PER_PTE; i++, va += PAGE_SIZE) {
+               pte = (pte_t *)&pte_page[pte_index(va)];
+               if (pte_none(*pte)) {
                        new_pte = pfn_pte(
                                (va - __START_KERNEL_map) >> PAGE_SHIFT, 
                                __pgprot(_KERNPG_TABLE | _PAGE_USER));
-                       pte = (pte_t *)&pte_page[pte_index(va)];
                        xen_l1_entry_update(pte, new_pte);
-                       extended_size += PAGE_SIZE;
                }
-       }
-
-       /* Kill mapping of low 1MB. */
-       for (va = __START_KERNEL_map; va < (unsigned long)&_text; va += PAGE_SIZE)
+               va += PAGE_SIZE;
+       }
+
+       /* Finally, blow away any spurious initial mappings. */
+       while (1) {
+               pmd = (pmd_t *)&page[pmd_index(va)];
+               if (pmd_none(*pmd))
+                       break;
                HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0);
-}
-
+               va += PAGE_SIZE;
+       }
+}
 
 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
    This runs before bootmem is initialized and gets pages directly from the 
@@ -651,34 +616,31 @@
 
        find_early_table_space(end);
        extend_init_mapping();
-       start_pfn = current_size >> PAGE_SHIFT;
 
        table_start = start_pfn;
-       table_end = table_start;
 
        start = (unsigned long)__va(start);
        end = (unsigned long)__va(end);
 
        for (; start < end; start = next) {
                unsigned long pud_phys; 
-                pud_t *pud = alloc_low_page(&pud_phys);
-                make_page_readonly(pud);
-                xen_pud_pin(pud_phys);
+               pud_t *pud = alloc_static_page(&pud_phys);
+               make_page_readonly(pud);
+               xen_pud_pin(pud_phys);
                next = start + PGDIR_SIZE;
                if (next > end) 
                        next = end; 
                phys_pud_init(pud, __pa(start), __pa(next));
                set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
-       } 
-
-       printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end, 
-              table_start<<PAGE_SHIFT, 
-              table_end<<PAGE_SHIFT);
-
-        start_pfn = ((current_size + extended_size) >> PAGE_SHIFT);
+       }
+
+       printk("kernel direct mapping tables upto %lx @ %lx-%lx\n",
+              __pa(end), table_start<<PAGE_SHIFT, start_pfn<<PAGE_SHIFT);
+
+       BUG_ON(start_pfn != (table_start + (tables_space >> PAGE_SHIFT)));
 
        __flush_tlb_all();
-        init_mapping_done = 1;
+       init_mapping_done = 1;
 }
 
 extern struct x8664_pda cpu_pda[NR_CPUS];
@@ -1003,3 +965,13 @@
 {
        return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
 }
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] Fix x86/64 pagetable initialisation to not waste several, Xen patchbot -unstable <=