
[Xen-devel] [PATCH 1/2] x86: do away with the boot time low-memory 1:1 mapping



By doing so, we're no longer restricted to placing all boot loader
modules into the low 1Gb/4Gb (32-/64-bit) of memory, nor is there a
dependency anymore on where the boot loader places the modules.

We're also no longer required to copy the modules to a place below
4Gb, nor to pack them all together into a single contiguous piece of memory.

Further, this allows even a 32-bit Dom0 kernel to be loaded anywhere in
physical memory (unless it lacks support for PAE page directories above 4Gb).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
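
A minimal usage sketch of the new interface (hypothetical helper, error
handling elided): consumers map one module at a time through
bootstrap_map() and drop all transient mappings again by passing NULL.
Note that after the normalisation done in __start_xen(), mod_start holds
a frame number and mod_end the module's size in bytes.

    #include <xen/init.h>
    #include <xen/multiboot.h>

    /* Read a module's first byte through a transient boot-time mapping. */
    static unsigned char __init peek_module_byte(
        const module_t *mod, void *(*bootstrap_map)(const module_t *))
    {
        const unsigned char *p = bootstrap_map(mod); /* may fail -> NULL */
        unsigned char c = p ? *p : 0;

        bootstrap_map(NULL); /* tear down all transient boot-time mappings */
        return c;
    }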

--- 2010-11-09.orig/xen/arch/x86/boot/head.S    2010-08-06 08:44:33.000000000 +0200
+++ 2010-11-09/xen/arch/x86/boot/head.S 2010-11-09 10:30:06.000000000 +0100
@@ -110,12 +110,15 @@ __start:
         /* Initialise L2 identity-map and xen page table entries (16MB). */
         mov     $sym_phys(l2_identmap),%edi
         mov     $sym_phys(l2_xenmap),%esi
+        mov     $sym_phys(l2_bootmap),%edx
         mov     $0x1e3,%eax                  /* PRESENT+RW+A+D+2MB+GLOBAL */
         mov     $8,%ecx
 1:      mov     %eax,(%edi)
         add     $8,%edi
         mov     %eax,(%esi)
         add     $8,%esi
+        mov     %eax,(%edx)
+        add     $8,%edx
         add     $(1<<L2_PAGETABLE_SHIFT),%eax
         loop    1b
         /* Initialise L3 identity-map page directory entries. */
@@ -129,9 +132,13 @@ __start:
         /* Initialise L3 xen-map page directory entry. */
         mov     $(sym_phys(l2_xenmap)+7),%eax
         mov     %eax,sym_phys(l3_xenmap) + l3_table_offset(XEN_VIRT_START)*8
-        /* Hook identity-map and xen-map L3 tables into PML4. */
+        /* Initialise L3 boot-map page directory entry. */
+        mov     $(sym_phys(l2_bootmap)+7),%eax
+        mov     %eax,sym_phys(l3_bootmap) + 0*8
+        /* Hook identity-map, xen-map, and boot-map L3 tables into PML4. */
+        mov     $(sym_phys(l3_bootmap)+7),%eax
+        mov     %eax,sym_phys(idle_pg_table) + 0*8
         mov     $(sym_phys(l3_identmap)+7),%eax
-        mov     %eax,sym_phys(idle_pg_table) + (  0*8) /* PML4[  0]: 1:1 map */
         mov     %eax,sym_phys(idle_pg_table) + l4_table_offset(DIRECTMAP_VIRT_START)*8
         mov     $(sym_phys(l3_xenmap)+7),%eax
         mov     %eax,sym_phys(idle_pg_table) + l4_table_offset(XEN_VIRT_START)*8
@@ -176,6 +183,7 @@ __start:
 #if defined(__x86_64__)
         mov     %edi,sym_phys(l2_identmap)
         mov     %edi,sym_phys(l2_xenmap)
+        mov     %edi,sym_phys(l2_bootmap)
 #else
         mov     %edi,sym_phys(idle_pg_table_l2)
         mov     %edi,sym_phys(idle_pg_table_l2) + (__PAGE_OFFSET>>18)
--- 2010-11-09.orig/xen/arch/x86/boot/Makefile  2010-04-22 14:43:25.000000000 +0200
+++ 2010-11-09/xen/arch/x86/boot/Makefile       2010-11-09 10:30:06.000000000 +0100
@@ -4,6 +4,6 @@ head.o: reloc.S
 
 BOOT_TRAMPOLINE := $(shell sed -n 's,^\#define[[:space:]]\{1\,\}BOOT_TRAMPOLINE[[:space:]]\{1\,\},,p' $(BASEDIR)/include/asm-x86/config.h)
 %.S: %.c
-       RELOC=$(BOOT_TRAMPOLINE) XEN_BITSPERLONG=$(patsubst x86_%,%,$(TARGET_SUBARCH)) $(MAKE) -f build32.mk $@
+       RELOC=$(BOOT_TRAMPOLINE) $(MAKE) -f build32.mk $@
 
 reloc.S: $(BASEDIR)/include/asm-x86/config.h
--- 2010-11-09.orig/xen/arch/x86/boot/build32.mk        2010-08-06 08:44:33.000000000 +0200
+++ 2010-11-09/xen/arch/x86/boot/build32.mk     2010-11-09 10:30:06.000000000 +0100
@@ -19,6 +19,6 @@ CFLAGS += -Werror -fno-builtin -msoft-fl
        $(LD) $(LDFLAGS_DIRECT) -N -Ttext $(RELOC) -o $@ $<
 
 %.o: %.c
-       $(CC) $(CFLAGS) -DXEN_BITSPERLONG=$(XEN_BITSPERLONG) -c $< -o $@
+       $(CC) $(CFLAGS) -c $< -o $@
 
 reloc.o: $(BASEDIR)/include/asm-x86/config.h
--- 2010-11-09.orig/xen/arch/x86/boot/reloc.c   2010-08-06 08:44:33.000000000 +0200
+++ 2010-11-09/xen/arch/x86/boot/reloc.c        2010-11-09 10:30:06.000000000 +0100
@@ -68,7 +68,6 @@ multiboot_info_t *reloc(multiboot_info_t
     {
         module_t *mods = reloc_mbi_struct(
             (module_t *)mbi->mods_addr, mbi->mods_count * sizeof(module_t));
-        u32 max_addr = 0;
 
         mbi->mods_addr = (u32)mods;
 
@@ -76,29 +75,6 @@ multiboot_info_t *reloc(multiboot_info_t
         {
             if ( mods[i].string )
                 mods[i].string = (u32)reloc_mbi_string((char *)mods[i].string);
-            if ( mods[i].mod_end > max_addr )
-                max_addr = mods[i].mod_end;
-        }
-
-        /*
-         * 32-bit Xen only maps bottom 1GB of memory at boot time. Relocate 
-         * modules which extend beyond this (GRUB2 in particular likes to 
-         * place modules as high as possible below 4GB).
-         */
-#define BOOTMAP_END (1ul<<30) /* 1GB */
-        if ( (XEN_BITSPERLONG == 32) && (max_addr > BOOTMAP_END) )
-        {
-            char *mod_alloc = (char *)BOOTMAP_END;
-            for ( i = 0; i < mbi->mods_count; i++ )
-                mod_alloc -= mods[i].mod_end - mods[i].mod_start;
-            for ( i = 0; i < mbi->mods_count; i++ )
-            {
-                u32 mod_len = mods[i].mod_end - mods[i].mod_start;
-                mods[i].mod_start = (u32)memcpy(
-                    mod_alloc, (char *)mods[i].mod_start, mod_len);
-                mods[i].mod_end = mods[i].mod_start + mod_len;
-                mod_alloc += mod_len;
-            }
         }
     }
 
--- 2010-11-09.orig/xen/arch/x86/domain_build.c 2010-08-06 08:44:33.000000000 +0200
+++ 2010-11-09/xen/arch/x86/domain_build.c      2010-11-09 10:30:06.000000000 +0100
@@ -31,6 +31,7 @@
 #include <asm/p2m.h>
 #include <asm/e820.h>
 #include <asm/acpi.h>
+#include <asm/setup.h>
 #include <asm/bzimage.h> /* for bzimage_parse */
 #include <asm/io_apic.h>
 
@@ -284,9 +285,9 @@ static void __init process_dom0_ioports_
 
 int __init construct_dom0(
     struct domain *d,
-    unsigned long _image_base,
-    unsigned long _image_start, unsigned long image_len,
-    unsigned long _initrd_start, unsigned long initrd_len,
+    const module_t *image, unsigned long image_headroom,
+    const module_t *initrd,
+    void *(*bootstrap_map)(const module_t *),
     char *cmdline)
 {
     int i, cpu, rc, compatible, compat32, order, machine;
@@ -301,16 +302,14 @@ int __init construct_dom0(
     start_info_t *si;
     struct vcpu *v = d->vcpu[0];
     unsigned long long value;
-#if defined(__i386__)
-    char *image_base   = (char *)_image_base;   /* use lowmem mappings */
-    char *image_start  = (char *)_image_start;  /* use lowmem mappings */
-    char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
-#elif defined(__x86_64__)
-    char *image_base   = __va(_image_base);
-    char *image_start  = __va(_image_start);
-    char *initrd_start = __va(_initrd_start);
-#endif
-#if CONFIG_PAGING_LEVELS >= 4
+    char *image_base = bootstrap_map(image);
+    unsigned long image_len = image->mod_end;
+    char *image_start = image_base + image_headroom;
+    unsigned long initrd_len = initrd ? initrd->mod_end : 0;
+#if CONFIG_PAGING_LEVELS < 4
+    module_t mpt;
+    void *mpt_ptr;
+#else
     l4_pgentry_t *l4tab = NULL, *l4start = NULL;
 #endif
     l3_pgentry_t *l3tab = NULL, *l3start = NULL;
@@ -340,7 +339,7 @@ int __init construct_dom0(
     unsigned long v_end;
 
     /* Machine address of next candidate page-table page. */
-    unsigned long mpt_alloc;
+    paddr_t mpt_alloc;
 
     /* Sanity! */
     BUG_ON(d->domain_id != 0);
@@ -495,17 +494,17 @@ int __init construct_dom0(
     if ( (1UL << order) > nr_pages )
         panic("Domain 0 allocation is too small for kernel image.\n");
 
-#ifdef __i386__
-    /* Ensure that our low-memory 1:1 mapping covers the allocation. */
-    page = alloc_domheap_pages(d, order, MEMF_bits(30));
-#else
     if ( parms.p2m_base != UNSET_ADDR )
     {
         vphysmap_start = parms.p2m_base;
         vphysmap_end   = vphysmap_start + nr_pages * sizeof(unsigned long);
     }
-    page = alloc_domheap_pages(d, order, 0);
+#ifdef __i386__
+    if ( !test_bit(XENFEAT_pae_pgdir_above_4gb, parms.f_supported) )
+        page = alloc_domheap_pages(d, order, MEMF_bits(32));
+    else
 #endif
+        page = alloc_domheap_pages(d, order, 0);
     if ( page == NULL )
         panic("Not enough RAM for domain 0 allocation.\n");
     alloc_spfn = page_to_mfn(page);
@@ -534,8 +533,7 @@ int __init construct_dom0(
            _p(v_start), _p(v_end));
     printk(" ENTRY ADDRESS: %p\n", _p(parms.virt_entry));
 
-    mpt_alloc = (vpt_start - v_start) +
-        (unsigned long)pfn_to_paddr(alloc_spfn);
+    mpt_alloc = (vpt_start - v_start) + pfn_to_paddr(alloc_spfn);
 
 #if defined(__i386__)
     /*
@@ -548,17 +546,25 @@ int __init construct_dom0(
         return -EINVAL;
     }
 
+    mpt.mod_start = mpt_alloc >> PAGE_SHIFT;
+    mpt.mod_end   = vpt_end - vpt_start;
+    mpt_ptr = bootstrap_map(&mpt);
+#define MPT_ALLOC(n) (mpt_ptr += (n)*PAGE_SIZE, mpt_alloc += (n)*PAGE_SIZE)
+
     /* WARNING: The new domain must have its 'processor' field filled in! */
-    l3start = l3tab = (l3_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
-    l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += 4*PAGE_SIZE;
+    l3start = l3tab = mpt_ptr; MPT_ALLOC(1);
+    l2start = l2tab = mpt_ptr; MPT_ALLOC(4);
     for (i = 0; i < L3_PAGETABLE_ENTRIES; i++) {
-        copy_page(l2tab + i * L2_PAGETABLE_ENTRIES,
-                  idle_pg_table_l2 + i * L2_PAGETABLE_ENTRIES);
-        l3tab[i] = l3e_from_paddr((u32)l2tab + i*PAGE_SIZE, L3_PROT);
+        if ( i < 3 )
+            clear_page(l2tab + i * L2_PAGETABLE_ENTRIES);
+        else
+            copy_page(l2tab + i * L2_PAGETABLE_ENTRIES,
+                      idle_pg_table_l2 + i * L2_PAGETABLE_ENTRIES);
+        l3tab[i] = l3e_from_pfn(mpt.mod_start + 1 + i, L3_PROT);
         l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
-            l2e_from_paddr((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
+            l2e_from_pfn(mpt.mod_start + 1 + i, __PAGE_HYPERVISOR);
     }
-    v->arch.guest_table = pagetable_from_paddr((unsigned long)l3start);
+    v->arch.guest_table = pagetable_from_pfn(mpt.mod_start);
 
     for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
         l2tab[l2_linear_offset(PERDOMAIN_VIRT_START) + i] =
@@ -570,9 +576,9 @@ int __init construct_dom0(
     {
         if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
         {
-            l1start = l1tab = (l1_pgentry_t *)mpt_alloc;
-            mpt_alloc += PAGE_SIZE;
-            *l2tab = l2e_from_paddr((unsigned long)l1start, L2_PROT);
+            l1tab = mpt_ptr;
+            *l2tab = l2e_from_paddr(mpt_alloc, L2_PROT);
+            MPT_ALLOC(1);
             l2tab++;
             clear_page(l1tab);
             if ( count == 0 )
@@ -587,11 +593,14 @@ int __init construct_dom0(
 
         mfn++;
     }
+#undef MPT_ALLOC
 
     /* Pages that are part of page tables must be read only. */
+    mpt_alloc = (paddr_t)mpt.mod_start << PAGE_SHIFT;
+    mpt_ptr = l3start;
     l2tab = l2start + l2_linear_offset(vpt_start);
-    l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_paddr(*l2tab);
-    l1tab += l1_table_offset(vpt_start);
+    l1start = mpt_ptr + (l2e_get_paddr(*l2tab) - mpt_alloc);
+    l1tab = l1start + l1_table_offset(vpt_start);
     for ( count = 0; count < nr_pt_pages; count++ ) 
     {
         page = mfn_to_page(l1e_get_pfn(*l1tab));
@@ -627,9 +636,15 @@ int __init construct_dom0(
             break;
         }
         if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
-            l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_paddr(*++l2tab);
+            l1tab = mpt_ptr + (l2e_get_paddr(*++l2tab) - mpt_alloc);
     }
 
+    /*
+     * Put Xen's first L3 entry into Dom0's page tables so that updates
+     * through bootstrap_map() will affect the page tables we will run on.
+     */
+    l3start[0] = l3e_from_paddr(__pa(idle_pg_table_l2), L3_PROT);
+
 #elif defined(__x86_64__)
 
     /* Overlap with Xen protected area? */
@@ -807,6 +822,7 @@ int __init construct_dom0(
     /* Copy the OS image and free temporary buffer. */
     elf.dest = (void*)vkern_start;
     elf_load_binary(&elf);
+    bootstrap_map(NULL);
 
     if ( UNSET_ADDR != parms.virt_hypercall )
     {
@@ -823,7 +839,12 @@ int __init construct_dom0(
 
     /* Copy the initial ramdisk. */
     if ( initrd_len != 0 )
+    {
+        char *initrd_start = bootstrap_map(initrd);
+
         memcpy((void *)vinitrd_start, initrd_start, initrd_len);
+        bootstrap_map(NULL);
+    }
 
     /* Free temporary buffers. */
     discard_initial_images();
@@ -1033,7 +1054,22 @@ int __init construct_dom0(
     write_ptbase(current);
 
 #if defined(__i386__)
-    /* Destroy low mappings - they were only for our convenience. */
+    /* Restore Dom0's first L3 entry. */
+    mpt.mod_end = 5 * PAGE_SIZE;
+    l3start = mpt_ptr = bootstrap_map(&mpt);
+    l2start = mpt_ptr + PAGE_SIZE;
+    l3start[0] = l3e_from_pfn(mpt.mod_start + 1, L3_PROT);
+
+    /* Re-setup CR3  */
+    if ( paging_mode_enabled(d) )
+        paging_update_paging_modes(v);
+    else
+        update_cr3(v);
+
+    /*
+     * Destroy low mappings - they were only for our convenience. Note
+     * that zap_low_mappings() exceeds what bootstrap_map(NULL) would do.
+     */
     zap_low_mappings(l2start);
 #endif
 
--- 2010-11-09.orig/xen/arch/x86/setup.c        2010-08-12 08:17:22.000000000 +0200
+++ 2010-11-09/xen/arch/x86/setup.c     2010-11-09 10:30:06.000000000 +0100
@@ -45,14 +45,6 @@
 #include <asm/setup.h>
 #include <xen/cpu.h>
 
-#if defined(CONFIG_X86_64)
-#define BOOTSTRAP_DIRECTMAP_END (1UL << 32) /* 4GB */
-#define maddr_to_bootstrap_virt(m) maddr_to_virt(m)
-#else
-#define BOOTSTRAP_DIRECTMAP_END (1UL << 30) /* 1GB */
-#define maddr_to_bootstrap_virt(m) ((void *)(long)(m))
-#endif
-
 extern u16 boot_edid_caps;
 extern u8 boot_edid_info[128];
 extern struct boot_video_info boot_vid_info;
@@ -152,21 +144,34 @@ static void __init parse_acpi_param(char
     for ( ; ; ) halt();                         \
 } while (0)
 
-static unsigned long __initdata initial_images_base;
-static unsigned long __initdata initial_images_start;
-static unsigned long __initdata initial_images_end;
+static const module_t *__initdata initial_images;
+static unsigned int __initdata nr_initial_images;
 
 unsigned long __init initial_images_nrpages(void)
 {
-    ASSERT(!(initial_images_base & ~PAGE_MASK));
-    ASSERT(!(initial_images_end   & ~PAGE_MASK));
-    return ((initial_images_end >> PAGE_SHIFT) -
-            (initial_images_base >> PAGE_SHIFT));
+    unsigned long nr;
+    unsigned int i;
+
+    for ( nr = i = 0; i < nr_initial_images; ++i )
+        nr += PFN_UP(initial_images[i].mod_end);
+
+    return nr;
 }
 
 void __init discard_initial_images(void)
 {
-    init_domheap_pages(initial_images_base, initial_images_end);
+    unsigned int i;
+
+    for ( i = 0; i < nr_initial_images; ++i )
+    {
+        uint64_t start = (uint64_t)initial_images[i].mod_start << PAGE_SHIFT;
+
+        init_domheap_pages(start,
+                           start + PAGE_ALIGN(initial_images[i].mod_end));
+    }
+
+    nr_initial_images = 0;
+    initial_images = NULL;
 }
 
 static void free_xen_data(char *s, char *e)
@@ -257,33 +262,128 @@ static void __init normalise_cpu_order(v
     }
 }
 
+#define BOOTSTRAP_MAP_BASE  (16UL << 20)
+#define BOOTSTRAP_MAP_LIMIT (1UL << L3_PAGETABLE_SHIFT)
+
 /*
  * Ensure a given physical memory range is present in the bootstrap mappings.
  * Use superpage mappings to ensure that pagetable memory needn't be allocated.
  */
-static void __init bootstrap_map(unsigned long start, unsigned long end)
+static void *__init bootstrap_map(const module_t *mod)
 {
-    unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
-    start = max_t(unsigned long, start & ~mask, 16UL << 20);
-    end   = (end + mask) & ~mask;
+    static unsigned long __initdata map_cur = BOOTSTRAP_MAP_BASE;
+    uint64_t start, end, mask = (1L << L2_PAGETABLE_SHIFT) - 1;
+    void *ret;
+
+#ifdef __x86_64__
+    if ( !early_boot )
+        return mod ? mfn_to_virt(mod->mod_start) : NULL;
+#endif
+
+    if ( !mod )
+    {
+        destroy_xen_mappings(BOOTSTRAP_MAP_BASE, BOOTSTRAP_MAP_LIMIT);
+        map_cur = BOOTSTRAP_MAP_BASE;
+        return NULL;
+    }
+
+    start = (uint64_t)mod->mod_start << PAGE_SHIFT;
+    end = start + mod->mod_end;
     if ( start >= end )
-        return;
-    if ( end > BOOTSTRAP_DIRECTMAP_END )
-        panic("Cannot access memory beyond end of "
-              "bootstrap direct-map area\n");
-    map_pages_to_xen(
-        (unsigned long)maddr_to_bootstrap_virt(start),
-        start >> PAGE_SHIFT, (end-start) >> PAGE_SHIFT, PAGE_HYPERVISOR);
+        return NULL;
+
+    if ( end <= BOOTSTRAP_MAP_BASE )
+        return (void *)(unsigned long)start;
+
+    ret = (void *)(map_cur + (unsigned long)(start & mask));
+    start &= ~mask;
+    end = (end + mask) & ~mask;
+    if ( end - start > BOOTSTRAP_MAP_LIMIT - map_cur )
+        return NULL;
+
+    map_pages_to_xen(map_cur, start >> PAGE_SHIFT,
+                     (end - start) >> PAGE_SHIFT, PAGE_HYPERVISOR);
+    map_cur += end - start;
+    return ret;
 }
 
-static void __init move_memory(
-    unsigned long dst, unsigned long src_start, unsigned long src_end)
+static void *__init move_memory(
+    uint64_t dst, uint64_t src, unsigned int size, bool_t keep)
 {
-    bootstrap_map(src_start, src_end);
-    bootstrap_map(dst, dst + src_end - src_start);
-    memmove(maddr_to_bootstrap_virt(dst),
-            maddr_to_bootstrap_virt(src_start),
-            src_end - src_start);
+    unsigned int blksz = BOOTSTRAP_MAP_LIMIT - BOOTSTRAP_MAP_BASE;
+    unsigned int mask = (1L << L2_PAGETABLE_SHIFT) - 1;
+
+    if ( src + size > BOOTSTRAP_MAP_BASE )
+        blksz >>= 1;
+
+    while ( size )
+    {
+        module_t mod;
+        unsigned int soffs = src & mask;
+        unsigned int doffs = dst & mask;
+        unsigned int sz;
+        void *d, *s;
+
+        mod.mod_start = (src - soffs) >> PAGE_SHIFT;
+        mod.mod_end = soffs + size;
+        if ( mod.mod_end > blksz )
+            mod.mod_end = blksz;
+        sz = mod.mod_end - soffs;
+        s = bootstrap_map(&mod);
+
+        mod.mod_start = (dst - doffs) >> PAGE_SHIFT;
+        mod.mod_end = doffs + size;
+        if ( mod.mod_end > blksz )
+            mod.mod_end = blksz;
+        if ( sz > mod.mod_end - doffs )
+            sz = mod.mod_end - doffs;
+        d = bootstrap_map(&mod);
+
+        memmove(d + doffs, s + soffs, sz);
+
+        dst += sz;
+        src += sz;
+        size -= sz;
+
+        if ( keep )
+            return size ? NULL : d + doffs;
+
+        bootstrap_map(NULL);
+    }
+
+    return NULL;
+}
+
+static uint64_t __init consider_modules(
+    uint64_t s, uint64_t e, uint32_t size, const module_t *mod,
+    unsigned int nr_mods, unsigned int this_mod)
+{
+    unsigned int i;
+
+    if ( s > e || e - s < size )
+        return 0;
+
+    for ( i = 0; i < nr_mods ; ++i )
+    {
+        uint64_t start = (uint64_t)mod[i].mod_start << PAGE_SHIFT;
+        uint64_t end = start + PAGE_ALIGN(mod[i].mod_end);
+
+        if ( i == this_mod )
+            continue;
+
+        if ( s < end && start < e )
+        {
+            end = consider_modules(end, e, size, mod + i + 1,
+                                   nr_mods - i - 1, this_mod - i - 1);
+            if ( end )
+                return end;
+
+            return consider_modules(s, start, size, mod + i + 1,
+                                    nr_mods - i - 1, this_mod - i - 1);
+        }
+    }
+
+    return e;
 }
 
 static void __init setup_max_pdx(void)
@@ -447,11 +547,10 @@ void __init __start_xen(unsigned long mb
 {
     char *memmap_type = NULL;
     char *cmdline, *kextra, *loader;
-    unsigned long _initrd_start = 0, _initrd_len = 0;
     unsigned int initrdidx = 1;
     multiboot_info_t *mbi = __va(mbi_p);
     module_t *mod = (module_t *)__va(mbi->mods_addr);
-    unsigned long nr_pages, modules_length, modules_headroom;
+    unsigned long nr_pages, modules_headroom;
     int i, j, e820_warn = 0, bytes = 0;
     bool_t acpi_boot_table_init_done = 0;
     struct ns16550_defaults ns16550 = {
@@ -647,6 +746,9 @@ void __init __start_xen(unsigned long mb
     set_kexec_crash_area_size((u64)nr_pages << PAGE_SHIFT);
     kexec_reserve_area(&boot_e820);
 
+    initial_images = mod;
+    nr_initial_images = mbi->mods_count;
+
     /*
      * Iterate backwards over all superpage-aligned RAM regions.
      * 
@@ -660,48 +762,64 @@ void __init __start_xen(unsigned long mb
      * we can relocate the dom0 kernel and other multiboot modules. Also, on
      * x86/64, we relocate Xen to higher memory.
      */
-    modules_length = 0;
     for ( i = 0; i < mbi->mods_count; i++ )
-        modules_length += mod[i].mod_end - mod[i].mod_start;
+    {
+        if ( mod[i].mod_start & (PAGE_SIZE - 1) )
+            EARLY_FAIL("Bootloader didn't honor module alignment request.\n");
+        mod[i].mod_end -= mod[i].mod_start;
+        mod[i].mod_start >>= PAGE_SHIFT;
+        mod[i].reserved = 0;
+    }
 
-    /* ensure mod[0] is mapped before parsing */
-    bootstrap_map(mod[0].mod_start, mod[0].mod_end);
-    modules_headroom = bzimage_headroom(
-                      (char *)(unsigned long)mod[0].mod_start,
-                      (unsigned long)(mod[0].mod_end - mod[0].mod_start));
+    modules_headroom = bzimage_headroom(bootstrap_map(mod), mod->mod_end);
+    bootstrap_map(NULL);
 
     for ( i = boot_e820.nr_map-1; i >= 0; i-- )
     {
         uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
+        uint64_t end, limit = ARRAY_SIZE(l2_identmap) << L2_PAGETABLE_SHIFT;
 
-        /* Superpage-aligned chunks from 16MB to BOOTSTRAP_DIRECTMAP_END. */
+        /* Superpage-aligned chunks from BOOTSTRAP_MAP_BASE. */
         s = (boot_e820.map[i].addr + mask) & ~mask;
         e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
-        s = max_t(uint64_t, s, 16 << 20);
-        e = min_t(uint64_t, e, BOOTSTRAP_DIRECTMAP_END);
+        s = max_t(uint64_t, s, BOOTSTRAP_MAP_BASE);
         if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
             continue;
 
-        set_pdx_range(s >> PAGE_SHIFT, e >> PAGE_SHIFT);
-
-        /* Map the chunk. No memory will need to be allocated to do this. */
-        map_pages_to_xen(
-            (unsigned long)maddr_to_bootstrap_virt(s),
-            s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
+        if ( s < limit )
+        {
+            end = min(e, limit);
+            set_pdx_range(s >> PAGE_SHIFT, end >> PAGE_SHIFT);
+#ifdef CONFIG_X86_64
+            map_pages_to_xen((unsigned long)__va(s), s >> PAGE_SHIFT,
+                             (end - s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
+#endif
+        }
 
 #if defined(CONFIG_X86_64)
+        e = min_t(uint64_t, e, 1ULL << (PAGE_SHIFT + 32));
 #define reloc_size ((__pa(&_end) + mask) & ~mask)
         /* Is the region suitable for relocating Xen? */
-        if ( !xen_phys_start && ((e-s) >= reloc_size) )
+        if ( !xen_phys_start && e <= limit )
+        {
+            /* Don't overlap with modules. */
+            end = consider_modules(s, e, reloc_size + mask,
+                                   mod, mbi->mods_count, -1);
+            end &= ~mask;
+        }
+        else
+            end = 0;
+        if ( end > s )
         {
             extern l2_pgentry_t l2_xenmap[];
             l4_pgentry_t *pl4e;
             l3_pgentry_t *pl3e;
             l2_pgentry_t *pl2e;
             int i, j, k;
+            void *dst;
 
             /* Select relocation address. */
-            e -= reloc_size;
+            e = end - reloc_size;
             xen_phys_start = e;
             bootsym(trampoline_xen_phys_start) = e;
 
@@ -712,10 +830,10 @@ void __init __start_xen(unsigned long mb
              * data until after we have switched to the relocated pagetables!
              */
             barrier();
-            move_memory(e, 0, __pa(&_end) - xen_phys_start);
+            dst = move_memory(e, 0, (unsigned long)&_end - XEN_VIRT_START, 1);
 
             /* Poison low 1MB to detect stray pointers to physical 0-1MB. */
-            memset(maddr_to_bootstrap_virt(e), 0x55, 1U<<20);
+            memset(dst, 0x55, 1U << 20);
 
             /* Walk initial pagetables, relocating page directory entries. */
             pl4e = __va(__pa(idle_pg_table));
@@ -772,38 +890,58 @@ void __init __start_xen(unsigned long mb
                 "movq %%rsi,%%cr4   " /* CR4.PGE == 1 */
                 : : "r" (__pa(idle_pg_table)), "S" (cpu0_stack),
                 "D" (__va(__pa(cpu0_stack))), "c" (STACK_SIZE) : "memory" );
+
+            bootstrap_map(NULL);
         }
 #endif
 
         /* Is the region suitable for relocating the multiboot modules? */
-        if ( !initial_images_start && (s < e) &&
-             ((e-s) >= (modules_length+modules_headroom)) )
+        for ( j = mbi->mods_count - 1; j >= 0; j-- )
         {
-            initial_images_end = e;
-            initial_images_start = initial_images_end - modules_length;
-            initial_images_base = initial_images_start - modules_headroom;
-            initial_images_base &= PAGE_MASK;
-            for ( j = mbi->mods_count-1; j >= 0; j-- )
+            unsigned long headroom = j ? 0 : modules_headroom;
+            unsigned long size = PAGE_ALIGN(headroom + mod[j].mod_end);
+
+            if ( mod[j].reserved )
+                continue;
+
+            /* Don't overlap with other modules. */
+            end = consider_modules(s, e, size, mod, mbi->mods_count, j);
+
+            if ( s < end &&
+                 (headroom ||
+                  ((end - size) >> PAGE_SHIFT) > mod[j].mod_start) )
             {
-                e -= mod[j].mod_end - mod[j].mod_start;
-                move_memory(e, mod[j].mod_start, mod[j].mod_end);
-                mod[j].mod_end += e - mod[j].mod_start;
-                mod[j].mod_start = e;
+                move_memory(end - size + headroom,
+                            (uint64_t)mod[j].mod_start << PAGE_SHIFT,
+                            mod[j].mod_end, 0);
+                mod[j].mod_start = (end - size) >> PAGE_SHIFT;
+                mod[j].mod_end += headroom;
+                mod[j].reserved = 1;
             }
-            e = initial_images_base;
         }
 
-        if ( !kexec_crash_area.start && (s < e) &&
-             ((e-s) >= kexec_crash_area.size) )
+#ifdef CONFIG_X86_32
+        /* Confine the kexec area to below 4Gb. */
+        e = min_t(uint64_t, e, 1ULL << 32);
+#endif
+        /* Don't overlap with modules. */
+        e = consider_modules(s, e, PAGE_ALIGN(kexec_crash_area.size),
+                             mod, mbi->mods_count, -1);
+        if ( !kexec_crash_area.start && (s < e) )
         {
             e = (e - kexec_crash_area.size) & PAGE_MASK;
             kexec_crash_area.start = e;
         }
     }
 
-    if ( !initial_images_start )
+    if ( modules_headroom && !mod->reserved )
         EARLY_FAIL("Not enough memory to relocate the dom0 kernel image.\n");
-    reserve_e820_ram(&boot_e820, initial_images_base, initial_images_end);
+    for ( i = 0; i < mbi->mods_count; ++i )
+    {
+        uint64_t s = (uint64_t)mod[i].mod_start << PAGE_SHIFT;
+
+        reserve_e820_ram(&boot_e820, s, s + PAGE_ALIGN(mod[i].mod_end));
+    }
 
 #if defined(CONFIG_X86_32)
     xenheap_initial_phys_start = (PFN_UP(__pa(&_end)) + 1) << PAGE_SHIFT;
@@ -827,7 +965,10 @@ void __init __start_xen(unsigned long mb
      */
     for ( i = 0; i < boot_e820.nr_map; i++ )
     {
-        uint64_t s, e, map_s, map_e, mask = PAGE_SIZE - 1;
+        uint64_t s, e, mask = PAGE_SIZE - 1;
+#ifdef CONFIG_X86_64
+        uint64_t map_s, map_e;
+#endif
 
         /* Only page alignment required now. */
         s = (boot_e820.map[i].addr + mask) & ~mask;
@@ -842,7 +983,7 @@ void __init __start_xen(unsigned long mb
 
 #ifdef __x86_64__
         if ( !acpi_boot_table_init_done &&
-             s >= BOOTSTRAP_DIRECTMAP_END &&
+             s >= (1ULL << 32) &&
              !acpi_boot_table_init() )
         {
             acpi_boot_table_init_done = 1;
@@ -881,26 +1022,60 @@ void __init __start_xen(unsigned long mb
 
         set_pdx_range(s >> PAGE_SHIFT, e >> PAGE_SHIFT);
 
-        /* Need to create mappings above 16MB. */
-        map_s = max_t(uint64_t, s, 16<<20);
-        map_e = e;
-#if defined(CONFIG_X86_32) /* mappings are truncated on x86_32 */
-        map_e = min_t(uint64_t, map_e, BOOTSTRAP_DIRECTMAP_END);
-#endif
+#ifdef CONFIG_X86_64
+        /* Need to create mappings above BOOTSTRAP_MAP_BASE. */
+        map_s = max_t(uint64_t, s, BOOTSTRAP_MAP_BASE);
+        map_e = min_t(uint64_t, e,
+                      ARRAY_SIZE(l2_identmap) << L2_PAGETABLE_SHIFT);
 
         /* Pass mapped memory to allocator /before/ creating new mappings. */
-        init_boot_pages(s, min_t(uint64_t, map_s, e));
+        init_boot_pages(s, min(map_s, e));
+        s = map_s;
+        if ( s < map_e )
+        {
+            uint64_t mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
+
+            map_s = (s + mask) & ~mask;
+            map_e &= ~mask;
+            init_boot_pages(map_s, map_e);
+        }
+
+        if ( map_s > map_e )
+            map_s = map_e = s;
 
         /* Create new mappings /before/ passing memory to the allocator. */
-        if ( map_s < map_e )
-            map_pages_to_xen(
-                (unsigned long)maddr_to_bootstrap_virt(map_s),
-                map_s >> PAGE_SHIFT, (map_e-map_s) >> PAGE_SHIFT,
-                PAGE_HYPERVISOR);
+        if ( map_e < e )
+        {
+            map_pages_to_xen((unsigned long)__va(map_e), map_e >> PAGE_SHIFT,
+                             (e - map_e) >> PAGE_SHIFT, PAGE_HYPERVISOR);
+            init_boot_pages(map_e, e);
+        }
+        if ( s < map_s )
+        {
+            map_pages_to_xen((unsigned long)__va(s), s >> PAGE_SHIFT,
+                             (map_s - s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
+            init_boot_pages(s, map_s);
+        }
+#else
+        init_boot_pages(s, e);
+#endif
+    }
 
-        /* Pass remainder of this memory chunk to the allocator. */
-        init_boot_pages(map_s, e);
+    for ( i = 0; i < mbi->mods_count; ++i )
+    {
+        set_pdx_range(mod[i].mod_start,
+                      mod[i].mod_start + PFN_UP(mod[i].mod_end));
+#ifdef CONFIG_X86_64
+        map_pages_to_xen((unsigned long)mfn_to_virt(mod[i].mod_start),
+                         mod[i].mod_start,
+                         PFN_UP(mod[i].mod_end), PAGE_HYPERVISOR);
+#endif
     }
+#ifdef CONFIG_X86_64
+    map_pages_to_xen((unsigned long)__va(kexec_crash_area.start),
+                     kexec_crash_area.start >> PAGE_SHIFT,
+                     PFN_UP(kexec_crash_area.size), PAGE_HYPERVISOR);
+#endif
 
     memguard_init();
 
@@ -1023,7 +1198,7 @@ void __init __start_xen(unsigned long mb
 
     init_IRQ();
 
-    xsm_init(&initrdidx, mbi, initial_images_start);
+    xsm_init(&initrdidx, mbi, bootstrap_map);
 
     timer_init();
 
@@ -1135,12 +1310,6 @@ void __init __start_xen(unsigned long mb
         cmdline = dom0_cmdline;
     }
 
-    if ( (initrdidx > 0) && (initrdidx < mbi->mods_count) )
-    {
-        _initrd_start = mod[initrdidx].mod_start;
-        _initrd_len   = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
-    }
-
     if ( xen_cpuidle )
         xen_processor_pmbits |= XEN_PROCESSOR_PM_CX;
 
@@ -1148,13 +1317,10 @@ void __init __start_xen(unsigned long mb
      * We're going to setup domain0 using the module(s) that we stashed safely
      * above our heap. The second module, if present, is an initrd ramdisk.
      */
-    if ( construct_dom0(dom0,
-                        initial_images_base,
-                        initial_images_start,
-                        mod[0].mod_end-mod[0].mod_start,
-                        _initrd_start,
-                        _initrd_len,
-                        cmdline) != 0)
+    if ( construct_dom0(dom0, mod, modules_headroom,
+                        (initrdidx > 0) && (initrdidx < mbi->mods_count)
+                        ? mod + initrdidx : NULL,
+                        bootstrap_map, cmdline) != 0)
         panic("Could not set up DOM0 guest OS\n");
 
     /* Scrub RAM that is still free and so may go to an unprivileged domain. */
--- 2010-11-09.orig/xen/arch/x86/x86_64/mm.c    2010-09-06 08:21:15.000000000 +0200
+++ 2010-11-09/xen/arch/x86/x86_64/mm.c 2010-11-09 10:30:06.000000000 +0100
@@ -65,6 +65,12 @@ l3_pgentry_t __attribute__ ((__section__
 l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
     l2_xenmap[L2_PAGETABLE_ENTRIES];
 
+/* Enough page directories to map into the bottom 1GB. */
+l3_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+    l3_bootmap[L3_PAGETABLE_ENTRIES];
+l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+    l2_bootmap[L2_PAGETABLE_ENTRIES];
+
 int __mfn_valid(unsigned long mfn)
 {
     return likely(mfn < max_page) &&
--- 2010-11-09.orig/xen/include/asm-x86/domain.h        2010-11-05 09:22:58.000000000 +0100
+++ 2010-11-09/xen/include/asm-x86/domain.h     2010-11-09 10:30:06.000000000 +0100
@@ -466,16 +466,6 @@ void domain_cpuid(struct domain *d,
                   unsigned int  *ecx,
                   unsigned int  *edx);
 
-int construct_dom0(
-    struct domain *d,
-    unsigned long image_base,
-    unsigned long image_start, unsigned long image_len,
-    unsigned long initrd_start, unsigned long initrd_len,
-    char *cmdline);
-
-extern unsigned long initial_images_nrpages(void);
-extern void discard_initial_images(void);
-
 #endif /* __ASM_DOMAIN_H__ */
 
 /*
--- 2010-11-09.orig/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h       2010-06-01 13:39:57.000000000 +0200
+++ 2010-11-09/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h    2010-11-09 10:30:06.000000000 +0100
@@ -30,7 +30,6 @@
         &amd_iommu_head, list)
 
 #define DMA_32BIT_MASK  0x00000000ffffffffULL
-#define PAGE_ALIGN(addr)    (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
 
 extern int amd_iommu_debug;
 extern int amd_iommu_perdev_intremap;
--- 2010-11-09.orig/xen/include/asm-x86/page.h  2010-05-28 13:59:16.000000000 +0200
+++ 2010-11-09/xen/include/asm-x86/page.h       2010-11-09 10:30:06.000000000 +0100
@@ -302,6 +302,7 @@ extern l2_pgentry_t   idle_pg_table_l2[
 extern l2_pgentry_t  *compat_idle_pg_table_l2;
 extern unsigned int   m2p_compat_vstart;
 #endif
+extern l2_pgentry_t l2_identmap[4*L2_PAGETABLE_ENTRIES];
 void paging_init(void);
 void setup_idle_pagetable(void);
 #endif /* !defined(__ASSEMBLY__) */
@@ -397,6 +398,7 @@ static inline uint32_t cacheattr_to_pte_
 
 #define PFN_DOWN(x)   ((x) >> PAGE_SHIFT)
 #define PFN_UP(x)     (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK)
 
 #endif /* __X86_PAGE_H__ */
 
--- 2010-11-09.orig/xen/include/asm-x86/setup.h 2010-05-20 09:59:27.000000000 +0200
+++ 2010-11-09/xen/include/asm-x86/setup.h      2010-11-09 10:30:06.000000000 +0100
@@ -1,6 +1,8 @@
 #ifndef __X86_SETUP_H_
 #define __X86_SETUP_H_
 
+#include <xen/multiboot.h>
+
 extern int early_boot;
 extern unsigned long xenheap_initial_phys_start;
 
@@ -25,4 +27,14 @@ void init_IRQ(void);
 void vesa_init(void);
 void vesa_mtrr_init(void);
 
+int construct_dom0(
+    struct domain *d,
+    const module_t *kernel, unsigned long kernel_headroom,
+    const module_t *initrd,
+    void *(*bootstrap_map)(const module_t *),
+    char *cmdline);
+
+unsigned long initial_images_nrpages(void);
+void discard_initial_images(void);
+
 #endif
--- 2010-11-09.orig/xen/include/xsm/xsm.h       2010-11-09 08:25:42.000000000 +0100
+++ 2010-11-09/xen/include/xsm/xsm.h    2010-11-09 10:30:06.000000000 +0100
@@ -432,14 +432,15 @@ static inline long __do_xsm_op (XEN_GUES
 
 #ifdef XSM_ENABLE
 extern int xsm_init(unsigned int *initrdidx, const multiboot_info_t *mbi,
-                                          unsigned long initial_images_start);
+                    void *(*bootstrap_map)(const module_t *));
 extern int xsm_policy_init(unsigned int *initrdidx, const multiboot_info_t *mbi,
-                                           unsigned long initial_images_start);
+                           void *(*bootstrap_map)(const module_t *));
 extern int register_xsm(struct xsm_operations *ops);
 extern int unregister_xsm(struct xsm_operations *ops);
 #else
 static inline int xsm_init (unsigned int *initrdidx,
-                const multiboot_info_t *mbi, unsigned long initial_images_start)
+                            const multiboot_info_t *mbi,
+                            void *(*bootstrap_map)(const module_t *))
 {
     return 0;
 }
--- 2010-11-09.orig/xen/xsm/xsm_core.c  2009-10-07 13:31:36.000000000 +0200
+++ 2010-11-09/xen/xsm/xsm_core.c       2010-11-09 10:30:06.000000000 +0100
@@ -47,7 +47,7 @@ static void __init do_xsm_initcalls(void
 }
 
 int __init xsm_init(unsigned int *initrdidx, const multiboot_info_t *mbi,
-                    unsigned long initial_images_start)
+                    void *(*bootstrap_map)(const module_t *))
 {
     int ret = 0;
 
@@ -55,9 +55,10 @@ int __init xsm_init(unsigned int *initrd
 
     if ( XSM_MAGIC )
     {
-        ret = xsm_policy_init(initrdidx, mbi, initial_images_start);
+        ret = xsm_policy_init(initrdidx, mbi, bootstrap_map);
         if ( ret )
         {
+            bootstrap_map(NULL);
             printk("%s: Error initializing policy.\n", __FUNCTION__);
             return -EINVAL;
         }
@@ -65,6 +66,7 @@ int __init xsm_init(unsigned int *initrd
 
     if ( verify(&dummy_xsm_ops) )
     {
+        bootstrap_map(NULL);
         printk("%s could not verify "
                "dummy_xsm_ops structure.\n", __FUNCTION__);
         return -EIO;
@@ -72,6 +74,7 @@ int __init xsm_init(unsigned int *initrd
 
     xsm_ops = &dummy_xsm_ops;
     do_xsm_initcalls();
+    bootstrap_map(NULL);
 
     return 0;
 }
--- 2010-11-09.orig/xen/xsm/xsm_policy.c        2007-11-02 17:25:59.000000000 +0100
+++ 2010-11-09/xen/xsm/xsm_policy.c     2010-11-09 10:30:06.000000000 +0100
@@ -22,11 +22,11 @@
 #include <xsm/xsm.h>
 #include <xen/multiboot.h>
 
-char *policy_buffer = NULL;
-u32 policy_size = 0;
+char *__initdata policy_buffer = NULL;
+u32 __initdata policy_size = 0;
 
 int xsm_policy_init(unsigned int *initrdidx, const multiboot_info_t *mbi,
-                           unsigned long initial_images_start)
+                    void *(*bootstrap_map)(const module_t *))
 {
     int i;
     module_t *mod = (module_t *)__va(mbi->mods_addr);
@@ -40,15 +40,8 @@ int xsm_policy_init(unsigned int *initrd
      */
     for ( i = mbi->mods_count-1; i >= 1; i-- )
     {
-        start = initial_images_start + (mod[i].mod_start-mod[0].mod_start);
-#if defined(__i386__)
-        _policy_start = (u32 *)start;
-#elif defined(__x86_64__)
-        _policy_start = maddr_to_virt(start);
-#else
-        _policy_start = NULL;
-#endif
-        _policy_len   = mod[i].mod_end - mod[i].mod_start;
+        _policy_start = bootstrap_map(mod + i);
+        _policy_len   = mod[i].mod_end;
 
         if ( (xsm_magic_t)(*_policy_start) == XSM_MAGIC )
         {
@@ -63,6 +56,8 @@ int xsm_policy_init(unsigned int *initrd
             break;
 
         }
+
+        bootstrap_map(NULL);
     }
 
     return rc;

