By doing so, we're no longer restricted to placing all boot
loader modules into the low 1Gb/4Gb (32-/64-bit) of memory, nor is
there a dependency anymore on where the boot loader places the modules.
We're also no longer required to copy the modules to a place below
4Gb, nor to put them all together into a single piece of memory.
Further, this allows even the 32-bit Dom0 kernel to be loaded anywhere in
physical memory (unless it doesn't support PAE-above-4G).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
--- 2010-11-09.orig/xen/arch/x86/boot/head.S 2010-08-06 08:44:33.000000000
+0200
+++ 2010-11-09/xen/arch/x86/boot/head.S 2010-11-09 10:30:06.000000000 +0100
@@ -110,12 +110,15 @@ __start:
/* Initialise L2 identity-map and xen page table entries (16MB). */
mov $sym_phys(l2_identmap),%edi
mov $sym_phys(l2_xenmap),%esi
+ mov $sym_phys(l2_bootmap),%edx
mov $0x1e3,%eax /* PRESENT+RW+A+D+2MB+GLOBAL */
mov $8,%ecx
1: mov %eax,(%edi)
add $8,%edi
mov %eax,(%esi)
add $8,%esi
+ mov %eax,(%edx)
+ add $8,%edx
add $(1<<L2_PAGETABLE_SHIFT),%eax
loop 1b
/* Initialise L3 identity-map page directory entries. */
@@ -129,9 +132,13 @@ __start:
/* Initialise L3 xen-map page directory entry. */
mov $(sym_phys(l2_xenmap)+7),%eax
mov %eax,sym_phys(l3_xenmap) + l3_table_offset(XEN_VIRT_START)*8
- /* Hook identity-map and xen-map L3 tables into PML4. */
+ /* Initialise L3 boot-map page directory entry. */
+ mov $(sym_phys(l2_bootmap)+7),%eax
+ mov %eax,sym_phys(l3_bootmap) + 0*8
+ /* Hook identity-map, xen-map, and boot-map L3 tables into PML4. */
+ mov $(sym_phys(l3_bootmap)+7),%eax
+ mov %eax,sym_phys(idle_pg_table) + 0*8
mov $(sym_phys(l3_identmap)+7),%eax
- mov %eax,sym_phys(idle_pg_table) + ( 0*8) /* PML4[ 0]: 1:1 map */
mov %eax,sym_phys(idle_pg_table) +
l4_table_offset(DIRECTMAP_VIRT_START)*8
mov $(sym_phys(l3_xenmap)+7),%eax
mov %eax,sym_phys(idle_pg_table) +
l4_table_offset(XEN_VIRT_START)*8
@@ -176,6 +183,7 @@ __start:
#if defined(__x86_64__)
mov %edi,sym_phys(l2_identmap)
mov %edi,sym_phys(l2_xenmap)
+ mov %edi,sym_phys(l2_bootmap)
#else
mov %edi,sym_phys(idle_pg_table_l2)
mov %edi,sym_phys(idle_pg_table_l2) + (__PAGE_OFFSET>>18)
--- 2010-11-09.orig/xen/arch/x86/boot/Makefile 2010-04-22 14:43:25.000000000
+0200
+++ 2010-11-09/xen/arch/x86/boot/Makefile 2010-11-09 10:30:06.000000000
+0100
@@ -4,6 +4,6 @@ head.o: reloc.S
BOOT_TRAMPOLINE := $(shell sed -n
's,^\#define[[:space:]]\{1\,\}BOOT_TRAMPOLINE[[:space:]]\{1\,\},,p'
$(BASEDIR)/include/asm-x86/config.h)
%.S: %.c
- RELOC=$(BOOT_TRAMPOLINE) XEN_BITSPERLONG=$(patsubst
x86_%,%,$(TARGET_SUBARCH)) $(MAKE) -f build32.mk $@
+ RELOC=$(BOOT_TRAMPOLINE) $(MAKE) -f build32.mk $@
reloc.S: $(BASEDIR)/include/asm-x86/config.h
--- 2010-11-09.orig/xen/arch/x86/boot/build32.mk 2010-08-06
08:44:33.000000000 +0200
+++ 2010-11-09/xen/arch/x86/boot/build32.mk 2010-11-09 10:30:06.000000000
+0100
@@ -19,6 +19,6 @@ CFLAGS += -Werror -fno-builtin -msoft-fl
$(LD) $(LDFLAGS_DIRECT) -N -Ttext $(RELOC) -o $@ $<
%.o: %.c
- $(CC) $(CFLAGS) -DXEN_BITSPERLONG=$(XEN_BITSPERLONG) -c $< -o $@
+ $(CC) $(CFLAGS) -c $< -o $@
reloc.o: $(BASEDIR)/include/asm-x86/config.h
--- 2010-11-09.orig/xen/arch/x86/boot/reloc.c 2010-08-06 08:44:33.000000000
+0200
+++ 2010-11-09/xen/arch/x86/boot/reloc.c 2010-11-09 10:30:06.000000000
+0100
@@ -68,7 +68,6 @@ multiboot_info_t *reloc(multiboot_info_t
{
module_t *mods = reloc_mbi_struct(
(module_t *)mbi->mods_addr, mbi->mods_count * sizeof(module_t));
- u32 max_addr = 0;
mbi->mods_addr = (u32)mods;
@@ -76,29 +75,6 @@ multiboot_info_t *reloc(multiboot_info_t
{
if ( mods[i].string )
mods[i].string = (u32)reloc_mbi_string((char *)mods[i].string);
- if ( mods[i].mod_end > max_addr )
- max_addr = mods[i].mod_end;
- }
-
- /*
- * 32-bit Xen only maps bottom 1GB of memory at boot time. Relocate
- * modules which extend beyond this (GRUB2 in particular likes to
- * place modules as high as possible below 4GB).
- */
-#define BOOTMAP_END (1ul<<30) /* 1GB */
- if ( (XEN_BITSPERLONG == 32) && (max_addr > BOOTMAP_END) )
- {
- char *mod_alloc = (char *)BOOTMAP_END;
- for ( i = 0; i < mbi->mods_count; i++ )
- mod_alloc -= mods[i].mod_end - mods[i].mod_start;
- for ( i = 0; i < mbi->mods_count; i++ )
- {
- u32 mod_len = mods[i].mod_end - mods[i].mod_start;
- mods[i].mod_start = (u32)memcpy(
- mod_alloc, (char *)mods[i].mod_start, mod_len);
- mods[i].mod_end = mods[i].mod_start + mod_len;
- mod_alloc += mod_len;
- }
}
}
--- 2010-11-09.orig/xen/arch/x86/domain_build.c 2010-08-06 08:44:33.000000000
+0200
+++ 2010-11-09/xen/arch/x86/domain_build.c 2010-11-09 10:30:06.000000000
+0100
@@ -31,6 +31,7 @@
#include <asm/p2m.h>
#include <asm/e820.h>
#include <asm/acpi.h>
+#include <asm/setup.h>
#include <asm/bzimage.h> /* for bzimage_parse */
#include <asm/io_apic.h>
@@ -284,9 +285,9 @@ static void __init process_dom0_ioports_
int __init construct_dom0(
struct domain *d,
- unsigned long _image_base,
- unsigned long _image_start, unsigned long image_len,
- unsigned long _initrd_start, unsigned long initrd_len,
+ const module_t *image, unsigned long image_headroom,
+ const module_t *initrd,
+ void *(*bootstrap_map)(const module_t *),
char *cmdline)
{
int i, cpu, rc, compatible, compat32, order, machine;
@@ -301,16 +302,14 @@ int __init construct_dom0(
start_info_t *si;
struct vcpu *v = d->vcpu[0];
unsigned long long value;
-#if defined(__i386__)
- char *image_base = (char *)_image_base; /* use lowmem mappings */
- char *image_start = (char *)_image_start; /* use lowmem mappings */
- char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
-#elif defined(__x86_64__)
- char *image_base = __va(_image_base);
- char *image_start = __va(_image_start);
- char *initrd_start = __va(_initrd_start);
-#endif
-#if CONFIG_PAGING_LEVELS >= 4
+ char *image_base = bootstrap_map(image);
+ unsigned long image_len = image->mod_end;
+ char *image_start = image_base + image_headroom;
+ unsigned long initrd_len = initrd ? initrd->mod_end : 0;
+#if CONFIG_PAGING_LEVELS < 4
+ module_t mpt;
+ void *mpt_ptr;
+#else
l4_pgentry_t *l4tab = NULL, *l4start = NULL;
#endif
l3_pgentry_t *l3tab = NULL, *l3start = NULL;
@@ -340,7 +339,7 @@ int __init construct_dom0(
unsigned long v_end;
/* Machine address of next candidate page-table page. */
- unsigned long mpt_alloc;
+ paddr_t mpt_alloc;
/* Sanity! */
BUG_ON(d->domain_id != 0);
@@ -495,17 +494,17 @@ int __init construct_dom0(
if ( (1UL << order) > nr_pages )
panic("Domain 0 allocation is too small for kernel image.\n");
-#ifdef __i386__
- /* Ensure that our low-memory 1:1 mapping covers the allocation. */
- page = alloc_domheap_pages(d, order, MEMF_bits(30));
-#else
if ( parms.p2m_base != UNSET_ADDR )
{
vphysmap_start = parms.p2m_base;
vphysmap_end = vphysmap_start + nr_pages * sizeof(unsigned long);
}
- page = alloc_domheap_pages(d, order, 0);
+#ifdef __i386__
+ if ( !test_bit(XENFEAT_pae_pgdir_above_4gb, parms.f_supported) )
+ page = alloc_domheap_pages(d, order, MEMF_bits(32));
+ else
#endif
+ page = alloc_domheap_pages(d, order, 0);
if ( page == NULL )
panic("Not enough RAM for domain 0 allocation.\n");
alloc_spfn = page_to_mfn(page);
@@ -534,8 +533,7 @@ int __init construct_dom0(
_p(v_start), _p(v_end));
printk(" ENTRY ADDRESS: %p\n", _p(parms.virt_entry));
- mpt_alloc = (vpt_start - v_start) +
- (unsigned long)pfn_to_paddr(alloc_spfn);
+ mpt_alloc = (vpt_start - v_start) + pfn_to_paddr(alloc_spfn);
#if defined(__i386__)
/*
@@ -548,17 +546,25 @@ int __init construct_dom0(
return -EINVAL;
}
+ mpt.mod_start = mpt_alloc >> PAGE_SHIFT;
+ mpt.mod_end = vpt_end - vpt_start;
+ mpt_ptr = bootstrap_map(&mpt);
+#define MPT_ALLOC(n) (mpt_ptr += (n)*PAGE_SIZE, mpt_alloc += (n)*PAGE_SIZE)
+
/* WARNING: The new domain must have its 'processor' field filled in! */
- l3start = l3tab = (l3_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
- l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += 4*PAGE_SIZE;
+ l3start = l3tab = mpt_ptr; MPT_ALLOC(1);
+ l2start = l2tab = mpt_ptr; MPT_ALLOC(4);
for (i = 0; i < L3_PAGETABLE_ENTRIES; i++) {
- copy_page(l2tab + i * L2_PAGETABLE_ENTRIES,
- idle_pg_table_l2 + i * L2_PAGETABLE_ENTRIES);
- l3tab[i] = l3e_from_paddr((u32)l2tab + i*PAGE_SIZE, L3_PROT);
+ if ( i < 3 )
+ clear_page(l2tab + i * L2_PAGETABLE_ENTRIES);
+ else
+ copy_page(l2tab + i * L2_PAGETABLE_ENTRIES,
+ idle_pg_table_l2 + i * L2_PAGETABLE_ENTRIES);
+ l3tab[i] = l3e_from_pfn(mpt.mod_start + 1 + i, L3_PROT);
l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
- l2e_from_paddr((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
+ l2e_from_pfn(mpt.mod_start + 1 + i, __PAGE_HYPERVISOR);
}
- v->arch.guest_table = pagetable_from_paddr((unsigned long)l3start);
+ v->arch.guest_table = pagetable_from_pfn(mpt.mod_start);
for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
l2tab[l2_linear_offset(PERDOMAIN_VIRT_START) + i] =
@@ -570,9 +576,9 @@ int __init construct_dom0(
{
if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
{
- l1start = l1tab = (l1_pgentry_t *)mpt_alloc;
- mpt_alloc += PAGE_SIZE;
- *l2tab = l2e_from_paddr((unsigned long)l1start, L2_PROT);
+ l1tab = mpt_ptr;
+ *l2tab = l2e_from_paddr(mpt_alloc, L2_PROT);
+ MPT_ALLOC(1);
l2tab++;
clear_page(l1tab);
if ( count == 0 )
@@ -587,11 +593,14 @@ int __init construct_dom0(
mfn++;
}
+#undef MPT_ALLOC
/* Pages that are part of page tables must be read only. */
+ mpt_alloc = (paddr_t)mpt.mod_start << PAGE_SHIFT;
+ mpt_ptr = l3start;
l2tab = l2start + l2_linear_offset(vpt_start);
- l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_paddr(*l2tab);
- l1tab += l1_table_offset(vpt_start);
+ l1start = mpt_ptr + (l2e_get_paddr(*l2tab) - mpt_alloc);
+ l1tab = l1start + l1_table_offset(vpt_start);
for ( count = 0; count < nr_pt_pages; count++ )
{
page = mfn_to_page(l1e_get_pfn(*l1tab));
@@ -627,9 +636,15 @@ int __init construct_dom0(
break;
}
if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
- l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_paddr(*++l2tab);
+ l1tab = mpt_ptr + (l2e_get_paddr(*++l2tab) - mpt_alloc);
}
+ /*
+ * Put Xen's first L3 entry into Dom0's page tables so that updates
+ * through bootstrap_map() will affect the page tables we will run on.
+ */
+ l3start[0] = l3e_from_paddr(__pa(idle_pg_table_l2), L3_PROT);
+
#elif defined(__x86_64__)
/* Overlap with Xen protected area? */
@@ -807,6 +822,7 @@ int __init construct_dom0(
/* Copy the OS image and free temporary buffer. */
elf.dest = (void*)vkern_start;
elf_load_binary(&elf);
+ bootstrap_map(NULL);
if ( UNSET_ADDR != parms.virt_hypercall )
{
@@ -823,7 +839,12 @@ int __init construct_dom0(
/* Copy the initial ramdisk. */
if ( initrd_len != 0 )
+ {
+ char *initrd_start = bootstrap_map(initrd);
+
memcpy((void *)vinitrd_start, initrd_start, initrd_len);
+ bootstrap_map(NULL);
+ }
/* Free temporary buffers. */
discard_initial_images();
@@ -1033,7 +1054,22 @@ int __init construct_dom0(
write_ptbase(current);
#if defined(__i386__)
- /* Destroy low mappings - they were only for our convenience. */
+ /* Restore Dom0's first L3 entry. */
+ mpt.mod_end = 5 * PAGE_SIZE;
+ l3start = mpt_ptr = bootstrap_map(&mpt);
+ l2start = mpt_ptr + PAGE_SIZE;
+ l3start[0] = l3e_from_pfn(mpt.mod_start + 1, L3_PROT);
+
+ /* Re-setup CR3 */
+ if ( paging_mode_enabled(d) )
+ paging_update_paging_modes(v);
+ else
+ update_cr3(v);
+
+ /*
+ * Destroy low mappings - they were only for our convenience. Note
+ * that zap_low_mappings() exceeds what bootstrap_map(NULL) would do.
+ */
zap_low_mappings(l2start);
#endif
--- 2010-11-09.orig/xen/arch/x86/setup.c 2010-08-12 08:17:22.000000000
+0200
+++ 2010-11-09/xen/arch/x86/setup.c 2010-11-09 10:30:06.000000000 +0100
@@ -45,14 +45,6 @@
#include <asm/setup.h>
#include <xen/cpu.h>
-#if defined(CONFIG_X86_64)
-#define BOOTSTRAP_DIRECTMAP_END (1UL << 32) /* 4GB */
-#define maddr_to_bootstrap_virt(m) maddr_to_virt(m)
-#else
-#define BOOTSTRAP_DIRECTMAP_END (1UL << 30) /* 1GB */
-#define maddr_to_bootstrap_virt(m) ((void *)(long)(m))
-#endif
-
extern u16 boot_edid_caps;
extern u8 boot_edid_info[128];
extern struct boot_video_info boot_vid_info;
@@ -152,21 +144,34 @@ static void __init parse_acpi_param(char
for ( ; ; ) halt(); \
} while (0)
-static unsigned long __initdata initial_images_base;
-static unsigned long __initdata initial_images_start;
-static unsigned long __initdata initial_images_end;
+static const module_t *__initdata initial_images;
+static unsigned int __initdata nr_initial_images;
unsigned long __init initial_images_nrpages(void)
{
- ASSERT(!(initial_images_base & ~PAGE_MASK));
- ASSERT(!(initial_images_end & ~PAGE_MASK));
- return ((initial_images_end >> PAGE_SHIFT) -
- (initial_images_base >> PAGE_SHIFT));
+ unsigned long nr;
+ unsigned int i;
+
+ for ( nr = i = 0; i < nr_initial_images; ++i )
+ nr += PFN_UP(initial_images[i].mod_end);
+
+ return nr;
}
void __init discard_initial_images(void)
{
- init_domheap_pages(initial_images_base, initial_images_end);
+ unsigned int i;
+
+ for ( i = 0; i < nr_initial_images; ++i )
+ {
+ uint64_t start = (uint64_t)initial_images[i].mod_start << PAGE_SHIFT;
+
+ init_domheap_pages(start,
+ start + PAGE_ALIGN(initial_images[i].mod_end));
+ }
+
+ nr_initial_images = 0;
+ initial_images = NULL;
}
static void free_xen_data(char *s, char *e)
@@ -257,33 +262,128 @@ static void __init normalise_cpu_order(v
}
}
+#define BOOTSTRAP_MAP_BASE (16UL << 20)
+#define BOOTSTRAP_MAP_LIMIT (1UL << L3_PAGETABLE_SHIFT)
+
/*
* Ensure a given physical memory range is present in the bootstrap mappings.
* Use superpage mappings to ensure that pagetable memory needn't be allocated.
*/
-static void __init bootstrap_map(unsigned long start, unsigned long end)
+static void *__init bootstrap_map(const module_t *mod)
{
- unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
- start = max_t(unsigned long, start & ~mask, 16UL << 20);
- end = (end + mask) & ~mask;
+ static unsigned long __initdata map_cur = BOOTSTRAP_MAP_BASE;
+ uint64_t start, end, mask = (1L << L2_PAGETABLE_SHIFT) - 1;
+ void *ret;
+
+#ifdef __x86_64__
+ if ( !early_boot )
+ return mod ? mfn_to_virt(mod->mod_start) : NULL;
+#endif
+
+ if ( !mod )
+ {
+ destroy_xen_mappings(BOOTSTRAP_MAP_BASE, BOOTSTRAP_MAP_LIMIT);
+ map_cur = BOOTSTRAP_MAP_BASE;
+ return NULL;
+ }
+
+ start = (uint64_t)mod->mod_start << PAGE_SHIFT;
+ end = start + mod->mod_end;
if ( start >= end )
- return;
- if ( end > BOOTSTRAP_DIRECTMAP_END )
- panic("Cannot access memory beyond end of "
- "bootstrap direct-map area\n");
- map_pages_to_xen(
- (unsigned long)maddr_to_bootstrap_virt(start),
- start >> PAGE_SHIFT, (end-start) >> PAGE_SHIFT, PAGE_HYPERVISOR);
+ return NULL;
+
+ if ( end <= BOOTSTRAP_MAP_BASE )
+ return (void *)(unsigned long)start;
+
+ ret = (void *)(map_cur + (unsigned long)(start & mask));
+ start &= ~mask;
+ end = (end + mask) & ~mask;
+ if ( end - start > BOOTSTRAP_MAP_LIMIT - map_cur )
+ return NULL;
+
+ map_pages_to_xen(map_cur, start >> PAGE_SHIFT,
+ (end - start) >> PAGE_SHIFT, PAGE_HYPERVISOR);
+ map_cur += end - start;
+ return ret;
}
-static void __init move_memory(
- unsigned long dst, unsigned long src_start, unsigned long src_end)
+static void *__init move_memory(
+ uint64_t dst, uint64_t src, unsigned int size, bool_t keep)
{
- bootstrap_map(src_start, src_end);
- bootstrap_map(dst, dst + src_end - src_start);
- memmove(maddr_to_bootstrap_virt(dst),
- maddr_to_bootstrap_virt(src_start),
- src_end - src_start);
+ unsigned int blksz = BOOTSTRAP_MAP_LIMIT - BOOTSTRAP_MAP_BASE;
+ unsigned int mask = (1L << L2_PAGETABLE_SHIFT) - 1;
+
+ if ( src + size > BOOTSTRAP_MAP_BASE )
+ blksz >>= 1;
+
+ while ( size )
+ {
+ module_t mod;
+ unsigned int soffs = src & mask;
+ unsigned int doffs = dst & mask;
+ unsigned int sz;
+ void *d, *s;
+
+ mod.mod_start = (src - soffs) >> PAGE_SHIFT;
+ mod.mod_end = soffs + size;
+ if ( mod.mod_end > blksz )
+ mod.mod_end = blksz;
+ sz = mod.mod_end - soffs;
+ s = bootstrap_map(&mod);
+
+ mod.mod_start = (dst - doffs) >> PAGE_SHIFT;
+ mod.mod_end = doffs + size;
+ if ( mod.mod_end > blksz )
+ mod.mod_end = blksz;
+ if ( sz > mod.mod_end - doffs )
+ sz = mod.mod_end - doffs;
+ d = bootstrap_map(&mod);
+
+ memmove(d + doffs, s + soffs, sz);
+
+ dst += sz;
+ src += sz;
+ size -= sz;
+
+ if ( keep )
+ return size ? NULL : d + doffs;
+
+ bootstrap_map(NULL);
+ }
+
+ return NULL;
+}
+
+static uint64_t __init consider_modules(
+ uint64_t s, uint64_t e, uint32_t size, const module_t *mod,
+ unsigned int nr_mods, unsigned int this_mod)
+{
+ unsigned int i;
+
+ if ( s > e || e - s < size )
+ return 0;
+
+ for ( i = 0; i < nr_mods ; ++i )
+ {
+ uint64_t start = (uint64_t)mod[i].mod_start << PAGE_SHIFT;
+ uint64_t end = start + PAGE_ALIGN(mod[i].mod_end);
+
+ if ( i == this_mod )
+ continue;
+
+ if ( s < end && start < e )
+ {
+ end = consider_modules(end, e, size, mod + i + 1,
+ nr_mods - i - 1, this_mod - i - 1);
+ if ( end )
+ return end;
+
+ return consider_modules(s, start, size, mod + i + 1,
+ nr_mods - i - 1, this_mod - i - 1);
+ }
+ }
+
+ return e;
}
static void __init setup_max_pdx(void)
@@ -447,11 +547,10 @@ void __init __start_xen(unsigned long mb
{
char *memmap_type = NULL;
char *cmdline, *kextra, *loader;
- unsigned long _initrd_start = 0, _initrd_len = 0;
unsigned int initrdidx = 1;
multiboot_info_t *mbi = __va(mbi_p);
module_t *mod = (module_t *)__va(mbi->mods_addr);
- unsigned long nr_pages, modules_length, modules_headroom;
+ unsigned long nr_pages, modules_headroom;
int i, j, e820_warn = 0, bytes = 0;
bool_t acpi_boot_table_init_done = 0;
struct ns16550_defaults ns16550 = {
@@ -647,6 +746,9 @@ void __init __start_xen(unsigned long mb
set_kexec_crash_area_size((u64)nr_pages << PAGE_SHIFT);
kexec_reserve_area(&boot_e820);
+ initial_images = mod;
+ nr_initial_images = mbi->mods_count;
+
/*
* Iterate backwards over all superpage-aligned RAM regions.
*
@@ -660,48 +762,64 @@ void __init __start_xen(unsigned long mb
* we can relocate the dom0 kernel and other multiboot modules. Also, on
* x86/64, we relocate Xen to higher memory.
*/
- modules_length = 0;
for ( i = 0; i < mbi->mods_count; i++ )
- modules_length += mod[i].mod_end - mod[i].mod_start;
+ {
+ if ( mod[i].mod_start & (PAGE_SIZE - 1) )
+ EARLY_FAIL("Bootloader didn't honor module alignment request.\n");
+ mod[i].mod_end -= mod[i].mod_start;
+ mod[i].mod_start >>= PAGE_SHIFT;
+ mod[i].reserved = 0;
+ }
- /* ensure mod[0] is mapped before parsing */
- bootstrap_map(mod[0].mod_start, mod[0].mod_end);
- modules_headroom = bzimage_headroom(
- (char *)(unsigned long)mod[0].mod_start,
- (unsigned long)(mod[0].mod_end - mod[0].mod_start));
+ modules_headroom = bzimage_headroom(bootstrap_map(mod), mod->mod_end);
+ bootstrap_map(NULL);
for ( i = boot_e820.nr_map-1; i >= 0; i-- )
{
uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
+ uint64_t end, limit = ARRAY_SIZE(l2_identmap) << L2_PAGETABLE_SHIFT;
- /* Superpage-aligned chunks from 16MB to BOOTSTRAP_DIRECTMAP_END. */
+ /* Superpage-aligned chunks from BOOTSTRAP_MAP_BASE. */
s = (boot_e820.map[i].addr + mask) & ~mask;
e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
- s = max_t(uint64_t, s, 16 << 20);
- e = min_t(uint64_t, e, BOOTSTRAP_DIRECTMAP_END);
+ s = max_t(uint64_t, s, BOOTSTRAP_MAP_BASE);
if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
continue;
- set_pdx_range(s >> PAGE_SHIFT, e >> PAGE_SHIFT);
-
- /* Map the chunk. No memory will need to be allocated to do this. */
- map_pages_to_xen(
- (unsigned long)maddr_to_bootstrap_virt(s),
- s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
+ if ( s < limit )
+ {
+ end = min(e, limit);
+ set_pdx_range(s >> PAGE_SHIFT, end >> PAGE_SHIFT);
+#ifdef CONFIG_X86_64
+ map_pages_to_xen((unsigned long)__va(s), s >> PAGE_SHIFT,
+ (end - s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
+#endif
+ }
#if defined(CONFIG_X86_64)
+ e = min_t(uint64_t, e, 1ULL << (PAGE_SHIFT + 32));
#define reloc_size ((__pa(&_end) + mask) & ~mask)
/* Is the region suitable for relocating Xen? */
- if ( !xen_phys_start && ((e-s) >= reloc_size) )
+ if ( !xen_phys_start && e <= limit )
+ {
+ /* Don't overlap with modules. */
+ end = consider_modules(s, e, reloc_size + mask,
+ mod, mbi->mods_count, -1);
+ end &= ~mask;
+ }
+ else
+ end = 0;
+ if ( end > s )
{
extern l2_pgentry_t l2_xenmap[];
l4_pgentry_t *pl4e;
l3_pgentry_t *pl3e;
l2_pgentry_t *pl2e;
int i, j, k;
+ void *dst;
/* Select relocation address. */
- e -= reloc_size;
+ e = end - reloc_size;
xen_phys_start = e;
bootsym(trampoline_xen_phys_start) = e;
@@ -712,10 +830,10 @@ void __init __start_xen(unsigned long mb
* data until after we have switched to the relocated pagetables!
*/
barrier();
- move_memory(e, 0, __pa(&_end) - xen_phys_start);
+ dst = move_memory(e, 0, (unsigned long)&_end - XEN_VIRT_START, 1);
/* Poison low 1MB to detect stray pointers to physical 0-1MB. */
- memset(maddr_to_bootstrap_virt(e), 0x55, 1U<<20);
+ memset(dst, 0x55, 1U << 20);
/* Walk initial pagetables, relocating page directory entries. */
pl4e = __va(__pa(idle_pg_table));
@@ -772,38 +890,58 @@ void __init __start_xen(unsigned long mb
"movq %%rsi,%%cr4 " /* CR4.PGE == 1 */
: : "r" (__pa(idle_pg_table)), "S" (cpu0_stack),
"D" (__va(__pa(cpu0_stack))), "c" (STACK_SIZE) : "memory" );
+
+ bootstrap_map(NULL);
}
#endif
/* Is the region suitable for relocating the multiboot modules? */
- if ( !initial_images_start && (s < e) &&
- ((e-s) >= (modules_length+modules_headroom)) )
+ for ( j = mbi->mods_count - 1; j >= 0; j-- )
{
- initial_images_end = e;
- initial_images_start = initial_images_end - modules_length;
- initial_images_base = initial_images_start - modules_headroom;
- initial_images_base &= PAGE_MASK;
- for ( j = mbi->mods_count-1; j >= 0; j-- )
+ unsigned long headroom = j ? 0 : modules_headroom;
+ unsigned long size = PAGE_ALIGN(headroom + mod[j].mod_end);
+
+ if ( mod[j].reserved )
+ continue;
+
+ /* Don't overlap with other modules. */
+ end = consider_modules(s, e, size, mod, mbi->mods_count, j);
+
+ if ( s < end &&
+ (headroom ||
+ ((end - size) >> PAGE_SHIFT) > mod[j].mod_start) )
{
- e -= mod[j].mod_end - mod[j].mod_start;
- move_memory(e, mod[j].mod_start, mod[j].mod_end);
- mod[j].mod_end += e - mod[j].mod_start;
- mod[j].mod_start = e;
+ move_memory(end - size + headroom,
+ (uint64_t)mod[j].mod_start << PAGE_SHIFT,
+ mod[j].mod_end, 0);
+ mod[j].mod_start = (end - size) >> PAGE_SHIFT;
+ mod[j].mod_end += headroom;
+ mod[j].reserved = 1;
}
- e = initial_images_base;
}
- if ( !kexec_crash_area.start && (s < e) &&
- ((e-s) >= kexec_crash_area.size) )
+#ifdef CONFIG_X86_32
+ /* Confine the kexec area to below 4Gb. */
+ e = min_t(uint64_t, e, 1ULL << 32);
+#endif
+ /* Don't overlap with modules. */
+ e = consider_modules(s, e, PAGE_ALIGN(kexec_crash_area.size),
+ mod, mbi->mods_count, -1);
+ if ( !kexec_crash_area.start && (s < e) )
{
e = (e - kexec_crash_area.size) & PAGE_MASK;
kexec_crash_area.start = e;
}
}
- if ( !initial_images_start )
+ if ( modules_headroom && !mod->reserved )
EARLY_FAIL("Not enough memory to relocate the dom0 kernel image.\n");
- reserve_e820_ram(&boot_e820, initial_images_base, initial_images_end);
+ for ( i = 0; i < mbi->mods_count; ++i )
+ {
+ uint64_t s = (uint64_t)mod[i].mod_start << PAGE_SHIFT;
+
+ reserve_e820_ram(&boot_e820, s, s + PAGE_ALIGN(mod[i].mod_end));
+ }
#if defined(CONFIG_X86_32)
xenheap_initial_phys_start = (PFN_UP(__pa(&_end)) + 1) << PAGE_SHIFT;
@@ -827,7 +965,10 @@ void __init __start_xen(unsigned long mb
*/
for ( i = 0; i < boot_e820.nr_map; i++ )
{
- uint64_t s, e, map_s, map_e, mask = PAGE_SIZE - 1;
+ uint64_t s, e, mask = PAGE_SIZE - 1;
+#ifdef CONFIG_X86_64
+ uint64_t map_s, map_e;
+#endif
/* Only page alignment required now. */
s = (boot_e820.map[i].addr + mask) & ~mask;
@@ -842,7 +983,7 @@ void __init __start_xen(unsigned long mb
#ifdef __x86_64__
if ( !acpi_boot_table_init_done &&
- s >= BOOTSTRAP_DIRECTMAP_END &&
+ s >= (1ULL << 32) &&
!acpi_boot_table_init() )
{
acpi_boot_table_init_done = 1;
@@ -881,26 +1022,60 @@ void __init __start_xen(unsigned long mb
set_pdx_range(s >> PAGE_SHIFT, e >> PAGE_SHIFT);
- /* Need to create mappings above 16MB. */
- map_s = max_t(uint64_t, s, 16<<20);
- map_e = e;
-#if defined(CONFIG_X86_32) /* mappings are truncated on x86_32 */
- map_e = min_t(uint64_t, map_e, BOOTSTRAP_DIRECTMAP_END);
-#endif
+#ifdef CONFIG_X86_64
+ /* Need to create mappings above BOOTSTRAP_MAP_BASE. */
+ map_s = max_t(uint64_t, s, BOOTSTRAP_MAP_BASE);
+ map_e = min_t(uint64_t, e,
+ ARRAY_SIZE(l2_identmap) << L2_PAGETABLE_SHIFT);
/* Pass mapped memory to allocator /before/ creating new mappings. */
- init_boot_pages(s, min_t(uint64_t, map_s, e));
+ init_boot_pages(s, min(map_s, e));
+ s = map_s;
+ if ( s < map_e )
+ {
+ uint64_t mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
+
+ map_s = (s + mask) & ~mask;
+ map_e &= ~mask;
+ init_boot_pages(map_s, map_e);
+ }
+
+ if ( map_s > map_e )
+ map_s = map_e = s;
/* Create new mappings /before/ passing memory to the allocator. */
- if ( map_s < map_e )
- map_pages_to_xen(
- (unsigned long)maddr_to_bootstrap_virt(map_s),
- map_s >> PAGE_SHIFT, (map_e-map_s) >> PAGE_SHIFT,
- PAGE_HYPERVISOR);
+ if ( map_e < e )
+ {
+ map_pages_to_xen((unsigned long)__va(map_e), map_e >> PAGE_SHIFT,
+ (e - map_e) >> PAGE_SHIFT, PAGE_HYPERVISOR);
+ init_boot_pages(map_e, e);
+ }
+ if ( s < map_s )
+ {
+ map_pages_to_xen((unsigned long)__va(s), s >> PAGE_SHIFT,
+ (map_s - s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
+ init_boot_pages(s, map_s);
+ }
+#else
+ init_boot_pages(s, e);
+#endif
+ }
- /* Pass remainder of this memory chunk to the allocator. */
- init_boot_pages(map_s, e);
+ for ( i = 0; i < mbi->mods_count; ++i )
+ {
+ set_pdx_range(mod[i].mod_start,
+ mod[i].mod_start + PFN_UP(mod[i].mod_end));
+#ifdef CONFIG_X86_64
+ map_pages_to_xen((unsigned long)mfn_to_virt(mod[i].mod_start),
+ mod[i].mod_start,
+ PFN_UP(mod[i].mod_end), PAGE_HYPERVISOR);
+#endif
}
+#ifdef CONFIG_X86_64
+ map_pages_to_xen((unsigned long)__va(kexec_crash_area.start),
+ kexec_crash_area.start >> PAGE_SHIFT,
+ PFN_UP(kexec_crash_area.size), PAGE_HYPERVISOR);
+#endif
memguard_init();
@@ -1023,7 +1198,7 @@ void __init __start_xen(unsigned long mb
init_IRQ();
- xsm_init(&initrdidx, mbi, initial_images_start);
+ xsm_init(&initrdidx, mbi, bootstrap_map);
timer_init();
@@ -1135,12 +1310,6 @@ void __init __start_xen(unsigned long mb
cmdline = dom0_cmdline;
}
- if ( (initrdidx > 0) && (initrdidx < mbi->mods_count) )
- {
- _initrd_start = mod[initrdidx].mod_start;
- _initrd_len = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
- }
-
if ( xen_cpuidle )
xen_processor_pmbits |= XEN_PROCESSOR_PM_CX;
@@ -1148,13 +1317,10 @@ void __init __start_xen(unsigned long mb
* We're going to setup domain0 using the module(s) that we stashed safely
* above our heap. The second module, if present, is an initrd ramdisk.
*/
- if ( construct_dom0(dom0,
- initial_images_base,
- initial_images_start,
- mod[0].mod_end-mod[0].mod_start,
- _initrd_start,
- _initrd_len,
- cmdline) != 0)
+ if ( construct_dom0(dom0, mod, modules_headroom,
+ (initrdidx > 0) && (initrdidx < mbi->mods_count)
+ ? mod + initrdidx : NULL,
+ bootstrap_map, cmdline) != 0)
panic("Could not set up DOM0 guest OS\n");
/* Scrub RAM that is still free and so may go to an unprivileged domain. */
--- 2010-11-09.orig/xen/arch/x86/x86_64/mm.c 2010-09-06 08:21:15.000000000
+0200
+++ 2010-11-09/xen/arch/x86/x86_64/mm.c 2010-11-09 10:30:06.000000000 +0100
@@ -65,6 +65,12 @@ l3_pgentry_t __attribute__ ((__section__
l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
l2_xenmap[L2_PAGETABLE_ENTRIES];
+/* Enough page directories to map into the bottom 1GB. */
+l3_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+ l3_bootmap[L3_PAGETABLE_ENTRIES];
+l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+ l2_bootmap[L2_PAGETABLE_ENTRIES];
+
int __mfn_valid(unsigned long mfn)
{
return likely(mfn < max_page) &&
--- 2010-11-09.orig/xen/include/asm-x86/domain.h 2010-11-05
09:22:58.000000000 +0100
+++ 2010-11-09/xen/include/asm-x86/domain.h 2010-11-09 10:30:06.000000000
+0100
@@ -466,16 +466,6 @@ void domain_cpuid(struct domain *d,
unsigned int *ecx,
unsigned int *edx);
-int construct_dom0(
- struct domain *d,
- unsigned long image_base,
- unsigned long image_start, unsigned long image_len,
- unsigned long initrd_start, unsigned long initrd_len,
- char *cmdline);
-
-extern unsigned long initial_images_nrpages(void);
-extern void discard_initial_images(void);
-
#endif /* __ASM_DOMAIN_H__ */
/*
--- 2010-11-09.orig/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
2010-06-01 13:39:57.000000000 +0200
+++ 2010-11-09/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h 2010-11-09
10:30:06.000000000 +0100
@@ -30,7 +30,6 @@
&amd_iommu_head, list)
#define DMA_32BIT_MASK 0x00000000ffffffffULL
-#define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
extern int amd_iommu_debug;
extern int amd_iommu_perdev_intremap;
--- 2010-11-09.orig/xen/include/asm-x86/page.h 2010-05-28 13:59:16.000000000
+0200
+++ 2010-11-09/xen/include/asm-x86/page.h 2010-11-09 10:30:06.000000000
+0100
@@ -302,6 +302,7 @@ extern l2_pgentry_t idle_pg_table_l2[
extern l2_pgentry_t *compat_idle_pg_table_l2;
extern unsigned int m2p_compat_vstart;
#endif
+extern l2_pgentry_t l2_identmap[4*L2_PAGETABLE_ENTRIES];
void paging_init(void);
void setup_idle_pagetable(void);
#endif /* !defined(__ASSEMBLY__) */
@@ -397,6 +398,7 @@ static inline uint32_t cacheattr_to_pte_
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK)
#endif /* __X86_PAGE_H__ */
--- 2010-11-09.orig/xen/include/asm-x86/setup.h 2010-05-20 09:59:27.000000000
+0200
+++ 2010-11-09/xen/include/asm-x86/setup.h 2010-11-09 10:30:06.000000000
+0100
@@ -1,6 +1,8 @@
#ifndef __X86_SETUP_H_
#define __X86_SETUP_H_
+#include <xen/multiboot.h>
+
extern int early_boot;
extern unsigned long xenheap_initial_phys_start;
@@ -25,4 +27,14 @@ void init_IRQ(void);
void vesa_init(void);
void vesa_mtrr_init(void);
+int construct_dom0(
+ struct domain *d,
+ const module_t *kernel, unsigned long kernel_headroom,
+ const module_t *initrd,
+ void *(*bootstrap_map)(const module_t *),
+ char *cmdline);
+
+unsigned long initial_images_nrpages(void);
+void discard_initial_images(void);
+
#endif
--- 2010-11-09.orig/xen/include/xsm/xsm.h 2010-11-09 08:25:42.000000000
+0100
+++ 2010-11-09/xen/include/xsm/xsm.h 2010-11-09 10:30:06.000000000 +0100
@@ -432,14 +432,15 @@ static inline long __do_xsm_op (XEN_GUES
#ifdef XSM_ENABLE
extern int xsm_init(unsigned int *initrdidx, const multiboot_info_t *mbi,
- unsigned long initial_images_start);
+ void *(*bootstrap_map)(const module_t *));
extern int xsm_policy_init(unsigned int *initrdidx, const multiboot_info_t
*mbi,
- unsigned long initial_images_start);
+ void *(*bootstrap_map)(const module_t *));
extern int register_xsm(struct xsm_operations *ops);
extern int unregister_xsm(struct xsm_operations *ops);
#else
static inline int xsm_init (unsigned int *initrdidx,
- const multiboot_info_t *mbi, unsigned long
initial_images_start)
+ const multiboot_info_t *mbi,
+ void *(*bootstrap_map)(const module_t *))
{
return 0;
}
--- 2010-11-09.orig/xen/xsm/xsm_core.c 2009-10-07 13:31:36.000000000 +0200
+++ 2010-11-09/xen/xsm/xsm_core.c 2010-11-09 10:30:06.000000000 +0100
@@ -47,7 +47,7 @@ static void __init do_xsm_initcalls(void
}
int __init xsm_init(unsigned int *initrdidx, const multiboot_info_t *mbi,
- unsigned long initial_images_start)
+ void *(*bootstrap_map)(const module_t *))
{
int ret = 0;
@@ -55,9 +55,10 @@ int __init xsm_init(unsigned int *initrd
if ( XSM_MAGIC )
{
- ret = xsm_policy_init(initrdidx, mbi, initial_images_start);
+ ret = xsm_policy_init(initrdidx, mbi, bootstrap_map);
if ( ret )
{
+ bootstrap_map(NULL);
printk("%s: Error initializing policy.\n", __FUNCTION__);
return -EINVAL;
}
@@ -65,6 +66,7 @@ int __init xsm_init(unsigned int *initrd
if ( verify(&dummy_xsm_ops) )
{
+ bootstrap_map(NULL);
printk("%s could not verify "
"dummy_xsm_ops structure.\n", __FUNCTION__);
return -EIO;
@@ -72,6 +74,7 @@ int __init xsm_init(unsigned int *initrd
xsm_ops = &dummy_xsm_ops;
do_xsm_initcalls();
+ bootstrap_map(NULL);
return 0;
}
--- 2010-11-09.orig/xen/xsm/xsm_policy.c 2007-11-02 17:25:59.000000000
+0100
+++ 2010-11-09/xen/xsm/xsm_policy.c 2010-11-09 10:30:06.000000000 +0100
@@ -22,11 +22,11 @@
#include <xsm/xsm.h>
#include <xen/multiboot.h>
-char *policy_buffer = NULL;
-u32 policy_size = 0;
+char *__initdata policy_buffer = NULL;
+u32 __initdata policy_size = 0;
int xsm_policy_init(unsigned int *initrdidx, const multiboot_info_t *mbi,
- unsigned long initial_images_start)
+ void *(*bootstrap_map)(const module_t *))
{
int i;
module_t *mod = (module_t *)__va(mbi->mods_addr);
@@ -40,15 +40,8 @@ int xsm_policy_init(unsigned int *initrd
*/
for ( i = mbi->mods_count-1; i >= 1; i-- )
{
- start = initial_images_start + (mod[i].mod_start-mod[0].mod_start);
-#if defined(__i386__)
- _policy_start = (u32 *)start;
-#elif defined(__x86_64__)
- _policy_start = maddr_to_virt(start);
-#else
- _policy_start = NULL;
-#endif
- _policy_len = mod[i].mod_end - mod[i].mod_start;
+ _policy_start = bootstrap_map(mod + i);
+ _policy_len = mod[i].mod_end;
if ( (xsm_magic_t)(*_policy_start) == XSM_MAGIC )
{
@@ -63,6 +56,8 @@ int xsm_policy_init(unsigned int *initrd
break;
}
+
+ bootstrap_map(NULL);
}
return rc;
x86-custom-boot-map.patch
Description: Text document
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|