
To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] x86: allow passing initrd to kernel without exposing it through the initial mapping
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Tue, 09 Nov 2010 20:10:17 -0800
Delivery-date: Tue, 09 Nov 2010 20:12:10 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1289303414 0
# Node ID 7f3ccf376aad340a7a74e0e37d0ee8de2e9298db
# Parent  426f3a2657844cec77ce0043b0408b0887fafa41
x86: allow passing initrd to kernel without exposing it through the initial mapping

Through a newly added ELF note, the kernel (Dom0 only for now) can
indicate that it does not need its initrd mapped into its initial page
tables; in that case it gets passed the PFN of the initrd instead of
its virtual address.

Even for kernels not making use of the new feature, the initrd no
longer gets copied into the initial mapping; instead, the memory it
already lives in gets assigned to the guest and mapped for it.
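
For illustration only, and not part of this patch: a guest kernel adopting
the new handover would advertise the ELF note and then check
SIF_MOD_START_PFN before interpreting start_info.mod_start. The sketch
below assumes a Linux-style guest environment (PAGE_SHIFT, __pa()); only
the constants and start_info fields come from the public headers changed
further down.

/*
 * Illustrative guest-side sketch -- not part of this changeset.
 * XEN_ELFNOTE_MOD_START_PFN, SIF_MOD_START_PFN and start_info_t come
 * from the Xen public headers touched below; the note struct, the
 * ".note.Xen" section trick and the __pa()/PAGE_SHIFT helpers are
 * assumptions of this example.
 */
#include <stdint.h>

#define XEN_ELFNOTE_MOD_START_PFN 16        /* public/elfnote.h (this patch) */
#define SIF_MOD_START_PFN         (1 << 3)  /* public/xen.h (this patch) */

/* Advertise that the initrd may be handed over as a bare PFN.  A real
 * kernel would emit this via its ELF-note assembler macro so the section
 * ends up typed SHT_NOTE; the struct here only shows the note layout. */
static const struct {
    uint32_t namesz, descsz, type;
    char     name[4];                       /* "Xen\0" */
    uint32_t desc;                          /* non-zero: feature supported */
} mod_start_pfn_note
    __attribute__((used, section(".note.Xen"), aligned(4))) = {
    .namesz = 4, .descsz = 4, .type = XEN_ELFNOTE_MOD_START_PFN,
    .name = "Xen", .desc = 1,
};

/* At boot, decide how to interpret start_info.mod_start. */
static unsigned long initrd_start_paddr(const start_info_t *si)
{
    if (si->flags & SIF_MOD_START_PFN)
        return si->mod_start << PAGE_SHIFT; /* PFN-based handover */
    return __pa(si->mod_start);             /* legacy: VA in initial mapping */
}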

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
 xen/arch/x86/domain_build.c        |  116 ++++++++++++++++++++++++++++---------
 xen/common/libelf/libelf-dominfo.c |    1 
 xen/include/asm-x86/setup.h        |    2 
 xen/include/public/elfnote.h       |    8 ++
 xen/include/public/xen.h           |    5 +
 5 files changed, 102 insertions(+), 30 deletions(-)

diff -r 426f3a265784 -r 7f3ccf376aad xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Tue Nov 09 11:49:49 2010 +0000
+++ b/xen/arch/x86/domain_build.c       Tue Nov 09 11:50:14 2010 +0000
@@ -225,7 +225,9 @@ static unsigned long __init compute_dom0
         size_t sizeof_long = is_pv_32bit_domain(d) ? sizeof(int) : sizeof(long);
 
         vstart = parms->virt_base;
-        vend = round_pgup(parms->virt_kend) + round_pgup(initrd_len);
+        vend = round_pgup(parms->virt_kend);
+        if ( !parms->elf_notes[XEN_ELFNOTE_MOD_START_PFN].data.num )
+            vend += round_pgup(initrd_len);
         end = vend + nr_pages * sizeof_long;
 
         if ( end > vstart )
@@ -286,7 +288,7 @@ int __init construct_dom0(
 int __init construct_dom0(
     struct domain *d,
     const module_t *image, unsigned long image_headroom,
-    const module_t *initrd,
+    module_t *initrd,
     void *(*bootstrap_map)(const module_t *),
     char *cmdline)
 {
@@ -297,6 +299,7 @@ int __init construct_dom0(
     unsigned long nr_pt_pages;
     unsigned long alloc_spfn;
     unsigned long alloc_epfn;
+    unsigned long initrd_pfn = -1, initrd_mfn = 0;
     unsigned long count;
     struct page_info *page = NULL;
     start_info_t *si;
@@ -449,9 +452,17 @@ int __init construct_dom0(
     v_start          = parms.virt_base;
     vkern_start      = parms.virt_kstart;
     vkern_end        = parms.virt_kend;
-    vinitrd_start    = round_pgup(vkern_end);
-    vinitrd_end      = vinitrd_start + initrd_len;
-    vphysmap_start   = round_pgup(vinitrd_end);
+    if ( parms.elf_notes[XEN_ELFNOTE_MOD_START_PFN].data.num )
+    {
+        vinitrd_start  = vinitrd_end = 0;
+        vphysmap_start = round_pgup(vkern_end);
+    }
+    else
+    {
+        vinitrd_start  = round_pgup(vkern_end);
+        vinitrd_end    = vinitrd_start + initrd_len;
+        vphysmap_start = round_pgup(vinitrd_end);
+    }
     vphysmap_end     = vphysmap_start + (nr_pages * (!is_pv_32on64_domain(d) ?
                                                      sizeof(unsigned long) :
                                                      sizeof(unsigned int)));
@@ -490,8 +501,11 @@ int __init construct_dom0(
 #endif
     }
 
-    order = get_order_from_bytes(v_end - v_start);
-    if ( (1UL << order) > nr_pages )
+    count = v_end - v_start;
+    if ( vinitrd_start )
+        count -= PAGE_ALIGN(initrd_len);
+    order = get_order_from_bytes(count);
+    if ( (1UL << order) + PFN_UP(initrd_len) > nr_pages )
         panic("Domain 0 allocation is too small for kernel image.\n");
 
     if ( parms.p2m_base != UNSET_ADDR )
@@ -509,6 +523,42 @@ int __init construct_dom0(
         panic("Not enough RAM for domain 0 allocation.\n");
     alloc_spfn = page_to_mfn(page);
     alloc_epfn = alloc_spfn + d->tot_pages;
+
+    if ( initrd_len )
+    {
+        initrd_pfn = vinitrd_start ?
+                     (vinitrd_start - v_start) >> PAGE_SHIFT :
+                     d->tot_pages;
+        initrd_mfn = mfn = initrd->mod_start;
+        count = PFN_UP(initrd_len);
+#ifdef __x86_64__
+        if ( d->arch.physaddr_bitsize &&
+             ((mfn + count - 1) >> (d->arch.physaddr_bitsize - PAGE_SHIFT)) )
+        {
+            order = get_order_from_pages(count);
+            page = alloc_domheap_pages(d, order, 0);
+            if ( !page )
+                panic("Not enough RAM for domain 0 initrd.\n");
+            for ( count = -count; order--; )
+                if ( count & (1UL << order) )
+                {
+                    free_domheap_pages(page, order);
+                    page += 1UL << order;
+                }
+            memcpy(page_to_virt(page), mfn_to_virt(initrd->mod_start),
+                   initrd_len);
+            mpt_alloc = (paddr_t)initrd->mod_start << PAGE_SHIFT;
+            init_domheap_pages(mpt_alloc,
+                               mpt_alloc + PAGE_ALIGN(initrd_len));
+            initrd->mod_start = initrd_mfn = page_to_mfn(page);
+        }
+        else
+#endif
+            while ( count-- )
+                if ( assign_pages(d, mfn_to_page(mfn++), 0, 0) )
+                    BUG();
+        initrd->mod_end = 0;
+    }
 
     printk("PHYSICAL MEMORY ARRANGEMENT:\n"
            " Dom0 alloc.:   %"PRIpaddr"->%"PRIpaddr,
@@ -516,6 +566,12 @@ int __init construct_dom0(
     if ( d->tot_pages < nr_pages )
         printk(" (%lu pages to be allocated)",
                nr_pages - d->tot_pages);
+    if ( initrd )
+    {
+        mpt_alloc = (paddr_t)initrd->mod_start << PAGE_SHIFT;
+        printk("\n Init. ramdisk: %"PRIpaddr"->%"PRIpaddr,
+               mpt_alloc, mpt_alloc + initrd_len);
+    }
     printk("\nVIRTUAL MEMORY ARRANGEMENT:\n"
            " Loaded kernel: %p->%p\n"
            " Init. ramdisk: %p->%p\n"
@@ -534,6 +590,8 @@ int __init construct_dom0(
     printk(" ENTRY ADDRESS: %p\n", _p(parms.virt_entry));
 
     mpt_alloc = (vpt_start - v_start) + pfn_to_paddr(alloc_spfn);
+    if ( vinitrd_start )
+        mpt_alloc -= PAGE_ALIGN(initrd_len);
 
 #if defined(__i386__)
     /*
@@ -571,7 +629,7 @@ int __init construct_dom0(
             l2e_from_page(perdomain_pt_page(d, i), __PAGE_HYPERVISOR);
 
     l2tab += l2_linear_offset(v_start);
-    mfn = alloc_spfn;
+    pfn = alloc_spfn;
     for ( count = 0; count < ((v_end-v_start)>>PAGE_SHIFT); count++ )
     {
         if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
@@ -584,14 +642,16 @@ int __init construct_dom0(
             if ( count == 0 )
                 l1tab += l1_table_offset(v_start);
         }
+        if ( count < initrd_pfn || count >= initrd_pfn + PFN_UP(initrd_len) )
+            mfn = pfn++;
+        else
+            mfn = initrd_mfn++;
         *l1tab = l1e_from_pfn(mfn, L1_PROT);
         l1tab++;
         
         page = mfn_to_page(mfn);
         if ( !get_page_and_type(page, d, PGT_writable_page) )
             BUG();
-
-        mfn++;
     }
 #undef MPT_ALLOC
 
@@ -688,7 +748,7 @@ int __init construct_dom0(
         v->arch.guest_table_user = v->arch.guest_table;
 
     l4tab += l4_table_offset(v_start);
-    mfn = alloc_spfn;
+    pfn = alloc_spfn;
     for ( count = 0; count < ((v_end-v_start)>>PAGE_SHIFT); count++ )
     {
         if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
@@ -722,6 +782,10 @@ int __init construct_dom0(
             *l2tab = l2e_from_paddr(__pa(l1start), L2_PROT);
             l2tab++;
         }
+        if ( count < initrd_pfn || count >= initrd_pfn + PFN_UP(initrd_len) )
+            mfn = pfn++;
+        else
+            mfn = initrd_mfn++;
         *l1tab = l1e_from_pfn(mfn, (!is_pv_32on64_domain(d) ?
                                     L1_PROT : COMPAT_L1_PROT));
         l1tab++;
@@ -730,8 +794,6 @@ int __init construct_dom0(
         if ( (page->u.inuse.type_info == 0) &&
              !get_page_and_type(page, d, PGT_writable_page) )
             BUG();
-
-        mfn++;
     }
 
     if ( is_pv_32on64_domain(d) )
@@ -837,15 +899,6 @@ int __init construct_dom0(
             d, (void *)(unsigned long)parms.virt_hypercall);
     }
 
-    /* Copy the initial ramdisk. */
-    if ( initrd_len != 0 )
-    {
-        char *initrd_start = bootstrap_map(initrd);
-
-        memcpy((void *)vinitrd_start, initrd_start, initrd_len);
-        bootstrap_map(NULL);
-    }
-
     /* Free temporary buffers. */
     discard_initial_images();
 
@@ -857,6 +910,8 @@ int __init construct_dom0(
     si->shared_info = virt_to_maddr(d->shared_info);
 
     si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
+    if ( !vinitrd_start && initrd_len )
+        si->flags   |= SIF_MOD_START_PFN;
     si->flags       |= (xen_processor_pmbits << 8) & SIF_PM_MASK;
     si->pt_base      = vpt_start + 2 * PAGE_SIZE * !!is_pv_32on64_domain(d);
     si->nr_pt_frames = nr_pt_pages;
@@ -971,9 +1026,16 @@ int __init construct_dom0(
     for ( pfn = 0; pfn < count; pfn++ )
     {
         mfn = pfn + alloc_spfn;
+        if ( pfn >= initrd_pfn )
+        {
+            if ( pfn < initrd_pfn + PFN_UP(initrd_len) )
+                mfn = initrd->mod_start + (pfn - initrd_pfn);
+            else
+                mfn -= PFN_UP(initrd_len);
+        }
 #ifndef NDEBUG
 #define REVERSE_START ((v_end - v_start) >> PAGE_SHIFT)
-        if ( pfn > REVERSE_START )
+        if ( pfn > REVERSE_START && (vinitrd_start || pfn < initrd_pfn) )
             mfn = alloc_epfn - (pfn - REVERSE_START);
 #endif
         if ( !is_pv_32on64_domain(d) )
@@ -999,14 +1061,14 @@ int __init construct_dom0(
             ((unsigned long *)vphysmap_start)[pfn] = mfn;
             set_gpfn_from_mfn(mfn, pfn);
             ++pfn;
-#ifndef NDEBUG
-            ++alloc_epfn;
-#endif
             if (!(pfn & 0xfffff))
                 process_pending_softirqs();
         }
     }
     BUG_ON(pfn != d->tot_pages);
+#ifndef NDEBUG
+    alloc_epfn += PFN_UP(initrd_len) + si->nr_p2m_frames;
+#endif
     while ( pfn < nr_pages )
     {
         if ( (page = alloc_chunk(d, nr_pages - d->tot_pages)) == NULL )
@@ -1031,7 +1093,7 @@ int __init construct_dom0(
 
     if ( initrd_len != 0 )
     {
-        si->mod_start = vinitrd_start;
+        si->mod_start = vinitrd_start ?: initrd_pfn;
         si->mod_len   = initrd_len;
     }
 
diff -r 426f3a265784 -r 7f3ccf376aad xen/common/libelf/libelf-dominfo.c
--- a/xen/common/libelf/libelf-dominfo.c        Tue Nov 09 11:49:49 2010 +0000
+++ b/xen/common/libelf/libelf-dominfo.c        Tue Nov 09 11:50:14 2010 +0000
@@ -115,6 +115,7 @@ int elf_xen_parse_note(struct elf_binary
         [XEN_ELFNOTE_FEATURES] = { "FEATURES", 1},
         [XEN_ELFNOTE_BSD_SYMTAB] = { "BSD_SYMTAB", 1},
         [XEN_ELFNOTE_SUSPEND_CANCEL] = { "SUSPEND_CANCEL", 0 },
+        [XEN_ELFNOTE_MOD_START_PFN] = { "MOD_START_PFN", 0 },
     };
 /* *INDENT-ON* */
 
diff -r 426f3a265784 -r 7f3ccf376aad xen/include/asm-x86/setup.h
--- a/xen/include/asm-x86/setup.h       Tue Nov 09 11:49:49 2010 +0000
+++ b/xen/include/asm-x86/setup.h       Tue Nov 09 11:50:14 2010 +0000
@@ -30,7 +30,7 @@ int construct_dom0(
 int construct_dom0(
     struct domain *d,
     const module_t *kernel, unsigned long kernel_headroom,
-    const module_t *initrd,
+    module_t *initrd,
     void *(*bootstrap_map)(const module_t *),
     char *cmdline);
 
diff -r 426f3a265784 -r 7f3ccf376aad xen/include/public/elfnote.h
--- a/xen/include/public/elfnote.h      Tue Nov 09 11:49:49 2010 +0000
+++ b/xen/include/public/elfnote.h      Tue Nov 09 11:50:14 2010 +0000
@@ -173,9 +173,15 @@
 #define XEN_ELFNOTE_INIT_P2M      15
 
 /*
+ * Whether or not the guest can deal with being passed an initrd not
+ * mapped through its initial page tables.
+ */
+#define XEN_ELFNOTE_MOD_START_PFN 16
+
+/*
  * The number of the highest elfnote defined.
  */
-#define XEN_ELFNOTE_MAX XEN_ELFNOTE_INIT_P2M
+#define XEN_ELFNOTE_MAX XEN_ELFNOTE_MOD_START_PFN
 
 /*
  * System information exported through crash notes.
diff -r 426f3a265784 -r 7f3ccf376aad xen/include/public/xen.h
--- a/xen/include/public/xen.h  Tue Nov 09 11:49:49 2010 +0000
+++ b/xen/include/public/xen.h  Tue Nov 09 11:50:14 2010 +0000
@@ -581,7 +581,9 @@ struct start_info {
     unsigned long pt_base;      /* VIRTUAL address of page directory.     */
     unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames.       */
     unsigned long mfn_list;     /* VIRTUAL address of page-frame list.    */
-    unsigned long mod_start;    /* VIRTUAL address of pre-loaded module.  */
+    unsigned long mod_start;    /* VIRTUAL address of pre-loaded module   */
+                                /* (PFN of pre-loaded module if           */
+                                /*  SIF_MOD_START_PFN set in flags).      */
     unsigned long mod_len;      /* Size (bytes) of pre-loaded module.     */
     int8_t cmd_line[MAX_GUEST_CMDLINE];
     /* The pfn range here covers both page table and p->m table frames.   */
@@ -600,6 +602,7 @@ typedef struct start_info start_info_t;
 #define SIF_PRIVILEGED    (1<<0)  /* Is the domain privileged? */
 #define SIF_INITDOMAIN    (1<<1)  /* Is this the initial control domain? */
 #define SIF_MULTIBOOT_MOD (1<<2)  /* Is mod_start a multiboot module? */
+#define SIF_MOD_START_PFN (1<<3)  /* Is mod_start a PFN? */
 #define SIF_PM_MASK       (0xFF<<8) /* reserve 1 byte for xen-pm options */
 
 /*

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
