
[Xen-devel] [PATCH] x86/boot: Move/copy sections more efficiently



Both the trampoline copy and the BSS initialisation can be performed more
efficiently by using the 4-byte variants of the string operations.
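
For illustration, the general shape of the conversion is as follows (a
sketch only, assuming %edi holds the destination, %ecx the byte count,
and that the count is a multiple of 4):

        /* Byte-at-a-time clear: one iteration per byte. */
        xor     %eax,%eax
        rep     stosb

        /* 4-bytes-at-a-time clear: a quarter of the iterations. */
        xor     %eax,%eax
        shr     $2,%ecx
        rep     stosl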

The ALIGN(STACK_SIZE) actually belongs with .bss.stack_aligned, but
__init_end still needs page alignment because the init sections are freed
and returned to the domheap after boot.
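
In linker-script terms, the intent is roughly the following (a sketch of
the layout only; the real change is in the diff below):

  . = ALIGN(PAGE_SIZE);          /* init sections are freed in whole pages */
  __init_end = .;

  .bss : {
         . = ALIGN(STACK_SIZE);  /* alignment tied to .bss.stack_aligned */
         __bss_start = .;
         *(.bss.stack_aligned)
  }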

A note concerning Intel ERMSB, which indicates that byte MOVS operations
are efficient.  ERMSB and non-aliased, aligned MOVSD scale with identical
complexity, although ERMSB doesn't have a small setup overhead (which
falls into the noise, given the length of these REPs).  On non-ERMSB
systems however, MOVSD scales 4 times better than MOVSB.
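
For reference, ERMSB availability can be tested via CPUID leaf 7 (an
illustrative sketch, not part of this patch; the has_ermsb label is
hypothetical):

        /* CPUID.(EAX=7,ECX=0):EBX bit 9 is the ERMS feature flag. */
        mov     $7,%eax
        xor     %ecx,%ecx
        cpuid
        bt      $9,%ebx          /* CF = 1 iff ERMSB is available */
        jc      has_ermsb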

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CC: Jan Beulich <JBeulich@xxxxxxxx>

---
v2: Better patch description.  No functional change.
---
 xen/arch/x86/boot/head.S |    9 +++++----
 xen/arch/x86/xen.lds.S   |    5 ++++-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
index f63b349..2b38048 100644
--- a/xen/arch/x86/boot/head.S
+++ b/xen/arch/x86/boot/head.S
@@ -128,7 +128,8 @@ __start:
         mov     $sym_phys(__bss_end),%ecx
         sub     %edi,%ecx
         xor     %eax,%eax
-        rep     stosb
+        shr     $2,%ecx
+        rep     stosl
 
         /* Interrogate CPU extended features via CPUID. */
         mov     $0x80000000,%eax
@@ -197,8 +198,8 @@ __start:
 
         /* Copy bootstrap trampoline to low memory, below 1MB. */
         mov     $sym_phys(trampoline_start),%esi
-        mov     $trampoline_end - trampoline_start,%ecx
-        rep     movsb
+        mov     $((trampoline_end - trampoline_start) / 4),%ecx
+        rep     movsl
 
         /* Jump into the relocated trampoline. */
         lret
@@ -210,6 +211,6 @@ reloc:
 
 ENTRY(trampoline_start)
 #include "trampoline.S"
-GLOBAL(trampoline_end)
+ENTRY(trampoline_end)
 
 #include "x86_64.S"
diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
index 6553cff..c1180b2 100644
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -158,11 +158,13 @@ SECTIONS
        __xsm_initcall_start = .;
        *(.xsm_initcall.init)
        __xsm_initcall_end = .;
+
+       . = ALIGN(PAGE_SIZE);
   } :text
-  . = ALIGN(STACK_SIZE);
   __init_end = .;
 
   .bss : {                     /* BSS */
+       . = ALIGN(STACK_SIZE);
        __bss_start = .;
        *(.bss.stack_aligned)
        . = ALIGN(PAGE_SIZE);
@@ -175,6 +177,7 @@ SECTIONS
        *(.bss.percpu.read_mostly)
        . = ALIGN(SMP_CACHE_BYTES);
        __per_cpu_data_end = .;
+       . = ALIGN(8);
        __bss_end = .;
   } :text
   _end = . ;
-- 
1.7.10.4

