
Re: [Xen-devel] [PATCH v3 08/11] xen: arm: rewrite start of day page table and cpu bring up



On 09/27/2013 11:20 AM, Ian Campbell wrote:
> This is unfortunately a rather large monolithic patch.
> 
> Rather than bringing up all CPUs in lockstep as we setup paging and relocate
> Xen instead create a simplified set of dedicated boot time pagetables.
> 
> This allows secondary CPUs to remain powered down or in the firmware until we
> actually want to enable them. The bringup is now done later on in C and can be
> driven by DT etc. I have included code for the vexpress platform, but other
> platforms will need to be added.
> 
> The mechanism for deciding how to bring up a CPU differs between arm32 and
> arm64. On arm32 it is essentially a per-platform property, with the exception
> of PSCI which can be implemented globally (but isn't here). On arm64 there is
> a per-cpu property in the device tree.
> 
> Secondary CPUs are brought up directly into the relocated Xen image, instead
> of relying on being able to launch on the unrelocated Xen and hoping that it
> hasn't been clobbered.
> 
> As part of this change drop support for switching from secure mode to NS HYP
> as well as the early CPU kick. Xen now requires that it is launched in NS HYP
> mode and that firmware configure things such that secondary CPUs can be woken
> up by a primary CPU in HYP mode. This may require fixes to bootloaders or the
> use of a boot wrapper.
> 
> The changes done here (re)exposed an issue with relocating Xen and the
> compiler spilling values to the stack between the copy and the actual switch
> to the relocated copy of Xen in setup_pagetables. Therefore switch to doing
> the copy and switch in a single asm function where we can control precisely
> what gets spilled to the stack etc.
> 
> Since we now have a separate set of boot pagetables it is much easier to build
> the real Xen pagetables in place before relocating rather than the more complex
> approach of rewriting the pagetables in the relocated copy before switching.
> 
> This will also enable Xen to be loaded above the 4GB boundary on 64-bit.

There is a conflict between this patch and your recently pushed patch
series "xen: arm: memory management fixes / improvements".

> Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
> ---
> v3: add a dsb before the smp_up_cpu wait loop.
>     move arch_smp_init() and arch_cpu_init() into init_cpus_maps() which I
>     rename to smp_init_cpus().
> v2: Lots of review feedback from Julien and Tim
>     Reintroduce a smp_up_cpu gate.  We cannot reliably wake a single CPU so we
>     add our own gate. However it is purely in the relocated image.
> ---
>  xen/arch/arm/arm32/Makefile              |    1 -
>  xen/arch/arm/arm32/head.S                |  391 ++++++++++++++++++-----------
>  xen/arch/arm/arm32/mode_switch.S         |  158 ------------
>  xen/arch/arm/arm64/Makefile              |    1 -
>  xen/arch/arm/arm64/head.S                |  396 ++++++++++++++++++++----------
>  xen/arch/arm/arm64/mode_switch.S         |   89 -------
>  xen/arch/arm/mm.c                        |  192 +++++++++------
>  xen/arch/arm/setup.c                     |   26 +-
>  xen/arch/arm/smpboot.c                   |   57 ++---
>  xen/include/asm-arm/mm.h                 |    3 +-
>  xen/include/asm-arm/platforms/exynos5.h  |   14 --
>  xen/include/asm-arm/platforms/vexpress.h |   11 -
>  xen/include/asm-arm/smp.h                |    6 -
>  13 files changed, 672 insertions(+), 673 deletions(-)
>  delete mode 100644 xen/arch/arm/arm32/mode_switch.S
>  delete mode 100644 xen/arch/arm/arm64/mode_switch.S
> 
> diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile
> index 463b1f5..aacdcb9 100644
> --- a/xen/arch/arm/arm32/Makefile
> +++ b/xen/arch/arm/arm32/Makefile
> @@ -1,7 +1,6 @@
>  subdir-y += lib
>  
>  obj-y += entry.o
> -obj-y += mode_switch.o
>  obj-y += proc-v7.o
>  
>  obj-y += traps.o
> diff --git a/xen/arch/arm/arm32/head.S b/xen/arch/arm/arm32/head.S
> index 8cb31a2..946394c 100644
> --- a/xen/arch/arm/arm32/head.S
> +++ b/xen/arch/arm/arm32/head.S
> @@ -37,6 +37,25 @@
>  #include EARLY_PRINTK_INC
>  #endif
>  
> +/*
> + * Common register usage in this file:
> + *   r0  -
> + *   r1  -
> + *   r2  -
> + *   r3  -
> + *   r4  -
> + *   r5  -
> + *   r6  -
> + *   r7  - CPUID
> + *   r8  - DTB address (boot CPU only)
> + *   r9  - paddr(start)
> + *   r10 - phys offset
> + *   r11 - UART address
> + *   r12 - is_secondary_cpu
> + *   r13 - SP
> + *   r14 - LR
> + *   r15 - PC
> + */
>  /* Macro to print a string to the UART, if there is one.
>   * Clobbers r0-r3. */
>  #ifdef EARLY_PRINTK
> @@ -59,7 +78,6 @@
>           * or the initial pagetable code below will need adjustment. */
>          .global start
>  start:
> -GLOBAL(init_secondary) /* currently unused */
>          /* zImage magic header, see:
>           * http://www.simtec.co.uk/products/SWLINUX/files/booting_article.html#d0e309
>           */
> @@ -77,7 +95,6 @@ past_zImage:
>          cpsid aif                    /* Disable all interrupts */
>  
>          /* Save the bootloader arguments in less-clobberable registers */
> -        mov   r5, r1                 /* r5: ARM-linux machine type */
>          mov   r8, r2                 /* r8 := DTB base address */
>  
>          /* Find out where we are */
> @@ -91,53 +108,55 @@ past_zImage:
>          add   r8, r10                /* r8 := paddr(DTB) */
>  #endif
>  
> -        /* Are we the boot CPU? */
> -        mov   r12, #0                /* r12 := CPU ID */
> -        mrc   CP32(r0, MPIDR)
> -        tst   r0, #(1<<31)           /* Multiprocessor extension supported? */
> -        beq   boot_cpu
> -        tst   r0, #(1<<30)           /* Uniprocessor system? */
> -        bne   boot_cpu
> -        bics  r12, r0, #(~MPIDR_HWID_MASK) /* Mask out flags to get CPU ID */
> -        beq   boot_cpu               /* If we're CPU 0, boot now */
> -
> -        /* Non-boot CPUs wait here to be woken up one at a time. */
> -1:      dsb
> -        ldr   r0, =smp_up_cpu        /* VA of gate */
> -        add   r0, r0, r10            /* PA of gate */
> -        ldr   r1, [r0]               /* Which CPU is being booted? */
> -        teq   r1, r12                /* Is it us? */
> -        wfene
> -        bne   1b
> +        mov   r12, #0                /* r12 := is_secondary_cpu */
> +
> +        b     common_start
> +
> +GLOBAL(init_secondary)
> +        cpsid aif                    /* Disable all interrupts */
> +
> +        /* Find out where we are */
> +        ldr   r0, =start
> +        adr   r9, start              /* r9  := paddr (start) */
> +        sub   r10, r9, r0            /* r10 := phys-offset */
> +
> +        mov   r12, #1                /* r12 := is_secondary_cpu */
> +
> +common_start:
> +        mov   r7, #0                 /* r7 := CPU ID. Initially zero until we
> +                                      * find that multiprocessor extensions are
> +                                      * present and the system is SMP */
> +        mrc   CP32(r1, MPIDR)
> +        tst   r1, #(1<<31)           /* Multiprocessor extension supported? */
> +        beq   1f
> +        tst   r1, #(1<<30)           /* Uniprocessor system? */
> +        bne   1f
> +        bic   r7, r1, #(~MPIDR_HWID_MASK) /* Mask out flags to get CPU ID */
> +1:
> +
> +        /* Non-boot CPUs wait here until __cpu_up is ready for them */
> +        teq   r12, #0
> +        beq   1f
> +
> +        ldr   r0, =smp_up_cpu
> +        add   r0, r0, r10            /* Apply physical offset */
> +        dsb
> +2:      ldr   r1, [r0]
> +        cmp   r1, r7
> +        beq   1f
> +        wfe
> +        b     2b
> +1:
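
Just to double-check my reading of the new gate protocol: the boot CPU side
presumably ends up doing something like the sketch below (the helper name and
the exact barrier/flush primitives here are my guesses for illustration, not
necessarily what __cpu_up()/smpboot.c actually uses):

    /* Hypothetical waker-side sketch only. */
    extern volatile unsigned long smp_up_cpu;   /* gate, lives in the relocated image */

    static void wake_secondary(unsigned long hwid)
    {
        smp_up_cpu = hwid;                      /* publish which CPU may proceed */
        /* The secondary is still spinning with MMU/caches off, so the store
         * must be cleaned to memory and ordered before the event is sent. */
        asm volatile("dsb" ::: "memory");
        asm volatile("sev");
    }

so the dsb you added before the wait loop (per the v3 changelog) pairs with a
clean + dsb on the waker side before the sev.
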
>  
> -boot_cpu:
>  #ifdef EARLY_PRINTK
>          ldr   r11, =EARLY_UART_BASE_ADDRESS  /* r11 := UART base address */
> -        teq   r12, #0                   /* CPU 0 sets up the UART too */
> +        teq   r12, #0                /* Boot CPU sets up the UART too */
>          bleq  init_uart
>          PRINT("- CPU ")
> -        mov   r0, r12
> +        mov   r0, r7
>          bl    putn
>          PRINT(" booting -\r\n")
>  #endif
> -        /* Secondary CPUs doesn't have machine ID
> -         *  - Store machine ID on boot CPU
> -         *  - Load machine ID on secondary CPUs
> -         * Machine ID is needed in kick_cpus and enter_hyp_mode */
> -        ldr   r0, =machine_id           /* VA of machine_id */
> -        add   r0, r0, r10               /* PA of machine_id */
> -        teq   r12, #0
> -        streq r5, [r0]                  /* On boot CPU save machine ID */
> -        ldrne r5, [r0]                  /* If non boot cpu r5 := machine ID */
> -
> -        /* Wake up secondary cpus */
> -        teq   r12, #0
> -        bleq  kick_cpus
> -
> -        PRINT("- Machine ID ")
> -        mov   r0, r5
> -        bl    putn
> -        PRINT(" -\r\n")
>  
>          /* Check that this CPU has Hyp mode */
>          mrc   CP32(r0, ID_PFR1)
> @@ -147,28 +166,19 @@ boot_cpu:
>          PRINT("- CPU doesn't support the virtualization extensions -\r\n")
>          b     fail
>  1:
> -        /* Check if we're already in it */
> +
> +        /* Check that we're already in Hyp mode */
>          mrs   r0, cpsr
>          and   r0, r0, #0x1f          /* Mode is in the low 5 bits of CPSR */
>          teq   r0, #0x1a              /* Hyp Mode? */
> -        bne   1f
> -        PRINT("- Started in Hyp mode -\r\n")
> -        b     hyp
> -1:
> -        /* Otherwise, it must have been Secure Supervisor mode */
> -        mrc   CP32(r0, SCR)
> -        tst   r0, #0x1               /* Not-Secure bit set? */
> -        beq   1f
> -        PRINT("- CPU is not in Hyp mode or Secure state -\r\n")
> +        beq   hyp
> +
> +        /* OK, we're boned. */
> +        PRINT("- Xen must be entered in NS Hyp mode -\r\n" \
> +              "- Please update the bootloader -\r\n")
>          b     fail
> -1:
> -        /* OK, we're in Secure state. */
> -        PRINT("- Started in Secure state -\r\n- Entering Hyp mode -\r\n")
> -        ldr   r0, =enter_hyp_mode    /* VA of function */
> -        adr   lr, hyp                /* Set return address for call */
> -        add   pc, r0, r10            /* Call PA of function */
>  
> -hyp:
> +hyp:    PRINT("- Xen starting in Hyp mode -\r\n")
>  
>          /* Zero BSS On the boot CPU to avoid nasty surprises */
>          teq   r12, #0
> @@ -208,8 +218,8 @@ skip_bss:
>          bl    putn
>          PRINT(" -\r\n")
>          b     fail
> -
>  2:
> +
>          /* Jump to cpu_init */
>          ldr   r1, [r1, #PROCINFO_cpu_init]  /* r1 := vaddr(init func) */
>          adr   lr, cpu_init_done             /* Save return address */
> @@ -242,68 +252,69 @@ cpu_init_done:
>          ldr   r0, =(HSCTLR_BASE|SCTLR_A)
>          mcr   CP32(r0, HSCTLR)
>  
> +        /* Rebuild the boot pagetable's first-level entries. The structure
> +         * is described in mm.c.
> +         *
> +         * After the CPU enables paging it will add the fixmap mapping
> +         * to these page tables, however this may clash with the 1:1
> +         * mapping. So each CPU must rebuild the page tables here with
> +         * the 1:1 in place. */
> +
>          /* Write Xen's PT's paddr into the HTTBR */
>          ldr   r4, =boot_pgtable
> -        add   r4, r4, r10            /* r4 := paddr (xen_pagetable) */
> -        mov   r5, #0                 /* r4:r5 is paddr (xen_pagetable) */
> +        add   r4, r4, r10            /* r4 := paddr (boot_pagetable) */
> +        mov   r5, #0                 /* r4:r5 is paddr (boot_pagetable) */
>          mcrr  CP64(r4, r5, HTTBR)
>  
> -        /* Non-boot CPUs don't need to rebuild the pagetable */
> -        teq   r12, #0
> -        bne   pt_ready
> -
> -        /* console fixmap */
> -#if defined(EARLY_PRINTK)
> -        ldr   r1, =xen_fixmap
> -        add   r1, r1, r10            /* r1 := paddr (xen_fixmap) */
> -        mov   r3, #0
> -        lsr   r2, r11, #12
> -        lsl   r2, r2, #12            /* 4K aligned paddr of UART */
> -        orr   r2, r2, #PT_UPPER(DEV_L3)
> -        orr   r2, r2, #PT_LOWER(DEV_L3) /* r2:r3 := 4K dev map including UART */
> -        strd  r2, r3, [r1, #(FIXMAP_CONSOLE*8)] /* Map it in the first fixmap's slot */
> -#endif
> -
> -        /* Build the baseline idle pagetable's first-level entries */
> -        ldr   r1, =xen_second
> -        add   r1, r1, r10            /* r1 := paddr (xen_second) */
> +        /* Setup boot_pgtable: */
> +        ldr   r1, =boot_second
> +        add   r1, r1, r10            /* r1 := paddr (boot_second) */
>          mov   r3, #0x0
> -        orr   r2, r1, #PT_UPPER(PT)  /* r2:r3 := table map of xen_second */
> +
> +        /* ... map boot_second in boot_pgtable[0] */
> +        orr   r2, r1, #PT_UPPER(PT)  /* r2:r3 := table map of boot_second */
>          orr   r2, r2, #PT_LOWER(PT)  /* (+ rights for linear PT) */
>          strd  r2, r3, [r4, #0]       /* Map it in slot 0 */
> -        add   r2, r2, #0x1000
> -        strd  r2, r3, [r4, #8]       /* Map 2nd page in slot 1 */
> -        add   r2, r2, #0x1000
> -        strd  r2, r3, [r4, #16]      /* Map 3rd page in slot 2 */
> -        add   r2, r2, #0x1000
> -        strd  r2, r3, [r4, #24]      /* Map 4th page in slot 3 */
> -
> -        /* Now set up the second-level entries */
> -        orr   r2, r9, #PT_UPPER(MEM)
> -        orr   r2, r2, #PT_LOWER(MEM) /* r2:r3 := 2MB normal map of Xen */
> -        mov   r4, r9, lsr #18        /* Slot for paddr(start) */
> -        strd  r2, r3, [r1, r4]       /* Map Xen there */
> -        ldr   r4, =start
> -        lsr   r4, #18                /* Slot for vaddr(start) */
> -        strd  r2, r3, [r1, r4]       /* Map Xen there too */
> -
> -        /* xen_fixmap pagetable */
> -        ldr   r2, =xen_fixmap
> -        add   r2, r2, r10            /* r2 := paddr (xen_fixmap) */
> -        orr   r2, r2, #PT_UPPER(PT)
> -        orr   r2, r2, #PT_LOWER(PT)  /* r2:r3 := table map of xen_fixmap */
> -        add   r4, r4, #8
> -        strd  r2, r3, [r1, r4]       /* Map it in the fixmap's slot */
>  
> -        mov   r3, #0x0
> -        lsr   r2, r8, #21
> -        lsl   r2, r2, #21            /* 2MB-aligned paddr of DTB */
> -        orr   r2, r2, #PT_UPPER(MEM)
> -        orr   r2, r2, #PT_LOWER(MEM) /* r2:r3 := 2MB RAM incl. DTB */
> -        add   r4, r4, #8
> -        strd  r2, r3, [r1, r4]       /* Map it in the early boot slot */
> +        /* ... map of paddr(start) in boot_pgtable */
> +        lsrs  r1, r9, #30            /* Offset of base paddr in boot_pgtable */
> +        beq   1f                     /* If it is in slot 0 then map in
> +                                      * boot_second later on */
> +        lsl   r2, r1, #30            /* Base address for 1GB mapping */
> +        orr   r2, r2, #PT_UPPER(MEM) /* r2:r3 := section map */
> +        orr   r2, r2, #PT_LOWER(MEM)
> +        lsl   r1, r1, #3             /* r1 := Slot offset */
> +        strd  r2, r3, [r4, r1]       /* Mapping of paddr(start) */
> +
> +1:      /* Setup boot_second: */
> +        ldr   r4, =boot_second
> +        add   r4, r4, r10            /* r4 := paddr (boot_second) */
> +
> +        lsr   r2, r9, #20            /* Base address for 2MB mapping */
> +        lsl   r2, r2, #20
> +        orr   r2, r2, #PT_UPPER(MEM) /* r2:r3 := section map */
> +        orr   r2, r2, #PT_LOWER(MEM)
> +
> +        /* ... map of vaddr(start) in boot_second */
> +        ldr   r1, =start
> +        lsr   r1, #18                /* Slot for vaddr(start) */
> +        strd  r2, r3, [r4, r1]       /* Map vaddr(start) */
> +
> +        /* ... map of paddr(start) in boot_second */
> +        lsrs  r1, r9, #30            /* Base paddr */
> +        bne   1f                     /* If paddr(start) is not in slot 0
> +                                      * then the mapping was done in
> +                                      * boot_pgtable above */
> +
> +        mov   r1, r9, lsr #18        /* Slot for paddr(start) */
> +        strd  r2, r3, [r4, r1]       /* Map Xen there */
> +1:
> +
> +        /* Defer fixmap and dtb mapping until after paging enabled, to
> +         * avoid them clashing with the 1:1 mapping. */
> +
> +        /* boot pagetable setup complete */
>  
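
To help review the shift arithmetic above, here is my understanding written
out in C (the helper names and the example load address are mine, purely for
illustration):

    /* arm32 LPAE: boot_pgtable has 4 x 1GB entries, boot_second 512 x 2MB. */
    static inline unsigned int boot_pgtable_slot(unsigned long long pa)
    {
        return pa >> 30;                 /* 1GB first-level sections */
    }

    static inline unsigned int boot_second_slot(unsigned long va)
    {
        return (va >> 21) & 0x1ff;       /* 2MB second-level entries */
    }

    /* e.g. Xen loaded at 0x80200000:
     *   boot_pgtable_slot(0x80200000) == 2 -> 1:1 1GB section in slot 2
     *   boot_second_slot(0x00200000)  == 1 -> 2MB map of XEN_VIRT_START in
     *                                         boot_second[1]
     * The asm computes the byte offset (slot * 8) directly with "lsr #18",
     * relying on the alignment of the addresses involved. */

At least that is how I read it.
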
> -pt_ready:
>          PRINT("- Turning on paging -\r\n")
>  
>          ldr   r1, =paging            /* Explicit vaddr, not RIP-relative */
> @@ -315,11 +326,53 @@ pt_ready:
>          mov   pc, r1                 /* Get a proper vaddr into PC */
>  paging:
>  
> +        /* Now we can install the fixmap and dtb mappings, since we
> +         * don't need the 1:1 map any more */
> +        dsb
> +#if defined(EARLY_PRINTK) /* Fixmap is only used by early printk */
> +        /* Non-boot CPUs don't need to rebuild the fixmap itself, just
> +      * the mapping from boot_second to xen_fixmap */
> +        teq   r12, #0
> +        bne   1f
> +
> +        /* Add UART to the fixmap table */
> +        ldr   r1, =xen_fixmap        /* r1 := vaddr (xen_fixmap) */
> +        mov   r3, #0
> +        lsr   r2, r11, #12
> +        lsl   r2, r2, #12            /* 4K aligned paddr of UART */
> +        orr   r2, r2, #PT_UPPER(DEV_L3)
> +        orr   r2, r2, #PT_LOWER(DEV_L3) /* r2:r3 := 4K dev map including UART */
> +        strd  r2, r3, [r1, #(FIXMAP_CONSOLE*8)] /* Map it in the first fixmap's slot */
> +1:
> +
> +        /* Map fixmap into boot_second */
> +        ldr   r1, =boot_second       /* r1 := vaddr (boot_second) */
> +        ldr   r2, =xen_fixmap
> +        add   r2, r2, r10            /* r2 := paddr (xen_fixmap) */
> +        orr   r2, r2, #PT_UPPER(PT)
> +        orr   r2, r2, #PT_LOWER(PT)  /* r2:r3 := table map of xen_fixmap */
> +        ldr   r4, =FIXMAP_ADDR(0)
> +        mov   r4, r4, lsr #18        /* r4 := Slot for FIXMAP(0) */
> +        strd  r2, r3, [r1, r4]       /* Map it in the fixmap's slot */
>  
> -#ifdef EARLY_PRINTK
>          /* Use a virtual address to access the UART. */
>          ldr   r11, =FIXMAP_ADDR(FIXMAP_CONSOLE)
>  #endif
> +        /* Map the DTB in the boot misc slot */
> +        teq   r12, #0                /* Only on boot CPU */
> +        bne   1f
> +
> +        ldr   r1, =boot_second
> +        mov   r3, #0x0
> +        lsr   r2, r8, #21
> +        lsl   r2, r2, #21            /* r2: 2MB-aligned paddr of DTB */
> +        orr   r2, r2, #PT_UPPER(MEM)
> +        orr   r2, r2, #PT_LOWER(MEM) /* r2:r3 := 2MB RAM incl. DTB */
> +        ldr   r4, =BOOT_MISC_VIRT_START
> +        mov   r4, r4, lsr #18        /* Slot for BOOT_MISC_VIRT_START */
> +        strd  r2, r3, [r1, r4]       /* Map it in the early boot slot */
> +        dsb
> +1:
>  
>          PRINT("- Ready -\r\n")
>  
> @@ -327,10 +380,10 @@ paging:
>          teq   r12, #0
>          beq   launch
>  
> -        /* Non-boot CPUs need to move on to the relocated pagetables */
> -        mov   r0, #0
> -        ldr   r4, =boot_ttbr         /* VA of HTTBR value stashed by CPU 0 */
> -        add   r4, r4, r10            /* PA of it */
> +        /* Non-boot CPUs need to move on to the proper pagetables, which were
> +         * setup in init_secondary_pagetables. */
> +
> +        ldr   r4, =init_ttbr         /* VA of HTTBR value stashed by CPU 0 */
>          ldrd  r4, r5, [r4]           /* Actual value */
>          dsb
>          mcrr  CP64(r4, r5, HTTBR)
> @@ -342,29 +395,6 @@ paging:
>          dsb                          /* Ensure completion of TLB+BP flush */
>          isb
>  
> -        /* Non-boot CPUs report that they've got this far */
> -        ldr   r0, =ready_cpus
> -1:      ldrex r1, [r0]               /*            { read # of ready CPUs } */
> -        add   r1, r1, #1             /* Atomically { ++                   } */
> -        strex r2, r1, [r0]           /*            { writeback            } */
> -        teq   r2, #0
> -        bne   1b
> -        dsb
> -        mcr   CP32(r0, DCCMVAC)      /* flush D-Cache */
> -        dsb
> -
> -        /* Here, the non-boot CPUs must wait again -- they're now running on
> -         * the boot CPU's pagetables so it's safe for the boot CPU to
> -         * overwrite the non-relocated copy of Xen.  Once it's done that,
> -         * and brought up the memory allocator, non-boot CPUs can get their
> -         * own stacks and enter C. */
> -1:      wfe
> -        dsb
> -        ldr   r0, =smp_up_cpu
> -        ldr   r1, [r0]               /* Which CPU is being booted? */
> -        teq   r1, r12                /* Is it us? */
> -        bne   1b
> -
>  launch:
>          ldr   r0, =init_data
>          add   r0, #INITINFO_stack    /* Find the boot-time stack */
> @@ -373,7 +403,7 @@ launch:
>          sub   sp, #CPUINFO_sizeof    /* Make room for CPU save record */
>          mov   r0, r10                /* Marshal args: - phys_offset */
>          mov   r1, r8                 /*               - DTB address */
> -        movs  r2, r12                /*               - CPU ID */
> +        movs  r2, r7                 /*               - CPU ID */
>          beq   start_xen              /* and disappear into the land of C */
>          b     start_secondary        /* (to the appropriate entry point) */
>  
> @@ -383,6 +413,82 @@ fail:   PRINT("- Boot failed -\r\n")
>  1:      wfe
>          b     1b
>  
> +/* Copy Xen to new location and switch TTBR
> + * r1:r0       ttbr
> + * r2          source address
> + * r3          destination address
> + * [sp]=>r4    length
> + *
> + * Source and destination must be word aligned, length is rounded up
> + * to a 16 byte boundary.
> + *
> + * MUST BE VERY CAREFUL when saving things to RAM over the copy */
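
From the register comments this corresponds to a C prototype along the lines
of (my reading of the AAPCS marshalling, not copied from the mm.c side of the
patch):

    /* r1:r0 = ttbr (64-bit), r2 = source, r3 = destination, [sp] = length */
    void relocate_xen(uint64_t ttbr, void *src, void *dst, unsigned long len);

with setup_pagetables() doing the whole copy-and-switch through this one call,
so the compiler has nothing it can spill across the copy, as described in the
commit message.
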
> +ENTRY(relocate_xen)
> +        push {r4,r5,r6,r7,r8,r9,r10,r11}
> +
> +        ldr   r4, [sp, #8*4]                /* Get 4th argument from stack */
> +
> +        /* Copy 16 bytes at a time using:
> +         * r5:  counter
> +         * r6:  data
> +         * r7:  data
> +         * r8:  data
> +         * r9:  data
> +         * r10: source
> +         * r11: destination
> +         */
> +        mov   r5, r4
> +        mov   r10, r2
> +        mov   r11, r3
> +1:      ldmia r10!, {r6, r7, r8, r9}
> +        stmia r11!, {r6, r7, r8, r9}
> +
> +        subs  r5, r5, #16
> +        bgt   1b
> +
> +        /* Flush destination from dcache using:
> +         * r5: counter
> +         * r6: step
> +         * r7: vaddr
> +         */
> +        dsb        /* So the CPU issues all writes to the range */
> +
> +        mov   r5, r4
> +        ldr   r6, =cacheline_bytes /* r6 := step */
> +        ldr   r6, [r6]
> +        mov   r7, r3
> +
> +1:      mcr   CP32(r7, DCCMVAC)
> +
> +        add   r7, r7, r6
> +        subs  r5, r5, r6
> +        bgt   1b
> +
> +        dsb                            /* Ensure the flushes happen before
> +                                        * continuing */
> +        isb                            /* Ensure synchronization with previous
> +                                        * changes to text */
> +        mcr   CP32(r0, TLBIALLH)       /* Flush hypervisor TLB */
> +        mcr   CP32(r0, ICIALLU)        /* Flush I-cache */
> +        mcr   CP32(r0, BPIALL)         /* Flush branch predictor */
> +        dsb                            /* Ensure completion of TLB+BP flush */
> +        isb
> +
> +        mcrr  CP64(r0, r1, HTTBR)
> +
> +        dsb                            /* ensure memory accesses do not cross
> +                                        * over the TTBR0 write */
> +        isb                            /* Ensure synchronization with previous
> +                                        * changes to text */
> +        mcr   CP32(r0, TLBIALLH)       /* Flush hypervisor TLB */
> +        mcr   CP32(r0, ICIALLU)        /* Flush I-cache */
> +        mcr   CP32(r0, BPIALL)         /* Flush branch predictor */
> +        dsb                            /* Ensure completion of TLB+BP flush */
> +        isb
> +
> +        pop {r4, r5,r6,r7,r8,r9,r10,r11}
> +
> +        mov pc, lr
>  
>  #ifdef EARLY_PRINTK
>  /* Bring up the UART.
> @@ -439,9 +545,6 @@ putn:   mov   pc, lr
>  
>  #endif /* !EARLY_PRINTK */
>  
> -/* Place holder for machine ID */
> -machine_id: .word 0x0
> -
>  /*
>   * Local variables:
>   * mode: ASM
> diff --git a/xen/arch/arm/arm32/mode_switch.S b/xen/arch/arm/arm32/mode_switch.S
> deleted file mode 100644
> index 2cd5888..0000000
> --- a/xen/arch/arm/arm32/mode_switch.S
> +++ /dev/null
> @@ -1,158 +0,0 @@
> -/*
> - * xen/arch/arm/mode_switch.S
> - *
> - * Start-of day code to take a CPU from Secure mode to Hyp mode.
> - *
> - * Tim Deegan <tim@xxxxxxx>
> - * Copyright (c) 2011-2012 Citrix Systems.
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - *
> - * This program is distributed in the hope that it will be useful,
> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> - * GNU General Public License for more details.
> - */
> -
> -#include <asm/config.h>
> -#include <asm/page.h>
> -#include <asm/platforms/vexpress.h>
> -#include <asm/platforms/exynos5.h>
> -#include <asm/asm_defns.h>
> -#include <asm/gic.h>
> -
> -/* Wake up secondary cpus
> - * This code relies on Machine ID and only works for Vexpress and the Arndale
> - * TODO: Move this code either later (via platform specific desc) or in a bootwrapper
> - * r5: Machine ID
> - * Clobber r0 r2 */
> -GLOBAL(kick_cpus)
> -        ldr   r0, =MACH_TYPE_SMDK5250
> -        teq   r5, r0                          /* Are we running on the arndale? */
> -        beq   kick_cpus_arndale
> -        /* otherwise versatile express */
> -        /* write start paddr to v2m sysreg FLAGSSET register */
> -        ldr   r0, =(V2M_SYS_MMIO_BASE)        /* base V2M sysreg MMIO address */
> -        dsb
> -        mov   r2, #0xffffffff
> -        str   r2, [r0, #(V2M_SYS_FLAGSCLR)]
> -        dsb
> -        ldr   r2, =start
> -        add   r2, r2, r10
> -        str   r2, [r0, #(V2M_SYS_FLAGSSET)]
> -        dsb
> -        ldr   r2, =V2M_GIC_BASE_ADDRESS       /* r2 := VE gic base address */
> -        b     kick_cpus_sgi
> -kick_cpus_arndale:
> -        /* write start paddr to CPU 1 sysreg register */
> -        ldr   r0, =(S5P_PA_SYSRAM)
> -        ldr   r2, =start
> -        add   r2, r2, r10
> -        str   r2, [r0]
> -        dsb
> -        ldr   r2, =EXYNOS5_GIC_BASE_ADDRESS   /* r2 := Exynos5 gic base address */
> -kick_cpus_sgi:
> -        /* send an interrupt */
> -        ldr   r0, =GIC_DR_OFFSET              /* GIC distributor offset */
> -        add   r0, r2                          /* r0 := r0 + gic base address */
> -        mov   r2, #0x1
> -        str   r2, [r0, #(GICD_CTLR * 4)]      /* enable distributor */
> -        mov   r2, #0xfe0000
> -        str   r2, [r0, #(GICD_SGIR * 4)]      /* send IPI to everybody, SGI0 = Event check */
> -        dsb
> -        str   r2, [r0, #(GICD_CTLR * 4)]      /* disable distributor */
> -        mov   pc, lr
> -
> -
> -/* Get up a CPU into Hyp mode.  Clobbers r0-r3.
> - *
> - * r5: Machine ID
> - * r12: CPU number
> - *
> - * This code is specific to the VE model/Arndale, and not intended to be used
> - * on production systems.  As such it's a bit hackier than the main
> - * boot code in head.S.  In future it will be replaced by better
> - * integration with the bootloader/firmware so that Xen always starts
> - * in Hyp mode.
> - * Clobber r0 - r4 */
> -GLOBAL(enter_hyp_mode)
> -        mov   r3, lr                 /* Put return address in non-banked reg */
> -        cpsid aif, #0x16             /* Enter Monitor mode */
> -        mrc   CP32(r0, SCR)
> -        orr   r0, r0, #0x100         /* Set HCE */
> -        orr   r0, r0, #0xb1          /* Set SCD, AW, FW and NS */
> -        bic   r0, r0, #0xe           /* Clear EA, FIQ and IRQ */
> -        mcr   CP32(r0, SCR)
> -
> -        ldr   r2, =MACH_TYPE_SMDK5250   /* r4 := Arndale machine ID */
> -        /* By default load Arndale defaults values */
> -        ldr   r0, =EXYNOS5_TIMER_FREQUENCY  /* r0 := timer's frequency */
> -        ldr   r1, =EXYNOS5_GIC_BASE_ADDRESS /* r1 := GIC base address */
> -        /* If it's not the Arndale machine ID, load VE values */
> -        teq   r5, r2
> -        ldrne r0, =V2M_TIMER_FREQUENCY
> -        ldrne r1, =V2M_GIC_BASE_ADDRESS
> -
> -        /* Ugly: the system timer's frequency register is only
> -         * programmable in Secure state.  Since we don't know where its
> -         * memory-mapped control registers live, we can't find out the
> -         * right frequency. */
> -        mcr   CP32(r0, CNTFRQ)
> -
> -        mrc   CP32(r0,NSACR)
> -        ldr   r4, =0x3fff            /* Allow access to all co-processors in NS mode */
> -        orr   r0, r0, r4
> -        orr   r0, r0, #(1<<18)       /* CA7/CA15: Allow access to ACTLR.SMP in NS mode */
> -        mcr   CP32(r0, NSACR)
> -
> -        add   r0, r1, #GIC_DR_OFFSET
> -        /* Disable the GIC distributor, on the boot CPU only */
> -        mov   r4, #0
> -        teq   r12, #0                /* Is this the boot CPU? */
> -        streq r4, [r0]
> -        /* Continuing ugliness: Set up the GIC so NS state owns interrupts,
> -         * The first 32 interrupts (SGIs & PPIs) must be configured on all
> -         * CPUs while the remainder are SPIs and only need to be done one, on
> -         * the boot CPU. */
> -        add   r0, r0, #0x80          /* GICD_IGROUP0 */
> -        mov   r2, #0xffffffff        /* All interrupts to group 1 */
> -        str   r2, [r0]               /* Interrupts  0-31 (SGI & PPI) */
> -        teq   r12, #0                /* Boot CPU? */
> -        bne   skip_spis              /* Don't route SPIs on secondary CPUs */
> -
> -        add   r4, r1, #GIC_DR_OFFSET
> -        ldr   r4, [r4, #4]            /* r4 := Interrupt Controller Type Reg */
> -        and   r4, r4, #GICD_TYPE_LINES /* r4 := number of SPIs */
> -1:      teq   r4, #0
> -        beq   skip_spis
> -        add   r0, r0, #4             /* Go to the new group */
> -        str   r2, [r0]               /* Update the group */
> -        sub  r4, r4, #1
> -        b     1b
> -skip_spis:
> -        /* Disable the GIC CPU interface on all processors */
> -        add   r0, r1, #GIC_CR_OFFSET
> -        mov   r1, #0
> -        str   r1, [r0]
> -        /* Must drop priority mask below 0x80 before entering NS state */
> -        ldr   r1, =0xff
> -        str   r1, [r0, #0x4]         /* -> GICC_PMR */
> -        /* Reset a few config registers */
> -        mov   r0, #0
> -        mcr   CP32(r0, FCSEIDR)
> -        mcr   CP32(r0, CONTEXTIDR)
> -
> -        mrs   r0, cpsr               /* Copy the CPSR */
> -        add   r0, r0, #0x4           /* 0x16 (Monitor) -> 0x1a (Hyp) */
> -        msr   spsr_cxsf, r0          /* into the SPSR */
> -        movs  pc, r3                 /* Exception-return into Hyp mode */
> -
> -/*
> - * Local variables:
> - * mode: ASM
> - * indent-tabs-mode: nil
> - * End:
> - */
> diff --git a/xen/arch/arm/arm64/Makefile b/xen/arch/arm/arm64/Makefile
> index 30fb480..5d28bad 100644
> --- a/xen/arch/arm/arm64/Makefile
> +++ b/xen/arch/arm/arm64/Makefile
> @@ -1,7 +1,6 @@
>  subdir-y += lib
>  
>  obj-y += entry.o
> -obj-y += mode_switch.o
>  
>  obj-y += traps.o
>  obj-y += domain.o
> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
> index 7cecac3..b327770 100644
> --- a/xen/arch/arm/arm64/head.S
> +++ b/xen/arch/arm/arm64/head.S
> @@ -33,6 +33,41 @@
>  #include EARLY_PRINTK_INC
>  #endif
>  
> +/*
> + * Common register usage in this file:
> + *  x0  -
> + *  x1  -
> + *  x2  -
> + *  x3  -
> + *  x4  -
> + *  x5  -
> + *  x6  -
> + *  x7  -
> + *  x8  -
> + *  x9  -
> + *  x10 -
> + *  x11 -
> + *  x12 -
> + *  x13 -
> + *  x14 -
> + *  x15 -
> + *  x16 -
> + *  x17 -
> + *  x18 -
> + *  x19 - paddr(start)
> + *  x20 - phys offset
> + *  x21 - DTB address (boot cpu only)
> + *  x22 - is_secondary_cpu
> + *  x23 - UART address
> + *  x24 - cpuid
> + *  x25 -
> + *  x26 -
> + *  x27 -
> + *  x28 -
> + *  x29 -
> + *  x30 - lr
> + */
> +
>  /* Macro to print a string to the UART, if there is one.
>   * Clobbers x0-x3. */
>  #ifdef EARLY_PRINTK
> @@ -65,7 +100,6 @@
>  
>          .global start
>  start:
> -GLOBAL(init_secondary) /* currently unused */
>          /*
>           * DO NOT MODIFY. Image header expected by Linux boot-loaders.
>           */
> @@ -100,69 +134,73 @@ real_start:
>          add   x21, x21, x20          /* x21 := paddr(DTB) */
>  #endif
>  
> -        /* Are we the boot CPU? */
> -        mov   x22, #0                /* x22 := CPU ID */
> +        mov   x22, #0                /* x22 := is_secondary_cpu */
> +
> +        b     common_start
> +
> +GLOBAL(init_secondary)
> +        msr   DAIFSet, 0xf           /* Disable all interrupts */
> +
> +        /* Find out where we are */
> +        ldr   x0, =start
> +        adr   x19, start             /* x19 := paddr (start) */
> +        sub   x20, x19, x0           /* x20 := phys-offset */
> +
> +        mov   x22, #1                /* x22 := is_secondary_cpu */
> +
> +common_start:
> +        mov   x24, #0                /* x24 := CPU ID. Initially zero until we
> +                                      * find that multiprocessor extensions are
> +                                      * present and the system is SMP  */
>          mrs   x0, mpidr_el1
> -        tbz   x0, 31, boot_cpu       /* Multiprocessor extension supported? */
> -        tbnz  x0, 30, boot_cpu       /* Uniprocessor system? */
> +        tbz   x0, 31, 1f             /* Multiprocessor extension not supported? */
> +        tbnz  x0, 30, 1f             /* Uniprocessor system? */
>  
>          mov   x13, #(0xff << 24)
> -        bics  x22, x0, x13           /* Mask out flags to get CPU ID */
> -        b.eq  boot_cpu               /* If we're CPU 0, boot now */
> -
> -        /* Non-boot CPUs wait here to be woken up one at a time. */
> -1:      dsb   sy
> -        ldr   x0, =smp_up_cpu        /* VA of gate */
> -        add   x0, x0, x20            /* PA of gate */
> -        ldr   x1, [x0]               /* Which CPU is being booted? */
> -        cmp   x1, x22                /* Is it us? */
> -        b.eq  2f
> +        bic   x24, x0, x13           /* Mask out flags to get CPU ID */
> +1:
> +
> +        /* Non-boot CPUs wait here until __cpu_up is ready for them */
> +        cbz   x22, 1f
> +
> +        ldr   x0, =smp_up_cpu
> +        add   x0, x0, x20            /* Apply physical offset */
> +        dsb   sy
> +2:      ldr   x1, [x0]
> +        cmp   x1, x24
> +        beq   1f
>          wfe
> -        b     1b
> -2:
> +        b     2b
> +1:
>  
> -boot_cpu:
>  #ifdef EARLY_PRINTK
>          ldr   x23, =EARLY_UART_BASE_ADDRESS /* x23 := UART base address */
>          cbnz  x22, 1f
> -#ifdef EARLY_PRINTK_INIT_UART
> -        bl    init_uart                 /* CPU 0 sets up the UART too */
> -#endif
> +        bl    init_uart                 /* Boot CPU sets up the UART too */
>  1:      PRINT("- CPU ")
> -        mov   x0, x22
> +        mov   x0, x24
>          bl    putn
>          PRINT(" booting -\r\n")
>  #endif
>  
>          PRINT("- Current EL ")
> -        mrs   x0, CurrentEL
> +        mrs   x4, CurrentEL
> +        mov   x0, x4
>          bl    putn
>          PRINT(" -\r\n")
>  
> -        /* Are we in EL3 */
> -        mrs   x0, CurrentEL
> -        cmp   x0, #PSR_MODE_EL3t
> -        ccmp  x0, #PSR_MODE_EL3h, #0x4, ne
> -        b.eq  1f /* Yes */
> -
>          /* Are we in EL2 */
> -        cmp   x0, #PSR_MODE_EL2t
> -        ccmp  x0, #PSR_MODE_EL2h, #0x4, ne
> -        b.eq  2f /* Yes */
> +        cmp   x4, #PSR_MODE_EL2t
> +        ccmp  x4, #PSR_MODE_EL2h, #0x4, ne
> +        b.eq  el2 /* Yes */
>  
> -        /* Otherwise, it must have been EL0 or EL1 */
> -        PRINT("- CPU is not in EL3 or EL2 -\r\n")
> -        b     fail
> +        /* OK, we're boned. */
> +        PRINT("- Xen must be entered in NS EL2 mode -\r\n" \
> +              "- Please update the bootloader -\r\n")
> +        b fail
>  
> -1:      PRINT("- Started in EL3 -\r\n- Entering EL2 -\r\n")
> -        ldr   x1, =enter_el2_mode    /* VA of function */
> -        add   x1, x1, x20            /* PA of function */
> -        adr   x30, el2               /* Set return address for call */
> -        br    x1                     /* Call function */
> +el2:    PRINT("- Xen starting at EL2 -\r\n")
>  
> -2:      PRINT("- Started in EL2 mode -\r\n")
> -
> -el2:
>          /* Zero BSS On the boot CPU to avoid nasty surprises */
>          cbnz  x22, skip_bss
>  
> @@ -177,9 +215,10 @@ el2:
>          b.lo  1b
>  
>  skip_bss:
> -
>          PRINT("- Setting up control registers -\r\n")
>  
> +        /* XXXX call PROCINFO_cpu_init here */
> +
>          /* Set up memory attribute type tables */
>          ldr   x0, =MAIRVAL
>          msr   mair_el2, x0
> @@ -193,7 +232,7 @@ skip_bss:
>          ldr   x0, =0x80802500
>          msr   tcr_el2, x0
>  
> -        /* Set up the HSCTLR:
> +        /* Set up the SCTLR_EL2:
>           * Exceptions in LE ARM,
>           * Low-latency IRQs disabled,
>           * Write-implies-XN disabled (for now),
> @@ -204,69 +243,90 @@ skip_bss:
>          ldr   x0, =(HSCTLR_BASE|SCTLR_A)
>          msr   SCTLR_EL2, x0
>  
> -        /* Write Xen's PT's paddr into the HTTBR */
> +        /* Rebuild the boot pagetable's first-level entries. The structure
> +         * is described in mm.c.
> +         *
> +         * After the CPU enables paging it will add the fixmap mapping
> +         * to these page tables, however this may clash with the 1:1
> +         * mapping. So each CPU must rebuild the page tables here with
> +         * the 1:1 in place. */
> +
> +        /* Write Xen's PT's paddr into TTBR0_EL2 */
>          ldr   x4, =boot_pgtable
> -        add   x4, x4, x20            /* x4 := paddr (xen_pagetable) */
> +        add   x4, x4, x20            /* x4 := paddr (boot_pagetable) */
>          msr   TTBR0_EL2, x4
>  
> -        /* Non-boot CPUs don't need to rebuild the pagetable */
> -        cbnz  x22, pt_ready
> -
> +        /* Setup boot_pgtable: */
>          ldr   x1, =boot_first
> -        add   x1, x1, x20            /* x1 := paddr (xen_first) */
> -        mov   x3, #PT_PT             /* x2 := table map of xen_first */
> -        orr   x2, x1, x3             /* (+ rights for linear PT) */
> -        str   x2, [x4, #0]           /* Map it in slot 0 */
> +        add   x1, x1, x20            /* x1 := paddr (boot_first) */
>  
> -        mov   x4, x1                 /* Next level into xen_first */
> +        /* ... map boot_first in boot_pgtable[0] */
> +        mov   x3, #PT_PT             /* x2 := table map of boot_first */
> +        orr   x2, x1, x3             /*       + rights for linear PT */
> +        str   x2, [x4, #0]           /* Map it in slot 0 */
>  
> -       /* console fixmap */
> -        ldr   x1, =xen_fixmap
> -        add   x1, x1, x20            /* x1 := paddr (xen_fixmap) */
> -        lsr   x2, x23, #12
> -        lsl   x2, x2, #12            /* 4K aligned paddr of UART */
> -        mov   x3, #PT_DEV_L3
> -        orr   x2, x2, x3             /* x2 := 4K dev map including UART */
> -        str   x2, [x1, #(FIXMAP_CONSOLE*8)] /* Map it in the first fixmap's slot */
> +        /* ... map of paddr(start) in boot_pgtable */
> +        lsr   x1, x19, #39           /* Offset of base paddr in boot_pgtable */
> +        cbz   x1, 1f                 /* It's in slot 0, map in boot_first
> +                                      * or boot_second later on */
>  
> -        /* Build the baseline idle pagetable's first-level entries */
> -        ldr   x1, =xen_second
> -        add   x1, x1, x20            /* x1 := paddr (xen_second) */
> -        mov   x3, #PT_PT             /* x2 := table map of xen_second */
> -        orr   x2, x1, x3             /* (+ rights for linear PT) */
> +        lsl   x2, x1, #39            /* Base address for 512GB mapping */
> +        mov   x3, #PT_MEM            /* x2 := Section mapping */
> +        orr   x2, x2, x3
> +        lsl   x1, x1, #3             /* x1 := Slot offset */
> +        str   x2, [x4, x1]           /* Mapping of paddr(start)*/
> +
> +1:      /* Setup boot_first: */
> +        ldr   x4, =boot_first        /* Next level into boot_first */
> +        add   x4, x4, x20            /* x4 := paddr(boot_first) */
> +
> +        /* ... map boot_second in boot_first[0] */
> +        ldr   x1, =boot_second
> +        add   x1, x1, x20            /* x1 := paddr(boot_second) */
> +        mov   x3, #PT_PT             /* x2 := table map of boot_first */
> +        orr   x2, x1, x3             /*       + rights for linear PT */
>          str   x2, [x4, #0]           /* Map it in slot 0 */
> -        add   x2, x2, #0x1000
> -        str   x2, [x4, #8]           /* Map 2nd page in slot 1 */
> -        add   x2, x2, #0x1000
> -        str   x2, [x4, #16]          /* Map 3rd page in slot 2 */
> -        add   x2, x2, #0x1000
> -        str   x2, [x4, #24]          /* Map 4th page in slot 3 */
> -
> -        /* Now set up the second-level entries */
> -        mov   x3, #PT_MEM
> -        orr   x2, x19, x3            /* x2 := 2MB normal map of Xen */
> -        orr   x4, xzr, x19, lsr #18
> -        str   x2, [x1, x4]           /* Map Xen there */
> -        ldr   x4, =start
> -        lsr   x4, x4, #18            /* Slot for vaddr(start) */
> -        str   x2, [x1, x4]           /* Map Xen there too */
> -
> -        /* xen_fixmap pagetable */
> -        ldr   x2, =xen_fixmap
> -        add   x2, x2, x20            /* x2 := paddr (xen_fixmap) */
> -        mov   x3, #PT_PT
> -        orr   x2, x2, x3             /* x2 := table map of xen_fixmap */
> -        add   x4, x4, #8
> -        str   x2, [x1, x4]           /* Map it in the fixmap's slot */
>  
> -        lsr   x2, x21, #21
> -        lsl   x2, x2, #21            /* 2MB-aligned paddr of DTB */
> -        mov   x3, #PT_MEM            /* x2 := 2MB RAM incl. DTB */
> +        /* ... map of paddr(start) in boot_first */
> +        lsr   x2, x19, #30           /* x2 := Offset of base paddr in boot_first */
> +        and   x1, x2, 0x1ff          /* x1 := Slot to use */
> +        cbz   x1, 1f                 /* It's in slot 0, map in boot_second */
> +
> +        lsl   x2, x2, #30            /* Base address for 1GB mapping */
> +        mov   x3, #PT_MEM            /* x2 := Section map */
>          orr   x2, x2, x3
> -        add   x4, x4, #8
> -        str   x2, [x1, x4]           /* Map it in the early boot slot */
> +        lsl   x1, x1, #3             /* x1 := Slot offset */
> +        str   x2, [x4, x1]           /* Create mapping of paddr(start)*/
> +
> +1:      /* Setup boot_second: */
> +        ldr   x4, =boot_second
> +        add   x4, x4, x20            /* x4 := paddr (boot_second) */
> +
> +        lsr   x2, x19, #20           /* Base address for 2MB mapping */
> +        lsl   x2, x2, #20
> +        mov   x3, #PT_MEM            /* x2 := Section map */
> +        orr   x2, x2, x3
> +
> +        /* ... map of vaddr(start) in boot_second */
> +        ldr   x1, =start
> +        lsr   x1, x1, #18            /* Slot for vaddr(start) */
> +        str   x2, [x4, x1]           /* Map vaddr(start) */
> +
> +        /* ... map of paddr(start) in boot_second */
> +        lsr   x1, x19, #30           /* Base paddr */
> +        cbnz  x1, 1f                 /* If paddr(start) is not in slot 0
> +                                      * then the mapping was done in
> +                                      * boot_pgtable or boot_first above */
> +
> +        lsr   x1, x19, #18           /* Slot for paddr(start) */
> +        str   x2, [x4, x1]           /* Map Xen there */
> +1:
> +
> +        /* Defer fixmap and dtb mapping until after paging enabled, to
> +         * avoid them clashing with the 1:1 mapping. */
> +
> +        /* boot pagetable setup complete */
>  
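
Same comment as on the arm32 side: a worked example of the 64-bit shifts with
a made-up load address above 4GB (which is the case this is meant to enable):

    /* Illustration only -- slot arithmetic as I understand the layout:
     *   boot_pgtable: 512GB zeroeth-level entries -> slot = pa >> 39
     *   boot_first:   1GB first-level entries     -> slot = (pa >> 30) & 0x1ff
     *   boot_second:  2MB second-level entries    -> slot = (va >> 21) & 0x1ff
     *
     * e.g. Xen loaded at 0x880000000 (34GB):
     *   0x880000000 >> 39           == 0  -> nothing extra in boot_pgtable
     *   (0x880000000 >> 30) & 0x1ff == 34 -> 1:1 1GB section in boot_first[34]
     *   XEN_VIRT_START (0x00200000) >> 21 == 1 -> 2MB map in boot_second[1]
     */
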
> -pt_ready:
>          PRINT("- Turning on paging -\r\n")
>  
>          ldr   x1, =paging            /* Explicit vaddr, not RIP-relative */
> @@ -279,17 +339,60 @@ pt_ready:
>          br    x1                     /* Get a proper vaddr into PC */
>  paging:
>  
> +        /* Now we can install the fixmap and dtb mappings, since we
> +         * don't need the 1:1 map any more */
> +        dsb   sy
> +#if defined(EARLY_PRINTK) /* Fixmap is only used by early printk */
> +        /* Non-boot CPUs don't need to rebuild the fixmap itself, just
> +      * the mapping from boot_second to xen_fixmap */
> +        cbnz  x22, 1f
> +
> +        /* Add UART to the fixmap table */
> +        ldr   x1, =xen_fixmap
> +        add   x1, x1, x20            /* x1 := paddr (xen_fixmap) */
> +        lsr   x2, x23, #12
> +        lsl   x2, x2, #12            /* 4K aligned paddr of UART */
> +        mov   x3, #PT_DEV_L3
> +        orr   x2, x2, x3             /* x2 := 4K dev map including UART */
> +        str   x2, [x1, #(FIXMAP_CONSOLE*8)] /* Map it in the first fixmap's slot */
> +1:
> +
> +        /* Map fixmap into boot_second */
> +        ldr   x4, =boot_second       /* x4 := vaddr (boot_second) */
> +        ldr   x2, =xen_fixmap
> +        add   x2, x2, x20            /* x2 := paddr (xen_fixmap) */
> +        mov   x3, #PT_PT
> +        orr   x2, x2, x3             /* x2 := table map of xen_fixmap */
> +        ldr   x1, =FIXMAP_ADDR(0)
> +        lsr   x1, x1, #18            /* x1 := Slot for FIXMAP(0) */
> +        str   x2, [x4, x1]           /* Map it in the fixmap's slot */
> +
>          /* Use a virtual address to access the UART. */
>          ldr   x23, =FIXMAP_ADDR(FIXMAP_CONSOLE)
> +#endif
> +
> +        /* Map the DTB in the boot misc slot */
> +        cbnz  x22, 1f                /* Only on boot CPU */
> +
> +        lsr   x2, x21, #21
> +        lsl   x2, x2, #21            /* x2 := 2MB-aligned paddr of DTB */
> +        mov   x3, #PT_MEM            /* x2 := 2MB RAM incl. DTB */
> +        orr   x2, x2, x3
> +        ldr   x1, =BOOT_MISC_VIRT_START
> +        lsr   x1, x1, #18            /* x1 := Slot for BOOT_MISC_VIRT_START */
> +        str   x2, [x4, x1]           /* Map it in the early boot slot */
> +        dsb   sy
> +1:
>  
>          PRINT("- Ready -\r\n")
>  
>          /* The boot CPU should go straight into C now */
>          cbz   x22, launch
>  
> -        /* Non-boot CPUs need to move on to the relocated pagetables */
> -        ldr   x4, =boot_ttbr         /* VA of TTBR0_EL2 stashed by CPU 0 */
> -        add   x4, x4, x20            /* PA of it */
> +        /* Non-boot CPUs need to move on to the proper pagetables, which were
> +         * setup in init_secondary_pagetables. */
> +
> +        ldr   x4, =init_ttbr         /* VA of TTBR0_EL2 stashed by CPU 0 */
>          ldr   x4, [x4]               /* Actual value */
>          dsb   sy
>          msr   TTBR0_EL2, x4
> @@ -299,28 +402,6 @@ paging:
>          dsb   sy                     /* Ensure completion of TLB flush */
>          isb
>  
> -        /* Non-boot CPUs report that they've got this far */
> -        ldr   x0, =ready_cpus
> -1:      ldaxr x1, [x0]               /*            { read # of ready CPUs } */
> -        add   x1, x1, #1             /* Atomically { ++                   } */
> -        stlxr w2, x1, [x0]           /*            { writeback            } */
> -        cbnz  w2, 1b
> -        dsb   sy
> -        dc    cvac, x0               /* Flush D-Cache */
> -        dsb   sy
> -
> -        /* Here, the non-boot CPUs must wait again -- they're now running on
> -         * the boot CPU's pagetables so it's safe for the boot CPU to
> -         * overwrite the non-relocated copy of Xen.  Once it's done that,
> -         * and brought up the memory allocator, non-boot CPUs can get their
> -         * own stacks and enter C. */
> -1:      wfe
> -        dsb   sy
> -        ldr   x0, =smp_up_cpu
> -        ldr   x1, [x0]               /* Which CPU is being booted? */
> -        cmp   x1, x22                /* Is it us? */
> -        b.ne  1b
> -
>  launch:
>          ldr   x0, =init_data
>          add   x0, x0, #INITINFO_stack /* Find the boot-time stack */
> @@ -331,7 +412,7 @@ launch:
>  
>          mov   x0, x20                /* Marshal args: - phys_offset */
>          mov   x1, x21                /*               - FDT */
> -        mov   x2, x22                /*               - CPU ID */
> +        mov   x2, x24                /*               - CPU ID */
>          cbz   x22, start_xen         /* and disappear into the land of C */
>          b     start_secondary        /* (to the appropriate entry point) */
>  
> @@ -341,13 +422,80 @@ fail:   PRINT("- Boot failed -\r\n")
>  1:      wfe
>          b     1b
>  
> -#ifdef EARLY_PRINTK
> +/* Copy Xen to new location and switch TTBR
> + * x0    ttbr
> + * x1    source address
> + * x2    destination address
> + * x3    length
> + *
> + * Source and destination must be word aligned, length is rounded up
> + * to a 16 byte boundary.
> + *
> + * MUST BE VERY CAREFUL when saving things to RAM over the copy */
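
(Same prototype as the arm32 version, just with everything passed in
registers:

    /* x0 = ttbr, x1 = source, x2 = destination, x3 = length */
    void relocate_xen(uint64_t ttbr, void *src, void *dst, unsigned long len);

again my reconstruction from the register comments rather than a quote of the
caller.)
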
> +ENTRY(relocate_xen)
> +        /* Copy 16 bytes at a time using:
> +         *   x9: counter
> +         *   x10: data
> +         *   x11: data
> +         *   x12: source
> +         *   x13: destination
> +         */
> +        mov     x9, x3
> +        mov     x12, x1
> +        mov     x13, x2
>  
> +1:      ldp     x10, x11, [x12], #16
> +        stp     x10, x11, [x13], #16
> +
> +        subs    x9, x9, #16
> +        bgt     1b
> +
> +        /* Flush destination from dcache using:
> +         * x9: counter
> +         * x10: step
> +         * x11: vaddr
> +         */
> +        dsb   sy        /* So the CPU issues all writes to the range */
> +
> +        mov   x9, x3
> +        ldr   x10, =cacheline_bytes /* x10 := step */
> +        ldr   x10, [x10]
> +        mov   x11, x2
> +
> +1:      dc    cvac, x11
> +
> +        add   x11, x11, x10
> +        subs  x9, x9, x10
> +        bgt   1b
> +
> +        dsb   sy                     /* Ensure the flushes happen before
> +                                      * continuing */
> +        isb                          /* Ensure synchronization with previous
> +                                      * changes to text */
> +        tlbi   alle2                 /* Flush hypervisor TLB */
> +        ic     iallu                 /* Flush I-cache */
> +        dsb    sy                    /* Ensure completion of TLB flush */
> +        isb
> +
> +        msr    TTBR0_EL2, x0
> +
> +        isb                          /* Ensure synchronization with previous
> +                                      * changes to text */
> +        tlbi   alle2                 /* Flush hypervisor TLB */
> +        ic     iallu                 /* Flush I-cache */
> +        dsb    sy                    /* Ensure completion of TLB flush */
> +        isb
> +
> +        ret
> +
> +#ifdef EARLY_PRINTK
>  /* Bring up the UART.
>   * x23: Early UART base address
>   * Clobbers x0-x1 */
>  init_uart:
> +#ifdef EARLY_PRINTK_INIT_UART
>          early_uart_init x23, 0
> +#endif
>          adr   x0, 1f
>          b     puts
>  1:      .asciz "- UART enabled -\r\n"
> diff --git a/xen/arch/arm/arm64/mode_switch.S b/xen/arch/arm/arm64/mode_switch.S
> deleted file mode 100644
> index ea64f22..0000000
> --- a/xen/arch/arm/arm64/mode_switch.S
> +++ /dev/null
> @@ -1,89 +0,0 @@
> -/*
> - * xen/arch/arm/arm64/mode_switch.S
> - *
> - * Start-of day code to take a CPU from EL3 to EL2. Largely taken from
> - *       bootwrapper.
> - *
> - * Ian Campbell <ian.campbell@xxxxxxxxxx>
> - * Copyright (c) 2012 Citrix Systems.
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - *
> - * This program is distributed in the hope that it will be useful,
> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> - * GNU General Public License for more details.
> - */
> -
> -#include <asm/config.h>
> -#include <asm/page.h>
> -#include <asm/asm_defns.h>
> -#include <asm/platforms/vexpress.h>
> -
> -/* Get up a CPU into EL2.  Clobbers x0-x3.
> - *
> - * Expects x22 == CPU number
> - * Expects x30  == EL2 entry point
> - *
> - * This code is specific to the VE model, and not intended to be used
> - * on production systems.  As such it's a bit hackier than the main
> - * boot code in head.S.  In future it will be replaced by better
> - * integration with the bootloader/firmware so that Xen always starts
> - * at EL2.
> - */
> -GLOBAL(enter_el2_mode)
> -        mov     x0, #0x30                       // RES1
> -        orr     x0, x0, #(1 << 0)               // Non-secure EL1
> -        orr     x0, x0, #(1 << 8)               // HVC enable
> -        orr     x0, x0, #(1 << 10)              // 64-bit EL2
> -        msr     scr_el3, x0
> -
> -        msr     cptr_el3, xzr                   // Disable copro. traps to EL3
> -
> -        ldr     x0, =0x01800000                 // 24Mhz
> -        msr     cntfrq_el0, x0
> -
> -        /*
> -         * Check for the primary CPU to avoid a race on the distributor
> -         * registers.
> -         */
> -        cbnz    x22, 1f
> -
> -        ldr     x1, =(V2M_GIC_BASE_ADDRESS+GIC_DR_OFFSET) // GICD_CTLR
> -        mov     w0, #3                          // EnableGrp0 | EnableGrp1
> -        str     w0, [x1]
> -
> -1:      ldr     x1, =(V2M_GIC_BASE_ADDRESS+GIC_DR_OFFSET+0x80) // GICD_IGROUPR
> -        mov     w0, #~0                         // Grp1 interrupts
> -        str     w0, [x1], #4
> -        b.ne    2f                              // Only local interrupts for secondary CPUs
> -        str     w0, [x1], #4
> -        str     w0, [x1], #4
> -
> -2:      ldr     x1, =(V2M_GIC_BASE_ADDRESS+GIC_CR_OFFSET) // GICC_CTLR
> -        ldr     w0, [x1]
> -        mov     w0, #3                          // EnableGrp0 | EnableGrp1
> -        str     w0, [x1]
> -
> -        mov     w0, #1 << 7                     // allow NS access to GICC_PMR
> -        str     w0, [x1, #4]                    // GICC_PMR
> -
> -        msr     sctlr_el2, xzr
> -
> -        /*
> -         * Prepare the switch to the EL2_SP1 mode from EL3
> -         */
> -        msr     elr_el3, x30                    // Return to desired function
> -        mov     x1, #0x3c9                      // EL2_SP1 | D | A | I | F
> -        msr     spsr_el3, x1
> -        eret
> -
> -/*
> - * Local variables:
> - * mode: ASM
> - * indent-tabs-mode: nil
> - * End:
> - */
> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> index 3d7b2f5..701bc2e 100644
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -43,40 +43,70 @@
>  
>  struct domain *dom_xen, *dom_io, *dom_cow;
>  
> -/* Static start-of-day pagetables that we use before the
> - * allocators are up. These go on to become the boot CPU's real pagetables.
> +/* Static start-of-day pagetables that we use before the allocators
> + * are up. These are used by all CPUs during bringup before switching
> + * to the CPUs own pagetables.
> + *
> + * These pagetables have a very simple structure. They include:
> + *  - a 2MB mapping of xen at XEN_VIRT_START, boot_first and
> + *    boot_second are used to populate the trie down to that mapping.
> + *  - a 1:1 mapping of xen at its current physical address. This uses a
> + *    section mapping at whichever of boot_{pgtable,first,second}
> + *    covers that physical address.
> + *
> + * For the boot CPU these mappings point to the address where Xen was
> + * loaded by the bootloader. For secondary CPUs they point to the
> + * relocated copy of Xen for the benefit of secondary CPUs.
> + *
> + * In addition to the above for the boot CPU the device-tree is
> + * initially mapped in the boot misc slot. This mapping is not present
> + * for secondary CPUs.
> + *
> + * Finally, if EARLY_PRINTK is enabled then xen_fixmap will be mapped
> + * by the CPU once it has moved off the 1:1 mapping.
>   */
>  lpae_t boot_pgtable[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
>  #ifdef CONFIG_ARM_64
>  lpae_t boot_first[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
> -/* The first page of the first level mapping of the xenheap. The
> - * subsequent xenheap first level pages are dynamically allocated, but
> - * we need this one to bootstrap ourselves. */
> -lpae_t xenheap_first_first[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
> -/* The zeroeth level slot which uses xenheap_first_first. Used because
> - * setup_xenheap_mappings otherwise relies on mfn_to_virt which isn't
> - * valid for a non-xenheap mapping. */
> -static __initdata int xenheap_first_first_slot = -1;
>  #endif
> +lpae_t boot_second[LPAE_ENTRIES]  __attribute__((__aligned__(4096)));
> +
> +/* Main runtime page tables */
>  
>  /*
> - * xen_pgtable and xen_dommap are per-PCPU and are allocated before
> - * bringing up each CPU. On 64-bit a first level table is also allocated.
> + * For arm32 xen_pgtable and xen_dommap are per-PCPU and are allocated before
> + * bringing up each CPU. For arm64 xen_pgtable is common to all PCPUs.
>   *
> - * xen_second, xen_fixmap and xen_xenmap are shared between all PCPUs.
> + * xen_second, xen_fixmap and xen_xenmap are always shared between all
> + * PCPUs.
>   */
>  
>  #ifdef CONFIG_ARM_64
> -#define THIS_CPU_PGTABLE boot_pgtable
> +lpae_t xen_pgtable[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
> +lpae_t xen_first[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
> +#define THIS_CPU_PGTABLE xen_pgtable
>  #else
>  /* Per-CPU pagetable pages */
>  /* xen_pgtable == root of the trie (zeroeth level on 64-bit, first on 32-bit) */
>  static DEFINE_PER_CPU(lpae_t *, xen_pgtable);
>  #define THIS_CPU_PGTABLE this_cpu(xen_pgtable)
>  /* xen_dommap == pages used by map_domain_page, these pages contain
> - * the second level pagetables which mapp the domheap region
> + * the second level pagetables which map the domheap region
>   * DOMHEAP_VIRT_START...DOMHEAP_VIRT_END in 2MB chunks. */
>  static DEFINE_PER_CPU(lpae_t *, xen_dommap);
> +/* Root of the trie for cpu0 */
> +lpae_t cpu0_pgtable[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
> +#endif
> +
> +#ifdef CONFIG_ARM_64
> +/* The first page of the first level mapping of the xenheap. The
> + * subsequent xenheap first level pages are dynamically allocated, but
> + * we need this one to bootstrap ourselves. */
> +lpae_t xenheap_first_first[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
> +/* The zeroeth level slot which uses xenheap_first_first. Used because
> + * setup_xenheap_mappings otherwise relies on mfn_to_virt which isn't
> + * valid for a non-xenheap mapping. */
> +static __initdata int xenheap_first_first_slot = -1;
>  #endif
>  
>  /* Common pagetable leaves */
> @@ -104,9 +134,8 @@ lpae_t xen_fixmap[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
>   * as appropriate. */
>  static lpae_t xen_xenmap[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
>  
> -
>  /* Non-boot CPUs use this to find the correct pagetables. */
> -uint64_t boot_ttbr;
> +uint64_t init_ttbr;
>  
>  static paddr_t phys_offset;
>  
> @@ -131,6 +160,12 @@ static inline void check_memory_layout_alignment_constraints(void) {
>      BUILD_BUG_ON(BOOT_MISC_VIRT_START & ~SECOND_MASK);
>      /* 1GB aligned regions */
>      BUILD_BUG_ON(XENHEAP_VIRT_START & ~FIRST_MASK);
> +    /* Page table structure constraints */
> +#ifdef CONFIG_ARM_64
> +    BUILD_BUG_ON(zeroeth_table_offset(XEN_VIRT_START));
> +#endif
> +    BUILD_BUG_ON(first_table_offset(XEN_VIRT_START));
> +    BUILD_BUG_ON(second_linear_offset(XEN_VIRT_START) >= LPAE_ENTRIES);
>  #ifdef CONFIG_DOMAIN_PAGE
>      BUILD_BUG_ON(DOMHEAP_VIRT_START & ~FIRST_MASK);
>  #endif
> @@ -361,16 +396,6 @@ void __cpuinit setup_virt_paging(void)
>      WRITE_SYSREG32(0x80002558, VTCR_EL2); isb();
>  }
>  
> -/* This needs to be a macro to stop the compiler spilling to the stack
> - * which will change when we change pagetables */
> -#define WRITE_TTBR(ttbr)                                                \
> -    flush_xen_text_tlb();                                               \
> -    WRITE_SYSREG64(ttbr, TTBR0_EL2);                                    \
> -    dsb(); /* ensure memory accesses do not cross over the TTBR0 write */ \
> -    /* flush_xen_text_tlb contains an initial isb which ensures the     \
> -     * write to TTBR0 has completed. */                                 \
> -    flush_xen_text_tlb()
> -
>  static inline lpae_t pte_of_xenaddr(vaddr_t va)
>  {
>      paddr_t ma = va + phys_offset;
> @@ -378,69 +403,73 @@ static inline lpae_t pte_of_xenaddr(vaddr_t va)
>      return mfn_to_xen_entry(mfn);
>  }
>  
> +extern void relocate_xen(uint64_t ttbr, void *src, void *dst, size_t len);
> +
>  /* Boot-time pagetable setup.
>   * Changes here may need matching changes in head.S */
>  void __init setup_pagetables(unsigned long boot_phys_offset, paddr_t xen_paddr)
>  {
> +    uint64_t ttbr;
>      unsigned long dest_va;
>      lpae_t pte, *p;
>      int i;
>  
> -    /* Map the destination in the boot misc area. */
> -    dest_va = BOOT_MISC_VIRT_START;
> -    pte = mfn_to_xen_entry(xen_paddr >> PAGE_SHIFT);
> -    write_pte(xen_second + second_table_offset(dest_va), pte);
> -    flush_xen_data_tlb_range_va(dest_va, SECOND_SIZE);
> -
>      /* Calculate virt-to-phys offset for the new location */
>      phys_offset = xen_paddr - (unsigned long) _start;
>  
> -    /* Copy */
> -    memcpy((void *) dest_va, _start, _end - _start);
> -
> -    /* Beware!  Any state we modify between now and the PT switch may be
> -     * discarded when we switch over to the copy. */
> -
> -    /* Update the copy of boot_pgtable to use the new paddrs */
> -    p = (void *) boot_pgtable + dest_va - (unsigned long) _start;
>  #ifdef CONFIG_ARM_64
> -    p[0].pt.base += (phys_offset - boot_phys_offset) >> PAGE_SHIFT;
> -    p = (void *) boot_first + dest_va - (unsigned long) _start;
> +    p = (void *) xen_pgtable;
> +    p[0] = pte_of_xenaddr((uintptr_t)xen_first);
> +    p[0].pt.table = 1;
> +    p[0].pt.xn = 0;
> +    p = (void *) xen_first;
> +#else
> +    p = (void *) cpu0_pgtable;
>  #endif
> -    for ( i = 0; i < 4; i++)
> -        p[i].pt.base += (phys_offset - boot_phys_offset) >> PAGE_SHIFT;
>  
> -    p = (void *) xen_second + dest_va - (unsigned long) _start;
> -    if ( boot_phys_offset != 0 )
> +    /* Initialise first level entries, to point to second level entries */
> +    for ( i = 0; i < 4; i++)
>      {
> -        /* Remove the old identity mapping of the boot paddr */
> -        vaddr_t va = (vaddr_t)_start + boot_phys_offset;
> -        p[second_linear_offset(va)].bits = 0;
> +        p[i] = pte_of_xenaddr((uintptr_t)(xen_second+i*LPAE_ENTRIES));
> +        p[i].pt.table = 1;
> +        p[i].pt.xn = 0;
>      }
> -    for ( i = 0; i < 4 * LPAE_ENTRIES; i++)
> -        if ( p[i].pt.valid )
> -            p[i].pt.base += (phys_offset - boot_phys_offset) >> PAGE_SHIFT;
> -
> -    /* Change pagetables to the copy in the relocated Xen */
> -    boot_ttbr = (uintptr_t) boot_pgtable + phys_offset;
> -    flush_xen_dcache(boot_ttbr);
> -    flush_xen_dcache_va_range((void*)dest_va, _end - _start);
>  
> -    WRITE_TTBR(boot_ttbr);
> +    /* Initialise xen second level entries ... */
> +    /* ... Xen's text etc */
>  
> -    /* Undo the temporary map */
> -    pte.bits = 0;
> -    write_pte(xen_second + second_table_offset(dest_va), pte);
> -    flush_xen_text_tlb();
> +    pte = mfn_to_xen_entry(xen_paddr>>PAGE_SHIFT);
> +    pte.pt.xn = 0;/* Contains our text mapping! */
> +    xen_second[second_table_offset(XEN_VIRT_START)] = pte;
>  
> -    /* Link in the fixmap pagetable */
> +    /* ... Fixmap */
>      pte = pte_of_xenaddr((vaddr_t)xen_fixmap);
>      pte.pt.table = 1;
> -    write_pte(xen_second + second_table_offset(FIXMAP_ADDR(0)), pte);
> -    /*
> -     * No flush required here. Individual flushes are done in
> -     * set_fixmap as entries are used.
> -     */
> +    xen_second[second_table_offset(FIXMAP_ADDR(0))] = pte;
> +
> +    /* Map the destination in the boot misc area. */
> +    dest_va = BOOT_MISC_VIRT_START;
> +    pte = mfn_to_xen_entry(xen_paddr >> PAGE_SHIFT);
> +    write_pte(boot_second + second_table_offset(dest_va), pte);
> +    flush_xen_data_tlb_range_va(dest_va, SECOND_SIZE);
> +#ifdef CONFIG_ARM_64
> +    ttbr = (uintptr_t) xen_pgtable + phys_offset;
> +#else
> +    ttbr = (uintptr_t) cpu0_pgtable + phys_offset;
> +#endif
> +
> +    relocate_xen(ttbr, _start, (void*)dest_va, _end - _start);
> +
> +    /* Clear the copy of the boot pagetables. Each secondary CPU
> +     * rebuilds these itself (see head.S) */
> +    memset(boot_pgtable, 0x0, PAGE_SIZE);
> +    flush_xen_dcache(boot_pgtable);
> +#ifdef CONFIG_ARM_64
> +    memset(boot_first, 0x0, PAGE_SIZE);
> +    flush_xen_dcache(boot_first);
> +#endif
> +    memset(boot_second, 0x0, PAGE_SIZE);
> +    flush_xen_dcache(boot_second);
>  
>      /* Break up the Xen mapping into 4k pages and protect them separately. */
>      for ( i = 0; i < LPAE_ENTRIES; i++ )
> @@ -461,6 +490,7 @@ void __init setup_pagetables(unsigned long boot_phys_offset, paddr_t xen_paddr)
>          write_pte(xen_xenmap + i, pte);
>          /* No flush required here as page table is not hooked in yet. */
>      }
> +
>      pte = pte_of_xenaddr((vaddr_t)xen_xenmap);
>      pte.pt.table = 1;
>      write_pte(xen_second + second_linear_offset(XEN_VIRT_START), pte);
> @@ -472,7 +502,7 @@ void __init setup_pagetables(unsigned long boot_phys_offset, paddr_t xen_paddr)
>      flush_xen_text_tlb();
>  
>  #ifdef CONFIG_ARM_32
> -    per_cpu(xen_pgtable, 0) = boot_pgtable;
> +    per_cpu(xen_pgtable, 0) = cpu0_pgtable;
>      per_cpu(xen_dommap, 0) = xen_second +
>          second_linear_offset(DOMHEAP_VIRT_START);
>  
> @@ -483,10 +513,14 @@ void __init setup_pagetables(unsigned long boot_phys_offset, paddr_t xen_paddr)
>                                DOMHEAP_SECOND_PAGES*PAGE_SIZE);
>  #endif
>  }
> +
>  #ifdef CONFIG_ARM_64
>  int init_secondary_pagetables(int cpu)
>  {
> -    /* All CPUs share a single page table on 64 bit */
> +    /* Set init_ttbr for this CPU coming up. All CPUs share a single set of
> +     * pagetables, but rewrite it each time for consistency with 32 bit. */
> +    init_ttbr = (uintptr_t) xen_pgtable + phys_offset;
> +    flush_xen_dcache(init_ttbr);
>      return 0;
>  }
>  #else
> @@ -507,7 +541,7 @@ int init_secondary_pagetables(int cpu)
>      }
>  
>      /* Initialise root pagetable from root of boot tables */
> -    memcpy(first, boot_pgtable, PAGE_SIZE);
> +    memcpy(first, cpu0_pgtable, PAGE_SIZE);
>  
>      /* Ensure the domheap has no stray mappings */
>      memset(domheap, 0, DOMHEAP_SECOND_PAGES*PAGE_SIZE);
> @@ -527,6 +561,10 @@ int init_secondary_pagetables(int cpu)
>      per_cpu(xen_pgtable, cpu) = first;
>      per_cpu(xen_dommap, cpu) = domheap;
>  
> +    /* Set init_ttbr for this CPU coming up */
> +    init_ttbr = (uintptr_t) THIS_CPU_PGTABLE + phys_offset;
> +    flush_xen_dcache(init_ttbr);
> +
>      return 0;
>  }
>  #endif
> @@ -534,12 +572,6 @@ int init_secondary_pagetables(int cpu)
>  /* MMU setup for secondary CPUS (which already have paging enabled) */
>  void __cpuinit mmu_init_secondary_cpu(void)
>  {
> -    uint64_t ttbr;
> -
> -    /* Change to this CPU's pagetables */
> -    ttbr = (uintptr_t)virt_to_maddr(THIS_CPU_PGTABLE);
> -    WRITE_TTBR(ttbr);
> -
>      /* From now on, no mapping may be both writable and executable. */
>      WRITE_SYSREG32(READ_SYSREG32(SCTLR_EL2) | SCTLR_WXN, SCTLR_EL2);
>      flush_xen_text_tlb();
> @@ -612,7 +644,7 @@ void __init setup_xenheap_mappings(unsigned long base_mfn,
>      while ( base_mfn < end_mfn )
>      {
>          int slot = zeroeth_table_offset(vaddr);
> -        lpae_t *p = &boot_pgtable[slot];
> +        lpae_t *p = &xen_pgtable[slot];
>  
>          if ( p->pt.valid )
>          {
> @@ -679,7 +711,7 @@ void __init setup_frametable_mappings(paddr_t ps, paddr_t pe)
>      {
>          pte = mfn_to_xen_entry(second_base + i);
>          pte.pt.table = 1;
> -        write_pte(&boot_first[first_table_offset(FRAMETABLE_VIRT_START)+i], pte);
> +        write_pte(&xen_first[first_table_offset(FRAMETABLE_VIRT_START)+i], pte);
>      }
>      create_32mb_mappings(second, 0, base_mfn, frametable_size >> PAGE_SHIFT);
>  #else
> diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
> index 07338e1..a943dc8 100644
> --- a/xen/arch/arm/setup.c
> +++ b/xen/arch/arm/setup.c
> @@ -518,7 +518,7 @@ void __init setup_cache(void)
>   * MPIDR values related to logical cpus
>   * Code base on Linux arch/arm/kernel/devtree.c
>   */
> -static void __init init_cpus_maps(void)
> +static void __init smp_init_cpus(void)
>  {
>      register_t mpidr;
>      struct dt_device_node *cpus = dt_find_node_by_path("/cpus");
> @@ -530,6 +530,14 @@ static void __init init_cpus_maps(void)
>          [0 ... NR_CPUS - 1] = MPIDR_INVALID
>      };
>      bool_t bootcpu_valid = 0;
> +    int rc;
> +
> +    if ( (rc = arch_smp_init()) < 0 )
> +    {
> +        printk(XENLOG_WARNING "SMP init failed (%d)\n"
> +               "Using only 1 CPU\n", rc);
> +        return;
> +    }
>  
>      mpidr = boot_cpu_data.mpidr.bits & MPIDR_HWID_MASK;
>  
> @@ -581,6 +589,12 @@ static void __init init_cpus_maps(void)
>              }
>          }
>  
> +        if ( (rc = arch_cpu_init(hwid, cpu)) < 0 )
> +        {
> +            printk("cpu init failed (hwid %x): %d\n", hwid, rc);
> +            continue;
> +        }
> +
>          /*
>           * Build a stashed array of MPIDR values. Numbering scheme requires
>           * that if detected the boot CPU must be assigned logical id 0. Other
> @@ -599,7 +613,8 @@ static void __init init_cpus_maps(void)
>  
>          if ( cpuidx > NR_CPUS )
>          {
> -            printk(XENLOG_WARNING "DT /cpu %u node greater than max cores %u, capping them\n",
> +            printk(XENLOG_WARNING
> +                   "DT /cpu %u node greater than max cores %u, capping them\n",
>                     cpuidx, NR_CPUS);
>              cpuidx = NR_CPUS;
>              break;
> @@ -657,15 +672,14 @@ void __init start_xen(unsigned long boot_phys_offset,
>  
>      processor_id();
>  
> -    init_cpus_maps();
> -    cpus = smp_get_max_cpus();
> -
>      platform_init();
>  
> +    smp_init_cpus();
> +    cpus = smp_get_max_cpus();
> +
>      init_xen_time();
>  
>      gic_init();
> -    make_cpus_ready(cpus, boot_phys_offset);
>  
>      set_current((struct vcpu *)0xfffff000); /* debug sanity */
>      idle_vcpu[0] = current;
> diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c
> index 234748e..7b4ad8a 100644
> --- a/xen/arch/arm/smpboot.c
> +++ b/xen/arch/arm/smpboot.c
> @@ -56,12 +56,10 @@ struct init_info __initdata init_data =
>  };
>  
>  /* Shared state for coordinating CPU bringup */
> -unsigned long smp_up_cpu = 0;
> +unsigned long smp_up_cpu = ~0UL;
> +/* Shared state for coordinating CPU teardown */
>  static bool_t cpu_is_dead = 0;
>  
> -/* Number of non-boot CPUs ready to enter C */
> -unsigned long __initdata ready_cpus = 0;
> -
>  /* ID of the PCPU we're running on */
>  DEFINE_PER_CPU(unsigned int, cpu_id);
>  /* XXX these seem awfully x86ish... */
> @@ -103,7 +101,6 @@ smp_get_max_cpus (void)
>      return max_cpus;
>  }
>  
> -
>  void __init
>  smp_prepare_cpus (unsigned int max_cpus)
>  {
> @@ -112,32 +109,6 @@ smp_prepare_cpus (unsigned int max_cpus)
>      setup_cpu_sibling_map(0);
>  }
>  
> -void __init
> -make_cpus_ready(unsigned int max_cpus, unsigned long boot_phys_offset)
> -{
> -    unsigned long *gate;
> -    paddr_t gate_pa;
> -    int i;
> -
> -    printk("Waiting for %i other CPUs to be ready\n", max_cpus - 1);
> -    /* We use the unrelocated copy of smp_up_cpu as that's the one the
> -     * others can see. */ 
> -    gate_pa = ((paddr_t) (unsigned long) &smp_up_cpu) + boot_phys_offset;
> -    gate = map_domain_page(gate_pa >> PAGE_SHIFT) + (gate_pa & ~PAGE_MASK); 
> -    for ( i = 1; i < max_cpus; i++ )
> -    {
> -        /* Tell the next CPU to get ready */
> -        *gate = cpu_logical_map(i);
> -        flush_xen_dcache(*gate);
> -        isb();
> -        sev();
> -        /* And wait for it to respond */
> -        while ( ready_cpus < i )
> -            smp_rmb();
> -    }
> -    unmap_domain_page(gate);
> -}
> -
>  /* Boot the current CPU */
>  void __cpuinit start_secondary(unsigned long boot_phys_offset,
>                                 unsigned long fdt_paddr,
> @@ -176,6 +147,7 @@ void __cpuinit start_secondary(unsigned long boot_phys_offset,
>      wmb();
>  
>      /* Now report this CPU is up */
> +    smp_up_cpu = ~0UL;
>      cpumask_set_cpu(cpuid, &cpu_online_map);
>      wmb();
>  
> @@ -226,6 +198,8 @@ int __cpu_up(unsigned int cpu)
>  {
>      int rc;
>  
> +    printk("Bringing up CPU%d\n", cpu);
> +
>      rc = init_secondary_pagetables(cpu);
>      if ( rc < 0 )
>          return rc;
> @@ -236,14 +210,22 @@ int __cpu_up(unsigned int cpu)
>      /* Tell the remote CPU what is it's logical CPU ID */
>      init_data.cpuid = cpu;
>  
> -    /* Unblock the CPU.  It should be waiting in the loop in head.S
> -     * for an event to arrive when smp_up_cpu matches its cpuid. */
> +    /* Open the gate for this CPU */
>      smp_up_cpu = cpu_logical_map(cpu);
> -    /* we need to make sure that the change to smp_up_cpu is visible to
> -     * secondary cpus with D-cache off */
>      flush_xen_dcache(smp_up_cpu);
> -    isb();
> -    sev();
> +
> +    rc = arch_cpu_up(cpu);
> +
> +    if ( rc < 0 )
> +    {
> +        printk("Failed to bring up CPU%d\n", cpu);
> +        return rc;
> +    }
> +
> +    /* We don't know the GIC ID of the CPU until it has woken up, so just signal
> +     * everyone and rely on our own smp_up_cpu gate to ensure only the one we
> +     * want gets through. */
> +    send_SGI_allbutself(GIC_SGI_EVENT_CHECK);
>  
>      while ( !cpu_online(cpu) )
>      {
> @@ -272,7 +254,6 @@ void __cpu_die(unsigned int cpu)
>      mb();
>  }
>  
> -
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
> index 173db1b..188b385 100644
> --- a/xen/include/asm-arm/mm.h
> +++ b/xen/include/asm-arm/mm.h
> @@ -147,7 +147,8 @@ extern unsigned long total_pages;
>  
>  /* Boot-time pagetable setup */
>  extern void setup_pagetables(unsigned long boot_phys_offset, paddr_t xen_paddr);
> -/* Allocate and initialise pagetables for a secondary CPU */
> +/* Allocate and initialise pagetables for a secondary CPU. Sets init_ttbr to the
> + * new page table */
>  extern int __cpuinit init_secondary_pagetables(int cpu);
>  /* Switch secondary CPUS to its own pagetables and finalise MMU setup */
>  extern void __cpuinit mmu_init_secondary_cpu(void);
> diff --git a/xen/include/asm-arm/platforms/exynos5.h b/xen/include/asm-arm/platforms/exynos5.h
> index ee5bdfa..af30608 100644
> --- a/xen/include/asm-arm/platforms/exynos5.h
> +++ b/xen/include/asm-arm/platforms/exynos5.h
> @@ -14,20 +14,6 @@
>  
>  #define S5P_PA_SYSRAM   0x02020000
>  
> -/* Constants below is only used in assembly because the DTS is not yet parsed */
> -#ifdef __ASSEMBLY__
> -
> -/* GIC Base Address */
> -#define EXYNOS5_GIC_BASE_ADDRESS    0x10480000
> -
> -/* Timer's frequency */
> -#define EXYNOS5_TIMER_FREQUENCY     (24 * 1000 * 1000) /* 24 MHz */
> -
> -/* Arndale machine ID */
> -#define MACH_TYPE_SMDK5250          3774
> -
> -#endif /* __ASSEMBLY__ */
> -
>  #endif /* __ASM_ARM_PLATFORMS_EXYNOS5_H */
>  /*
>   * Local variables:
> diff --git a/xen/include/asm-arm/platforms/vexpress.h b/xen/include/asm-arm/platforms/vexpress.h
> index 982a293..5cf3aba 100644
> --- a/xen/include/asm-arm/platforms/vexpress.h
> +++ b/xen/include/asm-arm/platforms/vexpress.h
> @@ -32,17 +32,6 @@
>  int vexpress_syscfg(int write, int function, int device, uint32_t *data);
>  #endif
>  
> -/* Constants below is only used in assembly because the DTS is not yet parsed */
> -#ifdef __ASSEMBLY__
> -
> -/* GIC base address */
> -#define V2M_GIC_BASE_ADDRESS        0x2c000000
> -
> -/* Timer's frequency */
> -#define V2M_TIMER_FREQUENCY         0x5f5e100 /* 100 Mhz */
> -
> -#endif /* __ASSEMBLY__ */
> -
>  #endif /* __ASM_ARM_PLATFORMS_VEXPRESS_H */
>  /*
>   * Local variables:
> diff --git a/xen/include/asm-arm/smp.h b/xen/include/asm-arm/smp.h
> index 1added5..83add6c 100644
> --- a/xen/include/asm-arm/smp.h
> +++ b/xen/include/asm-arm/smp.h
> @@ -17,12 +17,6 @@ DECLARE_PER_CPU(cpumask_var_t, cpu_core_mask);
>  
>  extern void stop_cpu(void);
>  
> -/* Bring the non-boot CPUs up to paging and ready to enter C.  
> - * Must be called after Xen is relocated but before the original copy of
> - * .text gets overwritten. */
> -extern void
> -make_cpus_ready(unsigned int max_cpus, unsigned long boot_phys_offset);
> -
>  extern int arch_smp_init(void);
>  extern int arch_cpu_init(int cpu, struct dt_device_node *dn);
>  extern int arch_cpu_up(int cpu);
> 


-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel