[IA64] fix early access to per cpu area. The following changeset broke booting xen-ia64 on some kinds of ia64 boxes. http://xenbits.xensource.com/ext/ia64/xen-unstable.hg/rev/3fd8f9b34941 tasklet_schedule() calls raise_softirq(). Because raise_softirq() uses per_cpu, if we access per_cpu before cpu_init() the behavior would be unexpected. There was a similar issue on Linux/ia64. The following change sets resolved it. 10617bbe84628eb18ab5f723d3ba35005adde143 c459ce8b5a7d933a3bcf6915ab17ac1e036e2ac4 This patch fixes the issue following the linux/ia64 solution. Allocate per cpu area for cpu0 in .data section and initialize it early. reported-by: Akio Takebe Signed-off-by: Isaku Yamahata diff --git a/xen/arch/ia64/linux-xen/head.S b/xen/arch/ia64/linux-xen/head.S --- a/xen/arch/ia64/linux-xen/head.S +++ b/xen/arch/ia64/linux-xen/head.S @@ -382,6 +382,35 @@ 1: // now we are in virtual mode mov ar.rsc=0 // place RSE in enforced lazy mode ;; loadrs // clear the dirty partition +#ifdef XEN +(isAP) br.few 2f + movl r19=__phys_per_cpu_start + mov r18=PERCPU_PAGE_SIZE +#ifndef CONFIG_SMP + add r19=r19,r18 + ;; +#else + movl r20=__cpu0_per_cpu + ;; + shr.u r18=r18,3 +1: + ld8 r21=[r19],8 ;; + st8[r20]=r21,8 + adds r18=-1,r18 + ;; + cmp4.lt p7,p6=0,r18 +(p7) br.cond.dptk.few 1b + ;; +#endif + movl r18=__per_cpu_offset + movl r19=__cpu0_per_cpu + movl r20=__per_cpu_start + ;; + sub r20=r19,r20 + ;; + st8 [r18]=r20 +2: +#endif ;; mov ar.bspstore=r2 // establish the new RSE stack ;; diff --git a/xen/arch/ia64/linux-xen/mm_contig.c b/xen/arch/ia64/linux-xen/mm_contig.c --- a/xen/arch/ia64/linux-xen/mm_contig.c +++ b/xen/arch/ia64/linux-xen/mm_contig.c @@ -183,7 +183,7 @@ void *percpu_area __initdata = NULL; void* __init per_cpu_allocate(void *xen_heap_start, unsigned long end_in_pa) { - int order = get_order(NR_CPUS * PERCPU_PAGE_SIZE); + int order = get_order((NR_CPUS - 1) * PERCPU_PAGE_SIZE); unsigned long size = 1UL << (order + PAGE_SHIFT); unsigned long start = 
ALIGN_UP((unsigned long)xen_heap_start, PERCPU_PAGE_SIZE); @@ -226,19 +226,31 @@ per_cpu_init (void) */ if (smp_processor_id() == 0) { #ifdef XEN + void *cpu0_data = __cpu0_per_cpu; + + __per_cpu_offset[0] = (char *)cpu0_data - __per_cpu_start; + per_cpu(local_per_cpu_offset, 0) = __per_cpu_offset[0]; + cpu_data = get_per_cpu_area(); if (cpu_data == NULL) panic("can't allocate per cpu area.\n"); + + for (cpu = 1; cpu < NR_CPUS; cpu++) { + memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start); + __per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start; + cpu_data += PERCPU_PAGE_SIZE; + per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; + } #else cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS, PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); -#endif for (cpu = 0; cpu < NR_CPUS; cpu++) { memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start); __per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start; cpu_data += PERCPU_PAGE_SIZE; per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; } +#endif } return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; } diff --git a/xen/arch/ia64/linux-xen/smpboot.c b/xen/arch/ia64/linux-xen/smpboot.c --- a/xen/arch/ia64/linux-xen/smpboot.c +++ b/xen/arch/ia64/linux-xen/smpboot.c @@ -449,8 +449,8 @@ start_secondary (void *unused) { /* Early console may use I/O ports */ ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase)); +#ifndef XEN Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id()); -#ifndef XEN efi_map_pal_code(); #endif cpu_init(); diff --git a/xen/arch/ia64/xen/xen.lds.S b/xen/arch/ia64/xen/xen.lds.S --- a/xen/arch/ia64/xen/xen.lds.S +++ b/xen/arch/ia64/xen/xen.lds.S @@ -195,7 +195,17 @@ SECTIONS data : { } :data .data : AT(ADDR(.data) - LOAD_OFFSET) - { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } + { +#ifdef CONFIG_SMP + . = ALIGN(PERCPU_PAGE_SIZE); + __cpu0_per_cpu = .; + . = . 
+ PERCPU_PAGE_SIZE; /* cpu0 per-cpu space */ +#endif + *(.data) + *(.data1) + *(.gnu.linkonce.d*) + CONSTRUCTORS + } . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ .got : AT(ADDR(.got) - LOAD_OFFSET) diff --git a/xen/include/asm-ia64/linux-xen/asm/README.origin b/xen/include/asm-ia64/linux-xen/asm/README.origin --- a/xen/include/asm-ia64/linux-xen/asm/README.origin +++ b/xen/include/asm-ia64/linux-xen/asm/README.origin @@ -22,6 +22,7 @@ pgtable.h -> linux/include/asm-ia64/pgt processor.h -> linux/include/asm-ia64/processor.h ptrace.h -> linux/include/asm-ia64/ptrace.h sal.h -> linux/include/asm-ia64/sal.h +sections.h -> linux/include/asm-ia64/sections.h smp.h -> linux/include/asm-ia64/smp.h spinlock.h -> linux/include/asm-ia64/spinlock.h system.h -> linux/include/asm-ia64/system.h diff --git a/xen/include/asm-ia64/linux/asm/sections.h b/xen/include/asm-ia64/linux-xen/asm/sections.h rename from xen/include/asm-ia64/linux/asm/sections.h rename to xen/include/asm-ia64/linux-xen/asm/sections.h --- a/xen/include/asm-ia64/linux/asm/sections.h +++ b/xen/include/asm-ia64/linux-xen/asm/sections.h @@ -9,6 +9,9 @@ #include extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[]; +#ifdef CONFIG_SMP +extern char __cpu0_per_cpu[]; +#endif extern char __start___vtop_patchlist[], __end___vtop_patchlist[]; extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[]; extern char __start_gate_section[]; diff --git a/xen/include/asm-ia64/linux/asm/README.origin b/xen/include/asm-ia64/linux/asm/README.origin --- a/xen/include/asm-ia64/linux/asm/README.origin +++ b/xen/include/asm-ia64/linux/asm/README.origin @@ -29,7 +29,6 @@ param.h -> linux/include/asm-ia64/para patch.h -> linux/include/asm-ia64/patch.h pci.h -> linux/include/asm-ia64/pci.h rse.h -> linux/include/asm-ia64/rse.h -sections.h -> linux/include/asm-ia64/sections.h setup.h -> linux/include/asm-ia64/setup.h string.h -> linux/include/asm-ia64/string.h thread_info.h -> 
linux/include/asm-ia64/thread_info.h