# HG changeset patch # User yamahata@xxxxxxxxxxxxx # Date 1185941538 -32400 # Node ID 9d4bcd10abcb51136dbea5a4c2d3606a830de1bb # Parent 6e0d898e9dad897b29db1c08be2b8b64c929d779 Introduce a Xen-specific gate page. With this, the transparent paravirtualization overhead is eliminated. PATCHNAME: xen_specific_gate_page Signed-off-by: Isaku Yamahata diff -r 6e0d898e9dad -r 9d4bcd10abcb arch/ia64/Kconfig --- a/arch/ia64/Kconfig Mon Jul 30 11:46:14 2007 -0600 +++ b/arch/ia64/Kconfig Wed Aug 01 13:12:18 2007 +0900 @@ -64,13 +64,6 @@ config XEN help Enable Xen hypervisor support. Resulting kernel runs both as a guest OS on Xen and natively on hardware. - -config XEN_IA64_VDSO_PARAVIRT - bool "Xen/IA64 vdso area paravirtualization" - depends on XEN && !ITANIUM - default y - help - vDSO paravirtualization config XEN_IA64_EXPOSE_P2M bool "Xen/IA64 exposure p2m table" diff -r 6e0d898e9dad -r 9d4bcd10abcb arch/ia64/kernel/Makefile --- a/arch/ia64/kernel/Makefile Mon Jul 30 11:46:14 2007 -0600 +++ b/arch/ia64/kernel/Makefile Wed Aug 01 13:12:18 2007 +0900 @@ -61,3 +61,58 @@ GATECFLAGS_gate-syms.o = -r # We must build gate.so before we can assemble it. # Note: kbuild does not track this dependency due to usage of .incbin $(obj)/gate-data.o: $(obj)/gate.so + + +# +# gate page paravirtualization for xen +# +obj-$(CONFIG_XEN) += xengate-data.o + +# The gate DSO image is built using a special linker script. 
+targets += xengate.so xengate-syms.o + +extra-$(CONFIG_XEN) += xengate.so xengate.lds xengate.o + +AFLAGS_xengate.o += -D__XEN_IA64_VDSO_PARAVIRT +$(obj)/xengate.o: $(src)/gate.S FORCE + $(call if_changed_dep,as_o_S) + +CPPFLAGS_xengate.lds := -P -C -U$(ARCH) -D__XEN_IA64_VDSO_PARAVIRT +$(obj)/xengate.lds: $(src)/gate.lds.S + $(call if_changed_dep,cpp_lds_S) + +GATECFLAGS_xengate.so = -shared -s -Wl,-soname=linux-gate.so.1 \ + $(call ld-option, -Wl$(comma)--hash-style=sysv) +$(obj)/xengate.so: $(obj)/xengate.lds $(obj)/xengate.o FORCE + $(call if_changed,gate) + +ifeq ($(CONFIG_XEN), y) +$(obj)/built-in.o: $(obj)/xengate-syms.o +$(obj)/built-in.o: ld_flags += -R $(obj)/xengate-syms.o +$(obj)/mca_recovery.o: $(obj)/gate-syms.o $(obj)/xengate-syms.o +endif + +GATECFLAGS_xengate-syms.o = -r +$(obj)/xengate-syms.o: $(obj)/xengate.lds $(obj)/xengate.o FORCE + $(call if_changed,gate) +$(obj)/xengate-data.o: $(obj)/xengate.so + + +# +# .tmp_gate.o to calculate padding size for __kernel_syscall_via_epc +# +extra-y += gate-skip.s .tmp_gate.o + +ifeq ($(CONFIG_XEN), y) +AFLAGS_gate.o += -D__KERNEL_SYSCALL_VIA_EPC_PADDING +$(obj)/gate.o: $(obj)/gate-skip.s FORCE +endif +$(obj)/.tmp_gate.o: $(src)/gate.S FORCE + $(call if_changed_dep,as_o_S) +quiet_cmd_gate_size = GATE_SIZE $@ + cmd_gate_size = $(NM) --extern-only --print-size $(obj)/xengate.o | \ + $(AWK) '/__kernel_syscall_via_epc/{printf "\t.skip 0x"$$2" - "}' > $@; \ + $(NM) --extern-only --print-size $(obj)/.tmp_gate.o | \ + $(AWK) '/__kernel_syscall_via_epc/{printf "0x"$$2"\n"}' >> $@ +$(obj)/gate-skip.s: $(obj)/xengate.o $(obj)/.tmp_gate.o FORCE + $(call if_changed,gate_size) diff -r 6e0d898e9dad -r 9d4bcd10abcb arch/ia64/kernel/gate.S --- a/arch/ia64/kernel/gate.S Mon Jul 30 11:46:14 2007 -0600 +++ b/arch/ia64/kernel/gate.S Wed Aug 01 13:12:18 2007 +0900 @@ -31,17 +31,6 @@ #define BRL_COND_FSYS_BUBBLE_DOWN(pr) \ [1:](pr)brl.cond.sptk 0; \ .xdata4 ".data.patch.brl_fsys_bubble_down", 1b-. 
- -#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT - // Currently is_running_on_xen() is defined as running_on_xen. - // If is_running_on_xen() is a real function, we must update - // according to it. - .section ".data.patch.running_on_xen", "a" - .previous -#define LOAD_RUNNING_ON_XEN(reg) \ -[1:] movl reg=0; \ - .xdata4 ".data.patch.running_on_xen", 1b-. -#endif /* CONFIG_XEN_IA64_VDSO_PARAVIRT */ GLOBAL_ENTRY(__kernel_syscall_via_break) .prologue @@ -86,37 +75,24 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) mov r10=0 // A default to successful syscall execution epc // B causes split-issue } -#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT + ;; +#ifdef __XEN_IA64_VDSO_PARAVIRT // r20 = 1 // r22 = &vcpu->vcpu_info->evtchn_upcall_mask // r24 = &vcpu->vcpu_info->evtchn_upcall_pending // r25 = tmp - // r28 = &running_on_xen - // r30 = running_on_xen // r31 = tmp // p11 = tmp - // p12 = running_on_xen - // p13 = !running_on_xen // p14 = tmp - // p15 = tmp -#define isXen p12 -#define isRaw p13 - LOAD_RUNNING_ON_XEN(r28) + mov r20=1 movl r22=XSI_PSR_I_ADDR - mov r20=1 - ;; - ld4 r30=[r28] - ;; - cmp.ne isXen,isRaw=r0,r30 - ;; -(isXen) ld8 r22=[r22] - ;; -(isRaw) rsm psr.be | psr.i -(isXen) adds r24=-1,r22 -(isXen) st1 [r22]=r20 -(isXen) rum psr.be -#else - ;; + ;; + ld8 r22=[r22] + ;; + st1 [r22]=r20 + rum psr.be + adds r24=-1,r22 +#else rsm psr.be | psr.i // M2 (5 cyc to srlz.d) #endif LOAD_FSYSCALL_TABLE(r14) // X @@ -124,14 +100,14 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) mov r16=IA64_KR(CURRENT) // M2 (12 cyc) shladd r18=r17,3,r14 // A mov r19=NR_syscalls-1 // A +#ifdef __XEN_IA64_VDSO_PARAVIRT + XEN_HYPER_GET_PSR ;; lfetch [r18] // M0|1 -#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT -(isRaw) mov r29=psr -(isXen) XEN_HYPER_GET_PSR - ;; -(isXen) mov r29=r8 -#else + mov r29=r8 +#else + ;; + lfetch [r18] // M0|1 mov r29=psr // M2 (12 cyc) #endif // If r17 is a NaT, p6 will be zero @@ -147,7 +123,7 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) ;; nop.m 0 (p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with 
"mov b7=r18"!) -#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT +#ifdef __XEN_IA64_VDSO_PARAVIRT #define XEN_SET_PSR_I(pred) \ (pred) ld1 r31=[r22]; \ @@ -162,12 +138,7 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) (p11) XEN_HYPER_SSM_I; ;; - // p14 = running_on_xen && p8 - // p15 = !running_on_xen && p8 -(p8) cmp.ne.unc p14,p15=r0,r30 - ;; -(p15) ssm psr.i - XEN_SET_PSR_I(p14) + XEN_SET_PSR_I(p8) #else nop.i 0 ;; @@ -193,9 +164,8 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) #else BRL_COND_FSYS_BUBBLE_DOWN(p6) #endif -#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT -(isRaw) ssm psr.i - XEN_SET_PSR_I(isXen) +#ifdef __XEN_IA64_VDSO_PARAVIRT + XEN_SET_PSR_I(p0) #else ssm psr.i #endif @@ -203,6 +173,14 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) (p10) mov r8=EINVAL (p9) mov r8=ENOSYS FSYS_RETURN +#ifdef __KERNEL_SYSCALL_VIA_EPC_PADDING + /* + * All values/sizes of __kernel_xxx symbol in gate.so and xengate.so + * must be same to each other. + * Adjust symbol size in gate.so to be same to the one in xengate.so. + */ +.include "arch/ia64/kernel/gate-skip.s" +#endif END(__kernel_syscall_via_epc) # define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET) diff -r 6e0d898e9dad -r 9d4bcd10abcb arch/ia64/kernel/gate.lds.S --- a/arch/ia64/kernel/gate.lds.S Mon Jul 30 11:46:14 2007 -0600 +++ b/arch/ia64/kernel/gate.lds.S Wed Aug 01 13:12:18 2007 +0900 @@ -28,6 +28,24 @@ SECTIONS . 
= GATE_ADDR + 0x500; .data.patch : { +#ifdef __XEN_IA64_VDSO_PARAVIRT +#define __start_gate_mckinley_e9_patchlist \ + __start_gate_mckinley_e9_patchlist_xen +#define __end_gate_mckinley_e9_patchlist \ + __end_gate_mckinley_e9_patchlist_xen +#define __start_gate_vtop_patchlist \ + __start_gate_vtop_patchlist_xen +#define __end_gate_vtop_patchlist \ + __end_gate_vtop_patchlist_xen +#define __start_gate_fsyscall_patchlist \ + __start_gate_fsyscall_patchlist_xen +#define __end_gate_fsyscall_patchlist \ + __end_gate_fsyscall_patchlist_xen +#define __start_gate_brl_fsys_bubble_down_patchlist \ + __start_gate_brl_fsys_bubble_down_patchlist_xen +#define __end_gate_brl_fsys_bubble_down_patchlist \ + __end_gate_brl_fsys_bubble_down_patchlist_xen +#endif __start_gate_mckinley_e9_patchlist = .; *(.data.patch.mckinley_e9) __end_gate_mckinley_e9_patchlist = .; @@ -43,12 +61,6 @@ SECTIONS __start_gate_brl_fsys_bubble_down_patchlist = .; *(.data.patch.brl_fsys_bubble_down) __end_gate_brl_fsys_bubble_down_patchlist = .; - -#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT - __start_gate_running_on_xen_patchlist = .; - *(.data.patch.running_on_xen) - __end_gate_running_on_xen_patchlist = .; -#endif } :readable .IA_64.unwind_info : { *(.IA_64.unwind_info*) } .IA_64.unwind : { *(.IA_64.unwind*) } :readable :unwind diff -r 6e0d898e9dad -r 9d4bcd10abcb arch/ia64/kernel/patch.c --- a/arch/ia64/kernel/patch.c Mon Jul 30 11:46:14 2007 -0600 +++ b/arch/ia64/kernel/patch.c Wed Aug 01 13:12:18 2007 +0900 @@ -185,40 +185,41 @@ patch_brl_fsys_bubble_down (unsigned lon } #ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT -extern char __start_gate_running_on_xen_patchlist[]; -extern char __end_gate_running_on_xen_patchlist[]; - -void -patch_running_on_xen(unsigned long start, unsigned long end) -{ - extern const int running_on_xen; - s32 *offp = (s32 *)start; - u64 ip; - - while (offp < (s32 *)end) { - ip = (u64)ia64_imva((char *)offp + *offp); - ia64_patch_imm64(ip, (u64)&running_on_xen); - ia64_fc((void *)ip); - ++offp; 
- } - ia64_sync_i(); - ia64_srlz_i(); +void __init +ia64_patch_gate_xen (void) +{ + extern char __start_gate_mckinley_e9_patchlist_xen[], __end_gate_mckinley_e9_patchlist_xen[]; + extern char __start_gate_vtop_patchlist_xen[], __end_gate_vtop_patchlist_xen[]; + extern char __start_gate_fsyscall_patchlist_xen[], __end_gate_fsyscall_patchlist_xen[]; + extern char __start_gate_brl_fsys_bubble_down_patchlist_xen[], __end_gate_brl_fsys_bubble_down_patchlist_xen[]; +# define START(name) ((unsigned long) __start_gate_##name##_patchlist_xen) +# define END(name) ((unsigned long)__end_gate_##name##_patchlist_xen) + + patch_fsyscall_table(START(fsyscall), END(fsyscall)); + patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down)); + ia64_patch_vtop(START(vtop), END(vtop)); + ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9)); + +# undef START +# undef END } #else -#define patch_running_on_xen(start, end) do { } while (0) +#define ia64_patch_gate_xen() do { } while (0) #endif void __init ia64_patch_gate (void) { + if (is_running_on_xen()) { + ia64_patch_gate_xen(); + return; + } + # define START(name) ((unsigned long) __start_gate_##name##_patchlist) # define END(name) ((unsigned long)__end_gate_##name##_patchlist) patch_fsyscall_table(START(fsyscall), END(fsyscall)); patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down)); -#ifdef CONFIG_XEN - patch_running_on_xen(START(running_on_xen), END(running_on_xen)); -#endif ia64_patch_vtop(START(vtop), END(vtop)); ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9)); } diff -r 6e0d898e9dad -r 9d4bcd10abcb arch/ia64/kernel/vmlinux.lds.S --- a/arch/ia64/kernel/vmlinux.lds.S Mon Jul 30 11:46:14 2007 -0600 +++ b/arch/ia64/kernel/vmlinux.lds.S Wed Aug 01 13:12:18 2007 +0900 @@ -183,6 +183,10 @@ SECTIONS __start_gate_section = .; *(.data.gate) __stop_gate_section = .; + . 
= ALIGN(PAGE_SIZE); + __start_xen_gate_section = .; + *(.data.gate.xen) + __stop_xen_gate_section = .; } . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose kernel data */ diff -r 6e0d898e9dad -r 9d4bcd10abcb arch/ia64/kernel/xengate-data.S --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arch/ia64/kernel/xengate-data.S Wed Aug 01 13:12:18 2007 +0900 @@ -0,0 +1,3 @@ + .section .data.gate.xen, "aw" + + .incbin "arch/ia64/kernel/xengate.so" diff -r 6e0d898e9dad -r 9d4bcd10abcb arch/ia64/mm/init.c --- a/arch/ia64/mm/init.c Mon Jul 30 11:46:14 2007 -0600 +++ b/arch/ia64/mm/init.c Wed Aug 01 13:12:18 2007 +0900 @@ -303,16 +303,34 @@ setup_gate (void) setup_gate (void) { struct page *page; + void *gate_page_addr = __start_gate_section; + +#ifdef CONFIG_XEN + unsigned long unused_gate; + extern char __start_xen_gate_section[]; + if (is_running_on_xen()) { + gate_page_addr = __start_xen_gate_section; + unused_gate = (unsigned long)ia64_imva(__start_gate_section); + } else + unused_gate = + (unsigned long)ia64_imva(__start_xen_gate_section); +#ifndef HAVE_BUGGY_SEGREL + ClearPageReserved(virt_to_page(unused_gate)); + init_page_count(virt_to_page(unused_gate)); + free_page(unused_gate); + ++totalram_pages; +#endif +#endif /* * Map the gate page twice: once read-only to export the ELF * headers etc. and once execute-only page to enable * privilege-promotion via "epc": */ - page = virt_to_page(ia64_imva(__start_gate_section)); + page = virt_to_page(ia64_imva(gate_page_addr)); put_kernel_page(page, GATE_ADDR, PAGE_READONLY); #ifdef HAVE_BUGGY_SEGREL - page = virt_to_page(ia64_imva(__start_gate_section + PAGE_SIZE)); + page = virt_to_page(ia64_imva(gate_page_addr + PAGE_SIZE)); put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE); #else put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE);