# HG changeset patch # User yamahata@xxxxxxxxxxxxx # Node ID ea22399b6d1d5c3881fa2fd5fbcba8e43952fec2 # Parent 1c92c31bdcd5cb1967613d24c927b7154555b40a paravirtualize vdso area based on the problem Kevin pointed out and Dan's idea. Introduce hyperprivops HYPERPRIVOP_RSM_BE and HYPERPRIVOP_GET_PSR, and paravirtualize the vdso area using them. PATCHNAME: vdso_area_paravirtualize Signed-off-by: Isaku Yamahata diff -r 1c92c31bdcd5 -r ea22399b6d1d linux-2.6-xen-sparse/arch/ia64/Kconfig --- a/linux-2.6-xen-sparse/arch/ia64/Kconfig Wed Jul 19 11:52:21 2006 +0900 +++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig Wed Jul 19 11:54:59 2006 +0900 @@ -70,6 +70,13 @@ config XEN_IA64_DOM0_NON_VP default y help dom0 P=M model + +config XEN_IA64_VDSO_PARAVIRT + bool + depends on XEN && !ITANIUM + default y + help + vDSO paravirtualization config SCHED_NO_NO_OMIT_FRAME_POINTER bool diff -r 1c92c31bdcd5 -r ea22399b6d1d linux-2.6-xen-sparse/arch/ia64/kernel/gate.S --- a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.S Wed Jul 19 11:52:21 2006 +0900 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.S Wed Jul 19 11:54:59 2006 +0900 @@ -14,6 +14,9 @@ #include #include #include +#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT +# include +#endif /* * We can't easily refer to symbols inside the kernel. To avoid full runtime relocation, @@ -32,6 +35,52 @@ #define BRL_COND_FSYS_BUBBLE_DOWN(pr) \ [1:](pr)brl.cond.sptk 0; \ .xdata4 ".data.patch.brl_fsys_bubble_down", 1b-. + +#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT + // The page in which a hyperprivop lives must be pinned by an ITR. + // However, the vDSO area isn't pinned, so issuing a hyperprivop + // from the vDSO page causes the trouble that Kevin pointed out: + // after clearing vpsr.ic, the vcpu is pre-empted and the itlb + // is flushed. When the vcpu gets the cpu again, a tlb miss fault occurs, + // but it results in a nested dtlb fault because vpsr.ic is off. 
+ // To avoid such a situation, we jump into the kernel text area, + // which is pinned, issue the hyperprivop there, and then return + // to the vDSO page. + // This is Dan Magenheimer's idea. + + // Currently is_running_on_xen() is defined as running_on_xen. + // If is_running_on_xen() becomes a real function, this code + // must be updated accordingly. + .section ".data.patch.running_on_xen", "a" + .previous +#define LOAD_RUNNING_ON_XEN(reg) \ +[1:] movl reg=0; \ + .xdata4 ".data.patch.running_on_xen", 1b-. + + .section ".data.patch.brl_xen_rsm_be_i", "a" + .previous +#define BRL_COND_XEN_RSM_BE_I(pr) \ +[1:](pr)brl.cond.sptk 0; \ + .xdata4 ".data.patch.brl_xen_rsm_be_i", 1b-. + + .section ".data.patch.brl_xen_get_psr", "a" + .previous +#define BRL_COND_XEN_GET_PSR(pr) \ +[1:](pr)brl.cond.sptk 0; \ + .xdata4 ".data.patch.brl_xen_get_psr", 1b-. + + .section ".data.patch.brl_xen_ssm_i_0", "a" + .previous +#define BRL_COND_XEN_SSM_I_0(pr) \ +[1:](pr)brl.cond.sptk 0; \ + .xdata4 ".data.patch.brl_xen_ssm_i_0", 1b-. + + .section ".data.patch.brl_xen_ssm_i_1", "a" + .previous +#define BRL_COND_XEN_SSM_I_1(pr) \ +[1:](pr)brl.cond.sptk 0; \ + .xdata4 ".data.patch.brl_xen_ssm_i_1", 1b-. 
+#endif GLOBAL_ENTRY(__kernel_syscall_via_break) .prologue @@ -77,7 +126,39 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) epc // B causes split-issue } ;; +#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT + // r20 = 1 + // r22 = &vcpu->evtchn_mask + // r23 = &vpsr.ic + // r24 = vcpu->pending_interruption + // r25 = tmp + // r28 = &running_on_xen + // r30 = running_on_xen + // r31 = tmp + // p11 = tmp + // p12 = running_on_xen + // p13 = !running_on_xen + // p14 = tmp + // p15 = tmp +#define isXen p12 +#define isRaw p13 + LOAD_RUNNING_ON_XEN(r28) + movl r22=XSI_PSR_I_ADDR + movl r23=XSI_PSR_IC + movl r24=XSI_PSR_I_ADDR+(XSI_PEND_OFS-XSI_PSR_I_ADDR_OFS) + mov r20=1 + ;; + ld4 r30=[r28] + ;; + cmp.ne isXen,isRaw=r0,r30 + ;; +(isRaw) rsm psr.be | psr.i + BRL_COND_XEN_RSM_BE_I(isXen) + .global .vdso_rsm_be_i_ret +.vdso_rsm_be_i_ret: +#else rsm psr.be | psr.i // M2 (5 cyc to srlz.d) +#endif LOAD_FSYSCALL_TABLE(r14) // X ;; mov r16=IA64_KR(CURRENT) // M2 (12 cyc) @@ -85,7 +166,14 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) mov r19=NR_syscalls-1 // A ;; lfetch [r18] // M0|1 +#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT +(isRaw) mov r29=psr + BRL_COND_XEN_GET_PSR(isXen) + .global .vdso_get_psr_ret +.vdso_get_psr_ret: +#else mov r29=psr // M2 (12 cyc) +#endif // If r17 is a NaT, p6 will be zero cmp.geu p6,p7=r19,r17 // A (sysnr > 0 && sysnr < 1024+NR_syscalls)? ;; @@ -99,9 +187,21 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) ;; nop.m 0 (p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!) 
+#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT + ;; + // p14 = running_on_xen && p8 + // p15 = !running_on_xen && p8 +(p8) cmp.ne.unc p14,p15=r0,r30 + ;; +(p15) ssm psr.i + BRL_COND_XEN_SSM_I_0(p14) + .global .vdso_ssm_i_0_ret +.vdso_ssm_i_0_ret: +#else nop.i 0 ;; (p8) ssm psr.i +#endif (p6) mov b7=r18 // I0 (p8) br.dptk.many b7 // B @@ -122,9 +222,21 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) #else BRL_COND_FSYS_BUBBLE_DOWN(p6) #endif +#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT +(isRaw) ssm psr.i + BRL_COND_XEN_SSM_I_1(isXen) + .global .vdso_ssm_i_1_ret +.vdso_ssm_i_1_ret: +#else ssm psr.i +#endif mov r10=-1 (p10) mov r8=EINVAL +#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT + dv_serialize_data // shut up gas warning. + // we know xen_hyper_ssm_i_0 or xen_hyper_ssm_i_1 + // doesn't change p9 and p10 +#endif (p9) mov r8=ENOSYS FSYS_RETURN END(__kernel_syscall_via_epc) diff -r 1c92c31bdcd5 -r ea22399b6d1d linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S --- a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S Wed Jul 19 11:52:21 2006 +0900 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S Wed Jul 19 11:54:59 2006 +0900 @@ -43,6 +43,28 @@ SECTIONS __start_gate_brl_fsys_bubble_down_patchlist = .; *(.data.patch.brl_fsys_bubble_down) __end_gate_brl_fsys_bubble_down_patchlist = .; + +#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT + __start_gate_running_on_xen_patchlist = .; + *(.data.patch.running_on_xen) + __end_gate_running_on_xen_patchlist = .; + + __start_gate_brl_xen_rsm_be_i_patchlist = .; + *(.data.patch.brl_xen_rsm_be_i) + __end_gate_brl_xen_rsm_be_i_patchlist = .; + + __start_gate_brl_xen_get_psr_patchlist = .; + *(.data.patch.brl_xen_get_psr) + __end_gate_brl_xen_get_psr_patchlist = .; + + __start_gate_brl_xen_ssm_i_0_patchlist = .; + *(.data.patch.brl_xen_ssm_i_0) + __end_gate_brl_xen_ssm_i_0_patchlist = .; + + __start_gate_brl_xen_ssm_i_1_patchlist = .; + *(.data.patch.brl_xen_ssm_i_1) + __end_gate_brl_xen_ssm_i_1_patchlist = .; +#endif } :readable .IA_64.unwind_info : { 
*(.IA_64.unwind_info*) } .IA_64.unwind : { *(.IA_64.unwind*) } :readable :unwind diff -r 1c92c31bdcd5 -r ea22399b6d1d linux-2.6-xen-sparse/arch/ia64/kernel/patch.c --- a/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c Wed Jul 19 11:52:21 2006 +0900 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c Wed Jul 19 11:54:59 2006 +0900 @@ -184,6 +184,70 @@ patch_brl_fsys_bubble_down (unsigned lon ia64_srlz_i(); } +#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT +extern char __start_gate_running_on_xen_patchlist[]; +extern char __end_gate_running_on_xen_patchlist[]; +void +patch_running_on_xen(unsigned long start, unsigned long end) +{ + extern int running_on_xen; + s32 *offp = (s32 *) start; + u64 ip; + + while (offp < (s32 *) end) { + ip = (u64) ia64_imva((char *) offp + *offp); + ia64_patch_imm64(ip, (u64) &running_on_xen); + ia64_fc((void *) ip); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} + +static void +patch_brl_symaddr(unsigned long start, unsigned long end, + unsigned long symaddr) +{ + s32 *offp = (s32 *) start; + u64 ip; + + while (offp < (s32 *) end) { + ip = (u64) offp + *offp; + ia64_patch_imm60((u64) ia64_imva((void *) ip), + (u64) (symaddr - (ip & -16)) / 16); + ia64_fc((void *) ip); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} + +static void +patch_brl_in_vdso(void) +{ +#define EXTERN_PATCHLIST(name) \ + extern char __start_gate_brl_##name##_patchlist[]; \ + extern char __end_gate_brl_##name##_patchlist[]; \ + extern char name[] + EXTERN_PATCHLIST(xen_rsm_be_i); + EXTERN_PATCHLIST(xen_get_psr); + EXTERN_PATCHLIST(xen_ssm_i_0); + EXTERN_PATCHLIST(xen_ssm_i_1); + +#define PATCH_BRL_SYMADDR(name) \ + patch_brl_symaddr((unsigned long)__start_gate_brl_##name##_patchlist, \ + (unsigned long)__end_gate_brl_##name##_patchlist, \ + (unsigned long)name) + PATCH_BRL_SYMADDR(xen_rsm_be_i); + PATCH_BRL_SYMADDR(xen_get_psr); + PATCH_BRL_SYMADDR(xen_ssm_i_0); + PATCH_BRL_SYMADDR(xen_ssm_i_1); +} +#else +#define patch_running_on_xen(start, end) do { } while (0) +#define 
patch_brl_in_vdso() do { } while (0) +#endif + void ia64_patch_gate (void) { @@ -192,6 +256,8 @@ ia64_patch_gate (void) patch_fsyscall_table(START(fsyscall), END(fsyscall)); patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down)); + patch_running_on_xen(START(running_on_xen), END(running_on_xen)); + patch_brl_in_vdso(); ia64_patch_vtop(START(vtop), END(vtop)); ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9)); } diff -r 1c92c31bdcd5 -r ea22399b6d1d linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S Wed Jul 19 11:52:21 2006 +0900 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S Wed Jul 19 11:54:59 2006 +0900 @@ -351,3 +351,59 @@ GLOBAL_ENTRY(xen_send_ipi) br.ret.sptk.many rp ;; END(xen_send_ipi) + +#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT +// Those are vdso specialized. +// In fsys mode, call, ret can't be used. +GLOBAL_ENTRY(xen_rsm_be_i) + ld8 r22=[r22] + ;; + st1 [r22]=r20 + st4 [r23]=r0 + XEN_HYPER_RSM_BE + st4 [r23]=r20 + brl.cond.sptk .vdso_rsm_be_i_ret + ;; +END(xen_rsm_be_i) + +GLOBAL_ENTRY(xen_get_psr) + mov r31=r8 + mov r25=IA64_PSR_IC + st4 [r23]=r0 + XEN_HYPER_GET_PSR + ;; + st4 [r23]=r20 + or r29=r8,r25 // vpsr.ic was cleared for hyperprivop + mov r8=r31 + brl.cond.sptk .vdso_get_psr_ret + ;; +END(xen_get_psr) + +GLOBAL_ENTRY(xen_ssm_i_0) + st4 [r22]=r20 + ld4 r25=[r24] + ;; + cmp.ne.unc p11,p0=r0, r25 + ;; +(p11) st4 [r22]=r0 +(p11) st4 [r23]=r0 +(p11) XEN_HYPER_SSM_I + + brl.cond.sptk .vdso_ssm_i_0_ret + ;; +END(xen_ssm_i_0) + +GLOBAL_ENTRY(xen_ssm_i_1) + st4 [r22]=r20 + ld4 r25=[r24] + ;; + cmp.ne.unc p11,p0=r0, r25 + ;; +(p11) st4 [r22]=r0 +(p11) st4 [r23]=r0 +(p11) XEN_HYPER_SSM_I + ;; + brl.cond.sptk .vdso_ssm_i_1_ret + ;; +END(xen_ssm_i_1) +#endif diff -r 1c92c31bdcd5 -r ea22399b6d1d linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h --- a/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h Wed Jul 19 11:52:21 2006 +0900 +++ 
b/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h Wed Jul 19 11:54:59 2006 +0900 @@ -48,6 +48,8 @@ #define XEN_HYPER_GET_PMD break HYPERPRIVOP_GET_PMD #define XEN_HYPER_GET_EFLAG break HYPERPRIVOP_GET_EFLAG #define XEN_HYPER_SET_EFLAG break HYPERPRIVOP_SET_EFLAG +#define XEN_HYPER_RSM_BE break HYPERPRIVOP_RSM_BE +#define XEN_HYPER_GET_PSR break HYPERPRIVOP_GET_PSR #define XSI_IFS (XSI_BASE + XSI_IFS_OFS) #define XSI_PRECOVER_IFS (XSI_BASE + XSI_PRECOVER_IFS_OFS) diff -r 1c92c31bdcd5 -r ea22399b6d1d xen/arch/ia64/xen/privop.c --- a/xen/arch/ia64/xen/privop.c Wed Jul 19 11:52:21 2006 +0900 +++ b/xen/arch/ia64/xen/privop.c Wed Jul 19 11:54:59 2006 +0900 @@ -789,6 +789,13 @@ ia64_hyperprivop(unsigned long iim, REGS case HYPERPRIVOP_SET_EFLAG: (void)vcpu_set_ar(v,24,regs->r8); return 1; + case HYPERPRIVOP_RSM_BE: + (void)vcpu_reset_psr_sm(v, IA64_PSR_BE); + return 1; + case HYPERPRIVOP_GET_PSR: + (void)vcpu_get_psr(v, &val); + regs->r8 = val; + return 1; } return 0; } diff -r 1c92c31bdcd5 -r ea22399b6d1d xen/include/public/arch-ia64.h --- a/xen/include/public/arch-ia64.h Wed Jul 19 11:52:21 2006 +0900 +++ b/xen/include/public/arch-ia64.h Wed Jul 19 11:54:59 2006 +0900 @@ -420,7 +420,9 @@ struct xen_ia64_boot_param { #define HYPERPRIVOP_GET_PMD 0x15 #define HYPERPRIVOP_GET_EFLAG 0x16 #define HYPERPRIVOP_SET_EFLAG 0x17 -#define HYPERPRIVOP_MAX 0x17 +#define HYPERPRIVOP_RSM_BE 0x18 +#define HYPERPRIVOP_GET_PSR 0x19 +#define HYPERPRIVOP_MAX 0x19 #endif /* __HYPERVISOR_IF_IA64_H__ */