
To: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH] x86/asm: allow some unlikely taken branches to be statically predicted this way
From: "Jan Beulich" <JBeulich@xxxxxxxxxx>
Date: Thu, 16 Dec 2010 16:09:11 +0000
... by moving the respective code out of line (into sub-section 1 of
the section in question). A few other branches could be eliminated
altogether.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
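
For readers unfamiliar with the trick: on x86, a conditional branch with
no BTB history is, on many processors, statically predicted - forward
branches as not taken, backward branches as taken. Emitting the rarely
executed path into .subsection 1 makes the assembler place it after all
of the section's hot code, so the conditional jump to it becomes a
forward branch (predicted not taken) and the common case falls straight
through. A minimal sketch of how the new UNLIKELY_START/UNLIKELY_END
macros (added to asm_defns.h below) are meant to be used - error_flag
and report_error are made-up names, purely for illustration:

        cmpb  $0,error_flag(%rip)
UNLIKELY_START(ne, report)   /* jne .Lunlikely.report; .subsection 1; .Lunlikely.report: */
        call  report_error   /* cold code, assembled out of line */
UNLIKELY_END(report)         /* jmp .Llikely.report; .subsection 0; .Llikely.report: */
        /* common case continues here, with no taken branch in the way */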

--- a/xen/arch/x86/x86_32/entry.S
+++ b/xen/arch/x86/x86_32/entry.S
@@ -142,7 +142,7 @@ restore_all_xen:
 ENTRY(hypercall)
         subl $4,%esp
         FIXUP_RING0_GUEST_STACK
-        SAVE_ALL(1f,1f)
+        SAVE_ALL(,1f)
 1:      sti
         GET_CURRENT(%ebx)
         cmpl  $NR_hypercalls,%eax
@@ -182,12 +182,14 @@ ENTRY(hypercall)
 #define SHADOW_BYTES 24 /* 6 shadow parameters */
 #endif
         cmpb  $0,tb_init_done
-        je    1f
+UNLIKELY_START(ne, trace)
         call  trace_hypercall
         /* Now restore all the registers that trace_hypercall clobbered */
         movl  UREGS_eax+SHADOW_BYTES(%esp),%eax /* Hypercall # */
+UNLIKELY_END(trace)
+        call *hypercall_table(,%eax,4)
+        movl  %eax,UREGS_eax+SHADOW_BYTES(%esp) # save the return value
 #undef SHADOW_BYTES
-1:      call *hypercall_table(,%eax,4)
         addl  $24,%esp     # Discard the shadow parameters
 #ifndef NDEBUG
         /* Deliberately corrupt real parameter regs used by this hypercall. */
@@ -197,13 +199,10 @@ ENTRY(hypercall)
         jne   skip_clobber # If EIP has changed then don't clobber
         movzb hypercall_args_table(,%ecx,1),%ecx
         movl  %esp,%edi
-        movl  %eax,%esi
         movl  $0xDEADBEEF,%eax
         rep   stosl
-        movl  %esi,%eax
 skip_clobber:
 #endif
-        movl %eax,UREGS_eax(%esp)       # save the return value
 
 test_all_events:
         xorl %ecx,%ecx
@@ -293,8 +292,8 @@ create_bounce_frame:
         jz   ring1 /* jump if returning to an existing ring-1 activation */
         movl VCPU_kernel_sp(%ebx),%esi
 .Lft6:  mov  VCPU_kernel_ss(%ebx),%gs
-        testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
-        jz   .Lnvm86_1
+        testl $X86_EFLAGS_VM,%ecx
+UNLIKELY_START(nz, bounce_vm86_1)
         subl $16,%esi       /* push ES/DS/FS/GS (VM86 stack frame) */
         movl UREGS_es+4(%esp),%eax
 .Lft7:  movl %eax,%gs:(%esi)
@@ -304,7 +303,7 @@ create_bounce_frame:
 .Lft9:  movl %eax,%gs:8(%esi)
         movl UREGS_gs+4(%esp),%eax
 .Lft10: movl %eax,%gs:12(%esi)
-.Lnvm86_1:
+UNLIKELY_END(bounce_vm86_1)
         subl $8,%esi        /* push SS/ESP (inter-priv iret) */
         movl UREGS_esp+4(%esp),%eax
 .Lft11: movl %eax,%gs:(%esi)
@@ -346,17 +345,10 @@ ring1:  /* obtain ss/esp from oldss/olde
         movl TRAPBOUNCE_error_code(%edx),%eax
 .Lft17: movl %eax,%gs:(%esi)
 1:      testb $TBF_FAILSAFE,%cl
-        jz   2f
+UNLIKELY_START(nz, bounce_failsafe)
         subl $16,%esi                # add DS/ES/FS/GS to failsafe stack frame
         testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
-        jz   .Lnvm86_2
-        xorl %eax,%eax               # VM86: we write zero selector values
-.Lft18: movl %eax,%gs:(%esi)
-.Lft19: movl %eax,%gs:4(%esi)
-.Lft20: movl %eax,%gs:8(%esi)
-.Lft21: movl %eax,%gs:12(%esi)
-        jmp  2f
-.Lnvm86_2:
+        jnz  .Lvm86_2
         movl UREGS_ds+4(%esp),%eax   # non-VM86: write real selector values
 .Lft22: movl %eax,%gs:(%esi)
         movl UREGS_es+4(%esp),%eax
@@ -365,13 +357,22 @@ ring1:  /* obtain ss/esp from oldss/olde
 .Lft24: movl %eax,%gs:8(%esi)
         movl UREGS_gs+4(%esp),%eax
 .Lft25: movl %eax,%gs:12(%esi)
-2:      testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
-        jz   .Lnvm86_3
+        jmp  .Lnvm86_3
+.Lvm86_2:
+        xorl %eax,%eax               # VM86: we write zero selector values
+.Lft18: movl %eax,%gs:(%esi)
+.Lft19: movl %eax,%gs:4(%esi)
+.Lft20: movl %eax,%gs:8(%esi)
+.Lft21: movl %eax,%gs:12(%esi)
+UNLIKELY_END(bounce_failsafe)
+        testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
+UNLIKELY_START(nz, bounce_vm86_3)
         xorl %eax,%eax      /* zero DS-GS, just as a real CPU would */
         movl %eax,UREGS_ds+4(%esp)
         movl %eax,UREGS_es+4(%esp)
         movl %eax,UREGS_fs+4(%esp)
         movl %eax,UREGS_gs+4(%esp)
+UNLIKELY_END(bounce_vm86_3)
 .Lnvm86_3:
         /* Rewrite our stack frame and return to ring 1. */
         /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -49,7 +49,7 @@ ENTRY(compat_hypercall)
 #define SHADOW_BYTES 0  /* No on-stack shadow state */
 #endif
         cmpb  $0,tb_init_done(%rip)
-        je    1f
+UNLIKELY_START(ne, compat_trace)
         call  trace_hypercall
         /* Now restore all the registers that trace_hypercall clobbered */
         movl  UREGS_rax+SHADOW_BYTES(%rsp),%eax   /* Hypercall #  */
@@ -60,7 +60,8 @@ ENTRY(compat_hypercall)
         movl  UREGS_rdi+SHADOW_BYTES(%rsp),%r8d   /* Arg 5        */
         movl  UREGS_rbp+SHADOW_BYTES(%rsp),%r9d   /* Arg 6        */
 #undef SHADOW_BYTES
-1:      leaq  compat_hypercall_table(%rip),%r10
+UNLIKELY_END(compat_trace)
+        leaq  compat_hypercall_table(%rip),%r10
         PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
         callq *(%r10,%rax,8)
 #ifndef NDEBUG
@@ -295,7 +296,7 @@ compat_create_bounce_frame:
 .Lft8:  movl  %eax,%fs:(%rsi)           # ERROR CODE
 1:
         testb $TBF_FAILSAFE,%cl
-        jz    2f
+UNLIKELY_START(nz, compat_bounce_failsafe)
         subl  $4*4,%esi
         movl  %gs,%eax
 .Lft9:  movl  %eax,%fs:3*4(%rsi)        # GS
@@ -304,7 +305,7 @@ compat_create_bounce_frame:
 .Lft11: movl  %eax,%fs:1*4(%rsi)        # ES
         movl  %ds,%eax
 .Lft12: movl  %eax,%fs:0*4(%rsi)        # DS
-2:
+UNLIKELY_END(compat_bounce_failsafe)
         /* Rewrite our stack frame and return to guest-OS mode. */
         /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
         andl  $~(X86_EFLAGS_VM|X86_EFLAGS_RF|\
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -148,7 +148,7 @@ ENTRY(syscall_enter)
 #define SHADOW_BYTES 0  /* No on-stack shadow state */
 #endif
         cmpb  $0,tb_init_done(%rip)
-        je    1f
+UNLIKELY_START(ne, trace)
         call  trace_hypercall
         /* Now restore all the registers that trace_hypercall clobbered */
         movq  UREGS_rax+SHADOW_BYTES(%rsp),%rax   /* Hypercall #  */
@@ -159,7 +159,8 @@ ENTRY(syscall_enter)
         movq  UREGS_r8 +SHADOW_BYTES(%rsp),%r8    /* Arg 5        */
         movq  UREGS_r9 +SHADOW_BYTES(%rsp),%r9    /* Arg 6        */
 #undef SHADOW_BYTES
-1:      leaq  hypercall_table(%rip),%r10
+UNLIKELY_END(trace)
+        leaq  hypercall_table(%rip),%r10
         PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
         callq *(%r10,%rax,8)
 #ifndef NDEBUG
@@ -341,11 +342,12 @@ create_bounce_frame:
 2:      andq  $~0xf,%rsi                # Stack frames are 16-byte aligned.
         movq  $HYPERVISOR_VIRT_START,%rax
         cmpq  %rax,%rsi
-        jb    1f                        # In +ve address space? Then okay.
         movq  $HYPERVISOR_VIRT_END+60,%rax
+        sbb   %ecx,%ecx                 # In +ve address space? Then okay.
         cmpq  %rax,%rsi
-        jb    domain_crash_synchronous  # Above Xen private area? Then okay.
-1:      movb  TRAPBOUNCE_flags(%rdx),%cl
+        adc   %ecx,%ecx                 # Above Xen private area? Then okay.
+        jg    domain_crash_synchronous
+        movb  TRAPBOUNCE_flags(%rdx),%cl
         subq  $40,%rsi
         movq  UREGS_ss+8(%rsp),%rax
 .Lft2:  movq  %rax,32(%rsi)             # SS
@@ -376,7 +378,7 @@ create_bounce_frame:
         movl  TRAPBOUNCE_error_code(%rdx),%eax
 .Lft7:  movq  %rax,(%rsi)               # ERROR CODE
 1:      testb $TBF_FAILSAFE,%cl
-        jz    2f
+UNLIKELY_START(nz, bounce_failsafe)
         subq  $32,%rsi
         movl  %gs,%eax
 .Lft8:  movq  %rax,24(%rsi)             # GS
@@ -386,7 +388,8 @@ create_bounce_frame:
 .Lft10: movq  %rax,8(%rsi)              # ES
         movl  %ds,%eax
 .Lft11: movq  %rax,(%rsi)               # DS
-2:      subq  $16,%rsi
+UNLIKELY_END(bounce_failsafe)
+        subq  $16,%rsi
         movq  UREGS_r11+8(%rsp),%rax
 .Lft12: movq  %rax,8(%rsi)              # R11
         movq  UREGS_rcx+8(%rsp),%rax
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -12,4 +12,18 @@
 #include <asm/x86_32/asm_defns.h>
 #endif
 
+#ifdef __ASSEMBLY__
+
+#define UNLIKELY_START(cond, tag) \
+        j##cond .Lunlikely.tag;   \
+        .subsection 1;            \
+        .Lunlikely.tag:
+
+#define UNLIKELY_END(tag)         \
+        jmp .Llikely.tag;         \
+        .subsection 0;            \
+        .Llikely.tag:
+
+#endif
+
 #endif /* __X86_ASM_DEFNS_H__ */
--- a/xen/include/asm-x86/x86_32/asm_defns.h
+++ b/xen/include/asm-x86/x86_32/asm_defns.h
@@ -1,6 +1,7 @@
 #ifndef __X86_32_ASM_DEFNS_H__
 #define __X86_32_ASM_DEFNS_H__
 
+#include <xen/stringify.h>
 #include <asm/percpu.h>
 
 #ifdef CONFIG_FRAME_POINTER
@@ -53,12 +54,14 @@
         mov   %es,%esi;                                 \
         mov   $(__HYPERVISOR_DS),%ecx;                  \
         jnz   86f;                                      \
-        .text 1;                                        \
+        .subsection 1;                                  \
         86:   call setup_vm86_frame;                    \
         jmp   vm86_lbl;                                 \
         .previous;                                      \
+        .ifnes __stringify(xen_lbl), "";                \
         testb $3,UREGS_cs(%esp);                        \
         jz    xen_lbl;                                  \
+        .endif;                                         \
         /*                                              \
          * We are the outermost Xen context, but our    \
          * life is complicated by NMIs and MCEs. These  \
--- /dev/null
+++ b/xen/include/xen/stringify.h
@@ -0,0 +1,12 @@
+#ifndef __XEN_STRINGIFY_H
+#define __XEN_STRINGIFY_H
+
+/* Indirect stringification.  Doing two levels allows the parameter to be a
+ * macro itself.  For example, compile with -DFOO=bar, __stringify(FOO)
+ * converts to "bar".
+ */
+
+#define __stringify_1(x...) #x
+#define __stringify(x...)   __stringify_1(x)
+
+#endif /* !__XEN_STRINGIFY_H */
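
A note on the stringify helper, since its use above is a little subtle:
in the x86_32 SAVE_ALL change, .ifnes __stringify(xen_lbl), "" lets the
assembler check at build time whether the xen_lbl macro argument was
left empty - as in the new SAVE_ALL(,1f) invocation at the hypercall
entry - and, if it was, omit the testb/jz pair entirely (presumably one
of the branches the description says could be "eliminated altogether").
The two-level expansion is what makes the test work when the argument
is itself a macro; a small illustration (FOO is hypothetical, not from
the patch):

#define FOO bar
__stringify_1(FOO)   /* "FOO" - the argument is stringified unexpanded   */
__stringify(FOO)     /* "bar" - FOO is macro-expanded first              */
__stringify()        /* ""    - an omitted argument yields ""            */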


Attachment: x86-forward-branches.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel