[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] linux/x86: use sysenter/syscall for 32-bit apps on 64-bit Xen



This applies both to 32-bit apps on 64-bit kernels and to 32-bit kernels.

Also remove the needless re-enabling of events on x86-64's 64-bit
syscall path as well as its 32-bit int80 path (the latter accompanied
by telling Xen not to disable them in the first place).

This was tested on 2.6.22-rc6, and was only made to apply to 2.6.18
without further testing.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

Index: head-2007-07-02/arch/i386/kernel/entry-xen.S
===================================================================
--- head-2007-07-02.orig/arch/i386/kernel/entry-xen.S   2007-07-04 
11:45:03.000000000 +0200
+++ head-2007-07-02/arch/i386/kernel/entry-xen.S        2007-07-03 
14:01:17.000000000 +0200
@@ -382,6 +382,26 @@ sysenter_past_esp:
        CFI_ENDPROC
 
 
+       # pv sysenter call handler stub
+ENTRY(sysenter_entry_pv)
+       RING0_INT_FRAME
+       movl $__USER_DS,16(%esp)
+       movl %ebp,12(%esp)
+/*
+ * Load the potential sixth argument from user stack.
+ * Careful about security.
+ */
+       cmpl $__PAGE_OFFSET-3,%ebp
+       jae syscall_fault
+1:     movl (%ebp),%ebp
+.section __ex_table,"a"
+       .align 4
+       .long 1b,syscall_fault
+.previous
+       /* fall through */
+       CFI_ENDPROC
+ENDPROC(sysenter_entry_pv)
+
        # system call handler stub
 ENTRY(system_call)
        RING0_INT_FRAME                 # can't unwind into user space anyway
Index: head-2007-07-02/arch/i386/kernel/process-xen.c
===================================================================
--- head-2007-07-02.orig/arch/i386/kernel/process-xen.c 2007-07-04 
11:45:01.000000000 +0200
+++ head-2007-07-02/arch/i386/kernel/process-xen.c      2007-07-04 
10:02:51.000000000 +0200
@@ -51,6 +51,7 @@
 #include <asm/math_emu.h>
 #endif
 
+#include <xen/interface/callback.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/vcpu.h>
 #include <xen/cpu_hotplug.h>
@@ -581,6 +582,22 @@ struct task_struct fastcall * __switch_t
        mcl->args[1] = next->esp0;
        mcl++;
 
+       if (boot_cpu_has(X86_FEATURE_SEP) && next_p->mm) {
+               /* Specify sysexit address. */
+               struct callback_register sysexit = {
+                       .type = CALLBACKTYPE_sysexit,
+                       .address = {
+                               __USER_CS,
+                               (unsigned 
long)task_thread_info(next_p)->sysenter_return
+                       }
+               };
+
+               mcl->op      = __HYPERVISOR_callback_op;
+               mcl->args[0] = CALLBACKOP_register;
+               mcl->args[1] = (unsigned long)&sysexit;
+               mcl++;
+       }
+
        /*
         * Load the per-thread Thread-Local Storage descriptor.
         * This is load_TLS(next, cpu) with multicalls.
@@ -616,7 +633,9 @@ struct task_struct fastcall * __switch_t
                mcl++;
        }
 
-       (void)HYPERVISOR_multicall(_mcl, mcl - _mcl);
+       BUG_ON(mcl > _mcl + ARRAY_SIZE(_mcl));
+       if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL)))
+               BUG();
 
        /*
         * Restore %fs and %gs if needed.
Index: head-2007-07-02/arch/i386/kernel/sysenter.c
===================================================================
--- head-2007-07-02.orig/arch/i386/kernel/sysenter.c    2007-07-04 
11:45:01.000000000 +0200
+++ head-2007-07-02/arch/i386/kernel/sysenter.c 2007-07-04 09:55:04.000000000 
+0200
@@ -48,22 +48,33 @@ extern asmlinkage void sysenter_entry(vo
 
 void enable_sep_cpu(void)
 {
-#ifndef CONFIG_X86_NO_TSS
        int cpu = get_cpu();
+#ifndef CONFIG_X86_NO_TSS
        struct tss_struct *tss = &per_cpu(init_tss, cpu);
+#else
+       (void)cpu;
+#endif
 
        if (!boot_cpu_has(X86_FEATURE_SEP)) {
                put_cpu();
                return;
        }
 
+#ifndef CONFIG_X86_NO_TSS
        tss->ss1 = __KERNEL_CS;
        tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
-       wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
        wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0);
+#endif
+       wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
+#ifdef CONFIG_XEN
+       if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
+               extern asmlinkage void sysenter_entry_pv(void);
+
+               wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry_pv, 
0);
+       } else
+#endif
        wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
        put_cpu();      
-#endif
 }
 
 /*
@@ -78,18 +89,6 @@ int __init sysenter_setup(void)
 {
        syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
 
-#ifdef CONFIG_XEN
-       if (boot_cpu_has(X86_FEATURE_SEP)) {
-               static struct callback_register __initdata sysenter = {
-                       .type = CALLBACKTYPE_sysenter,
-                       .address = { __KERNEL_CS, (unsigned long)sysenter_entry 
},
-               };
-
-               if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) < 0)
-                       clear_bit(X86_FEATURE_SEP, 
boot_cpu_data.x86_capability);
-       }
-#endif
-
 #ifdef CONFIG_COMPAT_VDSO
        __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY);
        printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
@@ -174,6 +173,19 @@ int arch_setup_additional_pages(struct l
        current->mm->context.vdso = (void *)addr;
        current_thread_info()->sysenter_return =
                                    (void *)VDSO_SYM(&SYSENTER_RETURN);
+#ifdef CONFIG_XEN
+       if (boot_cpu_has(X86_FEATURE_SEP)) {
+               /* Specify sysexit address. */
+               struct callback_register sysexit = {
+                       .type = CALLBACKTYPE_sysexit,
+                       .address = { __USER_CS, VDSO_SYM(&SYSENTER_RETURN) }
+               };
+
+               if (HYPERVISOR_callback_op(CALLBACKOP_register,
+                                          &sysexit) < 0)
+                       BUG();
+       }
+#endif
        mm->total_vm++;
 up_fail:
        up_write(&mm->mmap_sem);
Index: head-2007-07-02/arch/x86_64/ia32/Makefile
===================================================================
--- head-2007-07-02.orig/arch/x86_64/ia32/Makefile      2007-07-04 
11:45:01.000000000 +0200
+++ head-2007-07-02/arch/x86_64/ia32/Makefile   2007-07-03 14:37:01.000000000 
+0200
@@ -14,11 +14,15 @@ obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
 audit-class-$(CONFIG_AUDIT) := audit.o
 obj-$(CONFIG_IA32_EMULATION) += $(audit-class-y)
 
+syscall32-types-y := sysenter syscall
+syscall32-types-$(shell expr $(CONFIG_XEN_COMPAT)0 '<' 0x0302000 \
+                        | sed 'y,01,n$(CONFIG_XEN),') += int80
+
 $(obj)/syscall32_syscall.o: \
-       $(foreach F,sysenter syscall,$(obj)/vsyscall-$F.so)
+       $(foreach F,$(syscall32-types-y),$(obj)/vsyscall-$F.so)
 
 # Teach kbuild about targets
-targets := $(foreach F,sysenter syscall,vsyscall-$F.o vsyscall-$F.so)
+targets := $(foreach F,$(syscall32-types-y),vsyscall-$F.o vsyscall-$F.so)
 
 # The DSO images are built using a special linker script
 quiet_cmd_syscall = SYSCALL $@
@@ -27,23 +31,17 @@ quiet_cmd_syscall = SYSCALL $@
                           -Wl,-soname=linux-gate.so.1 -o $@ \
                           -Wl,-T,$(filter-out FORCE,$^)
 
-$(obj)/vsyscall-int80.so \
-$(obj)/vsyscall-sysenter.so $(obj)/vsyscall-syscall.so: \
+$(foreach F,$(syscall32-types-y),$(obj)/vsyscall-$F.so): \
 $(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
        $(call if_changed,syscall)
 
 AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32 -Iarch/i386/kernel
 AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32 -Iarch/i386/kernel
-
-ifdef CONFIG_XEN
 AFLAGS_vsyscall-int80.o = -m32 -Wa,-32 -Iarch/i386/kernel
-CFLAGS_syscall32-xen.o += -DUSE_INT80
-AFLAGS_syscall32_syscall-xen.o += -DUSE_INT80
 
+ifdef CONFIG_XEN
 $(obj)/syscall32_syscall-xen.o: \
-       $(foreach F,int80 sysenter syscall,$(obj)/vsyscall-$F.so)
-
-targets := $(foreach F,int80 sysenter syscall,vsyscall-$F.o vsyscall-$F.so)
+       $(foreach F,$(syscall32-types-y),$(obj)/vsyscall-$F.so)
 
 include $(srctree)/scripts/Makefile.xen
 
Index: head-2007-07-02/arch/x86_64/ia32/ia32entry-xen.S
===================================================================
--- head-2007-07-02.orig/arch/x86_64/ia32/ia32entry-xen.S       2007-07-04 
11:45:01.000000000 +0200
+++ head-2007-07-02/arch/x86_64/ia32/ia32entry-xen.S    2007-07-03 
15:12:20.000000000 +0200
@@ -80,7 +80,7 @@
  * %ebp user stack
  * 0(%ebp) Arg6        
  *     
- * Interrupts off.
+ * Interrupts on.
  *     
  * This is purely a fast path. For anything complicated we use the int 0x80
  * path below. Set up a complete hardware stack frame to share code
@@ -89,38 +89,25 @@
  */    
 ENTRY(ia32_sysenter_target)
        CFI_STARTPROC32 simple
-       CFI_DEF_CFA     rsp,0
-       CFI_REGISTER    rsp,rbp
-       __swapgs 
-       movq    %gs:pda_kernelstack, %rsp
-       addq    $(PDA_STACKOFFSET),%rsp
-       /*
-        * No need to follow this irqs on/off section: the syscall
-        * disabled irqs, here we enable it straight after entry:
-        */
-       XEN_UNBLOCK_EVENTS(%r11)        
-       __sti
+       CFI_DEF_CFA     rsp,SS+8-RIP+16
+       /*CFI_REL_OFFSET        ss,SS-RIP+16*/
+       CFI_REL_OFFSET  rsp,RSP-RIP+16
+       /*CFI_REL_OFFSET        rflags,EFLAGS-RIP+16*/
+       /*CFI_REL_OFFSET        cs,CS-RIP+16*/
+       CFI_REL_OFFSET  rip,RIP-RIP+16
+       CFI_REL_OFFSET  r11,8
+       CFI_REL_OFFSET  rcx,0
+       movq    8(%rsp),%r11
+       CFI_RESTORE     r11
+       popq    %rcx
+       CFI_ADJUST_CFA_OFFSET -8
+       CFI_RESTORE     rcx
        movl    %ebp,%ebp               /* zero extension */
-       pushq   $__USER32_DS
-       CFI_ADJUST_CFA_OFFSET 8
-       /*CFI_REL_OFFSET ss,0*/
-       pushq   %rbp
-       CFI_ADJUST_CFA_OFFSET 8
-       CFI_REL_OFFSET rsp,0
-       pushfq
-       CFI_ADJUST_CFA_OFFSET 8
-       /*CFI_REL_OFFSET rflags,0*/
-       movl    $VSYSCALL32_SYSEXIT, %r10d
-       CFI_REGISTER rip,r10
-       pushq   $__USER32_CS
-       CFI_ADJUST_CFA_OFFSET 8
-       /*CFI_REL_OFFSET cs,0*/
-       movl    %eax, %eax
-       pushq   %r10
-       CFI_ADJUST_CFA_OFFSET 8
-       CFI_REL_OFFSET rip,0
-       pushq   %rax
-       CFI_ADJUST_CFA_OFFSET 8
+       movl    %eax,%eax
+       movl    $__USER32_DS,40(%rsp)
+       movq    %rbp,32(%rsp)
+       movl    $__USER32_CS,16(%rsp)
+       movq    %rax,(%rsp)
        cld
        SAVE_ARGS 0,0,0
        /* no need to do an access_ok check here because rbp has been
@@ -132,7 +119,6 @@ ENTRY(ia32_sysenter_target)
        GET_THREAD_INFO(%r10)
        orl    $TS_COMPAT,threadinfo_status(%r10)
        testl  
$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
-       CFI_REMEMBER_STATE
        jnz  sysenter_tracesys
 sysenter_do_call:      
        cmpl    $(IA32_NR_syscalls-1),%eax
@@ -140,33 +126,9 @@ sysenter_do_call:  
        IA32_ARG_FIXUP 1
        call    *ia32_sys_call_table(,%rax,8)
        movq    %rax,RAX-ARGOFFSET(%rsp)
-       GET_THREAD_INFO(%r10)
-       XEN_BLOCK_EVENTS(%r11)  
-       __cli
-       TRACE_IRQS_OFF
-       testl   $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
-       jnz     int_ret_from_sys_call
-       andl    $~TS_COMPAT,threadinfo_status(%r10)
-       /* clear IF, that popfq doesn't enable interrupts early */
-       andl  $~0x200,EFLAGS-R11(%rsp) 
-       RESTORE_ARGS 1,24,1,1,1,1
-       popfq
-       CFI_ADJUST_CFA_OFFSET -8
-       /*CFI_RESTORE rflags*/
-       popq    %rcx                            /* User %esp */
-       CFI_ADJUST_CFA_OFFSET -8
-       CFI_REGISTER rsp,rcx
-       movl    $VSYSCALL32_SYSEXIT,%edx        /* User %eip */
-       CFI_REGISTER rip,rdx
-       TRACE_IRQS_ON
-       __swapgs
-       XEN_UNBLOCK_EVENTS(%r11)                
-       __sti           /* sti only takes effect after the next instruction */
-       /* sysexit */
-       .byte   0xf, 0x35  /* TBD */
+       jmp int_ret_from_sys_call
 
 sysenter_tracesys:
-       CFI_RESTORE_STATE
        SAVE_REST
        CLEAR_RREGS
        movq    $-ENOSYS,RAX(%rsp)      /* really needed? */
@@ -199,7 +161,7 @@ ENDPROC(ia32_sysenter_target)
  * %esp user stack 
  * 0(%esp) Arg6
  *     
- * Interrupts off.
+ * Interrupts on.
  *     
  * This is purely a fast path. For anything complicated we use the int 0x80
  * path below. Set up a complete hardware stack frame to share code
@@ -208,32 +170,20 @@ ENDPROC(ia32_sysenter_target)
  */    
 ENTRY(ia32_cstar_target)
        CFI_STARTPROC32 simple
-       CFI_DEF_CFA     rsp,PDA_STACKOFFSET
-       CFI_REGISTER    rip,rcx
-       /*CFI_REGISTER  rflags,r11*/
-       __swapgs
-       movl    %esp,%r8d
-       CFI_REGISTER    rsp,r8
-       movq    %gs:pda_kernelstack,%rsp
-       /*
-        * No need to follow this irqs on/off section: the syscall
-        * disabled irqs and here we enable it straight after entry:
-        */
-       XEN_UNBLOCK_EVENTS(%r11)        
-       __sti
-       SAVE_ARGS 8,1,1
+       CFI_DEF_CFA     rsp,SS+8-RIP+16
+       /*CFI_REL_OFFSET        ss,SS-RIP+16*/
+       CFI_REL_OFFSET  rsp,RSP-RIP+16
+       /*CFI_REL_OFFSET        rflags,EFLAGS-RIP+16*/
+       /*CFI_REL_OFFSET        cs,CS-RIP+16*/
+       CFI_REL_OFFSET  rip,RIP-RIP+16
        movl    %eax,%eax       /* zero extension */
+       movl    RSP-RIP+16(%rsp),%r8d
+       SAVE_ARGS -8,1,1
        movq    %rax,ORIG_RAX-ARGOFFSET(%rsp)
-       movq    %rcx,RIP-ARGOFFSET(%rsp)
-       CFI_REL_OFFSET rip,RIP-ARGOFFSET
        movq    %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
        movl    %ebp,%ecx
-       movq    $__USER32_CS,CS-ARGOFFSET(%rsp)
-       movq    $__USER32_DS,SS-ARGOFFSET(%rsp)
-       movq    %r11,EFLAGS-ARGOFFSET(%rsp)
-       /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
-       movq    %r8,RSP-ARGOFFSET(%rsp) 
-       CFI_REL_OFFSET rsp,RSP-ARGOFFSET
+       movl    $__USER32_CS,CS-ARGOFFSET(%rsp)
+       movl    $__USER32_DS,SS-ARGOFFSET(%rsp)
        /* no need to do an access_ok check here because r8 has been
           32bit zero extended */ 
        /* hardware stack frame is complete now */      
@@ -244,7 +194,6 @@ ENTRY(ia32_cstar_target)
        GET_THREAD_INFO(%r10)
        orl   $TS_COMPAT,threadinfo_status(%r10)
        testl 
$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
-       CFI_REMEMBER_STATE
        jnz   cstar_tracesys
 cstar_do_call: 
        cmpl $IA32_NR_syscalls-1,%eax
@@ -252,26 +201,9 @@ cstar_do_call:     
        IA32_ARG_FIXUP 1
        call *ia32_sys_call_table(,%rax,8)
        movq %rax,RAX-ARGOFFSET(%rsp)
-       GET_THREAD_INFO(%r10)
-       XEN_BLOCK_EVENTS(%r11)          
-       __cli
-       TRACE_IRQS_OFF
-       testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
-       jnz  int_ret_from_sys_call
-       andl $~TS_COMPAT,threadinfo_status(%r10)
-       RESTORE_ARGS 1,-ARG_SKIP,1,1,1
-       movl RIP-ARGOFFSET(%rsp),%ecx
-       CFI_REGISTER rip,rcx
-       movl EFLAGS-ARGOFFSET(%rsp),%r11d       
-       /*CFI_REGISTER rflags,r11*/
-       TRACE_IRQS_ON
-       movl RSP-ARGOFFSET(%rsp),%esp
-       CFI_RESTORE rsp
-       __swapgs
-       sysretl  /* TBD */
+       jmp int_ret_from_sys_call
        
 cstar_tracesys:        
-       CFI_RESTORE_STATE
        SAVE_REST
        CLEAR_RREGS
        movq $-ENOSYS,RAX(%rsp) /* really needed? */
@@ -312,32 +244,27 @@ ia32_badarg:
  * Arguments are zero extended. For system calls that want sign extension and
  * take long arguments a wrapper is needed. Most calls can just be called
  * directly.
- * Assumes it is only called from user space and entered with interrupts off.  
+ * Assumes it is only called from user space and entered with interrupts on.
  */                            
 
 ENTRY(ia32_syscall)
        CFI_STARTPROC   simple
-       CFI_DEF_CFA     rsp,SS+8-RIP
-       /*CFI_REL_OFFSET        ss,SS-RIP*/
-       CFI_REL_OFFSET  rsp,RSP-RIP
-       /*CFI_REL_OFFSET        rflags,EFLAGS-RIP*/
-       /*CFI_REL_OFFSET        cs,CS-RIP*/
-       CFI_REL_OFFSET  rip,RIP-RIP
-       __swapgs
-       /*
-        * No need to follow this irqs on/off section: the syscall
-        * disabled irqs and here we enable it straight after entry:
-        */
-       XEN_UNBLOCK_EVENTS(%r11)
-       __sti
-       movq (%rsp),%rcx
+       CFI_DEF_CFA     rsp,SS+8-RIP+16
+       /*CFI_REL_OFFSET        ss,SS-RIP+16*/
+       CFI_REL_OFFSET  rsp,RSP-RIP+16
+       /*CFI_REL_OFFSET        rflags,EFLAGS-RIP+16*/
+       /*CFI_REL_OFFSET        cs,CS-RIP+16*/
+       CFI_REL_OFFSET  rip,RIP-RIP+16
+       CFI_REL_OFFSET  r11,8
+       CFI_REL_OFFSET  rcx,0
        movq 8(%rsp),%r11
-        addq $0x10,%rsp /* skip rcx and r11 */
+       CFI_RESTORE     r11
+       popq %rcx
+       CFI_ADJUST_CFA_OFFSET -8
+       CFI_RESTORE     rcx
        movl %eax,%eax
-       pushq %rax
-       CFI_ADJUST_CFA_OFFSET 8
+       movq %rax,(%rsp)
        cld
-/* 1:  jmp 1b   */
        /* note the registers are not zero extended to the sf.
           this could be a problem. */
        SAVE_ARGS 0,0,1
Index: head-2007-07-02/arch/x86_64/ia32/syscall32_syscall-xen.S
===================================================================
--- head-2007-07-02.orig/arch/x86_64/ia32/syscall32_syscall-xen.S       
2007-07-04 11:45:01.000000000 +0200
+++ head-2007-07-02/arch/x86_64/ia32/syscall32_syscall-xen.S    2007-07-03 
14:01:17.000000000 +0200
@@ -2,7 +2,7 @@
 
        .section ".init.data","aw"
 
-#ifdef USE_INT80
+#if CONFIG_XEN_COMPAT < 0x030200
 
        .globl syscall32_int80
        .globl syscall32_int80_end
Index: head-2007-07-02/arch/x86_64/ia32/syscall32-xen.c
===================================================================
--- head-2007-07-02.orig/arch/x86_64/ia32/syscall32-xen.c       2007-07-04 
11:45:01.000000000 +0200
+++ head-2007-07-02/arch/x86_64/ia32/syscall32-xen.c    2007-07-03 
14:37:38.000000000 +0200
@@ -15,7 +15,8 @@
 #include <asm/tlbflush.h>
 #include <asm/ia32_unistd.h>
 
-#ifdef USE_INT80
+#if CONFIG_XEN_COMPAT < 0x030200
+#include <xen/interface/callback.h>
 extern unsigned char syscall32_int80[], syscall32_int80_end[];
 #endif
 extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
@@ -23,8 +24,9 @@ extern unsigned char syscall32_sysenter[
 extern int sysctl_vsyscall32;
 
 char *syscall32_page; 
-#ifndef USE_INT80
 static int use_sysenter = -1;
+#if CONFIG_XEN_COMPAT < 0x030200
+static int use_int80 = 1;
 #endif
 
 static struct page *
@@ -69,13 +71,12 @@ static int __init init_syscall32(void)
        if (!syscall32_page) 
                panic("Cannot allocate syscall32 page"); 
 
-#ifdef USE_INT80
-       /*
-        * At this point we use int 0x80.
-        */
-       memcpy(syscall32_page, syscall32_int80,
-              syscall32_int80_end - syscall32_int80);
-#else
+#if CONFIG_XEN_COMPAT < 0x030200
+       if (use_int80) {
+               memcpy(syscall32_page, syscall32_int80,
+                      syscall32_int80_end - syscall32_int80);
+       } else
+#endif
        if (use_sysenter > 0) {
                memcpy(syscall32_page, syscall32_sysenter,
                       syscall32_sysenter_end - syscall32_sysenter);
@@ -83,7 +84,6 @@ static int __init init_syscall32(void)
                memcpy(syscall32_page, syscall32_syscall,
                       syscall32_syscall_end - syscall32_syscall);
        }       
-#endif
        return 0;
 } 
 
@@ -96,7 +96,17 @@ core_initcall(init_syscall32); 
 /* May not be __init: called during resume */
 void syscall32_cpu_init(void)
 {
-#ifndef USE_INT80
+#if CONFIG_XEN_COMPAT < 0x030200
+       static struct callback_register sysexit = {
+               .type = CALLBACKTYPE_sysexit,
+               .address = (unsigned long)VSYSCALL32_SYSEXIT
+       };
+
+       if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysexit) < 0)
+               return;
+       use_int80 = 0;
+#endif
+
        if (use_sysenter < 0)
                use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
 
@@ -107,5 +117,4 @@ void syscall32_cpu_init(void)
        checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
 
        wrmsrl(MSR_CSTAR, ia32_cstar_target);
-#endif
 }
Index: head-2007-07-02/arch/x86_64/kernel/entry-xen.S
===================================================================
--- head-2007-07-02.orig/arch/x86_64/kernel/entry-xen.S 2007-07-04 
11:45:03.000000000 +0200
+++ head-2007-07-02/arch/x86_64/kernel/entry-xen.S      2007-07-03 
14:01:17.000000000 +0200
@@ -244,7 +244,7 @@ END(ret_from_fork)
  * r11  eflags for syscall/sysret, temporary for C
  * r12-r15,rbp,rbx saved by C code, not touched.               
  * 
- * Interrupts are off on entry.
+ * Interrupts are enabled on entry.
  * Only called from user space.
  *
  * XXX if we had a free scratch register we could save the RSP into the stack 
frame
@@ -259,11 +259,6 @@ ENTRY(system_call)
        _frame (RIP-0x10)
        SAVE_ARGS -8,0
        movq  %rax,ORIG_RAX-ARGOFFSET(%rsp) 
-       /*
-        * No need to follow this irqs off/on section - it's straight
-        * and short:
-        */
-        XEN_UNBLOCK_EVENTS(%r11)        
        GET_THREAD_INFO(%rcx)
        testl 
$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
        CFI_REMEMBER_STATE
Index: head-2007-07-02/arch/x86_64/kernel/process-xen.c
===================================================================
--- head-2007-07-02.orig/arch/x86_64/kernel/process-xen.c       2007-07-04 
11:45:01.000000000 +0200
+++ head-2007-07-02/arch/x86_64/kernel/process-xen.c    2007-07-04 
10:03:04.000000000 +0200
@@ -584,7 +584,10 @@ __switch_to(struct task_struct *prev_p, 
                mcl++;
        }
 
-       (void)HYPERVISOR_multicall(_mcl, mcl - _mcl);
+       BUG_ON(mcl > _mcl + ARRAY_SIZE(_mcl));
+       if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL)))
+               BUG();
+
        /* 
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
Index: head-2007-07-02/arch/x86_64/kernel/setup64-xen.c
===================================================================
--- head-2007-07-02.orig/arch/x86_64/kernel/setup64-xen.c       2007-07-04 
11:45:01.000000000 +0200
+++ head-2007-07-02/arch/x86_64/kernel/setup64-xen.c    2007-07-03 
14:01:17.000000000 +0200
@@ -211,7 +211,10 @@ extern asmlinkage void ignore_sysret(voi
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
-#ifndef CONFIG_XEN
+#if CONFIG_XEN_COMPAT < 0x030200
+       if (!boot_cpu_has(X86_FEATURE_SEP))
+               return;
+#endif
        /* 
         * LSTAR and STAR live in a bit strange symbiosis.
         * They both write to the same internal register. STAR allows to set 
CS/DS
@@ -220,12 +223,12 @@ void syscall_init(void)
        wrmsrl(MSR_STAR,  ((u64)__USER32_CS)<<48  | ((u64)__KERNEL_CS)<<32); 
        wrmsrl(MSR_LSTAR, system_call); 
 
-       /* Flags to clear on syscall */
-       wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); 
-#endif
 #ifdef CONFIG_IA32_EMULATION                   
        syscall32_cpu_init ();
 #endif
+
+       /* Flags to clear on syscall */
+       wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF);
 }
 
 void __cpuinit check_efer(void)
Index: head-2007-07-02/arch/x86_64/kernel/traps-xen.c
===================================================================
--- head-2007-07-02.orig/arch/x86_64/kernel/traps-xen.c 2007-07-04 
11:45:04.000000000 +0200
+++ head-2007-07-02/arch/x86_64/kernel/traps-xen.c      2007-07-03 
14:01:17.000000000 +0200
@@ -1110,7 +1110,7 @@ static trap_info_t trap_table[] = {
 #endif
         { 19, 0|4, __KERNEL_CS, (unsigned long)simd_coprocessor_error     },
 #ifdef CONFIG_IA32_EMULATION
-       { IA32_SYSCALL_VECTOR, 3|4, __KERNEL_CS, (unsigned long)ia32_syscall},
+       { IA32_SYSCALL_VECTOR, 3, __KERNEL_CS, (unsigned long)ia32_syscall},
 #endif
         {  0, 0,           0, 0                                              }
 };
Index: head-2007-07-02/drivers/xen/Kconfig
===================================================================
--- head-2007-07-02.orig/drivers/xen/Kconfig    2007-07-04 11:45:01.000000000 
+0200
+++ head-2007-07-02/drivers/xen/Kconfig 2007-07-03 14:01:17.000000000 +0200
@@ -13,7 +13,7 @@ config XEN
 if XEN
 config XEN_INTERFACE_VERSION
        hex
-       default 0x00030205
+       default 0x00030206
 
 menu "XEN"
 
@@ -247,6 +247,9 @@ choice
        config XEN_COMPAT_030004_AND_LATER
                bool "3.0.4 and later"
 
+       config XEN_COMPAT_030100_AND_LATER
+               bool "3.1.0 and later"
+
        config XEN_COMPAT_LATEST_ONLY
                bool "no compatibility code"
 
@@ -255,6 +258,7 @@ endchoice
 config XEN_COMPAT
        hex
        default 0xffffff if XEN_COMPAT_LATEST_ONLY
+       default 0x030100 if XEN_COMPAT_030100_AND_LATER
        default 0x030004 if XEN_COMPAT_030004_AND_LATER
        default 0x030002 if XEN_COMPAT_030002_AND_LATER
        default 0
Index: head-2007-07-02/drivers/xen/core/smpboot.c
===================================================================
--- head-2007-07-02.orig/drivers/xen/core/smpboot.c     2007-07-04 
11:45:07.000000000 +0200
+++ head-2007-07-02/drivers/xen/core/smpboot.c  2007-07-03 14:46:21.000000000 
+0200
@@ -174,6 +174,7 @@ static void xen_smp_intr_exit(unsigned i
 void cpu_bringup(void)
 {
        cpu_init();
+       identify_cpu(cpu_data + smp_processor_id());
        touch_softlockup_watchdog();
        preempt_disable();
        local_irq_enable();
Index: head-2007-07-02/include/asm-x86_64/mach-xen/setup_arch_post.h
===================================================================
--- head-2007-07-02.orig/include/asm-x86_64/mach-xen/setup_arch_post.h  
2007-07-04 11:45:01.000000000 +0200
+++ head-2007-07-02/include/asm-x86_64/mach-xen/setup_arch_post.h       
2007-07-03 14:01:17.000000000 +0200
@@ -23,10 +23,12 @@ static void __init machine_specific_arch
                .type = CALLBACKTYPE_failsafe,
                .address = (unsigned long)failsafe_callback,
        };
+#if CONFIG_XEN_COMPAT < 0x030200
        static struct callback_register __initdata syscall = {
                .type = CALLBACKTYPE_syscall,
                .address = (unsigned long)system_call,
        };
+#endif
 #ifdef CONFIG_X86_LOCAL_APIC
        static struct callback_register __initdata nmi_cb = {
                .type = CALLBACKTYPE_nmi,
@@ -37,8 +39,10 @@ static void __init machine_specific_arch
        ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event);
        if (ret == 0)
                ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
+#if CONFIG_XEN_COMPAT < 0x030200
        if (ret == 0)
                ret = HYPERVISOR_callback_op(CALLBACKOP_register, &syscall);
+#endif
 #if CONFIG_XEN_COMPAT <= 0x030002
        if (ret == -ENOSYS)
                ret = HYPERVISOR_set_callbacks(
Index: head-2007-07-02/include/xen/interface/callback.h
===================================================================
--- head-2007-07-02.orig/include/xen/interface/callback.h       2007-07-04 
11:45:01.000000000 +0200
+++ head-2007-07-02/include/xen/interface/callback.h    2007-07-03 
10:39:14.000000000 +0200
@@ -38,13 +38,34 @@
 
 #define CALLBACKTYPE_event                 0
 #define CALLBACKTYPE_failsafe              1
-#define CALLBACKTYPE_syscall               2 /* x86_64 only */
+#define CALLBACKTYPE_syscall               2 /* x86_64 hv only */
 /*
- * sysenter is only available on x86_32 with the
- * supervisor_mode_kernel option enabled.
+ * sysenter_deprecated is only available on x86_32 with the
+ * supervisor_mode_kernel option enabled, and should not be used in new code.
  */
-#define CALLBACKTYPE_sysenter              3
+#define CALLBACKTYPE_sysenter_deprecated   3
 #define CALLBACKTYPE_nmi                   4
+#if __XEN_INTERFACE_VERSION__ < 0x00030206
+#define CALLBACKTYPE_sysenter              CALLBACKTYPE_sysenter_deprecated
+#else
+/*
+ * sysenter is only available
+ * - on x86_32 with the supervisor_mode_kernel option enabled,
+ * - on x86_64 hv for x86_32 pv or 32-bit guest support in x86_64 pv.
+ */
+#define CALLBACKTYPE_sysenter              5
+/*
+ * sysexit is only available on x86_64 hv, and is only used to fill a
+ * sysenter frame's return address (if the guest desires to have a non-NULL
+ * value there). Additionally, since CALLBACKF_mask_events is meaningless
+ * here, it is being (mis-)used for 64-bits guests to distinguish sysenter
+ * callers expected to be in 64-bit mode (flag set) from 32-bit ones (flag
+ * clear).
+ */
+#define CALLBACKTYPE_sysexit               6
+#define CALLBACKTYPE_syscall32             7 /* x86_64 only */
+#define CALLBACKTYPE_sfmask                8 /* x86_64 only */
+#endif
 
 /*
  * Disable event deliver during callback? This flag is ignored for event and
Index: head-2007-07-02/include/xen/interface/xen-compat.h
===================================================================
--- head-2007-07-02.orig/include/xen/interface/xen-compat.h     2007-07-04 
11:45:01.000000000 +0200
+++ head-2007-07-02/include/xen/interface/xen-compat.h  2007-07-03 
10:39:14.000000000 +0200
@@ -27,7 +27,7 @@
 #ifndef __XEN_PUBLIC_XEN_COMPAT_H__
 #define __XEN_PUBLIC_XEN_COMPAT_H__
 
-#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030205
+#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030206
 
 #if defined(__XEN__) || defined(__XEN_TOOLS__)
 /* Xen is built with matching headers and implements the latest interface. */



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.