
[Xen-devel] [PATCH] x86-64: syscall/sysenter support for 32-bit apps



This provides syscall/sysenter support for 32-bit apps, both in 64-bit pv
guests and in 32on64 guests.

While I coded both a hypercall interface and MSR emulation, I'm not really
sure both mechanisms need to be there; guest-side sketches of both follow
below.
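
A minimal guest-side sketch of the hypercall route (not part of this patch):
a 64-bit pv kernel registering its 32-bit entry points with the new callback
types. entry_syscall32/entry_sysenter are placeholders, and
HYPERVISOR_callback_op() is assumed to be the usual Linux-style wrapper.

    #include <xen/interface/callback.h>

    extern void entry_syscall32(void);  /* SYSCALL entry for 32-bit apps */
    extern void entry_sysenter(void);   /* SYSENTER entry for 32-bit apps */

    static void register_compat_entries(void)
    {
        /* On x86_64 the callback address is just the entry point. */
        struct callback_register cb = {
            .type    = CALLBACKTYPE_syscall32,
            .flags   = CALLBACKF_mask_events,  /* enter with events masked */
            .address = (unsigned long)entry_syscall32,
        };

        if ( HYPERVISOR_callback_op(CALLBACKOP_register, &cb) )
            return;  /* old hypervisor: 32-bit apps keep using int $0x80 */

        cb.type    = CALLBACKTYPE_sysenter;
        cb.flags   = 0;  /* events stay enabled, matching the MSR path */
        cb.address = (unsigned long)entry_sysenter;
        HYPERVISOR_callback_op(CALLBACKOP_register, &cb);
    }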

This depends on more than just guest_context being saved/restored as guest
state during save/restore/migrate (namely on the new fields holding the
callback addresses); that part isn't implemented yet (and I likely won't do
it).

Since the 32-bit kernel doesn't make use of syscall (it would now be possible
to do so when running on a 64-bit hv), the compat-mode guest code path for
syscall is untested.
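
For comparison, the MSR route (handled by the WRMSR emulation added to
emulate_privileged_op() below) lets a 64-bit pv kernel program the
architectural MSRs as on native hardware. Again only a sketch, assuming
Linux's wrmsrl() wrapper; the mask value is merely an example:

    wrmsrl(MSR_CSTAR, (unsigned long)entry_syscall32);
    wrmsrl(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_sysenter);
    /*
     * X86_EFLAGS_IF here translates into VGCF_syscall_disables_events;
     * neither IF nor IOPL is ever folded into the recorded eflags mask.
     */
    wrmsrl(MSR_SYSCALL_MASK, X86_EFLAGS_IF|X86_EFLAGS_DF|X86_EFLAGS_TF);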

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

Index: 2007-07-03/xen/arch/x86/domain.c
===================================================================
--- 2007-07-03.orig/xen/arch/x86/domain.c       2007-06-22 16:57:45.000000000 +0200
+++ 2007-07-03/xen/arch/x86/domain.c    2007-07-03 10:39:13.000000000 +0200
@@ -395,6 +395,12 @@ int vcpu_initialise(struct vcpu *v)
     v->arch.perdomain_ptes =
         d->arch.mm_perdomain_pt + (v->vcpu_id << GDT_LDT_VCPU_SHIFT);
 
+#ifdef __x86_64__
+    v->arch.sysexit_cs = 3;
+    v->arch.syscall_eflags_mask = X86_EFLAGS_DF|X86_EFLAGS_TF|X86_EFLAGS_NT|
+                                  X86_EFLAGS_RF|X86_EFLAGS_VM;
+#endif
+
     return (is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0);
 }
 
@@ -607,7 +613,18 @@ int arch_set_info_guest(
         v->arch.flags |= TF_kernel_mode;
 
     if ( !compat )
+    {
         memcpy(&v->arch.guest_context, c.nat, sizeof(*c.nat));
+#ifdef __x86_64__
+        /*
+         * Despite not being correct, be backwards compatible - most
+         * importantly in order to prevent the guest from being crashed
+         * due to use of syscall from compatibility mode when the kernel
+         * didn't set the compatibility mode callback.
+         */
+        v->arch.syscall32_callback_eip = c.nat->syscall_callback_eip;
+#endif
+    }
 #ifdef CONFIG_COMPAT
     else
     {
@@ -1274,7 +1291,9 @@ void context_switch(struct vcpu *prev, s
             local_flush_tlb_one(GDT_VIRT_START(next) +
                                 FIRST_RESERVED_GDT_BYTE);
 
-            if ( !is_pv_32on64_vcpu(next) == !(efer & EFER_SCE) )
+            if ( (!is_pv_32on64_vcpu(next)
+                  || (next->arch.syscall32_callback_cs & ~3)) ==
+                 !(efer & EFER_SCE) )
                 write_efer(efer ^ EFER_SCE);
         }
 #endif
Index: 2007-07-03/xen/arch/x86/traps.c
===================================================================
--- 2007-07-03.orig/xen/arch/x86/traps.c        2007-07-03 10:35:22.000000000 +0200
+++ 2007-07-03/xen/arch/x86/traps.c     2007-07-04 13:21:20.000000000 +0200
@@ -609,16 +609,21 @@ static int emulate_forced_invalid_op(str
         clear_bit(X86_FEATURE_DE,  &d);
         clear_bit(X86_FEATURE_PSE, &d);
         clear_bit(X86_FEATURE_PGE, &d);
+        if ( !cpu_has_sep )
+            clear_bit(X86_FEATURE_SEP, &d);
+#ifdef __i386__
         if ( !supervisor_mode_kernel )
             clear_bit(X86_FEATURE_SEP, &d);
+#endif
         if ( !IS_PRIV(current->domain) )
             clear_bit(X86_FEATURE_MTRR, &d);
     }
     else if ( regs->eax == 0x80000001 )
     {
         /* Modify Feature Information. */
-        if ( is_pv_32bit_vcpu(current) )
-            clear_bit(X86_FEATURE_SYSCALL % 32, &d);
+#ifdef __i386__
+        clear_bit(X86_FEATURE_SYSCALL % 32, &d);
+#endif
         clear_bit(X86_FEATURE_RDTSCP % 32, &d);
     }
     else
@@ -1695,6 +1700,8 @@ static int emulate_privileged_op(struct 
         break;
 
     case 0x30: /* WRMSR */
+        data = regs->eax;
+        res = ((u64)regs->edx << 32) | data;
         switch ( regs->ecx )
         {
 #ifdef CONFIG_X86_64
@@ -1703,24 +1710,87 @@ static int emulate_privileged_op(struct 
                 goto fail;
             if ( wrmsr_safe(MSR_FS_BASE, regs->eax, regs->edx) )
                 goto fail;
-            v->arch.guest_context.fs_base =
-                ((u64)regs->edx << 32) | regs->eax;
+            v->arch.guest_context.fs_base = res;
             break;
         case MSR_GS_BASE:
             if ( is_pv_32on64_vcpu(v) )
                 goto fail;
             if ( wrmsr_safe(MSR_GS_BASE, regs->eax, regs->edx) )
                 goto fail;
-            v->arch.guest_context.gs_base_kernel =
-                ((u64)regs->edx << 32) | regs->eax;
+            v->arch.guest_context.gs_base_kernel = res;
             break;
         case MSR_SHADOW_GS_BASE:
             if ( is_pv_32on64_vcpu(v) )
                 goto fail;
             if ( wrmsr_safe(MSR_SHADOW_GS_BASE, regs->eax, regs->edx) )
                 goto fail;
-            v->arch.guest_context.gs_base_user =
-                ((u64)regs->edx << 32) | regs->eax;
+            v->arch.guest_context.gs_base_user = res;
+            break;
+        case MSR_STAR:
+            if ( is_pv_32on64_vcpu(v) )
+            {
+                v->arch.syscall32_callback_eip = data;
+                v->arch.syscall32_callback_cs = (uint16_t)regs->edx;
+                fixup_guest_code_selector(v->domain,
+                                          v->arch.syscall32_callback_cs);
+            }
+            break;
+        case MSR_LSTAR:
+            if ( is_pv_32on64_vcpu(v) || !is_canonical_address(res) )
+                goto fail;
+            v->arch.guest_context.syscall_callback_eip = res;
+            break;
+        case MSR_CSTAR:
+            if ( is_pv_32on64_vcpu(v) || !is_canonical_address(res) )
+                goto fail;
+            v->arch.syscall32_callback_eip = res;
+            break;
+        case MSR_SYSCALL_MASK:
+            if ( is_pv_32on64_vcpu(v) || (uint32_t)regs->edx != 0 )
+                goto fail;
+            v->arch.syscall_eflags_mask = data &
+                                          ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
+            if ( data & X86_EFLAGS_IF )
+            {
+                set_bit(_VGCF_syscall_disables_events,
+                        &v->arch.guest_context.flags);
+                v->arch.syscall32_disables_events = 1;
+            }
+            else
+            {
+                clear_bit(_VGCF_syscall_disables_events,
+                          &v->arch.guest_context.flags);
+                v->arch.syscall32_disables_events = 0;
+            }
+            break;
+        case MSR_IA32_SYSENTER_CS:
+            if ( is_pv_32on64_vcpu(v) )
+            {
+                v->arch.sysenter_callback_cs = data;
+                fixup_guest_code_selector(v->domain,
+                                          v->arch.sysenter_callback_cs);
+                /*
+                 * While this doesn't match real SYSENTER behavior, the guest
+                 * generally doesn't have a need to switch stacks (or anything
+                 * else that needs to keep interrupts disabled). If the guest
+                 * really needs interrupts disabled on entry, it can still use
+                 * the corresponding hypercall.
+                 */
+                v->arch.sysenter_disables_events = 0;
+            }
+            v->arch.sysexit_cs = (data + 16) | 3;
+            break;
+        case MSR_IA32_SYSENTER_EIP:
+            if ( !is_pv_32on64_vcpu(v) && !is_canonical_address(res) )
+                goto fail;
+            v->arch.sysenter_callback_eip = is_pv_32on64_vcpu(v) ? data : res;
+            if ( !is_pv_32on64_vcpu(v) )
+                /* See comment above. */
+                v->arch.sysenter_disables_events = 0;
+            break;
+        case MSR_IA32_SYSENTER_ESP:
+            if ( !is_pv_32on64_vcpu(v) && !is_canonical_address(res) )
+                goto fail;
             break;
 #endif
         default:
@@ -1758,6 +1828,53 @@ static int emulate_privileged_op(struct 
             regs->eax = v->arch.guest_context.gs_base_user & 0xFFFFFFFFUL;
             regs->edx = v->arch.guest_context.gs_base_user >> 32;
             break;
+        case MSR_STAR:
+            if ( is_pv_32on64_vcpu(v) )
+            {
+                regs->eax = v->arch.syscall32_callback_eip;
+                regs->edx = v->arch.syscall32_callback_cs |
+                            (FLAT_COMPAT_USER_CS << 16);
+            }
+            else
+                regs->edx = FLAT_KERNEL_CS64 | (FLAT_USER_CS64 << 16);
+            break;
+        case MSR_LSTAR:
+            if ( is_pv_32on64_vcpu(v) )
+                goto fail;
+            regs->eax = (uint32_t)v->arch.guest_context.syscall_callback_eip;
+            regs->edx = v->arch.guest_context.syscall_callback_eip >> 32;
+            break;
+        case MSR_CSTAR:
+            if ( is_pv_32on64_vcpu(v) )
+                goto fail;
+            regs->eax = (uint32_t)v->arch.syscall32_callback_eip;
+            regs->edx = v->arch.syscall32_callback_eip >> 32;
+            break;
+        case MSR_SYSCALL_MASK:
+            if ( is_pv_32on64_vcpu(v) )
+                goto fail;
+            data = v->arch.syscall_eflags_mask;
+            if ( test_bit(_VGCF_syscall_disables_events,
+                          &v->arch.guest_context.flags) )
+                data |= X86_EFLAGS_IF;
+            regs->eax = data;
+            regs->edx = 0;
+            break;
+        case MSR_IA32_SYSENTER_CS:
+            if ( is_pv_32on64_vcpu(v) )
+                regs->eax = v->arch.sysenter_callback_cs;
+            else
+                regs->eax = FLAT_KERNEL_CS64;
+            regs->edx = 0;
+            break;
+        case MSR_IA32_SYSENTER_EIP:
+            regs->eax = (uint32_t)v->arch.sysenter_callback_eip;
+            regs->edx = v->arch.sysenter_callback_eip >> 32;
+            break;
+        case MSR_IA32_SYSENTER_ESP:
+            regs->eax = (uint32_t)v->arch.guest_context.kernel_sp;
+            regs->edx = v->arch.guest_context.kernel_sp >> 32;
+            break;
 #endif
         case MSR_EFER:
             if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
@@ -2026,6 +2143,13 @@ asmlinkage int do_debug(struct cpu_user_
 
     if ( !guest_mode(regs) )
     {
+#ifdef __x86_64__
+        /*
+         * Single stepping across sysenter must not result in the single step
+         * flag being lost: record it here for create_bounce_frame to pick up.
+         */
+        v->arch.eflags_mask |= (regs->eflags & EF_TF);
+#endif
         /* Clear TF just for absolute sanity. */
         regs->eflags &= ~EF_TF;
         /*
Index: 2007-07-03/xen/arch/x86/x86_32/traps.c
===================================================================
--- 2007-07-03.orig/xen/arch/x86/x86_32/traps.c 2007-06-22 16:57:45.000000000 +0200
+++ 2007-07-03/xen/arch/x86/x86_32/traps.c      2007-07-03 10:39:14.000000000 +0200
@@ -329,12 +329,19 @@ static long register_guest_callback(stru
         break;
 
 #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
-    case CALLBACKTYPE_sysenter:
+    case CALLBACKTYPE_sysenter_deprecated:
         if ( ! cpu_has_sep )
             ret = -EINVAL;
         else if ( on_each_cpu(do_update_sysenter, &reg->address, 1, 1) != 0 )
             ret = -EIO;
         break;
+
+    case CALLBACKTYPE_sysenter:
+        if ( ! cpu_has_sep )
+            ret = -EINVAL;
+        else
+            do_update_sysenter(&reg->address);
+        break;
 #endif
 
     case CALLBACKTYPE_nmi:
@@ -358,6 +365,7 @@ static long unregister_guest_callback(st
     case CALLBACKTYPE_event:
     case CALLBACKTYPE_failsafe:
 #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
+    case CALLBACKTYPE_sysenter_deprecated:
     case CALLBACKTYPE_sysenter:
 #endif
         ret = -EINVAL;
Index: 2007-07-03/xen/arch/x86/x86_64/asm-offsets.c
===================================================================
--- 2007-07-03.orig/xen/arch/x86/x86_64/asm-offsets.c   2007-07-03 10:35:22.000000000 +0200
+++ 2007-07-03/xen/arch/x86/x86_64/asm-offsets.c        2007-07-04 12:51:10.000000000 +0200
@@ -71,6 +71,22 @@ void __dummy__(void)
            arch.guest_context.failsafe_callback_cs);
     OFFSET(VCPU_syscall_addr, struct vcpu,
            arch.guest_context.syscall_callback_eip);
+    OFFSET(VCPU_syscall32_addr, struct vcpu, arch.syscall32_callback_eip);
+    OFFSET(VCPU_syscall32_sel, struct vcpu, arch.syscall32_callback_cs);
+    OFFSET(VCPU_syscall32_disables_events, struct vcpu,
+           arch.syscall32_disables_events);
+    OFFSET(VCPU_syscall_eflags_mask, struct vcpu, arch.syscall_eflags_mask);
+    OFFSET(VCPU_sysenter_addr, struct vcpu, arch.sysenter_callback_eip);
+    OFFSET(VCPU_sysenter_sel, struct vcpu, arch.sysenter_callback_cs);
+    OFFSET(VCPU_sysenter_disables_events, struct vcpu,
+           arch.sysenter_disables_events);
+    OFFSET(VCPU_sysexit_addr, struct vcpu, arch.sysexit_eip);
+    OFFSET(VCPU_sysexit_sel, struct vcpu, arch.sysexit_cs);
+    OFFSET(VCPU_eflags_mask, struct vcpu, arch.eflags_mask);
+    OFFSET(VCPU_gp_fault_addr, struct vcpu,
+           arch.guest_context.trap_ctxt[TRAP_gp_fault].address);
+    OFFSET(VCPU_gp_fault_sel, struct vcpu,
+           arch.guest_context.trap_ctxt[TRAP_gp_fault].cs);
     OFFSET(VCPU_kernel_sp, struct vcpu, arch.guest_context.kernel_sp);
     OFFSET(VCPU_kernel_ss, struct vcpu, arch.guest_context.kernel_ss);
     OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
Index: 2007-07-03/xen/arch/x86/x86_64/compat/entry.S
===================================================================
--- 2007-07-03.orig/xen/arch/x86/x86_64/compat/entry.S  2007-07-03 10:35:22.000000000 +0200
+++ 2007-07-03/xen/arch/x86/x86_64/compat/entry.S       2007-07-04 13:26:46.000000000 +0200
@@ -188,6 +188,39 @@ ENTRY(compat_post_handle_exception)
         movb  $0,TRAPBOUNCE_flags(%rdx)
         jmp   compat_test_all_events
 
+ENTRY(compat_syscall)
+        cmpb  $0,VCPU_syscall32_disables_events(%rbx)
+        movzwl VCPU_syscall32_sel(%rbx),%esi
+        movq  VCPU_syscall32_addr(%rbx),%rax
+        setne %cl
+        leaq  VCPU_trap_bounce(%rbx),%rdx
+        testl $~3,%esi
+        leal  (,%rcx,TBF_INTERRUPT),%ecx
+        jz    2f
+1:      movq  %rax,TRAPBOUNCE_eip(%rdx)
+        movw  %si,TRAPBOUNCE_cs(%rdx)
+        movb  %cl,TRAPBOUNCE_flags(%rdx)
+        call  compat_create_bounce_frame
+        jmp   compat_test_all_events
+2:      movl  $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+        movq  VCPU_gp_fault_addr(%rbx),%rax
+        movzwl VCPU_gp_fault_sel(%rbx),%esi
+        movb  $(TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE|TBF_INTERRUPT),%cl
+        movl  $0,TRAPBOUNCE_error_code(%rdx)
+        jmp   1b
+
+ENTRY(compat_sysenter)
+        cmpl  $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+        movzwl VCPU_sysenter_sel(%rbx),%eax
+        movzwl VCPU_gp_fault_sel(%rbx),%ecx
+        cmovel %ecx,%eax
+        testl $~3,%eax
+        movl  $FLAT_COMPAT_USER_SS,UREGS_ss(%rsp)
+        cmovzl %ecx,%eax
+        movw  %ax,TRAPBOUNCE_cs(%rdx)
+        call  compat_create_bounce_frame
+        jmp   compat_test_all_events
+
 ENTRY(compat_int80_direct_trap)
         call  compat_create_bounce_frame
         jmp   compat_test_all_events
@@ -230,7 +263,9 @@ compat_create_bounce_frame:
         setz  %ch                       # %ch == !saved_upcall_mask
         movl  UREGS_eflags+8(%rsp),%eax
         andl  $~X86_EFLAGS_IF,%eax
-        shlb  $1,%ch                    # Bit 9 (EFLAGS.IF)
+        addb  %ch,%ch                   # Bit 9 (EFLAGS.IF)
+        orl   VCPU_eflags_mask(%rbx),%eax
+        movl  $0,VCPU_eflags_mask(%rbx)
         orb   %ch,%ah                   # Fold EFLAGS.IF into %eax
 .Lft6:  movl  %eax,%fs:2*4(%rsi)        # EFLAGS
         movl  UREGS_rip+8(%rsp),%eax
Index: 2007-07-03/xen/arch/x86/x86_64/compat/traps.c
===================================================================
--- 2007-07-03.orig/xen/arch/x86/x86_64/compat/traps.c  2007-07-03 10:35:22.000000000 +0200
+++ 2007-07-03/xen/arch/x86/x86_64/compat/traps.c       2007-07-03 11:54:46.000000000 +0200
@@ -160,12 +160,35 @@ static long compat_register_guest_callba
                       &v->arch.guest_context.flags);
         break;
 
+    case CALLBACKTYPE_syscall:
+        v->arch.syscall32_callback_cs     = reg->address.cs;
+        v->arch.syscall32_callback_eip    = reg->address.eip;
+        v->arch.syscall32_disables_events =
+            (reg->flags & CALLBACKF_mask_events) != 0;
+        if ( v->arch.syscall32_callback_cs & ~3 )
+             write_efer(read_efer() | EFER_SCE);
+        else
+             write_efer(read_efer() & ~EFER_SCE);
+        break;
+
+    case CALLBACKTYPE_sysenter:
+        v->arch.sysenter_callback_cs     = reg->address.cs;
+        v->arch.sysenter_callback_eip    = reg->address.eip;
+        v->arch.sysenter_disables_events =
+            (reg->flags & CALLBACKF_mask_events) != 0;
+        break;
+
+    case CALLBACKTYPE_sysexit:
+        v->arch.sysexit_cs  = reg->address.cs | 3;
+        v->arch.sysexit_eip = reg->address.eip;
+        break;
+
     case CALLBACKTYPE_nmi:
         ret = register_guest_nmi_callback(reg->address.eip);
         break;
 
     default:
-        ret = -EINVAL;
+        ret = -ENOSYS;
         break;
     }
 
@@ -178,12 +201,20 @@ static long compat_unregister_guest_call
 
     switch ( unreg->type )
     {
+    case CALLBACKTYPE_event:
+    case CALLBACKTYPE_failsafe:
+    case CALLBACKTYPE_syscall:
+    case CALLBACKTYPE_sysenter:
+    case CALLBACKTYPE_sysexit:
+        ret = -EINVAL;
+        break;
+
     case CALLBACKTYPE_nmi:
         ret = unregister_guest_nmi_callback();
         break;
 
     default:
-        ret = -EINVAL;
+        ret = -ENOSYS;
         break;
     }
 
Index: 2007-07-03/xen/arch/x86/x86_64/entry.S
===================================================================
--- 2007-07-03.orig/xen/arch/x86/x86_64/entry.S 2007-07-03 10:35:37.000000000 +0200
+++ 2007-07-03/xen/arch/x86/x86_64/entry.S      2007-07-04 12:48:33.000000000 +0200
@@ -26,15 +26,19 @@
         ALIGN
 /* %rbx: struct vcpu */
 switch_to_kernel:
-        leaq  VCPU_trap_bounce(%rbx),%rdx
+        cmpw  $FLAT_USER_CS32,UREGS_cs(%rsp)
         movq  VCPU_syscall_addr(%rbx),%rax
+        leaq  VCPU_trap_bounce(%rbx),%rdx
+        cmoveq VCPU_syscall32_addr(%rbx),%rax
+        btl   $_VGCF_syscall_disables_events,VCPU_guest_context_flags(%rbx)
         movq  %rax,TRAPBOUNCE_eip(%rdx)
-        movb  $0,TRAPBOUNCE_flags(%rdx)
-        bt    $_VGCF_syscall_disables_events,VCPU_guest_context_flags(%rbx)
-        jnc   1f
-        movb  $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
-1:      call  create_bounce_frame
-        andl  $~X86_EFLAGS_DF,UREGS_eflags(%rsp)
+        setc  %cl
+        leal  (,%rcx,TBF_INTERRUPT),%ecx
+        movb  %cl,TRAPBOUNCE_flags(%rdx)
+        call  create_bounce_frame
+        movl  VCPU_syscall_eflags_mask(%rbx),%eax
+        notl  %eax
+        andl  %eax,UREGS_eflags(%rsp)
         jmp   test_all_events
 
 /* %rbx: struct vcpu, interrupts disabled */
@@ -47,7 +51,7 @@ restore_all_guest:
         addq  $8,%rsp
         popq  %rcx                    # RIP
         popq  %r11                    # CS
-        cmpw  $FLAT_KERNEL_CS32,%r11
+        cmpw  $FLAT_USER_CS32,%r11
         popq  %r11                    # RFLAGS
         popq  %rsp                    # RSP
         je    1f
@@ -127,6 +131,9 @@ ENTRY(syscall_enter)
         movl  $TRAP_syscall,4(%rsp)
         SAVE_ALL
         GET_CURRENT(%rbx)
+        movq  VCPU_domain(%rbx),%rcx
+        testb $1,DOMAIN_is_32bit_pv(%rcx)
+        jnz   compat_syscall
         testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
         jz    switch_to_kernel
 
@@ -224,6 +231,41 @@ bad_hypercall:
         movq $-ENOSYS,UREGS_rax(%rsp)
         jmp  test_all_events
 
+ENTRY(sysenter_entry)
+        sti
+        pushq $FLAT_USER_SS
+        pushq $0
+        pushfq
+        pushq $0
+        pushq $0
+        pushq $0
+        movl  $TRAP_syscall,4(%rsp)
+        SAVE_ALL
+        GET_CURRENT(%rbx)
+        movq  VCPU_sysexit_addr(%rbx),%rax
+        movzwl VCPU_sysexit_sel(%rbx),%edx
+        cmpb  $0,VCPU_sysenter_disables_events(%rbx)
+        movq  %rax,UREGS_rip(%rsp)
+        movl  %edx,UREGS_cs(%rsp)
+        movq  VCPU_sysenter_addr(%rbx),%rax
+        setne %cl
+        leaq  VCPU_trap_bounce(%rbx),%rdx
+        testq %rax,%rax
+        leal  (,%rcx,TBF_INTERRUPT),%ecx
+        jz    2f
+1:      movq  VCPU_domain(%rbx),%rdi
+        movq  %rax,TRAPBOUNCE_eip(%rdx)
+        movb  %cl,TRAPBOUNCE_flags(%rdx)
+        testb $1,DOMAIN_is_32bit_pv(%rdi)
+        jnz   compat_sysenter
+        call  create_bounce_frame
+        jmp   test_all_events
+2:      movl  %eax,TRAPBOUNCE_error_code(%rdx)
+        movq  VCPU_gp_fault_addr(%rbx),%rax
+        movb  $(TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE|TBF_INTERRUPT),%cl
+        movl  $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+        jmp   1b
+
 ENTRY(int80_direct_trap)
         pushq $0
         SAVE_ALL
@@ -296,9 +338,11 @@ create_bounce_frame:
         shrq  $32,%rax
         testb $0xFF,%al                 # Bits 0-7: saved_upcall_mask
         setz  %ch                       # %ch == !saved_upcall_mask
-        movq  UREGS_eflags+8(%rsp),%rax
-        andq  $~X86_EFLAGS_IF,%rax
-        shlb  $1,%ch                    # Bit 9 (EFLAGS.IF)
+        movl  UREGS_eflags+8(%rsp),%eax
+        andl  $~X86_EFLAGS_IF,%eax
+        addb  %ch,%ch                   # Bit 9 (EFLAGS.IF)
+        orl   VCPU_eflags_mask(%rbx),%eax
+        movl  $0,VCPU_eflags_mask(%rbx)
         orb   %ch,%ah                   # Fold EFLAGS.IF into %eax
 .Lft5:  movq  %rax,16(%rsi)             # RFLAGS
         movq  UREGS_rip+8(%rsp),%rax
Index: 2007-07-03/xen/arch/x86/x86_64/traps.c
===================================================================
--- 2007-07-03.orig/xen/arch/x86/x86_64/traps.c 2007-07-03 10:34:30.000000000 +0200
+++ 2007-07-03/xen/arch/x86/x86_64/traps.c      2007-07-03 12:06:05.000000000 +0200
@@ -22,6 +22,7 @@
 #include <public/callback.h>
 
 asmlinkage void syscall_enter(void);
+asmlinkage void sysenter_entry(void);
 asmlinkage void compat_hypercall(void);
 asmlinkage void int80_direct_trap(void);
 
@@ -323,12 +324,26 @@ void __init percpu_traps_init(void)
 
     /* Trampoline for SYSCALL entry from long mode. */
     stack = &stack[IST_MAX * PAGE_SIZE]; /* Skip the IST stacks. */
-    wrmsr(MSR_LSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
+    wrmsrl(MSR_LSTAR, (unsigned long)stack);
     stack += write_stack_trampoline(stack, stack_bottom, FLAT_KERNEL_CS64);
 
-    /* Trampoline for SYSCALL entry from compatibility mode. */
-    wrmsr(MSR_CSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
-    stack += write_stack_trampoline(stack, stack_bottom, FLAT_KERNEL_CS32);
+    switch ( boot_cpu_data.x86_vendor )
+    {
+    case X86_VENDOR_INTEL:
+        /* SYSENTER entry. */
+        wrmsrl(MSR_IA32_SYSENTER_ESP, (unsigned long)stack_bottom);
+        wrmsrl(MSR_IA32_SYSENTER_EIP, (unsigned long)sysenter_entry);
+        wrmsr(MSR_IA32_SYSENTER_CS, __HYPERVISOR_CS, 0);
+        break;
+    case X86_VENDOR_AMD:
+        /* Trampoline for SYSCALL entry from compatibility mode. */
+        stack = (char *)L1_CACHE_ALIGN((unsigned long)stack);
+        wrmsrl(MSR_CSTAR, (unsigned long)stack);
+        stack += write_stack_trampoline(stack, stack_bottom, FLAT_USER_CS32);
+        break;
+    default:
+        BUG();
+    }
 
     /* Common SYSCALL parameters. */
     wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS);
@@ -353,6 +368,9 @@ static long register_guest_callback(stru
     long ret = 0;
     struct vcpu *v = current;
 
+    if ( !is_canonical_address(reg->address) )
+        return -EINVAL;
+
     switch ( reg->type )
     {
     case CALLBACKTYPE_event:
@@ -370,6 +388,14 @@ static long register_guest_callback(stru
         break;
 
     case CALLBACKTYPE_syscall:
+        /* See arch_set_info_guest() for why this is being done. */
+        if ( v->arch.syscall32_callback_eip ==
+             v->arch.guest_context.syscall_callback_eip )
+        {
+            v->arch.syscall32_callback_eip = reg->address;
+            v->arch.syscall32_disables_events =
+                (reg->flags & CALLBACKF_mask_events) != 0;
+        }
         v->arch.guest_context.syscall_callback_eip  = reg->address;
         if ( reg->flags & CALLBACKF_mask_events )
             set_bit(_VGCF_syscall_disables_events,
@@ -379,6 +405,43 @@ static long register_guest_callback(stru
                       &v->arch.guest_context.flags);
         break;
 
+    case CALLBACKTYPE_syscall32:
+        v->arch.syscall32_callback_eip = reg->address;
+        v->arch.syscall32_disables_events =
+            (reg->flags & CALLBACKF_mask_events) != 0;
+        break;
+
+    case CALLBACKTYPE_sfmask:
+        v->arch.syscall_eflags_mask = reg->address &
+                                      ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
+        if ( reg->address & X86_EFLAGS_IF )
+        {
+            set_bit(_VGCF_syscall_disables_events,
+                    &v->arch.guest_context.flags);
+            v->arch.syscall32_disables_events = 1;
+        }
+        else
+        {
+            clear_bit(_VGCF_syscall_disables_events,
+                      &v->arch.guest_context.flags);
+            v->arch.syscall32_disables_events = 0;
+        }
+        break;
+
+    case CALLBACKTYPE_sysenter:
+        v->arch.sysenter_callback_eip = reg->address;
+        v->arch.sysenter_disables_events =
+            (reg->flags & CALLBACKF_mask_events) != 0;
+        break;
+
+    case CALLBACKTYPE_sysexit:
+        v->arch.sysexit_eip = reg->address;
+        if ( reg->flags & CALLBACKF_mask_events )
+            v->arch.sysexit_cs = FLAT_USER_CS32;
+        else
+            v->arch.sysexit_cs = FLAT_USER_CS64;
+        break;
+
     case CALLBACKTYPE_nmi:
         ret = register_guest_nmi_callback(reg->address);
         break;
@@ -400,6 +463,10 @@ static long unregister_guest_callback(st
     case CALLBACKTYPE_event:
     case CALLBACKTYPE_failsafe:
     case CALLBACKTYPE_syscall:
+    case CALLBACKTYPE_syscall32:
+    case CALLBACKTYPE_sfmask:
+    case CALLBACKTYPE_sysenter:
+    case CALLBACKTYPE_sysexit:
         ret = -EINVAL;
         break;
 
Index: 2007-07-03/xen/include/asm-x86/cpufeature.h
===================================================================
--- 2007-07-03.orig/xen/include/asm-x86/cpufeature.h    2007-07-03 10:35:30.000000000 +0200
+++ 2007-07-03/xen/include/asm-x86/cpufeature.h 2007-07-03 10:39:14.000000000 +0200
@@ -130,7 +130,7 @@
 #define cpu_has_pae            1
 #define cpu_has_pge            1
 #define cpu_has_apic           boot_cpu_has(X86_FEATURE_APIC)
-#define cpu_has_sep            0
+#define cpu_has_sep            boot_cpu_has(X86_FEATURE_SEP)
 #define cpu_has_mtrr           1
 #define cpu_has_mmx            1
 #define cpu_has_fxsr           1
Index: 2007-07-03/xen/include/asm-x86/domain.h
===================================================================
--- 2007-07-03.orig/xen/include/asm-x86/domain.h        2007-06-15 14:05:46.000000000 +0200
+++ 2007-07-03/xen/include/asm-x86/domain.h     2007-07-04 12:51:40.000000000 +0200
@@ -281,6 +281,16 @@ struct arch_vcpu
 #endif
 #ifdef CONFIG_X86_64
     struct trap_bounce int80_bounce;
+    unsigned long      syscall32_callback_eip;
+    unsigned long      sysenter_callback_eip;
+    unsigned long      sysexit_eip;
+    unsigned short     syscall32_callback_cs;
+    unsigned short     sysenter_callback_cs;
+    unsigned short     sysexit_cs;
+    bool_t             syscall32_disables_events;
+    bool_t             sysenter_disables_events;
+    unsigned int       syscall_eflags_mask;
+    unsigned int       eflags_mask;
 #endif
 
     /* Virtual Machine Extensions */
Index: 2007-07-03/xen/include/public/callback.h
===================================================================
--- 2007-07-03.orig/xen/include/public/callback.h       2006-11-08 10:37:31.000000000 +0100
+++ 2007-07-03/xen/include/public/callback.h    2007-07-03 10:39:14.000000000 +0200
@@ -38,13 +38,34 @@
 
 #define CALLBACKTYPE_event                 0
 #define CALLBACKTYPE_failsafe              1
-#define CALLBACKTYPE_syscall               2 /* x86_64 only */
+#define CALLBACKTYPE_syscall               2 /* x86_64 hv only */
 /*
- * sysenter is only available on x86_32 with the
- * supervisor_mode_kernel option enabled.
+ * sysenter_deprecated is only available on x86_32 with the
+ * supervisor_mode_kernel option enabled, and should not be used in new code.
  */
-#define CALLBACKTYPE_sysenter              3
+#define CALLBACKTYPE_sysenter_deprecated   3
 #define CALLBACKTYPE_nmi                   4
+#if __XEN_INTERFACE_VERSION__ < 0x00030206
+#define CALLBACKTYPE_sysenter              CALLBACKTYPE_sysenter_deprecated
+#else
+/*
+ * sysenter is only available
+ * - on x86_32 with the supervisor_mode_kernel option enabled,
+ * - on x86_64 hv, for x86_32 pv guests or 32-bit app support in x86_64 pv.
+ */
+#define CALLBACKTYPE_sysenter              5
+/*
+ * sysexit is only available on x86_64 hv, and is only used to fill a
+ * sysenter frame's return address (if the guest desires to have a non-NULL
+ * value there). Additionally, since CALLBACKF_mask_events is meaningless
+ * here, it is being (mis-)used for 64-bit guests to distinguish sysenter
+ * callers expected to be in 64-bit mode (flag set) from 32-bit ones (flag
+ * clear).
+ */
+#define CALLBACKTYPE_sysexit               6
+#define CALLBACKTYPE_syscall32             7 /* x86_64 only */
+#define CALLBACKTYPE_sfmask                8 /* x86_64 only */
+#endif
 
 /*
  * Disable event deliver during callback? This flag is ignored for event and
Index: 2007-07-03/xen/include/public/xen-compat.h
===================================================================
--- 2007-07-03.orig/xen/include/public/xen-compat.h     2006-11-16 14:06:41.000000000 +0100
+++ 2007-07-03/xen/include/public/xen-compat.h  2007-07-03 10:39:14.000000000 +0200
@@ -27,7 +27,7 @@
 #ifndef __XEN_PUBLIC_XEN_COMPAT_H__
 #define __XEN_PUBLIC_XEN_COMPAT_H__
 
-#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030205
+#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030206
 
 #if defined(__XEN__) || defined(__XEN_TOOLS__)
 /* Xen is built with matching headers and implements the latest interface. */


