[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC] [PATCH] Dom0: Don't switch back to user space stack in syscall entry



Jeremy, this patch is a RFC for MCE injection issue as discussed in 
http://lists.xensource.com/archives/html/xen-devel/2009-12/msg00849.html .

Currently, when syscall into dom0, the stack is kernel stack. Dom0 kernel will 
firstly switch to user space stack, to create context same as physical syscall 
entry and then call kernel's common syscall entry. In kernel's common syscall 
entry, it will return back to kernel stack.
This give a small security windows for MCE. If a vMCE is injected into dom0 
when dom0 is in the syscalll entry, but is still using the user space stack, it 
will cause great trouble (check above URL for detailed information).

I can think out two options for this issue:
a) The method this patch took. Dom0 didn't try to switch to user space stack. 
Instead, it will use kernel statck directly and jump to kenerl entry.
b) Add IST support to xen hypervisor, Considering the ptrace issue in 
http://lists.xensource.com/archives/html/xen-devel/2009-05/msg00200.html and 
also Ingo's comments in 
http://lkml.indiana.edu/hypermail/linux/kernel/0905.1/00087.html, maybe it 
still have value to add such support. But not sure if that is accepetable for 
xen hypervisor.

As RFC, this patch is just tested to make sure the system can still boot 
succesffully, I did't try ptrace or something else. If the idea is ok, I will 
do more testing on it.

Thanks
Yunhong Jiang

>From 6f8e8019b6cee92b23a3f02787376e0ab3a4244f Mon Sep 17 00:00:00 2001
From: Jiang, Yunhong <yunhong.jiang@xxxxxxxxx>
Date: Thu, 28 Jan 2010 03:43:41 +0800
Subject: [PATCH] Change the syscall/sysexit entry to use the kernel stack 
directly

---
 arch/x86/ia32/ia32entry.S       |    2 +-
 arch/x86/include/asm/irqflags.h |    2 ++
 arch/x86/kernel/entry_64.S      |    2 +-
 arch/x86/xen/xen-asm_64.S       |   37 +++++++++++++++++++++++++------------
 4 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index b09502d..2f8e942 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -284,6 +284,7 @@ ENTRY(ia32_cstar_target)
        movl    %esp,%r8d
        CFI_REGISTER    rsp,r8
        movq    PER_CPU_VAR(kernel_stack),%rsp
+ENTRY(ia32_cstart_after_switch_stack)
        /*
         * No need to follow this irqs on/off section: the syscall
         * disabled irqs and here we enable it straight after entry:
@@ -337,7 +338,6 @@ sysretl_from_sys_call:
        xorq    %r9,%r9
        xorq    %r8,%r8
        TRACE_IRQS_ON
-       movl RSP-ARGOFFSET(%rsp),%esp
        CFI_RESTORE rsp
        USERGS_SYSRET32
        
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index c6ccbe7..7e62aed 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -133,9 +133,11 @@ static inline unsigned long __raw_local_irq_save(void)
 
 #define INTERRUPT_RETURN       iretq
 #define USERGS_SYSRET64                                \
+       movq PER_CPU_VAR(old_rsp), %rsp \
        swapgs;                                 \
        sysretq;
 #define USERGS_SYSRET32                                \
+       movl RSP-ARGOFFSET(%rsp), %esp          \
        swapgs;                                 \
        sysretl
 #define ENABLE_INTERRUPTS_SYSEXIT32            \
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c251be7..5bc75fe 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -472,6 +472,7 @@ ENTRY(system_call_after_swapgs)
         * No need to follow this irqs off/on section - it's straight
         * and short:
         */
+ENTRY(system_call_after_switch_stack)
        ENABLE_INTERRUPTS(CLBR_NONE)
        SAVE_ARGS 8,1
        movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
@@ -510,7 +511,6 @@ sysret_check:
        CFI_REGISTER    rip,rcx
        RESTORE_ARGS 0,-ARG_SKIP,1
        /*CFI_REGISTER  rflags,r11*/
-       movq    PER_CPU_VAR(old_rsp), %rsp
        USERGS_SYSRET64
 
        CFI_RESTORE_STATE
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 53adefd..c953d14 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -12,6 +12,7 @@
  */
 
 #include <asm/errno.h>
+#include <asm/calling.h>
 #include <asm/percpu.h>
 #include <asm/processor-flags.h>
 #include <asm/segment.h>
@@ -68,9 +69,7 @@ ENTRY(xen_sysret64)
         * We're already on the usermode stack at this point, but
         * still with the kernel gs, so we can easily switch back
         */
-       movq %rsp, PER_CPU_VAR(old_rsp)
-       movq PER_CPU_VAR(kernel_stack), %rsp
-
+    movq PER_CPU_VAR(kernel_stack), %rsp
        pushq $__USER_DS
        pushq PER_CPU_VAR(old_rsp)
        pushq %r11
@@ -84,12 +83,15 @@ RELOC(xen_sysret64, 1b+1)
 
 ENTRY(xen_sysret32)
        /*
-        * We're already on the usermode stack at this point, but
+        * We're still on the kernel mode stack at this point, but
         * still with the kernel gs, so we can easily switch back
         */
-       movq %rsp, PER_CPU_VAR(old_rsp)
-       movq PER_CPU_VAR(kernel_stack), %rsp
-
+    /* The ARGS is restored, so don't clobber anything */
+    pushq %rax
+    movq RSP-ARGOFFSET(%rsp), %rax
+    movq %rax, PER_CPU_VAR(old_rsp)
+    popq %rax
+    movq PER_CPU_VAR(kernel_stack), %rsp
        pushq $__USER32_DS
        pushq PER_CPU_VAR(old_rsp)
        pushq %r11
@@ -116,27 +118,38 @@ RELOC(xen_sysret32, 1b+1)
  * rsp->rcx
  *
  * In all the entrypoints, we undo all that to make it look like a
- * CPU-generated syscall/sysenter and jump to the normal entrypoint.
+ * CPU-generated syscall/sysenter and jump to the normal entrypoint,
+ * but we will not switch stack
  */
 
 .macro undo_xen_syscall
+    /* Clobber rcx is ok */
+       movq 5*8(%rsp), %rcx
+    movq %rcx, PER_CPU_VAR(old_rsp)
        mov 0*8(%rsp), %rcx
        mov 1*8(%rsp), %r11
-       mov 5*8(%rsp), %rsp
+       sub $56, %rsp
 .endm
 
 /* Normal 64-bit system call target */
 ENTRY(xen_syscall_target)
        undo_xen_syscall
-       jmp system_call_after_swapgs
+       jmp system_call_after_switch_stack
 ENDPROC(xen_syscall_target)
 
 #ifdef CONFIG_IA32_EMULATION
 
+.macro undo_xen_ia32_syscall
+       mov 0*8(%rsp), %rcx
+       mov 1*8(%rsp), %r11
+       mov     5*8(%esp), %r8d
+       sub $56, %rsp
+.endm
 /* 32-bit compat syscall target */
 ENTRY(xen_syscall32_target)
-       undo_xen_syscall
-       jmp ia32_cstar_target
+       undo_xen_ia32_syscall
+       movl    %esp,%r8d
+       jmp ia32_cstart_after_switch_stack 
 ENDPROC(xen_syscall32_target)
 
 /* 32-bit compat sysenter target */
-- 
1.5.4.2



Attachment: 0002-Change-the-syscall-sysexit-entry-to-use-the-kernel-s.patch
Description: 0002-Change-the-syscall-sysexit-entry-to-use-the-kernel-s.patch

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.