
[Xen-devel] [PATCH 2/2, v2] x86: save/restore only partial register state where possible


  • To: "xen-devel" <xen-devel@xxxxxxxxxxxxx>
  • From: "Jan Beulich" <JBeulich@xxxxxxxx>
  • Date: Tue, 30 Oct 2012 14:29:40 +0000
  • Delivery-date: Tue, 30 Oct 2012 14:30:03 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xen.org>

... and make restore conditional not only upon having saved the state,
but also upon whether the saved state was actually modified (i.e. the
restore can be skipped when the register values are known to have been
preserved).
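
For illustration only (not part of the patch): the decision the reworked
RESTORE_ALL macro makes can be summarized by the C sketch below, where
restore_r12_to_r15() is a made-up stand-in for the actual reloads:

    /* Illustrative only -- the real logic is the RESTORE_ALL macro in
     * asm_defns.h further down. */
    if ( regs->entry_vector & TRAP_regs_dirty )
        restore_r12_to_r15(regs);   /* frame copies may differ: reload */
    /* else nothing to do: either the frame is partial (R12-R15 were
     * never saved, but were not clobbered either), or it is complete
     * and the registers still hold exactly the saved values (which the
     * !NDEBUG check in RESTORE_ALL asserts). */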

Note that RBP is unconditionally considered a volatile register (i.e.
irrespective of CONFIG_FRAME_POINTER), since the RBP handling would
become overly complicated due to the need to save/restore it on the
compat mode hypercall path [6th argument].

Note further that for compat mode code paths, saving/restoring R8...R15
is entirely unnecessary - we don't allow those guests to enter 64-bit
mode, and hence they have no way of seeing these registers' contents
(and consequently there is also no information leak, unless the context
saving domctl were itself considered one).

Finally, note that this may not yet properly deal with gdbstub's needs
(but if it doesn't, I can't really suggest adjustments, as I don't know
that code).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v2: Add a few comments.

This patch functionally depends on x86-drop-get_x86_gpr.patch.
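
For reference only (again not part of the patch): after this change the
flag bits live above the actual vector number in entry_vector, which is
why the irq.c hunks below truncate with a (u8) cast. A sketch of the
layout, with the flag positions taken from the asm_defns.h hunk and
TRAP_syscall's position assumed from its use as a flag:

    /*
     * entry_vector layout (sketch):
     *   bits 0-7 : hardware vector / trap number -- hence the (u8)
     *              truncations added in irq.c
     *   bit    8 : TRAP_syscall (pre-existing; now OR-ed in rather than
     *              assigned, so the flags above it survive) -- position
     *              assumed here
     *   bit   16 : _TRAP_regs_partial -- R12-R15 not saved in the frame
     *   bit   17 : _TRAP_regs_dirty   -- saved R12-R15 need restoring
     */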

--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -10,6 +10,7 @@ typedef bool bool_t;
 #define BUG() abort()
 
 #define cpu_has_amd_erratum(nr) 0
+#define mark_regs_dirty(r) ((void)(r))
 
 #include "x86_emulate/x86_emulate.h"
 #include "x86_emulate/x86_emulate.c"
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -143,6 +143,7 @@ static void continue_idle_domain(struct 
 static void continue_nonidle_domain(struct vcpu *v)
 {
     check_wakeup_from_wait();
+    mark_regs_dirty(guest_cpu_user_regs());
     reset_stack_and_jump(ret_from_intr);
 }
 
@@ -1312,7 +1313,7 @@ static void load_segments(struct vcpu *n
             if ( test_bit(_VGCF_failsafe_disables_events, &n->arch.vgc_flags) )
                 vcpu_info(n, evtchn_upcall_mask) = 1;
 
-            regs->entry_vector  = TRAP_syscall;
+            regs->entry_vector |= TRAP_syscall;
             regs->_eflags      &= 0xFFFCBEFFUL;
             regs->ss            = FLAT_COMPAT_KERNEL_SS;
             regs->_esp          = (unsigned long)(esp-7);
@@ -1354,7 +1355,7 @@ static void load_segments(struct vcpu *n
         if ( test_bit(_VGCF_failsafe_disables_events, &n->arch.vgc_flags) )
             vcpu_info(n, evtchn_upcall_mask) = 1;
 
-        regs->entry_vector  = TRAP_syscall;
+        regs->entry_vector |= TRAP_syscall;
         regs->rflags       &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
                                 X86_EFLAGS_NT|X86_EFLAGS_TF);
         regs->ss            = FLAT_KERNEL_SS;
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -704,7 +704,7 @@ void irq_complete_move(struct irq_desc *
     if (likely(!desc->arch.move_in_progress))
         return;
 
-    vector = get_irq_regs()->entry_vector;
+    vector = (u8)get_irq_regs()->entry_vector;
     me = smp_processor_id();
 
     if ( vector == desc->arch.vector &&
@@ -798,7 +798,7 @@ void do_IRQ(struct cpu_user_regs *regs)
     struct irqaction *action;
     uint32_t          tsc_in;
     struct irq_desc  *desc;
-    unsigned int      vector = regs->entry_vector;
+    unsigned int      vector = (u8)regs->entry_vector;
     int irq = __get_cpu_var(vector_irq[vector]);
     struct cpu_user_regs *old_regs = set_irq_regs(regs);
     
@@ -892,7 +892,7 @@ void do_IRQ(struct cpu_user_regs *regs)
 
  out:
     if ( desc->handler->end )
-        desc->handler->end(desc, regs->entry_vector);
+        desc->handler->end(desc, vector);
  out_no_end:
     spin_unlock(&desc->lock);
  out_no_unlock:
@@ -1113,7 +1113,7 @@ static void __do_IRQ_guest(int irq)
     struct domain      *d;
     int                 i, sp;
     struct pending_eoi *peoi = this_cpu(pending_eoi);
-    int vector = get_irq_regs()->entry_vector;
+    unsigned int        vector = (u8)get_irq_regs()->entry_vector;
 
     if ( unlikely(action->nr_guests == 0) )
     {
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2082,6 +2082,7 @@ static int emulate_privileged_op(struct 
             goto fail;
         if ( admin_io_okay(port, op_bytes, v, regs) )
         {
+            mark_regs_dirty(regs);
             io_emul(regs);            
         }
         else
@@ -2111,6 +2112,7 @@ static int emulate_privileged_op(struct 
             goto fail;
         if ( admin_io_okay(port, op_bytes, v, regs) )
         {
+            mark_regs_dirty(regs);
             io_emul(regs);            
             if ( (op_bytes == 1) && pv_post_outb_hook )
                 pv_post_outb_hook(port, regs->eax);
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -14,8 +14,7 @@
 
 ENTRY(compat_hypercall)
         pushq $0
-        movl  $TRAP_syscall,4(%rsp)
-        SAVE_ALL
+        SAVE_VOLATILE type=TRAP_syscall compat=1
 
         cmpb  $0,untrusted_msi(%rip)
 UNLIKELY_START(ne, msi_check)
@@ -126,6 +125,7 @@ compat_test_guest_events:
 /* %rbx: struct vcpu */
 compat_process_softirqs:
         sti
+        andl  $~TRAP_regs_partial,UREGS_entry_vector(%rsp)
         call  do_softirq
         jmp   compat_test_all_events
 
@@ -172,7 +172,7 @@ compat_bad_hypercall:
 /* %rbx: struct vcpu, interrupts disabled */
 compat_restore_all_guest:
         ASSERT_INTERRUPTS_DISABLED
-        RESTORE_ALL adj=8
+        RESTORE_ALL adj=8 compat=1
 .Lft0:  iretq
 
 .section .fixup,"ax"
@@ -243,7 +243,7 @@ UNLIKELY_END(compat_syscall_gpf)
 
 ENTRY(compat_sysenter)
         movq  VCPU_trap_ctxt(%rbx),%rcx
-        cmpl  $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+        cmpb  $TRAP_gp_fault,UREGS_entry_vector(%rsp)
         movzwl VCPU_sysenter_sel(%rbx),%eax
         movzwl TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_cs(%rcx),%ecx
         cmovel %ecx,%eax
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -123,9 +123,8 @@ ENTRY(syscall_enter)
         movl  $FLAT_KERNEL_SS,24(%rsp)
         pushq %rcx
         pushq $0
-        movl  $TRAP_syscall,4(%rsp)
-        movq  24(%rsp),%r11 /* Re-load user RFLAGS into %r11 before SAVE_ALL */
-        SAVE_ALL
+        movq  24(%rsp),%r11 /* Re-load user RFLAGS into %r11 before saving */
+        SAVE_VOLATILE TRAP_syscall
         GET_CURRENT(%rbx)
         movq  VCPU_domain(%rbx),%rcx
         testb $1,DOMAIN_is_32bit_pv(%rcx)
@@ -222,6 +221,7 @@ test_guest_events:
 /* %rbx: struct vcpu */
 process_softirqs:
         sti       
+        SAVE_PRESERVED
         call do_softirq
         jmp  test_all_events
 
@@ -275,8 +275,7 @@ sysenter_eflags_saved:
         pushq $3 /* ring 3 null cs */
         pushq $0 /* null rip */
         pushq $0
-        movl  $TRAP_syscall,4(%rsp)
-        SAVE_ALL
+        SAVE_VOLATILE TRAP_syscall
         GET_CURRENT(%rbx)
         cmpb  $0,VCPU_sysenter_disables_events(%rbx)
         movq  VCPU_sysenter_addr(%rbx),%rax
@@ -286,6 +285,7 @@ sysenter_eflags_saved:
         leal  (,%rcx,TBF_INTERRUPT),%ecx
 UNLIKELY_START(z, sysenter_gpf)
         movq  VCPU_trap_ctxt(%rbx),%rsi
+        SAVE_PRESERVED
         movl  $TRAP_gp_fault,UREGS_entry_vector(%rsp)
         movl  %eax,TRAPBOUNCE_error_code(%rdx)
         movq  TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_eip(%rsi),%rax
@@ -302,7 +302,7 @@ UNLIKELY_END(sysenter_gpf)
 
 ENTRY(int80_direct_trap)
         pushq $0
-        SAVE_ALL
+        SAVE_VOLATILE 0x80
 
         cmpb  $0,untrusted_msi(%rip)
 UNLIKELY_START(ne, msi_check)
@@ -331,6 +331,7 @@ int80_slow_path:
          * IDT entry with DPL==0.
          */
         movl  $((0x80 << 3) | 0x2),UREGS_error_code(%rsp)
+        SAVE_PRESERVED
         movl  $TRAP_gp_fault,UREGS_entry_vector(%rsp)
         /* A GPF wouldn't have incremented the instruction pointer. */
         subq  $2,UREGS_rip(%rsp)
@@ -412,7 +413,7 @@ UNLIKELY_END(bounce_failsafe)
         /* Rewrite our stack frame and return to guest-OS mode. */
         /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
         /* Also clear AC: alignment checks shouldn't trigger in kernel mode. */
-        movl  $TRAP_syscall,UREGS_entry_vector+8(%rsp)
+        orl   $TRAP_syscall,UREGS_entry_vector+8(%rsp)
         andl  $~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|\
                  X86_EFLAGS_NT|X86_EFLAGS_TF),UREGS_eflags+8(%rsp)
         movq  $FLAT_KERNEL_SS,UREGS_ss+8(%rsp)
@@ -477,7 +478,7 @@ handle_exception_saved:
         jz    exception_with_ints_disabled
         sti
 1:      movq  %rsp,%rdi
-        movl  UREGS_entry_vector(%rsp),%eax
+        movzbl UREGS_entry_vector(%rsp),%eax
         leaq  exception_table(%rip),%rdx
         GET_CURRENT(%rbx)
         PERFC_INCR(exceptions, %rax, %rbx)
@@ -518,7 +519,7 @@ exception_with_ints_disabled:
 
 /* No special register assumptions. */
 FATAL_exception_with_ints_disabled:
-        movl  UREGS_entry_vector(%rsp),%edi
+        movzbl UREGS_entry_vector(%rsp),%edi
         movq  %rsp,%rsi
         call  fatal_trap
         ud2
@@ -624,7 +625,7 @@ handle_ist_exception:
         movq  %rdi,%rsp
         rep   movsq
 1:      movq  %rsp,%rdi
-        movl  UREGS_entry_vector(%rsp),%eax
+        movzbl UREGS_entry_vector(%rsp),%eax
         leaq  exception_table(%rip),%rdx
         callq *(%rdx,%rax,8)
         jmp   ret_from_intr
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -67,10 +67,15 @@ static void _show_registers(
            regs->rbp, regs->rsp, regs->r8);
     printk("r9:  %016lx   r10: %016lx   r11: %016lx\n",
            regs->r9,  regs->r10, regs->r11);
-    printk("r12: %016lx   r13: %016lx   r14: %016lx\n",
-           regs->r12, regs->r13, regs->r14);
-    printk("r15: %016lx   cr0: %016lx   cr4: %016lx\n",
-           regs->r15, crs[0], crs[4]);
+    if ( !(regs->entry_vector & TRAP_regs_partial) )
+    {
+        printk("r12: %016lx   r13: %016lx   r14: %016lx\n",
+               regs->r12, regs->r13, regs->r14);
+        printk("r15: %016lx   cr0: %016lx   cr4: %016lx\n",
+               regs->r15, crs[0], crs[4]);
+    }
+    else
+        printk("cr0: %016lx   cr4: %016lx\n", crs[0], crs[4]);
     printk("cr3: %016lx   cr2: %016lx\n", crs[3], crs[2]);
     printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   "
            "ss: %04x   cs: %04x\n",
@@ -301,7 +306,7 @@ unsigned long do_iret(void)
 
     if ( !(iret_saved.flags & VGCF_in_syscall) )
     {
-        regs->entry_vector = 0;
+        regs->entry_vector &= ~TRAP_syscall;
         regs->r11 = iret_saved.r11;
         regs->rcx = iret_saved.rcx;
     }
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1292,10 +1292,10 @@ decode_register(
     case  9: p = &regs->r9;  break;
     case 10: p = &regs->r10; break;
     case 11: p = &regs->r11; break;
-    case 12: p = &regs->r12; break;
-    case 13: p = &regs->r13; break;
-    case 14: p = &regs->r14; break;
-    case 15: p = &regs->r15; break;
+    case 12: mark_regs_dirty(regs); p = &regs->r12; break;
+    case 13: mark_regs_dirty(regs); p = &regs->r13; break;
+    case 14: mark_regs_dirty(regs); p = &regs->r14; break;
+    case 15: mark_regs_dirty(regs); p = &regs->r15; break;
 #endif
     default: BUG(); p = NULL; break;
     }
--- a/xen/arch/x86/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate.c
@@ -10,6 +10,7 @@
  */
 
 #include <asm/x86_emulate.h>
+#include <asm/asm_defns.h> /* mark_regs_dirty() */
 #include <asm/processor.h> /* current_cpu_info */
 #include <asm/amd.h> /* cpu_has_amd_erratum() */
 
--- a/xen/common/wait.c
+++ b/xen/common/wait.c
@@ -124,10 +124,12 @@ void wake_up_all(struct waitqueue_head *
 
 static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
 {
-    char *cpu_info = (char *)get_cpu_info();
+    struct cpu_info *cpu_info = get_cpu_info();
     struct vcpu *curr = current;
     unsigned long dummy;
+    u32 entry_vector = cpu_info->guest_cpu_user_regs.entry_vector;
 
+    cpu_info->guest_cpu_user_regs.entry_vector &= ~TRAP_regs_partial;
     ASSERT(wqv->esp == 0);
 
     /* Save current VCPU affinity; force wakeup on *this* CPU only. */
@@ -157,6 +159,8 @@ static void __prepare_to_wait(struct wai
         gdprintk(XENLOG_ERR, "Stack too large in %s\n", __FUNCTION__);
         domain_crash_synchronous();
     }
+
+    cpu_info->guest_cpu_user_regs.entry_vector = entry_vector;
 }
 
 static void __finish_wait(struct waitqueue_vcpu *wqv)
--- a/xen/include/asm-x86/x86_64/asm_defns.h
+++ b/xen/include/asm-x86/x86_64/asm_defns.h
@@ -26,6 +26,31 @@
 #define ASSERT_INTERRUPTS_ENABLED  ASSERT_INTERRUPT_STATUS(nz)
 #define ASSERT_INTERRUPTS_DISABLED ASSERT_INTERRUPT_STATUS(z)
 
+/*
+ * This flag is set in an exception frame when registers R12-R15 did not get
+ * saved.
+ */
+#define _TRAP_regs_partial 16
+#define TRAP_regs_partial  (1 << _TRAP_regs_partial)
+/*
+ * This flag gets set in an exception frame when registers R12-R15 possibly
+ * get modified from their originally saved values and hence need to be
+ * restored even if the normal call flow would restore register values.
+ *
+ * The flag being set implies _TRAP_regs_partial to be unset. Restoring
+ * R12-R15 thus is
+ * - required when this flag is set,
+ * - safe when _TRAP_regs_partial is unset.
+ */
+#define _TRAP_regs_dirty   17
+#define TRAP_regs_dirty    (1 << _TRAP_regs_dirty)
+
+#define mark_regs_dirty(r) ({ \
+        struct cpu_user_regs *r__ = (r); \
+        ASSERT(!((r__)->entry_vector & TRAP_regs_partial)); \
+        r__->entry_vector |= TRAP_regs_dirty; \
+})
+
 #define SAVE_ALL                                \
         addq  $-(UREGS_error_code-UREGS_r15), %rsp; \
         cld;                                    \
@@ -47,11 +72,66 @@
         movq  %r15,UREGS_r15(%rsp);             \
 
 #ifdef __ASSEMBLY__
-.macro LOAD_C_CLOBBERED
+
+/*
+ * Save all registers not preserved by C code or used in entry/exit code. Mark
+ * the frame as partial.
+ *
+ * @type: exception type
+ * @compat: R8-R15 don't need saving, and the frame nevertheless is complete
+ */
+.macro SAVE_VOLATILE type compat=0
+.if \compat
+        movl  $\type,UREGS_entry_vector-UREGS_error_code(%rsp)
+.else
+        movl  $\type|TRAP_regs_partial,\
+              UREGS_entry_vector-UREGS_error_code(%rsp)
+.endif
+        addq  $-(UREGS_error_code-UREGS_r15),%rsp
+        cld
+        movq  %rdi,UREGS_rdi(%rsp)
+        movq  %rsi,UREGS_rsi(%rsp)
+        movq  %rdx,UREGS_rdx(%rsp)
+        movq  %rcx,UREGS_rcx(%rsp)
+        movq  %rax,UREGS_rax(%rsp)
+.if !\compat
+        movq  %r8,UREGS_r8(%rsp)
+        movq  %r9,UREGS_r9(%rsp)
+        movq  %r10,UREGS_r10(%rsp)
+        movq  %r11,UREGS_r11(%rsp)
+.endif
+        movq  %rbx,UREGS_rbx(%rsp)
+        movq  %rbp,UREGS_rbp(%rsp)
+        SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp)
+.endm
+
+/*
+ * Complete a frame potentially only partially saved.
+ */
+.macro SAVE_PRESERVED
+        btrl  $_TRAP_regs_partial,UREGS_entry_vector(%rsp)
+        jnc   987f
+        movq  %r12,UREGS_r12(%rsp)
+        movq  %r13,UREGS_r13(%rsp)
+        movq  %r14,UREGS_r14(%rsp)
+        movq  %r15,UREGS_r15(%rsp)
+987:
+.endm
+
+/*
+ * Reload registers not preserved by C code from frame.
+ *
+ * @compat: R8-R11 don't need reloading
+ *
+ * For the way it is used in RESTORE_ALL, this macro must preserve EFLAGS.ZF.
+ */
+.macro LOAD_C_CLOBBERED compat=0
+.if !\compat
         movq  UREGS_r11(%rsp),%r11
         movq  UREGS_r10(%rsp),%r10
         movq  UREGS_r9(%rsp),%r9
         movq  UREGS_r8(%rsp),%r8
+.endif
         movq  UREGS_rax(%rsp),%rax
         movq  UREGS_rcx(%rsp),%rcx
         movq  UREGS_rdx(%rsp),%rdx
@@ -59,16 +139,45 @@
         movq  UREGS_rdi(%rsp),%rdi
 .endm
 
-.macro RESTORE_ALL adj=0
+/*
+ * Restore all previously saved registers.
+ *
+ * @adj: extra stack pointer adjustment to be folded into the adjustment done
+ *       anyway at the end of the macro
+ * @compat: R8-R15 don't need reloading
+ */
+.macro RESTORE_ALL adj=0 compat=0
+.if !\compat
+        testl $TRAP_regs_dirty,UREGS_entry_vector(%rsp)
+.endif
+        LOAD_C_CLOBBERED \compat
+.if !\compat
+        jz    987f
         movq  UREGS_r15(%rsp),%r15
         movq  UREGS_r14(%rsp),%r14
         movq  UREGS_r13(%rsp),%r13
         movq  UREGS_r12(%rsp),%r12
-        movq  UREGS_rbp(%rsp),%rbp
+#ifndef NDEBUG
+        .subsection 1
+987:    testl $TRAP_regs_partial,UREGS_entry_vector(%rsp)
+        jnz   987f
+        cmpq  UREGS_r15(%rsp),%r15
+        jne   789f
+        cmpq  UREGS_r14(%rsp),%r14
+        jne   789f
+        cmpq  UREGS_r13(%rsp),%r13
+        jne   789f
+        cmpq  UREGS_r12(%rsp),%r12
+        je    987f
+789:    ud2
+        .subsection 0
+#endif
+.endif
+987:    movq  UREGS_rbp(%rsp),%rbp
         movq  UREGS_rbx(%rsp),%rbx
-        LOAD_C_CLOBBERED
         subq  $-(UREGS_error_code-UREGS_r15+\adj), %rsp
 .endm
+
 #endif
 
 #ifdef PERF_COUNTERS


Attachment: x86-save-restore-reduce.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 

