[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries



On Mon, Aug 7, 2017 at 11:59 PM, Andy Lutomirski <luto@xxxxxxxxxx> wrote:
> Xen's raw SYSCALL entries are much less weird than native.  Rather
> than fudging them to look like native entries, use the Xen-provided
> stack frame directly.
>
> This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of
> the SWAPGS_UNSAFE_STACK paravirt hook.  The SYSENTER code would
> benefit from similar treatment.
>
> This makes one change to the native code path: the compat
> instruction that clears the high 32 bits of %rax is moved slightly
> later.  I'd be surprised if this affects performance at all.
>
> Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
> ---
>
> Changes from v1 (which I never actually emailed):
>  - Fix zero-extension in the compat case.
>
>  arch/x86/entry/entry_64.S        |  9 ++-------
>  arch/x86/entry/entry_64_compat.S |  7 +++----
>  arch/x86/xen/xen-asm_64.S        | 23 +++++++++--------------
>  3 files changed, 14 insertions(+), 25 deletions(-)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index aa58155187c5..7cee92cf807f 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -142,14 +142,8 @@ ENTRY(entry_SYSCALL_64)
>          * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
>          * it is too small to ever cause noticeable irq latency.
>          */
> -       SWAPGS_UNSAFE_STACK
> -       /*
> -        * A hypervisor implementation might want to use a label
> -        * after the swapgs, so that it can do the swapgs
> -        * for the guest and jump here on syscall.
> -        */
> -GLOBAL(entry_SYSCALL_64_after_swapgs)
>
> +       swapgs
>         movq    %rsp, PER_CPU_VAR(rsp_scratch)
>         movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
>
> @@ -161,6 +155,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
>         pushq   %r11                            /* pt_regs->flags */
>         pushq   $__USER_CS                      /* pt_regs->cs */
>         pushq   %rcx                            /* pt_regs->ip */
> +GLOBAL(entry_SYSCALL_64_after_hwframe)
>         pushq   %rax                            /* pt_regs->orig_ax */
>         pushq   %rdi                            /* pt_regs->di */
>         pushq   %rsi                            /* pt_regs->si */
> diff --git a/arch/x86/entry/entry_64_compat.S 
> b/arch/x86/entry/entry_64_compat.S
> index e1721dafbcb1..5314d7b8e5ad 100644
> --- a/arch/x86/entry/entry_64_compat.S
> +++ b/arch/x86/entry/entry_64_compat.S
> @@ -183,21 +183,20 @@ ENDPROC(entry_SYSENTER_compat)
>   */
>  ENTRY(entry_SYSCALL_compat)
>         /* Interrupts are off on entry. */
> -       SWAPGS_UNSAFE_STACK
> +       swapgs
>
>         /* Stash user ESP and switch to the kernel stack. */
>         movl    %esp, %r8d
>         movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
>
> -       /* Zero-extending 32-bit regs, do not remove */
> -       movl    %eax, %eax
> -
>         /* Construct struct pt_regs on stack */
>         pushq   $__USER32_DS            /* pt_regs->ss */
>         pushq   %r8                     /* pt_regs->sp */
>         pushq   %r11                    /* pt_regs->flags */
>         pushq   $__USER32_CS            /* pt_regs->cs */
>         pushq   %rcx                    /* pt_regs->ip */
> +GLOBAL(entry_SYSCALL_compat_after_hwframe)
> +       movl    %eax, %eax              /* discard orig_ax high bits */
>         pushq   %rax                    /* pt_regs->orig_ax */
>         pushq   %rdi                    /* pt_regs->di */
>         pushq   %rsi                    /* pt_regs->si */
> diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
> index c3df43141e70..a8a4f4c460a6 100644
> --- a/arch/x86/xen/xen-asm_64.S
> +++ b/arch/x86/xen/xen-asm_64.S
> @@ -82,34 +82,29 @@ RELOC(xen_sysret64, 1b+1)
>   *     rip
>   *     r11
>   * rsp->rcx
> - *
> - * In all the entrypoints, we undo all that to make it look like a
> - * CPU-generated syscall/sysenter and jump to the normal entrypoint.
>   */
>
> -.macro undo_xen_syscall
> -       mov 0*8(%rsp), %rcx
> -       mov 1*8(%rsp), %r11
> -       mov 5*8(%rsp), %rsp
> -.endm
> -
>  /* Normal 64-bit system call target */
>  ENTRY(xen_syscall_target)
> -       undo_xen_syscall
> -       jmp entry_SYSCALL_64_after_swapgs
> +       popq %rcx
> +       popq %r11
> +       jmp entry_SYSCALL_64_after_hwframe
>  ENDPROC(xen_syscall_target)
>
>  #ifdef CONFIG_IA32_EMULATION
>
>  /* 32-bit compat syscall target */
>  ENTRY(xen_syscall32_target)
> -       undo_xen_syscall
> -       jmp entry_SYSCALL_compat
> +       popq %rcx
> +       popq %r11
> +       jmp entry_SYSCALL_compat_after_hwframe
>  ENDPROC(xen_syscall32_target)
>
>  /* 32-bit compat sysenter target */
>  ENTRY(xen_sysenter_target)
> -       undo_xen_syscall
> +       mov 0*8(%rsp), %rcx
> +       mov 1*8(%rsp), %r11
> +       mov 5*8(%rsp), %rsp
>         jmp entry_SYSENTER_compat
>  ENDPROC(xen_sysenter_target)

This patch causes the iopl_32 and ioperm_32 self-tests to fail on a
64-bit PV kernel.  The 64-bit versions pass. It gets a seg fault after
"parent: write to 0x80 (should fail)", and the fault isn't caught by
the signal handler.  It just dumps back to the shell.  The tests pass
after reverting this.

--
Brian Gerst

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.