WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] 32-on-64 sysenter for pvops

To: Keir Fraser <Keir.Fraser@xxxxxxxxxxxx>, Jan Beulich <jbeulich@xxxxxxxxxx>, Ian Campbell <Ian.Campbell@xxxxxxxxxxxxx>
Subject: [Xen-devel] 32-on-64 sysenter for pvops
From: Jeremy Fitzhardinge <jeremy@xxxxxxxx>
Date: Mon, 03 Mar 2008 17:06:02 -0800
Cc: Xen-devel <xen-devel@xxxxxxxxxxxxxxxxxxx>
Delivery-date: Mon, 03 Mar 2008 17:11:37 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Thunderbird 2.0.0.12 (X11/20080226)
I implemented sysenter for 32-on-64, since it seemed straightforward enough. It mostly works, but every now and again I get vcpus just hanging in blocked state, as if events are being lost or ignored. It's very similar to the symptoms that other people have reported against the pvops kernel, which I have not managed to reproduce. Perhaps using sysenter is exacerbating an existing bug...

Anyway, a couple of questions. It seems that the stack frame that Xen's sysenter generates is not exactly the same as the one the kernel expects, so the direct access to the threadinfo structure doesn't work properly. What's the difference in the frames?

I guess the other reason for the separate PV Xen sysenter entrypoint is to deal with sysexit not working. I addressed this by implementing a sysexit pvop using iret, though I think I could just set the TIF_IRET flag in threadinfo.

Anyway, could you look at these changes and see if anything problematic leaps out?

I'm also having trouble debugging it, since xenctx and gdbserver-xen don't work on 32-bit compat domains, and the console itself seems to be locked up. I'm not sure how I can get any state out of the vcpus; even an eip would help.

Thanks,
   J
Subject: xen: add support for callbackops hypercall

Signed-off-by: Jeremy Fitzhardinge <jeremy@xxxxxxxxxxxxx>

---
 include/asm-x86/xen/hypercall.h  |    6 ++
 include/asm-x86/xen/interface.h  |    4 +
 include/xen/interface/callback.h |  102 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 112 insertions(+)

===================================================================
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h
@@ -161,6 +161,12 @@
        return _hypercall4(int, set_callbacks,
                           event_selector, event_address,
                           failsafe_selector, failsafe_address);
+}
+
+static inline int
+HYPERVISOR_callback_op(int cmd, void *arg)
+{
+       return _hypercall2(int, callback_op, cmd, arg);
 }
 
 static inline int
===================================================================
--- a/include/asm-x86/xen/interface.h
+++ b/include/asm-x86/xen/interface.h
@@ -171,6 +171,10 @@
     unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */
 };
 
+struct xen_callback {
+       unsigned long cs;
+       unsigned long eip;
+};
 #endif /* !__ASSEMBLY__ */
 
 /*
===================================================================
--- /dev/null
+++ b/include/xen/interface/callback.h
@@ -0,0 +1,102 @@
+/******************************************************************************
+ * callback.h
+ *
+ * Register guest OS callbacks with Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2006, Ian Campbell
+ */
+
+#ifndef __XEN_PUBLIC_CALLBACK_H__
+#define __XEN_PUBLIC_CALLBACK_H__
+
+#include "xen.h"
+
+/*
+ * Prototype for this hypercall is:
+ *   long callback_op(int cmd, void *extra_args)
+ * @cmd        == CALLBACKOP_??? (callback operation).
+ * @extra_args == Operation-specific extra arguments (NULL if none).
+ */
+
+/* ia64, x86: Callback for event delivery. */
+#define CALLBACKTYPE_event                 0
+
+/* x86: Failsafe callback when guest state cannot be restored by Xen. */
+#define CALLBACKTYPE_failsafe              1
+
+/* x86/64 hypervisor: Syscall by 64-bit guest app ('64-on-64-on-64'). */
+#define CALLBACKTYPE_syscall               2
+
+/*
+ * x86/32 hypervisor: Only available on x86/32 when supervisor_mode_kernel
+ *     feature is enabled. Do not use this callback type in new code.
+ */
+#define CALLBACKTYPE_sysenter_deprecated   3
+
+/* x86: Callback for NMI delivery. */
+#define CALLBACKTYPE_nmi                   4
+
+/*
+ * x86: sysenter is only available as follows:
+ * - 32-bit hypervisor: with the supervisor_mode_kernel feature enabled
+ * - 64-bit hypervisor: 32-bit guest applications on Intel CPUs
+ *                      ('32-on-32-on-64', '32-on-64-on-64')
+ *                      [nb. also 64-bit guest applications on Intel CPUs
+ *                           ('64-on-64-on-64'), but syscall is preferred]
+ */
+#define CALLBACKTYPE_sysenter              5
+
+/*
+ * x86/64 hypervisor: Syscall by 32-bit guest app on AMD CPUs
+ *                    ('32-on-32-on-64', '32-on-64-on-64')
+ */
+#define CALLBACKTYPE_syscall32             7
+
+/*
+ * Disable event deliver during callback? This flag is ignored for event and
+ * NMI callbacks: event delivery is unconditionally disabled.
+ */
+#define _CALLBACKF_mask_events             0
+#define CALLBACKF_mask_events              (1U << _CALLBACKF_mask_events)
+
+/*
+ * Register a callback.
+ */
+#define CALLBACKOP_register                0
+struct callback_register {
+    uint16_t type;
+    uint16_t flags;
+    struct xen_callback address;
+};
+
+/*
+ * Unregister a callback.
+ *
+ * Not all callbacks can be unregistered. -EINVAL will be returned if
+ * you attempt to unregister such a callback.
+ */
+#define CALLBACKOP_unregister              1
+struct callback_unregister {
+    uint16_t type;
+    uint16_t _unused;
+};
+
+#endif /* __XEN_PUBLIC_CALLBACK_H__ */
Subject: x86: only enable interrupts when kernel state has been set up

The sysenter path tries to enable interrupts immediately.  Unfortunately
this doesn't work in a paravirt environment, because not enough kernel
state has been set up at that point (namely, pointing %fs to the kernel
percpu data segment).  To fix this, defer ENABLE_INTERRUPTS until after
the kernel state has been set up.

Unfortunately this means that we're running with interrupts disabled
for a while without calling the IRQ tracing code, but that can't be
called without setting up %fs either.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xxxxxxxxxxxxx>
---
 arch/x86/kernel/entry_32.S |    8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

===================================================================
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -291,10 +291,10 @@ ENTRY(ia32_sysenter_target)
        movl TSS_sysenter_sp0(%esp),%esp
 ENTRY(sysenter_past_esp)
        /*
-        * No need to follow this irqs on/off section: the syscall
-        * disabled irqs and here we enable it straight after entry:
+        * Interrupts are disabled here, but we can't trace it until
+        * enough kernel state to call TRACE_IRQS_OFF can be called - but
+        * we immediately enable interrupts at that point anyway.
         */
-       ENABLE_INTERRUPTS(CLBR_NONE)
        pushl $(__USER_DS)
        CFI_ADJUST_CFA_OFFSET 4
        /*CFI_REL_OFFSET ss, 0*/
@@ -330,6 +330,7 @@ 1:  movl (%ebp),%ebp
        pushl %eax
        CFI_ADJUST_CFA_OFFSET 4
        SAVE_ALL
+       ENABLE_INTERRUPTS(CLBR_NONE)
        GET_THREAD_INFO(%ebp)
 
        /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not 
testb */
@@ -546,6 +547,7 @@ syscall_fault:
        pushl %eax                      # save orig_eax
        CFI_ADJUST_CFA_OFFSET 4
        SAVE_ALL
+       ENABLE_INTERRUPTS(CLBR_NONE)
        GET_THREAD_INFO(%ebp)
        movl $-EFAULT,PT_EAX(%esp)
        jmp resume_userspace
Subject: xen: support sysenter/sysexit if hypervisor does

64-bit Xen supports sysenter for 32-bit guests, so support its
use.  (sysenter is faster than int $0x80 in 32-on-64.)

sysexit is still not supported, so we fake it up using iret.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xxxxxxxxxxxxx>
---
 arch/x86/kernel/entry_32.S |   22 +++++++++++++++++-
 arch/x86/xen/enlighten.c   |    3 --
 arch/x86/xen/setup.c       |   21 +++++++++++++++++
 arch/x86/xen/smp.c         |    1 
 arch/x86/xen/xen-asm.S     |   52 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/xen/xen-ops.h     |    3 ++
 6 files changed, 99 insertions(+), 3 deletions(-)

===================================================================
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -311,6 +311,7 @@ ENTRY(sysenter_past_esp)
         * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
         * pushed above; +8 corresponds to copy_thread's esp0 setting.
         */
+sysenter_stack_setup:
        pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
        CFI_ADJUST_CFA_OFFSET 4
        CFI_REL_OFFSET eip, 0
@@ -1025,6 +1026,16 @@ ENDPROC(kernel_thread_helper)
 ENDPROC(kernel_thread_helper)
 
 #ifdef CONFIG_XEN
+/* Xen doesn't set %esp to be precisely what the normal sysenter
+   entrypoint expects, so fix it up before using the normal path. */
+ENTRY(xen_sysenter_target)
+       RING0_INT_FRAME
+       movl $__USER_DS,16(%esp)
+       movl %ebp,12(%esp)
+       movl $__USER_CS,4(%esp)
+       addl $4,%esp
+       jmp sysenter_stack_setup
+
 ENTRY(xen_hypervisor_callback)
        CFI_STARTPROC
        pushl $0
@@ -1044,8 +1055,17 @@ ENTRY(xen_hypervisor_callback)
        jae  1f
 
        call xen_iret_crit_fixup
+       jmp  2f
 
-1:     mov %esp, %eax
+1:     cmpl $xen_sysexit_start_crit,%eax
+       jb   2f
+       cmpl $xen_sysexit_end_crit,%eax
+       jae  2f
+
+       jmp xen_sysexit_crit_fixup
+
+ENTRY(xen_do_upcall)
+2:     mov %esp, %eax
        call xen_evtchn_do_upcall
        jmp  ret_from_intr
        CFI_ENDPROC
===================================================================
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -153,7 +153,6 @@ static void xen_cpuid(unsigned int *ax, 
        if (*ax == 1)
                maskedx = ~((1 << X86_FEATURE_APIC) |  /* disable APIC */
                            (1 << X86_FEATURE_ACPI) |  /* disable ACPI */
-                           (1 << X86_FEATURE_SEP)  |  /* disable SEP */
                            (1 << X86_FEATURE_ACC));   /* thermal monitoring */
 
        asm(XEN_EMULATE_PREFIX "cpuid"
@@ -969,7 +968,7 @@ static const struct pv_cpu_ops xen_cpu_o
        .read_pmc = native_read_pmc,
 
        .iret = xen_iret,
-       .irq_enable_syscall_ret = NULL,  /* never called */
+       .irq_enable_syscall_ret = xen_sysexit,
 
        .load_tr_desc = paravirt_nop,
        .set_ldt = xen_set_ldt,
===================================================================
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -16,6 +16,7 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
+#include <xen/interface/callback.h>
 #include <xen/interface/physdev.h>
 #include <xen/features.h>
 
@@ -67,6 +68,24 @@ static void __init fiddle_vdso(void)
        *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
 }
 
+void xen_enable_sysenter(void)
+{
+       int cpu = smp_processor_id();
+       extern void xen_sysenter_target(void);
+       /* Mask events on entry, even though they get enabled immediately */
+       static struct callback_register sysenter = {
+               .type = CALLBACKTYPE_sysenter,
+               .address = { __KERNEL_CS, (unsigned long)xen_sysenter_target },
+               .flags = CALLBACKF_mask_events,
+       };
+
+       if (!boot_cpu_has(X86_FEATURE_SEP) ||
+           HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) != 0) {
+               clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP);
+               clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
+       }
+}
+
 void __init xen_arch_setup(void)
 {
        struct physdev_set_iopl set_iopl;
@@ -80,6 +99,8 @@ void __init xen_arch_setup(void)
 
        HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned 
long)xen_hypervisor_callback,
                                 __KERNEL_CS, (unsigned 
long)xen_failsafe_callback);
+
+       xen_enable_sysenter();
 
        set_iopl.iopl = 1;
        rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
===================================================================
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -72,6 +72,7 @@ static __cpuinit void cpu_bringup_and_id
        int cpu = smp_processor_id();
 
        cpu_init();
+       xen_enable_sysenter();
 
        preempt_disable();
        per_cpu(cpu_state, cpu) = CPU_ONLINE;
===================================================================
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -275,6 +275,58 @@ 2: ret
 2:     ret
 
 
+ENTRY(xen_sysexit)
+       /* Store vcpu_info pointer for easy access.  Do it this
+          way to avoid having to reload %fs */
+#ifdef CONFIG_SMP
+       GET_THREAD_INFO(%eax)
+       movl TI_cpu(%eax),%eax
+       movl __per_cpu_offset(,%eax,4),%eax
+       mov per_cpu__xen_vcpu(%eax),%eax
+#else
+       movl per_cpu__xen_vcpu, %eax
+#endif
+
+       /* We can't actually use sysexit in a pv guest,
+          so fake it up with iret */
+       pushl $__USER_DS                /* user stack segment */
+       pushl %ecx                      /* user esp */
+       pushl PT_EFLAGS+2*4(%esp)       /* user eflags */
+       pushl $__USER_CS                /* user code segment */
+       pushl %edx                      /* user eip */
+
+       /* Unconditionally unmask events and test for pending */
+       andw $0x00ff, XEN_vcpu_info_pending(%eax)
+
+xen_sysexit_start_crit:
+       /* If there's something pending, mask events again so we
+          can directly inject it back into the kernel. */
+       jnz   1f
+
+       movl PT_EAX+5*4(%esp),%eax
+2:     iret
+1:     movb $1, XEN_vcpu_info_mask(%eax)
+xen_sysexit_end_crit:
+       addl $5*4, %esp         /* remove iret frame */
+       /* no need to re-save regs, but need to restore kernel %fs */
+       mov $__KERNEL_PERCPU, %eax
+       mov %eax, %fs
+       jmp xen_do_upcall
+.section __ex_table,"a"
+       .align 4
+       .long 2b,iret_exc
+.previous
+
+       .globl xen_sysexit_start_crit, xen_sysexit_end_crit
+/*
+       sysexit fixup is easy, since the old frame is still sitting there
+       on the stack.  We just need to remove the new recursive
+       interrupt and return.
+ */
+ENTRY(xen_sysexit_crit_fixup)
+       addl $PT_OLDESP+5*4, %esp               /* remove frame+iret */
+       jmp xen_do_upcall
+
 /*
        Force an event check by making a hypercall,
        but preserve regs before making the call.
===================================================================
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -19,6 +19,7 @@ char * __init xen_memory_setup(void);
 char * __init xen_memory_setup(void);
 void __init xen_arch_setup(void);
 void __init xen_init_IRQ(void);
+void xen_enable_sysenter(void);
 
 void xen_setup_timer(int cpu);
 void xen_setup_cpu_clockevents(void);
@@ -64,4 +65,6 @@ DECL_ASM(void, xen_restore_fl_direct, un
 DECL_ASM(void, xen_restore_fl_direct, unsigned long);
 
 void xen_iret(void);
+void xen_sysexit(void);
+
 #endif /* XEN_OPS_H */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
<Prev in Thread] Current Thread [Next in Thread>