WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 3/12] Provide basic Xen PM infrastructure

To: <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH 3/12] Provide basic Xen PM infrastructure
From: "Tian, Kevin" <kevin.tian@xxxxxxxxx>
Date: Tue, 15 May 2007 22:16:30 +0800
Delivery-date: Tue, 15 May 2007 07:15:37 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Thread-index: AceW+6GWqRDl7Ly2S/+Xy1Y62QxDUA==
Thread-topic: [PATCH 3/12] Provide basic Xen PM infrastructure
Add basic infrastructure for Xen power management. Currently
only S3 (suspend to RAM) is supported.

Signed-off-by: Ke Yu <ke.yu@xxxxxxxxx>
Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx>

diff -r 84c103f8881a xen/arch/x86/acpi/Makefile
--- a/xen/arch/x86/acpi/Makefile        Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/acpi/Makefile        Mon May 14 16:34:31 2007 -0400
@@ -1,1 +1,2 @@ obj-y += boot.o
 obj-y += boot.o
+obj-y += power.o
diff -r 84c103f8881a xen/arch/x86/acpi/power.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/acpi/power.c Mon May 14 20:49:36 2007 -0400
@@ -0,0 +1,209 @@
+/* drivers/acpi/sleep/power.c - PM core functionality for Xen
+ *
+ * Copyrights from Linux side:
+ * Copyright (c) 2000-2003 Patrick Mochel
+ * Copyright (C) 2001-2003 Pavel Machek <pavel@xxxxxxx>
+ * Copyright (c) 2003 Open Source Development Lab
+ * Copyright (c) 2004 David Shaohua Li <shaohua.li@xxxxxxxxx>
+ * Copyright (c) 2005 Alexey Starikovskiy
<alexey.y.starikovskiy@xxxxxxxxx>
+ *
+ * Slimmed with Xen specific support.
+ */
+
+#include <asm/io.h>
+#define CONFIG_ACPI_SLEEP
+#include <asm/acpi.h>
+#include <xen/acpi.h>
+#include <xen/errno.h>
+#include <xen/iocap.h>
+#include <xen/sched.h>
+#include <asm/acpi.h>
+#include <asm/irq.h>
+#include <asm/init.h>
+#include <xen/spinlock.h>
+#include <xen/sched.h>
+#include <xen/domain.h>
+#include <xen/console.h>
+
+u8 sleep_states[ACPI_S_STATE_COUNT];
+DEFINE_SPINLOCK(pm_lock);
+
+extern void do_suspend_lowlevel(void);
+
+static char *acpi_states[ACPI_S_STATE_COUNT] =
+{
+    [ACPI_STATE_S1] = "standby",
+    [ACPI_STATE_S3] = "mem",
+    [ACPI_STATE_S4] = "disk",
+};
+
+/* address in low memory of the wakeup routine. */
+unsigned long acpi_wakeup_address;
+unsigned long acpi_video_flags;
+extern char wakeup_start, wakeup_end;
+unsigned long saved_videomode;
+extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
+
+/**
+ * acpi_save_state_mem - save kernel state
+ *
+ * Create an identity mapped page table and copy the wakeup routine to
+ * low memory.
+ */
+int acpi_save_state_mem(void)
+{
+    if (!acpi_wakeup_address)
+        return 1;
+
+    init_low_mappings();
+    memcpy((void *)acpi_wakeup_address, &wakeup_start,
+           &wakeup_end - &wakeup_start);
+    acpi_copy_wakeup_routine(acpi_wakeup_address);
+    return 0;
+}
+
+/*
+ * acpi_restore_state - undo effects of acpi_save_state_mem
+ */
+void acpi_restore_state_mem(void)
+{
+#ifdef CONFIG_X86_64
+    zap_low_mappings();
+#else
+    zap_low_mappings(idle_pg_table_l2);
+#endif
+}
+
+/**
+ * acpi_reserve_bootmem - do _very_ early ACPI initialisation
+ *
+ * We allocate a page from the first 1MB of memory for the wakeup
+ * routine for when we come back from a sleep state. The
+ * runtime allocator allows specification of <16MB pages, but not
+ * <1MB pages.
+ */
+void __init acpi_reserve_bootmem(void)
+{
+    if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) {
+       pmprintk(XENLOG_ERR, "ACPI: Wakeup code way too big, S3
disabled.\n");
+       return;
+    }
+    
+    /*  0~640K is not used by anyone, except 0x9000 is used by smp
+     *  trampoline code, so choose 0x7000 for XEN acpi wake up code
+     */
+    acpi_wakeup_address = (unsigned long)__va(0x7000);
+}
+
+/* Add suspend failure recover later */
+static int device_power_down(void)
+{
+    console_suspend();
+
+    time_suspend();
+
+    i8259A_suspend();
+    
+    ioapic_suspend();
+    
+    lapic_suspend();
+
+    return 0;
+}
+
+static void device_power_up(void)
+{
+    lapic_resume();
+    
+    ioapic_resume();
+
+    i8259A_resume();
+    
+    time_resume();
+
+    console_resume();
+}
+
+int enter_state(u32 state)
+{
+    struct domain *d;
+    unsigned long flags;
+    int error;
+
+    if (state <= ACPI_STATE_S0 || state > ACPI_S_STATES_MAX)
+        return -EINVAL;
+
+    if (!spin_trylock(&pm_lock))
+        return -EBUSY;
+    
+    for_each_domain(d)
+        if (d->domain_id != 0)
+            domain_pause(d);
+
+    pmprintk(XENLOG_INFO, "PM: Preparing system for %s sleep\n",
+        acpi_states[state]);
+
+    local_irq_save(flags);
+
+    if ((error = device_power_down())) {
+        printk(XENLOG_ERR "Some devices failed to power down\n");
+        goto Done;
+    }
+
+    ACPI_FLUSH_CPU_CACHE();
+
+    /* Do arch specific saving of state. */
+    if (state > ACPI_STATE_S1) {
+        error = acpi_save_state_mem();
+        if (error)
+            goto Powerup;
+    }
+
+    switch (state) {
+        case ACPI_STATE_S3:
+            do_suspend_lowlevel();
+            break;
+        default:
+            error = -EINVAL;
+            goto Powerup;
+    }
+
+    pmprintk(XENLOG_INFO, "Back to C!\n");
+    if (state > ACPI_STATE_S1)
+        acpi_restore_state_mem();
+
+ Powerup:
+    device_power_up();
+
+    pmprintk(XENLOG_INFO, "PM: Finishing wakeup.\n");
+    for_each_domain(d)
+       if (d->domain_id!=0)
+           domain_unpause(d);
+
+ Done:
+    local_irq_restore(flags);
+    spin_unlock(&pm_lock);
+    return error;
+
+}
+
+static int __init acpi_sleep_init(void)
+{
+    int i = 0; 
+
+    pmprintk(XENLOG_INFO, "ACPI (supports");
+    for (i = 0; i < ACPI_S_STATE_COUNT; i++) {
+        if (i == ACPI_STATE_S3){
+            sleep_states[i] = 1;
+            printk(" S%d", i);
+        }
+        else{
+            sleep_states[i] = 0;
+        }
+    }
+    printk(")\n");
+
+    acpi_reserve_bootmem();
+    return 0;
+}
+__initcall(acpi_sleep_init);
diff -r 84c103f8881a xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S        Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/boot/x86_32.S        Mon May 14 16:34:31 2007 -0400
@@ -146,6 +146,8 @@ start_paging:
         rdmsr
         bts     $_EFER_NX,%eax
         wrmsr
+        mov     $1,%eax
+        mov     %eax, nx_enabled-__PAGE_OFFSET
 no_execute_disable:
         pop     %ebx
 #endif
diff -r 84c103f8881a xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S        Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/boot/x86_64.S        Mon May 14 16:34:31 2007 -0400
@@ -198,6 +198,7 @@ multiboot_ptr:
         .long   0
 
         .word   0
+        .global nopaging_gdt_descr
 nopaging_gdt_descr:
         .word   LAST_RESERVED_GDT_BYTE
         .quad   gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
@@ -207,6 +208,7 @@ cpuid_ext_features:
         
         .word   0
 gdt_descr:
+        .global gdt_descr
         .word   LAST_RESERVED_GDT_BYTE
         .quad   gdt_table - FIRST_RESERVED_GDT_BYTE
 
diff -r 84c103f8881a xen/arch/x86/x86_32/Makefile
--- a/xen/arch/x86/x86_32/Makefile      Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_32/Makefile      Mon May 14 16:34:31 2007 -0400
@@ -6,3 +6,5 @@ obj-y += traps.o
 obj-y += traps.o
 
 obj-$(supervisor_mode_kernel) += supervisor_mode_kernel.o
+subdir-y += acpi
+subdir-y += power
diff -r 84c103f8881a xen/arch/x86/x86_32/acpi/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/x86_32/acpi/Makefile Mon May 14 16:34:31 2007 -0400
@@ -0,0 +1,1 @@
+obj-y += wakeup.o
diff -r 84c103f8881a xen/arch/x86/x86_32/acpi/wakeup.S
--- a/xen/arch/x86/x86_32/acpi/wakeup.S Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_32/acpi/wakeup.S Mon May 14 16:34:31 2007 -0400
@@ -1,6 +1,11 @@
 .text
+#ifndef __XEN__
 #include <linux/linkage.h>
 #include <asm/segment.h>
+#else
+#include <xen/config.h>
+#include <asm/asm_defns.h>
+#endif
 #include <asm/page.h>
 
 #
@@ -56,7 +61,11 @@ 1:
 1:
 
        # set up page table
+#ifndef __XEN__
        movl    $swsusp_pg_dir-__PAGE_OFFSET, %eax
+#else
+       movl    $idle_pg_table-__PAGE_OFFSET, %eax
+#endif
        movl    %eax, %cr3
 
        testl   $1, real_efer_save_restore - wakeup_code
@@ -88,7 +97,11 @@ 1:
        cmpl    $0x12345678, %eax
        jne     bogus_real_magic
 
+#ifndef __XEN__
        ljmpl   $__KERNEL_CS,$wakeup_pmode_return
+#else
+       ljmpl   $(__HYPERVISOR_CS),$wakeup_pmode_return
+#endif
 
 real_save_gdt: .word 0
                .long 0
@@ -184,7 +197,11 @@ ENTRY(wakeup_end)
 .org   0x1000
 
 wakeup_pmode_return:
+#ifndef __XEN__
        movw    $__KERNEL_DS, %ax
+#else
+       movw    $__HYPERVISOR_DS, %ax
+#endif
        movw    %ax, %ss
        movw    %ax, %ds
        movw    %ax, %es
@@ -196,7 +213,11 @@ wakeup_pmode_return:
        lgdt    saved_gdt
        lidt    saved_idt
        lldt    saved_ldt
+#ifndef __XEN__
        ljmp    $(__KERNEL_CS),$1f
+#else
+       ljmp    $(__HYPERVISOR_CS),$1f
+#endif
 1:
        movl    %cr3, %eax
        movl    %eax, %cr3
diff -r 84c103f8881a xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c  Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_32/mm.c  Mon May 14 16:34:31 2007 -0400
@@ -34,6 +34,7 @@ unsigned int PAGE_HYPERVISOR_NOCACHE = _
 unsigned int PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;
 
 static unsigned long mpt_size;
+int nx_enabled = 0;
 
 void *alloc_xen_pagetable(void)
 {
@@ -133,7 +134,7 @@ void __init setup_idle_pagetable(void)
                                 __PAGE_HYPERVISOR));
 }
 
-void __init zap_low_mappings(l2_pgentry_t *base)
+void zap_low_mappings(l2_pgentry_t *base)
 {
     int i;
     u32 addr;
@@ -147,6 +148,18 @@ void __init zap_low_mappings(l2_pgentry_
             continue;
         l2e_write(&base[i], l2e_empty());
     }
+
+    flush_tlb_all_pge();
+}
+
+void init_low_mappings(void)
+{
+    int sz = ((DIRECTMAP_MBYTES << 20) >> L2_PAGETABLE_SHIFT) *
+             sizeof(l2_pgentry_t);
+
+    memcpy(idle_pg_table_l2,
+           idle_pg_table_l2 + (DIRECTMAP_VIRT_START >>
L2_PAGETABLE_SHIFT),
+           sz);
 
     flush_tlb_all_pge();
 }
diff -r 84c103f8881a xen/arch/x86/x86_32/power/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/x86_32/power/Makefile        Mon May 14 16:34:31 2007
-0400
@@ -0,0 +1,1 @@
+obj-y += cpu.o
diff -r 84c103f8881a xen/arch/x86/x86_32/power/cpu.c
--- a/xen/arch/x86/x86_32/power/cpu.c   Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_32/power/cpu.c   Mon May 14 20:49:34 2007 -0400
@@ -7,10 +7,91 @@
  * Copyright (c) 2001 Patrick Mochel <mochel@xxxxxxxx>
  */
 
+#ifndef __XEN__
 #include <linux/module.h>
 #include <linux/suspend.h>
 #include <asm/mtrr.h>
 #include <asm/mce.h>
+#else
+#include <xen/config.h>
+#include <xen/acpi.h>
+#include <xen/smp.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/flushtlb.h>
+
+/* image of the saved processor state */
+struct saved_context {
+       u16 es, fs, gs, ss;
+       unsigned long cr0, cr2, cr3, cr4;
+       u16 gdt_pad;
+       u16 gdt_limit;
+       unsigned long gdt_base;
+       u16 idt_pad;
+       u16 idt_limit;
+       unsigned long idt_base;
+       u16 ldt;
+       u16 tss;
+       unsigned long tr;
+       unsigned long safety;
+       unsigned long return_address;
+} __attribute__((packed));
+
+#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q"
(GDT_ENTRY_TSS*8))
+#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q"
(GDT_ENTRY_LDT*8))
+
+#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
+#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
+#define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr))
+#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt))
+
+#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr))
+#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
+#define store_tr(tr) __asm__ ("str %0":"=mr" (tr))
+#define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt))
+
+/*
+ * Load a segment. Fall back on loading the zero
+ * segment if something goes wrong..
+ */
+#define loadsegment(seg,value)          \
+    asm volatile("\n"           \
+        "1:\t"              \
+        "mov %0,%%" #seg "\n"       \
+        "2:\n"              \
+        ".section .fixup,\"ax\"\n"  \
+        "3:\t"              \
+        "pushl $0\n\t"          \
+        "popl %%" #seg "\n\t"       \
+        "jmp 2b\n"          \
+        ".previous\n"           \
+        ".section __ex_table,\"a\"\n\t" \
+        ".align 4\n\t"          \
+        ".long 1b,3b\n"         \
+        ".previous"         \
+        : :"rm" (value))
+
+/*
+ * Save a segment register away
+ */
+#define savesegment(seg, value) \
+       asm volatile("mov %%" #seg ",%0":"=rm" (value))
+
+#define set_debugreg(value, register)           \
+        __asm__("movl %0,%%db" #register        \
+            : /* no output */           \
+            :"r" (value))
+
+void kernel_fpu_begin(void)
+{
+       clts();
+}
+
+void kernel_fpu_end(void)
+{
+       stts();
+}
+#endif
 
 static struct saved_context saved_context;
 
@@ -34,8 +115,10 @@ void __save_processor_state(struct saved
         * segment registers
         */
        savesegment(es, ctxt->es);
+#ifndef __XEN__
        savesegment(fs, ctxt->fs);
        savesegment(gs, ctxt->gs);
+#endif
        savesegment(ss, ctxt->ss);
 
        /*
@@ -60,6 +143,7 @@ static void do_fpu_end(void)
        kernel_fpu_end();
 }
 
+#ifndef __XEN__
 static void fix_processor_context(void)
 {
        int cpu = smp_processor_id();
@@ -84,6 +168,32 @@ static void fix_processor_context(void)
        }
 
 }
+#else
+static void fix_processor_context(void)
+{
+       int cpu = smp_processor_id();
+       struct tss_struct * t = &init_tss[cpu];;
+
+       if ( supervisor_mode_kernel && cpu_has_sep )
+               wrmsr(MSR_IA32_SYSENTER_ESP, &t->esp1, 0);
+
+       set_tss_desc(cpu,t);    /* This just modifies memory; should not
be necessary. But... This is necessary, because 386 hardware has concept
of busy TSS or some similar stupidity. */
+
+       load_TR(cpu);          /* This does ltr */
+       __asm__ __volatile__ ( "lldt %%ax" : : "a" (0) );/* This does
lldt */
+
+       /*
+        * Now maybe reset the debug registers
+        */
+       set_debugreg(0UL, 0);
+       set_debugreg(0UL, 1);
+       set_debugreg(0UL, 2);
+       set_debugreg(0UL, 3);
+       /* no 4 and 5 */
+       set_debugreg(0UL, 6);
+       set_debugreg(0UL, 7);
+}
+#endif
 
 void __restore_processor_state(struct saved_context *ctxt)
 {
@@ -106,15 +216,19 @@ void __restore_processor_state(struct sa
         * segment registers
         */
        loadsegment(es, ctxt->es);
+#ifndef __XEN__
        loadsegment(fs, ctxt->fs);
        loadsegment(gs, ctxt->gs);
+#endif
        loadsegment(ss, ctxt->ss);
 
+#ifndef __XEN__
        /*
         * sysenter MSRs
         */
        if (boot_cpu_has(X86_FEATURE_SEP))
                enable_sep_cpu();
+#endif
 
        fix_processor_context();
        do_fpu_end();
@@ -127,6 +241,8 @@ void restore_processor_state(void)
        __restore_processor_state(&saved_context);
 }
 
+#ifndef __XEN__
 /* Needed by apm.c */
 EXPORT_SYMBOL(save_processor_state);
 EXPORT_SYMBOL(restore_processor_state);
+#endif
diff -r 84c103f8881a xen/arch/x86/x86_64/Makefile
--- a/xen/arch/x86/x86_64/Makefile      Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_64/Makefile      Mon May 14 16:34:31 2007 -0400
@@ -5,6 +5,8 @@ obj-y += gpr_switch.o
 obj-y += gpr_switch.o
 obj-y += mm.o
 obj-y += traps.o
+
+subdir-y += power
 
 obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_COMPAT) += domain.o
diff -r 84c103f8881a xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c  Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_64/mm.c  Mon May 14 16:34:31 2007 -0400
@@ -191,9 +191,16 @@ void __init setup_idle_pagetable(void)
                   __PAGE_HYPERVISOR));
 }
 
-void __init zap_low_mappings(void)
+void zap_low_mappings(void)
 {
     l4e_write(&idle_pg_table[0], l4e_empty());
+    flush_tlb_all_pge();
+}
+
+void init_low_mappings(void)
+{
+    l4e_write(&idle_pg_table[0],
+               l4e_from_paddr(__pa(idle_pg_table_l3),
__PAGE_HYPERVISOR));
     flush_tlb_all_pge();
 }
 
diff -r 84c103f8881a xen/arch/x86/x86_64/power/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/x86_64/power/Makefile        Mon May 14 16:34:31 2007
-0400
@@ -0,0 +1,2 @@
+obj-y += wakeup.o
+obj-y += suspend.o
diff -r 84c103f8881a xen/arch/x86/x86_64/power/suspend.c
--- a/xen/arch/x86/x86_64/power/suspend.c       Mon May 14 15:12:50 2007
-0400
+++ b/xen/arch/x86/x86_64/power/suspend.c       Mon May 14 21:03:02 2007
-0400
@@ -6,12 +6,17 @@
  * Copyright (c) 2002 Pavel Machek <pavel@xxxxxxx>
  * Copyright (c) 2001 Patrick Mochel <mochel@xxxxxxxx>
  */
-
-#include <linux/smp.h>
-#include <linux/suspend.h>
-#include <asm/proto.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
+#include <xen/config.h>
+#include <xen/acpi.h>
+#include <xen/smp.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/flushtlb.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
+#include <asm/x86_64/suspend.h>
+#include <asm/x86_64/asm_defns.h>
+#include <asm/ldt.h>
 
 struct saved_context saved_context;
 
@@ -21,6 +26,44 @@ unsigned long saved_context_r12, saved_c
 unsigned long saved_context_r12, saved_context_r13, saved_context_r14,
saved_context_r15;
 unsigned long saved_context_eflags;
 
+#ifdef __XEN__
+unsigned long saved_context_msr_cstar, saved_context_msr_lstar;
+unsigned long saved_video_mode;
+
+#define MSR_KERNEL_GS_BASE MSR_SHADOW_GS_BASE
+
+static inline void kernel_fpu_begin(void){
+    clts();
+}
+
+static inline void kernel_fpu_end(void){
+    stts();
+}
+
+static inline void syscall_init(void){
+    wrmsrl(MSR_LSTAR, saved_context_msr_lstar);
+    wrmsrl(MSR_CSTAR, saved_context_msr_cstar);
+    wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS);
+    wrmsr(MSR_SYSCALL_MASK, EF_VM|EF_RF|EF_NT|EF_DF|EF_IE|EF_TF, 0U);
+}
+
+static inline void load_gs_index(unsigned base){
+        __asm__ __volatile__ (
+            "     swapgs              \n"
+            "1:   movl %k0,%%gs       \n"
+            "    "safe_swapgs"        \n"
+            ".section .fixup,\"ax\"   \n"
+            "2:   xorl %k0,%k0        \n"
+            "     jmp  1b             \n"
+            ".previous                \n"
+            ".section __ex_table,\"a\"\n"
+            "    .align 8             \n"
+            "    .quad 1b,2b          \n"
+            ".previous                  "
+            : : "r" (base&0xffff) );
+}
+#endif /* __XEN__*/
+
 void __save_processor_state(struct saved_context *ctxt)
 {
     kernel_fpu_begin();
@@ -55,6 +98,9 @@ void __save_processor_state(struct saved
     asm volatile ("movq %%cr3, %0" : "=r" (ctxt->cr3));
     asm volatile ("movq %%cr4, %0" : "=r" (ctxt->cr4));
     asm volatile ("movq %%cr8, %0" : "=r" (ctxt->cr8));
+
+    rdmsrl(MSR_CSTAR, saved_context_msr_cstar);
+    rdmsrl(MSR_LSTAR, saved_context_msr_lstar);
 }
 
 void save_processor_state(void)
@@ -91,10 +137,24 @@ void __restore_processor_state(struct sa
     /*
      * segment registers
      */
+#ifndef __XEN__
     asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
     asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
     asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
     load_gs_index(ctxt->gs);
+#else
+    /* Xen doesn't care these selectors. However if previous suspend
+     * happens on an idle context, we should avoid recover them since
+     * idle page table only maps xen portion of gdt table and that
+     * load may result page fault badly for guest portion.
+     */
+    if (!is_idle_vcpu(current)) {
+       asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
+       asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
+       asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
+       load_gs_index(ctxt->gs);
+    }
+#endif
     asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
 
     wrmsrl(MSR_FS_BASE, ctxt->fs_base);
@@ -114,6 +174,28 @@ void restore_processor_state(void)
 
 void fix_processor_context(void)
 {
+#ifdef __XEN__    
+    int cpu = smp_processor_id();
+
+    struct tss_struct *t = &init_tss[cpu];
+
+    set_tss_desc(cpu,t);    /* This just modifies memory; should not be
neccessary. But... This is neccessary, because 386 hardware has concept
of busy TSS or some similar stupidity. */
+
+    syscall_init();                         /* This sets MSR_*STAR and
related */
+    load_TR(cpu);             /* This does ltr */
+    load_LDT(current); /* This does lldt */
+
+    /*
+     * Now maybe reset the debug registers
+     */
+    set_debugreg(current, 0UL, 0);
+    set_debugreg(current, 0UL, 1);
+    set_debugreg(current, 0UL, 2);
+    set_debugreg(current, 0UL, 3);
+    /* no 4 and 5 */
+    set_debugreg(current, 0UL, 6);
+    set_debugreg(current, 0UL, 7);
+#else
     int cpu = smp_processor_id();
     struct tss_struct *t = &per_cpu(init_tss, cpu);
 
@@ -137,85 +219,6 @@ void fix_processor_context(void)
                 loaddebug(&current->thread, 6);
                 loaddebug(&current->thread, 7);
     }
-}
-
-#ifdef CONFIG_SOFTWARE_SUSPEND
-/* Defined in arch/x86_64/kernel/suspend_asm.S */
-extern int restore_image(void);
-
-pgd_t *temp_level4_pgt;
-
-static int res_phys_pud_init(pud_t *pud, unsigned long address,
unsigned long end)
-{
-    long i, j;
-
-    i = pud_index(address);
-    pud = pud + i;
-    for (; i < PTRS_PER_PUD; pud++, i++) {
-        unsigned long paddr;
-        pmd_t *pmd;
-
-        paddr = address + i*PUD_SIZE;
-        if (paddr >= end)
-            break;
-
-        pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
-        if (!pmd)
-            return -ENOMEM;
-        set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
-        for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
-            unsigned long pe;
-
-            if (paddr >= end)
-                break;
-            pe = _PAGE_NX | _PAGE_PSE | _KERNPG_TABLE | paddr;
-            pe &= __supported_pte_mask;
-            set_pmd(pmd, __pmd(pe));
-        }
-    }
-    return 0;
-}
-
-static int set_up_temporary_mappings(void)
-{
-    unsigned long start, end, next;
-    int error;
-
-    temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
-    if (!temp_level4_pgt)
-        return -ENOMEM;
-
-    /* It is safe to reuse the original kernel mapping */
-    set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
-        init_level4_pgt[pgd_index(__START_KERNEL_map)]);
-
-    /* Set up the direct mapping from scratch */
-    start = (unsigned long)pfn_to_kaddr(0);
-    end = (unsigned long)pfn_to_kaddr(end_pfn);
-
-    for (; start < end; start = next) {
-        pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
-        if (!pud)
-            return -ENOMEM;
-        next = start + PGDIR_SIZE;
-        if (next > end)
-            next = end;
-        if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
-            return error;
-        set_pgd(temp_level4_pgt + pgd_index(start),
-            mk_kernel_pgd(__pa(pud)));
-    }
-    return 0;
-}
-
-int swsusp_arch_resume(void)
-{
-    int error;
-
-    /* We have got enough memory and from now on we cannot recover */
-    if ((error = set_up_temporary_mappings()))
-        return error;
-    restore_image();
-    return 0;
-}
-#endif /* CONFIG_SOFTWARE_SUSPEND */
+#endif /* __XEN__ */
+}
+
diff -r 84c103f8881a xen/arch/x86/x86_64/power/wakeup.S
--- a/xen/arch/x86/x86_64/power/wakeup.S        Mon May 14 15:12:50 2007
-0400
+++ b/xen/arch/x86/x86_64/power/wakeup.S        Mon May 14 16:34:31 2007
-0400
@@ -1,8 +1,16 @@
 .text
+
+#ifdef __XEN__
+#include <asm/page.h>
+#include <asm/msr.h>
+#include <xen/config.h>
+#include <asm/config.h>
+#else
 #include <linux/linkage.h>
 #include <asm/segment.h>
 #include <asm/page.h>
 #include <asm/msr.h>
+#endif /* __XEN__ */
 
 # Copyright 2003 Pavel Machek <pavel@xxxxxxx>, distribute under GPLv2
 #
@@ -15,6 +23,13 @@
 # cs = 0x1234, eip = 0x05
 #
 
+#ifdef __XEN__
+/* A wakeup gdt is used before restoring cpu context. Clean it later */
+#define __KERNEL_CS   0x10
+#define __KERNEL_DS   0x18
+#define __START_KERNEL_map __PAGE_OFFSET
+#define SYM_PHYS(sym) (sym - __PAGE_OFFSET)
+#endif
 
 ALIGN
     .align  16
@@ -121,7 +136,13 @@ wakeup_32:
     movl    %eax, %cr4
 
     /* Setup early boot stage 4 level pagetables */
+    
+#ifdef __XEN__    
+     movl    $SYM_PHYS(idle_pg_table), %eax
+#else    
     movl    $(wakeup_level4_pgt - __START_KERNEL_map), %eax
+#endif /* __XEN__ */
+
     movl    %eax, %cr3
 
     /* Setup EFER (Extended Feature Enable Register) */
@@ -178,8 +199,12 @@ reach_compatibility_mode:
     movb    $0xa9, %al  ;  outb %al, $0x80
     
     /* Load new GDT with the 64bit segment using 32bit descriptor */
+#ifdef __XEN__    
+    lgdt    %cs:SYM_PHYS(nopaging_gdt_descr)
+#else
     movl    $(pGDT32 - __START_KERNEL_map), %eax
     lgdt    (%eax)
+#endif /* __XEN__ */
 
     movl    $(wakeup_jumpvector - __START_KERNEL_map), %eax
     /* Finally jump in 64bit mode */
@@ -187,7 +212,11 @@ reach_compatibility_mode:
 
 wakeup_jumpvector:
     .long   wakeup_long64 - __START_KERNEL_map
+#ifdef __XEN__    
+    .word   __HYPERVISOR_CS
+#else    
     .word   __KERNEL_CS
+#endif    
 
 .code64
 
@@ -199,20 +228,44 @@ wakeup_long64:
      * addresses where we're currently running on. We have to do that
here
      * because in 32bit we couldn't load a 64bit linear address.
      */
+#ifdef __XEN__
+    lgdt    SYM_PHYS(nopaging_gdt_descr)
+#else    
     lgdt    cpu_gdt_descr - __START_KERNEL_map
+#endif
 
     movw    $0x0e00 + 'u', %ds:(0xb8016)
     
     nop
     nop
+#ifdef __XEN__    
+    movw    $__HYPERVISOR_DS, %ax
+#else    
     movw    $__KERNEL_DS, %ax
+#endif    
     movw    %ax, %ss    
     movw    %ax, %ds
     movw    %ax, %es
     movw    %ax, %fs
     movw    %ax, %gs
+    
+#ifdef __XEN__    
+    /* Xen doesn't use large memory mode, and can we? */
+    movq    SYM_PHYS(saved_esp), %rsp
+    
+    movw    $0x0e00 + 'x', %ds:(0xb8018)
+    movq    SYM_PHYS(saved_ebx), %rbx
+    movq    SYM_PHYS(saved_edi), %rdi
+    movq    SYM_PHYS(saved_esi), %rsi
+    movq    SYM_PHYS(saved_ebp), %rbp
+
+    movw    $0x0e00 + '!', %ds:(0xb801a)
+    movq    SYM_PHYS(saved_eip), %rax
+
+#else    
+
     movq    saved_esp, %rsp
-
+    
     movw    $0x0e00 + 'x', %ds:(0xb8018)
     movq    saved_ebx, %rbx
     movq    saved_edi, %rdi
@@ -221,6 +274,8 @@ wakeup_long64:
 
     movw    $0x0e00 + '!', %ds:(0xb801a)
     movq    saved_eip, %rax
+    
+#endif /* __XEN__ */
     jmp *%rax
 
 .code32
@@ -355,7 +410,6 @@ bogus_magic2:
     movw    $0x0e00 + '2', %ds:(0xb8018)
     jmp bogus_magic2
     
-
 wakeup_stack_begin: # Stack grows down
 
 .org    0xff0
@@ -378,6 +432,35 @@ ENTRY(acpi_copy_wakeup_routine)
     pushq   %rcx
     pushq   %rdx
 
+#ifdef __XEN__
+    
+    sgdt    saved_gdt(%rip)
+    sidt    saved_idt(%rip)
+    sldt    saved_ldt(%rip)
+    str saved_tss(%rip)
+    
+    movq    %cr3, %rdx
+    movq    %rdx, saved_cr3(%rip)
+    movq    %cr4, %rdx
+    movq    %rdx, saved_cr4(%rip)
+    movq    %cr0, %rdx
+    movq    %rdx, saved_cr0(%rip)
+    sgdt    real_save_gdt - wakeup_start (,%rdi)
+    movl    $MSR_EFER, %ecx
+    rdmsr
+    movl    %eax, saved_efer(%rip)
+    movl    %edx, saved_efer2(%rip)
+
+    movl    saved_video_mode(%rip), %edx
+    movl    %edx, video_mode - wakeup_start (,%rdi)
+    movl    acpi_video_flags(%rip), %edx
+    movl    %edx, video_flags - wakeup_start (,%rdi)
+    movq    $0x12345678, real_magic - wakeup_start (,%rdi)
+    movq    $0x123456789abcdef0, %rdx
+    movq    %rdx, saved_magic(%rip)
+   
+#else
+    
     sgdt    saved_gdt
     sidt    saved_idt
     sldt    saved_ldt
@@ -415,6 +498,8 @@ ENTRY(acpi_copy_wakeup_routine)
     movq    %rax, %cr0
     jmp 1f      # Flush pipelines
 1:
+#endif /* __XEN__ */
+
     # restore the regs we used
     popq    %rdx
     popq    %rcx
@@ -450,6 +535,19 @@ do_suspend_lowlevel:
     movq %r15, saved_context_r15(%rip)
     pushfq ; popq saved_context_eflags(%rip)
 
+#ifdef __XEN__
+/* Xen did not use large memory mode, so change code to ip relative */
+
+    lea .L97(%rip), %rax
+    movq %rax, saved_eip(%rip)
+    
+    movq %rsp,saved_esp(%rip)
+    movq %rbp,saved_ebp(%rip)
+    movq %rbx,saved_ebx(%rip)
+    movq %rdi,saved_edi(%rip)
+    movq %rsi,saved_esi(%rip)
+
+#else    
     movq    $.L97, saved_eip(%rip)
 
     movq %rsp,saved_esp
@@ -458,6 +556,8 @@ do_suspend_lowlevel:
     movq %rdi,saved_edi
     movq %rsi,saved_esi
 
+#endif /* __XEN__ */
+    
     addq    $8, %rsp
     movl    $3, %edi
     xorl    %eax, %eax
@@ -466,7 +566,11 @@ do_suspend_lowlevel:
     .p2align 4,,7
 .L99:
     .align 4
+#ifdef __XEN__
+    movl $__HYPERVISOR_DS32, %eax
+#else    
     movl    $24, %eax
+#endif    
     movw %ax, %ds
     movq    saved_context+58(%rip), %rax
     movq %rax, %cr4
@@ -525,3 +629,4 @@ saved_cr4:  .quad 0
 saved_cr4:  .quad 0
 saved_efer: .quad 0
 saved_efer2:    .quad 0
+
diff -r 84c103f8881a xen/include/asm-x86/acpi.h
--- a/xen/include/asm-x86/acpi.h        Mon May 14 15:12:50 2007 -0400
+++ b/xen/include/asm-x86/acpi.h        Mon May 14 18:14:26 2007 -0400
@@ -178,4 +178,6 @@ extern u8 x86_acpiid_to_apicid[];
 extern u8 x86_acpiid_to_apicid[];
 #define MAX_LOCAL_APIC 256
 
+#define pmprintk(_l, _f, _a...)                      \
+    printk(_l "<PM>" _f, ## _a )
 #endif /*_ASM_ACPI_H*/
diff -r 84c103f8881a xen/include/asm-x86/config.h
--- a/xen/include/asm-x86/config.h      Mon May 14 15:12:50 2007 -0400
+++ b/xen/include/asm-x86/config.h      Mon May 14 20:49:27 2007 -0400
@@ -367,4 +367,6 @@ extern unsigned long xenheap_phys_end; /
 #define ELFSIZE 32
 #endif
 
+#define FASTCALL(x)    x __attribute__((regparm(3)))
+
 #endif /* __X86_CONFIG_H__ */
diff -r 84c103f8881a xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Mon May 14 15:12:50 2007 -0400
+++ b/xen/include/asm-x86/page.h        Mon May 14 16:34:31 2007 -0400
@@ -287,6 +287,9 @@ extern l2_pgentry_t   idle_pg_table_l2[R
 #else
 extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
 extern l2_pgentry_t   idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES];
+#if CONFIG_PAGING_LEVELS == 4
+extern l3_pgentry_t   idle_pg_table_l3[L3_PAGETABLE_ENTRIES];
+#endif
 #ifdef CONFIG_COMPAT
 extern l2_pgentry_t  *compat_idle_pg_table_l2;
 extern unsigned int   m2p_compat_vstart;
diff -r 84c103f8881a xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Mon May 14 15:12:50 2007 -0400
+++ b/xen/include/asm-x86/processor.h   Mon May 14 16:34:31 2007 -0400
@@ -297,6 +297,11 @@ static inline unsigned long read_cr2(voi
     return __cr2;
 }
 
+static inline void write_cr2(unsigned long val)
+{
+       __asm__("mov %0,%%cr2": :"r" ((unsigned long)val));
+}
+
 static inline unsigned long read_cr4(void)
 {
     unsigned long __cr4;
diff -r 84c103f8881a xen/include/asm-x86/smp.h
--- a/xen/include/asm-x86/smp.h Mon May 14 15:12:50 2007 -0400
+++ b/xen/include/asm-x86/smp.h Mon May 14 20:49:27 2007 -0400
@@ -45,6 +45,7 @@ extern void zap_low_mappings(l2_pgentry_
 extern void zap_low_mappings(l2_pgentry_t *base);
 #endif
 
+extern void init_low_mappings(void);
 #define MAX_APICID 256
 extern u8 x86_cpu_to_apicid[];
 
diff -r 84c103f8881a xen/include/asm-x86/x86_64/suspend.h
--- a/xen/include/asm-x86/x86_64/suspend.h      Mon May 14 15:12:50 2007
-0400
+++ b/xen/include/asm-x86/x86_64/suspend.h      Mon May 14 16:34:31 2007
-0400
@@ -39,11 +39,12 @@ extern unsigned long saved_context_eflag
 extern unsigned long saved_context_eflags;
 
 #define loaddebug(thread,register) \
-    set_debugreg((thread)->debugreg##register, register)
+               __asm__("movq %0,%%db" #register  \
+                       : /* no output */ \
+                       :"r" ((thread)->debugreg##register))
 
 extern void fix_processor_context(void);
 
-#ifdef CONFIG_ACPI_SLEEP
 extern unsigned long saved_eip;
 extern unsigned long saved_esp;
 extern unsigned long saved_ebp;
@@ -53,4 +54,3 @@ extern unsigned long saved_edi;
 
 /* routines for saving/restoring kernel state */
 extern int acpi_save_state_mem(void);
-#endif

Attachment: xen_pm_arch.patch
Description: xen_pm_arch.patch

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-devel] [PATCH 3/12] Provide basic Xen PM infrastructure, Tian, Kevin <=