[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH]: kexec: framework and i386



kexec: framework and i386

Here is a first cut of kexec for dom0/xen, which will actually
kexec the physical machine from xen. The approach taken is
to move the architecture-dependent kexec code into a new hypercall.

Some notes:
  * machine_kexec_cleanup() and machine_kexec_prepare() don't do
    anything on i386. So while this patch adds a framework for them,
    I am not sure what parameters are needed at this stage.
  * Only works for UP, as machine_shutdown is not implemented yet
  * kexecing into xen does not seem to work. I think that
    kexec-tools needs updating, but I have not investigated yet
  * I don't believe that kdump works yet
  * This patch was prepared against xen-unstable.hg 9514.
    As of today (9574) two new hypercalls have been added.
    I rediffed and moved the kexec hypercall to 33. However,
    this exceeds NR_hypercalls, which is currently 32.
    I tried increasing this, but dom0 now crashes
    in entry.S on init, even after rebuilding both xen and the kernel
    completely from scratch after a make distclean. Help!!

Prepared with the assistance of my colleague Magnus Damm

Signed-Off-By: Horms <horms@xxxxxxxxxxxx>

--- from-0001/linux-2.6-xen-sparse/arch/i386/Kconfig
+++ to-work/linux-2.6-xen-sparse/arch/i386/Kconfig      2006-04-03 
15:13:38.000000000 +0900
@@ -726,7 +726,7 @@ source kernel/Kconfig.hz
 
 config KEXEC
        bool "kexec system call (EXPERIMENTAL)"
-       depends on EXPERIMENTAL && !X86_XEN
+       depends on EXPERIMENTAL
        help
          kexec is a system call that implements the ability to shutdown your
          current kernel, and to start another kernel.  It is like a reboot
--- /dev/null
+++ to-work/linux-2.6-xen-sparse/arch/i386/kernel/crash-xen.c   2006-04-03 
15:13:38.000000000 +0900
@@ -0,0 +1,15 @@
+/*
+ * Architecture specific (i386-xen) functions for kexec based crash dumps.
+ *
+ * Created by: Horms <horms@xxxxxxxxxxxx>
+ *
+ */
+
+#include <linux/kernel.h> /* For printk */
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+       /* XXX: This should do something */
+       printk("xen-kexec: Need to turn of other CPUS in "
+              "machine_crash_shutdown()\n");
+}
--- /dev/null
+++ to-work/linux-2.6-xen-sparse/arch/i386/kernel/machine_kexec-xen.c   
2006-04-07 12:59:51.000000000 +0900
@@ -0,0 +1,80 @@
+/*
+ * machine_kexec-xen.c - handle transition of Linux booting another kernel
+ *
+ * Created By: Horms <horms@xxxxxxxxxxxx>
+ *
+ * Loosely based on arch/i386/kernel/machine_kexec.c
+ */
+
+#include <linux/kexec.h>
+#include <xen/interface/kexec.h>
+#include <linux/mm.h>
+#include <asm/hypercall.h>
+
+const extern unsigned char relocate_new_kernel[];
+extern unsigned int relocate_new_kernel_size;
+static kexec_arg_t hypercall_arg;
+
+/*
+ * An architecture hook called to validate the
+ * proposed image and prepare the control pages
+ * as needed.  The pages for KEXEC_CONTROL_CODE_SIZE
+ * have been allocated, but the segments have not yet
+ * been copied into the kernel.
+ *
+ * Do whatever setup is needed on image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ *
+ * Currently nothing.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+    return 0;
+}
+
+/*
+ * Undo anything leftover by machine_kexec_prepare
+ * when an image is freed.
+ */
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+NORET_TYPE void machine_kexec(struct kimage *image)
+{
+       kimage_entry_t *ptr, entry;
+
+       /* 
+        * Translate addresses inside head from physical to machine
+        * In practice, this only needs to change the pointer to
+        * indirection pages as non-indirected pages are relative.
+        */
+       ptr = &image->head;
+       while ((entry = *ptr) && !(entry & IND_DONE)) {
+               if (!(entry & IND_DESTINATION))
+                       *ptr = phys_to_machine(entry & PAGE_MASK) |
+                               (entry & ~PAGE_MASK);
+
+               if (entry & IND_INDIRECTION)
+                       ptr = __va(entry & PAGE_MASK);
+               else
+                       ptr++;
+       }
+
+       /* Set up arguments to hypercall */
+       hypercall_arg.u.kexec.indirection_page = image->head;
+       hypercall_arg.u.kexec.reboot_code_buffer = 
+               pfn_to_mfn(page_to_pfn(image->control_code_page)) << PAGE_SHIFT;
+       hypercall_arg.u.kexec.start_address = image->start;
+       hypercall_arg.u.kexec.relocate_new_kernel = relocate_new_kernel;
+       hypercall_arg.u.kexec.relocate_new_kernel_size = 
+               relocate_new_kernel_size;
+
+       /* Let Xen do the rest of the work */
+       HYPERVISOR_kexec(KEXEC_CMD_kexec, &hypercall_arg);
+}
--- from-0001/linux-2.6-xen-sparse/drivers/xen/core/reboot.c
+++ to-work/linux-2.6-xen-sparse/drivers/xen/core/reboot.c      2006-04-03 
15:13:38.000000000 +0900
@@ -38,6 +38,11 @@ extern void ctrl_alt_del(void);
  */
 #define SHUTDOWN_HALT      4
 
+void machine_shutdown(void) 
+{
+       printk("machine_shutdown: does nothing\n");
+}
+
 void machine_emergency_restart(void)
 {
        /* We really want to get pending console data out before we die. */
--- from-0001/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h
+++ to-work/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h      
2006-04-06 11:00:03.000000000 +0900
@@ -37,6 +37,8 @@
 # error "please don't include this file directly"
 #endif
 
+#include <xen/interface/kexec.h>
+
 #define __STR(x) #x
 #define STR(x) __STR(x)
 
@@ -329,6 +331,13 @@ HYPERVISOR_nmi_op(
        return _hypercall2(int, nmi_op, op, arg);
 }
 
+static inline int
+HYPERVISOR_kexec(
+       unsigned long op, kexec_arg_t * arg)
+{
+       return _hypercall2(int, kexec_op, op, arg); 
+}
+
 #endif /* __HYPERCALL_H__ */
 
 /*
Binary files /dev/null and to-work/linux-2.6.16-xen/kernel/.kexec.c.swp differ
--- from-0001/xen/arch/x86/x86_32/Makefile
+++ to-work/xen/arch/x86/x86_32/Makefile        2006-04-03 16:25:31.000000000 
+0900
@@ -5,6 +5,7 @@ obj-y += entry.o
 obj-y += mm.o
 obj-y += seg_fixup.o
 obj-y += traps.o
+obj-y += machine_kexec.o
 
 obj-$(supervisor_mode_kernel) += supervisor_mode_kernel.o
 
--- from-0001/xen/arch/x86/x86_32/entry.S
+++ to-work/xen/arch/x86/x86_32/entry.S 2006-04-04 13:02:36.000000000 +0900
@@ -648,6 +648,7 @@ ENTRY(hypercall_table)
         .long do_acm_op
         .long do_nmi_op
         .long do_arch_sched_op
+        .long do_kexec             /* 30 */
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .long do_ni_hypercall
         .endr
@@ -683,6 +684,7 @@ ENTRY(hypercall_args_table)
         .byte 1 /* do_acm_op            */
         .byte 2 /* do_nmi_op            */
         .byte 2 /* do_arch_sched_op     */
+        .byte 2 /* do_kexec             */  /* 30 */
         .rept NR_hypercalls-(.-hypercall_args_table)
         .byte 0 /* do_ni_hypercall      */
         .endr
--- /dev/null
+++ to-work/xen/arch/x86/x86_32/machine_kexec.c 2006-04-07 12:44:16.000000000 
+0900
@@ -0,0 +1,168 @@
+/******************************************************************************
+ * arch/x86/machine_kexec.c
+ * 
+ * Created By: Horms
+ *
+ * Based heavily on arch/i386/machine_kexec.c from Linux 2.6.16
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/domain_page.h> 
+#include <xen/timer.h>
+#include <xen/sched.h>
+#include <asm/page.h> 
+#include <asm/flushtlb.h>
+#include <public/xen.h>
+#include <public/kexec.h>
+
+typedef asmlinkage void (*relocate_new_kernel_t)(
+                    unsigned long indirection_page,
+                    unsigned long reboot_code_buffer,
+                    unsigned long start_address,
+                    unsigned int has_pae);
+
+#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
+
+#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L2_ATTR (_PAGE_PRESENT)
+
+#ifndef CONFIG_X86_PAE
+
+static u32 pgtable_level1[L1_PAGETABLE_ENTRIES] PAGE_ALIGNED;
+
+static void identity_map_page(unsigned long address)
+{
+    unsigned long mfn;
+    u32 *pgtable_level2;
+
+    /* Find the current page table */
+    mfn = read_cr3() >> PAGE_SHIFT;
+    pgtable_level2 = map_domain_page(mfn);
+
+    /* Identity map the page table entry */
+    pgtable_level1[l1_table_offset(address)] = address | L0_ATTR;
+    pgtable_level2[l2_table_offset(address)] = __pa(pgtable_level1) | L1_ATTR;
+
+    /* Flush the tlb so the new mapping takes effect.
+     * Global tlb entries are not flushed but that is not an issue.
+     */
+    write_cr3(mfn << PAGE_SHIFT);
+
+    unmap_domain_page(pgtable_level2);
+}
+
+#else
+static u64 pgtable_level1[L1_PAGETABLE_ENTRIES] PAGE_ALIGNED;
+static u64 pgtable_level2[L2_PAGETABLE_ENTRIES] PAGE_ALIGNED;
+
+static void identity_map_page(unsigned long address)
+{
+    int mfn;
+    intpte_t *pgtable_level3;
+
+    /* Find the current page table */
+    mfn = read_cr3() >> PAGE_SHIFT;
+    pgtable_level3 = map_domain_page(mfn);
+
+    /* Identity map the page table entry */
+    pgtable_level1[l1_table_offset(address)] = address | L0_ATTR;
+    pgtable_level2[l2_table_offset(address)] = __pa(pgtable_level1) | L1_ATTR;
+    set_64bit(&pgtable_level3[l3_table_offset(address)],
+             __pa(pgtable_level2) | L2_ATTR);
+
+    /* Flush the tlb so the new mapping takes effect.
+     * Global tlb entries are not flushed but that is not an issue.
+     */
+    load_cr3(mfn << PAGE_SHIFT);
+
+    unmap_domain_page(pgtable_level3);
+}
+#endif
+
+static void kexec_load_segments(void)
+{
+#define __SSTR(X) #X
+#define SSTR(X) __SSTR(X)
+    __asm__ __volatile__ (
+        "\tljmp $"SSTR(__HYPERVISOR_CS)",$1f\n"
+        "\t1:\n"
+        "\tmovl $"SSTR(__HYPERVISOR_DS)",%%eax\n"
+        "\tmovl %%eax,%%ds\n"
+        "\tmovl %%eax,%%es\n"
+        "\tmovl %%eax,%%fs\n"
+        "\tmovl %%eax,%%gs\n"
+        "\tmovl %%eax,%%ss\n"
+        ::: "eax", "memory");
+#undef SSTR
+#undef __SSTR
+}
+
+#define kexec_load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
+static void kexec_set_idt(void *newidt, __u16 limit)
+{
+    struct Xgt_desc_struct curidt;
+
+    /* ia32 supports unaligned loads & stores */
+    curidt.size    = limit;
+    curidt.address = (unsigned long)newidt;
+    
+    kexec_load_idt(&curidt);
+
+};
+
+#define kexec_load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
+static void kexec_set_gdt(void *newgdt, __u16 limit)
+{
+    struct Xgt_desc_struct curgdt;
+
+    /* ia32 supports unaligned loads & stores */
+    curgdt.size    = limit;
+    curgdt.address = (unsigned long)newgdt;
+
+    kexec_load_gdt(&curgdt);
+};
+
+int machine_kexec_prepare(struct kexec_arg *arg)
+{
+       return 0;
+}
+
+void machine_kexec_cleanup(struct kexec_arg *arg)
+{
+}
+
+void machine_kexec(struct kexec_arg *arg)
+{
+    relocate_new_kernel_t rnk;
+
+    local_irq_disable();
+
+    identity_map_page(arg->u.kexec.reboot_code_buffer);
+
+    copy_from_user((void *)arg->u.kexec.reboot_code_buffer, 
+           arg->u.kexec.relocate_new_kernel,
+           arg->u.kexec.relocate_new_kernel_size);
+
+    kexec_load_segments();
+
+    kexec_set_gdt(__va(0),0);
+
+    kexec_set_idt(__va(0),0);
+
+    rnk = (relocate_new_kernel_t) arg->u.kexec.reboot_code_buffer;
+
+    (*rnk)(arg->u.kexec.indirection_page, arg->u.kexec.reboot_code_buffer, 
+           arg->u.kexec.start_address, cpu_has_pae);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- from-0001/xen/common/Makefile
+++ to-work/xen/common/Makefile 2006-04-03 15:13:38.000000000 +0900
@@ -24,6 +24,7 @@ obj-y += trace.o
 obj-y += timer.o
 obj-y += vsprintf.o
 obj-y += xmalloc.o
+obj-y += kexec.o
 
 obj-$(perfc)       += perfc.o
 obj-$(crash_debug) += gdbstub.o
--- /dev/null
+++ to-work/xen/common/kexec.c  2006-04-07 13:06:54.000000000 +0900
@@ -0,0 +1,54 @@
+/*
+ * Architecture independent kexec code for Xen
+ *
+ * At this stage, just a switch for the kexec hypercall into
+ * architecture dependent code.
+ *
+ * Created By: Horms <horms@xxxxxxxxxxxx>
+ */
+
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/guest_access.h>
+#include <public/xen.h>
+#include <public/kexec.h>
+
+extern int machine_kexec_prepare(struct kexec_arg *arg);
+extern void machine_kexec_cleanup(struct kexec_arg *arg);
+extern void machine_kexec(struct kexec_arg *arg);
+
+int do_kexec(unsigned long op, 
+             GUEST_HANDLE(kexec_arg_t) uarg)
+{
+    struct kexec_arg arg;
+
+    if ( unlikely(copy_from_guest(&arg, uarg, 1) != 0) )
+    {
+        printk("do_kexec: __copy_from_guest failed");
+        return -EFAULT;
+    }
+
+       switch(op) {
+       case KEXEC_CMD_kexec:
+               machine_kexec(&arg);
+        return -EINVAL; /* Not Reached */
+       case KEXEC_CMD_kexec_prepare:
+               return machine_kexec_prepare(&arg);
+       case KEXEC_CMD_kexec_cleanup:
+               machine_kexec_cleanup(&arg);
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
--- from-0001/xen/include/asm-x86/hypercall.h
+++ to-work/xen/include/asm-x86/hypercall.h     2006-04-07 13:05:06.000000000 
+0900
@@ -6,6 +6,7 @@
 #define __ASM_X86_HYPERCALL_H__
 
 #include <public/physdev.h>
+#include <public/kexec.h>
 
 extern long
 do_set_trap_table(
@@ -79,6 +80,11 @@ extern long
 arch_do_vcpu_op(
     int cmd, struct vcpu *v, GUEST_HANDLE(void) arg);
 
+extern int
+do_kexec(
+    unsigned long op, 
+    GUEST_HANDLE(kexec_arg_t) uarg);
+
 #ifdef __x86_64__
 
 extern long
--- /dev/null
+++ to-work/xen/include/public/kexec.h  2006-04-07 12:44:43.000000000 +0900
@@ -0,0 +1,39 @@
+/*
+ * kexec.h: Xen kexec
+ *
+ * Created By: Horms <horms@xxxxxxxxxxxx>
+ */
+
+#ifndef _XEN_PUBLIC_KEXEC_H
+#define _XEN_PUBLIC_KEXEC_H
+
+/*
+ * Scratch space for passing arguments to the kexec hypercall
+ */
+typedef struct kexec_arg {
+    union {
+        struct {
+            unsigned long data; /* Not sure what this should be yet */
+        } helper;
+        struct {
+            unsigned long indirection_page;
+            unsigned long reboot_code_buffer;
+            unsigned long start_address;
+            const char *relocate_new_kernel;
+            unsigned int relocate_new_kernel_size;
+        } kexec;
+    } u;
+} kexec_arg_t;
+DEFINE_GUEST_HANDLE(kexec_arg_t);
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- from-0001/xen/include/public/xen.h
+++ to-work/xen/include/public/xen.h    2006-04-04 13:29:54.000000000 +0900
@@ -60,6 +60,7 @@
 #define __HYPERVISOR_acm_op               27
 #define __HYPERVISOR_nmi_op               28
 #define __HYPERVISOR_sched_op             29
+#define __HYPERVISOR_kexec_op             30
 
 /* 
  * VIRTUAL INTERRUPTS
@@ -206,6 +207,13 @@ DEFINE_GUEST_HANDLE(mmuext_op_t);
 #define VMASST_TYPE_writable_pagetables  2
 #define MAX_VMASST_TYPE 2
 
+/*
+ * Commands to HYPERVISOR_kexec().
+ */
+#define KEXEC_CMD_kexec                 0
+#define KEXEC_CMD_kexec_prepare         1
+#define KEXEC_CMD_kexec_cleanup         2
+
 #ifndef __ASSEMBLY__
 
 typedef uint16_t domid_t;
diff -r 0010df11836d buildconfigs/linux-defconfig_xen_x86_32
--- a/buildconfigs/linux-defconfig_xen_x86_32   Fri Apr  7 00:32:54 2006 +0100
+++ b/buildconfigs/linux-defconfig_xen_x86_32   Fri Apr  7 14:54:45 2006 +0900
@@ -184,6 +184,7 @@ CONFIG_HZ_100=y
 # CONFIG_HZ_250 is not set
 # CONFIG_HZ_1000 is not set
 CONFIG_HZ=100
+CONFIG_KEXEC=y
 # CONFIG_CRASH_DUMP is not set
 CONFIG_PHYSICAL_START=0x100000
 CONFIG_HOTPLUG_CPU=y

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.