WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH]: kexec: framework and i386

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH]: kexec: framework and i386
From: Horms <horms@xxxxxxxxxxxx>
Date: Fri, 7 Apr 2006 16:42:36 +0900
Cc: Magnus Damm <magnus@xxxxxxxxxxxxx>
Delivery-date: Fri, 07 Apr 2006 07:23:44 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.5.11+cvs20060126
kexec: framework and i386

Here is a first cut of kexec for dom0/xen, which will actually
kexec the physical machine from xen. The approach taken is
to move the architecture-dependent kexec code into a new hypercall.

Some notes:
  * machine_kexec_cleanup() and machine_kexec_prepare() don't do
    anything in i386. So while this patch adds a framework for them,
    I am not sure what parameters are needed at this stage.
  * Only works for UP, as machine_shutdown is not implemented yet
  * kexecing into xen does not seem to work, I think that 
    kexec-tools needs updating, but I have not investigated yet
  * I don't believe that kdump works yet
  * This patch was prepared against xen-unstable.hg 9514
    As of today (9574) two new hypercalls have been added.
    I rediffed and moved the kexec hypercall to 33. However
    this exceeds NR_hypercalls, which is currently 32.
    I tried increasing this, but the dom0 now crashes 
    in entry.S on init. Even after rebuilding both xen and the kernel
    completely from scratch after a make distclean. Help!!

Prepared with the assistance of my colleague Magnus Damm

Signed-Off-By: Horms <horms@xxxxxxxxxxxx>

--- from-0001/linux-2.6-xen-sparse/arch/i386/Kconfig
+++ to-work/linux-2.6-xen-sparse/arch/i386/Kconfig      2006-04-03 15:13:38.000000000 +0900
@@ -726,7 +726,7 @@ source kernel/Kconfig.hz
 
 config KEXEC
        bool "kexec system call (EXPERIMENTAL)"
-       depends on EXPERIMENTAL && !X86_XEN
+       depends on EXPERIMENTAL
        help
          kexec is a system call that implements the ability to shutdown your
          current kernel, and to start another kernel.  It is like a reboot
--- /dev/null
+++ to-work/linux-2.6-xen-sparse/arch/i386/kernel/crash-xen.c   2006-04-03 15:13:38.000000000 +0900
@@ -0,0 +1,15 @@
+/*
+ * Architecture specific (i386-xen) functions for kexec based crash dumps.
+ *
+ * Created by: Horms <horms@xxxxxxxxxxxx>
+ *
+ */
+
+#include <linux/kernel.h> /* For printk */
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+       /* XXX: This should do something */
+       printk("xen-kexec: Need to turn of other CPUS in "
+              "machine_crash_shutdown()\n");
+}
--- /dev/null
+++ to-work/linux-2.6-xen-sparse/arch/i386/kernel/machine_kexec-xen.c   2006-04-07 12:59:51.000000000 +0900
@@ -0,0 +1,80 @@
+/*
+ * machine_kexec-xen.c - handle transition of Linux booting another kernel
+ *
+ * Created By: Horms <horms@xxxxxxxxxxxx>
+ *
+ * Loosely based on arch/i386/kernel/machine_kexec.c
+ */
+
+#include <linux/kexec.h>
+#include <xen/interface/kexec.h>
+#include <linux/mm.h>
+#include <asm/hypercall.h>
+
+const extern unsigned char relocate_new_kernel[];
+extern unsigned int relocate_new_kernel_size;
+static kexec_arg_t hypercall_arg;
+
+/*
+ * An architecture hook called to validate the
+ * proposed image and prepare the control pages
+ * as needed.  The pages for KEXEC_CONTROL_CODE_SIZE
+ * have been allocated, but the segments have not yet
+ * been copied into the kernel.
+ *
+ * Do whatever setup is needed on image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ *
+ * Currently nothing.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+    return 0;
+}
+
+/*
+ * Undo anything leftover by machine_kexec_prepare
+ * when an image is freed.
+ */
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+NORET_TYPE void machine_kexec(struct kimage *image)
+{
+       kimage_entry_t *ptr, entry;
+
+       /* 
+        * Translate addresses inside head from physical to machine
+        * In practice, this only needs to change the pointer to
+        * indirection pages as non-indirected pages are relative.
+        */
+       ptr = &image->head;
+       while ((entry = *ptr) && !(entry & IND_DONE)) {
+               if (!(entry & IND_DESTINATION))
+                       *ptr = phys_to_machine(entry & PAGE_MASK) |
+                               (entry & ~PAGE_MASK);
+
+               if (entry & IND_INDIRECTION)
+                       ptr = __va(entry & PAGE_MASK);
+               else
+                       ptr++;
+       }
+
+       /* Set up arguments to hypercall */
+       hypercall_arg.u.kexec.indirection_page = image->head;
+       hypercall_arg.u.kexec.reboot_code_buffer = 
+               pfn_to_mfn(page_to_pfn(image->control_code_page)) << PAGE_SHIFT;
+       hypercall_arg.u.kexec.start_address = image->start;
+       hypercall_arg.u.kexec.relocate_new_kernel = relocate_new_kernel;
+       hypercall_arg.u.kexec.relocate_new_kernel_size = 
+               relocate_new_kernel_size;
+
+       /* Let Xen do the rest of the work */
+       HYPERVISOR_kexec(KEXEC_CMD_kexec, &hypercall_arg);
+}
--- from-0001/linux-2.6-xen-sparse/drivers/xen/core/reboot.c
+++ to-work/linux-2.6-xen-sparse/drivers/xen/core/reboot.c      2006-04-03 15:13:38.000000000 +0900
@@ -38,6 +38,11 @@ extern void ctrl_alt_del(void);
  */
 #define SHUTDOWN_HALT      4
 
+void machine_shutdown(void) 
+{
+       printk("machine_shutdown: does nothing\n");
+}
+
 void machine_emergency_restart(void)
 {
        /* We really want to get pending console data out before we die. */
--- from-0001/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h
+++ to-work/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h      2006-04-06 11:00:03.000000000 +0900
@@ -37,6 +37,8 @@
 # error "please don't include this file directly"
 #endif
 
+#include <xen/interface/kexec.h>
+
 #define __STR(x) #x
 #define STR(x) __STR(x)
 
@@ -329,6 +331,13 @@ HYPERVISOR_nmi_op(
        return _hypercall2(int, nmi_op, op, arg);
 }
 
+static inline int
+HYPERVISOR_kexec(
+       unsigned long op, kexec_arg_t * arg)
+{
+       return _hypercall2(int, kexec_op, op, arg); 
+}
+
 #endif /* __HYPERCALL_H__ */
 
 /*
Binary files /dev/null and to-work/linux-2.6.16-xen/kernel/.kexec.c.swp differ
--- from-0001/xen/arch/x86/x86_32/Makefile
+++ to-work/xen/arch/x86/x86_32/Makefile        2006-04-03 16:25:31.000000000 +0900
@@ -5,6 +5,7 @@ obj-y += entry.o
 obj-y += mm.o
 obj-y += seg_fixup.o
 obj-y += traps.o
+obj-y += machine_kexec.o
 
 obj-$(supervisor_mode_kernel) += supervisor_mode_kernel.o
 
--- from-0001/xen/arch/x86/x86_32/entry.S
+++ to-work/xen/arch/x86/x86_32/entry.S 2006-04-04 13:02:36.000000000 +0900
@@ -648,6 +648,7 @@ ENTRY(hypercall_table)
         .long do_acm_op
         .long do_nmi_op
         .long do_arch_sched_op
+        .long do_kexec             /* 30 */
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .long do_ni_hypercall
         .endr
@@ -683,6 +684,7 @@ ENTRY(hypercall_args_table)
         .byte 1 /* do_acm_op            */
         .byte 2 /* do_nmi_op            */
         .byte 2 /* do_arch_sched_op     */
+        .byte 2 /* do_kexec             */  /* 30 */
         .rept NR_hypercalls-(.-hypercall_args_table)
         .byte 0 /* do_ni_hypercall      */
         .endr
--- /dev/null
+++ to-work/xen/arch/x86/x86_32/machine_kexec.c 2006-04-07 12:44:16.000000000 +0900
@@ -0,0 +1,168 @@
+/******************************************************************************
+ * arch/x86/x86_32/machine_kexec.c
+ * 
+ * Created By: Horms
+ *
+ * Based heavily on arch/i386/machine_kexec.c from Linux 2.6.16
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/domain_page.h> 
+#include <xen/timer.h>
+#include <xen/sched.h>
+#include <asm/page.h> 
+#include <asm/flushtlb.h>
+#include <public/xen.h>
+#include <public/kexec.h>
+
+typedef asmlinkage void (*relocate_new_kernel_t)(
+                    unsigned long indirection_page,
+                    unsigned long reboot_code_buffer,
+                    unsigned long start_address,
+                    unsigned int has_pae);
+
+#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
+
+#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L2_ATTR (_PAGE_PRESENT)
+
+#ifndef CONFIG_X86_PAE
+
+static u32 pgtable_level1[L1_PAGETABLE_ENTRIES] PAGE_ALIGNED;
+
+static void identity_map_page(unsigned long address)
+{
+    unsigned long mfn;
+    u32 *pgtable_level2;
+
+    /* Find the current page table */
+    mfn = read_cr3() >> PAGE_SHIFT;
+    pgtable_level2 = map_domain_page(mfn);
+
+    /* Identity map the page table entry */
+    pgtable_level1[l1_table_offset(address)] = address | L0_ATTR;
+    pgtable_level2[l2_table_offset(address)] = __pa(pgtable_level1) | L1_ATTR;
+
+    /* Flush the tlb so the new mapping takes effect.
+     * Global tlb entries are not flushed but that is not an issue.
+     */
+    write_cr3(mfn << PAGE_SHIFT);
+
+    unmap_domain_page(pgtable_level2);
+}
+
+#else
+static u64 pgtable_level1[L1_PAGETABLE_ENTRIES] PAGE_ALIGNED;
+static u64 pgtable_level2[L2_PAGETABLE_ENTRIES] PAGE_ALIGNED;
+
+static void identity_map_page(unsigned long address)
+{
+    int mfn;
+    intpte_t *pgtable_level3;
+
+    /* Find the current page table */
+    mfn = read_cr3() >> PAGE_SHIFT;
+    pgtable_level3 = map_domain_page(mfn);
+
+    /* Identity map the page table entry */
+    pgtable_level1[l1_table_offset(address)] = address | L0_ATTR;
+    pgtable_level2[l2_table_offset(address)] = __pa(pgtable_level1) | L1_ATTR;
+    set_64bit(&pgtable_level3[l3_table_offset(address)],
+             __pa(pgtable_level2) | L2_ATTR);
+
+    /* Flush the tlb so the new mapping takes effect.
+     * Global tlb entries are not flushed but that is not an issue.
+     */
+    load_cr3(mfn << PAGE_SHIFT);
+
+    unmap_domain_page(pgtable_level3);
+}
+#endif
+
+static void kexec_load_segments(void)
+{
+#define __SSTR(X) #X
+#define SSTR(X) __SSTR(X)
+    __asm__ __volatile__ (
+        "\tljmp $"SSTR(__HYPERVISOR_CS)",$1f\n"
+        "\t1:\n"
+        "\tmovl $"SSTR(__HYPERVISOR_DS)",%%eax\n"
+        "\tmovl %%eax,%%ds\n"
+        "\tmovl %%eax,%%es\n"
+        "\tmovl %%eax,%%fs\n"
+        "\tmovl %%eax,%%gs\n"
+        "\tmovl %%eax,%%ss\n"
+        ::: "eax", "memory");
+#undef SSTR
+#undef __SSTR
+}
+
+#define kexec_load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
+static void kexec_set_idt(void *newidt, __u16 limit)
+{
+    struct Xgt_desc_struct curidt;
+
+    /* ia32 supports unaligned loads & stores */
+    curidt.size    = limit;
+    curidt.address = (unsigned long)newidt;
+    
+    kexec_load_idt(&curidt);
+
+};
+
+#define kexec_load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
+static void kexec_set_gdt(void *newgdt, __u16 limit)
+{
+    struct Xgt_desc_struct curgdt;
+
+    /* ia32 supports unaligned loads & stores */
+    curgdt.size    = limit;
+    curgdt.address = (unsigned long)newgdt;
+
+    kexec_load_gdt(&curgdt);
+};
+
+int machine_kexec_prepare(struct kexec_arg *arg)
+{
+       return 0;
+}
+
+void machine_kexec_cleanup(struct kexec_arg *arg)
+{
+}
+
+void machine_kexec(struct kexec_arg *arg)
+{
+    relocate_new_kernel_t rnk;
+
+    local_irq_disable();
+
+    identity_map_page(arg->u.kexec.reboot_code_buffer);
+
+    copy_from_user((void *)arg->u.kexec.reboot_code_buffer, 
+           arg->u.kexec.relocate_new_kernel,
+           arg->u.kexec.relocate_new_kernel_size);
+
+    kexec_load_segments();
+
+    kexec_set_gdt(__va(0),0);
+
+    kexec_set_idt(__va(0),0);
+
+    rnk = (relocate_new_kernel_t) arg->u.kexec.reboot_code_buffer;
+
+    (*rnk)(arg->u.kexec.indirection_page, arg->u.kexec.reboot_code_buffer, 
+           arg->u.kexec.start_address, cpu_has_pae);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- from-0001/xen/common/Makefile
+++ to-work/xen/common/Makefile 2006-04-03 15:13:38.000000000 +0900
@@ -24,6 +24,7 @@ obj-y += trace.o
 obj-y += timer.o
 obj-y += vsprintf.o
 obj-y += xmalloc.o
+obj-y += kexec.o
 
 obj-$(perfc)       += perfc.o
 obj-$(crash_debug) += gdbstub.o
--- /dev/null
+++ to-work/xen/common/kexec.c  2006-04-07 13:06:54.000000000 +0900
@@ -0,0 +1,54 @@
+/*
+ * Architecture independent kexec code for Xen
+ *
+ * At this stage, just a switch for the kexec hypercall into
+ * architecture dependent code.
+ *
+ * Created By: Horms <horms@xxxxxxxxxxxx>
+ */
+
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/guest_access.h>
+#include <public/xen.h>
+#include <public/kexec.h>
+
+extern int machine_kexec_prepare(struct kexec_arg *arg);
+extern void machine_kexec_cleanup(struct kexec_arg *arg);
+extern void machine_kexec(struct kexec_arg *arg);
+
+int do_kexec(unsigned long op, 
+             GUEST_HANDLE(kexec_arg_t) uarg)
+{
+    struct kexec_arg arg;
+
+    if ( unlikely(copy_from_guest(&arg, uarg, 1) != 0) )
+    {
+        printk("do_kexec: __copy_from_guest failed");
+        return -EFAULT;
+    }
+
+       switch(op) {
+       case KEXEC_CMD_kexec:
+               machine_kexec(&arg);
+        return -EINVAL; /* Not Reached */
+       case KEXEC_CMD_kexec_prepare:
+               return machine_kexec_prepare(&arg);
+       case KEXEC_CMD_kexec_cleanup:
+               machine_kexec_cleanup(&arg);
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
--- from-0001/xen/include/asm-x86/hypercall.h
+++ to-work/xen/include/asm-x86/hypercall.h     2006-04-07 13:05:06.000000000 +0900
@@ -6,6 +6,7 @@
 #define __ASM_X86_HYPERCALL_H__
 
 #include <public/physdev.h>
+#include <public/kexec.h>
 
 extern long
 do_set_trap_table(
@@ -79,6 +80,11 @@ extern long
 arch_do_vcpu_op(
     int cmd, struct vcpu *v, GUEST_HANDLE(void) arg);
 
+extern int
+do_kexec(
+    unsigned long op, 
+    GUEST_HANDLE(kexec_arg_t) uarg);
+
 #ifdef __x86_64__
 
 extern long
--- /dev/null
+++ to-work/xen/include/public/kexec.h  2006-04-07 12:44:43.000000000 +0900
@@ -0,0 +1,39 @@
+/*
+ * kexec.h: Xen kexec
+ *
+ * Created By: Horms <horms@xxxxxxxxxxxx>
+ */
+
+#ifndef _XEN_PUBLIC_KEXEC_H
+#define _XEN_PUBLIC_KEXEC_H
+
+/*
+ * Scratch space for passing arguments to the kexec hypercall
+ */
+typedef struct kexec_arg {
+    union {
+        struct {
+            unsigned long data; /* Not sure what this should be yet */
+        } helper;
+        struct {
+            unsigned long indirection_page;
+            unsigned long reboot_code_buffer;
+            unsigned long start_address;
+            const char *relocate_new_kernel;
+            unsigned int relocate_new_kernel_size;
+        } kexec;
+    } u;
+} kexec_arg_t;
+DEFINE_GUEST_HANDLE(kexec_arg_t);
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- from-0001/xen/include/public/xen.h
+++ to-work/xen/include/public/xen.h    2006-04-04 13:29:54.000000000 +0900
@@ -60,6 +60,7 @@
 #define __HYPERVISOR_acm_op               27
 #define __HYPERVISOR_nmi_op               28
 #define __HYPERVISOR_sched_op             29
+#define __HYPERVISOR_kexec_op             30
 
 /* 
  * VIRTUAL INTERRUPTS
@@ -206,6 +207,13 @@ DEFINE_GUEST_HANDLE(mmuext_op_t);
 #define VMASST_TYPE_writable_pagetables  2
 #define MAX_VMASST_TYPE 2
 
+/*
+ * Commands to HYPERVISOR_kexec().
+ */
+#define KEXEC_CMD_kexec                 0
+#define KEXEC_CMD_kexec_prepare         1
+#define KEXEC_CMD_kexec_cleanup         2
+
 #ifndef __ASSEMBLY__
 
 typedef uint16_t domid_t;
diff -r 0010df11836d buildconfigs/linux-defconfig_xen_x86_32
--- a/buildconfigs/linux-defconfig_xen_x86_32   Fri Apr  7 00:32:54 2006 +0100
+++ b/buildconfigs/linux-defconfig_xen_x86_32   Fri Apr  7 14:54:45 2006 +0900
@@ -184,6 +184,7 @@ CONFIG_HZ_100=y
 # CONFIG_HZ_250 is not set
 # CONFIG_HZ_1000 is not set
 CONFIG_HZ=100
+CONFIG_KEXEC=y
 # CONFIG_CRASH_DUMP is not set
 CONFIG_PHYSICAL_START=0x100000
 CONFIG_HOTPLUG_CPU=y

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel