# HG changeset patch
# User Ian Campbell <ian.campbell@xxxxxxxxxxxxx>
# Node ID 5d6be0099bdfebb260480176a2e051d83b18d3ed
# Parent 88935ae47fa92feeacaf25bf2c07770206ac14fa
# Parent 2a17ff9b8ffc7e8d52092b57bd48ab42876fab2d
merge
---
 buildconfigs/linux-defconfig_xen_x86_32 | 2
 buildconfigs/linux-defconfig_xen_x86_64 | 1
 linux-2.6-xen-sparse/arch/i386/Kconfig | 2
 linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c | 19
 linux-2.6-xen-sparse/arch/x86_64/Kconfig | 2
 linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c | 6
 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c | 13
 linux-2.6-xen-sparse/drivers/xen/core/Makefile | 1
 linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c | 170 ++++
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h | 8
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h | 7
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/ptrace.h | 2
 patches/linux-2.6.16.33/git-2a8a3d5b65e86ec1dfef7d268c64a909eab94af7.patch | 50 +
 patches/linux-2.6.16.33/git-2efe55a9cec8418f0e0cde3dc3787a42fddc4411.patch | 75 ++
 patches/linux-2.6.16.33/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch | 197 +++++
 patches/linux-2.6.16.33/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch | 156 ++++
 patches/linux-2.6.16.33/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch | 44 +
 patches/linux-2.6.16.33/kexec-generic.patch | 167 ++++
 patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch | 114 +++
 patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch | 114 +++
 patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-xen-i386.patch | 49 +
 patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-xen-x86_64.patch | 88 ++
 patches/linux-2.6.16.33/series | 10
 xen/arch/ia64/xen/Makefile | 2
 xen/arch/ia64/xen/crash.c | 19
 xen/arch/ia64/xen/machine_kexec.c | 34
 xen/arch/powerpc/Makefile | 2
 xen/arch/powerpc/crash.c | 19
 xen/arch/powerpc/machine_kexec.c | 34
 xen/arch/x86/Makefile | 2
 xen/arch/x86/crash.c | 128 +++
 xen/arch/x86/machine_kexec.c | 105 ++
 xen/arch/x86/setup.c | 76 +-
 xen/arch/x86/traps.c | 2
 xen/arch/x86/x86_32/entry.S | 2
 xen/arch/x86/x86_64/entry.S | 2
 xen/common/Makefile | 1
 xen/common/kexec.c | 359 ++++++++++
 xen/common/page_alloc.c | 37 -
 xen/drivers/char/console.c | 3
 xen/include/asm-ia64/elf.h | 30
 xen/include/asm-ia64/kexec.h | 25
 xen/include/asm-powerpc/elf.h | 30
 xen/include/asm-powerpc/kexec.h | 25
 xen/include/asm-x86/elf.h | 24
 xen/include/asm-x86/fixmap.h | 4
 xen/include/asm-x86/hypercall.h | 5
 xen/include/asm-x86/kexec.h | 20
 xen/include/asm-x86/x86_32/elf.h | 72 ++
 xen/include/asm-x86/x86_32/kexec.h | 40 +
 xen/include/asm-x86/x86_64/elf.h | 92 ++
 xen/include/asm-x86/x86_64/kexec.h | 39 +
 xen/include/public/elfnote.h | 19
 xen/include/public/kexec.h | 138 +++
 xen/include/xen/elf.h | 20
 xen/include/xen/elfcore.h | 140 +++
 xen/include/xen/hypercall.h | 6
 xen/include/xen/kexec.h | 43 +
 xen/include/xen/mm.h | 1
59 files changed, 2856 insertions(+), 41 deletions(-)
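
[Editorial note, not part of the changeset: the guest-side wrapper HYPERVISOR_kexec_op() added below is used from machine_kexec.c to interrogate the hypervisor. The following is a minimal sketch of such a query, assuming only the xen_kexec_range_t / KEXEC_CMD_kexec_get_range / KEXEC_RANGE_MA_XEN definitions this changeset introduces; the function name, printk format and casts are illustrative.]

    /* Sketch: ask Xen for its code/data machine-address range via the
     * new kexec_op hypercall, much as xen_machine_kexec_setup_resources()
     * does below when it fills in xen_hypervisor_res. */
    #include <linux/kernel.h>
    #include <linux/string.h>
    #include <xen/interface/kexec.h>   /* xen_kexec_range_t, KEXEC_RANGE_MA_XEN */
    #include <asm/hypercall.h>         /* HYPERVISOR_kexec_op() */

    static void example_query_xen_range(void)
    {
        xen_kexec_range_t range;

        memset(&range, 0, sizeof(range));
        range.range = KEXEC_RANGE_MA_XEN;

        /* Returns 0 on success; range.start/range.size are then valid. */
        if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range) == 0)
            printk("Xen code/data: start %#lx size %#lx\n",
                   (unsigned long)range.start,
                   (unsigned long)range.size);
    }
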
diff -r 88935ae47fa9 -r 5d6be0099bdf buildconfigs/linux-defconfig_xen_x86_32
--- a/buildconfigs/linux-defconfig_xen_x86_32 Thu Nov 30 10:57:28 2006 +0000
+++ b/buildconfigs/linux-defconfig_xen_x86_32 Thu Nov 30 13:05:27 2006 +0000
@@ -184,6 +184,7 @@ CONFIG_REGPARM=y
CONFIG_REGPARM=y
CONFIG_SECCOMP=y
CONFIG_HZ_100=y
+CONFIG_KEXEC=y
# CONFIG_HZ_250 is not set
# CONFIG_HZ_1000 is not set
CONFIG_HZ=100
@@ -2776,6 +2777,7 @@ CONFIG_NTFS_FS=m
#
CONFIG_PROC_FS=y
CONFIG_PROC_KCORE=y
+# CONFIG_PROC_VMCORE is not set
CONFIG_SYSFS=y
CONFIG_TMPFS=y
# CONFIG_HUGETLB_PAGE is not set
diff -r 88935ae47fa9 -r 5d6be0099bdf buildconfigs/linux-defconfig_xen_x86_64
--- a/buildconfigs/linux-defconfig_xen_x86_64 Thu Nov 30 10:57:28 2006 +0000
+++ b/buildconfigs/linux-defconfig_xen_x86_64 Thu Nov 30 13:05:27 2006 +0000
@@ -139,6 +139,7 @@ CONFIG_PHYSICAL_START=0x100000
CONFIG_PHYSICAL_START=0x100000
CONFIG_SECCOMP=y
CONFIG_HZ_100=y
+CONFIG_KEXEC=y
# CONFIG_HZ_250 is not set
# CONFIG_HZ_1000 is not set
CONFIG_HZ=100
diff -r 88935ae47fa9 -r 5d6be0099bdf linux-2.6-xen-sparse/arch/i386/Kconfig
--- a/linux-2.6-xen-sparse/arch/i386/Kconfig Thu Nov 30 10:57:28 2006 +0000
+++ b/linux-2.6-xen-sparse/arch/i386/Kconfig Thu Nov 30 13:05:27 2006 +0000
@@ -726,7 +726,7 @@ source kernel/Kconfig.hz
config KEXEC
bool "kexec system call (EXPERIMENTAL)"
- depends on EXPERIMENTAL && !X86_XEN
+ depends on EXPERIMENTAL && !XEN_UNPRIVILEGED_GUEST
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff -r 88935ae47fa9 -r 5d6be0099bdf linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Thu Nov 30 10:57:28 2006 +0000
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Thu Nov 30 13:05:27 2006 +0000
@@ -68,6 +68,10 @@
#include <xen/xencons.h>
#include "setup_arch_pre.h"
#include <bios_ebda.h>
+
+#ifdef CONFIG_XEN
+#include <xen/interface/kexec.h>
+#endif
/* Forward Declaration. */
void __init find_max_pfn(void);
@@ -943,6 +947,7 @@ static void __init parse_cmdline_early (
* after a kernel panic.
*/
else if (!memcmp(from, "crashkernel=", 12)) {
+#ifndef CONFIG_XEN
unsigned long size, base;
size = memparse(from+12, &from);
if (*from == '@') {
@@ -953,6 +958,10 @@ static void __init parse_cmdline_early (
crashk_res.start = base;
crashk_res.end = base + size - 1;
}
+#else
+ printk("Ignoring crashkernel command line, "
+ "parameter will be supplied by xen\n");
+#endif
}
#endif
#ifdef CONFIG_PROC_VMCORE
@@ -1322,9 +1331,13 @@ void __init setup_bootmem_allocator(void
}
#endif
#ifdef CONFIG_KEXEC
+#ifdef CONFIG_XEN
+ xen_machine_kexec_setup_resources();
+#else
if (crashk_res.start != crashk_res.end)
reserve_bootmem(crashk_res.start,
crashk_res.end - crashk_res.start + 1);
+#endif
#endif
if (!xen_feature(XENFEAT_auto_translated_physmap))
@@ -1389,7 +1402,11 @@ legacy_init_iomem_resources(struct e820e
request_resource(res, data_resource);
#endif
#ifdef CONFIG_KEXEC
- request_resource(res, &crashk_res);
+ if (crashk_res.start != crashk_res.end)
+ request_resource(res, &crashk_res);
+#ifdef CONFIG_XEN
+ xen_machine_kexec_register_resources(res);
+#endif
#endif
}
}
diff -r 88935ae47fa9 -r 5d6be0099bdf linux-2.6-xen-sparse/arch/x86_64/Kconfig
--- a/linux-2.6-xen-sparse/arch/x86_64/Kconfig Thu Nov 30 10:57:28 2006 +0000
+++ b/linux-2.6-xen-sparse/arch/x86_64/Kconfig Thu Nov 30 13:05:27 2006 +0000
@@ -435,7 +435,7 @@ config X86_MCE_AMD
config KEXEC
bool "kexec system call (EXPERIMENTAL)"
- depends on EXPERIMENTAL && !X86_64_XEN
+ depends on EXPERIMENTAL && !XEN_UNPRIVILEGED_GUEST
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff -r 88935ae47fa9 -r 5d6be0099bdf linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c Thu Nov 30 10:57:28 2006 +0000
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c Thu Nov 30 13:05:27 2006 +0000
@@ -260,7 +260,11 @@ void __init e820_reserve_resources(struc
request_resource(res, &data_resource);
#endif
#ifdef CONFIG_KEXEC
- request_resource(res, &crashk_res);
+ if (crashk_res.start != crashk_res.end)
+ request_resource(res, &crashk_res);
+#ifdef CONFIG_XEN
+ xen_machine_kexec_register_resources(res);
+#endif
#endif
}
}
diff -r 88935ae47fa9 -r 5d6be0099bdf linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Thu Nov 30 10:57:28 2006 +0000
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Thu Nov 30 13:05:27 2006 +0000
@@ -80,6 +80,10 @@
#include <asm/mach-xen/setup_arch_post.h>
#include <xen/interface/memory.h>
+#ifdef CONFIG_XEN
+#include <xen/interface/kexec.h>
+#endif
+
extern unsigned long start_pfn;
extern struct edid_info edid_info;
@@ -450,6 +454,7 @@ static __init void parse_cmdline_early (
* after a kernel panic.
*/
else if (!memcmp(from, "crashkernel=", 12)) {
+#ifndef CONFIG_XEN
unsigned long size, base;
size = memparse(from+12, &from);
if (*from == '@') {
@@ -460,6 +465,10 @@ static __init void parse_cmdline_early (
crashk_res.start = base;
crashk_res.end = base + size - 1;
}
+#else
+ printk("Ignoring crashkernel command line, "
+ "parameter will be supplied by xen\n");
+#endif
}
#endif
@@ -812,10 +821,14 @@ void __init setup_arch(char **cmdline_p)
#endif
#endif /* !CONFIG_XEN */
#ifdef CONFIG_KEXEC
+#ifdef CONFIG_XEN
+ xen_machine_kexec_setup_resources();
+#else
if (crashk_res.start != crashk_res.end) {
reserve_bootmem(crashk_res.start,
crashk_res.end - crashk_res.start + 1);
}
+#endif
#endif
paging_init();
diff -r 88935ae47fa9 -r 5d6be0099bdf linux-2.6-xen-sparse/drivers/xen/core/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile Thu Nov 30 10:57:28 2006 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile Thu Nov 30 13:05:27 2006 +0000
@@ -11,3 +11,4 @@ obj-$(CONFIG_XEN_SKBUFF) += skbuff.o
obj-$(CONFIG_XEN_SKBUFF) += skbuff.o
obj-$(CONFIG_XEN_REBOOT) += reboot.o machine_reboot.o
obj-$(CONFIG_XEN_SMPBOOT) += smpboot.o
+obj-$(CONFIG_KEXEC) += machine_kexec.o
diff -r 88935ae47fa9 -r 5d6be0099bdf linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h Thu Nov 30 10:57:28 2006 +0000
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h Thu Nov 30 13:05:27 2006 +0000
@@ -395,5 +395,13 @@ HYPERVISOR_xenoprof_op(
return _hypercall2(int, xenoprof_op, op, arg);
}
+static inline int
+HYPERVISOR_kexec_op(
+ unsigned long op, void *args)
+{
+ return _hypercall2(int, kexec_op, op, args);
+}
+
+
#endif /* __HYPERCALL_H__ */
diff -r 88935ae47fa9 -r 5d6be0099bdf linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h Thu Nov 30 10:57:28 2006 +0000
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h Thu Nov 30 13:05:27 2006 +0000
@@ -396,4 +396,11 @@ HYPERVISOR_xenoprof_op(
return _hypercall2(int, xenoprof_op, op, arg);
}
+static inline int
+HYPERVISOR_kexec_op(
+ unsigned long op, void *args)
+{
+ return _hypercall2(int, kexec_op, op, args);
+}
+
#endif /* __HYPERCALL_H__ */
diff -r 88935ae47fa9 -r 5d6be0099bdf linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/ptrace.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/ptrace.h Thu Nov 30 10:57:28 2006 +0000
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/ptrace.h Thu Nov 30 13:05:27 2006 +0000
@@ -90,6 +90,8 @@ extern unsigned long profile_pc(struct p
#define profile_pc(regs) instruction_pointer(regs)
#endif
+#include <linux/compiler.h>
+
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
struct task_struct;
diff -r 88935ae47fa9 -r 5d6be0099bdf patches/linux-2.6.16.33/series
--- a/patches/linux-2.6.16.33/series Thu Nov 30 10:57:28 2006 +0000
+++ b/patches/linux-2.6.16.33/series Thu Nov 30 13:05:27 2006 +0000
@@ -1,3 +1,12 @@ blktap-aio-16_03_06.patch
+kexec-generic.patch
+git-2efe55a9cec8418f0e0cde3dc3787a42fddc4411.patch
+git-2a8a3d5b65e86ec1dfef7d268c64a909eab94af7.patch
+git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch
+linux-2.6.19-rc1-kexec-move_segment_code-i386.patch
+linux-2.6.19-rc1-kexec-xen-i386.patch
+git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch
+linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch
+linux-2.6.19-rc1-kexec-xen-x86_64.patch
blktap-aio-16_03_06.patch
device_bind.patch
fix-hz-suspend.patch
@@ -22,6 +31,7 @@ xenoprof-generic.patch
xenoprof-generic.patch
x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
+git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch
x86-elfnote-as-preprocessor-macro.patch
vsnprintf.patch
kasprintf.patch
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/arch/ia64/xen/Makefile
--- a/xen/arch/ia64/xen/Makefile Thu Nov 30 10:57:28 2006 +0000
+++ b/xen/arch/ia64/xen/Makefile Thu Nov 30 13:05:27 2006 +0000
@@ -1,3 +1,5 @@ obj-y += acpi.o
+obj-y += machine_kexec.o
+obj-y += crash.o
obj-y += acpi.o
obj-y += dom0_ops.o
obj-y += domain.o
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/arch/powerpc/Makefile
--- a/xen/arch/powerpc/Makefile Thu Nov 30 10:57:28 2006 +0000
+++ b/xen/arch/powerpc/Makefile Thu Nov 30 13:05:27 2006 +0000
@@ -40,6 +40,8 @@ obj-y += sysctl.o
obj-y += sysctl.o
obj-y += time.o
obj-y += usercopy.o
+obj-y += machine_kexec.o
+obj-y += crash.o
obj-$(debug) += 0opt.o
obj-$(crash_debug) += gdbstub.o
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile Thu Nov 30 10:57:28 2006 +0000
+++ b/xen/arch/x86/Makefile Thu Nov 30 13:05:27 2006 +0000
@@ -43,6 +43,8 @@ obj-y += traps.o
obj-y += traps.o
obj-y += usercopy.o
obj-y += x86_emulate.o
+obj-y += machine_kexec.o
+obj-y += crash.o
obj-$(crash_debug) += gdbstub.o
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Thu Nov 30 10:57:28 2006 +0000
+++ b/xen/arch/x86/setup.c Thu Nov 30 13:05:27 2006 +0000
@@ -27,6 +27,7 @@
#include <asm/shadow.h>
#include <asm/e820.h>
#include <acm/acm_hooks.h>
+#include <xen/kexec.h>
extern void dmi_scan_machine(void);
extern void generic_apic_probe(void);
@@ -273,6 +274,20 @@ static void srat_detect_node(int cpu)
printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
}
+void __init move_memory(unsigned long dst,
+ unsigned long src_start, unsigned long src_end)
+{
+#if defined(CONFIG_X86_32)
+ memmove((void *)dst, /* use low mapping */
+ (void *)src_start, /* use low mapping */
+ src_end - src_start);
+#elif defined(CONFIG_X86_64)
+ memmove(__va(dst),
+ __va(src_start),
+ src_end - src_start);
+#endif
+}
+
void __init __start_xen(multiboot_info_t *mbi)
{
char __cmdline[] = "", *cmdline = __cmdline;
@@ -284,6 +299,7 @@ void __init __start_xen(multiboot_info_t
unsigned long nr_pages, modules_length;
paddr_t s, e;
int i, e820_warn = 0, e820_raw_nr = 0, bytes = 0;
+ xen_kexec_reserve_t crash_area;
struct ns16550_defaults ns16550 = {
.data_bits = 8,
.parity = 'n',
@@ -415,15 +431,8 @@ void __init __start_xen(multiboot_info_t
initial_images_start = xenheap_phys_end;
initial_images_end = initial_images_start + modules_length;
-#if defined(CONFIG_X86_32)
- memmove((void *)initial_images_start, /* use low mapping */
- (void *)mod[0].mod_start, /* use low mapping */
- mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
-#elif defined(CONFIG_X86_64)
- memmove(__va(initial_images_start),
- __va(mod[0].mod_start),
- mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
-#endif
+ move_memory(initial_images_start,
+ mod[0].mod_start, mod[mbi->mods_count-1].mod_end);
/* Initialise boot-time allocator with all RAM situated after modules. */
xenheap_phys_start = init_boot_allocator(__pa(&_end));
@@ -471,10 +480,57 @@ void __init __start_xen(multiboot_info_t
#endif
}
+ machine_kexec_reserved(&crash_area);
+ if ( crash_area.size > 0 )
+ {
+ unsigned long kdump_start, kdump_size, k;
+
+ /* Mark images pages as free for now. */
+
+ init_boot_pages(initial_images_start, initial_images_end);
+
+ kdump_start = crash_area.start;
+ kdump_size = crash_area.size;
+
+ printk("Kdump: %luMB (%lukB) at 0x%lx\n",
+ kdump_size >> 20,
+ kdump_size >> 10,
+ kdump_start);
+
+ if ( (kdump_start & ~PAGE_MASK) || (kdump_size & ~PAGE_MASK) )
+ panic("Kdump parameters not page aligned\n");
+
+ kdump_start >>= PAGE_SHIFT;
+ kdump_size >>= PAGE_SHIFT;
+
+ /* allocate pages for Kdump memory area */
+
+ k = alloc_boot_pages_at(kdump_size, kdump_start);
+
+ if ( k != kdump_start )
+ panic("Unable to reserve Kdump memory\n");
+
+ /* allocate pages for relocated initial images */
+
+ k = ((initial_images_end - initial_images_start) & ~PAGE_MASK) ? 1 : 0;
+ k += (initial_images_end - initial_images_start) >> PAGE_SHIFT;
+
+ k = alloc_boot_pages(k, 1);
+
+ if ( !k )
+ panic("Unable to allocate initial images memory\n");
+
+ move_memory(k << PAGE_SHIFT, initial_images_start, initial_images_end);
+
+ initial_images_end -= initial_images_start;
+ initial_images_start = k << PAGE_SHIFT;
+ initial_images_end += initial_images_start;
+ }
+
memguard_init();
percpu_guard_areas();
- printk("System RAM: %luMB (%lukB)\n",
+ printk("System RAM: %luMB (%lukB)\n",
nr_pages >> (20 - PAGE_SHIFT),
nr_pages << (PAGE_SHIFT - 10));
total_pages = nr_pages;
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Thu Nov 30 10:57:28 2006 +0000
+++ b/xen/arch/x86/traps.c Thu Nov 30 13:05:27 2006 +0000
@@ -45,6 +45,7 @@
#include <xen/iocap.h>
#include <xen/nmi.h>
#include <xen/version.h>
+#include <xen/kexec.h>
#include <asm/shadow.h>
#include <asm/system.h>
#include <asm/io.h>
@@ -1633,6 +1634,7 @@ static void unknown_nmi_error(unsigned c
printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
printk("Dazed and confused, but trying to continue\n");
printk("Do you have a strange power saving mode enabled?\n");
+ machine_crash_kexec();
}
}
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S Thu Nov 30 10:57:28 2006 +0000
+++ b/xen/arch/x86/x86_32/entry.S Thu Nov 30 13:05:27 2006 +0000
@@ -659,6 +659,7 @@ ENTRY(hypercall_table)
.long do_hvm_op
.long do_sysctl /* 35 */
.long do_domctl
+ .long do_kexec_op
.rept NR_hypercalls-((.-hypercall_table)/4)
.long do_ni_hypercall
.endr
@@ -701,6 +702,7 @@ ENTRY(hypercall_args_table)
.byte 2 /* do_hvm_op */
.byte 1 /* do_sysctl */ /* 35 */
.byte 1 /* do_domctl */
+ .byte 2 /* do_kexec_op */
.rept NR_hypercalls-(.-hypercall_args_table)
.byte 0 /* do_ni_hypercall */
.endr
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S Thu Nov 30 10:57:28 2006 +0000
+++ b/xen/arch/x86/x86_64/entry.S Thu Nov 30 13:05:27 2006 +0000
@@ -559,6 +559,7 @@ ENTRY(hypercall_table)
.quad do_hvm_op
.quad do_sysctl /* 35 */
.quad do_domctl
+ .quad do_kexec_op
.rept NR_hypercalls-((.-hypercall_table)/8)
.quad do_ni_hypercall
.endr
@@ -601,6 +602,7 @@ ENTRY(hypercall_args_table)
.byte 2 /* do_hvm_op */
.byte 1 /* do_sysctl */ /* 35 */
.byte 1 /* do_domctl */
+ .byte 2 /* do_kexec */
.rept NR_hypercalls-(.-hypercall_args_table)
.byte 0 /* do_ni_hypercall */
.endr
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/common/Makefile
--- a/xen/common/Makefile Thu Nov 30 10:57:28 2006 +0000
+++ b/xen/common/Makefile Thu Nov 30 13:05:27 2006 +0000
@@ -7,6 +7,7 @@ obj-y += grant_table.o
obj-y += grant_table.o
obj-y += kernel.o
obj-y += keyhandler.o
+obj-y += kexec.o
obj-y += lib.o
obj-y += memory.o
obj-y += multicall.o
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/common/page_alloc.c
--- a/xen/common/page_alloc.c Thu Nov 30 10:57:28 2006 +0000
+++ b/xen/common/page_alloc.c Thu Nov 30 13:05:27 2006 +0000
@@ -237,24 +237,35 @@ void init_boot_pages(paddr_t ps, paddr_t
}
}
+unsigned long alloc_boot_pages_at(unsigned long nr_pfns, unsigned long pfn_at)
+{
+ unsigned long i;
+
+ for ( i = 0; i < nr_pfns; i++ )
+ if ( allocated_in_map(pfn_at + i) )
+ break;
+
+ if ( i == nr_pfns )
+ {
+ map_alloc(pfn_at, nr_pfns);
+ return pfn_at;
+ }
+
+ return 0;
+}
+
unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long pfn_align)
{
- unsigned long pg, i;
+ unsigned long pg, i = 0;
for ( pg = 0; (pg + nr_pfns) < max_page; pg += pfn_align )
{
- for ( i = 0; i < nr_pfns; i++ )
- if ( allocated_in_map(pg + i) )
- break;
-
- if ( i == nr_pfns )
- {
- map_alloc(pg, nr_pfns);
- return pg;
- }
- }
-
- return 0;
+ i = alloc_boot_pages_at(nr_pfns, pg);
+ if (i != 0)
+ break;
+ }
+
+ return i;
}
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/drivers/char/console.c
--- a/xen/drivers/char/console.c Thu Nov 30 10:57:28 2006 +0000
+++ b/xen/drivers/char/console.c Thu Nov 30 13:05:27 2006 +0000
@@ -27,6 +27,7 @@
#include <xen/guest_access.h>
#include <xen/shutdown.h>
#include <xen/vga.h>
+#include <xen/kexec.h>
#include <asm/current.h>
#include <asm/debugger.h>
#include <asm/io.h>
@@ -865,6 +866,8 @@ void panic(const char *fmt, ...)
debugger_trap_immediate();
+ machine_crash_kexec();
+
if ( opt_noreboot )
{
machine_halt();
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/include/asm-x86/fixmap.h
--- a/xen/include/asm-x86/fixmap.h Thu Nov 30 10:57:28 2006 +0000
+++ b/xen/include/asm-x86/fixmap.h Thu Nov 30 13:05:27 2006 +0000
@@ -16,6 +16,7 @@
#include <asm/apicdef.h>
#include <asm/acpi.h>
#include <asm/page.h>
+#include <xen/kexec.h>
/*
* Here we define all the compile-time 'special' virtual
@@ -36,6 +37,9 @@ enum fixed_addresses {
FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
FIX_HPET_BASE,
FIX_CYCLONE_TIMER,
+ FIX_KEXEC_BASE_0,
+ FIX_KEXEC_BASE_END = FIX_KEXEC_BASE_0 \
+ + ((KEXEC_XEN_NO_PAGES >> 1) * KEXEC_IMAGE_NR) - 1,
__end_of_fixed_addresses
};
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/include/asm-x86/hypercall.h
--- a/xen/include/asm-x86/hypercall.h Thu Nov 30 10:57:28 2006 +0000
+++ b/xen/include/asm-x86/hypercall.h Thu Nov 30 13:05:27 2006 +0000
@@ -6,6 +6,7 @@
#define __ASM_X86_HYPERCALL_H__
#include <public/physdev.h>
+#include <xen/types.h>
extern long
do_event_channel_op_compat(
@@ -87,6 +88,10 @@ arch_do_vcpu_op(
arch_do_vcpu_op(
int cmd, struct vcpu *v, XEN_GUEST_HANDLE(void) arg);
+extern int
+do_kexec(
+ unsigned long op, unsigned arg1, XEN_GUEST_HANDLE(void) uarg);
+
#ifdef __x86_64__
extern long
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/include/public/elfnote.h
--- a/xen/include/public/elfnote.h Thu Nov 30 10:57:28 2006 +0000
+++ b/xen/include/public/elfnote.h Thu Nov 30 13:05:27 2006 +0000
@@ -147,6 +147,25 @@
*/
#define XEN_ELFNOTE_HV_START_LOW 12
+/*
+ * System information exported through crash notes.
+ *
+ * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO
+ * note in case of a system crash. This note will contain various
+ * information about the system, see xen/include/xen/elfcore.h.
+ */
+#define XEN_ELFNOTE_CRASH_INFO 0x1000001
+
+/*
+ * System registers exported through crash notes.
+ *
+ * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS
+ * note per cpu in case of a system crash. This note is architecture
+ * specific and will contain registers not saved in the "CORE" note.
+ * See xen/include/xen/elfcore.h for more information.
+ */
+#define XEN_ELFNOTE_CRASH_REGS 0x1000002
+
#endif /* __XEN_PUBLIC_ELFNOTE_H__ */
/*
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/include/xen/elf.h
--- a/xen/include/xen/elf.h Thu Nov 30 10:57:28 2006 +0000
+++ b/xen/include/xen/elf.h Thu Nov 30 13:05:27 2006 +0000
@@ -452,18 +452,12 @@ unsigned int elf_hash(const unsigned cha
/*
* Note Definitions
*/
-typedef struct {
- Elf32_Word namesz;
- Elf32_Word descsz;
- Elf32_Word type;
-} Elf32_Note;
-
-typedef struct {
- Elf64_Half namesz;
- Elf64_Half descsz;
- Elf64_Half type;
-} Elf64_Note;
-
+
+typedef struct {
+ u32 namesz;
+ u32 descsz;
+ u32 type;
+} Elf_Note; /* same format for both 32-bit and 64-bit ELF */
#if defined(ELFSIZE)
#define CONCAT(x,y) __CONCAT(x,y)
@@ -486,7 +480,6 @@ typedef struct {
#define Elf_Addr Elf32_Addr
#define Elf_Off Elf32_Off
#define Elf_Nhdr Elf32_Nhdr
-#define Elf_Note Elf32_Note
#define ELF_R_SYM ELF32_R_SYM
#define ELF_R_TYPE ELF32_R_TYPE
@@ -511,7 +504,6 @@ typedef struct {
#define Elf_Addr Elf64_Addr
#define Elf_Off Elf64_Off
#define Elf_Nhdr Elf64_Nhdr
-#define Elf_Note Elf64_Note
#define ELF_R_SYM ELF64_R_SYM
#define ELF_R_TYPE ELF64_R_TYPE
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/include/xen/hypercall.h
--- a/xen/include/xen/hypercall.h Thu Nov 30 10:57:28 2006 +0000
+++ b/xen/include/xen/hypercall.h Thu Nov 30 13:05:27 2006 +0000
@@ -102,4 +102,10 @@ do_hvm_op(
unsigned long op,
XEN_GUEST_HANDLE(void) arg);
+extern long
+do_kexec_op(
+ unsigned long op,
+ int arg1,
+ XEN_GUEST_HANDLE(void) arg);
+
#endif /* __XEN_HYPERCALL_H__ */
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/include/xen/mm.h
--- a/xen/include/xen/mm.h Thu Nov 30 10:57:28 2006 +0000
+++ b/xen/include/xen/mm.h Thu Nov 30 13:05:27 2006 +0000
@@ -40,6 +40,7 @@ paddr_t init_boot_allocator(paddr_t bitm
paddr_t init_boot_allocator(paddr_t bitmap_start);
void init_boot_pages(paddr_t ps, paddr_t pe);
unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long pfn_align);
+unsigned long alloc_boot_pages_at(unsigned long nr_pfns, unsigned long pfn_at);
void end_boot_allocator(void);
/* Generic allocator. These functions are *not* interrupt-safe. */
diff -r 88935ae47fa9 -r 5d6be0099bdf linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,170 @@
+/*
+ * drivers/xen/core/machine_kexec.c
+ * handle transition of Linux booting another kernel
+ */
+
+#include <linux/kexec.h>
+#include <xen/interface/kexec.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <asm/hypercall.h>
+
+extern void machine_kexec_setup_load_arg(xen_kexec_image_t *xki,
+ struct kimage *image);
+
+int xen_max_nr_phys_cpus;
+struct resource xen_hypervisor_res;
+struct resource *xen_phys_cpus;
+
+void xen_machine_kexec_setup_resources(void)
+{
+ xen_kexec_range_t range;
+ struct resource *res;
+ int k = 0;
+
+ /* determine maximum number of physical cpus */
+
+ while (1) {
+ memset(&range, 0, sizeof(range));
+ range.range = KEXEC_RANGE_MA_CPU;
+ range.nr = k;
+
+ if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range))
+ break;
+
+ k++;
+ }
+
+ xen_max_nr_phys_cpus = k;
+
+ /* allocate xen_phys_cpus */
+
+ xen_phys_cpus = alloc_bootmem_low(k * sizeof(struct resource));
+ BUG_ON(!xen_phys_cpus);
+
+ /* fill in xen_phys_cpus with per-cpu crash note information */
+
+ for (k = 0; k < xen_max_nr_phys_cpus; k++) {
+ memset(&range, 0, sizeof(range));
+ range.range = KEXEC_RANGE_MA_CPU;
+ range.nr = k;
+
+ BUG_ON(HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range));
+
+ res = xen_phys_cpus + k;
+
+ memset(res, 0, sizeof(*res));
+ res->name = "Crash note";
+ res->start = range.start;
+ res->end = range.start + range.size - 1;
+ res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+ }
+
+ /* fill in xen_hypervisor_res with hypervisor machine address range */
+
+ memset(&range, 0, sizeof(range));
+ range.range = KEXEC_RANGE_MA_XEN;
+
+ BUG_ON(HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range));
+
+ xen_hypervisor_res.name = "Hypervisor code and data";
+ xen_hypervisor_res.start = range.start;
+ xen_hypervisor_res.end = range.start + range.size - 1;
+ xen_hypervisor_res.flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+
+ /* fill in crashk_res if range is reserved by hypervisor */
+
+ memset(&range, 0, sizeof(range));
+ range.range = KEXEC_RANGE_MA_CRASH;
+
+ BUG_ON(HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range));
+
+ if (range.size) {
+ crashk_res.start = range.start;
+ crashk_res.end = range.start + range.size - 1;
+ }
+}
+
+void xen_machine_kexec_register_resources(struct resource *res)
+{
+ int k;
+
+ request_resource(res, &xen_hypervisor_res);
+
+ for (k = 0; k < xen_max_nr_phys_cpus; k++)
+ request_resource(res, xen_phys_cpus + k);
+
+}
+
+static void setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
+{
+ machine_kexec_setup_load_arg(xki, image);
+
+ xki->indirection_page = image->head;
+ xki->start_address = image->start;
+}
+
+/*
+ * Load the image into xen so xen can kdump itself
+ * This might have been done in prepare, but prepare
+ * is currently called too early. It might make sense
+ * to move prepare, but for now, just add an extra hook.
+ */
+int xen_machine_kexec_load(struct kimage *image)
+{
+ xen_kexec_load_t xkl;
+
+ memset(&xkl, 0, sizeof(xkl));
+ xkl.type = image->type;
+ setup_load_arg(&xkl.image, image);
+ return HYPERVISOR_kexec_op(KEXEC_CMD_kexec_load, &xkl);
+}
+
+/*
+ * Unload the image that was stored by machine_kexec_load()
+ * This might have been done in machine_kexec_cleanup() but it
+ * is called too late, and its possible xen could try and kdump
+ * using resources that have been freed.
+ */
+void xen_machine_kexec_unload(struct kimage *image)
+{
+ xen_kexec_load_t xkl;
+
+ memset(&xkl, 0, sizeof(xkl));
+ xkl.type = image->type;
+ HYPERVISOR_kexec_op(KEXEC_CMD_kexec_unload, &xkl);
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ *
+ * This has the hypervisor move to the prefered reboot CPU,
+ * stop all CPUs and kexec. That is it combines machine_shutdown()
+ * and machine_kexec() in Linux kexec terms.
+ */
+NORET_TYPE void xen_machine_kexec(struct kimage *image)
+{
+ xen_kexec_exec_t xke;
+
+ memset(&xke, 0, sizeof(xke));
+ xke.type = image->type;
+ HYPERVISOR_kexec_op(KEXEC_CMD_kexec, &xke);
+ panic("KEXEC_CMD_kexec hypercall should not return\n");
+}
+
+void machine_shutdown(void)
+{
+ /* do nothing */
+}
+
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 88935ae47fa9 -r 5d6be0099bdf patches/linux-2.6.16.33/git-2a8a3d5b65e86ec1dfef7d268c64a909eab94af7.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.33/git-2a8a3d5b65e86ec1dfef7d268c64a909eab94af7.patch Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,62 @@
+From: Eric W. Biederman <ebiederm@xxxxxxxxxxxx>
+Date: Sun, 30 Jul 2006 10:03:20 +0000 (-0700)
+Subject: [PATCH] machine_kexec.c: Fix the description of segment handling
+X-Git-Tag: v2.6.18-rc4
+X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=2a8a3d5b65e86ec1dfef7d268c64a909eab94af7
+
+[PATCH] machine_kexec.c: Fix the description of segment handling
+
+One of my original comments in machine_kexec was unclear
+and this should fix it.
+
+Signed-off-by: Eric W. Biederman <ebiederm@xxxxxxxxxxxx>
+Cc: Andi Kleen <ak@xxxxxx>
+Acked-by: Horms <horms@xxxxxxxxxxxx>
+Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
+Signed-off-by: Linus Torvalds <torvalds@xxxxxxxx>
+---
+
+--- a/arch/i386/kernel/machine_kexec.c
++++ b/arch/i386/kernel/machine_kexec.c
+@@ -189,14 +189,11 @@ NORET_TYPE void machine_kexec(struct kim
+ memcpy((void *)reboot_code_buffer, relocate_new_kernel,
+ relocate_new_kernel_size);
+
+- /* The segment registers are funny things, they are
+- * automatically loaded from a table, in memory wherever you
+- * set them to a specific selector, but this table is never
+- * accessed again you set the segment to a different selector.
+- *
+- * The more common model is are caches where the behide
+- * the scenes work is done, but is also dropped at arbitrary
+- * times.
++ /* The segment registers are funny things, they have both a
++ * visible and an invisible part. Whenever the visible part is
++ * set to a specific selector, the invisible part is loaded
++ * with from a table in memory. At no other time is the
++ * descriptor table in memory accessed.
+ *
+ * I take advantage of this here by force loading the
+ * segments, before I zap the gdt with an invalid value.
+--- a/arch/x86_64/kernel/machine_kexec.c
++++ b/arch/x86_64/kernel/machine_kexec.c
+@@ -207,14 +207,11 @@ NORET_TYPE void machine_kexec(struct kim
+ __flush_tlb();
+
+
+- /* The segment registers are funny things, they are
+- * automatically loaded from a table, in memory wherever you
+- * set them to a specific selector, but this table is never
+- * accessed again unless you set the segment to a different selector.
+- *
+- * The more common model are caches where the behide
+- * the scenes work is done, but is also dropped at arbitrary
+- * times.
++ /* The segment registers are funny things, they have both a
++ * visible and an invisible part. Whenever the visible part is
++ * set to a specific selector, the invisible part is loaded
++ * with from a table in memory. At no other time is the
++ * descriptor table in memory accessed.
+ *
+ * I take advantage of this here by force loading the
+ * segments, before I zap the gdt with an invalid value.
diff -r 88935ae47fa9 -r 5d6be0099bdf patches/linux-2.6.16.33/git-2efe55a9cec8418f0e0cde3dc3787a42fddc4411.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.33/git-2efe55a9cec8418f0e0cde3dc3787a42fddc4411.patch Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,93 @@
+From: Tobias Klauser <tklauser@xxxxxxxxxxx>
+Date: Mon, 26 Jun 2006 16:57:34 +0000 (+0200)
+Subject: Storage class should be first
+X-Git-Tag: v2.6.18-rc1
+X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=2efe55a9cec8418f0e0cde3dc3787a42fddc4411
+
+Storage class should be first
+
+Storage class should be before const
+
+Signed-off-by: Tobias Klauser <tklauser@xxxxxxxxxxx>
+Signed-off-by: Adrian Bunk <bunk@xxxxxxxxx>
+---
+
+--- a/arch/i386/kernel/machine_kexec.c
++++ b/arch/i386/kernel/machine_kexec.c
+@@ -133,9 +133,9 @@ typedef asmlinkage NORET_TYPE void (*rel
+ unsigned long start_address,
+ unsigned int has_pae) ATTRIB_NORET;
+
+-const extern unsigned char relocate_new_kernel[];
++extern const unsigned char relocate_new_kernel[];
+ extern void relocate_new_kernel_end(void);
+-const extern unsigned int relocate_new_kernel_size;
++extern const unsigned int relocate_new_kernel_size;
+
+ /*
+ * A architecture hook called to validate the
+--- a/arch/powerpc/kernel/machine_kexec_32.c
++++ b/arch/powerpc/kernel/machine_kexec_32.c
+@@ -30,8 +30,8 @@ typedef NORET_TYPE void (*relocate_new_k
+ */
+ void default_machine_kexec(struct kimage *image)
+ {
+- const extern unsigned char relocate_new_kernel[];
+- const extern unsigned int relocate_new_kernel_size;
++ extern const unsigned char relocate_new_kernel[];
++ extern const unsigned int relocate_new_kernel_size;
+ unsigned long page_list;
+ unsigned long reboot_code_buffer, reboot_code_buffer_phys;
+ relocate_new_kernel_t rnk;
+--- a/arch/ppc/kernel/machine_kexec.c
++++ b/arch/ppc/kernel/machine_kexec.c
+@@ -25,8 +25,8 @@ typedef NORET_TYPE void (*relocate_new_k
+ unsigned long reboot_code_buffer,
+ unsigned long start_address) ATTRIB_NORET;
+
+-const extern unsigned char relocate_new_kernel[];
+-const extern unsigned int relocate_new_kernel_size;
++extern const unsigned char relocate_new_kernel[];
++extern const unsigned int relocate_new_kernel_size;
+
+ void machine_shutdown(void)
+ {
+--- a/arch/s390/kernel/machine_kexec.c
++++ b/arch/s390/kernel/machine_kexec.c
+@@ -27,8 +27,8 @@ static void kexec_halt_all_cpus(void *);
+
+ typedef void (*relocate_kernel_t) (kimage_entry_t *, unsigned long);
+
+-const extern unsigned char relocate_kernel[];
+-const extern unsigned long long relocate_kernel_len;
++extern const unsigned char relocate_kernel[];
++extern const unsigned long long relocate_kernel_len;
+
+ int
+ machine_kexec_prepare(struct kimage *image)
+--- a/arch/sh/kernel/machine_kexec.c
++++ b/arch/sh/kernel/machine_kexec.c
+@@ -25,8 +25,8 @@ typedef NORET_TYPE void (*relocate_new_k
+ unsigned long start_address,
+ unsigned long vbr_reg) ATTRIB_NORET;
+
+-const extern unsigned char relocate_new_kernel[];
+-const extern unsigned int relocate_new_kernel_size;
++extern const unsigned char relocate_new_kernel[];
++extern const unsigned int relocate_new_kernel_size;
+ extern void *gdb_vbr_vector;
+
+ /*
+--- a/arch/x86_64/kernel/machine_kexec.c
++++ b/arch/x86_64/kernel/machine_kexec.c
+@@ -149,8 +149,8 @@ typedef NORET_TYPE void (*relocate_new_k
+ unsigned long start_address,
+ unsigned long pgtable) ATTRIB_NORET;
+
+-const extern unsigned char relocate_new_kernel[];
+-const extern unsigned long relocate_new_kernel_size;
++extern const unsigned char relocate_new_kernel[];
++extern const unsigned long relocate_new_kernel_size;
+
+ int machine_kexec_prepare(struct kimage *image)
+ {
diff -r 88935ae47fa9 -r 5d6be0099bdf patches/linux-2.6.16.33/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.33/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,401 @@
+From: Magnus Damm <magnus@xxxxxxxxxxxxx>
+Date: Tue, 26 Sep 2006 08:52:38 +0000 (+0200)
+Subject: [PATCH] i386: Avoid overwriting the current pgd (V4, i386)
+X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=3566561bfadffcb5dbc85d576be80c0dbf2cccc9
+
+[PATCH] i386: Avoid overwriting the current pgd (V4, i386)
+
+kexec: Avoid overwriting the current pgd (V4, i386)
+
+This patch upgrades the i386-specific kexec code to avoid overwriting the
+current pgd. Overwriting the current pgd is bad when CONFIG_CRASH_DUMP is used
+to start a secondary kernel that dumps the memory of the previous kernel.
+
+The code introduces a new set of page tables. These tables are used to provide
+an executable identity mapping without overwriting the current pgd.
+
+Signed-off-by: Magnus Damm <magnus@xxxxxxxxxxxxx>
+Signed-off-by: Andi Kleen <ak@xxxxxxx>
+---
+
+--- a/arch/i386/kernel/machine_kexec.c
++++ b/arch/i386/kernel/machine_kexec.c
+@@ -21,70 +21,13 @@
+ #include <asm/system.h>
+
+ #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
+-
+-#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+-#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+-#define L2_ATTR (_PAGE_PRESENT)
+-
+-#define LEVEL0_SIZE (1UL << 12UL)
+-
+-#ifndef CONFIG_X86_PAE
+-#define LEVEL1_SIZE (1UL << 22UL)
+-static u32 pgtable_level1[1024] PAGE_ALIGNED;
+-
+-static void identity_map_page(unsigned long address)
+-{
+- unsigned long level1_index, level2_index;
+- u32 *pgtable_level2;
+-
+- /* Find the current page table */
+- pgtable_level2 = __va(read_cr3());
+-
+- /* Find the indexes of the physical address to identity map */
+- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+- level2_index = address / LEVEL1_SIZE;
+-
+- /* Identity map the page table entry */
+- pgtable_level1[level1_index] = address | L0_ATTR;
+- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+-
+- /* Flush the tlb so the new mapping takes effect.
+- * Global tlb entries are not flushed but that is not an issue.
+- */
+- load_cr3(pgtable_level2);
+-}
+-
+-#else
+-#define LEVEL1_SIZE (1UL << 21UL)
+-#define LEVEL2_SIZE (1UL << 30UL)
+-static u64 pgtable_level1[512] PAGE_ALIGNED;
+-static u64 pgtable_level2[512] PAGE_ALIGNED;
+-
+-static void identity_map_page(unsigned long address)
+-{
+- unsigned long level1_index, level2_index, level3_index;
+- u64 *pgtable_level3;
+-
+- /* Find the current page table */
+- pgtable_level3 = __va(read_cr3());
+-
+- /* Find the indexes of the physical address to identity map */
+- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+- level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
+- level3_index = address / LEVEL2_SIZE;
+-
+- /* Identity map the page table entry */
+- pgtable_level1[level1_index] = address | L0_ATTR;
+- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+- set_64bit(&pgtable_level3[level3_index],
+- __pa(pgtable_level2) | L2_ATTR);
+-
+- /* Flush the tlb so the new mapping takes effect.
+- * Global tlb entries are not flushed but that is not an issue.
+- */
+- load_cr3(pgtable_level3);
+-}
++static u32 kexec_pgd[1024] PAGE_ALIGNED;
++#ifdef CONFIG_X86_PAE
++static u32 kexec_pmd0[1024] PAGE_ALIGNED;
++static u32 kexec_pmd1[1024] PAGE_ALIGNED;
+ #endif
++static u32 kexec_pte0[1024] PAGE_ALIGNED;
++static u32 kexec_pte1[1024] PAGE_ALIGNED;
+
+ static void set_idt(void *newidt, __u16 limit)
+ {
+@@ -128,16 +71,6 @@ static void load_segments(void)
+ #undef __STR
+ }
+
+-typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
+- unsigned long indirection_page,
+- unsigned long reboot_code_buffer,
+- unsigned long start_address,
+- unsigned int has_pae) ATTRIB_NORET;
+-
+-extern const unsigned char relocate_new_kernel[];
+-extern void relocate_new_kernel_end(void);
+-extern const unsigned int relocate_new_kernel_size;
+-
+ /*
+ * A architecture hook called to validate the
+ * proposed image and prepare the control pages
+@@ -170,25 +103,29 @@ void machine_kexec_cleanup(struct kimage
+ */
+ NORET_TYPE void machine_kexec(struct kimage *image)
+ {
+- unsigned long page_list;
+- unsigned long reboot_code_buffer;
+-
+- relocate_new_kernel_t rnk;
++ unsigned long page_list[PAGES_NR];
++ void *control_page;
+
+ /* Interrupts aren't acceptable while we reboot */
+ local_irq_disable();
+
+- /* Compute some offsets */
+- reboot_code_buffer = page_to_pfn(image->control_code_page)
+- << PAGE_SHIFT;
+- page_list = image->head;
+-
+- /* Set up an identity mapping for the reboot_code_buffer */
+- identity_map_page(reboot_code_buffer);
+-
+- /* copy it out */
+- memcpy((void *)reboot_code_buffer, relocate_new_kernel,
+- relocate_new_kernel_size);
++ control_page = page_address(image->control_code_page);
++ memcpy(control_page, relocate_kernel, PAGE_SIZE);
++
++ page_list[PA_CONTROL_PAGE] = __pa(control_page);
++ page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
++ page_list[PA_PGD] = __pa(kexec_pgd);
++ page_list[VA_PGD] = (unsigned long)kexec_pgd;
++#ifdef CONFIG_X86_PAE
++ page_list[PA_PMD_0] = __pa(kexec_pmd0);
++ page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
++ page_list[PA_PMD_1] = __pa(kexec_pmd1);
++ page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
++#endif
++ page_list[PA_PTE_0] = __pa(kexec_pte0);
++ page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
++ page_list[PA_PTE_1] = __pa(kexec_pte1);
++ page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+
+ /* The segment registers are funny things, they have both a
+ * visible and an invisible part. Whenever the visible part is
+@@ -207,8 +144,8 @@ NORET_TYPE void machine_kexec(struct kim
+ set_idt(phys_to_virt(0),0);
+
+ /* now call it */
+- rnk = (relocate_new_kernel_t) reboot_code_buffer;
+- (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
++ relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
++ image->start, cpu_has_pae);
+ }
+
+ /* crashkernel=size@addr specifies the location to reserve for
+--- a/arch/i386/kernel/relocate_kernel.S
++++ b/arch/i386/kernel/relocate_kernel.S
+@@ -7,16 +7,138 @@
+ */
+
+ #include <linux/linkage.h>
++#include <asm/page.h>
++#include <asm/kexec.h>
++
++/*
++ * Must be relocatable PIC code callable as a C function
++ */
++
++#define PTR(x) (x << 2)
++#define PAGE_ALIGNED (1 << PAGE_SHIFT)
++#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
++#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */
++
++ .text
++ .align PAGE_ALIGNED
++ .globl relocate_kernel
++relocate_kernel:
++ movl 8(%esp), %ebp /* list of pages */
++
++#ifdef CONFIG_X86_PAE
++ /* map the control page at its virtual address */
++
++ movl PTR(VA_PGD)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0xc0000000, %eax
++ shrl $27, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PMD_0)(%ebp), %edx
++ orl $PAE_PGD_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PMD_0)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x3fe00000, %eax
++ shrl $18, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PTE_0)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PTE_0)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x001ff000, %eax
++ shrl $9, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ /* identity map the control page at its physical address */
++
++ movl PTR(VA_PGD)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0xc0000000, %eax
++ shrl $27, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PMD_1)(%ebp), %edx
++ orl $PAE_PGD_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PMD_1)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x3fe00000, %eax
++ shrl $18, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PTE_1)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PTE_1)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x001ff000, %eax
++ shrl $9, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++#else
++ /* map the control page at its virtual address */
++
++ movl PTR(VA_PGD)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0xffc00000, %eax
++ shrl $20, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PTE_0)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PTE_0)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x003ff000, %eax
++ shrl $10, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ /* identity map the control page at its physical address */
++
++ movl PTR(VA_PGD)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0xffc00000, %eax
++ shrl $20, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PTE_1)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PTE_1)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x003ff000, %eax
++ shrl $10, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++#endif
+
+- /*
+- * Must be relocatable PIC code callable as a C function, that once
+- * it starts can not use the previous processes stack.
+- */
+- .globl relocate_new_kernel
+ relocate_new_kernel:
+ /* read the arguments and say goodbye to the stack */
+ movl 4(%esp), %ebx /* page_list */
+- movl 8(%esp), %ebp /* reboot_code_buffer */
++ movl 8(%esp), %ebp /* list of pages */
+ movl 12(%esp), %edx /* start address */
+ movl 16(%esp), %ecx /* cpu_has_pae */
+
+@@ -24,11 +146,26 @@ relocate_new_kernel:
+ pushl $0
+ popfl
+
+- /* set a new stack at the bottom of our page... */
+- lea 4096(%ebp), %esp
++ /* get physical address of control page now */
++ /* this is impossible after page table switch */
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edi
++
++ /* switch to new set of page tables */
++ movl PTR(PA_PGD)(%ebp), %eax
++ movl %eax, %cr3
++
++ /* setup a new stack at the end of the physical control page */
++ lea 4096(%edi), %esp
+
+- /* store the parameters back on the stack */
+- pushl %edx /* store the start address */
++ /* jump to identity mapped page */
++ movl %edi, %eax
++ addl $(identity_mapped - relocate_kernel), %eax
++ pushl %eax
++ ret
++
++identity_mapped:
++ /* store the start address on the stack */
++ pushl %edx
+
+ /* Set cr0 to a known state:
+ * 31 0 == Paging disabled
+@@ -113,8 +250,3 @@ relocate_new_kernel:
+ xorl %edi, %edi
+ xorl %ebp, %ebp
+ ret
+-relocate_new_kernel_end:
+-
+- .globl relocate_new_kernel_size
+-relocate_new_kernel_size:
+- .long relocate_new_kernel_end - relocate_new_kernel
+--- a/include/asm-i386/kexec.h
++++ b/include/asm-i386/kexec.h
+@@ -1,6 +1,26 @@
+ #ifndef _I386_KEXEC_H
+ #define _I386_KEXEC_H
+
++#define PA_CONTROL_PAGE 0
++#define VA_CONTROL_PAGE 1
++#define PA_PGD 2
++#define VA_PGD 3
++#define PA_PTE_0 4
++#define VA_PTE_0 5
++#define PA_PTE_1 6
++#define VA_PTE_1 7
++#ifdef CONFIG_X86_PAE
++#define PA_PMD_0 8
++#define VA_PMD_0 9
++#define PA_PMD_1 10
++#define VA_PMD_1 11
++#define PAGES_NR 12
++#else
++#define PAGES_NR 8
++#endif
++
++#ifndef __ASSEMBLY__
++
+ #include <asm/fixmap.h>
+ #include <asm/ptrace.h>
+ #include <asm/string.h>
+@@ -72,5 +92,12 @@ static inline void crash_setup_regs(stru
+ newregs->eip = (unsigned long)current_text_addr();
+ }
+ }
++asmlinkage NORET_TYPE void
++relocate_kernel(unsigned long indirection_page,
++ unsigned long control_page,
++ unsigned long start_address,
++ unsigned int has_pae) ATTRIB_NORET;
++
++#endif /* __ASSEMBLY__ */
+
+ #endif /* _I386_KEXEC_H */
diff -r 88935ae47fa9 -r 5d6be0099bdf patches/linux-2.6.16.33/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.33/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,375 @@
+From: Magnus Damm <magnus@xxxxxxxxxxxxx>
+Date: Tue, 26 Sep 2006 08:52:38 +0000 (+0200)
+Subject: [PATCH] Avoid overwriting the current pgd (V4, x86_64)
+X-Git-Tag: v2.6.19-rc1
+X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f
+
+[PATCH] Avoid overwriting the current pgd (V4, x86_64)
+
+kexec: Avoid overwriting the current pgd (V4, x86_64)
+
+This patch upgrades the x86_64-specific kexec code to avoid overwriting the
+current pgd. Overwriting the current pgd is bad when CONFIG_CRASH_DUMP is used
+to start a secondary kernel that dumps the memory of the previous kernel.
+
+The code introduces a new set of page tables. These tables are used to provide
+an executable identity mapping without overwriting the current pgd.
+
+Signed-off-by: Magnus Damm <magnus@xxxxxxxxxxxxx>
+Signed-off-by: Andi Kleen <ak@xxxxxxx>
+---
+
+--- a/arch/x86_64/kernel/machine_kexec.c
++++ b/arch/x86_64/kernel/machine_kexec.c
+@@ -15,6 +15,15 @@
+ #include <asm/mmu_context.h>
+ #include <asm/io.h>
+
++#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
++static u64 kexec_pgd[512] PAGE_ALIGNED;
++static u64 kexec_pud0[512] PAGE_ALIGNED;
++static u64 kexec_pmd0[512] PAGE_ALIGNED;
++static u64 kexec_pte0[512] PAGE_ALIGNED;
++static u64 kexec_pud1[512] PAGE_ALIGNED;
++static u64 kexec_pmd1[512] PAGE_ALIGNED;
++static u64 kexec_pte1[512] PAGE_ALIGNED;
++
+ static void init_level2_page(pmd_t *level2p, unsigned long addr)
+ {
+ unsigned long end_addr;
+@@ -144,32 +153,19 @@ static void load_segments(void)
+ );
+ }
+
+-typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page,
+- unsigned long control_code_buffer,
+- unsigned long start_address,
+- unsigned long pgtable) ATTRIB_NORET;
+-
+-extern const unsigned char relocate_new_kernel[];
+-extern const unsigned long relocate_new_kernel_size;
+-
+ int machine_kexec_prepare(struct kimage *image)
+ {
+- unsigned long start_pgtable, control_code_buffer;
++ unsigned long start_pgtable;
+ int result;
+
+ /* Calculate the offsets */
+ start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
+- control_code_buffer = start_pgtable + PAGE_SIZE;
+
+ /* Setup the identity mapped 64bit page table */
+ result = init_pgtable(image, start_pgtable);
+ if (result)
+ return result;
+
+- /* Place the code in the reboot code buffer */
+- memcpy(__va(control_code_buffer), relocate_new_kernel,
+- relocate_new_kernel_size);
+-
+ return 0;
+ }
+
+@@ -184,28 +180,34 @@ void machine_kexec_cleanup(struct kimage
+ */
+ NORET_TYPE void machine_kexec(struct kimage *image)
+ {
+- unsigned long page_list;
+- unsigned long control_code_buffer;
+- unsigned long start_pgtable;
+- relocate_new_kernel_t rnk;
++ unsigned long page_list[PAGES_NR];
++ void *control_page;
+
+ /* Interrupts aren't acceptable while we reboot */
+ local_irq_disable();
+
+- /* Calculate the offsets */
+- page_list = image->head;
+- start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
+- control_code_buffer = start_pgtable + PAGE_SIZE;
++ control_page = page_address(image->control_code_page) + PAGE_SIZE;
++ memcpy(control_page, relocate_kernel, PAGE_SIZE);
+
+- /* Set the low half of the page table to my identity mapped
+- * page table for kexec. Leave the high half pointing at the
+- * kernel pages. Don't bother to flush the global pages
+- * as that will happen when I fully switch to my identity mapped
+- * page table anyway.
+- */
+- memcpy(__va(read_cr3()), __va(start_pgtable), PAGE_SIZE/2);
+- __flush_tlb();
++ page_list[PA_CONTROL_PAGE] = __pa(control_page);
++ page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
++ page_list[PA_PGD] = __pa(kexec_pgd);
++ page_list[VA_PGD] = (unsigned long)kexec_pgd;
++ page_list[PA_PUD_0] = __pa(kexec_pud0);
++ page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
++ page_list[PA_PMD_0] = __pa(kexec_pmd0);
++ page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
++ page_list[PA_PTE_0] = __pa(kexec_pte0);
++ page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
++ page_list[PA_PUD_1] = __pa(kexec_pud1);
++ page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
++ page_list[PA_PMD_1] = __pa(kexec_pmd1);
++ page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
++ page_list[PA_PTE_1] = __pa(kexec_pte1);
++ page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+
++ page_list[PA_TABLE_PAGE] =
++ (unsigned long)__pa(page_address(image->control_code_page));
+
+ /* The segment registers are funny things, they have both a
+ * visible and an invisible part. Whenever the visible part is
+@@ -222,9 +224,10 @@ NORET_TYPE void machine_kexec(struct kim
+ */
+ set_gdt(phys_to_virt(0),0);
+ set_idt(phys_to_virt(0),0);
++
+ /* now call it */
+- rnk = (relocate_new_kernel_t) control_code_buffer;
+- (*rnk)(page_list, control_code_buffer, image->start, start_pgtable);
++ relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
++ image->start);
+ }
+
+ /* crashkernel=size@addr specifies the location to reserve for
+--- a/arch/x86_64/kernel/relocate_kernel.S
++++ b/arch/x86_64/kernel/relocate_kernel.S
+@@ -7,31 +7,169 @@
+ */
+
+ #include <linux/linkage.h>
++#include <asm/page.h>
++#include <asm/kexec.h>
+
+- /*
+- * Must be relocatable PIC code callable as a C function, that once
+- * it starts can not use the previous processes stack.
+- */
+- .globl relocate_new_kernel
++/*
++ * Must be relocatable PIC code callable as a C function
++ */
++
++#define PTR(x) (x << 3)
++#define PAGE_ALIGNED (1 << PAGE_SHIFT)
++#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
++
++ .text
++ .align PAGE_ALIGNED
+ .code64
++ .globl relocate_kernel
++relocate_kernel:
++ /* %rdi indirection_page
++ * %rsi page_list
++ * %rdx start address
++ */
++
++ /* map the control page at its virtual address */
++
++ movq $0x0000ff8000000000, %r10 /* mask */
++ mov $(39 - 3), %cl /* bits to shift */
++ movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PGD)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_PUD_0)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ shrq $9, %r10
++ sub $9, %cl
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PUD_0)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_PMD_0)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ shrq $9, %r10
++ sub $9, %cl
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PMD_0)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_PTE_0)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ shrq $9, %r10
++ sub $9, %cl
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PTE_0)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ /* identity map the control page at its physical address */
++
++ movq $0x0000ff8000000000, %r10 /* mask */
++ mov $(39 - 3), %cl /* bits to shift */
++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PGD)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_PUD_1)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ shrq $9, %r10
++ sub $9, %cl
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PUD_1)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_PMD_1)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ shrq $9, %r10
++ sub $9, %cl
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PMD_1)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_PTE_1)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ shrq $9, %r10
++ sub $9, %cl
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PTE_1)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
+ relocate_new_kernel:
+- /* %rdi page_list
+- * %rsi reboot_code_buffer
++ /* %rdi indirection_page
++ * %rsi page_list
+ * %rdx start address
+- * %rcx page_table
+- * %r8 arg5
+- * %r9 arg6
+ */
+
+ /* zero out flags, and disable interrupts */
+ pushq $0
+ popfq
+
+- /* set a new stack at the bottom of our page... */
+- lea 4096(%rsi), %rsp
++ /* get physical address of control page now */
++ /* this is impossible after page table switch */
++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
++
++ /* get physical address of page table now too */
++ movq PTR(PA_TABLE_PAGE)(%rsi), %rcx
++
++ /* switch to new set of page tables */
++ movq PTR(PA_PGD)(%rsi), %r9
++ movq %r9, %cr3
++
++ /* setup a new stack at the end of the physical control page */
++ lea 4096(%r8), %rsp
++
++ /* jump to identity mapped page */
++ addq $(identity_mapped - relocate_kernel), %r8
++ pushq %r8
++ ret
+
+- /* store the parameters back on the stack */
+- pushq %rdx /* store the start address */
++identity_mapped:
++ /* store the start address on the stack */
++ pushq %rdx
+
+ /* Set cr0 to a known state:
+ * 31 1 == Paging enabled
+@@ -136,8 +274,3 @@ relocate_new_kernel:
+ xorq %r15, %r15
+
+ ret
+-relocate_new_kernel_end:
+-
+- .globl relocate_new_kernel_size
+-relocate_new_kernel_size:
+- .quad relocate_new_kernel_end - relocate_new_kernel
+--- a/include/asm-x86_64/kexec.h
++++ b/include/asm-x86_64/kexec.h
+@@ -1,6 +1,27 @@
+ #ifndef _X86_64_KEXEC_H
+ #define _X86_64_KEXEC_H
+
++#define PA_CONTROL_PAGE 0
++#define VA_CONTROL_PAGE 1
++#define PA_PGD 2
++#define VA_PGD 3
++#define PA_PUD_0 4
++#define VA_PUD_0 5
++#define PA_PMD_0 6
++#define VA_PMD_0 7
++#define PA_PTE_0 8
++#define VA_PTE_0 9
++#define PA_PUD_1 10
++#define VA_PUD_1 11
++#define PA_PMD_1 12
++#define VA_PMD_1 13
++#define PA_PTE_1 14
++#define VA_PTE_1 15
++#define PA_TABLE_PAGE 16
++#define PAGES_NR 17
++
++#ifndef __ASSEMBLY__
++
+ #include <linux/string.h>
+
+ #include <asm/page.h>
+@@ -64,4 +85,12 @@ static inline void crash_setup_regs(stru
+ newregs->rip = (unsigned long)current_text_addr();
+ }
+ }
++
++NORET_TYPE void
++relocate_kernel(unsigned long indirection_page,
++ unsigned long page_list,
++ unsigned long start_address) ATTRIB_NORET;
++
++#endif /* __ASSEMBLY__ */
++
+ #endif /* _X86_64_KEXEC_H */
diff -r 88935ae47fa9 -r 5d6be0099bdf patches/linux-2.6.16.33/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.33/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,47 @@
+commit dbaab49f92ff6ae6255762a948375e4036cbdbd2
+Author: Vivek Goyal <vgoyal@xxxxxxxxxx>
+Date: Sat Oct 21 18:37:03 2006 +0200
+
+ [PATCH] x86-64: Overlapping program headers in physical addr space fix
+
+ o A recent change to vmlinux.ld.S file broke kexec as now resulting vmlinux
+ program headers are overlapping in physical address space.
+
+ o Now all the vsyscall related sections are placed after data and after
+ that mostly init data sections are placed. To avoid physical overlap
+ among phdrs, there are three possible solutions.
+ - Place vsyscall sections also in data phdrs instead of user
+      - move vsyscall sections after init data in bss.
+ - create another phdrs say data.init and move all the sections
+ after vsyscall into this new phdr.
+
+ o This patch implements the third solution.
+
+ Signed-off-by: Vivek Goyal <vgoyal@xxxxxxxxxx>
+ Signed-off-by: Andi Kleen <ak@xxxxxxx>
+ Cc: Magnus Damm <magnus@xxxxxxxxxxxxx>
+ Cc: Andi Kleen <ak@xxxxxxx>
+ Cc: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>
+ Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
+
+diff --git a/arch/x86_64/kernel/vmlinux.lds.S
b/arch/x86_64/kernel/vmlinux.lds.S
+index b9df2ab..1283614 100644
+--- a/arch/x86_64/kernel/vmlinux.lds.S
++++ b/arch/x86_64/kernel/vmlinux.lds.S
+@@ -17,6 +17,7 @@ PHDRS {
+ text PT_LOAD FLAGS(5); /* R_E */
+ data PT_LOAD FLAGS(7); /* RWE */
+ user PT_LOAD FLAGS(7); /* RWE */
++ data.init PT_LOAD FLAGS(7); /* RWE */
+ note PT_NOTE FLAGS(4); /* R__ */
+ }
+ SECTIONS
+@@ -131,7 +132,7 @@ SECTIONS
+ . = ALIGN(8192); /* init_task */
+ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
+ *(.data.init_task)
+- } :data
++ }:data.init
+
+ . = ALIGN(4096);
+ .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
diff -r 88935ae47fa9 -r 5d6be0099bdf patches/linux-2.6.16.33/kexec-generic.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.33/kexec-generic.patch Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,228 @@
+--- 0001/include/linux/kexec.h
++++ work/include/linux/kexec.h
+@@ -31,6 +31,13 @@
+ #error KEXEC_ARCH not defined
+ #endif
+
++#ifndef KEXEC_ARCH_HAS_PAGE_MACROS
++#define kexec_page_to_pfn(page) page_to_pfn(page)
++#define kexec_pfn_to_page(pfn) pfn_to_page(pfn)
++#define kexec_virt_to_phys(addr) virt_to_phys(addr)
++#define kexec_phys_to_virt(addr) phys_to_virt(addr)
++#endif
++
+ /*
+ * This structure is used to hold the arguments that are used when loading
+ * kernel binaries.
+@@ -91,6 +98,13 @@ struct kimage {
+ extern NORET_TYPE void machine_kexec(struct kimage *image) ATTRIB_NORET;
+ extern int machine_kexec_prepare(struct kimage *image);
+ extern void machine_kexec_cleanup(struct kimage *image);
++#ifdef CONFIG_XEN
++extern int xen_machine_kexec_load(struct kimage *image);
++extern void xen_machine_kexec_unload(struct kimage *image);
++extern NORET_TYPE void xen_machine_kexec(struct kimage *image) ATTRIB_NORET;
++extern void xen_machine_kexec_setup_resources(void);
++extern void xen_machine_kexec_register_resources(struct resource *res);
++#endif
+ extern asmlinkage long sys_kexec_load(unsigned long entry,
+ unsigned long nr_segments,
+ struct kexec_segment __user *segments,
+--- 0001/kernel/kexec.c
++++ work/kernel/kexec.c
+@@ -403,7 +403,7 @@ static struct page *kimage_alloc_normal_
+ pages = kimage_alloc_pages(GFP_KERNEL, order);
+ if (!pages)
+ break;
+- pfn = page_to_pfn(pages);
++ pfn = kexec_page_to_pfn(pages);
+ epfn = pfn + count;
+ addr = pfn << PAGE_SHIFT;
+ eaddr = epfn << PAGE_SHIFT;
+@@ -437,6 +437,7 @@ static struct page *kimage_alloc_normal_
+ return pages;
+ }
+
++#ifndef CONFIG_XEN
+ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
+ unsigned int order)
+ {
+@@ -490,7 +491,7 @@ static struct page *kimage_alloc_crash_c
+ }
+ /* If I don't overlap any segments I have found my hole! */
+ if (i == image->nr_segments) {
+- pages = pfn_to_page(hole_start >> PAGE_SHIFT);
++ pages = kexec_pfn_to_page(hole_start >> PAGE_SHIFT);
+ break;
+ }
+ }
+@@ -517,6 +518,13 @@ struct page *kimage_alloc_control_pages(
+
+ return pages;
+ }
++#else /* !CONFIG_XEN */
++struct page *kimage_alloc_control_pages(struct kimage *image,
++ unsigned int order)
++{
++ return kimage_alloc_normal_control_pages(image, order);
++}
++#endif
+
+ static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
+ {
+@@ -532,7 +540,7 @@ static int kimage_add_entry(struct kimag
+ return -ENOMEM;
+
+ ind_page = page_address(page);
+- *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
++ *image->entry = kexec_virt_to_phys(ind_page) | IND_INDIRECTION;
+ image->entry = ind_page;
+ image->last_entry = ind_page +
+ ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
+@@ -593,13 +601,13 @@ static int kimage_terminate(struct kimag
+ #define for_each_kimage_entry(image, ptr, entry) \
+ for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
+ ptr = (entry & IND_INDIRECTION)? \
+- phys_to_virt((entry & PAGE_MASK)): ptr +1)
++ kexec_phys_to_virt((entry & PAGE_MASK)): ptr +1)
+
+ static void kimage_free_entry(kimage_entry_t entry)
+ {
+ struct page *page;
+
+- page = pfn_to_page(entry >> PAGE_SHIFT);
++ page = kexec_pfn_to_page(entry >> PAGE_SHIFT);
+ kimage_free_pages(page);
+ }
+
+@@ -611,6 +619,10 @@ static void kimage_free(struct kimage *i
+ if (!image)
+ return;
+
++#ifdef CONFIG_XEN
++ xen_machine_kexec_unload(image);
++#endif
++
+ kimage_free_extra_pages(image);
+ for_each_kimage_entry(image, ptr, entry) {
+ if (entry & IND_INDIRECTION) {
+@@ -686,7 +698,7 @@ static struct page *kimage_alloc_page(st
+ * have a match.
+ */
+ list_for_each_entry(page, &image->dest_pages, lru) {
+- addr = page_to_pfn(page) << PAGE_SHIFT;
++ addr = kexec_page_to_pfn(page) << PAGE_SHIFT;
+ if (addr == destination) {
+ list_del(&page->lru);
+ return page;
+@@ -701,12 +713,12 @@ static struct page *kimage_alloc_page(st
+ if (!page)
+ return NULL;
+ /* If the page cannot be used file it away */
+- if (page_to_pfn(page) >
++ if (kexec_page_to_pfn(page) >
+ (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
+ list_add(&page->lru, &image->unuseable_pages);
+ continue;
+ }
+- addr = page_to_pfn(page) << PAGE_SHIFT;
++ addr = kexec_page_to_pfn(page) << PAGE_SHIFT;
+
+ /* If it is the destination page we want use it */
+ if (addr == destination)
+@@ -729,7 +741,7 @@ static struct page *kimage_alloc_page(st
+ struct page *old_page;
+
+ old_addr = *old & PAGE_MASK;
+- old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
++ old_page = kexec_pfn_to_page(old_addr >> PAGE_SHIFT);
+ copy_highpage(page, old_page);
+ *old = addr | (*old & ~PAGE_MASK);
+
+@@ -779,7 +791,7 @@ static int kimage_load_normal_segment(st
+ result = -ENOMEM;
+ goto out;
+ }
+- result = kimage_add_page(image, page_to_pfn(page)
++ result = kimage_add_page(image, kexec_page_to_pfn(page)
+ << PAGE_SHIFT);
+ if (result < 0)
+ goto out;
+@@ -811,6 +823,7 @@ out:
+ return result;
+ }
+
++#ifndef CONFIG_XEN
+ static int kimage_load_crash_segment(struct kimage *image,
+ struct kexec_segment *segment)
+ {
+@@ -833,7 +846,7 @@ static int kimage_load_crash_segment(str
+ char *ptr;
+ size_t uchunk, mchunk;
+
+- page = pfn_to_page(maddr >> PAGE_SHIFT);
++ page = kexec_pfn_to_page(maddr >> PAGE_SHIFT);
+ if (page == 0) {
+ result = -ENOMEM;
+ goto out;
+@@ -881,6 +894,13 @@ static int kimage_load_segment(struct ki
+
+ return result;
+ }
++#else /* CONFIG_XEN */
++static int kimage_load_segment(struct kimage *image,
++ struct kexec_segment *segment)
++{
++ return kimage_load_normal_segment(image, segment);
++}
++#endif
+
+ /*
+ * Exec Kernel system call: for obvious reasons only root may call it.
+@@ -991,6 +1011,11 @@ asmlinkage long sys_kexec_load(unsigned
+ if (result)
+ goto out;
+ }
++#ifdef CONFIG_XEN
++ result = xen_machine_kexec_load(image);
++ if (result)
++ goto out;
++#endif
+ /* Install the new kernel, and Uninstall the old */
+ image = xchg(dest_image, image);
+
+@@ -1045,7 +1070,6 @@ void crash_kexec(struct pt_regs *regs)
+ struct kimage *image;
+ int locked;
+
+-
+ /* Take the kexec_lock here to prevent sys_kexec_load
+ * running on one cpu from replacing the crash kernel
+ * we are using after a panic on a different cpu.
+@@ -1061,7 +1085,11 @@ void crash_kexec(struct pt_regs *regs)
+ struct pt_regs fixed_regs;
+ crash_setup_regs(&fixed_regs, regs);
+ machine_crash_shutdown(&fixed_regs);
++#ifdef CONFIG_XEN
++ xen_machine_kexec(image);
++#else
+ machine_kexec(image);
++#endif
+ }
+ xchg(&kexec_lock, 0);
+ }
+--- 0002/kernel/sys.c
++++ work/kernel/sys.c
+@@ -435,8 +435,12 @@ void kernel_kexec(void)
+ kernel_restart_prepare(NULL);
+ printk(KERN_EMERG "Starting new kernel\n");
+ machine_shutdown();
++#ifdef CONFIG_XEN
++ xen_machine_kexec(image);
++#else
+ machine_kexec(image);
+ #endif
++#endif
+ }
+ EXPORT_SYMBOL_GPL(kernel_kexec);
+
diff -r 88935ae47fa9 -r 5d6be0099bdf patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,169 @@
+kexec: Move asm segment handling code to the assembly file (i386)
+
+This patch moves the idt, gdt, and segment handling code from machine_kexec.c
+to relocate_kernel.S. The main reason behind this move is to avoid code
+duplication in the Xen hypervisor. With this patch all code required to kexec
+is put on the control page.
+
+On top of that this patch also counts as a cleanup - I think it is much
+nicer to write assembly directly in assembly files than wrap inline assembly
+in C functions for no apparent reason.
+
+Signed-off-by: Magnus Damm <magnus@xxxxxxxxxxxxx>
+---
+
+ Applies to 2.6.19-rc1.
+
+ machine_kexec.c | 59 -----------------------------------------------------
+ relocate_kernel.S | 58 +++++++++++++++++++++++++++++++++++++++++++++++-----
+ 2 files changed, 53 insertions(+), 64 deletions(-)
+
+--- 0002/arch/i386/kernel/machine_kexec.c
++++ work/arch/i386/kernel/machine_kexec.c 2006-10-05 15:49:08.000000000 +0900
+@@ -29,48 +29,6 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED
+ static u32 kexec_pte0[1024] PAGE_ALIGNED;
+ static u32 kexec_pte1[1024] PAGE_ALIGNED;
+
+-static void set_idt(void *newidt, __u16 limit)
+-{
+- struct Xgt_desc_struct curidt;
+-
+- /* ia32 supports unaliged loads & stores */
+- curidt.size = limit;
+- curidt.address = (unsigned long)newidt;
+-
+- load_idt(&curidt);
+-};
+-
+-
+-static void set_gdt(void *newgdt, __u16 limit)
+-{
+- struct Xgt_desc_struct curgdt;
+-
+- /* ia32 supports unaligned loads & stores */
+- curgdt.size = limit;
+- curgdt.address = (unsigned long)newgdt;
+-
+- load_gdt(&curgdt);
+-};
+-
+-static void load_segments(void)
+-{
+-#define __STR(X) #X
+-#define STR(X) __STR(X)
+-
+- __asm__ __volatile__ (
+- "\tljmp $"STR(__KERNEL_CS)",$1f\n"
+- "\t1:\n"
+- "\tmovl $"STR(__KERNEL_DS)",%%eax\n"
+- "\tmovl %%eax,%%ds\n"
+- "\tmovl %%eax,%%es\n"
+- "\tmovl %%eax,%%fs\n"
+- "\tmovl %%eax,%%gs\n"
+- "\tmovl %%eax,%%ss\n"
+- ::: "eax", "memory");
+-#undef STR
+-#undef __STR
+-}
+-
+ /*
+ * A architecture hook called to validate the
+ * proposed image and prepare the control pages
+@@ -127,23 +85,6 @@ NORET_TYPE void machine_kexec(struct kim
+ page_list[PA_PTE_1] = __pa(kexec_pte1);
+ page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+
+- /* The segment registers are funny things, they have both a
+- * visible and an invisible part. Whenever the visible part is
+- * set to a specific selector, the invisible part is loaded
+- * with from a table in memory. At no other time is the
+- * descriptor table in memory accessed.
+- *
+- * I take advantage of this here by force loading the
+- * segments, before I zap the gdt with an invalid value.
+- */
+- load_segments();
+- /* The gdt & idt are now invalid.
+- * If you want to load them you must set up your own idt & gdt.
+- */
+- set_gdt(phys_to_virt(0),0);
+- set_idt(phys_to_virt(0),0);
+-
+- /* now call it */
+ relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
+ image->start, cpu_has_pae);
+ }
+--- 0002/arch/i386/kernel/relocate_kernel.S
++++ work/arch/i386/kernel/relocate_kernel.S 2006-10-05 16:03:21.000000000 +0900
+@@ -154,14 +154,45 @@ relocate_new_kernel:
+ movl PTR(PA_PGD)(%ebp), %eax
+ movl %eax, %cr3
+
++ /* setup idt */
++ movl %edi, %eax
++ addl $(idt_48 - relocate_kernel), %eax
++ lidtl (%eax)
++
++ /* setup gdt */
++ movl %edi, %eax
++ addl $(gdt - relocate_kernel), %eax
++ movl %edi, %esi
++ addl $((gdt_48 - relocate_kernel) + 2), %esi
++ movl %eax, (%esi)
++
++ movl %edi, %eax
++ addl $(gdt_48 - relocate_kernel), %eax
++ lgdtl (%eax)
++
++ /* setup data segment registers */
++ mov $(gdt_ds - gdt), %eax
++ mov %eax, %ds
++ mov %eax, %es
++ mov %eax, %fs
++ mov %eax, %gs
++ mov %eax, %ss
++
+ /* setup a new stack at the end of the physical control page */
+ lea 4096(%edi), %esp
+
+- /* jump to identity mapped page */
+- movl %edi, %eax
+- addl $(identity_mapped - relocate_kernel), %eax
+- pushl %eax
+- ret
++ /* load new code segment and jump to identity mapped page */
++ movl %edi, %esi
++ xorl %eax, %eax
++ pushl %eax
++ pushl %esi
++ pushl %eax
++ movl $(gdt_cs - gdt), %eax
++ pushl %eax
++ movl %edi, %eax
++ addl $(identity_mapped - relocate_kernel),%eax
++ pushl %eax
++ iretl
+
+ identity_mapped:
+ /* store the start address on the stack */
+@@ -250,3 +281,20 @@ identity_mapped:
+ xorl %edi, %edi
+ xorl %ebp, %ebp
+ ret
++
++ .align 16
++gdt:
++ .quad 0x0000000000000000 /* NULL descriptor */
++gdt_cs:
++ .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */
++gdt_ds:
++ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */
++gdt_end:
++
++gdt_48:
++ .word gdt_end - gdt - 1 /* limit */
++ .long 0 /* base - filled in by code above */
++
++idt_48:
++ .word 0 /* limit */
++ .long 0 /* base */
diff -r 88935ae47fa9 -r 5d6be0099bdf patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,161 @@
+kexec: Move asm segment handling code to the assembly file (x86_64)
+
+This patch moves the idt, gdt, and segment handling code from machine_kexec.c
+to relocate_kernel.S. The main reason behind this move is to avoid code
+duplication in the Xen hypervisor. With this patch all code required to kexec
+is put on the control page.
+
+On top of that this patch also counts as a cleanup - I think it is much
+nicer to write assembly directly in assembly files than wrap inline assembly
+in C functions for no apparent reason.
+
+Signed-off-by: Magnus Damm <magnus@xxxxxxxxxxxxx>
+---
+
+ Applies to 2.6.19-rc1.
+
+ machine_kexec.c | 58 -----------------------------------------------------
+ relocate_kernel.S | 50 +++++++++++++++++++++++++++++++++++++++++----
+ 2 files changed, 45 insertions(+), 63 deletions(-)
+
+--- 0002/arch/x86_64/kernel/machine_kexec.c
++++ work/arch/x86_64/kernel/machine_kexec.c 2006-10-05 16:15:49.000000000 +0900
+@@ -112,47 +112,6 @@ static int init_pgtable(struct kimage *i
+ return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT);
+ }
+
+-static void set_idt(void *newidt, u16 limit)
+-{
+- struct desc_ptr curidt;
+-
+- /* x86-64 supports unaliged loads & stores */
+- curidt.size = limit;
+- curidt.address = (unsigned long)newidt;
+-
+- __asm__ __volatile__ (
+- "lidtq %0\n"
+- : : "m" (curidt)
+- );
+-};
+-
+-
+-static void set_gdt(void *newgdt, u16 limit)
+-{
+- struct desc_ptr curgdt;
+-
+- /* x86-64 supports unaligned loads & stores */
+- curgdt.size = limit;
+- curgdt.address = (unsigned long)newgdt;
+-
+- __asm__ __volatile__ (
+- "lgdtq %0\n"
+- : : "m" (curgdt)
+- );
+-};
+-
+-static void load_segments(void)
+-{
+- __asm__ __volatile__ (
+- "\tmovl %0,%%ds\n"
+- "\tmovl %0,%%es\n"
+- "\tmovl %0,%%ss\n"
+- "\tmovl %0,%%fs\n"
+- "\tmovl %0,%%gs\n"
+- : : "a" (__KERNEL_DS) : "memory"
+- );
+-}
+-
+ int machine_kexec_prepare(struct kimage *image)
+ {
+ unsigned long start_pgtable;
+@@ -209,23 +168,6 @@ NORET_TYPE void machine_kexec(struct kim
+ page_list[PA_TABLE_PAGE] =
+ (unsigned long)__pa(page_address(image->control_code_page));
+
+- /* The segment registers are funny things, they have both a
+- * visible and an invisible part. Whenever the visible part is
+- * set to a specific selector, the invisible part is loaded
+- * with from a table in memory. At no other time is the
+- * descriptor table in memory accessed.
+- *
+- * I take advantage of this here by force loading the
+- * segments, before I zap the gdt with an invalid value.
+- */
+- load_segments();
+- /* The gdt & idt are now invalid.
+- * If you want to load them you must set up your own idt & gdt.
+- */
+- set_gdt(phys_to_virt(0),0);
+- set_idt(phys_to_virt(0),0);
+-
+- /* now call it */
+ relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
+ image->start);
+ }
+--- 0002/arch/x86_64/kernel/relocate_kernel.S
++++ work/arch/x86_64/kernel/relocate_kernel.S 2006-10-05 16:18:07.000000000 +0900
+@@ -159,13 +159,39 @@ relocate_new_kernel:
+ movq PTR(PA_PGD)(%rsi), %r9
+ movq %r9, %cr3
+
++ /* setup idt */
++ movq %r8, %rax
++ addq $(idt_80 - relocate_kernel), %rax
++ lidtq (%rax)
++
++ /* setup gdt */
++ movq %r8, %rax
++ addq $(gdt - relocate_kernel), %rax
++ movq %r8, %r9
++ addq $((gdt_80 - relocate_kernel) + 2), %r9
++ movq %rax, (%r9)
++
++ movq %r8, %rax
++ addq $(gdt_80 - relocate_kernel), %rax
++ lgdtq (%rax)
++
++ /* setup data segment registers */
++ xorl %eax, %eax
++ movl %eax, %ds
++ movl %eax, %es
++ movl %eax, %fs
++ movl %eax, %gs
++ movl %eax, %ss
++
+ /* setup a new stack at the end of the physical control page */
+ lea 4096(%r8), %rsp
+
+- /* jump to identity mapped page */
+- addq $(identity_mapped - relocate_kernel), %r8
+- pushq %r8
+- ret
++ /* load new code segment and jump to identity mapped page */
++ movq %r8, %rax
++ addq $(identity_mapped - relocate_kernel), %rax
++ pushq $(gdt_cs - gdt)
++ pushq %rax
++ lretq
+
+ identity_mapped:
+ /* store the start address on the stack */
+@@ -272,5 +298,19 @@ identity_mapped:
+ xorq %r13, %r13
+ xorq %r14, %r14
+ xorq %r15, %r15
+-
+ ret
++
++ .align 16
++gdt:
++ .quad 0x0000000000000000 /* NULL descriptor */
++gdt_cs:
++ .quad 0x00af9a000000ffff
++gdt_end:
++
++gdt_80:
++ .word gdt_end - gdt - 1 /* limit */
++ .quad 0 /* base - filled in by code above */
++
++idt_80:
++ .word 0 /* limit */
++ .quad 0 /* base */
diff -r 88935ae47fa9 -r 5d6be0099bdf patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-xen-i386.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-xen-i386.patch Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,108 @@
+--- 0001/arch/i386/kernel/crash.c
++++ work/arch/i386/kernel/crash.c
+@@ -90,6 +90,7 @@ static void crash_save_self(struct pt_re
+ crash_save_this_cpu(regs, cpu);
+ }
+
++#ifndef CONFIG_XEN
+ #ifdef CONFIG_SMP
+ static atomic_t waiting_for_crash_ipi;
+
+@@ -158,6 +159,7 @@ static void nmi_shootdown_cpus(void)
+ /* There are no cpus to shootdown */
+ }
+ #endif
++#endif /* CONFIG_XEN */
+
+ void machine_crash_shutdown(struct pt_regs *regs)
+ {
+@@ -174,10 +176,12 @@ void machine_crash_shutdown(struct pt_re
+
+ /* Make a note of crashing cpu. Will be used in NMI callback.*/
+ crashing_cpu = smp_processor_id();
++#ifndef CONFIG_XEN
+ nmi_shootdown_cpus();
+ lapic_shutdown();
+ #if defined(CONFIG_X86_IO_APIC)
+ disable_IO_APIC();
+ #endif
++#endif /* CONFIG_XEN */
+ crash_save_self(regs);
+ }
+--- 0007/arch/i386/kernel/machine_kexec.c
++++ work/arch/i386/kernel/machine_kexec.c
+@@ -19,6 +19,10 @@
+ #include <asm/desc.h>
+ #include <asm/system.h>
+
++#ifdef CONFIG_XEN
++#include <xen/interface/kexec.h>
++#endif
++
+ #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
+ static u32 kexec_pgd[1024] PAGE_ALIGNED;
+ #ifdef CONFIG_X86_PAE
+@@ -28,6 +32,40 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED
+ static u32 kexec_pte0[1024] PAGE_ALIGNED;
+ static u32 kexec_pte1[1024] PAGE_ALIGNED;
+
++#ifdef CONFIG_XEN
++
++#define __ma(x) (pfn_to_mfn(__pa((x)) >> PAGE_SHIFT) << PAGE_SHIFT)
++
++#if PAGES_NR > KEXEC_XEN_NO_PAGES
++#error PAGES_NR is greater than KEXEC_XEN_NO_PAGES - Xen support will break
++#endif
++
++#if PA_CONTROL_PAGE != 0
++#error PA_CONTROL_PAGE is non zero - Xen support will break
++#endif
++
++void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
++{
++ void *control_page;
++
++ memset(xki->page_list, 0, sizeof(xki->page_list));
++
++ control_page = page_address(image->control_code_page);
++ memcpy(control_page, relocate_kernel, PAGE_SIZE);
++
++ xki->page_list[PA_CONTROL_PAGE] = __ma(control_page);
++ xki->page_list[PA_PGD] = __ma(kexec_pgd);
++#ifdef CONFIG_X86_PAE
++ xki->page_list[PA_PMD_0] = __ma(kexec_pmd0);
++ xki->page_list[PA_PMD_1] = __ma(kexec_pmd1);
++#endif
++ xki->page_list[PA_PTE_0] = __ma(kexec_pte0);
++ xki->page_list[PA_PTE_1] = __ma(kexec_pte1);
++
++}
++
++#endif /* CONFIG_XEN */
++
+ /*
+ * A architecture hook called to validate the
+ * proposed image and prepare the control pages
+--- 0006/include/asm-i386/kexec.h
++++ work/include/asm-i386/kexec.h
+@@ -98,6 +98,20 @@ relocate_kernel(unsigned long indirectio
+ unsigned long start_address,
+ unsigned int has_pae) ATTRIB_NORET;
+
++
++/* Under Xen we need to work with machine addresses. These macros give the
++ * machine address of a certain page to the generic kexec code instead of
++ * the pseudo physical address which would be given by the default macros.
++ */
++
++#ifdef CONFIG_XEN
++#define KEXEC_ARCH_HAS_PAGE_MACROS
++#define kexec_page_to_pfn(page) pfn_to_mfn(page_to_pfn(page))
++#define kexec_pfn_to_page(pfn) pfn_to_page(mfn_to_pfn(pfn))
++#define kexec_virt_to_phys(addr) virt_to_machine(addr)
++#define kexec_phys_to_virt(addr) phys_to_virt(machine_to_phys(addr))
++#endif
++
+ #endif /* __ASSEMBLY__ */
+
+ #endif /* _I386_KEXEC_H */
diff -r 88935ae47fa9 -r 5d6be0099bdf patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-xen-x86_64.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-xen-x86_64.patch Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,219 @@
+--- 0001/arch/x86_64/kernel/crash.c
++++ work/arch/x86_64/kernel/crash.c
+@@ -92,6 +92,7 @@ static void crash_save_self(struct pt_re
+ crash_save_this_cpu(regs, cpu);
+ }
+
++#ifndef CONFIG_XEN
+ #ifdef CONFIG_SMP
+ static atomic_t waiting_for_crash_ipi;
+
+@@ -156,6 +157,7 @@ static void nmi_shootdown_cpus(void)
+ /* There are no cpus to shootdown */
+ }
+ #endif
++#endif /* CONFIG_XEN */
+
+ void machine_crash_shutdown(struct pt_regs *regs)
+ {
+@@ -173,6 +175,8 @@ void machine_crash_shutdown(struct pt_re
+
+ /* Make a note of crashing cpu. Will be used in NMI callback.*/
+ crashing_cpu = smp_processor_id();
++
++#ifndef CONFIG_XEN
+ nmi_shootdown_cpus();
+
+ if(cpu_has_apic)
+@@ -181,6 +185,6 @@ void machine_crash_shutdown(struct pt_re
+ #if defined(CONFIG_X86_IO_APIC)
+ disable_IO_APIC();
+ #endif
+-
++#endif /* CONFIG_XEN */
+ crash_save_self(regs);
+ }
+--- 0010/arch/x86_64/kernel/machine_kexec.c
++++ work/arch/x86_64/kernel/machine_kexec.c
+@@ -24,6 +24,104 @@ static u64 kexec_pud1[512] PAGE_ALIGNED;
+ static u64 kexec_pmd1[512] PAGE_ALIGNED;
+ static u64 kexec_pte1[512] PAGE_ALIGNED;
+
++#ifdef CONFIG_XEN
++
++/* In the case of Xen, override hypervisor functions to be able to create
++ * a regular identity mapping page table...
++ */
++
++#include <xen/interface/kexec.h>
++#include <xen/interface/memory.h>
++
++#define x__pmd(x) ((pmd_t) { (x) } )
++#define x__pud(x) ((pud_t) { (x) } )
++#define x__pgd(x) ((pgd_t) { (x) } )
++
++#define x_pmd_val(x) ((x).pmd)
++#define x_pud_val(x) ((x).pud)
++#define x_pgd_val(x) ((x).pgd)
++
++static inline void x_set_pmd(pmd_t *dst, pmd_t val)
++{
++ x_pmd_val(*dst) = x_pmd_val(val);
++}
++
++static inline void x_set_pud(pud_t *dst, pud_t val)
++{
++ x_pud_val(*dst) = phys_to_machine(x_pud_val(val));
++}
++
++static inline void x_pud_clear (pud_t *pud)
++{
++ x_pud_val(*pud) = 0;
++}
++
++static inline void x_set_pgd(pgd_t *dst, pgd_t val)
++{
++ x_pgd_val(*dst) = phys_to_machine(x_pgd_val(val));
++}
++
++static inline void x_pgd_clear (pgd_t * pgd)
++{
++ x_pgd_val(*pgd) = 0;
++}
++
++#define X__PAGE_KERNEL_LARGE_EXEC \
++ _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_PSE
++#define X_KERNPG_TABLE _PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY
++
++#define __ma(x) (pfn_to_mfn(__pa((x)) >> PAGE_SHIFT) << PAGE_SHIFT)
++
++#if PAGES_NR > KEXEC_XEN_NO_PAGES
++#error PAGES_NR is greater than KEXEC_XEN_NO_PAGES - Xen support will break
++#endif
++
++#if PA_CONTROL_PAGE != 0
++#error PA_CONTROL_PAGE is non zero - Xen support will break
++#endif
++
++void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
++{
++ void *control_page;
++ void *table_page;
++
++ memset(xki->page_list, 0, sizeof(xki->page_list));
++
++ control_page = page_address(image->control_code_page) + PAGE_SIZE;
++ memcpy(control_page, relocate_kernel, PAGE_SIZE);
++
++ table_page = page_address(image->control_code_page);
++
++ xki->page_list[PA_CONTROL_PAGE] = __ma(control_page);
++ xki->page_list[PA_TABLE_PAGE] = __ma(table_page);
++
++ xki->page_list[PA_PGD] = __ma(kexec_pgd);
++ xki->page_list[PA_PUD_0] = __ma(kexec_pud0);
++ xki->page_list[PA_PUD_1] = __ma(kexec_pud1);
++ xki->page_list[PA_PMD_0] = __ma(kexec_pmd0);
++ xki->page_list[PA_PMD_1] = __ma(kexec_pmd1);
++ xki->page_list[PA_PTE_0] = __ma(kexec_pte0);
++ xki->page_list[PA_PTE_1] = __ma(kexec_pte1);
++}
++
++#else /* CONFIG_XEN */
++
++#define x__pmd(x) __pmd(x)
++#define x__pud(x) __pud(x)
++#define x__pgd(x) __pgd(x)
++
++#define x_set_pmd(x, y) set_pmd(x, y)
++#define x_set_pud(x, y) set_pud(x, y)
++#define x_set_pgd(x, y) set_pgd(x, y)
++
++#define x_pud_clear(x) pud_clear(x)
++#define x_pgd_clear(x) pgd_clear(x)
++
++#define X__PAGE_KERNEL_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC
++#define X_KERNPG_TABLE _KERNPG_TABLE
++
++#endif /* CONFIG_XEN */
++
+ static void init_level2_page(pmd_t *level2p, unsigned long addr)
+ {
+ unsigned long end_addr;
+@@ -31,7 +129,7 @@ static void init_level2_page(pmd_t *leve
+ addr &= PAGE_MASK;
+ end_addr = addr + PUD_SIZE;
+ while (addr < end_addr) {
+- set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
++ x_set_pmd(level2p++, x__pmd(addr | X__PAGE_KERNEL_LARGE_EXEC));
+ addr += PMD_SIZE;
+ }
+ }
+@@ -56,12 +154,12 @@ static int init_level3_page(struct kimag
+ }
+ level2p = (pmd_t *)page_address(page);
+ init_level2_page(level2p, addr);
+- set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE));
++ x_set_pud(level3p++, x__pud(__pa(level2p) | X_KERNPG_TABLE));
+ addr += PUD_SIZE;
+ }
+ /* clear the unused entries */
+ while (addr < end_addr) {
+- pud_clear(level3p++);
++ x_pud_clear(level3p++);
+ addr += PUD_SIZE;
+ }
+ out:
+@@ -92,12 +190,12 @@ static int init_level4_page(struct kimag
+ if (result) {
+ goto out;
+ }
+- set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
++ x_set_pgd(level4p++, x__pgd(__pa(level3p) | X_KERNPG_TABLE));
+ addr += PGDIR_SIZE;
+ }
+ /* clear the unused entries */
+ while (addr < end_addr) {
+- pgd_clear(level4p++);
++ x_pgd_clear(level4p++);
+ addr += PGDIR_SIZE;
+ }
+ out:
+@@ -108,8 +206,14 @@ out:
+ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
+ {
+ pgd_t *level4p;
++ unsigned long x_end_pfn = end_pfn;
++
++#ifdef CONFIG_XEN
++ x_end_pfn = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
++#endif
++
+ level4p = (pgd_t *)__va(start_pgtable);
+- return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT);
++ return init_level4_page(image, level4p, 0, x_end_pfn << PAGE_SHIFT);
+ }
+
+ int machine_kexec_prepare(struct kimage *image)
+--- 0009/include/asm-x86_64/kexec.h
++++ work/include/asm-x86_64/kexec.h
+@@ -91,6 +91,19 @@ relocate_kernel(unsigned long indirectio
+ unsigned long page_list,
+ unsigned long start_address) ATTRIB_NORET;
+
++/* Under Xen we need to work with machine addresses. These macros give the
++ * machine address of a certain page to the generic kexec code instead of
++ * the pseudo physical address which would be given by the default macros.
++ */
++
++#ifdef CONFIG_XEN
++#define KEXEC_ARCH_HAS_PAGE_MACROS
++#define kexec_page_to_pfn(page) pfn_to_mfn(page_to_pfn(page))
++#define kexec_pfn_to_page(pfn) pfn_to_page(mfn_to_pfn(pfn))
++#define kexec_virt_to_phys(addr) virt_to_machine(addr)
++#define kexec_phys_to_virt(addr) phys_to_virt(machine_to_phys(addr))
++#endif
++
+ #endif /* __ASSEMBLY__ */
+
+ #endif /* _X86_64_KEXEC_H */
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/arch/ia64/xen/crash.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/xen/crash.c Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,19 @@
+#include <xen/lib.h> /* for printk() used in stub */
+#include <xen/types.h>
+#include <public/kexec.h>
+
+void machine_crash_shutdown(void)
+{
+ printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/arch/ia64/xen/machine_kexec.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/xen/machine_kexec.c Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,34 @@
+#include <xen/lib.h> /* for printk() used in stubs */
+#include <xen/types.h>
+#include <public/kexec.h>
+
+int machine_kexec_load(int type, int slot, xen_kexec_image_t *image)
+{
+ printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
+ return -1;
+}
+
+void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image)
+{
+ printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
+}
+
+void machine_kexec(xen_kexec_image_t *image)
+{
+ printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
+}
+
+void machine_shutdown(xen_kexec_image_t *image)
+{
+ printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/arch/powerpc/crash.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/powerpc/crash.c Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,19 @@
+#include <xen/lib.h> /* for printk() used in stub */
+#include <xen/types.h>
+#include <public/kexec.h>
+
+void machine_crash_shutdown(void)
+{
+ printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/arch/powerpc/machine_kexec.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/powerpc/machine_kexec.c Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,34 @@
+#include <xen/lib.h> /* for printk() used in stubs */
+#include <xen/types.h>
+#include <public/kexec.h>
+
+int machine_kexec_load(int type, int slot, xen_kexec_image_t *image)
+{
+ printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
+ return -1;
+}
+
+void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image)
+{
+ printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
+}
+
+void machine_kexec(xen_kexec_image_t *image)
+{
+ printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
+}
+
+void machine_shutdown(xen_kexec_image_t *image)
+{
+ printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/arch/x86/crash.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/crash.c Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,128 @@
+/******************************************************************************
+ * crash.c
+ *
+ * Based heavily on arch/i386/kernel/crash.c from Linux 2.6.16
+ *
+ * Xen port written by:
+ * - Simon 'Horms' Horman <horms@xxxxxxxxxxxx>
+ * - Magnus Damm <magnus@xxxxxxxxxxxxx>
+ */
+
+#include <asm/atomic.h>
+#include <asm/elf.h>
+#include <asm/percpu.h>
+#include <asm/kexec.h>
+#include <xen/types.h>
+#include <xen/irq.h>
+#include <asm/ipi.h>
+#include <asm/nmi.h>
+#include <xen/string.h>
+#include <xen/elf.h>
+#include <xen/elfcore.h>
+#include <xen/smp.h>
+#include <xen/delay.h>
+#include <xen/perfc.h>
+#include <xen/kexec.h>
+#include <xen/sched.h>
+#include <public/xen.h>
+#include <asm/hvm/hvm.h>
+
+#ifdef CONFIG_SMP
+static atomic_t waiting_for_crash_ipi;
+
+static int crash_nmi_callback(struct cpu_user_regs *regs, int cpu)
+{
+ /* Don't do anything if this handler is invoked on crashing cpu.
+ * Otherwise, system will completely hang. Crashing cpu can get
+ * an NMI if system was initially booted with nmi_watchdog parameter.
+ */
+ if ( cpu == crashing_cpu )
+ return 1;
+ local_irq_disable();
+
+ machine_crash_save_cpu();
+ disable_local_APIC();
+ atomic_dec(&waiting_for_crash_ipi);
+ hvm_disable();
+
+ for ( ; ; )
+ __asm__ __volatile__ ( "hlt" );
+
+ return 1;
+}
+
+/*
+ * By using the NMI code instead of a vector we just sneak thru the
+ * word generator coming out with just what we want. AND it does
+ * not matter if clustered_apic_mode is set or not.
+ */
+static void smp_send_nmi_allbutself(void)
+{
+ cpumask_t allbutself = cpu_online_map;
+
+ cpu_clear(smp_processor_id(), allbutself);
+ send_IPI_mask(allbutself, APIC_DM_NMI);
+}
+
+static void nmi_shootdown_cpus(void)
+{
+ unsigned long msecs;
+
+ atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+ /* Would it be better to replace the trap vector here? */
+ set_nmi_callback(crash_nmi_callback);
+ /* Ensure the new callback function is set before sending
+ * out the NMI
+ */
+ wmb();
+
+ smp_send_nmi_allbutself();
+
+ msecs = 1000; /* Wait at most a second for the other cpus to stop */
+ while ( (atomic_read(&waiting_for_crash_ipi) > 0) && msecs )
+ {
+ mdelay(1);
+ msecs--;
+ }
+
+ /* Leave the nmi callback set */
+ disable_local_APIC();
+}
+#endif
+
+static void crash_save_xen_notes(void)
+{
+ crash_xen_info_t *info;
+
+ info = machine_crash_save_info();
+
+ info->dom0_pfn_to_mfn_frame_list_list = \
+ dom0->shared_info->arch.pfn_to_mfn_frame_list_list;
+}
+
+void machine_crash_shutdown(void)
+{
+ printk("machine_crash_shutdown: %d\n", smp_processor_id());
+ local_irq_disable();
+
+#ifdef CONFIG_SMP
+ nmi_shootdown_cpus();
+#endif
+
+#ifdef CONFIG_X86_IO_APIC
+ disable_IO_APIC();
+#endif
+ hvm_disable();
+
+ crash_save_xen_notes();
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/arch/x86/machine_kexec.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/machine_kexec.c Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,105 @@
+/******************************************************************************
+ * machine_kexec.c
+ *
+ * Xen port written by:
+ * - Simon 'Horms' Horman <horms@xxxxxxxxxxxx>
+ * - Magnus Damm <magnus@xxxxxxxxxxxxx>
+ */
+
+#include <xen/lib.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/flushtlb.h>
+#include <xen/smp.h>
+#include <xen/nmi.h>
+#include <xen/types.h>
+#include <xen/console.h>
+#include <xen/kexec.h>
+#include <asm/kexec.h>
+#include <xen/domain_page.h>
+#include <asm/fixmap.h>
+#include <asm/hvm/hvm.h>
+
+int machine_kexec_load(int type, int slot, xen_kexec_image_t *image)
+{
+ unsigned long prev_ma = 0;
+ int fix_base = FIX_KEXEC_BASE_0 + (slot * (KEXEC_XEN_NO_PAGES >> 1));
+ int k;
+
+ /* setup fixmap to point to our pages and record the virtual address
+ * in every odd index in page_list[].
+ */
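+    /* Worked example (illustrative only): with KEXEC_XEN_NO_PAGES == 17
+     * the loader supplies machine addresses in the even slots
+     * page_list[0,2,...,16] and this loop fills the odd slots
+     * page_list[1,3,...,15] with the matching fixmap virtual addresses,
+     * so each PA_* index in the Linux-side layout is paired with the
+     * VA_* index that follows it.
+     */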
+
+ for ( k = 0; k < KEXEC_XEN_NO_PAGES; k++ )
+ {
+ if ( (k & 1) == 0 )
+ {
+ /* Even pages: machine address. */
+ prev_ma = image->page_list[k];
+ }
+ else
+ {
+ /* Odd pages: va for previous ma. */
+ set_fixmap(fix_base + (k >> 1), prev_ma);
+ image->page_list[k] = fix_to_virt(fix_base + (k >> 1));
+ }
+ }
+
+ return 0;
+}
+
+void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image)
+{
+}
+
+static void __machine_shutdown(void *data)
+{
+ xen_kexec_image_t *image = (xen_kexec_image_t *)data;
+
+ watchdog_disable();
+ console_start_sync();
+
+ smp_send_stop();
+
+#ifdef CONFIG_X86_IO_APIC
+ disable_IO_APIC();
+#endif
+ hvm_disable();
+
+ machine_kexec(image);
+}
+
+void machine_shutdown(xen_kexec_image_t *image)
+{
+ int reboot_cpu_id;
+ cpumask_t reboot_cpu;
+
+ reboot_cpu_id = 0;
+
+ if ( !cpu_isset(reboot_cpu_id, cpu_online_map) )
+ reboot_cpu_id = smp_processor_id();
+
+ if ( reboot_cpu_id != smp_processor_id() )
+ {
+ cpus_clear(reboot_cpu);
+ cpu_set(reboot_cpu_id, reboot_cpu);
+ on_selected_cpus(reboot_cpu, __machine_shutdown, image, 1, 0);
+ for (;;)
+ ; /* nothing */
+ }
+ else
+ {
+ __machine_shutdown(image);
+ }
+ BUG();
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/common/kexec.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/common/kexec.c Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,359 @@
+/******************************************************************************
+ * kexec.c - Architecture independent kexec code for Xen
+ *
+ * Xen port written by:
+ * - Simon 'Horms' Horman <horms@xxxxxxxxxxxx>
+ * - Magnus Damm <magnus@xxxxxxxxxxxxx>
+ */
+
+#include <asm/kexec.h>
+#include <xen/lib.h>
+#include <xen/ctype.h>
+#include <xen/errno.h>
+#include <xen/guest_access.h>
+#include <xen/sched.h>
+#include <xen/types.h>
+#include <xen/kexec.h>
+#include <xen/keyhandler.h>
+#include <public/kexec.h>
+#include <xen/cpumask.h>
+#include <asm/atomic.h>
+#include <xen/spinlock.h>
+#include <xen/version.h>
+#include <public/elfnote.h>
+
+static char opt_crashkernel[32] = "";
+string_param("crashkernel", opt_crashkernel);
+
+DEFINE_PER_CPU (crash_note_t, crash_notes);
+cpumask_t crash_saved_cpus;
+int crashing_cpu;
+
+xen_kexec_image_t kexec_image[KEXEC_IMAGE_NR];
+
+#define KEXEC_FLAG_DEFAULT_POS (KEXEC_IMAGE_NR + 0)
+#define KEXEC_FLAG_CRASH_POS (KEXEC_IMAGE_NR + 1)
+#define KEXEC_FLAG_IN_PROGRESS (KEXEC_IMAGE_NR + 2)
+
+unsigned long kexec_flags = 0; /* the lowest bits are for KEXEC_IMAGE... */
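+/* Bit layout (see kexec_load_unload() and machine_crash_kexec() below):
+ * the low KEXEC_IMAGE_NR bits record which kexec_image[] slots currently
+ * hold a loaded image, the *_POS bits select the active slot of each
+ * default/crash image pair, and KEXEC_FLAG_IN_PROGRESS serialises an
+ * in-progress kexec.
+ */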
+
+spinlock_t kexec_lock = SPIN_LOCK_UNLOCKED;
+
+static void one_cpu_only(void)
+{
+ /* Only allow the first cpu to continue - force other cpus to spin */
+ if ( test_and_set_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags) )
+ {
+ while (1);
+ }
+}
+
+/* Save the registers in the per-cpu crash note buffer */
+
+void machine_crash_save_cpu(void)
+{
+ int cpu = smp_processor_id();
+ crash_note_t *cntp;
+
+ if ( !cpu_test_and_set(cpu, crash_saved_cpus) )
+ {
+ cntp = &per_cpu(crash_notes, cpu);
+ elf_core_save_regs(&cntp->core.desc.desc.pr_reg,
+ &cntp->xen_regs.desc.desc);
+
+ /* setup crash "CORE" note */
+ setup_crash_note(cntp, core, CORE_STR, CORE_STR_LEN, NT_PRSTATUS);
+
+ /* setup crash note "Xen", XEN_ELFNOTE_CRASH_REGS */
+ setup_crash_note(cntp, xen_regs, XEN_STR, XEN_STR_LEN,
+ XEN_ELFNOTE_CRASH_REGS);
+ }
+}
+
+/* Setup the single Xen specific info crash note */
+
+crash_xen_info_t *machine_crash_save_info(void)
+{
+ int cpu = smp_processor_id();
+ crash_note_t *cntp;
+ crash_xen_info_t *info;
+
+ BUG_ON(!cpu_test_and_set(cpu, crash_saved_cpus));
+
+ cntp = &per_cpu(crash_notes, cpu);
+
+ /* setup crash note "Xen", XEN_ELFNOTE_CRASH_INFO */
+ setup_crash_note(cntp, xen_info, XEN_STR, XEN_STR_LEN,
+ XEN_ELFNOTE_CRASH_INFO);
+
+ info = &cntp->xen_info.desc.desc;
+
+ info->xen_major_version = xen_major_version();
+ info->xen_minor_version = xen_minor_version();
+ info->xen_extra_version = __pa(xen_extra_version());
+ info->xen_changeset = __pa(xen_changeset());
+ info->xen_compiler = __pa(xen_compiler());
+ info->xen_compile_date = __pa(xen_compile_date());
+ info->xen_compile_time = __pa(xen_compile_time());
+ info->tainted = tainted;
+
+ return info;
+}
+
+void machine_crash_kexec(void)
+{
+ int pos;
+ xen_kexec_image_t *image;
+
+ one_cpu_only();
+
+ machine_crash_save_cpu();
+ crashing_cpu = smp_processor_id();
+
+ machine_crash_shutdown();
+
+ pos = (test_bit(KEXEC_FLAG_CRASH_POS, &kexec_flags) != 0);
+
+ if ( test_bit(KEXEC_IMAGE_CRASH_BASE + pos, &kexec_flags) )
+ {
+ image = &kexec_image[KEXEC_IMAGE_CRASH_BASE + pos];
+ machine_kexec(image); /* Does not return */
+ }
+
+ while (1); /* No image available - just spin */
+}
+
+static void do_crashdump_trigger(unsigned char key)
+{
+ printk("triggering crashdump\n");
+ machine_crash_kexec();
+}
+
+static __init int register_crashdump_trigger(void)
+{
+ register_keyhandler('c', do_crashdump_trigger, "trigger a crashdump");
+ return 0;
+}
+__initcall(register_crashdump_trigger);
+
+void machine_kexec_reserved(xen_kexec_reserve_t *reservation)
+{
+ unsigned long start, size;
+ char *str = opt_crashkernel;
+
+ memset(reservation, 0, sizeof(*reservation));
+
+ size = parse_size_and_unit(str, &str);
+ if ( *str == '@' )
+ start = parse_size_and_unit(str+1, NULL);
+ else
+ start = 0;
+
+ if ( start && size )
+ {
+ reservation->start = start;
+ reservation->size = size;
+ }
+}
+
+static int kexec_get_reserve(xen_kexec_range_t *range)
+{
+ xen_kexec_reserve_t reservation;
+
+ machine_kexec_reserved(&reservation);
+
+ range->start = reservation.start;
+ range->size = reservation.size;
+ return 0;
+}
+
+extern unsigned long _text, _end;
+
+static int kexec_get_xen(xen_kexec_range_t *range, int get_ma)
+{
+ if ( get_ma )
+ range->start = virt_to_maddr(&_text);
+ else
+ range->start = (unsigned long) &_text;
+
+ range->size = &_end - &_text;
+ return 0;
+}
+
+static int kexec_get_cpu(xen_kexec_range_t *range)
+{
+ if ( range->nr < 0 || range->nr >= num_present_cpus() )
+ return -EINVAL;
+
+ range->start = __pa((unsigned long)&per_cpu(crash_notes, range->nr));
+ range->size = sizeof(crash_note_t);
+ return 0;
+}
+
+static int kexec_get_range(XEN_GUEST_HANDLE(void) uarg)
+{
+ xen_kexec_range_t range;
+ int ret = -EINVAL;
+
+ if ( unlikely(copy_from_guest(&range, uarg, 1)) )
+ return -EFAULT;
+
+ switch ( range.range )
+ {
+ case KEXEC_RANGE_MA_CRASH:
+ ret = kexec_get_reserve(&range);
+ break;
+ case KEXEC_RANGE_MA_XEN:
+ ret = kexec_get_xen(&range, 1);
+ break;
+ case KEXEC_RANGE_VA_XEN:
+ ret = kexec_get_xen(&range, 0);
+ break;
+ case KEXEC_RANGE_MA_CPU:
+ ret = kexec_get_cpu(&range);
+ break;
+ }
+
+ if ( ret == 0 && unlikely(copy_to_guest(uarg, &range, 1)) )
+ return -EFAULT;
+
+ return ret;
+}
+
+static int kexec_load_get_bits(int type, int *base, int *bit)
+{
+ switch ( type )
+ {
+ case KEXEC_TYPE_DEFAULT:
+ *base = KEXEC_IMAGE_DEFAULT_BASE;
+ *bit = KEXEC_FLAG_DEFAULT_POS;
+ break;
+ case KEXEC_TYPE_CRASH:
+ *base = KEXEC_IMAGE_CRASH_BASE;
+ *bit = KEXEC_FLAG_CRASH_POS;
+ break;
+ default:
+ return -1;
+ }
+ return 0;
+}
+
+static int kexec_load_unload(unsigned long op, XEN_GUEST_HANDLE(void) uarg)
+{
+ xen_kexec_load_t load;
+ xen_kexec_image_t *image;
+ int base, bit, pos;
+ int ret = 0;
+
+ if ( unlikely(copy_from_guest(&load, uarg, 1)) )
+ return -EFAULT;
+
+ if ( kexec_load_get_bits(load.type, &base, &bit) )
+ return -EINVAL;
+
+ pos = (test_bit(bit, &kexec_flags) != 0);
+
+ /* Load the user data into an unused image */
+ if ( op == KEXEC_CMD_kexec_load )
+ {
+ image = &kexec_image[base + !pos];
+
+ BUG_ON(test_bit((base + !pos), &kexec_flags)); /* must be free */
+
+ memcpy(image, &load.image, sizeof(*image));
+
+ if ( !(ret = machine_kexec_load(load.type, base + !pos, image)) )
+ {
+ /* Set image present bit */
+ set_bit((base + !pos), &kexec_flags);
+
+ /* Make new image the active one */
+ change_bit(bit, &kexec_flags);
+ }
+ }
+
+ /* Unload the old image if present and load successful */
+ if ( ret == 0 && !test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags) )
+ {
+ if ( test_and_clear_bit((base + pos), &kexec_flags) )
+ {
+ image = &kexec_image[base + pos];
+ machine_kexec_unload(load.type, base + pos, image);
+ }
+ }
+
+ return ret;
+}
+
+static int kexec_exec(XEN_GUEST_HANDLE(void) uarg)
+{
+ xen_kexec_exec_t exec;
+ xen_kexec_image_t *image;
+ int base, bit, pos;
+
+ if ( unlikely(copy_from_guest(&exec, uarg, 1)) )
+ return -EFAULT;
+
+ if ( kexec_load_get_bits(exec.type, &base, &bit) )
+ return -EINVAL;
+
+ pos = (test_bit(bit, &kexec_flags) != 0);
+
+ /* Only allow kexec/kdump into loaded images */
+ if ( !test_bit(base + pos, &kexec_flags) )
+ return -ENOENT;
+
+ switch (exec.type)
+ {
+ case KEXEC_TYPE_DEFAULT:
+ image = &kexec_image[base + pos];
+ one_cpu_only();
+ machine_shutdown(image); /* Does not return */
+ break;
+ case KEXEC_TYPE_CRASH:
+ machine_crash_kexec(); /* Does not return */
+ break;
+ }
+
+ return -EINVAL; /* never reached */
+}
+
+long do_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg)
+{
+ unsigned long flags;
+ int ret = -EINVAL;
+
+ if ( !IS_PRIV(current->domain) )
+ return -EPERM;
+
+ switch ( op )
+ {
+ case KEXEC_CMD_kexec_get_range:
+ ret = kexec_get_range(uarg);
+ break;
+ case KEXEC_CMD_kexec_load:
+ case KEXEC_CMD_kexec_unload:
+ spin_lock_irqsave(&kexec_lock, flags);
+ if (!test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags))
+ {
+ ret = kexec_load_unload(op, uarg);
+ }
+ spin_unlock_irqrestore(&kexec_lock, flags);
+ break;
+ case KEXEC_CMD_kexec:
+ ret = kexec_exec(uarg);
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/include/asm-ia64/elf.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-ia64/elf.h Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,30 @@
+#ifndef __IA64_ELF_H__
+#define __IA64_ELF_H__
+
+#include <xen/lib.h> /* for printk() used in stub */
+
+typedef struct {
+ unsigned long dummy;
+} ELF_Gregset;
+
+typedef struct {
+ unsigned long dummy;
+} crash_xen_core_t;
+
+extern inline void elf_core_save_regs(ELF_Gregset *core_regs,
+ crash_xen_core_t *xen_core_regs)
+{
+ printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
+}
+
+#endif /* __IA64_ELF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/include/asm-ia64/kexec.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-ia64/kexec.h Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,25 @@
+#ifndef __IA64_KEXEC_H__
+#define __IA64_KEXEC_H__
+
+#include <xen/lib.h> /* for printk() used in stub */
+#include <xen/types.h>
+#include <public/xen.h>
+#include <xen/kexec.h>
+
+static inline void machine_kexec(xen_kexec_image_t *image)
+{
+ printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
+}
+
+#endif /* __IA64_KEXEC_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/include/asm-powerpc/elf.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-powerpc/elf.h Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,30 @@
+#ifndef _ASM_ELF_H__
+#define _ASM_ELF_H__
+
+#include <xen/lib.h> /* for printk() used in stub */
+
+typedef struct {
+ unsigned long dummy;
+} ELF_Gregset;
+
+typedef struct {
+ unsigned long dummy;
+} crash_xen_core_t;
+
+extern inline void elf_core_save_regs(ELF_Gregset *core_regs,
+ crash_xen_core_t *xen_core_regs)
+{
+ printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
+}
+
+#endif /* _ASM_ELF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/include/asm-powerpc/kexec.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-powerpc/kexec.h Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,25 @@
+#ifndef _ASM_KEXEC_H__
+#define _ASM_KEXEC_H__
+
+#include <xen/lib.h> /* for printk() used in stub */
+#include <xen/types.h>
+#include <public/xen.h>
+#include <xen/kexec.h>
+
+static inline void machine_kexec(xen_kexec_image_t *image)
+{
+ printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
+}
+
+#endif /* _ASM_KEXEC_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/include/asm-x86/elf.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/elf.h Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,24 @@
+#ifndef __X86_ELF_H__
+#define __X86_ELF_H__
+
+typedef struct {
+ unsigned long cr0, cr2, cr3, cr4;
+} crash_xen_core_t;
+
+#ifdef __x86_64__
+#include <asm/x86_64/elf.h>
+#else
+#include <asm/x86_32/elf.h>
+#endif
+
+#endif /* __X86_ELF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/include/asm-x86/kexec.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/kexec.h Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,20 @@
+#ifndef __X86_KEXEC_H__
+#define __X86_KEXEC_H__
+
+#ifdef __x86_64__
+#include <asm/x86_64/kexec.h>
+#else
+#include <asm/x86_32/kexec.h>
+#endif
+
+#endif /* __X86_KEXEC_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/include/asm-x86/x86_32/elf.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/x86_32/elf.h Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,72 @@
+#ifndef __X86_32_ELF_H__
+#define __X86_32_ELF_H__
+
+#include <asm/processor.h>
+
+typedef struct {
+ unsigned long ebx;
+ unsigned long ecx;
+ unsigned long edx;
+ unsigned long esi;
+ unsigned long edi;
+ unsigned long ebp;
+ unsigned long eax;
+ unsigned long ds;
+ unsigned long es;
+ unsigned long fs;
+ unsigned long gs;
+ unsigned long orig_eax;
+ unsigned long eip;
+ unsigned long cs;
+ unsigned long eflags;
+ unsigned long esp;
+ unsigned long ss;
+} ELF_Gregset;
+
+extern inline void elf_core_save_regs(ELF_Gregset *core_regs,
+ crash_xen_core_t *xen_core_regs)
+{
+ unsigned long tmp;
+
+ asm volatile("movl %%ebx,%0" : "=m"(core_regs->ebx));
+ asm volatile("movl %%ecx,%0" : "=m"(core_regs->ecx));
+ asm volatile("movl %%edx,%0" : "=m"(core_regs->edx));
+ asm volatile("movl %%esi,%0" : "=m"(core_regs->esi));
+ asm volatile("movl %%edi,%0" : "=m"(core_regs->edi));
+ asm volatile("movl %%ebp,%0" : "=m"(core_regs->ebp));
+ asm volatile("movl %%eax,%0" : "=m"(core_regs->eax));
+ asm volatile("movw %%ds, %%ax;" :"=a"(core_regs->ds));
+ asm volatile("movw %%es, %%ax;" :"=a"(core_regs->es));
+ asm volatile("movw %%fs, %%ax;" :"=a"(core_regs->fs));
+ asm volatile("movw %%gs, %%ax;" :"=a"(core_regs->gs));
+ /* orig_eax not filled in for now */
+ core_regs->eip = (unsigned long)current_text_addr();
+ asm volatile("movw %%cs, %%ax;" :"=a"(core_regs->cs));
+ asm volatile("pushfl; popl %0" :"=m"(core_regs->eflags));
+ asm volatile("movl %%esp,%0" : "=m"(core_regs->esp));
+ asm volatile("movw %%ss, %%ax;" :"=a"(core_regs->ss));
+
+ asm volatile("mov %%cr0, %0" : "=r" (tmp) : );
+ xen_core_regs->cr0 = tmp;
+
+ asm volatile("mov %%cr2, %0" : "=r" (tmp) : );
+ xen_core_regs->cr2 = tmp;
+
+ asm volatile("mov %%cr3, %0" : "=r" (tmp) : );
+ xen_core_regs->cr3 = tmp;
+
+ asm volatile("mov %%cr4, %0" : "=r" (tmp) : );
+ xen_core_regs->cr4 = tmp;
+}
+
+#endif /* __X86_32_ELF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/include/asm-x86/x86_32/kexec.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/x86_32/kexec.h Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,40 @@
+/******************************************************************************
+ * kexec.h
+ *
+ * Based heavily on machine_kexec.c and kexec.h from Linux 2.6.19-rc1
+ *
+ */
+
+#ifndef __X86_KEXEC_X86_32_H__
+#define __X86_KEXEC_X86_32_H__
+
+#include <xen/types.h>
+#include <xen/kexec.h>
+#include <asm/fixmap.h>
+
+typedef asmlinkage void (*relocate_new_kernel_t)(
+ unsigned long indirection_page,
+ unsigned long page_list,
+ unsigned long start_address,
+ unsigned int has_pae);
+
+static inline void machine_kexec(xen_kexec_image_t *image)
+{
+ relocate_new_kernel_t rnk;
+
+ rnk = (relocate_new_kernel_t) image->page_list[1];
+ (*rnk)(image->indirection_page, (unsigned long)image->page_list,
+ image->start_address, (unsigned long)cpu_has_pae);
+}
+
+#endif /* __X86_KEXEC_X86_32_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/include/asm-x86/x86_64/elf.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/x86_64/elf.h Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,92 @@
+#ifndef __X86_64_ELF_H__
+#define __X86_64_ELF_H__
+
+#include <asm/processor.h>
+
+typedef struct {
+ unsigned long r15;
+ unsigned long r14;
+ unsigned long r13;
+ unsigned long r12;
+ unsigned long rbp;
+ unsigned long rbx;
+ unsigned long r11;
+ unsigned long r10;
+ unsigned long r9;
+ unsigned long r8;
+ unsigned long rax;
+ unsigned long rcx;
+ unsigned long rdx;
+ unsigned long rsi;
+ unsigned long rdi;
+ unsigned long orig_rax;
+ unsigned long rip;
+ unsigned long cs;
+ unsigned long eflags;
+ unsigned long rsp;
+ unsigned long ss;
+ unsigned long thread_fs;
+ unsigned long thread_gs;
+ unsigned long ds;
+ unsigned long es;
+ unsigned long fs;
+ unsigned long gs;
+} ELF_Gregset;
+
+extern inline void elf_core_save_regs(ELF_Gregset *core_regs,
+ crash_xen_core_t *xen_core_regs)
+{
+ unsigned long tmp;
+
+ asm volatile("movq %%r15,%0" : "=m"(core_regs->r15));
+ asm volatile("movq %%r14,%0" : "=m"(core_regs->r14));
+ asm volatile("movq %%r13,%0" : "=m"(core_regs->r13));
+ asm volatile("movq %%r12,%0" : "=m"(core_regs->r12));
+ asm volatile("movq %%rbp,%0" : "=m"(core_regs->rbp));
+ asm volatile("movq %%rbx,%0" : "=m"(core_regs->rbx));
+ asm volatile("movq %%r11,%0" : "=m"(core_regs->r11));
+ asm volatile("movq %%r10,%0" : "=m"(core_regs->r10));
+ asm volatile("movq %%r9,%0" : "=m"(core_regs->r9));
+ asm volatile("movq %%r8,%0" : "=m"(core_regs->r8));
+ asm volatile("movq %%rax,%0" : "=m"(core_regs->rax));
+ asm volatile("movq %%rcx,%0" : "=m"(core_regs->rcx));
+ asm volatile("movq %%rdx,%0" : "=m"(core_regs->rdx));
+ asm volatile("movq %%rsi,%0" : "=m"(core_regs->rsi));
+ asm volatile("movq %%rdi,%0" : "=m"(core_regs->rdi));
+ /* orig_rax not filled in for now */
+ core_regs->rip = (unsigned long)current_text_addr();
+ asm volatile("movl %%cs, %%eax;" :"=a"(core_regs->cs));
+ asm volatile("pushfq; popq %0" :"=m"(core_regs->eflags));
+ asm volatile("movq %%rsp,%0" : "=m"(core_regs->rsp));
+ asm volatile("movl %%ss, %%eax;" :"=a"(core_regs->ss));
+ /* thread_fs not filled in for now */
+ /* thread_gs not filled in for now */
+ asm volatile("movl %%ds, %%eax;" :"=a"(core_regs->ds));
+ asm volatile("movl %%es, %%eax;" :"=a"(core_regs->es));
+ asm volatile("movl %%fs, %%eax;" :"=a"(core_regs->fs));
+ asm volatile("movl %%gs, %%eax;" :"=a"(core_regs->gs));
+
+ asm volatile("mov %%cr0, %0" : "=r" (tmp) : );
+ xen_core_regs->cr0 = tmp;
+
+ asm volatile("mov %%cr2, %0" : "=r" (tmp) : );
+ xen_core_regs->cr2 = tmp;
+
+ asm volatile("mov %%cr3, %0" : "=r" (tmp) : );
+ xen_core_regs->cr3 = tmp;
+
+ asm volatile("mov %%cr4, %0" : "=r" (tmp) : );
+ xen_core_regs->cr4 = tmp;
+}
+
+#endif /* __X86_64_ELF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
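
A minimal usage sketch for the helper above (not part of the patch; the function name is hypothetical): the caller supplies the two output structures and the registers of the CPU executing the call are captured in place. ELF_Gregset is defined in this file; crash_xen_core_t comes from the asm-x86 elf.h added elsewhere in this changeset.

/* Hypothetical example: snapshot the state of the CPU running this code. */
static void example_snapshot_this_cpu(ELF_Gregset *gregs,
                                      crash_xen_core_t *xen_regs)
{
    /* r8-r15, rax-rdi, rip, rsp, rflags and the segment selectors are
     * stored into *gregs; CR0-CR4 are stored into *xen_regs. */
    elf_core_save_regs(gregs, xen_regs);
}
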
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/include/asm-x86/x86_64/kexec.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/x86_64/kexec.h Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,39 @@
+/******************************************************************************
+ * kexec.h
+ *
+ * Based heavily on machine_kexec.c and kexec.h from Linux 2.6.19-rc1
+ *
+ */
+
+#ifndef __X86_64_KEXEC_H__
+#define __X86_64_KEXEC_H__
+
+#include <xen/types.h>
+#include <xen/kexec.h>
+#include <asm/fixmap.h>
+
+typedef void (*relocate_new_kernel_t)(
+ unsigned long indirection_page,
+ unsigned long page_list,
+ unsigned long start_address);
+
+static inline void machine_kexec(xen_kexec_image_t *image)
+{
+ relocate_new_kernel_t rnk;
+
+ rnk = (relocate_new_kernel_t) image->page_list[1];
+ (*rnk)(image->indirection_page, (unsigned long)image->page_list,
+ image->start_address);
+}
+
+#endif /* __X86_64_KEXEC_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/include/public/kexec.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/public/kexec.h Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,138 @@
+/******************************************************************************
+ * kexec.h - Public portion
+ *
+ * Xen port written by:
+ * - Simon 'Horms' Horman <horms@xxxxxxxxxxxx>
+ * - Magnus Damm <magnus@xxxxxxxxxxxxx>
+ */
+
+#ifndef _XEN_PUBLIC_KEXEC_H
+#define _XEN_PUBLIC_KEXEC_H
+
+
+/* This file describes the Kexec / Kdump hypercall interface for Xen.
+ *
+ * Kexec under vanilla Linux allows a user to reboot the physical machine
+ * into a new user-specified kernel. The Xen port extends this idea
+ * to allow rebooting of the machine from dom0. When kexec for dom0
+ * is used to reboot, both the hypervisor and the domains get replaced
+ * with some other kernel. It is possible to kexec between vanilla
+ * Linux and Xen and back again. Xen to Xen works well too.
+ *
+ * The hypercall interface for kexec can be divided into three main
+ * types of hypercall operations:
+ *
+ * 1) Range information:
+ *    This is used by the dom0 kernel to ask the hypervisor for various
+ *    address ranges. This information is needed to allow kexec-tools
+ *    to fill in the ELF headers for /proc/vmcore properly.
+ *
+ * 2) Load and unload of images:
+ *    There are no big surprises here; the kexec binary from kexec-tools
+ *    runs in userspace in dom0. The tool loads/unloads data such as the
+ *    new kernel, initramfs and hypervisor into the dom0 kernel. Once
+ *    that data is loaded, the dom0 kernel performs a load hypercall
+ *    operation, and before releasing all page references it calls unload.
+ *
+ * 3) Kexec operation:
+ * This is used to start a previously loaded kernel.
+ */
+
+#include "xen.h"
+
+#if defined(__i386__) || defined(__x86_64__)
+#define KEXEC_XEN_NO_PAGES 17
+#endif
+
+/*
+ * Prototype for this hypercall is:
+ * int kexec_op(int cmd, void *args)
+ * @cmd == KEXEC_CMD_...
+ * KEXEC operation to perform
+ * @args == Operation-specific extra arguments (NULL if none).
+ */
+
+/*
+ * Kexec supports two types of operation:
+ * - kexec into a regular kernel, very similar to a standard reboot
+ * - KEXEC_TYPE_DEFAULT is used to specify this type
+ * - kexec into a special "crash kernel", aka kexec-on-panic
+ * - KEXEC_TYPE_CRASH is used to specify this type
+ * - parts of our system may be broken at kexec-on-panic time
+ * - the code should be kept as simple and self-contained as possible
+ */
+
+#define KEXEC_TYPE_DEFAULT 0
+#define KEXEC_TYPE_CRASH 1
+
+
+/* The kexec implementation for Xen allows the user to load two
+ * types of kernels, KEXEC_TYPE_DEFAULT and KEXEC_TYPE_CRASH.
+ * All data needed for a kexec reboot is kept in one xen_kexec_image_t
+ * per "instance". The data mainly consists of lists of machine addresses of
+ * pages, together with their destinations. The data in xen_kexec_image_t
+ * is passed to the "code page" which is one page of code that performs
+ * the final relocations before jumping to the new kernel.
+ */
+
+typedef struct xen_kexec_image {
+#if defined(__i386__) || defined(__x86_64__)
+ unsigned long page_list[KEXEC_XEN_NO_PAGES];
+#endif
+ unsigned long indirection_page;
+ unsigned long start_address;
+} xen_kexec_image_t;
+
+/*
+ * Perform kexec having previously loaded a kexec or kdump kernel
+ * as appropriate.
+ * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in]
+ */
+#define KEXEC_CMD_kexec 0
+typedef struct xen_kexec_exec {
+ int type;
+} xen_kexec_exec_t;
+
+/*
+ * Load/Unload kernel image for kexec or kdump.
+ * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in]
+ * image == relocation information for kexec (ignored for unload) [in]
+ */
+#define KEXEC_CMD_kexec_load 1
+#define KEXEC_CMD_kexec_unload 2
+typedef struct xen_kexec_load {
+ int type;
+ xen_kexec_image_t image;
+} xen_kexec_load_t;
+
+#define KEXEC_RANGE_MA_CRASH 0 /* machine address and size of crash area */
+#define KEXEC_RANGE_MA_XEN 1 /* machine address and size of Xen itself */
+#define KEXEC_RANGE_VA_XEN   2 /* virtual address and size of Xen itself */
+#define KEXEC_RANGE_MA_CPU 3 /* machine address and size of a CPU note */
+
+/*
+ * Find the address and size of certain memory areas
+ * range == KEXEC_RANGE_... [in]
+ * nr == physical CPU number (starting from 0) if KEXEC_RANGE_MA_CPU [in]
+ * size == number of bytes reserved in window [out]
+ * start == address of the first byte in the window [out]
+ */
+#define KEXEC_CMD_kexec_get_range 3
+typedef struct xen_kexec_range {
+ int range;
+ int nr;
+ unsigned long size;
+ unsigned long start;
+} xen_kexec_range_t;
+
+#endif /* _XEN_PUBLIC_KEXEC_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
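
The hypercall interface described in this header can be exercised from dom0 roughly as follows. This is an illustrative sketch, not part of the patch: it assumes the HYPERVISOR_kexec_op() wrapper that the sparse-tree hypercall.h changes in this changeset presumably provide, and the example_* function names are hypothetical.

#include <xen/interface/kexec.h>  /* this header, as seen from the Linux side */

/* Ask Xen where the reserved crash (kdump) region lives in machine memory. */
static int example_get_crash_range(unsigned long *start, unsigned long *size)
{
    xen_kexec_range_t range = { .range = KEXEC_RANGE_MA_CRASH, .nr = 0 };
    int rc = HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range);

    if (rc == 0) {
        *start = range.start;
        *size  = range.size;
    }
    return rc;
}

/* Hand a prepared image to Xen, then kick it off. */
static int example_load_and_exec(xen_kexec_image_t *image)
{
    xen_kexec_load_t load = { .type = KEXEC_TYPE_DEFAULT, .image = *image };
    xen_kexec_exec_t exec = { .type = KEXEC_TYPE_DEFAULT };
    int rc = HYPERVISOR_kexec_op(KEXEC_CMD_kexec_load, &load);

    if (rc)
        return rc;

    /* Does not return on success: the machine reboots into the new kernel. */
    return HYPERVISOR_kexec_op(KEXEC_CMD_kexec, &exec);
}
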
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/include/xen/elfcore.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/xen/elfcore.h Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,140 @@
+/******************************************************************************
+ * elfcore.h
+ *
+ * Based heavily on include/linux/elfcore.h from Linux 2.6.16
+ * Naming scheme based on include/xen/elf.h (not include/linux/elfcore.h)
+ *
+ */
+
+#ifndef __ELFCOREC_H__
+#define __ELFCOREC_H__
+
+#include <xen/types.h>
+#include <xen/elf.h>
+#include <asm/elf.h>
+#include <public/xen.h>
+
+#define NT_PRSTATUS 1
+
+typedef struct
+{
+ int signo; /* signal number */
+ int code; /* extra code */
+ int errno; /* errno */
+} ELF_Signifo;
+
+/* These seem to be the same length on all architectures on Linux */
+typedef int ELF_Pid;
+typedef struct {
+ long tv_sec;
+ long tv_usec;
+} ELF_Timeval;
+
+/*
+ * Definitions to generate Intel SVR4-like core files.
+ * These mostly have the same names as the SVR4 types with "elf_"
+ * tacked on the front to prevent clashes with linux definitions,
+ * and the typedef forms have been avoided. This is mostly like
+ * the SVR4 structure, but more Linuxy, with things that Linux does
+ * not support and which gdb doesn't really use excluded.
+ */
+typedef struct
+{
+ ELF_Signifo pr_info; /* Info associated with signal */
+ short pr_cursig; /* Current signal */
+ unsigned long pr_sigpend; /* Set of pending signals */
+ unsigned long pr_sighold; /* Set of held signals */
+ ELF_Pid pr_pid;
+ ELF_Pid pr_ppid;
+ ELF_Pid pr_pgrp;
+ ELF_Pid pr_sid;
+ ELF_Timeval pr_utime; /* User time */
+ ELF_Timeval pr_stime; /* System time */
+ ELF_Timeval pr_cutime; /* Cumulative user time */
+ ELF_Timeval pr_cstime; /* Cumulative system time */
+ ELF_Gregset pr_reg; /* GP registers - from asm header file */
+ int pr_fpvalid; /* True if math co-processor being used. */
+} ELF_Prstatus;
+
+/*
+ * The following data structures provide 64-bit ELF notes. In theory it should
+ * be possible to support both 64-bit and 32-bit ELF files, but to keep it
+ * simple we only do 64-bit.
+ *
+ * Please note that the current code aligns the 64-bit notes in the same
+ * way as Linux does. We are not following the 64-bit ELF spec; no one does.
+ *
+ * We are avoiding two problems by restricting ourselves to 64-bit notes only:
+ * - Alignment of notes changes with the word size. Ick.
+ * - We would need to tell kexec-tools which format we are using in the
+ * hypervisor to make sure the right ELF format is generated.
+ * That requires infrastructure. Let's not.
+ */
+
+#define ALIGN(x, n) ((x + ((1 << n) - 1)) / (1 << n))
+#define PAD32(x) u32 pad_data[ALIGN(x, 2)]
+
+#define TYPEDEF_NOTE(type, strlen, desctype) \
+ typedef struct { \
+ union { \
+ struct { \
+ Elf_Note note; \
+ unsigned char name[strlen]; \
+ } note; \
+ PAD32(sizeof(Elf_Note) + strlen); \
+ } note; \
+ union { \
+ desctype desc; \
+ PAD32(sizeof(desctype)); \
+ } desc; \
+ } __attribute__ ((packed)) type
+
+#define CORE_STR "CORE"
+#define CORE_STR_LEN 5 /* including terminating zero */
+
+TYPEDEF_NOTE(crash_note_core_t, CORE_STR_LEN, ELF_Prstatus);
+
+#define XEN_STR "Xen"
+#define XEN_STR_LEN 4 /* including terminating zero */
+
+TYPEDEF_NOTE(crash_note_xen_core_t, XEN_STR_LEN, crash_xen_core_t);
+
+typedef struct {
+ unsigned long xen_major_version;
+ unsigned long xen_minor_version;
+ unsigned long xen_extra_version;
+ unsigned long xen_changeset;
+ unsigned long xen_compiler;
+ unsigned long xen_compile_date;
+ unsigned long xen_compile_time;
+ unsigned long tainted;
+#ifdef CONFIG_X86
+ unsigned long dom0_pfn_to_mfn_frame_list_list;
+#endif
+} crash_xen_info_t;
+
+TYPEDEF_NOTE(crash_note_xen_info_t, XEN_STR_LEN, crash_xen_info_t);
+
+typedef struct {
+ crash_note_core_t core;
+ crash_note_xen_core_t xen_regs;
+ crash_note_xen_info_t xen_info;
+} __attribute__ ((packed)) crash_note_t;
+
+#define setup_crash_note(np, member, str, str_len, id) \
+ np->member.note.note.note.namesz = str_len; \
+ np->member.note.note.note.descsz = sizeof(np->member.desc.desc); \
+ np->member.note.note.note.type = id; \
+ memcpy(np->member.note.note.name, str, str_len)
+
+#endif /* __ELFCOREC_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
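
To make the padding arithmetic above concrete: with n == 2, ALIGN(x, n) is simply the number of 32-bit words needed to hold x bytes, so PAD32() rounds each union up to a 4-byte boundary (the Linux-style 4-byte note alignment, not the 8-byte alignment the 64-bit ELF spec nominally requires). Assuming the usual 12-byte note header (namesz/descsz/type), the name part of crash_note_core_t occupies ALIGN(12 + CORE_STR_LEN, 2) == ALIGN(17, 2) == 5 u32s, i.e. 20 bytes. A hypothetical sketch (not part of the patch) of filling in one CPU's notes, using the elf_core_save_regs() helper from the asm headers in this changeset; the Xen note type constant shown is a placeholder, the real value belongs in xen/include/public/elfnote.h:

#define EXAMPLE_XEN_NOTE_TYPE 0x1000001  /* placeholder note type, hypothetical */

static void example_fill_cpu_notes(crash_note_t *np)
{
    /* Header fields of the standard NT_PRSTATUS note. */
    setup_crash_note(np, core, CORE_STR, CORE_STR_LEN, NT_PRSTATUS);

    /* Header fields of the Xen-specific register note. */
    setup_crash_note(np, xen_regs, XEN_STR, XEN_STR_LEN, EXAMPLE_XEN_NOTE_TYPE);

    /* Capture this CPU's registers straight into the note bodies. */
    elf_core_save_regs(&np->core.desc.desc.pr_reg,
                       &np->xen_regs.desc.desc);
}
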
diff -r 88935ae47fa9 -r 5d6be0099bdf xen/include/xen/kexec.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/xen/kexec.h Thu Nov 30 13:05:27 2006 +0000
@@ -0,0 +1,43 @@
+#ifndef __XEN_KEXEC_H__
+#define __XEN_KEXEC_H__
+
+#include <public/kexec.h>
+#include <asm/percpu.h>
+#include <xen/elfcore.h>
+
+extern int crashing_cpu;
+
+typedef struct xen_kexec_reserve {
+ unsigned long size;
+ unsigned long start;
+} xen_kexec_reserve_t;
+
+/* We have space for 4 images to support atomic update
+ * of images. This is important for CRASH images since
+ * a panic can happen at any time...
+ */
+
+#define KEXEC_IMAGE_DEFAULT_BASE 0
+#define KEXEC_IMAGE_CRASH_BASE 2
+#define KEXEC_IMAGE_NR 4
+
+int machine_kexec_load(int type, int slot, xen_kexec_image_t *image);
+void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image);
+void machine_kexec_reserved(xen_kexec_reserve_t *reservation);
+void machine_shutdown(xen_kexec_image_t *image);
+void machine_crash_kexec(void);
+void machine_crash_save_cpu(void);
+crash_xen_info_t *machine_crash_save_info(void);
+void machine_crash_shutdown(void);
+
+#endif /* __XEN_KEXEC_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
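
One plausible reading of the slot constants above, as an illustrative sketch (not part of the patch; the helper name and "current_pos" bookkeeping are hypothetical): each image type owns two consecutive slots, and a new image is loaded into whichever of the two is not currently live before being made current, so a panic arriving mid-update still finds a complete crash image.

/* Hypothetical helper: which of a type's two slots is safe to overwrite. */
static int example_free_slot(int type, int current_pos /* 0 or 1 */)
{
    int base = (type == KEXEC_TYPE_CRASH) ? KEXEC_IMAGE_CRASH_BASE
                                          : KEXEC_IMAGE_DEFAULT_BASE;

    return base + (current_pos ^ 1);   /* always < KEXEC_IMAGE_NR */
}
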
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog