# HG changeset patch # Parent b5ceec1ccccad6e79053a80820132303ff1e136f KEXEC: Allocate crash notes on boot v2 Currently, the buffers for crash notes are allocated per CPU when a KEXEC_CMD_kexec_get_range hypercall is made, referencing the CPU in question. Although it certainly works in general, there are a few edge case problems: 1) There appears to be no guarantee that dom0 will make this hypercall for each pcpu on the system. There appears to be variable behaviour depending on how many cpus dom0 is compiled for, and how many vcpus Xen gives to dom0. 2) The allocation of these buffers occurs at the whim of dom0. While this is typically very early in dom0 boot, it is not guaranteed. 3) It is possible (although not sensible) for a crash kernel to be loaded without these (or some of these) hypercalls being made. Under these circumstances, a crash would cause the crash note code path to suffer a slew of null pointer dereferences. 4) If the hypercalls are made late in the day, it is possible for the hypercall to return -ENOMEM. As code tends to be more fragile once memory is exhausted, the likelihood of us needing the crash kernel is greater. In addition, my forthcoming code to support 32bit kdump kernels on 64bit Xen on large (>64GB) boxes will require some guarantees as to where the crash note buffers are actually allocated in physical memory. This is far easier to sort out at boot time, rather than after dom0 has been booted and is potentially using the physical memory required. Therefore, allocate the crash note buffers at boot time. Changes since v1: * Use cpu hotplug notifiers to handle allocating of the notes buffers rather than assuming the boot state of cpus will be the same as the crash state. * Move crash_notes from being per_cpu. This is because the kdump kernel elf binary put in the crash area is hard coded to physical addresses which the dom0 kernel typically obtains at boot time. 
If a cpu is offlined, its buffer should not be deallocated because the kdump kernel would read junk when trying to get the crash notes. Similarly, the same problem would occur if the cpu was re-onlined later and its crash notes buffer was allocated elsewhere. * Only attempt to allocate buffers if a crash area has been specified. Else, allocating crash note buffers is a waste of space. Along with this, change the test in kexec_get_cpu to return -EINVAL if no buffers have been allocated. Signed-off-by: Andrew Cooper diff -r df7cec2c6c03 xen/common/kexec.c --- a/xen/common/kexec.c +++ b/xen/common/kexec.c @@ -25,13 +25,14 @@ #include #include #include +#include #ifdef CONFIG_COMPAT #include #endif bool_t kexecing = FALSE; -static DEFINE_PER_CPU_READ_MOSTLY(void *, crash_notes); +static void * crash_notes[NR_CPUS]; static Elf_Note *xen_crash_note; @@ -165,7 +166,7 @@ static void one_cpu_only(void) void kexec_crash_save_cpu(void) { int cpu = smp_processor_id(); - Elf_Note *note = per_cpu(crash_notes, cpu); + Elf_Note *note = crash_notes[cpu]; ELF_Prstatus *prstatus; crash_xen_core_t *xencore; @@ -245,25 +246,6 @@ static long kexec_reboot(void *_image) return 0; } -static void do_crashdump_trigger(unsigned char key) -{ - printk("'%c' pressed -> triggering crashdump\n", key); - kexec_crash(); - printk(" * no crash kernel loaded!\n"); -} - -static struct keyhandler crashdump_trigger_keyhandler = { - .u.fn = do_crashdump_trigger, - .desc = "trigger a crashdump" -}; - -static __init int register_crashdump_trigger(void) -{ - register_keyhandler('C', &crashdump_trigger_keyhandler); - return 0; -} -__initcall(register_crashdump_trigger); - static void setup_note(Elf_Note *n, const char *name, int type, int descsz) { int l = strlen(name) + 1; @@ -280,6 +262,110 @@ static int sizeof_note(const char *name, ELFNOTE_ALIGN(descsz)); } +/* Allocate a crash note buffer for a newly onlined cpu. 
*/ +static int kexec_init_cpu_notes(const int cpu) +{ + Elf_Note * note; + int nr_bytes = 0; + + BUG_ON( cpu < 0 || cpu >= NR_CPUS ); + + /* If already allocated, nothing to do. */ + if ( crash_notes[cpu] ) + return 0; + + /* All CPUs present a PRSTATUS and crash_xen_core note. */ + nr_bytes = + sizeof_note("CORE", sizeof(ELF_Prstatus)) + + sizeof_note("Xen", sizeof(crash_xen_core_t)); + + /* CPU0 also presents the crash_xen_info note. */ + if ( 0 == cpu ) + nr_bytes = nr_bytes + + sizeof_note("Xen", sizeof(crash_xen_info_t)); + + note = xmalloc_bytes(nr_bytes); + if ( ! note ) + /* Ideally, this would be -ENOMEM. However, there are more problems + * assocated with trying to deal with -ENOMEM sensibly than just + * pretending that the crash note area doesn't exist. */ + return 0; + + crash_notes[cpu] = note; + + /* Setup CORE note. */ + setup_note(note, "CORE", NT_PRSTATUS, sizeof(ELF_Prstatus)); + note = ELFNOTE_NEXT(note); + + /* Setup Xen CORE note. */ + setup_note(note, "Xen", XEN_ELFNOTE_CRASH_REGS, + sizeof(crash_xen_core_t)); + + if ( 0 == cpu ) + { + /* Set up Xen Crash Info note. */ + xen_crash_note = note = ELFNOTE_NEXT(note); + setup_note(note, "Xen", XEN_ELFNOTE_CRASH_INFO, + sizeof(crash_xen_info_t)); + } + + return 0; +} + +static void do_crashdump_trigger(unsigned char key) +{ + printk("'%c' pressed -> triggering crashdump\n", key); + kexec_crash(); + printk(" * no crash kernel loaded!\n"); +} + +static struct keyhandler crashdump_trigger_keyhandler = { + .u.fn = do_crashdump_trigger, + .desc = "trigger a crashdump" +}; + +static int cpu_callback( + struct notifier_block *nfb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + + /* Only hook on CPU_UP_PREPARE because once a crash_note has been reported + * to dom0, it must keep it around in case of a crash, as the crash kernel + * will be hard coded to the original physical address reported. 
*/ + switch ( action ) + { + case CPU_UP_PREPARE: + kexec_init_cpu_notes(cpu); + break; + default: + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block cpu_nfb = { + .notifier_call = cpu_callback +}; + +static int __init kexec_init(void) +{ + void *cpu = (void *)(long)smp_processor_id(); + + register_keyhandler('C', &crashdump_trigger_keyhandler); + + /* If no crash area, no need to allocate space for notes. */ + if ( 0 == kexec_crash_area.size ) + return 0; + + cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); + register_cpu_notifier(&cpu_nfb); + return 0; +} +/* The reason for this to be a presmp_initcall as opposed to a regular + * __initcall is to allow the setup of the cpu hotplug handler before APs are + * brought up. */ +presmp_initcall(kexec_init); + static int kexec_get_reserve(xen_kexec_range_t *range) { if ( kexec_crash_area.size > 0 && kexec_crash_area.start > 0) { @@ -296,7 +382,7 @@ static int kexec_get_cpu(xen_kexec_range int nr = range->nr; int nr_bytes = 0; - if ( nr < 0 || nr >= nr_cpu_ids || !cpu_online(nr) ) + if ( nr < 0 || nr >= nr_cpu_ids || !cpu_online(nr) || !crash_notes[nr] ) return -EINVAL; nr_bytes += sizeof_note("CORE", sizeof(ELF_Prstatus)); @@ -306,31 +392,7 @@ static int kexec_get_cpu(xen_kexec_range if ( nr == 0 ) nr_bytes += sizeof_note("Xen", sizeof(crash_xen_info_t)); - if ( per_cpu(crash_notes, nr) == NULL ) - { - Elf_Note *note; - - note = per_cpu(crash_notes, nr) = xmalloc_bytes(nr_bytes); - - if ( note == NULL ) - return -ENOMEM; - - /* Setup CORE note. */ - setup_note(note, "CORE", NT_PRSTATUS, sizeof(ELF_Prstatus)); - - /* Setup Xen CORE note. */ - note = ELFNOTE_NEXT(note); - setup_note(note, "Xen", XEN_ELFNOTE_CRASH_REGS, sizeof(crash_xen_core_t)); - - if (nr == 0) - { - /* Setup system wide Xen info note. 
*/ - xen_crash_note = note = ELFNOTE_NEXT(note); - setup_note(note, "Xen", XEN_ELFNOTE_CRASH_INFO, sizeof(crash_xen_info_t)); - } - } - - range->start = __pa((unsigned long)per_cpu(crash_notes, nr)); + range->start = __pa((unsigned long)crash_notes[nr]); range->size = nr_bytes; return 0; }