# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1274191554 -3600
# Node ID c1b6647c682816f30f2b6c13da62c3b6cc617ff7
# Parent 9d5afef2421c84137959093bc25c1421c0e5c4ee
x86: Pull dynamic memory allocation out of do_boot_cpu().
This has two advantages:
(a) We can move the allocations to a context where we can handle
failure.
(b) We can implement matching deallocations on CPU offline.
The idle vcpu structure is now the only allocation that is not freed on
CPU offline. This probably does not really matter.
Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
---
xen/arch/x86/domain.c | 2
xen/arch/x86/mm.c | 7 +
xen/arch/x86/smpboot.c | 202 +++++++++++++++++++++--------------
xen/arch/x86/x86_32/traps.c | 51 +++++++-
xen/arch/x86/x86_64/mm.c | 22 ++-
xen/common/cpu.c | 2
xen/include/asm-x86/desc.h | 2
xen/include/asm-x86/mm.h | 1
xen/include/asm-x86/smp.h | 2
xen/include/asm-x86/x86_64/uaccess.h | 1
10 files changed, 193 insertions(+), 99 deletions(-)
diff -r 9d5afef2421c -r c1b6647c6828 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Tue May 18 13:41:55 2010 +0100
+++ b/xen/arch/x86/domain.c Tue May 18 15:05:54 2010 +0100
@@ -99,7 +99,7 @@ static void play_dead(void)
static void play_dead(void)
{
/* This must be done before dead CPU ack */
- cpu_exit_clear();
+ cpu_exit_clear(smp_processor_id());
wbinvd();
mb();
/* Ack it */
diff -r 9d5afef2421c -r c1b6647c6828 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue May 18 13:41:55 2010 +0100
+++ b/xen/arch/x86/mm.c Tue May 18 15:05:54 2010 +0100
@@ -5331,6 +5331,13 @@ void memguard_guard_stack(void *p)
memguard_guard_range(p, PAGE_SIZE);
}
+void memguard_unguard_stack(void *p) /* Unguard one page inside the stack area at p. */
+{
+ p = (void *)((unsigned long)p + STACK_SIZE -
+ PRIMARY_STACK_SIZE - PAGE_SIZE); /* page just below the top PRIMARY_STACK_SIZE bytes; assumes p is the stack base */
+ memguard_unguard_range(p, PAGE_SIZE); /* presumably the page memguard_guard_stack() guarded -- confirm */
+}
+
/*
* Local variables:
* mode: C
diff -r 9d5afef2421c -r c1b6647c6828 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c Tue May 18 13:41:55 2010 +0100
+++ b/xen/arch/x86/smpboot.c Tue May 18 15:05:54 2010 +0100
@@ -336,7 +336,7 @@ void start_secondary(void *unused)
/* This must be done before setting cpu_online_map */
spin_debug_enable();
- set_cpu_sibling_map(raw_smp_processor_id());
+ set_cpu_sibling_map(smp_processor_id());
wmb();
/*
@@ -545,24 +545,11 @@ int alloc_cpu_id(void)
return (cpu < NR_CPUS) ? cpu : -ENODEV;
}
-static void *prepare_idle_stack(unsigned int cpu)
-{
- if ( !stack_base[cpu] )
- stack_base[cpu] = alloc_xenheap_pages(STACK_ORDER, 0);
- return stack_base[cpu];
-}
-
static int do_boot_cpu(int apicid, int cpu)
{
unsigned long boot_error;
- unsigned int order;
int timeout;
unsigned long start_eip;
- struct vcpu *v;
- struct desc_struct *gdt;
-#ifdef __x86_64__
- struct page_info *page;
-#endif
/*
* Save current MTRR state in case it was changed since early boot
@@ -572,62 +559,15 @@ static int do_boot_cpu(int apicid, int c
booting_cpu = cpu;
- v = alloc_idle_vcpu(cpu);
- BUG_ON(v == NULL);
-
/* start_eip had better be page-aligned! */
start_eip = setup_trampoline();
/* So we see what's up */
- if (opt_cpu_info)
+ if ( opt_cpu_info )
printk("Booting processor %d/%d eip %lx\n",
cpu, apicid, start_eip);
- stack_start.esp = prepare_idle_stack(cpu);
-
- /* Debug build: detect stack overflow by setting up a guard page. */
- memguard_guard_stack(stack_start.esp);
-
- gdt = per_cpu(gdt_table, cpu);
- if ( gdt == boot_cpu_gdt_table )
- {
- order = get_order_from_pages(NR_RESERVED_GDT_PAGES);
-#ifdef __x86_64__
- page = alloc_domheap_pages(NULL, order,
- MEMF_node(cpu_to_node(cpu)));
- per_cpu(compat_gdt_table, cpu) = gdt = page_to_virt(page);
- memcpy(gdt, boot_cpu_compat_gdt_table,
- NR_RESERVED_GDT_PAGES * PAGE_SIZE);
- gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
- page = alloc_domheap_pages(NULL, order,
- MEMF_node(cpu_to_node(cpu)));
- per_cpu(gdt_table, cpu) = gdt = page_to_virt(page);
-#else
- per_cpu(gdt_table, cpu) = gdt = alloc_xenheap_pages(order, 0);
-#endif
- memcpy(gdt, boot_cpu_gdt_table,
- NR_RESERVED_GDT_PAGES * PAGE_SIZE);
- BUILD_BUG_ON(NR_CPUS > 0x10000);
- gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
- }
-
-#ifdef __i386__
- if ( !per_cpu(doublefault_tss, cpu) )
- {
- per_cpu(doublefault_tss, cpu) = alloc_xenheap_page();
- memset(per_cpu(doublefault_tss, cpu), 0, PAGE_SIZE);
- }
-#else
- if ( !per_cpu(compat_arg_xlat, cpu) )
- setup_compat_arg_xlat(cpu, cpu_to_node[cpu]);
-#endif
-
- if ( !idt_tables[cpu] )
- {
- idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
- memcpy(idt_tables[cpu], idt_table,
- IDT_ENTRIES*sizeof(idt_entry_t));
- }
+ stack_start.esp = stack_base[cpu];
/* This grunge runs the startup process for the targeted processor. */
@@ -677,16 +617,7 @@ static int do_boot_cpu(int apicid, int c
}
if ( boot_error )
- {
- /* Try to put things back the way they were before ... */
- unmap_cpu_to_logical_apicid(cpu);
- cpu_clear(cpu, cpu_callout_map); /* was set here */
- cpu_uninit(cpu); /* undoes cpu_init() */
-
- /* Mark the CPU as non-present */
- x86_cpu_to_apicid[cpu] = BAD_APICID;
- cpu_clear(cpu, cpu_present_map);
- }
+ cpu_exit_clear(cpu);
/* mark "stuck" area as not stuck */
bootsym(trampoline_cpu_started) = 0;
@@ -697,10 +628,8 @@ static int do_boot_cpu(int apicid, int c
return boot_error ? -EIO : 0;
}
-void cpu_exit_clear(void)
-{
- int cpu = raw_smp_processor_id();
-
+void cpu_exit_clear(unsigned int cpu)
+{
cpu_uninit(cpu);
cpu_clear(cpu, cpu_callout_map);
@@ -710,8 +639,127 @@ void cpu_exit_clear(void)
unmap_cpu_to_logical_apicid(cpu);
}
+static void cpu_smpboot_free(unsigned int cpu) /* Release the boot-time allocations made for @cpu. */
+{ /* Pointers are cleared after freeing; cpu_smpboot_alloc() also jumps here on partial failure. */
+    unsigned int order;
+
+    xfree(idt_tables[cpu]);
+    idt_tables[cpu] = NULL;
+
+#ifdef __x86_64__
+    free_compat_arg_xlat(cpu);
+#endif
+
+    order = get_order_from_pages(NR_RESERVED_GDT_PAGES);
+#ifdef __x86_64__
+    if ( per_cpu(gdt_table, cpu) ) /* test the same pointer that is freed */
+        free_domheap_pages(virt_to_page(per_cpu(gdt_table, cpu)), order);
+    if ( per_cpu(compat_gdt_table, cpu) ) /* may be set while gdt_table is not (oom between the two) */
+        free_domheap_pages(virt_to_page(per_cpu(compat_gdt_table, cpu)),
+                           order);
+    per_cpu(compat_gdt_table, cpu) = NULL;
+#else
+    free_xenheap_pages(per_cpu(gdt_table, cpu), order);
+#endif
+    per_cpu(gdt_table, cpu) = NULL;
+
+    if ( stack_base[cpu] != NULL )
+    {
+        memguard_unguard_stack(stack_base[cpu]); /* undo the guard set at allocation before freeing */
+        free_xenheap_pages(stack_base[cpu], STACK_ORDER);
+        stack_base[cpu] = NULL;
+    }
+}
+
+static int cpu_smpboot_alloc(unsigned int cpu) /* Allocate what @cpu needs to boot; returns 0 or -ENOMEM. */
+{
+ unsigned int order;
+ struct desc_struct *gdt;
+#ifdef __x86_64__
+ struct page_info *page;
+#endif
+
+ if ( alloc_idle_vcpu(cpu) == NULL )
+ goto oom; /* NOTE(review): oom here frees whatever per_cpu(gdt_table, cpu) holds -- confirm it starts out NULL */
+
+ stack_base[cpu] = alloc_xenheap_pages(STACK_ORDER, 0);
+ if ( stack_base[cpu] == NULL )
+ goto oom;
+ memguard_guard_stack(stack_base[cpu]); /* guard a page of the new stack */
+
+ order = get_order_from_pages(NR_RESERVED_GDT_PAGES);
+#ifdef __x86_64__
+ page = alloc_domheap_pages(NULL, order,
+ MEMF_node(cpu_to_node(cpu)));
+ if ( !page )
+ goto oom;
+ per_cpu(compat_gdt_table, cpu) = gdt = page_to_virt(page); /* compat GDT, filled from boot_cpu_compat_gdt_table */
+ memcpy(gdt, boot_cpu_compat_gdt_table,
+ NR_RESERVED_GDT_PAGES * PAGE_SIZE);
+ gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu; /* store the CPU number in the per-CPU GDT entry */
+ page = alloc_domheap_pages(NULL, order,
+ MEMF_node(cpu_to_node(cpu)));
+ if ( !page )
+ goto oom; /* compat_gdt_table is set, gdt_table is not yet */
+ per_cpu(gdt_table, cpu) = gdt = page_to_virt(page);
+#else
+ per_cpu(gdt_table, cpu) = gdt = alloc_xenheap_pages(order, 0);
+ if ( !gdt )
+ goto oom;
+#endif
+ memcpy(gdt, boot_cpu_gdt_table,
+ NR_RESERVED_GDT_PAGES * PAGE_SIZE); /* native GDT starts as a copy of the boot CPU's */
+ BUILD_BUG_ON(NR_CPUS > 0x10000); /* presumably the CPU number must fit in 16 bits of the entry -- confirm */
+ gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
+
+#ifdef __x86_64__
+ if ( setup_compat_arg_xlat(cpu, cpu_to_node[cpu]) )
+ goto oom;
+#endif
+
+ idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
+ if ( idt_tables[cpu] == NULL )
+ goto oom;
+ memcpy(idt_tables[cpu], idt_table,
+ IDT_ENTRIES*sizeof(idt_entry_t)); /* per-CPU IDT starts as a copy of idt_table */
+
+ return 0;
+
+ oom: /* any allocation failure lands here */
+ cpu_smpboot_free(cpu); /* releases whatever was allocated so far */
+ return -ENOMEM;
+}
+
+static int cpu_smpboot_callback( /* CPU notifier: allocate on UP_PREPARE, free on UP_CANCELED/DEAD. */
+ struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu; /* hcpu is used as the CPU number, not dereferenced */
+ int rc = 0;
+
+ switch ( action )
+ {
+ case CPU_UP_PREPARE:
+ rc = cpu_smpboot_alloc(cpu); /* only this action can set rc */
+ break;
+ case CPU_UP_CANCELED:
+ case CPU_DEAD:
+ cpu_smpboot_free(cpu);
+ break;
+ default: /* all other actions are ignored */
+ break;
+ }
+
+ return !rc ? NOTIFY_DONE : notifier_from_errno(rc); /* NOTIFY_DONE on success, else rc via notifier_from_errno() */
+}
+
+static struct notifier_block cpu_smpboot_nfb = { /* registered in smp_prepare_cpus() */
+ .notifier_call = cpu_smpboot_callback
+};
+
void __init smp_prepare_cpus(unsigned int max_cpus)
{
+ register_cpu_notifier(&cpu_smpboot_nfb);
+
mtrr_aps_sync_begin();
/* Setup boot CPU information */
diff -r 9d5afef2421c -r c1b6647c6828 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c Tue May 18 13:41:55 2010 +0100
+++ b/xen/arch/x86/x86_32/traps.c Tue May 18 15:05:54 2010 +0100
@@ -11,6 +11,7 @@
#include <xen/symbols.h>
#include <xen/shutdown.h>
#include <xen/nmi.h>
+#include <xen/cpu.h>
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/traps.h>
@@ -191,9 +192,40 @@ void show_page_walk(unsigned long addr)
unmap_domain_page(l1t);
}
-DEFINE_PER_CPU_READ_MOSTLY(struct tss_struct *, doublefault_tss);
+static DEFINE_PER_CPU_READ_MOSTLY(struct tss_struct *, doublefault_tss);
static unsigned char __attribute__ ((__section__ (".bss.page_aligned")))
boot_cpu_doublefault_space[PAGE_SIZE];
+
+static int cpu_doublefault_tss_callback( /* CPU notifier managing the per-CPU doublefault_tss page. */
+ struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu; /* hcpu is used as the CPU number, not dereferenced */
+ void *p;
+ int rc = 0;
+
+ switch ( action )
+ {
+ case CPU_UP_PREPARE:
+ per_cpu(doublefault_tss, cpu) = p = alloc_xenheap_page(); /* pointer is stored even when the allocation fails (NULL) */
+ if ( p == NULL )
+ rc = -ENOMEM;
+ else
+ memset(p, 0, PAGE_SIZE); /* the page starts out zeroed */
+ break;
+ case CPU_UP_CANCELED:
+ case CPU_DEAD:
+ free_xenheap_page(per_cpu(doublefault_tss, cpu)); /* NOTE(review): pointer is left stale after the free -- consider clearing it */
+ break;
+ default: /* all other actions are ignored */
+ break;
+ }
+
+ return !rc ? NOTIFY_DONE : notifier_from_errno(rc); /* NOTIFY_DONE on success, else rc via notifier_from_errno() */
+}
+
+static struct notifier_block cpu_doublefault_tss_nfb = { /* registered by CPU 0 in subarch_percpu_traps_init() */
+ .notifier_call = cpu_doublefault_tss_callback
+};
asmlinkage void do_double_fault(void)
{
@@ -300,17 +332,22 @@ static void set_task_gate(unsigned int n
void __devinit subarch_percpu_traps_init(void)
{
- struct tss_struct *tss = this_cpu(doublefault_tss);
+ struct tss_struct *tss;
asmlinkage int hypercall(void);
-
- if ( !tss )
+ int cpu = smp_processor_id();
+
+ if ( cpu == 0 )
{
/* The hypercall entry vector is only accessible from ring 1. */
_set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall);
- tss = (void *)boot_cpu_doublefault_space;
- this_cpu(doublefault_tss) = tss;
- }
+ this_cpu(doublefault_tss) = (void *)boot_cpu_doublefault_space;
+
+ register_cpu_notifier(&cpu_doublefault_tss_nfb);
+ }
+
+ tss = this_cpu(doublefault_tss);
+ BUG_ON(tss == NULL);
/*
* Make a separate task for double faults. This will get us debug output if
diff -r 9d5afef2421c -r c1b6647c6828 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c Tue May 18 13:41:55 2010 +0100
+++ b/xen/arch/x86/x86_64/mm.c Tue May 18 15:05:54 2010 +0100
@@ -810,20 +810,22 @@ int __cpuinit setup_compat_arg_xlat(unsi
int __cpuinit setup_compat_arg_xlat(unsigned int cpu, int node)
{
unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE);
- unsigned long sz = PAGE_SIZE << order;
unsigned int memflags = node != NUMA_NO_NODE ? MEMF_node(node) : 0;
struct page_info *pg;
+ BUG_ON((PAGE_SIZE << order) != COMPAT_ARG_XLAT_SIZE);
+
pg = alloc_domheap_pages(NULL, order, memflags);
- if ( !pg )
- return -ENOMEM;
-
- for ( ; (sz -= PAGE_SIZE) >= COMPAT_ARG_XLAT_SIZE; ++pg )
- free_domheap_page(pg);
-
- per_cpu(compat_arg_xlat, cpu) = page_to_virt(pg);
-
- return 0;
+ per_cpu(compat_arg_xlat, cpu) = pg ? page_to_virt(pg) : NULL;
+ return pg ? 0 : -ENOMEM;
+}
+
+void __cpuinit free_compat_arg_xlat(unsigned int cpu) /* Free @cpu's compat_arg_xlat pages, if any, and clear the pointer. */
+{
+ unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE); /* same order setup_compat_arg_xlat() allocates with */
+ if ( per_cpu(compat_arg_xlat, cpu) != NULL ) /* nothing to free if setup never succeeded */
+ free_domheap_pages(virt_to_page(per_cpu(compat_arg_xlat, cpu)), order);
+ per_cpu(compat_arg_xlat, cpu) = NULL; /* cleared unconditionally, so a second call does nothing */
 }
void cleanup_frame_table(struct mem_hotadd_info *info)
diff -r 9d5afef2421c -r c1b6647c6828 xen/common/cpu.c
--- a/xen/common/cpu.c Tue May 18 13:41:55 2010 +0100
+++ b/xen/common/cpu.c Tue May 18 15:05:54 2010 +0100
@@ -161,7 +161,7 @@ int disable_nonboot_cpus(void)
{
int cpu, error = 0;
- BUG_ON(raw_smp_processor_id() != 0);
+ BUG_ON(smp_processor_id() != 0);
cpus_clear(frozen_cpus);
diff -r 9d5afef2421c -r c1b6647c6828 xen/include/asm-x86/desc.h
--- a/xen/include/asm-x86/desc.h Tue May 18 13:41:55 2010 +0100
+++ b/xen/include/asm-x86/desc.h Tue May 18 15:05:54 2010 +0100
@@ -197,8 +197,6 @@ do {
(((u32)(addr) & 0x00FF0000U) >> 16); \
} while (0)
-DECLARE_PER_CPU(struct tss_struct *, doublefault_tss);
-
#endif
struct desc_ptr {
diff -r 9d5afef2421c -r c1b6647c6828 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Tue May 18 13:41:55 2010 +0100
+++ b/xen/include/asm-x86/mm.h Tue May 18 15:05:54 2010 +0100
@@ -485,6 +485,7 @@ void memguard_unguard_range(void *p, uns
#endif
void memguard_guard_stack(void *p);
+void memguard_unguard_stack(void *p);
int ptwr_do_page_fault(struct vcpu *, unsigned long,
struct cpu_user_regs *);
diff -r 9d5afef2421c -r c1b6647c6828 xen/include/asm-x86/smp.h
--- a/xen/include/asm-x86/smp.h Tue May 18 13:41:55 2010 +0100
+++ b/xen/include/asm-x86/smp.h Tue May 18 15:05:54 2010 +0100
@@ -56,7 +56,7 @@ DECLARE_PER_CPU(int, cpu_state);
DECLARE_PER_CPU(int, cpu_state);
#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
-extern void cpu_exit_clear(void);
+extern void cpu_exit_clear(unsigned int cpu);
extern void cpu_uninit(unsigned int cpu);
int cpu_add(uint32_t apic_id, uint32_t acpi_id, uint32_t pxm);
diff -r 9d5afef2421c -r c1b6647c6828 xen/include/asm-x86/x86_64/uaccess.h
--- a/xen/include/asm-x86/x86_64/uaccess.h Tue May 18 13:41:55 2010 +0100
+++ b/xen/include/asm-x86/x86_64/uaccess.h Tue May 18 15:05:54 2010 +0100
@@ -5,6 +5,7 @@
#define COMPAT_ARG_XLAT_SIZE (2*PAGE_SIZE)
DECLARE_PER_CPU(void *, compat_arg_xlat);
int setup_compat_arg_xlat(unsigned int cpu, int node);
+void free_compat_arg_xlat(unsigned int cpu);
#define is_compat_arg_xlat_range(addr, size) ({ \
unsigned long __off; \
__off = (unsigned long)(addr) - (unsigned long)COMPAT_ARG_XLAT_VIRT_BASE; \
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|