# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID 3d27ee7da0c17a37621e5cded2f0c7d18a6cdc5a
# Parent 7169e31606bdff194606d5a991655ef4eed4324f
Merge i386/x86_64 smpboot.c into a simplified common Xen version.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
diff -r 7169e31606bd -r 3d27ee7da0c1
linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Tue Oct 18
14:40:29 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Tue Oct 18
16:40:31 2005
@@ -27,7 +27,7 @@
c-obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_MICROCODE) += microcode.o
c-obj-$(CONFIG_APM) += apm.o
-obj-$(CONFIG_X86_SMP) += smp.o smpboot.o
+obj-$(CONFIG_X86_SMP) += smp.o
#obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o
diff -r 7169e31606bd -r 3d27ee7da0c1
linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S Tue Oct 18 14:40:29 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S Tue Oct 18 16:40:31 2005
@@ -40,7 +40,7 @@
ENTRY(startup_32)
movl %esi,xen_start_info
-#ifdef CONFIG_SMP
+#if 0
ENTRY(startup_32_smp)
#endif /* CONFIG_SMP */
@@ -78,7 +78,7 @@
movl %eax,%gs
cld # gcc2 wants the direction flag cleared at all
times
-#ifdef CONFIG_SMP
+#if 0
movb ready, %cl
cmpb $1,%cl
je 1f # the first CPU calls start_kernel
diff -r 7169e31606bd -r 3d27ee7da0c1
linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c Tue Oct 18
14:40:29 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c Tue Oct 18
16:40:31 2005
@@ -136,9 +136,6 @@
#endif
#ifdef CONFIG_SMP
-EXPORT_SYMBOL(cpu_data);
-EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(cpu_callout_map);
EXPORT_SYMBOL(__write_lock_failed);
EXPORT_SYMBOL(__read_lock_failed);
diff -r 7169e31606bd -r 3d27ee7da0c1
linux-2.6-xen-sparse/arch/xen/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/kernel/Makefile Tue Oct 18 14:40:29 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/Makefile Tue Oct 18 16:40:31 2005
@@ -15,4 +15,4 @@
obj-$(CONFIG_PROC_FS) += xen_proc.o
obj-$(CONFIG_NET) += skbuff.o
-obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_SMP) += smp.o smpboot.o
diff -r 7169e31606bd -r 3d27ee7da0c1
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Tue Oct 18
14:40:29 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Tue Oct 18
16:40:31 2005
@@ -25,7 +25,7 @@
c-obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
-obj-$(CONFIG_SMP) += smp.o smpboot.o
+obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o
c-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o
diff -r 7169e31606bd -r 3d27ee7da0c1
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c Tue Oct 18
14:40:29 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c Tue Oct 18
16:40:31 2005
@@ -113,14 +113,11 @@
EXPORT_SYMBOL(cpu_pda);
#ifdef CONFIG_SMP
-EXPORT_SYMBOL(cpu_data);
-EXPORT_SYMBOL(cpu_online_map);
EXPORT_SYMBOL(__write_lock_failed);
EXPORT_SYMBOL(__read_lock_failed);
EXPORT_SYMBOL(synchronize_irq);
EXPORT_SYMBOL(smp_call_function);
-EXPORT_SYMBOL(cpu_callout_map);
#endif
#ifdef CONFIG_VT
diff -r 7169e31606bd -r 3d27ee7da0c1
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/smp.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/smp.h Tue Oct 18
14:40:29 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/smp.h Tue Oct 18
16:40:31 2005
@@ -34,7 +34,6 @@
extern cpumask_t cpu_present_mask;
extern cpumask_t cpu_possible_map;
extern cpumask_t cpu_online_map;
-extern cpumask_t cpu_callout_map;
/*
* Private routines/data
@@ -52,8 +51,8 @@
void smp_stop_cpu(void);
extern cpumask_t cpu_sibling_map[NR_CPUS];
extern cpumask_t cpu_core_map[NR_CPUS];
-extern u8 phys_proc_id[NR_CPUS];
-extern u8 cpu_core_id[NR_CPUS];
+extern int phys_proc_id[NR_CPUS];
+extern int cpu_core_id[NR_CPUS];
#define SMP_TRAMPOLINE_BASE 0x6000
@@ -65,7 +64,7 @@
static inline int num_booting_cpus(void)
{
- return cpus_weight(cpu_callout_map);
+ return cpus_weight(cpu_possible_map);
}
#define __smp_processor_id() read_pda(cpunumber)
diff -r 7169e31606bd -r 3d27ee7da0c1
linux-2.6-xen-sparse/arch/xen/kernel/smpboot.c
--- /dev/null Tue Oct 18 14:40:29 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/smpboot.c Tue Oct 18 16:40:31 2005
@@ -0,0 +1,381 @@
+/*
+ * Xen SMP booting functions
+ *
+ * See arch/i386/kernel/smpboot.c for copyright and credits for derived
+ * portions of this file.
+ */
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/kernel_stat.h>
+#include <linux/smp_lock.h>
+#include <linux/irq.h>
+#include <linux/bootmem.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
+#include <asm/desc.h>
+#include <asm/arch_hooks.h>
+#include <asm/pgalloc.h>
+#include <asm-xen/evtchn.h>
+#include <asm-xen/xen-public/vcpu.h>
+#include <asm-xen/xenbus.h>
+
+#ifdef CONFIG_SMP_ALTERNATIVES
+#include <asm/smp_alt.h>
+#endif
+
+extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
+extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
+
+extern void local_setup_timer(unsigned int cpu);
+extern void local_teardown_timer(unsigned int cpu);
+
+extern void hypervisor_callback(void);
+extern void failsafe_callback(void);
+extern void system_call(void);
+extern void smp_trap_init(trap_info_t *);
+
+extern cpumask_t cpu_initialized;
+
+/* Number of siblings per CPU package */
+int smp_num_siblings = 1;
+int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
+EXPORT_SYMBOL(phys_proc_id);
+int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */
+EXPORT_SYMBOL(cpu_core_id);
+
+cpumask_t cpu_online_map;
+EXPORT_SYMBOL(cpu_online_map);
+cpumask_t cpu_possible_map;
+EXPORT_SYMBOL(cpu_possible_map);
+
+struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_data);
+
+#ifdef CONFIG_HOTPLUG_CPU
+DEFINE_PER_CPU(int, cpu_state) = { 0 };
+#endif
+
+static DEFINE_PER_CPU(int, resched_irq);
+static DEFINE_PER_CPU(int, callfunc_irq);
+static char resched_name[NR_CPUS][15];
+static char callfunc_name[NR_CPUS][15];
+
+u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+void *xquad_portio;
+
+cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_core_map);
+
+#ifdef __i386__
+u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff };
+EXPORT_SYMBOL(x86_cpu_to_apicid);
+#else
+unsigned int maxcpus = NR_CPUS;
+#endif
+
+void __init smp_alloc_memory(void)
+{
+}
+
+static void xen_smp_intr_init(unsigned int cpu)
+{
+ per_cpu(resched_irq, cpu) =
+ bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu);
+ sprintf(resched_name[cpu], "resched%d", cpu);
+ BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt,
+ SA_INTERRUPT, resched_name[cpu], NULL));
+
+ per_cpu(callfunc_irq, cpu) =
+ bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu);
+ sprintf(callfunc_name[cpu], "callfunc%d", cpu);
+ BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
+ smp_call_function_interrupt,
+ SA_INTERRUPT, callfunc_name[cpu], NULL));
+
+ if (cpu != 0)
+ local_setup_timer(cpu);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void xen_smp_intr_exit(unsigned int cpu)
+{
+ if (cpu != 0)
+ local_teardown_timer(cpu);
+
+ free_irq(per_cpu(resched_irq, cpu), NULL);
+ unbind_ipi_from_irq(RESCHEDULE_VECTOR, cpu);
+
+ free_irq(per_cpu(callfunc_irq, cpu), NULL);
+ unbind_ipi_from_irq(CALL_FUNCTION_VECTOR, cpu);
+}
+#endif
+
+static void cpu_bringup(void)
+{
+ if (!cpu_isset(smp_processor_id(), cpu_initialized))
+ cpu_init();
+ local_irq_enable();
+ cpu_idle();
+}
+
+void vcpu_prepare(int vcpu)
+{
+ vcpu_guest_context_t ctxt;
+ struct task_struct *idle = idle_task(vcpu);
+
+ if (vcpu == 0)
+ return;
+
+ memset(&ctxt, 0, sizeof(ctxt));
+
+ ctxt.flags = VGCF_IN_KERNEL;
+ ctxt.user_regs.ds = __USER_DS;
+ ctxt.user_regs.es = __USER_DS;
+ ctxt.user_regs.fs = 0;
+ ctxt.user_regs.gs = 0;
+ ctxt.user_regs.ss = __KERNEL_DS;
+ ctxt.user_regs.eip = (unsigned long)cpu_bringup;
+ ctxt.user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */
+
+ memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+ smp_trap_init(ctxt.trap_ctxt);
+
+ ctxt.ldt_ents = 0;
+
+ ctxt.gdt_frames[0] = virt_to_mfn(cpu_gdt_descr[vcpu].address);
+ ctxt.gdt_ents = cpu_gdt_descr[vcpu].size / 8;
+
+#ifdef __i386__
+ ctxt.user_regs.cs = __KERNEL_CS;
+ ctxt.user_regs.esp = idle->thread.esp;
+
+ ctxt.kernel_ss = __KERNEL_DS;
+ ctxt.kernel_sp = idle->thread.esp0;
+
+ ctxt.event_callback_cs = __KERNEL_CS;
+ ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
+ ctxt.failsafe_callback_cs = __KERNEL_CS;
+ ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+
+ ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT;
+#else
+ ctxt.user_regs.cs = __KERNEL_CS | 3;
+ ctxt.user_regs.esp = idle->thread.rsp;
+
+ ctxt.kernel_ss = __KERNEL_DS;
+ ctxt.kernel_sp = idle->thread.rsp0;
+
+ ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
+ ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+ ctxt.syscall_callback_eip = (unsigned long)system_call;
+
+ ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;
+#endif
+
+ BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt));
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+ int cpu;
+ struct task_struct *idle;
+
+ if (max_cpus == 0) {
+ xen_start_info->n_vcpu = 1;
+ return;
+ }
+
+ if (max_cpus < xen_start_info->n_vcpu)
+ xen_start_info->n_vcpu = max_cpus;
+
+ xen_smp_intr_init(0);
+
+ for (cpu = 1; cpu < xen_start_info->n_vcpu; cpu++) {
+ cpu_data[cpu] = boot_cpu_data;
+ cpu_2_logical_apicid[cpu] = cpu;
+ x86_cpu_to_apicid[cpu] = cpu;
+
+ idle = fork_idle(cpu);
+ if (IS_ERR(idle))
+ panic("failed fork for CPU %d", cpu);
+
+ irq_ctx_init(cpu);
+
+ cpu_gdt_descr[cpu].address =
+ __get_free_page(GFP_KERNEL|__GFP_ZERO);
+ BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE);
+ cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
+ memcpy((void *)cpu_gdt_descr[cpu].address,
+ (void *)cpu_gdt_descr[0].address,
+ cpu_gdt_descr[0].size);
+ make_page_readonly((void *)cpu_gdt_descr[cpu].address);
+
+ cpu_set(cpu, cpu_possible_map);
+
+ vcpu_prepare(cpu);
+ }
+
+ /* Currently, Xen gives no dynamic NUMA/HT info. */
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ cpus_clear(cpu_sibling_map[cpu]);
+ cpus_clear(cpu_core_map[cpu]);
+ }
+
+#ifdef CONFIG_X86_IO_APIC
+ /*
+ * Here we can be sure that there is an IO-APIC in the system. Let's
+ * go and set it up:
+ */
+ if (!skip_ioapic_setup && nr_ioapics)
+ setup_IO_APIC();
+#endif
+}
+
+void __devinit smp_prepare_boot_cpu(void)
+{
+ cpu_possible_map = cpumask_of_cpu(0);
+ cpu_online_map = cpumask_of_cpu(0);
+
+ cpu_data[0] = boot_cpu_data;
+ cpu_2_logical_apicid[0] = 0;
+ x86_cpu_to_apicid[0] = 0;
+
+ current_thread_info()->cpu = 0;
+ cpus_clear(cpu_sibling_map[0]);
+ cpu_set(0, cpu_sibling_map[0]);
+
+ cpus_clear(cpu_core_map[0]);
+ cpu_set(0, cpu_core_map[0]);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+static void handle_vcpu_hotplug_event(
+ struct xenbus_watch *watch, const char **vec, unsigned int len)
+{
+ int err, cpu;
+ char dir[32], state[32];
+ char *cpustr;
+ const char *node = vec[XS_WATCH_PATH];
+
+ if ((cpustr = strstr(node, "cpu/")) == NULL)
+ return;
+
+ sscanf(cpustr, "cpu/%d", &cpu);
+
+ sprintf(dir, "cpu/%d", cpu);
+ err = xenbus_scanf(NULL, dir, "availability", "%s", state);
+ if (err != 1) {
+ printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
+ return;
+ }
+
+ if (strcmp(state, "online") == 0)
+ (void)cpu_up(cpu);
+ else if (strcmp(state, "offline") == 0)
+ (void)cpu_down(cpu);
+ else
+ printk(KERN_ERR "XENBUS: unknown state(%s) on node(%s)\n",
+ state, node);
+}
+
+static int setup_cpu_watcher(struct notifier_block *notifier,
+ unsigned long event, void *data)
+{
+ static struct xenbus_watch cpu_watch = {
+ .node = "cpu",
+ .callback = handle_vcpu_hotplug_event };
+ (void)register_xenbus_watch(&cpu_watch);
+ return NOTIFY_DONE;
+}
+
+static int __init setup_vcpu_hotplug_event(void)
+{
+ static struct notifier_block xsn_cpu = {
+ .notifier_call = setup_cpu_watcher };
+ register_xenstore_notifier(&xsn_cpu);
+ return 0;
+}
+
+subsys_initcall(setup_vcpu_hotplug_event);
+
+int __cpu_disable(void)
+{
+ cpumask_t map = cpu_online_map;
+ int cpu = smp_processor_id();
+
+ if (cpu == 0)
+ return -EBUSY;
+
+ cpu_clear(cpu, map);
+ fixup_irqs(map);
+ cpu_clear(cpu, cpu_online_map);
+
+ return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+ while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
+ current->state = TASK_UNINTERRUPTIBLE;
+ schedule_timeout(HZ/10);
+ }
+
+ xen_smp_intr_exit(cpu);
+
+#ifdef CONFIG_SMP_ALTERNATIVES
+ if (num_online_cpus() == 1)
+ unprepare_for_smp();
+#endif
+}
+
+#else /* ... !CONFIG_HOTPLUG_CPU */
+
+int __cpu_disable(void)
+{
+ return -ENOSYS;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+ BUG();
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+int __devinit __cpu_up(unsigned int cpu)
+{
+#ifdef CONFIG_SMP_ALTERNATIVES
+ if (num_online_cpus() == 1)
+ prepare_for_smp();
+#endif
+
+ xen_smp_intr_init(cpu);
+ cpu_set(cpu, cpu_online_map);
+ HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
+
+ return 0;
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 7169e31606bd -r 3d27ee7da0c1
linux-2.6-xen-sparse/include/asm-xen/asm-i386/smp.h
--- /dev/null Tue Oct 18 14:40:29 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/smp.h Tue Oct 18
16:40:31 2005
@@ -0,0 +1,92 @@
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+/*
+ * We need the APIC definitions automatically as part of 'smp.h'
+ */
+#ifndef __ASSEMBLY__
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#ifndef __ASSEMBLY__
+#include <asm/fixmap.h>
+#include <asm/bitops.h>
+#include <asm/mpspec.h>
+#ifdef CONFIG_X86_IO_APIC
+#include <asm/io_apic.h>
+#endif
+#include <asm/apic.h>
+#endif
+#endif
+
+#define BAD_APICID 0xFFu
+#ifdef CONFIG_SMP
+#ifndef __ASSEMBLY__
+
+/*
+ * Private routines/data
+ */
+
+extern void smp_alloc_memory(void);
+extern int pic_mode;
+extern int smp_num_siblings;
+extern cpumask_t cpu_sibling_map[];
+extern cpumask_t cpu_core_map[];
+
+extern void smp_flush_tlb(void);
+extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
+extern void smp_invalidate_rcv(void); /* Process an NMI */
+extern void (*mtrr_hook) (void);
+extern void zap_low_mappings (void);
+
+#define MAX_APICID 256
+extern u8 x86_cpu_to_apicid[];
+
+/*
+ * This function is needed by all SMP systems. It must _always_ be valid
+ * from the initial startup. We map APIC_BASE very early in page_setup(),
+ * so this is correct in the x86 case.
+ */
+#define __smp_processor_id() (current_thread_info()->cpu)
+
+extern cpumask_t cpu_possible_map;
+
+/* We don't mark CPUs online until __cpu_up(), so we need another measure */
+static inline int num_booting_cpus(void)
+{
+ return cpus_weight(cpu_possible_map);
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+#ifdef APIC_DEFINITION
+extern int hard_smp_processor_id(void);
+#else
+#include <mach_apicdef.h>
+static inline int hard_smp_processor_id(void)
+{
+ /* we don't want to mark this access volatile - bad code generation */
+ return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
+}
+#endif
+
+static __inline int logical_smp_processor_id(void)
+{
+ /* we don't want to mark this access volatile - bad code generation */
+ return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
+}
+
+#endif
+
+extern int __cpu_disable(void);
+extern void __cpu_die(unsigned int cpu);
+#endif /* !__ASSEMBLY__ */
+
+#define NO_PROC_ID 0xFF /* No processor magic marker */
+
+#endif
+#endif
diff -r 7169e31606bd -r 3d27ee7da0c1
linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Tue Oct 18
14:40:29 2005
+++ /dev/null Tue Oct 18 16:40:31 2005
@@ -1,1520 +0,0 @@
-/*
- * x86 SMP booting functions
- *
- * (c) 1995 Alan Cox, Building #3 <alan@xxxxxxxxxx>
- * (c) 1998, 1999, 2000 Ingo Molnar <mingo@xxxxxxxxxx>
- *
- * Much of the core SMP work is based on previous work by Thomas Radke, to
- * whom a great many thanks are extended.
- *
- * Thanks to Intel for making available several different Pentium,
- * Pentium Pro and Pentium-II/Xeon MP machines.
- * Original development of Linux SMP code supported by Caldera.
- *
- * This code is released under the GNU General Public License version 2 or
- * later.
- *
- * Fixes
- * Felix Koop : NR_CPUS used properly
- * Jose Renau : Handle single CPU case.
- * Alan Cox : By repeated request 8) - Total BogoMIPS
report.
- * Greg Wright : Fix for kernel stacks panic.
- * Erich Boleyn : MP v1.4 and additional changes.
- * Matthias Sattler : Changes for 2.1 kernel map.
- * Michel Lespinasse : Changes for 2.1 kernel map.
- * Michael Chastain : Change trampoline.S to gnu as.
- * Alan Cox : Dumb bug: 'B' step PPro's are fine
- * Ingo Molnar : Added APIC timers, based on code
- * from Jose Renau
- * Ingo Molnar : various cleanups and rewrites
- * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs
- * Martin J. Bligh : Added support for multi-quad systems
- * Dave Jones : Report invalid combinations of Athlon
CPUs.
-* Rusty Russell : Hacked into shape for new "hotplug"
boot process. */
-
-#include <linux/module.h>
-#include <linux/config.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/kernel_stat.h>
-#include <linux/smp_lock.h>
-#include <linux/irq.h>
-#include <linux/bootmem.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/percpu.h>
-
-#include <linux/delay.h>
-#include <linux/mc146818rtc.h>
-#include <asm/tlbflush.h>
-#include <asm/desc.h>
-#include <asm/arch_hooks.h>
-
-#include <asm/smp_alt.h>
-
-#ifndef CONFIG_X86_IO_APIC
-#define Dprintk(args...)
-#endif
-#include <mach_wakecpu.h>
-#include <smpboot_hooks.h>
-
-#include <asm-xen/evtchn.h>
-#include <asm-xen/xen-public/vcpu.h>
-#include <asm-xen/xenbus.h>
-
-static void xen_smp_intr_init(unsigned int cpu);
-static void xen_smp_intr_exit(unsigned int cpu);
-
-/* Set if we find a B stepping CPU */
-static int __initdata smp_b_stepping;
-
-/* Number of siblings per CPU package */
-int smp_num_siblings = 1;
-int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
-EXPORT_SYMBOL(phys_proc_id);
-int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */
-EXPORT_SYMBOL(cpu_core_id);
-
-/* bitmap of online cpus */
-cpumask_t cpu_online_map;
-
-cpumask_t cpu_callin_map;
-cpumask_t cpu_callout_map;
-static cpumask_t smp_commenced_mask;
-
-/* Per CPU bogomips and other parameters */
-struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
-
-u8 x86_cpu_to_apicid[NR_CPUS] =
- { [0 ... NR_CPUS-1] = 0xff };
-EXPORT_SYMBOL(x86_cpu_to_apicid);
-
-#if 0
-/*
- * Trampoline 80x86 program as an array.
- */
-
-extern unsigned char trampoline_data [];
-extern unsigned char trampoline_end [];
-static unsigned char *trampoline_base;
-static int trampoline_exec;
-#endif
-
-#ifdef CONFIG_HOTPLUG_CPU
-/* State of each CPU. */
-DEFINE_PER_CPU(int, cpu_state) = { 0 };
-#endif
-
-static DEFINE_PER_CPU(int, resched_irq);
-static DEFINE_PER_CPU(int, callfunc_irq);
-static char resched_name[NR_CPUS][15];
-static char callfunc_name[NR_CPUS][15];
-
-#if 0
-/*
- * Currently trivial. Write the real->protected mode
- * bootstrap into the page concerned. The caller
- * has made sure it's suitably aligned.
- */
-
-static unsigned long __init setup_trampoline(void)
-{
- memcpy(trampoline_base, trampoline_data, trampoline_end -
trampoline_data);
- return virt_to_phys(trampoline_base);
-}
-#endif
-
-static void map_cpu_to_logical_apicid(void);
-
-/*
- * We are called very early to get the low memory for the
- * SMP bootup trampoline page.
- */
-void __init smp_alloc_memory(void)
-{
-#if 0
- trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
- /*
- * Has to be in very low memory so we can execute
- * real-mode AP code.
- */
- if (__pa(trampoline_base) >= 0x9F000)
- BUG();
- /*
- * Make the SMP trampoline executable:
- */
- trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
-#endif
-}
-
-/*
- * The bootstrap kernel entry code has set these up. Save them for
- * a given CPU
- */
-
-static void __init smp_store_cpu_info(int id)
-{
- struct cpuinfo_x86 *c = cpu_data + id;
-
- *c = boot_cpu_data;
- if (id!=0)
- identify_cpu(c);
- /*
- * Mask B, Pentium, but not Pentium MMX
- */
- if (c->x86_vendor == X86_VENDOR_INTEL &&
- c->x86 == 5 &&
- c->x86_mask >= 1 && c->x86_mask <= 4 &&
- c->x86_model <= 3)
- /*
- * Remember we have B step Pentia with bugs
- */
- smp_b_stepping = 1;
-
- /*
- * Certain Athlons might work (for various values of 'work') in SMP
- * but they are not certified as MP capable.
- */
- if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
-
- /* Athlon 660/661 is valid. */
- if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
- goto valid_k7;
-
- /* Duron 670 is valid */
- if ((c->x86_model==7) && (c->x86_mask==0))
- goto valid_k7;
-
- /*
- * Athlon 662, Duron 671, and Athlon >model 7 have capability
bit.
- * It's worth noting that the A5 stepping (662) of some Athlon
XP's
- * have the MP bit set.
- * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
more.
- */
- if (((c->x86_model==6) && (c->x86_mask>=2)) ||
- ((c->x86_model==7) && (c->x86_mask>=1)) ||
- (c->x86_model> 7))
- if (cpu_has_mp)
- goto valid_k7;
-
- /* If we get here, it's not a certified SMP capable AMD system.
*/
- tainted |= TAINT_UNSAFE_SMP;
- }
-
-valid_k7:
- ;
-}
-
-#if 0
-/*
- * TSC synchronization.
- *
- * We first check whether all CPUs have their TSC's synchronized,
- * then we print a warning if not, and always resync.
- */
-
-static atomic_t tsc_start_flag = ATOMIC_INIT(0);
-static atomic_t tsc_count_start = ATOMIC_INIT(0);
-static atomic_t tsc_count_stop = ATOMIC_INIT(0);
-static unsigned long long tsc_values[NR_CPUS];
-
-#define NR_LOOPS 5
-
-static void __init synchronize_tsc_bp (void)
-{
- int i;
- unsigned long long t0;
- unsigned long long sum, avg;
- long long delta;
- unsigned long one_usec;
- int buggy = 0;
-
- printk(KERN_INFO "checking TSC synchronization across %u CPUs: ",
num_booting_cpus());
-
- /* convert from kcyc/sec to cyc/usec */
- one_usec = cpu_khz / 1000;
-
- atomic_set(&tsc_start_flag, 1);
- wmb();
-
- /*
- * We loop a few times to get a primed instruction cache,
- * then the last pass is more or less synchronized and
- * the BP and APs set their cycle counters to zero all at
- * once. This reduces the chance of having random offsets
- * between the processors, and guarantees that the maximum
- * delay between the cycle counters is never bigger than
- * the latency of information-passing (cachelines) between
- * two CPUs.
- */
- for (i = 0; i < NR_LOOPS; i++) {
- /*
- * all APs synchronize but they loop on '== num_cpus'
- */
- while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
- mb();
- atomic_set(&tsc_count_stop, 0);
- wmb();
- /*
- * this lets the APs save their current TSC:
- */
- atomic_inc(&tsc_count_start);
-
- rdtscll(tsc_values[smp_processor_id()]);
- /*
- * We clear the TSC in the last loop:
- */
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
-
- /*
- * Wait for all APs to leave the synchronization point:
- */
- while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
- mb();
- atomic_set(&tsc_count_start, 0);
- wmb();
- atomic_inc(&tsc_count_stop);
- }
-
- sum = 0;
- for (i = 0; i < NR_CPUS; i++) {
- if (cpu_isset(i, cpu_callout_map)) {
- t0 = tsc_values[i];
- sum += t0;
- }
- }
- avg = sum;
- do_div(avg, num_booting_cpus());
-
- sum = 0;
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- delta = tsc_values[i] - avg;
- if (delta < 0)
- delta = -delta;
- /*
- * We report bigger than 2 microseconds clock differences.
- */
- if (delta > 2*one_usec) {
- long realdelta;
- if (!buggy) {
- buggy = 1;
- printk("\n");
- }
- realdelta = delta;
- do_div(realdelta, one_usec);
- if (tsc_values[i] < avg)
- realdelta = -realdelta;
-
- printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed
it up.\n", i, realdelta);
- }
-
- sum += delta;
- }
- if (!buggy)
- printk("passed.\n");
-}
-
-static void __init synchronize_tsc_ap (void)
-{
- int i;
-
- /*
- * Not every cpu is online at the time
- * this gets called, so we first wait for the BP to
- * finish SMP initialization:
- */
- while (!atomic_read(&tsc_start_flag)) mb();
-
- for (i = 0; i < NR_LOOPS; i++) {
- atomic_inc(&tsc_count_start);
- while (atomic_read(&tsc_count_start) != num_booting_cpus())
- mb();
-
- rdtscll(tsc_values[smp_processor_id()]);
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
-
- atomic_inc(&tsc_count_stop);
- while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
- }
-}
-#undef NR_LOOPS
-#endif
-
-extern void calibrate_delay(void);
-
-static atomic_t init_deasserted;
-
-static void __init smp_callin(void)
-{
- int cpuid, phys_id;
-#if 0
- unsigned long timeout;
-
- /*
- * If waken up by an INIT in an 82489DX configuration
- * we may get here before an INIT-deassert IPI reaches
- * our local APIC. We have to wait for the IPI or we'll
- * lock up on an APIC access.
- */
- wait_for_init_deassert(&init_deasserted);
-#endif
-
- /*
- * (This works even if the APIC is not enabled.)
- */
- phys_id = smp_processor_id();
- cpuid = smp_processor_id();
- if (cpu_isset(cpuid, cpu_callin_map)) {
- printk("huh, phys CPU#%d, CPU#%d already present??\n",
- phys_id, cpuid);
- BUG();
- }
- Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
-
-#if 0
- /*
- * STARTUP IPIs are fragile beasts as they might sometimes
- * trigger some glue motherboard logic. Complete APIC bus
- * silence for 1 second, this overestimates the time the
- * boot CPU is spending to send the up to 2 STARTUP IPIs
- * by a factor of two. This should be enough.
- */
-
- /*
- * Waiting 2s total for startup (udelay is not yet working)
- */
- timeout = jiffies + 2*HZ;
- while (time_before(jiffies, timeout)) {
- /*
- * Has the boot CPU finished it's STARTUP sequence?
- */
- if (cpu_isset(cpuid, cpu_callout_map))
- break;
- rep_nop();
- }
-
- if (!time_before(jiffies, timeout)) {
- printk("BUG: CPU%d started up but did not get a callout!\n",
- cpuid);
- BUG();
- }
-
- /*
- * the boot CPU has finished the init stage and is spinning
- * on callin_map until we finish. We are free to set up this
- * CPU, first the APIC. (this is probably redundant on most
- * boards)
- */
-
- Dprintk("CALLIN, before setup_local_APIC().\n");
- smp_callin_clear_local_apic();
- setup_local_APIC();
-#endif
- map_cpu_to_logical_apicid();
-
- /*
- * Get our bogomips.
- */
- calibrate_delay();
- Dprintk("Stack at about %p\n",&cpuid);
-
- /*
- * Save our processor parameters
- */
- smp_store_cpu_info(cpuid);
-
-#if 0
- disable_APIC_timer();
-#endif
-
- /*
- * Allow the master to continue.
- */
- cpu_set(cpuid, cpu_callin_map);
-
-#if 0
- /*
- * Synchronize the TSC with the BP
- */
- if (cpu_has_tsc && cpu_khz)
- synchronize_tsc_ap();
-#endif
-}
-
-static int cpucount;
-
-/*
- * Activate a secondary processor.
- */
-static void __init start_secondary(void *unused)
-{
- /*
- * Dont put anything before smp_callin(), SMP
- * booting is too fragile that we want to limit the
- * things done here to the most necessary things.
- */
- cpu_init();
- smp_callin();
- while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
- rep_nop();
- local_irq_enable();
- /*
- * low-memory mappings have been cleared, flush them from
- * the local TLBs too.
- */
- local_flush_tlb();
- cpu_set(smp_processor_id(), cpu_online_map);
-
- /* We can take interrupts now: we're officially "up". */
- local_irq_enable();
-
- wmb();
- cpu_idle();
-}
-
-/*
- * Everything has been set up for the secondary
- * CPUs - they just need to reload everything
- * from the task structure
- * This function must not return.
- */
-void __init initialize_secondary(void)
-{
- /*
- * We don't actually need to load the full TSS,
- * basically just the stack pointer and the eip.
- */
-
- asm volatile(
- "movl %0,%%esp\n\t"
- "jmp *%1"
- :
- :"r" (current->thread.esp),"r" (current->thread.eip));
-}
-
-extern struct {
- void * esp;
- unsigned short ss;
-} stack_start;
-
-#ifdef CONFIG_NUMA
-
-/* which logical CPUs are on which nodes */
-cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
- { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
-/* which node each logical CPU is on */
-int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
-EXPORT_SYMBOL(cpu_2_node);
-
-/* set up a mapping between cpu and node. */
-static inline void map_cpu_to_node(int cpu, int node)
-{
- printk("Mapping cpu %d to node %d\n", cpu, node);
- cpu_set(cpu, node_2_cpu_mask[node]);
- cpu_2_node[cpu] = node;
-}
-
-/* undo a mapping between cpu and node. */
-static inline void unmap_cpu_to_node(int cpu)
-{
- int node;
-
- printk("Unmapping cpu %d from all nodes\n", cpu);
- for (node = 0; node < MAX_NUMNODES; node ++)
- cpu_clear(cpu, node_2_cpu_mask[node]);
- cpu_2_node[cpu] = 0;
-}
-#else /* !CONFIG_NUMA */
-
-#define map_cpu_to_node(cpu, node) ({})
-#define unmap_cpu_to_node(cpu) ({})
-
-#endif /* CONFIG_NUMA */
-
-u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-
-static void map_cpu_to_logical_apicid(void)
-{
- int cpu = smp_processor_id();
- int apicid = smp_processor_id();
-
- cpu_2_logical_apicid[cpu] = apicid;
- map_cpu_to_node(cpu, apicid_to_node(apicid));
-}
-
-static void unmap_cpu_to_logical_apicid(int cpu)
-{
- cpu_2_logical_apicid[cpu] = BAD_APICID;
- unmap_cpu_to_node(cpu);
-}
-
-#if APIC_DEBUG
-static inline void __inquire_remote_apic(int apicid)
-{
- int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
- char *names[] = { "ID", "VERSION", "SPIV" };
- int timeout, status;
-
- printk("Inquiring remote APIC #%d...\n", apicid);
-
- for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
- printk("... APIC #%d %s: ", apicid, names[i]);
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
- apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
-
- timeout = 0;
- do {
- udelay(100);
- status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
- } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
-
- switch (status) {
- case APIC_ICR_RR_VALID:
- status = apic_read(APIC_RRR);
- printk("%08x\n", status);
- break;
- default:
- printk("failed\n");
- }
- }
-}
-#endif
-
-#if 0
-#ifdef WAKE_SECONDARY_VIA_NMI
-/*
- * Poke the other CPU in the eye via NMI to wake it up. Remember that the
normal
- * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
- * won't ... remember to clear down the APIC, etc later.
- */
-static int __init
-wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
-{
- unsigned long send_status = 0, accept_status = 0;
- int timeout, maxlvt;
-
- /* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
-
- /* Boot on the stack */
- /* Kick the second */
- apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(200);
- /*
- * Due to the Pentium erratum 3AP.
- */
- maxlvt = get_maxlvt();
- if (maxlvt > 3) {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- }
- accept_status = (apic_read(APIC_ESR) & 0xEF);
- Dprintk("NMI sent.\n");
-
- if (send_status)
- printk("APIC never delivered???\n");
- if (accept_status)
- printk("APIC delivery error (%lx).\n", accept_status);
-
- return (send_status | accept_status);
-}
-#endif /* WAKE_SECONDARY_VIA_NMI */
-
-#ifdef WAKE_SECONDARY_VIA_INIT
-static int __init
-wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
-{
- unsigned long send_status = 0, accept_status = 0;
- int maxlvt, timeout, num_starts, j;
-
- /*
- * Be paranoid about clearing APIC errors.
- */
- if (APIC_INTEGRATED(apic_version[phys_apicid])) {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- }
-
- Dprintk("Asserting INIT.\n");
-
- /*
- * Turn INIT on target chip
- */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /*
- * Send IPI
- */
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
- | APIC_DM_INIT);
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- mdelay(10);
-
- Dprintk("Deasserting INIT.\n");
-
- /* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /* Send IPI */
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- atomic_set(&init_deasserted, 1);
-
- /*
- * Should we send STARTUP IPIs ?
- *
- * Determine this based on the APIC version.
- * If we don't have an integrated APIC, don't send the STARTUP IPIs.
- */
- if (APIC_INTEGRATED(apic_version[phys_apicid]))
- num_starts = 2;
- else
- num_starts = 0;
-
- /*
- * Run STARTUP IPI loop.
- */
- Dprintk("#startup loops: %d.\n", num_starts);
-
- maxlvt = get_maxlvt();
-
- for (j = 1; j <= num_starts; j++) {
- Dprintk("Sending STARTUP #%d.\n",j);
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- Dprintk("After apic_write.\n");
-
- /*
- * STARTUP IPI
- */
-
- /* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /* Boot on the stack */
- /* Kick the second */
- apic_write_around(APIC_ICR, APIC_DM_STARTUP
- | (start_eip >> 12));
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(300);
-
- Dprintk("Startup point 1.\n");
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(200);
- /*
- * Due to the Pentium erratum 3AP.
- */
- if (maxlvt > 3) {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- }
- accept_status = (apic_read(APIC_ESR) & 0xEF);
- if (send_status || accept_status)
- break;
- }
- Dprintk("After Startup.\n");
-
- if (send_status)
- printk("APIC never delivered???\n");
- if (accept_status)
- printk("APIC delivery error (%lx).\n", accept_status);
-
- return (send_status | accept_status);
-}
-#endif /* WAKE_SECONDARY_VIA_INIT */
-#endif
-
-extern cpumask_t cpu_initialized;
-
-static int __init do_boot_cpu(int apicid)
-/*
- * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
- * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
- * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
- */
-{
- struct task_struct *idle;
- unsigned long boot_error;
- int timeout, cpu;
- unsigned long start_eip;
-#if 0
- unsigned short nmi_high = 0, nmi_low = 0;
-#endif
- vcpu_guest_context_t ctxt;
- extern void startup_32_smp(void);
- extern void hypervisor_callback(void);
- extern void failsafe_callback(void);
- extern void smp_trap_init(trap_info_t *);
-
- cpu = ++cpucount;
- /*
- * We can't use kernel_thread since we must avoid to
- * reschedule the child.
- */
- idle = fork_idle(cpu);
- if (IS_ERR(idle))
- panic("failed fork for CPU %d", cpu);
- idle->thread.eip = (unsigned long) start_secondary;
- /* start_eip had better be page-aligned! */
- start_eip = (unsigned long)startup_32_smp;
-
- /* So we see what's up */
- printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
- /* Stack for startup_32 can be just as for start_secondary onwards */
- stack_start.esp = (void *) idle->thread.esp;
-
- irq_ctx_init(cpu);
-
- /*
- * This grunge runs the startup process for
- * the targeted processor.
- */
-
- atomic_set(&init_deasserted, 0);
-
-#if 1
- cpu_gdt_descr[cpu].address = __get_free_page(GFP_KERNEL|__GFP_ZERO);
- BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE);
- cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
- memcpy((void *)cpu_gdt_descr[cpu].address,
- (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
-
- memset(&ctxt, 0, sizeof(ctxt));
-
- ctxt.user_regs.ds = __USER_DS;
- ctxt.user_regs.es = __USER_DS;
- ctxt.user_regs.fs = 0;
- ctxt.user_regs.gs = 0;
- ctxt.user_regs.ss = __KERNEL_DS;
- ctxt.user_regs.cs = __KERNEL_CS;
- ctxt.user_regs.eip = start_eip;
- ctxt.user_regs.esp = idle->thread.esp;
-#define X86_EFLAGS_IOPL_RING1 0x1000
- ctxt.user_regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_IOPL_RING1;
-
- /* FPU is set up to default initial state. */
- memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
-
- smp_trap_init(ctxt.trap_ctxt);
-
- /* No LDT. */
- ctxt.ldt_ents = 0;
-
- {
- unsigned long va;
- int f;
-
- for (va = cpu_gdt_descr[cpu].address, f = 0;
- va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
- va += PAGE_SIZE, f++) {
- ctxt.gdt_frames[f] = virt_to_mfn(va);
- make_page_readonly((void *)va);
- }
- ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
- }
-
- /* Ring 1 stack is the initial stack. */
- ctxt.kernel_ss = __KERNEL_DS;
- ctxt.kernel_sp = idle->thread.esp;
-
- /* Callback handlers. */
- ctxt.event_callback_cs = __KERNEL_CS;
- ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
- ctxt.failsafe_callback_cs = __KERNEL_CS;
- ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
-
- ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT;
-
- boot_error = HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt);
- if (boot_error)
- printk("boot error: %ld\n", boot_error);
-
- if (!boot_error) {
- HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
-
- /*
- * allow APs to start initializing.
- */
- Dprintk("Before Callout %d.\n", cpu);
- cpu_set(cpu, cpu_callout_map);
- Dprintk("After Callout %d.\n", cpu);
-
- /*
- * Wait 5s total for a response
- */
- for (timeout = 0; timeout < 50000; timeout++) {
- if (cpu_isset(cpu, cpu_callin_map))
- break; /* It has booted */
- udelay(100);
- }
-
- if (cpu_isset(cpu, cpu_callin_map)) {
- /* number CPUs logically, starting from 1 (BSP is 0) */
- Dprintk("OK.\n");
- printk("CPU%d: ", cpu);
- print_cpu_info(&cpu_data[cpu]);
- Dprintk("CPU has booted.\n");
- } else {
- boot_error= 1;
- }
- }
- x86_cpu_to_apicid[cpu] = apicid;
- if (boot_error) {
- /* Try to put things back the way they were before ... */
- unmap_cpu_to_logical_apicid(cpu);
- cpu_clear(cpu, cpu_callout_map); /* was set here
(do_boot_cpu()) */
- cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
- cpucount--;
- }
-
-#else
- Dprintk("Setting warm reset code and vector.\n");
-
- store_NMI_vector(&nmi_high, &nmi_low);
-
- smpboot_setup_warm_reset_vector(start_eip);
-
- /*
- * Starting actual IPI sequence...
- */
- boot_error = wakeup_secondary_cpu(apicid, start_eip);
-
- if (!boot_error) {
- /*
- * allow APs to start initializing.
- */
- Dprintk("Before Callout %d.\n", cpu);
- cpu_set(cpu, cpu_callout_map);
- Dprintk("After Callout %d.\n", cpu);
-
- /*
- * Wait 5s total for a response
- */
- for (timeout = 0; timeout < 50000; timeout++) {
- if (cpu_isset(cpu, cpu_callin_map))
- break; /* It has booted */
- udelay(100);
- }
-
- if (cpu_isset(cpu, cpu_callin_map)) {
- /* number CPUs logically, starting from 1 (BSP is 0) */
- Dprintk("OK.\n");
- printk("CPU%d: ", cpu);
- print_cpu_info(&cpu_data[cpu]);
- Dprintk("CPU has booted.\n");
- } else {
- boot_error= 1;
- if (*((volatile unsigned char *)trampoline_base)
- == 0xA5)
- /* trampoline started but...? */
- printk("Stuck ??\n");
- else
- /* trampoline code not run */
- printk("Not responding.\n");
- inquire_remote_apic(apicid);
- }
- }
- x86_cpu_to_apicid[cpu] = apicid;
- if (boot_error) {
- /* Try to put things back the way they were before ... */
- unmap_cpu_to_logical_apicid(cpu);
- cpu_clear(cpu, cpu_callout_map); /* was set here
(do_boot_cpu()) */
- cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
- cpucount--;
- }
-
- /* mark "stuck" area as not stuck */
- *((volatile unsigned long *)trampoline_base) = 0;
-#endif
-
- return boot_error;
-}
-
-static void smp_tune_scheduling (void)
-{
- unsigned long cachesize; /* kB */
- unsigned long bandwidth = 350; /* MB/s */
- /*
- * Rough estimation for SMP scheduling, this is the number of
- * cycles it takes for a fully memory-limited process to flush
- * the SMP-local cache.
- *
- * (For a P5 this pretty much means we will choose another idle
- * CPU almost always at wakeup time (this is due to the small
- * L1 cache), on PIIs it's around 50-100 usecs, depending on
- * the cache size)
- */
-
- if (!cpu_khz) {
- /*
- * this basically disables processor-affinity
- * scheduling on SMP without a TSC.
- */
- return;
- } else {
- cachesize = boot_cpu_data.x86_cache_size;
- if (cachesize == -1) {
- cachesize = 16; /* Pentiums, 2x8kB cache */
- bandwidth = 100;
- }
- }
-}
-
-/*
- * Cycle through the processors sending APIC IPIs to boot each.
- */
-
-#if 0
-static int boot_cpu_logical_apicid;
-#endif
-/* Where the IO area was mapped on multiquad, always 0 otherwise */
-void *xquad_portio;
-
-cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
-cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(cpu_core_map);
-
-static void __init smp_boot_cpus(unsigned int max_cpus)
-{
- int cpu, kicked;
- unsigned long bogosum = 0;
-#if 0
- int apicid, bit;
-#endif
-
- /*
- * Setup boot CPU information
- */
- smp_store_cpu_info(0); /* Final full version of the data */
- printk("CPU%d: ", 0);
- print_cpu_info(&cpu_data[0]);
-
-#if 0
- boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
- boot_cpu_logical_apicid = logical_smp_processor_id();
- x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
-#else
- // boot_cpu_physical_apicid = 0;
- // boot_cpu_logical_apicid = 0;
- x86_cpu_to_apicid[0] = 0;
-#endif
-
- current_thread_info()->cpu = 0;
- smp_tune_scheduling();
- cpus_clear(cpu_sibling_map[0]);
- cpu_set(0, cpu_sibling_map[0]);
-
- cpus_clear(cpu_core_map[0]);
- cpu_set(0, cpu_core_map[0]);
-
-#ifdef CONFIG_X86_IO_APIC
- /*
- * If we couldn't find an SMP configuration at boot time,
- * get out of here now!
- */
- if (!smp_found_config && !acpi_lapic) {
- printk(KERN_NOTICE "SMP motherboard not detected.\n");
- smpboot_clear_io_apic_irqs();
-#if 0
- phys_cpu_present_map = physid_mask_of_physid(0);
-#endif
-#ifdef CONFIG_X86_LOCAL_APIC
- if (APIC_init_uniprocessor())
- printk(KERN_NOTICE "Local APIC not detected."
- " Using dummy APIC emulation.\n");
-#endif
- map_cpu_to_logical_apicid();
- cpu_set(0, cpu_sibling_map[0]);
- cpu_set(0, cpu_core_map[0]);
- return;
- }
-#endif
-
-#if 0
- /*
- * Should not be necessary because the MP table should list the boot
- * CPU too, but we do it for the sake of robustness anyway.
- * Makes no sense to do this check in clustered apic mode, so skip it
- */
- if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
- printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
- boot_cpu_physical_apicid);
- physid_set(hard_smp_processor_id(), phys_cpu_present_map);
- }
-
- /*
- * If we couldn't find a local APIC, then get out of here now!
- */
- if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
!cpu_has_apic) {
- printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
- boot_cpu_physical_apicid);
- printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell
your hw vendor)\n");
- smpboot_clear_io_apic_irqs();
- phys_cpu_present_map = physid_mask_of_physid(0);
- cpu_set(0, cpu_sibling_map[0]);
- cpu_set(0, cpu_core_map[0]);
- cpu_set(0, cpu_sibling_map[0]);
- cpu_set(0, cpu_core_map[0]);
- return;
- }
-
- verify_local_APIC();
-#endif
-
- /*
- * If SMP should be disabled, then really disable it!
- */
- if (!max_cpus) {
- xen_start_info->n_vcpu = 1;
- printk(KERN_INFO "SMP mode deactivated, forcing use of dummy
APIC emulation.\n");
- smpboot_clear_io_apic_irqs();
-#if 0
- phys_cpu_present_map = physid_mask_of_physid(0);
-#endif
- return;
- }
-
- xen_smp_intr_init(0);
-
-#if 0
- connect_bsp_APIC();
- setup_local_APIC();
-#endif
- map_cpu_to_logical_apicid();
-#if 0
-
-
- setup_portio_remap();
-
- /*
- * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
- *
- * In clustered apic mode, phys_cpu_present_map is a constructed thus:
- * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
- * clustered apic ID.
- */
- Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
-#endif
- Dprintk("CPU present map: %lx\n", (1UL << xen_start_info->n_vcpu) - 1);
-
- kicked = 1;
- for (cpu = 1; kicked < NR_CPUS && cpu < xen_start_info->n_vcpu; cpu++) {
- if (max_cpus <= cpucount+1)
- continue;
-
-#ifdef CONFIG_SMP_ALTERNATIVES
- if (kicked == 1)
- prepare_for_smp();
-#endif
- if (do_boot_cpu(cpu))
- printk("CPU #%d not responding - cannot use it.\n",
- cpu);
- else
- ++kicked;
- }
-
-#if 0
- /*
- * Cleanup possible dangling ends...
- */
- smpboot_restore_warm_reset_vector();
-#endif
-
- /*
- * Allow the user to impress friends.
- */
- Dprintk("Before bogomips.\n");
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- if (cpu_isset(cpu, cpu_callout_map))
- bogosum += cpu_data[cpu].loops_per_jiffy;
- printk(KERN_INFO
- "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
- cpucount+1,
- bogosum/(500000/HZ),
- (bogosum/(5000/HZ))%100);
-
- Dprintk("Before bogocount - setting activated=1.\n");
-
- if (smp_b_stepping)
- printk(KERN_WARNING "WARNING: SMP operation may be unreliable
with B stepping processors.\n");
-
- /*
- * Don't taint if we are running SMP kernel on a single non-MP
- * approved Athlon
- */
- if (tainted & TAINT_UNSAFE_SMP) {
- if (cpucount)
- printk (KERN_INFO "WARNING: This combination of AMD
processors is not suitable for SMP.\n");
- else
- tainted &= ~TAINT_UNSAFE_SMP;
- }
-
- Dprintk("Boot done.\n");
-
- /*
- * construct cpu_sibling_map[], so that we can tell sibling CPUs
- * efficiently.
- */
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- cpus_clear(cpu_sibling_map[cpu]);
- cpus_clear(cpu_core_map[cpu]);
- }
-
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- struct cpuinfo_x86 *c = cpu_data + cpu;
- int siblings = 0;
- int i;
- if (!cpu_isset(cpu, cpu_callout_map))
- continue;
-
- if (smp_num_siblings > 1) {
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- if (cpu_core_id[cpu] == cpu_core_id[i]) {
- siblings++;
- cpu_set(i, cpu_sibling_map[cpu]);
- }
- }
- } else {
- siblings++;
- cpu_set(cpu, cpu_sibling_map[cpu]);
- }
-
- if (siblings != smp_num_siblings) {
- printk(KERN_WARNING "WARNING: %d siblings found for
CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
- smp_num_siblings = siblings;
- }
-
- if (c->x86_num_cores > 1) {
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- if (phys_proc_id[cpu] == phys_proc_id[i]) {
- cpu_set(i, cpu_core_map[cpu]);
- }
- }
- } else {
- cpu_core_map[cpu] = cpu_sibling_map[cpu];
- }
- }
-
- smpboot_setup_io_apic();
-
-#if 0
- setup_boot_APIC_clock();
-
- /*
- * Synchronize the TSC with the AP
- */
- if (cpu_has_tsc && cpucount && cpu_khz)
- synchronize_tsc_bp();
-#endif
-}
-
-/* These are wrappers to interface to the new boot process. Someone
- who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
-void __init smp_prepare_cpus(unsigned int max_cpus)
-{
- smp_commenced_mask = cpumask_of_cpu(0);
- cpu_callin_map = cpumask_of_cpu(0);
- mb();
- smp_boot_cpus(max_cpus);
-}
-
-void __devinit smp_prepare_boot_cpu(void)
-{
- cpu_set(smp_processor_id(), cpu_online_map);
- cpu_set(smp_processor_id(), cpu_callout_map);
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-static void handle_vcpu_hotplug_event(
- struct xenbus_watch *watch, const char **vec, unsigned int len)
-{
- int err, cpu;
- char dir[32], state[32];
- char *cpustr;
- const char *node = vec[XS_WATCH_PATH];
-
- if ((cpustr = strstr(node, "cpu/")) == NULL)
- return;
-
- sscanf(cpustr, "cpu/%d", &cpu);
-
- sprintf(dir, "cpu/%d", cpu);
- err = xenbus_scanf(NULL, dir, "availability", "%s", state);
- if (err != 1) {
- printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
- return;
- }
-
- if (strcmp(state, "online") == 0)
- (void)cpu_up(cpu);
- else if (strcmp(state, "offline") == 0)
- (void)cpu_down(cpu);
- else
- printk(KERN_ERR "XENBUS: unknown state(%s) on node(%s)\n",
- state, node);
-}
-
-static int setup_cpu_watcher(struct notifier_block *notifier,
- unsigned long event, void *data)
-{
- static struct xenbus_watch cpu_watch = {
- .node = "cpu",
- .callback = handle_vcpu_hotplug_event };
- (void)register_xenbus_watch(&cpu_watch);
- return NOTIFY_DONE;
-}
-
-static int __init setup_vcpu_hotplug_event(void)
-{
- static struct notifier_block xsn_cpu = {
- .notifier_call = setup_cpu_watcher };
- register_xenstore_notifier(&xsn_cpu);
- return 0;
-}
-
-subsys_initcall(setup_vcpu_hotplug_event);
-
-int __cpu_disable(void)
-{
- cpumask_t map = cpu_online_map;
- int cpu = smp_processor_id();
-
- /*
- * Perhaps use cpufreq to drop frequency, but that could go
- * into generic code.
- *
- * We won't take down the boot processor on i386 due to some
- * interrupts only being able to be serviced by the BSP.
- * Especially so if we're not using an IOAPIC -zwane
- */
- if (cpu == 0)
- return -EBUSY;
-
- cpu_clear(cpu, map);
- fixup_irqs(map);
-
- /* It's now safe to remove this processor from the online map */
- cpu_clear(cpu, cpu_online_map);
-
- return 0;
-}
-
-void __cpu_die(unsigned int cpu)
-{
- while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
- current->state = TASK_UNINTERRUPTIBLE;
- schedule_timeout(HZ/10);
- }
-
- xen_smp_intr_exit(cpu);
-
-#ifdef CONFIG_SMP_ALTERNATIVES
- if (num_online_cpus() == 1)
- unprepare_for_smp();
-#endif
-}
-
-#else /* ... !CONFIG_HOTPLUG_CPU */
-int __cpu_disable(void)
-{
- return -ENOSYS;
-}
-
-void __cpu_die(unsigned int cpu)
-{
- /* We said "no" in __cpu_disable */
- BUG();
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-
-int __devinit __cpu_up(unsigned int cpu)
-{
- /* In case one didn't come up */
- if (!cpu_isset(cpu, cpu_callin_map)) {
- printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
- local_irq_enable();
- return -EIO;
- }
-
-#ifdef CONFIG_SMP_ALTERNATIVES
- if (num_online_cpus() == 1)
- prepare_for_smp();
-#endif
-
- xen_smp_intr_init(cpu);
- cpu_set(cpu, smp_commenced_mask);
- cpu_set(cpu, cpu_online_map);
- HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
-
- return 0;
-}
-
-void __init smp_cpus_done(unsigned int max_cpus)
-{
-#if 1
-#else
-#ifdef CONFIG_X86_IO_APIC
- setup_ioapic_dest();
-#endif
- zap_low_mappings();
- /*
- * Disable executability of the SMP trampoline:
- */
- set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
-#endif
-}
-
-extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
-extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
-
-extern void local_setup_timer(unsigned int cpu);
-extern void local_teardown_timer(unsigned int cpu);
-
-static void xen_smp_intr_init(unsigned int cpu)
-{
- per_cpu(resched_irq, cpu) =
- bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu);
- sprintf(resched_name[cpu], "resched%d", cpu);
- BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt,
- SA_INTERRUPT, resched_name[cpu], NULL));
-
- per_cpu(callfunc_irq, cpu) =
- bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu);
- sprintf(callfunc_name[cpu], "callfunc%d", cpu);
- BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
- smp_call_function_interrupt,
- SA_INTERRUPT, callfunc_name[cpu], NULL));
-
- if (cpu != 0)
- local_setup_timer(cpu);
-}
-
-static void xen_smp_intr_exit(unsigned int cpu)
-{
- if (cpu != 0)
- local_teardown_timer(cpu);
-
- free_irq(per_cpu(resched_irq, cpu), NULL);
- unbind_ipi_from_irq(RESCHEDULE_VECTOR, cpu);
-
- free_irq(per_cpu(callfunc_irq, cpu), NULL);
- unbind_ipi_from_irq(CALL_FUNCTION_VECTOR, cpu);
-}
-
-void vcpu_prepare(int vcpu)
-{
- extern void hypervisor_callback(void);
- extern void failsafe_callback(void);
- extern void smp_trap_init(trap_info_t *);
- vcpu_guest_context_t ctxt;
- struct task_struct *idle = idle_task(vcpu);
-
- if (vcpu == 0)
- return;
-
- memset(&ctxt, 0, sizeof(ctxt));
-
- ctxt.user_regs.ds = __USER_DS;
- ctxt.user_regs.es = __USER_DS;
- ctxt.user_regs.fs = 0;
- ctxt.user_regs.gs = 0;
- ctxt.user_regs.ss = __KERNEL_DS;
- ctxt.user_regs.cs = __KERNEL_CS;
- ctxt.user_regs.eip = (unsigned long)cpu_idle;
- ctxt.user_regs.esp = idle->thread.esp;
- ctxt.user_regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_IOPL_RING1;
-
- memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
-
- smp_trap_init(ctxt.trap_ctxt);
-
- ctxt.ldt_ents = 0;
-
- ctxt.gdt_frames[0] = virt_to_mfn(cpu_gdt_descr[vcpu].address);
- ctxt.gdt_ents = cpu_gdt_descr[vcpu].size / 8;
-
- ctxt.kernel_ss = __KERNEL_DS;
- ctxt.kernel_sp = idle->thread.esp0;
-
- ctxt.event_callback_cs = __KERNEL_CS;
- ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
- ctxt.failsafe_callback_cs = __KERNEL_CS;
- ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
-
- ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT;
-
- (void)HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt);
-}
-
-/*
- * Local variables:
- * c-file-style: "linux"
- * indent-tabs-mode: t
- * c-indent-level: 8
- * c-basic-offset: 8
- * tab-width: 8
- * End:
- */
diff -r 7169e31606bd -r 3d27ee7da0c1
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c Tue Oct 18
14:40:29 2005
+++ /dev/null Tue Oct 18 16:40:31 2005
@@ -1,1253 +0,0 @@
-/*
- * x86 SMP booting functions
- *
- * (c) 1995 Alan Cox, Building #3 <alan@xxxxxxxxxx>
- * (c) 1998, 1999, 2000 Ingo Molnar <mingo@xxxxxxxxxx>
- * Copyright 2001 Andi Kleen, SuSE Labs.
- *
- * Much of the core SMP work is based on previous work by Thomas Radke, to
- * whom a great many thanks are extended.
- *
- * Thanks to Intel for making available several different Pentium,
- * Pentium Pro and Pentium-II/Xeon MP machines.
- * Original development of Linux SMP code supported by Caldera.
- *
- * This code is released under the GNU General Public License version 2
- *
- * Fixes
- * Felix Koop : NR_CPUS used properly
- * Jose Renau : Handle single CPU case.
- * Alan Cox : By repeated request 8) - Total BogoMIP
report.
- * Greg Wright : Fix for kernel stacks panic.
- * Erich Boleyn : MP v1.4 and additional changes.
- * Matthias Sattler : Changes for 2.1 kernel map.
- * Michel Lespinasse : Changes for 2.1 kernel map.
- * Michael Chastain : Change trampoline.S to gnu as.
- * Alan Cox : Dumb bug: 'B' step PPro's are fine
- * Ingo Molnar : Added APIC timers, based on code
- * from Jose Renau
- * Ingo Molnar : various cleanups and rewrites
- * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs
- * Andi Kleen : Changed for SMP boot into long mode.
- * Rusty Russell : Hacked into shape for new "hotplug"
boot process.
- * Andi Kleen : Converted to new state machine.
- * Various cleanups.
- * Probably mostly hotplug CPU ready now.
- */
-
-
-#include <linux/config.h>
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/kernel_stat.h>
-#include <linux/smp_lock.h>
-#include <linux/irq.h>
-#include <linux/bootmem.h>
-#include <linux/thread_info.h>
-#include <linux/module.h>
-#ifdef CONFIG_XEN
-#include <linux/interrupt.h>
-#endif
-
-#include <linux/delay.h>
-#include <linux/mc146818rtc.h>
-#include <asm/mtrr.h>
-#include <asm/pgalloc.h>
-#include <asm/desc.h>
-#include <asm/kdebug.h>
-#include <asm/tlbflush.h>
-#include <asm/proto.h>
-#include <asm/nmi.h>
-#ifdef CONFIG_XEN
-#include <asm/arch_hooks.h>
-#include <asm-xen/evtchn.h>
-#include <asm-xen/xen-public/vcpu.h>
-#endif
-
-/* Change for real CPU hotplug. Note other files need to be fixed
- first too. */
-#define __cpuinit __init
-#define __cpuinitdata __initdata
-
-#if defined(CONFIG_XEN) && !defined(CONFIG_XEN_PRIVILEGED_GUEST)
- unsigned int maxcpus = NR_CPUS;
-#endif
-
-/* Number of siblings per CPU package */
-int smp_num_siblings = 1;
-/* Package ID of each logical CPU */
-u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-EXPORT_SYMBOL(phys_proc_id);
-EXPORT_SYMBOL(cpu_core_id);
-
-/* Bitmask of currently online CPUs */
-cpumask_t cpu_online_map;
-
-EXPORT_SYMBOL(cpu_online_map);
-
-/*
- * Private maps to synchronize booting between AP and BP.
- * Probably not needed anymore, but it makes for easier debugging. -AK
- */
-cpumask_t cpu_callin_map;
-cpumask_t cpu_callout_map;
-
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
-
-/* Per CPU bogomips and other parameters */
-struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
-
-/* Set when the idlers are all forked */
-int smp_threads_ready;
-
-cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
-cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(cpu_core_map);
-
-#ifndef CONFIG_XEN
-/*
- * Trampoline 80x86 program as an array.
- */
-
-extern unsigned char trampoline_data[];
-extern unsigned char trampoline_end[];
-
-/*
- * Currently trivial. Write the real->protected mode
- * bootstrap into the page concerned. The caller
- * has made sure it's suitably aligned.
- */
-
-static unsigned long __cpuinit setup_trampoline(void)
-{
- void *tramp = __va(SMP_TRAMPOLINE_BASE);
- memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
- return virt_to_phys(tramp);
-}
-#endif
-
-/*
- * The bootstrap kernel entry code has set these up. Save them for
- * a given CPU
- */
-
-static void __cpuinit smp_store_cpu_info(int id)
-{
- struct cpuinfo_x86 *c = cpu_data + id;
-
- *c = boot_cpu_data;
- identify_cpu(c);
- print_cpu_info(c);
-}
-
-#ifndef CONFIG_XEN
-/*
- * New Funky TSC sync algorithm borrowed from IA64.
- * Main advantage is that it doesn't reset the TSCs fully and
- * in general looks more robust and it works better than my earlier
- * attempts. I believe it was written by David Mosberger. Some minor
- * adjustments for x86-64 by me -AK
- *
- * Original comment reproduced below.
- *
- * Synchronize TSC of the current (slave) CPU with the TSC of the
- * MASTER CPU (normally the time-keeper CPU). We use a closed loop to
- * eliminate the possibility of unaccounted-for errors (such as
- * getting a machine check in the middle of a calibration step). The
- * basic idea is for the slave to ask the master what itc value it has
- * and to read its own itc before and after the master responds. Each
- * iteration gives us three timestamps:
- *
- * slave master
- *
- * t0 ---\
- * ---\
- * --->
- * tm
- * /---
- * /---
- * t1 <---
- *
- *
- * The goal is to adjust the slave's TSC such that tm falls exactly
- * half-way between t0 and t1. If we achieve this, the clocks are
- * synchronized provided the interconnect between the slave and the
- * master is symmetric. Even if the interconnect were asymmetric, we
- * would still know that the synchronization error is smaller than the
- * roundtrip latency (t0 - t1).
- *
- * When the interconnect is quiet and symmetric, this lets us
- * synchronize the TSC to within one or two cycles. However, we can
- * only *guarantee* that the synchronization is accurate to within a
- * round-trip time, which is typically in the range of several hundred
- * cycles (e.g., ~500 cycles). In practice, this means that the TSCs
- * are usually almost perfectly synchronized, but we shouldn't assume
- * that the accuracy is much better than half a micro second or so.
- *
- * [there are other errors like the latency of RDTSC and of the
- * WRMSR. These can also account to hundreds of cycles. So it's
- * probably worse. It claims 153 cycles error on a dual Opteron,
- * but I suspect the numbers are actually somewhat worse -AK]
- */
-
-#define MASTER 0
-#define SLAVE (SMP_CACHE_BYTES/8)
-
-/* Intentionally don't use cpu_relax() while TSC synchronization
- because we don't want to go into funky power save modi or cause
- hypervisors to schedule us away. Going to sleep would likely affect
- latency and low latency is the primary objective here. -AK */
-#define no_cpu_relax() barrier()
-
-static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
-static volatile __cpuinitdata unsigned long go[SLAVE + 1];
-static int notscsync __cpuinitdata;
-
-#undef DEBUG_TSC_SYNC
-
-#define NUM_ROUNDS 64 /* magic value */
-#define NUM_ITERS 5 /* likewise */
-
-/* Callback on boot CPU */
-static __cpuinit void sync_master(void *arg)
-{
- unsigned long flags, i;
-
- if (smp_processor_id() != boot_cpu_id)
- return;
-
- go[MASTER] = 0;
-
- local_irq_save(flags);
- {
- for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
- while (!go[MASTER])
- no_cpu_relax();
- go[MASTER] = 0;
- rdtscll(go[SLAVE]);
- }
- }
- local_irq_restore(flags);
-}
-
-/*
- * Return the number of cycles by which our tsc differs from the tsc
- * on the master (time-keeper) CPU. A positive number indicates our
- * tsc is ahead of the master, negative that it is behind.
- */
-static inline long
-get_delta(long *rt, long *master)
-{
- unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
- unsigned long tcenter, t0, t1, tm;
- int i;
-
- for (i = 0; i < NUM_ITERS; ++i) {
- rdtscll(t0);
- go[MASTER] = 1;
- while (!(tm = go[SLAVE]))
- no_cpu_relax();
- go[SLAVE] = 0;
- rdtscll(t1);
-
- if (t1 - t0 < best_t1 - best_t0)
- best_t0 = t0, best_t1 = t1, best_tm = tm;
- }
-
- *rt = best_t1 - best_t0;
- *master = best_tm - best_t0;
-
- /* average best_t0 and best_t1 without overflow: */
- tcenter = (best_t0/2 + best_t1/2);
- if (best_t0 % 2 + best_t1 % 2 == 2)
- ++tcenter;
- return tcenter - best_tm;
-}
-
-static __cpuinit void sync_tsc(void)
-{
- int i, done = 0;
- long delta, adj, adjust_latency = 0;
- unsigned long flags, rt, master_time_stamp, bound;
-#if DEBUG_TSC_SYNC
- static struct syncdebug {
- long rt; /* roundtrip time */
- long master; /* master's timestamp */
- long diff; /* difference between midpoint and master's
timestamp */
- long lat; /* estimate of tsc adjustment latency */
- } t[NUM_ROUNDS] __cpuinitdata;
-#endif
-
- go[MASTER] = 1;
-
- smp_call_function(sync_master, NULL, 1, 0);
-
- while (go[MASTER]) /* wait for master to be ready */
- no_cpu_relax();
-
- spin_lock_irqsave(&tsc_sync_lock, flags);
- {
- for (i = 0; i < NUM_ROUNDS; ++i) {
- delta = get_delta(&rt, &master_time_stamp);
- if (delta == 0) {
- done = 1; /* let's lock on to this... */
- bound = rt;
- }
-
- if (!done) {
- unsigned long t;
- if (i > 0) {
- adjust_latency += -delta;
- adj = -delta + adjust_latency/4;
- } else
- adj = -delta;
-
- rdtscll(t);
- wrmsrl(MSR_IA32_TSC, t + adj);
- }
-#if DEBUG_TSC_SYNC
- t[i].rt = rt;
- t[i].master = master_time_stamp;
- t[i].diff = delta;
- t[i].lat = adjust_latency/4;
-#endif
- }
- }
- spin_unlock_irqrestore(&tsc_sync_lock, flags);
-
-#if DEBUG_TSC_SYNC
- for (i = 0; i < NUM_ROUNDS; ++i)
- printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
- t[i].rt, t[i].master, t[i].diff, t[i].lat);
-#endif
-
- printk(KERN_INFO
- "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
- "maxerr %lu cycles)\n",
- smp_processor_id(), boot_cpu_id, delta, rt);
-}
-
-static void __cpuinit tsc_sync_wait(void)
-{
- if (notscsync || !cpu_has_tsc)
- return;
- printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(),
- boot_cpu_id);
- sync_tsc();
-}
-
-static __init int notscsync_setup(char *s)
-{
- notscsync = 1;
- return 0;
-}
-__setup("notscsync", notscsync_setup);
-#endif
-
-static atomic_t init_deasserted __cpuinitdata;
-
-/*
- * Report back to the Boot Processor.
- * Running on AP.
- */
-void __cpuinit smp_callin(void)
-{
- int cpuid, phys_id;
- unsigned long timeout;
-
-#ifndef CONFIG_XEN
- /*
- * If waken up by an INIT in an 82489DX configuration
- * we may get here before an INIT-deassert IPI reaches
- * our local APIC. We have to wait for the IPI or we'll
- * lock up on an APIC access.
- */
- while (!atomic_read(&init_deasserted))
- cpu_relax();
-
-#endif
- /*
- * (This works even if the APIC is not enabled.)
- */
-#ifndef CONFIG_XEN
- phys_id = GET_APIC_ID(apic_read(APIC_ID));
-#else
- phys_id = smp_processor_id();
-#endif
- cpuid = smp_processor_id();
- if (cpu_isset(cpuid, cpu_callin_map)) {
- panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
- phys_id, cpuid);
- }
- Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
-
- /*
- * STARTUP IPIs are fragile beasts as they might sometimes
- * trigger some glue motherboard logic. Complete APIC bus
- * silence for 1 second, this overestimates the time the
- * boot CPU is spending to send the up to 2 STARTUP IPIs
- * by a factor of two. This should be enough.
- */
-
- /*
- * Waiting 2s total for startup (udelay is not yet working)
- */
- timeout = jiffies + 2*HZ;
- while (time_before(jiffies, timeout)) {
- /*
- * Has the boot CPU finished it's STARTUP sequence?
- */
- if (cpu_isset(cpuid, cpu_callout_map))
- break;
- cpu_relax();
- }
-
- if (!time_before(jiffies, timeout)) {
- panic("smp_callin: CPU%d started up but did not get a
callout!\n",
- cpuid);
- }
-
-#ifndef CONFIG_XEN
- /*
- * the boot CPU has finished the init stage and is spinning
- * on callin_map until we finish. We are free to set up this
- * CPU, first the APIC. (this is probably redundant on most
- * boards)
- */
-
- Dprintk("CALLIN, before setup_local_APIC().\n");
- setup_local_APIC();
-#endif
-
- /*
- * Get our bogomips.
- */
- calibrate_delay();
- Dprintk("Stack at about %p\n",&cpuid);
-
-#ifndef CONFIG_XEN
- disable_APIC_timer();
-#endif
-
- /*
- * Save our processor parameters
- */
- smp_store_cpu_info(cpuid);
-
- /*
- * Allow the master to continue.
- */
- cpu_set(cpuid, cpu_callin_map);
-}
-
-#ifdef CONFIG_XEN
-extern void local_setup_timer(unsigned int cpu);
-#endif
-
-/*
- * Setup code on secondary processor (after comming out of the trampoline)
- */
-void __cpuinit start_secondary(void)
-{
- /*
- * Dont put anything before smp_callin(), SMP
- * booting is too fragile that we want to limit the
- * things done here to the most necessary things.
- */
- cpu_init();
- smp_callin();
-
- /* otherwise gcc will move up the smp_processor_id before the cpu_init
*/
- barrier();
-
-#ifndef CONFIG_XEN
- Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());
- setup_secondary_APIC_clock();
-
- Dprintk("cpu %d: enabling apic timer\n", smp_processor_id());
-
- if (nmi_watchdog == NMI_IO_APIC) {
- disable_8259A_irq(0);
- enable_NMI_through_LVT0(NULL);
- enable_8259A_irq(0);
- }
-
- enable_APIC_timer();
-#else
- local_setup_timer(smp_processor_id());
- smp_intr_init();
- local_irq_enable();
-#endif
-
- /*
- * Allow the master to continue.
- */
- cpu_set(smp_processor_id(), cpu_online_map);
- mb();
-
-#ifndef CONFIG_XEN
- /* Wait for TSC sync to not schedule things before.
- We still process interrupts, which could see an inconsistent
- time in that window unfortunately. */
- tsc_sync_wait();
-#endif
-
- cpu_idle();
-}
-
-extern volatile unsigned long init_rsp;
-extern void (*initial_code)(void);
-
-#ifndef CONFIG_XEN
-#if APIC_DEBUG
-static void inquire_remote_apic(int apicid)
-{
- unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
- char *names[] = { "ID", "VERSION", "SPIV" };
- int timeout, status;
-
- printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
-
- for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
- printk("... APIC #%d %s: ", apicid, names[i]);
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
- apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
-
- timeout = 0;
- do {
- udelay(100);
- status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
- } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
-
- switch (status) {
- case APIC_ICR_RR_VALID:
- status = apic_read(APIC_RRR);
- printk("%08x\n", status);
- break;
- default:
- printk("failed\n");
- }
- }
-}
-#endif
-
-/*
- * Kick the secondary to wake up.
- */
-static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int
start_rip)
-{
- unsigned long send_status = 0, accept_status = 0;
- int maxlvt, timeout, num_starts, j;
-
- Dprintk("Asserting INIT.\n");
-
- /*
- * Turn INIT on target chip
- */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /*
- * Send IPI
- */
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
- | APIC_DM_INIT);
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- mdelay(10);
-
- Dprintk("Deasserting INIT.\n");
-
- /* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /* Send IPI */
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- atomic_set(&init_deasserted, 1);
-
- /*
- * Should we send STARTUP IPIs ?
- *
- * Determine this based on the APIC version.
- * If we don't have an integrated APIC, don't send the STARTUP IPIs.
- */
- if (APIC_INTEGRATED(apic_version[phys_apicid]))
- num_starts = 2;
- else
- num_starts = 0;
-
- /*
- * Run STARTUP IPI loop.
- */
- Dprintk("#startup loops: %d.\n", num_starts);
-
- maxlvt = get_maxlvt();
-
- for (j = 1; j <= num_starts; j++) {
- Dprintk("Sending STARTUP #%d.\n",j);
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- Dprintk("After apic_write.\n");
-
- /*
- * STARTUP IPI
- */
-
- /* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /* Boot on the stack */
- /* Kick the second */
- apic_write_around(APIC_ICR, APIC_DM_STARTUP
- | (start_rip >> 12));
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(300);
-
- Dprintk("Startup point 1.\n");
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(200);
- /*
- * Due to the Pentium erratum 3AP.
- */
- if (maxlvt > 3) {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- }
- accept_status = (apic_read(APIC_ESR) & 0xEF);
- if (send_status || accept_status)
- break;
- }
- Dprintk("After Startup.\n");
-
- if (send_status)
- printk(KERN_ERR "APIC never delivered???\n");
- if (accept_status)
- printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
-
- return (send_status | accept_status);
-}
-#endif
-
-/*
- * Boot one CPU.
- */
-static int __cpuinit do_boot_cpu(int cpu, int apicid)
-{
- struct task_struct *idle;
- unsigned long boot_error;
- int timeout;
- unsigned long start_rip;
-#ifdef CONFIG_XEN
- vcpu_guest_context_t ctxt;
- extern void startup_64_smp(void);
- extern void hypervisor_callback(void);
- extern void failsafe_callback(void);
- extern void smp_trap_init(trap_info_t *);
-#endif
- /*
- * We can't use kernel_thread since we must avoid to
- * reschedule the child.
- */
- idle = fork_idle(cpu);
- if (IS_ERR(idle)) {
- printk("failed fork for CPU %d\n", cpu);
- return PTR_ERR(idle);
- }
-
- cpu_pda[cpu].pcurrent = idle;
-
-#ifndef CONFIG_XEN
- start_rip = setup_trampoline();
-#else
- start_rip = (unsigned long)startup_64_smp;
-#endif
-
- init_rsp = idle->thread.rsp;
- per_cpu(init_tss,cpu).rsp0 = init_rsp;
- initial_code = start_secondary;
- clear_ti_thread_flag(idle->thread_info, TIF_FORK);
-
- printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu,
apicid,
- start_rip, init_rsp);
-
- /*
- * This grunge runs the startup process for
- * the targeted processor.
- */
-
- atomic_set(&init_deasserted, 0);
-
-#ifdef CONFIG_XEN
- cpu_gdt_descr[cpu].address = __get_free_page(GFP_KERNEL|__GFP_ZERO);
- BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE);
- cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
- memcpy((void *)cpu_gdt_descr[cpu].address,
- (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
-
- memset(&ctxt, 0, sizeof(ctxt));
-
- ctxt.flags = VGCF_IN_KERNEL;
- ctxt.user_regs.ds = __USER_DS;
- ctxt.user_regs.es = __USER_DS;
- ctxt.user_regs.fs = 0;
- ctxt.user_regs.gs = 0;
- ctxt.user_regs.ss = __KERNEL_DS|0x3;
- ctxt.user_regs.cs = __KERNEL_CS|0x3;
- ctxt.user_regs.rip = start_rip;
- ctxt.user_regs.rsp = idle->thread.rsp;
-#define X86_EFLAGS_IOPL_RING3 0x3000
- ctxt.user_regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_IOPL_RING3;
-
- /* FPU is set up to default initial state. */
- memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
-
- smp_trap_init(ctxt.trap_ctxt);
-
- /* No LDT. */
- ctxt.ldt_ents = 0;
-
- {
- unsigned long va;
- int f;
-
- for (va = cpu_gdt_descr[cpu].address, f = 0;
- va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
- va += PAGE_SIZE, f++) {
- ctxt.gdt_frames[f] = virt_to_mfn(va);
- make_page_readonly((void *)va);
- }
- ctxt.gdt_ents = GDT_ENTRIES;
- }
-
- /* Ring 1 stack is the initial stack. */
- ctxt.kernel_ss = __KERNEL_DS;
- ctxt.kernel_sp = idle->thread.rsp;
-
- /* Callback handlers. */
- ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
- ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
- ctxt.syscall_callback_eip = (unsigned long)system_call;
-
- ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;
-
- boot_error = HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt);
- if (boot_error)
- printk("boot error: %ld\n", boot_error);
-
- if (!boot_error) {
- HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
-
- /*
- * allow APs to start initializing.
- */
- Dprintk("Before Callout %d.\n", cpu);
- cpu_set(cpu, cpu_callout_map);
- Dprintk("After Callout %d.\n", cpu);
-
- /*
- * Wait 5s total for a response
- */
- for (timeout = 0; timeout < 50000; timeout++) {
- if (cpu_isset(cpu, cpu_callin_map))
- break; /* It has booted */
- udelay(100);
- }
-
- if (cpu_isset(cpu, cpu_callin_map)) {
- /* number CPUs logically, starting from 1 (BSP is 0) */
- Dprintk("CPU has booted.\n");
- } else {
- boot_error= 1;
- }
- }
- x86_cpu_to_apicid[cpu] = apicid;
-#else
- Dprintk("Setting warm reset code and vector.\n");
-
- CMOS_WRITE(0xa, 0xf);
- local_flush_tlb();
- Dprintk("1.\n");
- *((volatile unsigned short *) phys_to_virt(0x469)) = start_rip >> 4;
- Dprintk("2.\n");
- *((volatile unsigned short *) phys_to_virt(0x467)) = start_rip & 0xf;
- Dprintk("3.\n");
-
- /*
- * Be paranoid about clearing APIC errors.
- */
- if (APIC_INTEGRATED(apic_version[apicid])) {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- }
-
- /*
- * Status is now clean
- */
- boot_error = 0;
-
- /*
- * Starting actual IPI sequence...
- */
- boot_error = wakeup_secondary_via_INIT(apicid, start_rip);
-
- if (!boot_error) {
- /*
- * allow APs to start initializing.
- */
- Dprintk("Before Callout %d.\n", cpu);
- cpu_set(cpu, cpu_callout_map);
- Dprintk("After Callout %d.\n", cpu);
-
- /*
- * Wait 5s total for a response
- */
- for (timeout = 0; timeout < 50000; timeout++) {
- if (cpu_isset(cpu, cpu_callin_map))
- break; /* It has booted */
- udelay(100);
- }
-
- if (cpu_isset(cpu, cpu_callin_map)) {
- /* number CPUs logically, starting from 1 (BSP is 0) */
- Dprintk("CPU has booted.\n");
- } else {
- boot_error = 1;
- if (*((volatile unsigned char
*)phys_to_virt(SMP_TRAMPOLINE_BASE))
- == 0xA5)
- /* trampoline started but...? */
- printk("Stuck ??\n");
- else
- /* trampoline code not run */
- printk("Not responding.\n");
-#if APIC_DEBUG
- inquire_remote_apic(apicid);
-#endif
- }
- }
-#endif
- if (boot_error) {
- cpu_clear(cpu, cpu_callout_map); /* was set here
(do_boot_cpu()) */
- clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
- cpu_clear(cpu, cpu_present_map);
- cpu_clear(cpu, cpu_possible_map);
- x86_cpu_to_apicid[cpu] = BAD_APICID;
- x86_cpu_to_log_apicid[cpu] = BAD_APICID;
- return -EIO;
- }
-
- return 0;
-}
-
-cycles_t cacheflush_time;
-unsigned long cache_decay_ticks;
-
-/*
- * Construct cpu_sibling_map[], so that we can tell the sibling CPU
- * on SMT systems efficiently.
- */
-static __cpuinit void detect_siblings(void)
-{
- int cpu;
-
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- cpus_clear(cpu_sibling_map[cpu]);
- cpus_clear(cpu_core_map[cpu]);
- }
-
- for_each_online_cpu (cpu) {
- struct cpuinfo_x86 *c = cpu_data + cpu;
- int siblings = 0;
- int i;
- if (smp_num_siblings > 1) {
- for_each_online_cpu (i) {
- if (cpu_core_id[cpu] == cpu_core_id[i]) {
- siblings++;
- cpu_set(i, cpu_sibling_map[cpu]);
- }
- }
- } else {
- siblings++;
- cpu_set(cpu, cpu_sibling_map[cpu]);
- }
-
- if (siblings != smp_num_siblings) {
- printk(KERN_WARNING
- "WARNING: %d siblings found for CPU%d, should be %d\n",
- siblings, cpu, smp_num_siblings);
- smp_num_siblings = siblings;
- }
- if (c->x86_num_cores > 1) {
- for_each_online_cpu(i) {
- if (phys_proc_id[cpu] == phys_proc_id[i])
- cpu_set(i, cpu_core_map[cpu]);
- }
- } else
- cpu_core_map[cpu] = cpu_sibling_map[cpu];
- }
-}
-
-#ifndef CONFIG_XEN
-/*
- * Cleanup possible dangling ends...
- */
-static __cpuinit void smp_cleanup_boot(void)
-{
- /*
- * Paranoid: Set warm reset code and vector here back
- * to default values.
- */
- CMOS_WRITE(0, 0xf);
-
- /*
- * Reset trampoline flag
- */
- *((volatile int *) phys_to_virt(0x467)) = 0;
-
-#ifndef CONFIG_HOTPLUG_CPU
- /*
- * Free pages reserved for SMP bootup.
- * When you add hotplug CPU support later remove this
- * Note there is more work to be done for later CPU bootup.
- */
-
- free_page((unsigned long) __va(PAGE_SIZE));
- free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE));
-#endif
-}
-#endif
-
-/*
- * Fall back to non SMP mode after errors.
- *
- * RED-PEN audit/test this more. I bet there is more state messed up here.
- */
-static __cpuinit void disable_smp(void)
-{
- cpu_present_map = cpumask_of_cpu(0);
- cpu_possible_map = cpumask_of_cpu(0);
-#ifndef CONFIG_XEN
- if (smp_found_config)
- phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
- else
- phys_cpu_present_map = physid_mask_of_physid(0);
-#endif
- cpu_set(0, cpu_sibling_map[0]);
- cpu_set(0, cpu_core_map[0]);
-}
-
-/*
- * Handle user cpus=... parameter.
- */
-static __cpuinit void enforce_max_cpus(unsigned max_cpus)
-{
- int i, k;
- k = 0;
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_possible(i))
- continue;
- if (++k > max_cpus) {
- cpu_clear(i, cpu_possible_map);
- cpu_clear(i, cpu_present_map);
- }
- }
-}
-
-/*
- * Various sanity checks.
- */
-static int __cpuinit smp_sanity_check(unsigned max_cpus)
-{
-#ifndef CONFIG_XEN
- if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
- printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
- hard_smp_processor_id());
- physid_set(hard_smp_processor_id(), phys_cpu_present_map);
- }
-
- /*
- * If we couldn't find an SMP configuration at boot time,
- * get out of here now!
- */
- if (!smp_found_config) {
- printk(KERN_NOTICE "SMP motherboard not detected.\n");
- disable_smp();
- if (APIC_init_uniprocessor())
- printk(KERN_NOTICE "Local APIC not detected."
- " Using dummy APIC emulation.\n");
- return -1;
- }
-
- /*
- * Should not be necessary because the MP table should list the boot
- * CPU too, but we do it for the sake of robustness anyway.
- */
- if (!physid_isset(boot_cpu_id, phys_cpu_present_map)) {
- printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the
BIOS.\n",
- boot_cpu_id);
- physid_set(hard_smp_processor_id(), phys_cpu_present_map);
- }
-
- /*
- * If we couldn't find a local APIC, then get out of here now!
- */
- if (APIC_INTEGRATED(apic_version[boot_cpu_id]) && !cpu_has_apic) {
- printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
- boot_cpu_id);
- printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell
your hw vendor)\n");
- nr_ioapics = 0;
- return -1;
- }
-#endif
-
- /*
- * If SMP should be disabled, then really disable it!
- */
- if (!max_cpus) {
-#ifdef CONFIG_XEN
- xen_start_info->n_vcpu = 1;
-#endif
- printk(KERN_INFO "SMP mode deactivated, forcing use of dummy
APIC emulation.\n");
-#ifndef CONFIG_XEN
- nr_ioapics = 0;
-#endif
- return -1;
- }
-
- return 0;
-}
-
-/*
- * Prepare for SMP bootup. The MP table or ACPI has been read
- * earlier. Just do some sanity checking here and enable APIC mode.
- */
-void __cpuinit smp_prepare_cpus(unsigned int max_cpus)
-{
- int i;
-
-#if defined(CONFIG_XEN) && !defined(CONFIG_XEN_PRIVILEGED_GUEST)
-#else
- nmi_watchdog_default();
-#endif
- current_cpu_data = boot_cpu_data;
- current_thread_info()->cpu = 0; /* needed? */
-
- enforce_max_cpus(max_cpus);
-
- /*
- * Fill in cpu_present_mask
- */
- for (i = 0; i < NR_CPUS; i++) {
-#ifndef CONFIG_XEN
- int apicid = cpu_present_to_apicid(i);
- if (physid_isset(apicid, phys_cpu_present_map)) {
-#else
- if (i < xen_start_info->n_vcpu) {
-#endif
- cpu_set(i, cpu_present_map);
- /* possible map would be different if we supported real
- CPU hotplug. */
- cpu_set(i, cpu_possible_map);
- }
- }
-
- if (smp_sanity_check(max_cpus) < 0) {
- printk(KERN_INFO "SMP disabled\n");
- disable_smp();
- return;
- }
-
-#ifdef CONFIG_XEN
- smp_intr_init();
-#else
-
- /*
- * Switch from PIC to APIC mode.
- */
- connect_bsp_APIC();
- setup_local_APIC();
-
- if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
- panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
- GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
- /* Or can we switch back to PIC here? */
- }
-#endif
-
- /*
- * Now start the IO-APICs
- */
-#if defined(CONFIG_XEN) && !defined(CONFIG_XEN_PRIVILEGED_GUEST)
-#else
- if (!skip_ioapic_setup && nr_ioapics)
- setup_IO_APIC();
- else
- nr_ioapics = 0;
-#endif
-
- /*
- * Set up local APIC timer on boot CPU.
- */
-
-#ifndef CONFIG_XEN
- setup_boot_APIC_clock();
-#endif
-}
-
-/*
- * Early setup to make printk work.
- */
-void __init smp_prepare_boot_cpu(void)
-{
- int me = smp_processor_id();
- cpu_set(me, cpu_online_map);
- cpu_set(me, cpu_callout_map);
-}
-
-/*
- * Entry point to boot a CPU.
- *
- * This is all __cpuinit, not __devinit for now because we don't support
- * CPU hotplug (yet).
- */
-int __cpuinit __cpu_up(unsigned int cpu)
-{
- int err;
-#ifndef CONFIG_XEN
- int apicid = cpu_present_to_apicid(cpu);
-#else
- int apicid = cpu;
-#endif
-
- WARN_ON(irqs_disabled());
-
- Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu);
-
-#ifndef CONFIG_XEN
- if (apicid == BAD_APICID || apicid == boot_cpu_id ||
- !physid_isset(apicid, phys_cpu_present_map)) {
- printk("__cpu_up: bad cpu %d\n", cpu);
- return -EINVAL;
- }
-#endif
-
- /* Boot it! */
- err = do_boot_cpu(cpu, apicid);
- if (err < 0) {
- Dprintk("do_boot_cpu failed %d\n", err);
- return err;
- }
-
- /* Unleash the CPU! */
- Dprintk("waiting for cpu %d\n", cpu);
-
- while (!cpu_isset(cpu, cpu_online_map))
- cpu_relax();
- return 0;
-}
-
-/*
- * Finish the SMP boot.
- */
-void __cpuinit smp_cpus_done(unsigned int max_cpus)
-{
-#ifndef CONFIG_XEN
- zap_low_mappings();
- smp_cleanup_boot();
-
-#ifdef CONFIG_X86_IO_APIC
- setup_ioapic_dest();
-#endif
-#endif
-
- detect_siblings();
-#ifndef CONFIG_XEN
- time_init_gtod();
-
- check_nmi_watchdog();
-#endif
-}
-
-#ifdef CONFIG_XEN
-extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
-extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
-
-static DEFINE_PER_CPU(int, resched_irq);
-static DEFINE_PER_CPU(int, callfunc_irq);
-static char resched_name[NR_CPUS][15];
-static char callfunc_name[NR_CPUS][15];
-
-void smp_intr_init(void)
-{
- int cpu = smp_processor_id();
-
- per_cpu(resched_irq, cpu) =
- bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu);
- sprintf(resched_name[cpu], "resched%d", cpu);
- BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt,
- SA_INTERRUPT, resched_name[cpu], NULL));
-
- per_cpu(callfunc_irq, cpu) =
- bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu);
- sprintf(callfunc_name[cpu], "callfunc%d", cpu);
- BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
- smp_call_function_interrupt,
- SA_INTERRUPT, callfunc_name[cpu], NULL));
-}
-
-static void smp_intr_exit(void)
-{
- int cpu = smp_processor_id();
-
- free_irq(per_cpu(resched_irq, cpu), NULL);
- unbind_ipi_from_irq(RESCHEDULE_VECTOR, cpu);
-
- free_irq(per_cpu(callfunc_irq, cpu), NULL);
- unbind_ipi_from_irq(CALL_FUNCTION_VECTOR, cpu);
-}
-
-void vcpu_prepare(int vcpu)
-{
-}
-
-#endif
diff -r 7169e31606bd -r 3d27ee7da0c1
linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
Tue Oct 18 14:40:29 2005
+++ /dev/null Tue Oct 18 16:40:31 2005
@@ -1,55 +0,0 @@
-/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws
- * which needs to alter them. */
-
-static inline void smpboot_clear_io_apic_irqs(void)
-{
-#ifdef CONFIG_X86_IO_APIC
- io_apic_irqs = 0;
-#endif
-}
-
-static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
-{
-#if 1
- printk("smpboot_setup_warm_reset_vector\n");
-#else
- CMOS_WRITE(0xa, 0xf);
- local_flush_tlb();
- Dprintk("1.\n");
- *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
- Dprintk("2.\n");
- *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
- Dprintk("3.\n");
-#endif
-}
-
-static inline void smpboot_restore_warm_reset_vector(void)
-{
- /*
- * Install writable page 0 entry to set BIOS data area.
- */
- local_flush_tlb();
-
- /*
- * Paranoid: Set warm reset code and vector here back
- * to default values.
- */
- CMOS_WRITE(0, 0xf);
-
- *((volatile long *) phys_to_virt(0x467)) = 0;
-}
-
-static inline void smpboot_setup_io_apic(void)
-{
-#ifdef CONFIG_X86_IO_APIC
- /*
- * Here we can be sure that there is an IO-APIC in the system. Let's
- * go and set it up:
- */
- if (!skip_ioapic_setup && nr_ioapics)
- setup_IO_APIC();
-#endif
-}
-
-
-#define smp_found_config (xen_start_info->n_vcpu > 1)
diff -r 7169e31606bd -r 3d27ee7da0c1
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/smpboot_hooks.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/smpboot_hooks.h
Tue Oct 18 14:40:29 2005
+++ /dev/null Tue Oct 18 16:40:31 2005
@@ -1,55 +0,0 @@
-/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws
- * which needs to alter them. */
-
-static inline void smpboot_clear_io_apic_irqs(void)
-{
-#ifdef CONFIG_X86_IO_APIC
- io_apic_irqs = 0;
-#endif
-}
-
-static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
-{
-#if 1
- printk("smpboot_setup_warm_reset_vector\n");
-#else
- CMOS_WRITE(0xa, 0xf);
- local_flush_tlb();
- Dprintk("1.\n");
- *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
- Dprintk("2.\n");
- *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
- Dprintk("3.\n");
-#endif
-}
-
-static inline void smpboot_restore_warm_reset_vector(void)
-{
- /*
- * Install writable page 0 entry to set BIOS data area.
- */
- local_flush_tlb();
-
- /*
- * Paranoid: Set warm reset code and vector here back
- * to default values.
- */
- CMOS_WRITE(0, 0xf);
-
- *((volatile long *) phys_to_virt(0x467)) = 0;
-}
-
-static inline void smpboot_setup_io_apic(void)
-{
-#ifdef CONFIG_X86_IO_APIC
- /*
- * Here we can be sure that there is an IO-APIC in the system. Let's
- * go and set it up:
- */
- if (!skip_ioapic_setup && nr_ioapics)
- setup_IO_APIC();
-#endif
-}
-
-
-#define smp_found_config (xen_start_info->n_vcpu > 1)
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|