# HG changeset patch
# User Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
# Node ID a384dbf50d5934ba93eea17eccb7e43cf408dd87
# Parent bbf2db4ddf5400e908ee6bf92ac798e5cfed82a0
[XEN][POWERPC] Turn on SMP.. Finally.
The following patch uses Xen specific methods to spin up secondary
processors and add them to the Linux devtree (not the flat-devtree).
Specifically:
- Adds HYPERVISOR_vcpu_op() for probing and spinning.
- "Hot-Plug" new CPU entries into the devtree
- Start CPUs in the same place that OF/prom_init.c would have
- Wire up SMP IPI to Xen event channels
- 6 line common code change in LinuxPPC to set the # possible CPUs correctly
Tested on JS21 (4-way) and Maple(2-way) creating 1-1 Dom0 and several
VIO/DomUs up to 32-way.
NOTE: we cannot yet:
- _add_ a CPU after the normal boot spinup process
- remove a CPU
Signed-off-by: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
---
arch/powerpc/kernel/setup-common.c | 6
arch/powerpc/platforms/xen/Makefile | 1
arch/powerpc/platforms/xen/hcall.c | 30 ++
arch/powerpc/platforms/xen/setup.c | 36 --
arch/powerpc/platforms/xen/setup.h | 1
arch/powerpc/platforms/xen/smp.c | 424 +++++++++++++++++++++++++++++++
include/asm-powerpc/xen/asm/hypercall.h | 1
include/asm-powerpc/xen/asm/hypervisor.h | 2
8 files changed, 468 insertions(+), 33 deletions(-)
diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/kernel/setup-common.c
--- a/arch/powerpc/kernel/setup-common.c Tue Dec 19 09:22:37 2006 -0500
+++ b/arch/powerpc/kernel/setup-common.c Sun Jan 21 08:34:45 2007 -0500
@@ -388,6 +388,12 @@ void __init smp_setup_cpu_maps(void)
}
}
+ if (machine_is(xen)) {
+ /* something more intelligent perhaps? */
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ cpu_set(cpu, cpu_possible_map);
+ }
+
#ifdef CONFIG_PPC64
/*
* On pSeries LPAR, we need to know how many cpus
diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/Makefile
--- a/arch/powerpc/platforms/xen/Makefile Tue Dec 19 09:22:37 2006 -0500
+++ b/arch/powerpc/platforms/xen/Makefile Sun Jan 21 08:34:45 2007 -0500
@@ -3,6 +3,7 @@ obj-y += hcall.o
obj-y += hcall.o
obj-y += reboot.o
obj-y += setup.o
+obj-y += smp.o
obj-y += time.o
obj-y += udbg_xen.o
obj-y += xen_guest.o
diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/hcall.c
--- a/arch/powerpc/platforms/xen/hcall.c Tue Dec 19 09:22:37 2006 -0500
+++ b/arch/powerpc/platforms/xen/hcall.c Sun Jan 21 08:34:45 2007 -0500
@@ -33,7 +33,7 @@
#include <xen/interface/sched.h>
#include <xen/interface/event_channel.h>
#include <xen/interface/physdev.h>
-#include <xen/interface/grant_table.h>
+#include <xen/interface/vcpu.h>
#include <xen/public/privcmd.h>
#include <asm/hypercall.h>
#include <asm/page.h>
@@ -599,3 +599,31 @@ int arch_privcmd_hypercall(privcmd_hyper
}
}
+/*
+ * Issue the Xen vcpu_op hypercall @cmd against virtual CPU @vcpuid.
+ *
+ * VCPUOP_up, VCPUOP_down and VCPUOP_is_up take no argument structure and
+ * are passed a zero descriptor; @extra_args is ignored for them.
+ * VCPUOP_initialise (vcpu_guest_context_t) and VCPUOP_get_runstate_info
+ * (vcpu_runstate_info_t) hand @extra_args to the hypervisor through an
+ * inline xencomm descriptor.
+ *
+ * Returns the hypercall's result, or -ENOSYS for a command this wrapper
+ * does not know how to marshal.
+ */
+int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args)
+{
+	const unsigned long hcall = __HYPERVISOR_vcpu_op;
+	void *desc;
+
+	switch (cmd) {
+	case VCPUOP_up:
+	case VCPUOP_down:
+	case VCPUOP_is_up:
+		/* no argument structure for these commands */
+		return plpar_hcall_norets(XEN_MARK(hcall), cmd, vcpuid, 0);
+
+	case VCPUOP_initialise:		/* vcpu_guest_context_t */
+	case VCPUOP_get_runstate_info:	/* vcpu_runstate_info_t */
+		break;
+
+	default:
+		printk(KERN_ERR "%s: unknown vcpu cmd %d\n", __func__, cmd);
+		return -ENOSYS;
+	}
+
+	desc = xencomm_create_inline(extra_args);
+	return plpar_hcall_norets(XEN_MARK(hcall), cmd, vcpuid, desc);
+}
+
diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/setup.c
--- a/arch/powerpc/platforms/xen/setup.c Tue Dec 19 09:22:37 2006 -0500
+++ b/arch/powerpc/platforms/xen/setup.c Sun Jan 21 08:34:45 2007 -0500
@@ -168,42 +168,10 @@ static void xen_power_save(void)
HYPERVISOR_sched_op(SCHEDOP_block, NULL);
}
-#ifdef CONFIG_SMP
-
-int __init smp_xen_probe(void)
-{
- return 1;
-}
-
-void smp_xen_message_pass(int target, int msg)
-{
- printk("%s(%d, %d)\n", __func__, target, msg);
-}
-
-void __devinit smp_xen_setup_cpu(int cpu)
-{
- printk("%s(%d)\n", __func__, cpu);
-}
-
-struct smp_ops_t xen_smp_ops = {
- .probe = smp_xen_probe,
- .message_pass = smp_xen_message_pass,
- .kick_cpu = smp_generic_kick_cpu,
- .setup_cpu = smp_xen_setup_cpu,
- .give_timebase = smp_generic_give_timebase,
- .take_timebase = smp_generic_take_timebase,
-};
-#endif /* CONFIG_SMP */
-
void __init xen_setup_arch(void)
{
/* init to some ~sane value until calibrate_delay() runs */
loops_per_jiffy = 50000000;
-
- /* Setup SMP callback */
-#ifdef CONFIG_SMP
- smp_ops = &xen_smp_ops;
-#endif
/* Lookup PCI hosts */
if (is_initial_xendomain())
@@ -211,6 +179,10 @@ void __init xen_setup_arch(void)
#ifdef CONFIG_DUMMY_CONSOLE
conswitchp = &dummy_con;
+#endif
+#ifdef CONFIG_SMP
+ /* let them fly */
+ xen_setup_smp();
#endif
printk(KERN_INFO "Using Xen idle loop\n");
diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/setup.h
--- a/arch/powerpc/platforms/xen/setup.h Tue Dec 19 09:22:37 2006 -0500
+++ b/arch/powerpc/platforms/xen/setup.h Sun Jan 21 08:34:45 2007 -0500
@@ -27,3 +27,4 @@ extern void free_foreign_page(struct pag
extern void free_foreign_page(struct page *page);
extern void __init xen_setup_time(struct machdep_calls *host_md);
+extern void xen_setup_smp(void);
diff -r bbf2db4ddf54 -r a384dbf50d59 include/asm-powerpc/xen/asm/hypercall.h
--- a/include/asm-powerpc/xen/asm/hypercall.h Tue Dec 19 09:22:37 2006 -0500
+++ b/include/asm-powerpc/xen/asm/hypercall.h Sun Jan 21 08:34:45 2007 -0500
@@ -44,6 +44,7 @@ extern int HYPERVISOR_physdev_op(int cmd
extern int HYPERVISOR_physdev_op(int cmd, void *op);
extern int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop,
unsigned int count);
+extern int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args);
extern int HYPERVISOR_memory_op(unsigned int cmd, void *arg);
extern int HYPERVISOR_multicall(void *call_list, int nr_calls);
diff -r bbf2db4ddf54 -r a384dbf50d59 include/asm-powerpc/xen/asm/hypervisor.h
--- a/include/asm-powerpc/xen/asm/hypervisor.h Tue Dec 19 09:22:37 2006 -0500
+++ b/include/asm-powerpc/xen/asm/hypervisor.h Sun Jan 21 08:34:45 2007 -0500
@@ -146,6 +146,8 @@ int direct_remap_pfn_range(struct vm_are
#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS)
#define NR_DYNIRQS 256
+#define NR_IPIS 4 /* PPC_MSG_DEBUGGER_BREAK + 1 */
+
#if NR_IRQS < (NR_PIRQS + NR_DYNIRQS)
#error to many Xen IRQs
#endif
diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/smp.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/powerpc/platforms/xen/smp.c Sun Jan 21 08:34:45 2007 -0500
@@ -0,0 +1,424 @@
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/bootmem.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
+#include <xen/evtchn.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/hypervisor.h>
+#include "setup.h"
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) printk(KERN_EMERG fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/*
+ * Allocate @size bytes for device-tree data: kmalloc(GFP_KERNEL) once
+ * mem_init_done is set, the boot-time allocator before that.
+ */
+static inline void *xen_of_alloc(ulong size)
+{
+	return mem_init_done ? kmalloc(size, GFP_KERNEL)
+			     : alloc_bootmem(size);
+}
+/*
+ * Release memory obtained from xen_of_alloc().  Only kmalloc'ed memory can
+ * be handed back, so calling this before mem_init_done is set is a BUG.
+ */
+static inline void xen_of_free(void *ptr)
+{
+	BUG_ON(!mem_init_done);
+
+	kfree(ptr);
+}
+
+/*
+ * Make a private copy of property @op.
+ *
+ * The property header, its value and its name live in one allocation laid
+ * out as [struct property][value bytes][name string], so one failure check
+ * covers all three.  The copy's remaining fields are left zeroed.
+ *
+ * Returns the new property, or NULL if the allocation failed.
+ */
+static struct property *dup_prop(struct property *op)
+{
+	struct property *copy;
+	void *cursor;
+	ulong total;
+
+	total = sizeof (*copy) + strlen(op->name) + 1 + op->length;
+
+	copy = xen_of_alloc(total);
+	if (copy == NULL)
+		return NULL;
+	memset(copy, 0, total);
+
+	/* the value goes right behind the header so that it stays aligned */
+	cursor = copy;
+	cursor += sizeof (*copy);
+	copy->value = cursor;
+	copy->length = op->length;
+	memcpy(copy->value, op->value, copy->length);
+
+	/* the name takes whatever is left */
+	cursor += op->length;
+	copy->name = cursor;
+	strcpy(copy->name, op->name);
+
+	return copy;
+}
+
+/*
+ * Copy every property of @src onto @dst.
+ *
+ * @dst is expected to carry no properties on entry (the caller passes a
+ * freshly zeroed node): on failure every property found on @dst is freed
+ * and its property list is reset to empty.
+ *
+ * Returns 0 on success (including when @src has no properties) or -ENOMEM
+ * if a property could not be duplicated.
+ */
+static int dup_properties(struct device_node *dst, struct device_node *src)
+{
+	struct property *op;
+	struct property *np;
+	struct property *next;
+
+	DBG("%s: duping to new cpu node: %s\n", __func__, dst->full_name);
+
+	for (op = src->properties; op != NULL; op = op->next) {
+		np = dup_prop(op);
+		if (!np)
+			goto fail;
+
+		prom_add_property(dst, np);
+	}
+
+	return 0;
+
+fail:
+	DBG("%s: FAILED duping: %s\n", __func__, dst->full_name);
+	/* we could not allocate enough, so free what we have allocated;
+	 * fetch the link before the entry is released */
+	for (op = dst->properties; op != NULL; op = next) {
+		next = op->next;
+		xen_of_free(op);
+	}
+	dst->properties = NULL;
+
+	return -ENOMEM;
+}
+
+/*
+ * Create a device-tree node for virtual CPU @cpu by cloning @boot_cpu and
+ * attach it next to @boot_cpu.
+ *
+ * The node, its name, its type and its full_name share one allocation.
+ * The new full_name is "/cpus/<name>@<cpu>", where <name> is the value of
+ * @boot_cpu's "name" property; all of @boot_cpu's properties are copied
+ * and the copy's "reg" property is rewritten to @cpu.
+ *
+ * Panics if @boot_cpu has no "name" property or if the copy has no usable
+ * "reg" property.
+ *
+ * Returns the added device node so it can be added to procfs in the case
+ * of hotplugging, or NULL if memory could not be allocated.
+ */
+static struct device_node *xen_add_vcpu_node(struct device_node *boot_cpu,
+					     uint cpu)
+{
+	struct device_node *new_cpu;
+	struct property *pp;
+	void *p;
+	ulong sz;
+	ulong type_sz;
+	ulong name_sz;
+	ulong full_sz;
+
+	DBG("%s: boot cpu: %s\n", __func__, boot_cpu->full_name);
+
+	/* the "name" property supplies the stem of the new full_name, so it
+	 * is needed before the buffer can be sized */
+	pp = of_find_property(boot_cpu, "name", NULL);
+	if (!pp)
+		panic("%s: no name prop\n", __func__);
+
+	DBG("%s: name is: %s = %s\n", __func__, pp->name, pp->value);
+
+	/* allocate in one shot in case we fail */
+	name_sz = strlen(boot_cpu->name) + 1;
+	type_sz = strlen(boot_cpu->type) + 1;
+	/* "/cpus/" + name + '@' + at most 10 decimal digits + NUL */
+	full_sz = strlen("/cpus/") + strlen((char *)pp->value) + 1 + 10 + 1;
+
+	sz = sizeof (*new_cpu);		/* the node */
+	sz += name_sz;			/* name */
+	sz += type_sz;			/* type */
+	sz += full_sz;			/* full_name */
+
+	p = xen_of_alloc(sz);
+	if (!p)
+		return NULL;
+	memset(p, 0, sz);
+
+	/* the node */
+	new_cpu = p;
+	p += sizeof (*new_cpu);
+
+	/* name */
+	new_cpu->name = p;
+	strcpy(new_cpu->name, boot_cpu->name);
+	p += name_sz;
+
+	/* type */
+	new_cpu->type = p;
+	strcpy(new_cpu->type, boot_cpu->type);
+	p += type_sz;
+
+	/* full_name, bounded by the space reserved for it above */
+	new_cpu->full_name = p;
+	snprintf(new_cpu->full_name, full_sz, "/cpus/%s@%u", pp->value, cpu);
+
+	if (dup_properties(new_cpu, boot_cpu)) {
+		xen_of_free(new_cpu);
+		return NULL;
+	}
+
+	/* fixup reg property */
+	DBG("%s: updating reg: %d\n", __func__, cpu);
+	pp = of_find_property(new_cpu, "reg", NULL);
+	if (!pp || pp->length < (int)sizeof(int))
+		panic("%s: no reg prop\n", __func__);
+	*(int *)pp->value = cpu;
+
+	if (mem_init_done)
+		OF_MARK_DYNAMIC(new_cpu);
+
+	kref_init(&new_cpu->kref);
+
+	/* insert the node */
+	new_cpu->parent = of_get_parent(boot_cpu);
+	of_attach_node(new_cpu);
+	of_node_put(new_cpu->parent);
+
+	return new_cpu;
+}
+
+/*
+ * Build the initial register context for @vcpu and register it with the
+ * hypervisor through VCPUOP_initialise.
+ *
+ * The vcpu will begin execution at @entry with MSR 0, r1 = 0, r3 = @vcpu
+ * and r13 = 0; every other user register is filled with the 0x55 pattern.
+ * Panics if the hypercall fails.
+ */
+static void cpu_initialize_context(unsigned int vcpu, ulong entry)
+{
+	vcpu_guest_context_t ctxt;
+
+	/* Do not hand uninitialized stack contents to the hypervisor: clear
+	 * the whole context first.  NOTE(review): the layout of
+	 * vcpu_guest_context_t is not visible here; if user_regs is its only
+	 * member this is merely redundant. */
+	memset(&ctxt, 0, sizeof(ctxt));
+
+	/* recognisable pattern in every register we do not set explicitly */
+	memset(&ctxt.user_regs, 0x55, sizeof(ctxt.user_regs));
+
+	ctxt.user_regs.pc = entry;
+	ctxt.user_regs.msr = 0;
+	ctxt.user_regs.gprs[1] = 0; /* Linux uses its own stack */
+	ctxt.user_regs.gprs[3] = vcpu;
+
+	/* XXX verify this *** */
+	/* There is a buggy kernel that does not zero the "local_paca", so
+	 * we must make sure this register is 0 */
+	ctxt.user_regs.gprs[13] = 0;
+
+	DBG("%s: initializing vcpu: %d\n", __func__, vcpu);
+
+	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt))
+		panic("%s: VCPUOP_initialise failed, vcpu: %d\n",
+		      __func__, vcpu);
+}
+
+/*
+ * Set up the initial context of @vcpu so that it starts at @entry, then
+ * ask the hypervisor to bring it up.  Returns the result of the
+ * VCPUOP_up hypercall.
+ */
+static int xen_start_vcpu(uint vcpu, ulong entry)
+{
+	int rc;
+
+	DBG("%s: starting vcpu: %d\n", __func__, vcpu);
+	cpu_initialize_context(vcpu, entry);
+
+	DBG("%s: Spinning up vcpu: %d\n", __func__, vcpu);
+	rc = HYPERVISOR_vcpu_op(VCPUOP_up, vcpu, NULL);
+
+	return rc;
+}
+
+extern void __secondary_hold(void);
+extern unsigned long __secondary_hold_spinloop;
+extern unsigned long __secondary_hold_acknowledge;
+
+/*
+ * Probe for and start the secondary vcpus, numbers 1 .. NR_CPUS - 1.
+ *
+ * Every vcpu for which VCPUOP_is_up does not return a negative value is
+ * started at the physical address of __secondary_hold, given a bounded
+ * amount of time to overwrite __secondary_hold_acknowledge, cloned into
+ * the device tree from the first "cpu" node, marked present and given a
+ * hard processor id equal to its vcpu number.
+ *
+ * Panics if no "cpu" node exists or if a vcpu cannot be started.  A vcpu
+ * that never acknowledges, or whose device-tree node cannot be created, is
+ * reported with a warning but otherwise handled as before.
+ */
+static void xen_boot_secondary_vcpus(void)
+{
+	int vcpu;
+	int rc;
+	const unsigned long mark = (unsigned long)-1;
+	unsigned long *spinloop = &__secondary_hold_spinloop;
+	unsigned long *acknowledge = &__secondary_hold_acknowledge;
+#ifdef CONFIG_PPC64
+	/* __secondary_hold is actually a descriptor, not the text address */
+	unsigned long secondary_hold = __pa(*(unsigned long *)__secondary_hold);
+#else
+	unsigned long secondary_hold = __pa(__secondary_hold);
+#endif
+	struct device_node *boot_cpu;
+
+	DBG("%s: finding CPU node\n", __func__);
+	boot_cpu = of_find_node_by_type(NULL, "cpu");
+	if (!boot_cpu)
+		panic("%s: Cannot find Booting CPU node\n", __func__);
+
+	/* Set the common spinloop variable, so all of the secondary cpus
+	 * will block when they are awakened from their OF spinloop.
+	 * This must occur for both SMP and non SMP kernels, since OF will
+	 * be trashed when we move the kernel.
+	 */
+	*spinloop = 0;
+
+	DBG("%s: Searching for all vcpu numbers > 0\n", __func__);
+	/* try and start as many as we can */
+	for (vcpu = 1; vcpu < NR_CPUS; vcpu++) {
+		int i;
+
+		rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, vcpu, NULL);
+		if (rc < 0)
+			continue;
+
+		DBG("%s: Found vcpu: %d\n", __func__, vcpu);
+		/* Init the acknowledge var which will be reset by
+		 * the secondary cpu when it awakens from its OF
+		 * spinloop.
+		 */
+		*acknowledge = mark;
+
+		DBG("%s: Starting vcpu: %d at pc: 0x%lx\n", __func__,
+		    vcpu, secondary_hold);
+		rc = xen_start_vcpu(vcpu, secondary_hold);
+		if (rc)
+			panic("%s: xen_start_vpcu() failed\n", __func__);
+
+		DBG("%s: Waiting for ACK on vcpu: %d\n", __func__, vcpu);
+		/* bounded wait; mb() forces *acknowledge to be re-read */
+		for (i = 0; (i < 100000000) && (*acknowledge == mark); i++)
+			mb();
+
+		if (*acknowledge == vcpu) {
+			DBG("%s: Recieved for ACK on vcpu: %d\n",
+			    __func__, vcpu);
+		} else {
+			/* do not let a vcpu that never showed up go
+			 * unnoticed */
+			printk(KERN_WARNING
+			       "%s: no ACK from vcpu: %d (0x%lx)\n",
+			       __func__, vcpu, *acknowledge);
+		}
+
+		if (!xen_add_vcpu_node(boot_cpu, vcpu))
+			printk(KERN_WARNING
+			       "%s: no devtree node added for vcpu: %d\n",
+			       __func__, vcpu);
+
+		cpu_set(vcpu, cpu_present_map);
+		set_hard_smp_processor_id(vcpu, vcpu);
+	}
+	of_node_put(boot_cpu);
+	DBG("%s: end...\n", __func__);
+}
+
+/* smp_ops probe hook: report how many CPUs are set in cpu_present_map. */
+static int __init smp_xen_probe(void)
+{
+	int nr_present = cpus_weight(cpu_present_map);
+
+	return nr_present;
+}
+
+/* IRQ handler for the reschedule IPI: forwards PPC_MSG_RESCHEDULE to
+ * smp_message_recv().  @irq and @dev_id are unused. */
+static irqreturn_t xen_ppc_msg_reschedule(int irq, void *dev_id,
+					  struct pt_regs *regs)
+{
+	const int msg = PPC_MSG_RESCHEDULE;
+
+	smp_message_recv(msg, regs);
+	return IRQ_HANDLED;
+}
+
+/* IRQ handler for the call-function IPI: forwards PPC_MSG_CALL_FUNCTION to
+ * smp_message_recv().  @irq and @dev_id are unused. */
+static irqreturn_t xen_ppc_msg_call_function(int irq, void *dev_id,
+					     struct pt_regs *regs)
+{
+	const int msg = PPC_MSG_CALL_FUNCTION;
+
+	smp_message_recv(msg, regs);
+	return IRQ_HANDLED;
+}
+
+/* IRQ handler for the debugger-break IPI: forwards PPC_MSG_DEBUGGER_BREAK
+ * to smp_message_recv().  @irq and @dev_id are unused. */
+static irqreturn_t xen_ppc_msg_debugger_break(int irq, void *dev_id,
+					      struct pt_regs *regs)
+{
+	const int msg = PPC_MSG_DEBUGGER_BREAK;
+
+	smp_message_recv(msg, regs);
+	return IRQ_HANDLED;
+}
+
+/* One IPI message type: the handler to run, the PPC_MSG_* number it
+ * serves, and the name its interrupt is registered under. */
+struct message {
+	irqreturn_t (*f)(int, void *, struct pt_regs *);
+	int num;
+	char *name;
+};
+
+/* The IPI messages that smp_xen_setup_cpu() binds on each cpu. */
+static struct message ipi_msgs[] = {
+	{
+		.f = xen_ppc_msg_reschedule,
+		.num = PPC_MSG_RESCHEDULE,
+		.name = "IPI-resched",
+	},
+	{
+		.f = xen_ppc_msg_call_function,
+		.num = PPC_MSG_CALL_FUNCTION,
+		.name = "IPI-function",
+	},
+	{
+		.f = xen_ppc_msg_debugger_break,
+		.num = PPC_MSG_DEBUGGER_BREAK,
+		.name = "IPI-debug",
+	},
+};
+
+DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
+
+/*
+ * smp_ops setup_cpu hook: bind an interrupt handler for every entry of
+ * ipi_msgs on @cpu and record the resulting irq in that cpu's ipi_to_irq
+ * slot for the message number.  Any inconsistency between the table and
+ * the per-cpu array, or a failed bind, is a BUG.
+ */
+static void __devinit smp_xen_setup_cpu(int cpu)
+{
+	const int nr_ipis = ARRAY_SIZE(__get_cpu_var(ipi_to_irq));
+	struct message *m;
+	int irq;
+
+	/* the include web is big and scary enough to change these values
+	 * under us, so check that the table still fits */
+	BUG_ON(ARRAY_SIZE(ipi_msgs) > nr_ipis);
+
+	for (m = ipi_msgs; m < ipi_msgs + ARRAY_SIZE(ipi_msgs); m++) {
+		BUG_ON(m->num >= nr_ipis);
+
+		irq = bind_ipi_to_irqhandler(m->num, cpu, m->f,
+					     SA_INTERRUPT, m->name, NULL);
+		BUG_ON(irq < 0);
+
+		per_cpu(ipi_to_irq, cpu)[m->num] = irq;
+		DBG("%s: cpu: %d vector :%d irq: %d\n",
+		    __func__, cpu, m->num, irq);
+	}
+}
+
+/*
+ * Raise IPI @vector on @cpu by notifying the irq recorded for it in that
+ * cpu's ipi_to_irq array.  A negative recorded irq is a BUG.
+ */
+static inline void send_IPI_one(unsigned int cpu, int vector)
+{
+	const int irq = per_cpu(ipi_to_irq, cpu)[vector];
+
+	BUG_ON(irq < 0);
+
+	DBG("%s: cpu: %d vector :%d irq: %d!\n",
+	    __func__, cpu, vector, irq);
+	DBG("%s: per_cpu[%p]: %d %d %d %d\n",
+	    __func__, per_cpu(ipi_to_irq, cpu),
+	    per_cpu(ipi_to_irq, cpu)[0],
+	    per_cpu(ipi_to_irq, cpu)[1],
+	    per_cpu(ipi_to_irq, cpu)[2],
+	    per_cpu(ipi_to_irq, cpu)[3]);
+
+	notify_remote_via_irq(irq);
+}
+
+/*
+ * smp_ops message_pass hook: deliver IPI @msg to @target.
+ *
+ * @msg must be PPC_MSG_RESCHEDULE, PPC_MSG_CALL_FUNCTION or
+ * PPC_MSG_DEBUGGER_BREAK; anything else panics.  @target is MSG_ALL
+ * (every online cpu), MSG_ALL_BUT_SELF (every online cpu except the
+ * caller's) or a single cpu number.
+ */
+static void smp_xen_message_pass(int target, int msg)
+{
+	int cpu;
+
+	if (msg != PPC_MSG_RESCHEDULE &&
+	    msg != PPC_MSG_CALL_FUNCTION &&
+	    msg != PPC_MSG_DEBUGGER_BREAK) {
+		panic("SMP %d: smp_message_pass: unknown msg %d\n",
+		      smp_processor_id(), msg);
+		return;
+	}
+
+	/* a plain cpu number: exactly one recipient */
+	if (target != MSG_ALL && target != MSG_ALL_BUT_SELF) {
+		send_IPI_one(target, msg);
+		return;
+	}
+
+	/* broadcast, optionally leaving ourselves out */
+	for_each_online_cpu(cpu) {
+		if (target == MSG_ALL_BUT_SELF &&
+		    cpu == smp_processor_id())
+			continue;
+		send_IPI_one(cpu, msg);
+	}
+}
+
+/* SMP callbacks installed by xen_setup_smp(); kick_cpu is the generic
+ * implementation, the other hooks are the Xen ones defined in this file. */
+static struct smp_ops_t xen_smp_ops = {
+	.message_pass	= smp_xen_message_pass,
+	.probe		= smp_xen_probe,
+	.kick_cpu	= smp_generic_kick_cpu,
+	.setup_cpu	= smp_xen_setup_cpu,
+};
+
+/*
+ * Xen SMP entry point: install the Xen smp_ops, start the secondary
+ * vcpus, then call smp_release_cpus().
+ */
+void xen_setup_smp(void)
+{
+	/* Setup SMP callback */
+	smp_ops = &xen_smp_ops;
+
+	/* let them fly */
+	xen_boot_secondary_vcpus();
+	smp_release_cpus();
+}
_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ppc-devel
|