This patch rolls up the following patches, rebased against the current
tip of tree, for submission:
* Memory barrier after sp store
* Flush the ERAT early for secondary CPUs
* SMP and IPI support
Signed-off-by: Amos Waterland <apw@xxxxxxxxxx>
---
changeset : b30cb72ed5e2+31ae0
machines : kpblade1 cso103 cso102 kpblade7 cso98 cso99
fail : 0
transient : 1
pass : 224
total : 225
reliability : 100%
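
A note on testing: the IPI latency torture test added in smp.c is compiled
out by default, because include/asm-powerpc/smp.h unconditionally #undefs
DEBUG_IPI before the prototype. To run ipi_torture_test() once dom0 is
unpaused and get the min/max/mean timebase-tick numbers it prints, change
that line as shown below (an illustrative edit, not part of this patch):

    /* include/asm-powerpc/smp.h: enable the boot-time IPI latency test */
    #define DEBUG_IPI    /* was: #undef DEBUG_IPI */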
---
 arch/powerpc/external.c                        |   32 +++++
 arch/powerpc/mpic.c                            |    9 -
 arch/powerpc/mpic_init.c                       |   48 ++++++++
 arch/powerpc/powerpc64/exceptions.S            |    4
 arch/powerpc/setup.c                           |   50 +++++----
 arch/powerpc/smp.c                             |  135 +++++++++++++++++++++++--
 include/asm-powerpc/mach-default/irq_vectors.h |   22 ----
 include/asm-powerpc/smp.h                      |   17 +++
 8 files changed, 261 insertions(+), 56 deletions(-)
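
For reviewers, a minimal sketch of how a caller is expected to use the
cross-call path added in smp.c; the handler and function names here are
made up for illustration and are not part of the patch:

    /* Runs on each selected CPU, invoked from smp_call_function_interrupt(). */
    static void remote_noop(void *unused)
    {
    }

    static void example_cross_call(void)
    {
        cpumask_t mask = cpu_online_map;

        /* The initiator never targets itself; compare smp_send_event_check_mask(). */
        cpu_clear(smp_processor_id(), mask);
        if (!cpus_empty(mask))
            on_selected_cpus(mask, remote_noop, NULL, 0 /* retry */, 1 /* wait */);
    }

As posted, on_selected_cpus() always asks the remote side to signal
completion (call_data.wait is forced to 1 until call_data gets RCU
protection), and the initiator spins on the started/finished counters,
printing a periodic stall warning until they reach the number of selected
CPUs.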
diff -r b30cb72ed5e2 xen/arch/powerpc/external.c
--- a/xen/arch/powerpc/external.c Fri Nov 03 16:53:17 2006 -0500
+++ b/xen/arch/powerpc/external.c Thu Nov 09 18:02:59 2006 -0500
@@ -82,7 +82,16 @@ void do_external(struct cpu_user_regs *r
vec = xen_mpic_get_irq(regs);
- if (vec != -1) {
+ if (vector_is_ipi(vec)) {
+ /* do_IRQ is fundamentally broken for reliable IPI delivery. */
+ irq_desc_t *desc = &irq_desc[vec];
+ regs->entry_vector = vec;
+ spin_lock(&desc->lock);
+ desc->handler->ack(vec);
+ desc->action->handler(vector_to_irq(vec), desc->action->dev_id, regs);
+ desc->handler->end(vec);
+ spin_unlock(&desc->lock);
+ } else if (vec != -1) {
DBG("EE:0x%lx isrc: %d\n", regs->msr, vec);
regs->entry_vector = vec;
do_IRQ(regs);
@@ -253,3 +262,24 @@ int ioapic_guest_write(unsigned long phy
BUG_ON(val != val);
return 0;
}
+
+void send_IPI_mask(cpumask_t mask, int vector)
+{
+ unsigned int cpus;
+ int const bits = 8 * sizeof(cpus);
+
+ switch(vector) {
+ case CALL_FUNCTION_VECTOR:
+ case EVENT_CHECK_VECTOR:
+ break;
+ default:
+ BUG();
+ return;
+ }
+
+ BUG_ON(NR_CPUS > bits);
+ BUG_ON(fls(mask.bits[0]) > bits);
+
+ cpus = mask.bits[0];
+ mpic_send_ipi(vector, cpus);
+}
diff -r b30cb72ed5e2 xen/arch/powerpc/mpic.c
--- a/xen/arch/powerpc/mpic.c Fri Nov 03 16:53:17 2006 -0500
+++ b/xen/arch/powerpc/mpic.c Thu Nov 09 18:02:59 2006 -0500
@@ -27,10 +27,6 @@
#define alloc_bootmem(x) xmalloc_bytes(x)
-#define request_irq(irq, handler, f, devname, dev_id) \
- panic("IPI requested: %d: %p: %s: %p\n", irq, handler, devname, dev_id)
-
-typedef int irqreturn_t;
#define IRQ_NONE (0)
#define IRQ_HANDLED (1)
@@ -96,11 +92,6 @@ typedef int irqreturn_t;
#endif
#include <asm/mpic.h>
#include <asm/smp.h>
-
-static inline void smp_message_recv(int msg, struct pt_regs *regs)
-{
- return;
-}
#ifdef DEBUG
#define DBG(fmt...) printk(fmt)
diff -r b30cb72ed5e2 xen/arch/powerpc/mpic_init.c
--- a/xen/arch/powerpc/mpic_init.c Fri Nov 03 16:53:17 2006 -0500
+++ b/xen/arch/powerpc/mpic_init.c Thu Nov 09 18:02:59 2006 -0500
@@ -22,6 +22,7 @@
#include <xen/init.h>
#include <xen/lib.h>
#include <asm/mpic.h>
+#include <errno.h>
#include "mpic_init.h"
#include "oftree.h"
#include "of-devtree.h"
@@ -358,6 +359,42 @@ static struct hw_interrupt_type *share_m
#endif
+static unsigned int mpic_startup_ipi(unsigned int irq)
+{
+ mpic->hc_ipi.enable(irq);
+ return 0;
+}
+
+int request_irq(unsigned int irq,
+ irqreturn_t (*handler)(int, void *, struct cpu_user_regs *),
+ unsigned long irqflags, const char * devname, void *dev_id)
+{
+ int retval;
+ struct irqaction *action;
+ void (*func)(int, void *, struct cpu_user_regs *);
+
+ action = xmalloc(struct irqaction);
+ if (!action) {
+ BUG();
+ return -ENOMEM;
+ }
+
+ /* Xen's handler prototype is slightly different than Linux's. */
+ func = (void (*)(int, void *, struct cpu_user_regs *))handler;
+
+ action->handler = func;
+ action->name = devname;
+ action->dev_id = dev_id;
+
+ retval = setup_irq(irq, action);
+ if (retval) {
+ BUG();
+ xfree(action);
+ }
+
+ return retval;
+}
+
struct hw_interrupt_type *xen_mpic_init(struct hw_interrupt_type *xen_irq)
{
unsigned int isu_size;
@@ -397,6 +434,11 @@ struct hw_interrupt_type *xen_mpic_init(
hit = share_mpic(&mpic->hc_irq, xen_irq);
printk("%s: success\n", __func__);
+
+ mpic->hc_ipi.ack = xen_irq->ack;
+ mpic->hc_ipi.startup = mpic_startup_ipi;
+ mpic_request_ipis();
+
return hit;
}
@@ -406,3 +448,9 @@ int xen_mpic_get_irq(struct cpu_user_reg
return mpic_get_one_irq(mpic, regs);
}
+
+int vector_is_ipi(int vector)
+{
+ BUG_ON(!mpic);
+ return (mpic->ipi_offset <= vector) && (vector < mpic->ipi_offset + 4);
+}
diff -r b30cb72ed5e2 xen/arch/powerpc/powerpc64/exceptions.S
--- a/xen/arch/powerpc/powerpc64/exceptions.S Fri Nov 03 16:53:17 2006 -0500
+++ b/xen/arch/powerpc/powerpc64/exceptions.S Thu Nov 09 18:03:18 2006 -0500
@@ -564,6 +564,10 @@ _GLOBAL(sleep)
*/
.globl spin_start
spin_start:
+ /* We discovered by experiment that the ERAT must be flushed early. */
+ isync
+ slbia
+ isync
/* Do a cache flush for our text, in case the loader didn't */
LOADADDR(r9, _start)
diff -r b30cb72ed5e2 xen/arch/powerpc/setup.c
--- a/xen/arch/powerpc/setup.c Fri Nov 03 16:53:17 2006 -0500
+++ b/xen/arch/powerpc/setup.c Thu Nov 09 18:05:17 2006 -0500
@@ -37,6 +37,7 @@
#include <xen/keyhandler.h>
#include <acm/acm_hooks.h>
#include <public/version.h>
+#include <asm/mpic.h>
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/cache.h>
@@ -88,6 +89,8 @@ struct ns16550_defaults ns16550;
extern char __per_cpu_start[], __per_cpu_data_end[], __per_cpu_end[];
+static struct domain *idle_domain;
+
volatile struct processor_area * volatile global_cpu_table[NR_CPUS];
int is_kernel_text(unsigned long addr)
@@ -159,8 +162,6 @@ static void percpu_free_unused_areas(voi
static void __init start_of_day(void)
{
- struct domain *idle_domain;
-
init_IRQ();
scheduler_init();
@@ -175,23 +176,6 @@ static void __init start_of_day(void)
/* for some reason we need to set our own bit in the thread map */
cpu_set(0, cpu_sibling_map[0]);
- percpu_free_unused_areas();
-
- {
- /* FIXME: Xen assumes that an online CPU is a schedualable
- * CPU, but we just are not there yet. Remove this fragment when
- * scheduling processors actually works. */
- int cpuid;
-
- printk("WARNING!: Taking all secondary CPUs offline\n");
-
- for_each_online_cpu(cpuid) {
- if (cpuid == 0)
- continue;
- cpu_clear(cpuid, cpu_online_map);
- }
- }
-
initialize_keytable();
/* Register another key that will allow for the the Harware Probe
* to be contacted, this works with RiscWatch probes and should
@@ -201,7 +185,6 @@ static void __init start_of_day(void)
timer_init();
serial_init_postirq();
do_initcalls();
- schedulers_start();
}
void startup_cpu_idle_loop(void)
@@ -234,6 +217,7 @@ static void init_parea(int cpuid)
pa->whoami = cpuid;
pa->hard_id = cpu_hard_id[cpuid];
pa->hyp_stack_base = (void *)((ulong)stack + STACK_SIZE);
+ mb();
/* This store has the effect of invoking secondary_cpu_init. */
global_cpu_table[cpuid] = pa;
@@ -263,9 +247,22 @@ static int kick_secondary_cpus(int maxcp
/* This is the first C code that secondary processors invoke. */
int secondary_cpu_init(int cpuid, unsigned long r4)
{
+ struct vcpu *vcpu;
+
cpu_initialize(cpuid);
smp_generic_take_timebase();
+
+ /* If we are online, we must be able to ACK IPIs. */
+ mpic_setup_this_cpu();
cpu_set(cpuid, cpu_online_map);
+
+ vcpu = alloc_vcpu(idle_domain, cpuid, cpuid);
+ BUG_ON(vcpu == NULL);
+
+ set_current(idle_domain->vcpu[cpuid]);
+ idle_vcpu[cpuid] = current;
+ startup_cpu_idle_loop();
+
while(1);
}
@@ -340,6 +337,10 @@ static void __init __start_xen(multiboot
debugger_trap_immediate();
#endif
+ start_of_day();
+
+ mpic_setup_this_cpu();
+
/* Deal with secondary processors. */
if (opt_nosmp || ofd_boot_cpu == -1) {
printk("nosmp: leaving secondary processors spinning forever\n");
@@ -348,7 +349,11 @@ static void __init __start_xen(multiboot
kick_secondary_cpus(max_cpus);
}
- start_of_day();
+ /* Secondary processors must be online before we call this. */
+ schedulers_start();
+
+ /* This cannot be called before secondary cpus are marked online. */
+ percpu_free_unused_areas();
/* Create initial domain 0. */
dom0 = domain_create(0);
@@ -406,6 +411,9 @@ static void __init __start_xen(multiboot
console_end_sync();
domain_unpause_by_systemcontroller(dom0);
+#ifdef DEBUG_IPI
+ ipi_torture_test();
+#endif
startup_cpu_idle_loop();
}
diff -r b30cb72ed5e2 xen/arch/powerpc/smp.c
--- a/xen/arch/powerpc/smp.c Fri Nov 03 16:53:17 2006 -0500
+++ b/xen/arch/powerpc/smp.c Thu Nov 09 18:02:59 2006 -0500
@@ -22,6 +22,8 @@
#include <xen/smp.h>
#include <asm/flushtlb.h>
#include <asm/debugger.h>
+#include <asm/mpic.h>
+#include <asm/mach-default/irq_vectors.h>
int smp_num_siblings = 1;
int smp_num_cpus = 1;
@@ -50,7 +52,7 @@ void smp_send_event_check_mask(cpumask_t
{
cpu_clear(smp_processor_id(), mask);
if (!cpus_empty(mask))
- unimplemented();
+ send_IPI_mask(mask, EVENT_CHECK_VECTOR);
}
@@ -65,8 +67,20 @@ int smp_call_function(void (*func) (void
void smp_send_stop(void)
{
- unimplemented();
-}
+ BUG();
+}
+
+struct call_data_struct {
+ void (*func) (void *info);
+ void *info;
+ int wait;
+ atomic_t started;
+ atomic_t finished;
+ cpumask_t selected;
+};
+
+static DEFINE_SPINLOCK(call_lock);
+static struct call_data_struct call_data;
int on_selected_cpus(
cpumask_t selected,
@@ -75,6 +89,115 @@ int on_selected_cpus(
int retry,
int wait)
{
- unimplemented();
- return 0;
-}
+ int t, retval = 0, nr_cpus = cpus_weight(selected);
+
+ spin_lock(&call_lock);
+
+ call_data.func = func;
+ call_data.info = info;
+ call_data.wait = wait;
+ call_data.wait = 1; /* Until we get RCU around call_data. */
+ atomic_set(&call_data.started, 0);
+ atomic_set(&call_data.finished, 0);
+ mb();
+
+ send_IPI_mask(selected, CALL_FUNCTION_VECTOR);
+
+ /* We always wait for an initiation ACK from remote CPU. */
+ for (t = 0; atomic_read(&call_data.started) != nr_cpus; t++) {
+ if (t && t % timebase_freq == 0) {
+ printk("IPI start stall: %d ACKS to %d SYNS\n",
+ atomic_read(&call_data.started), nr_cpus);
+ }
+ }
+
+ /* If told to, we wait for a completion ACK from remote CPU. */
+ if (wait) {
+ for (t = 0; atomic_read(&call_data.finished) != nr_cpus; t++) {
+ if (t && t % timebase_freq == 0) {
+ printk("IPI finish stall: %d ACKS to %d SYNS\n",
+ atomic_read(&call_data.finished), nr_cpus);
+ }
+ }
+ }
+
+ spin_unlock(&call_lock);
+
+ return retval;
+}
+
+void smp_call_function_interrupt(struct cpu_user_regs *regs)
+{
+
+ void (*func)(void *info) = call_data.func;
+ void *info = call_data.info;
+ int wait = call_data.wait;
+
+ atomic_inc(&call_data.started);
+ mb();
+ (*func)(info);
+ mb();
+
+ if (wait)
+ atomic_inc(&call_data.finished);
+
+ return;
+}
+
+void smp_event_check_interrupt(void)
+{
+ /* We are knocked out of NAP state at least. */
+ return;
+}
+
+void smp_message_recv(int msg, struct cpu_user_regs *regs)
+{
+ switch(msg) {
+ case CALL_FUNCTION_VECTOR:
+ smp_call_function_interrupt(regs);
+ break;
+ case EVENT_CHECK_VECTOR:
+ smp_event_check_interrupt();
+ break;
+ default:
+ BUG();
+ break;
+ }
+}
+
+#ifdef DEBUG_IPI
+static void debug_ipi_ack(void *info)
+{
+ return;
+}
+
+void ipi_torture_test(void)
+{
+ int cpu;
+ unsigned long before, after, delta;
+ unsigned long min = ~0, max = 0, mean = 0, sum = 0, tick = 0;
+ cpumask_t mask;
+
+ cpus_clear(mask);
+
+ while (tick < 1000000) {
+ for_each_online_cpu(cpu) {
+ cpu_set(cpu, mask);
+ before = mftb();
+ on_selected_cpus(mask, debug_ipi_ack, NULL, 1, 1);
+ after = mftb();
+ cpus_clear(mask);
+
+ delta = after - before;
+ if (delta > max) max = delta;
+ if (delta < min) min = delta;
+ sum += delta;
+ tick++;
+ }
+ }
+
+ mean = sum / tick;
+
+ printk("IPI tb ticks: min = %ld max = %ld mean = %ld\n", min, max, mean);
+}
+#endif
diff -r b30cb72ed5e2 xen/include/asm-powerpc/mach-default/irq_vectors.h
--- a/xen/include/asm-powerpc/mach-default/irq_vectors.h Fri Nov 03 16:53:17 2006 -0500
+++ b/xen/include/asm-powerpc/mach-default/irq_vectors.h Thu Nov 09 18:02:59 2006 -0500
@@ -37,26 +37,10 @@
#define FAST_TRAP -1 /* 0x80 */
#define FIRST_SYSTEM_VECTOR -1
+#define CALL_FUNCTION_VECTOR 0x0
+#define EVENT_CHECK_VECTOR 0x1
+
#if 0
-
-/*
- * Vectors 0-16 in some cases are used for ISA interrupts.
- */
-
-/*
- * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
- *
- * some of the following vectors are 'rare', they are merged
- * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
- * TLB, reschedule and local APIC vectors are performance-critical.
- *
- * Vectors 0xf0-0xfa are free (reserved for future Linux use).
- */
-#define SPURIOUS_APIC_VECTOR 0xff
-#define ERROR_APIC_VECTOR 0xfe
-#define INVALIDATE_TLB_VECTOR 0xfd
-#define EVENT_CHECK_VECTOR 0xfc
-#define CALL_FUNCTION_VECTOR 0xfb
#define THERMAL_APIC_VECTOR 0xf0
/*
diff -r b30cb72ed5e2 xen/include/asm-powerpc/smp.h
--- a/xen/include/asm-powerpc/smp.h Fri Nov 03 16:53:17 2006 -0500
+++ b/xen/include/asm-powerpc/smp.h Thu Nov 09 18:02:59 2006 -0500
@@ -35,4 +35,21 @@ extern cpumask_t cpu_core_map[];
extern cpumask_t cpu_core_map[];
extern void __devinit smp_generic_take_timebase(void);
extern void __devinit smp_generic_give_timebase(void);
+
+#define SA_INTERRUPT 0x20000000u
+typedef int irqreturn_t;
+extern int request_irq(unsigned int irq,
+ irqreturn_t (*handler)(int, void *, struct cpu_user_regs *),
+ unsigned long irqflags, const char * devname, void *dev_id);
+void smp_message_recv(int msg, struct cpu_user_regs *regs);
+void smp_call_function_interrupt(struct cpu_user_regs *regs);
+void smp_event_check_interrupt(void);
+void send_IPI_mask(cpumask_t mask, int vector);
+int vector_is_ipi(int vector);
+
+#undef DEBUG_IPI
+#ifdef DEBUG_IPI
+void ipi_torture_test(void);
#endif
+
+#endif