# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID bb0dc0ae23bb1fe49c197f38951fc424eef2905e
# Parent 2ccaa3879417ba40b112fcf9d1ef4d45c82e25ca
Fix Xen's interrupt acknowledgement routines on certain
(apparently broken) IO-APIC hardware:
1. Do not mask/unmask the IO-APIC pin during normal ISR
processing. This seems to have really bizarre side effects
on some chipsets.
2. Since we instead tickle the local APIC in the ->end
irq hook function, it *must* run on the CPU that
received the interrupt. Therefore we track which CPUs
need to do final acknowledgement and IPI them if
necessary to do so.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
diff -r 2ccaa3879417 -r bb0dc0ae23bb xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c Fri Apr 14 10:58:49 2006
+++ b/xen/arch/x86/io_apic.c Fri Apr 14 11:01:15 2006
@@ -190,16 +190,16 @@
__modify_IO_APIC_irq(irq, 0, 0x00010000);
}
-/* trigger = 0 */
-static void __edge_IO_APIC_irq (unsigned int irq)
-{
- __modify_IO_APIC_irq(irq, 0, 0x00008000);
-}
-
-/* trigger = 1 */
-static void __level_IO_APIC_irq (unsigned int irq)
-{
- __modify_IO_APIC_irq(irq, 0x00008000, 0);
+/* mask = 1, trigger = 0 */
+static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
+{
+ __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
+}
+
+/* mask = 0, trigger = 1 */
+static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
+{
+ __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
}
static void mask_IO_APIC_irq (unsigned int irq)
@@ -1323,10 +1323,13 @@
static void mask_and_ack_level_ioapic_irq (unsigned int irq)
{
+}
+
+static void end_level_ioapic_irq (unsigned int irq)
+{
unsigned long v;
int i;
- mask_IO_APIC_irq(irq);
/*
* It appears there is an erratum which affects at least version 0x11
* of I/O APIC (that's the 82093AA and cores integrated into various
@@ -1355,15 +1358,10 @@
if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
spin_lock(&ioapic_lock);
- __edge_IO_APIC_irq(irq);
- __level_IO_APIC_irq(irq);
+ __mask_and_edge_IO_APIC_irq(irq);
+ __unmask_and_level_IO_APIC_irq(irq);
spin_unlock(&ioapic_lock);
}
-}
-
-static void end_level_ioapic_irq (unsigned int irq)
-{
- unmask_IO_APIC_irq(irq);
}
static unsigned int startup_edge_ioapic_vector(unsigned int vector)
diff -r 2ccaa3879417 -r bb0dc0ae23bb xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c Fri Apr 14 10:58:49 2006
+++ b/xen/arch/x86/irq.c Fri Apr 14 11:01:15 2006
@@ -148,6 +148,11 @@
u8 nr_guests;
u8 in_flight;
u8 shareable;
+ u8 ack_type;
+#define ACKTYPE_NONE 0 /* Final ACK is not required */
+#define ACKTYPE_SINGLE 1 /* Final ACK on any CPU */
+#define ACKTYPE_MULTI 2 /* Final ACK on the CPU that was interrupted */
+ cpumask_t cpu_ack_map;
struct domain *guest[IRQ_MAX_GUESTS];
} irq_guest_action_t;
@@ -159,35 +164,109 @@
struct domain *d;
int i;
+ if ( unlikely(action->nr_guests == 0) )
+ {
+ /* An interrupt may slip through while freeing an ACKTYPE_MULTI irq. */
+ ASSERT(action->ack_type == ACKTYPE_MULTI);
+ desc->handler->end(vector);
+ return;
+ }
+
+ if ( action->ack_type == ACKTYPE_MULTI )
+ cpu_set(smp_processor_id(), action->cpu_ack_map);
+
for ( i = 0; i < action->nr_guests; i++ )
{
d = action->guest[i];
- if ( !test_and_set_bit(irq, &d->pirq_mask) )
+ if ( (action->ack_type != ACKTYPE_NONE) &&
+ !test_and_set_bit(irq, &d->pirq_mask) )
action->in_flight++;
send_guest_pirq(d, irq);
}
}
+static void end_guest_irq(void *data)
+{
+ irq_desc_t *desc = data;
+ irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ if ( (desc->status & IRQ_GUEST) &&
+ (action->in_flight == 0) &&
+ test_and_clear_bit(smp_processor_id(), &action->cpu_ack_map) )
+ desc->handler->end(desc - irq_desc);
+ spin_unlock_irqrestore(&desc->lock, flags);
+}
+
int pirq_guest_unmask(struct domain *d)
{
- irq_desc_t *desc;
- unsigned int pirq;
- shared_info_t *s = d->shared_info;
+ irq_desc_t *desc;
+ irq_guest_action_t *action;
+ cpumask_t cpu_ack_map = CPU_MASK_NONE;
+ unsigned int pirq, cpu = smp_processor_id();
+ shared_info_t *s = d->shared_info;
for ( pirq = find_first_bit(d->pirq_mask, NR_PIRQS);
pirq < NR_PIRQS;
pirq = find_next_bit(d->pirq_mask, NR_PIRQS, pirq+1) )
{
- desc = &irq_desc[irq_to_vector(pirq)];
+ desc = &irq_desc[irq_to_vector(pirq)];
+ action = (irq_guest_action_t *)desc->action;
+
spin_lock_irq(&desc->lock);
if ( !test_bit(d->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) &&
- test_and_clear_bit(pirq, &d->pirq_mask) &&
- (--((irq_guest_action_t *)desc->action)->in_flight == 0) )
- desc->handler->end(irq_to_vector(pirq));
+ test_and_clear_bit(pirq, &d->pirq_mask) )
+ {
+ ASSERT(action->ack_type != ACKTYPE_NONE);
+ if ( --action->in_flight == 0 )
+ {
+ if ( (action->ack_type == ACKTYPE_SINGLE) ||
+ test_and_clear_bit(cpu, &action->cpu_ack_map) )
+ desc->handler->end(irq_to_vector(pirq));
+ cpu_ack_map = action->cpu_ack_map;
+ }
+ }
spin_unlock_irq(&desc->lock);
+
+ if ( !cpus_empty(cpu_ack_map) )
+ {
+ on_selected_cpus(cpu_ack_map, end_guest_irq, desc, 1, 0);
+ cpu_ack_map = CPU_MASK_NONE;
+ }
}
return 0;
+}
+
+int pirq_acktype(int irq)
+{
+ irq_desc_t *desc;
+ unsigned int vector;
+
+ vector = irq_to_vector(irq);
+ if ( vector == 0 )
+ return ACKTYPE_NONE;
+
+ desc = &irq_desc[vector];
+
+ /*
+ * Edge-triggered IO-APIC interrupts need no final acknowledgement:
+ * we ACK early during interrupt processing.
+ */
+ if ( !strcmp(desc->handler->typename, "IO-APIC-edge") )
+ return ACKTYPE_NONE;
+
+ /* Legacy PIC interrupts can be acknowledged from any CPU. */
+ if ( !strcmp(desc->handler->typename, "XT-PIC") )
+ return ACKTYPE_SINGLE;
+
+ /*
+ * By default assume that an interrupt must be finally acknowledged on
+ * the CPU on which it was received. This is true for level-triggered
+ * IO-APIC interrupts, for example, where we tickle the LAPIC to EOI.
+ */
+ return ACKTYPE_MULTI;
}
int pirq_guest_bind(struct vcpu *v, int irq, int will_share)
@@ -230,10 +309,12 @@
goto out;
}
- action->nr_guests = 0;
- action->in_flight = 0;
- action->shareable = will_share;
-
+ action->nr_guests = 0;
+ action->in_flight = 0;
+ action->shareable = will_share;
+ action->ack_type = pirq_acktype(irq);
+ action->cpu_ack_map = CPU_MASK_NONE;
+
desc->depth = 0;
desc->status |= IRQ_GUEST;
desc->status &= ~IRQ_DISABLED;
@@ -271,6 +352,7 @@
unsigned int vector = irq_to_vector(irq);
irq_desc_t *desc = &irq_desc[vector];
irq_guest_action_t *action;
+ cpumask_t cpu_ack_map;
unsigned long flags;
int i;
@@ -280,28 +362,60 @@
action = (irq_guest_action_t *)desc->action;
- if ( test_and_clear_bit(irq, &d->pirq_mask) &&
- (--action->in_flight == 0) )
- desc->handler->end(vector);
-
- if ( action->nr_guests == 1 )
- {
- desc->action = NULL;
- xfree(action);
- desc->depth = 1;
- desc->status |= IRQ_DISABLED;
- desc->status &= ~IRQ_GUEST;
- desc->handler->shutdown(vector);
- }
- else
- {
- i = 0;
- while ( action->guest[i] && (action->guest[i] != d) )
- i++;
- memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1);
- action->nr_guests--;
- }
-
+ i = 0;
+ while ( action->guest[i] && (action->guest[i] != d) )
+ i++;
+ memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1);
+ action->nr_guests--;
+
+ switch ( action->ack_type )
+ {
+ case ACKTYPE_SINGLE:
+ if ( test_and_clear_bit(irq, &d->pirq_mask) &&
+ (--action->in_flight == 0) )
+ desc->handler->end(vector);
+ break;
+ case ACKTYPE_MULTI:
+ if ( test_and_clear_bit(irq, &d->pirq_mask) )
+ --action->in_flight;
+ while ( action->in_flight == 0 )
+ {
+ /* We cannot release guest info until all pending ACKs are done. */
+ cpu_ack_map = action->cpu_ack_map;
+ if ( cpus_empty(cpu_ack_map) )
+ break;
+
+ /* We cannot hold the lock while interrupting other CPUs. */
+ spin_unlock_irqrestore(&desc->lock, flags);
+ on_selected_cpus(cpu_ack_map, end_guest_irq, desc, 1, 1);
+ spin_lock_irqsave(&desc->lock, flags);
+
+ /* The world can change while we do not hold the lock. */
+ if ( !(desc->status & IRQ_GUEST) )
+ goto out;
+ if ( (action->ack_type != ACKTYPE_MULTI) ||
+ (action->nr_guests != 0) )
+ break;
+ }
+ break;
+ }
+
+ BUG_ON(test_bit(irq, &d->pirq_mask));
+
+ if ( action->nr_guests != 0 )
+ goto out;
+
+ BUG_ON(action->in_flight != 0);
+ BUG_ON(!cpus_empty(action->cpu_ack_map));
+
+ desc->action = NULL;
+ xfree(action);
+ desc->depth = 1;
+ desc->status |= IRQ_DISABLED;
+ desc->status &= ~IRQ_GUEST;
+ desc->handler->shutdown(vector);
+
+ out:
spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
diff -r 2ccaa3879417 -r bb0dc0ae23bb xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c Fri Apr 14 10:58:49 2006
+++ b/xen/arch/x86/physdev.c Fri Apr 14 11:01:15 2006
@@ -18,6 +18,9 @@
extern int
ioapic_guest_write(
unsigned long physbase, unsigned int reg, u32 pval);
+extern int
+pirq_acktype(
+ int irq);
/*
* Demuxing hypercall.
@@ -43,8 +46,7 @@
if ( (irq < 0) || (irq >= NR_IRQS) )
break;
op.u.irq_status_query.flags = 0;
- /* Edge-triggered interrupts don't need an explicit unmask downcall. */
- if ( !strstr(irq_desc[irq_to_vector(irq)].handler->typename, "edge") )
+ if ( pirq_acktype(irq) != 0 )
op.u.irq_status_query.flags |= PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY;
ret = 0;
break;
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|