[Xen-changelog] [xen-unstable] x86: Implement per-cpu vector for xen hypervisor

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] x86: Implement per-cpu vector for xen hypervisor
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Wed, 19 Aug 2009 07:40:13 -0700
Delivery-date: Wed, 19 Aug 2009 07:42:34 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1250682826 -3600
# Node ID d33e9aae74c65494c02cc04c3a44c5958c577936
# Parent  722c7e94e76421452c035b3e9a46f805a2815445
x86: Implement per-cpu vector for xen hypervisor

Since Xen and Linux have big differences in their code bases, it
is very hard to port Linux's patch and apply it to Xen directly,
so this patch only adopts the core logic from Linux and makes it
work for Xen.

Key changes:
1. Vector allocation algorithm (see the first sketch below).
2. All IRQ chips' set_affinity logic.
3. IRQ migration when a CPU is hot-removed (the migration handshake is
   sketched after the file summary).
4. Breaking assumptions that depend on a global vector policy.
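
For readers new to the per-cpu vector scheme, here is a minimal,
self-contained sketch of the idea behind change 1. It is illustrative C
written for this summary, not code from the patch, and every name in it
(NR_CPUS_TOY, assign_vector, ...) is invented: each CPU gets its own
vector-to-IRQ table, so a vector only needs to be free on the CPUs in the
IRQ's destination mask rather than on all CPUs, and the same vector number
can serve different IRQs on different CPUs.

/* toy_percpu_vectors.c: invented illustration, not the Xen implementation.
 * Each CPU has a private vector-to-IRQ table; -1 means "free". */
#include <stdio.h>

#define NR_CPUS_TOY    4
#define NR_VECTORS_TOY 256
#define FIRST_DYN      0x20        /* first dynamically allocatable vector */
#define LAST_DYN       0xef

static int vector_irq[NR_CPUS_TOY][NR_VECTORS_TOY];

/* Find a vector that is unused on every CPU set in cpu_mask and bind it
 * to irq there; returns the vector or -1.  (The real allocator in this
 * patch also strides by 8 to spread vectors across priority levels.) */
static int assign_vector(int irq, unsigned int cpu_mask)
{
    int vec, cpu;

    for (vec = FIRST_DYN; vec <= LAST_DYN; vec++) {
        int free = 1;
        for (cpu = 0; cpu < NR_CPUS_TOY; cpu++)
            if ((cpu_mask & (1u << cpu)) && vector_irq[cpu][vec] != -1)
                free = 0;
        if (!free)
            continue;
        for (cpu = 0; cpu < NR_CPUS_TOY; cpu++)
            if (cpu_mask & (1u << cpu))
                vector_irq[cpu][vec] = irq;
        return vec;
    }
    return -1;                     /* out of vectors on this domain */
}

int main(void)
{
    int cpu, vec, v1, v2;

    for (cpu = 0; cpu < NR_CPUS_TOY; cpu++)
        for (vec = 0; vec < NR_VECTORS_TOY; vec++)
            vector_irq[cpu][vec] = -1;

    v1 = assign_vector(10, 0x1);   /* IRQ 10, destination CPU0 only */
    v2 = assign_vector(11, 0x2);   /* IRQ 11, destination CPU1 only */

    /* Both get vector 0x20: the number is reused across CPUs, which is
     * what lifts the old global limit of one IRQ per vector. */
    printf("irq 10 -> vector 0x%x on cpu0, irq 11 -> vector 0x%x on cpu1\n",
           v1, v2);
    return 0;
}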

Signed-off-by: Xiantao Zhang <xiantao.zhang@xxxxxxxxx>
---
 xen/arch/x86/apic.c                            |   10 
 xen/arch/x86/cpu/mcheck/mce_intel.c            |    4 
 xen/arch/x86/genapic/delivery.c                |   16 
 xen/arch/x86/genapic/x2apic.c                  |    8 
 xen/arch/x86/hpet.c                            |   28 -
 xen/arch/x86/hvm/vmx/vmx.c                     |   12 
 xen/arch/x86/i8259.c                           |   23 -
 xen/arch/x86/io_apic.c                         |  284 +++++++++++++---
 xen/arch/x86/irq.c                             |  438 ++++++++++++++++++-------
 xen/arch/x86/msi.c                             |   43 +-
 xen/arch/x86/physdev.c                         |   13 
 xen/arch/x86/setup.c                           |    4 
 xen/arch/x86/smp.c                             |   48 ++
 xen/arch/x86/smpboot.c                         |   26 -
 xen/drivers/passthrough/amd/iommu_init.c       |   74 ++--
 xen/drivers/passthrough/vtd/iommu.c            |   77 ++--
 xen/include/asm-x86/apic.h                     |    6 
 xen/include/asm-x86/apicdef.h                  |    2 
 xen/include/asm-x86/genapic.h                  |   20 -
 xen/include/asm-x86/irq.h                      |   58 +++
 xen/include/asm-x86/mach-default/irq_vectors.h |    3 
 xen/include/asm-x86/mach-generic/mach_apic.h   |    1 
 xen/include/asm-x86/smp.h                      |    2 
 xen/include/xen/cpumask.h                      |   18 -
 xen/include/xen/irq.h                          |    9 
 25 files changed, 929 insertions(+), 298 deletions(-)
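
The second mechanism worth orienting on before reading the diff is the
vector-migration handshake (irq_complete_move() and
smp_irq_move_cleanup_interrupt() below). The following toy model is,
again, invented for this summary rather than lifted from the patch; the
real code keeps this state in struct irq_cfg and sends an actual
IRQ_MOVE_CLEANUP_VECTOR IPI instead of adjusting a counter directly.

/* toy_irq_move.c: invented model of the migration handshake, not Xen
 * code.  Step 1: set_affinity() rebinds the IRQ to a new vector/domain
 * and marks a move in progress.  Step 2: the first interrupt arriving
 * through the new vector (irq_complete_move() below) sends a cleanup
 * IPI to the CPUs of the old domain.  Step 3: each old CPU frees its
 * per-cpu table slot in the cleanup handler. */
#include <stdio.h>

struct toy_cfg {
    int vector;
    unsigned int domain, old_domain;     /* CPU bitmasks */
    int move_in_progress, move_cleanup_count;
};

static int popcount(unsigned int m)
{
    int n = 0;
    while (m) { n += m & 1u; m >>= 1; }
    return n;
}

static void set_affinity(struct toy_cfg *cfg, int new_vector,
                         unsigned int new_domain)
{
    cfg->old_domain = cfg->domain;
    cfg->vector = new_vector;
    cfg->domain = new_domain;
    cfg->move_in_progress = 1;           /* old vector not yet released */
}

static void irq_complete_move(struct toy_cfg *cfg, int vector, int cpu)
{
    if (!cfg->move_in_progress)
        return;
    if (vector == cfg->vector && (cfg->domain & (1u << cpu))) {
        /* One cleanup IPI per CPU of the old domain. */
        cfg->move_cleanup_count = popcount(cfg->old_domain);
        cfg->move_in_progress = 0;
        printf("cleanup IPI sent to %d cpu(s)\n", cfg->move_cleanup_count);
    }
}

static void move_cleanup_interrupt(struct toy_cfg *cfg)
{
    if (cfg->move_cleanup_count)
        cfg->move_cleanup_count--;       /* per-cpu vector_irq slot freed */
}

int main(void)
{
    struct toy_cfg cfg = { 0x31, 0x1, 0, 0, 0 };

    set_affinity(&cfg, 0x39, 0x2);       /* migrate from CPU0 to CPU1 */
    irq_complete_move(&cfg, 0x39, 1);    /* first interrupt on CPU1 */
    move_cleanup_interrupt(&cfg);        /* CPU0 handles the cleanup IPI */
    printf("cleanup pending on %d cpu(s)\n", cfg.move_cleanup_count);
    return 0;
}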

diff -r 722c7e94e764 -r d33e9aae74c6 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c       Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/arch/x86/apic.c       Wed Aug 19 12:53:46 2009 +0100
@@ -70,7 +70,7 @@ int modern_apic(void)
  */
 void ack_bad_irq(unsigned int irq)
 {
-    printk("unexpected IRQ trap at vector %02x\n", irq);
+    printk("unexpected IRQ trap at irq %02x\n", irq);
     /*
      * Currently unexpected vectors happen only on SMP and APIC.
      * We _must_ ack these because every local APIC has only N
@@ -1197,9 +1197,11 @@ int reprogram_timer(s_time_t timeout)
 
 fastcall void smp_apic_timer_interrupt(struct cpu_user_regs * regs)
 {
+    struct cpu_user_regs *old_regs = set_irq_regs(regs);
     ack_APIC_irq();
     perfc_incr(apic_timer);
     raise_softirq(TIMER_SOFTIRQ);
+    set_irq_regs(old_regs);
 }
 
 /*
@@ -1208,6 +1210,7 @@ fastcall void smp_spurious_interrupt(str
 fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs)
 {
     unsigned long v;
+    struct cpu_user_regs *old_regs = set_irq_regs(regs);
 
     irq_enter();
     /*
@@ -1223,6 +1226,7 @@ fastcall void smp_spurious_interrupt(str
     printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should never 
happen.\n",
            smp_processor_id());
     irq_exit();
+    set_irq_regs(old_regs);
 }
 
 /*
@@ -1232,6 +1236,7 @@ fastcall void smp_error_interrupt(struct
 fastcall void smp_error_interrupt(struct cpu_user_regs *regs)
 {
     unsigned long v, v1;
+    struct cpu_user_regs *old_regs = set_irq_regs(regs);
 
     irq_enter();
     /* First tickle the hardware, only then report what went on. -- REW */
@@ -1254,6 +1259,7 @@ fastcall void smp_error_interrupt(struct
     printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
             smp_processor_id(), v , v1);
     irq_exit();
+    set_irq_regs(old_regs);
 }
 
 /*
@@ -1262,8 +1268,10 @@ fastcall void smp_error_interrupt(struct
 
 fastcall void smp_pmu_apic_interrupt(struct cpu_user_regs *regs)
 {
+    struct cpu_user_regs *old_regs = set_irq_regs(regs);
     ack_APIC_irq();
     hvm_do_pmu_interrupt(regs);
+    set_irq_regs(old_regs);
 }
 
 /*
diff -r 722c7e94e764 -r d33e9aae74c6 xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c       Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c       Wed Aug 19 12:53:46 2009 +0100
@@ -84,9 +84,11 @@ static void (*vendor_thermal_interrupt)(
 
 fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs)
 {
+    struct cpu_user_regs *old_regs = set_irq_regs(regs);
     irq_enter();
     vendor_thermal_interrupt(regs);
     irq_exit();
+    set_irq_regs(old_regs);
 }
 
 /* P4/Xeon Thermal regulation detect and init */
@@ -964,6 +966,7 @@ fastcall void smp_cmci_interrupt(struct 
 {
     mctelem_cookie_t mctc;
     struct mca_summary bs;
+    struct cpu_user_regs *old_regs = set_irq_regs(regs);
 
     ack_APIC_irq();
     irq_enter();
@@ -984,6 +987,7 @@ fastcall void smp_cmci_interrupt(struct 
         mctelem_dismiss(mctc);
 
     irq_exit();
+    set_irq_regs(old_regs);
 }
 
 void mce_intel_feature_init(struct cpuinfo_x86 *c)
diff -r 722c7e94e764 -r d33e9aae74c6 xen/arch/x86/genapic/delivery.c
--- a/xen/arch/x86/genapic/delivery.c   Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/arch/x86/genapic/delivery.c   Wed Aug 19 12:53:46 2009 +0100
@@ -29,13 +29,17 @@ cpumask_t target_cpus_flat(void)
 cpumask_t target_cpus_flat(void)
 {
        return cpu_online_map;
+}
+
+cpumask_t vector_allocation_domain_flat(int cpu)
+{
+       return cpu_online_map;
 } 
 
 unsigned int cpu_mask_to_apicid_flat(cpumask_t cpumask)
 {
-       return cpus_addr(cpumask)[0];
+       return cpus_addr(cpumask)[0]&0xFF;
 }
-
 
 /*
  * PHYSICAL DELIVERY MODE (unicast to physical APIC IDs).
@@ -57,8 +61,12 @@ void clustered_apic_check_phys(void)
 
 cpumask_t target_cpus_phys(void)
 {
-       /* IRQs will get bound more accurately later. */
-       return cpumask_of_cpu(0);
+       return cpu_online_map;
+}
+
+cpumask_t vector_allocation_domain_phys(int cpu)
+{
+       return cpumask_of_cpu(cpu);
 }
 
 unsigned int cpu_mask_to_apicid_phys(cpumask_t cpumask)
diff -r 722c7e94e764 -r d33e9aae74c6 xen/arch/x86/genapic/x2apic.c
--- a/xen/arch/x86/genapic/x2apic.c     Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/arch/x86/genapic/x2apic.c     Wed Aug 19 12:53:46 2009 +0100
@@ -47,8 +47,12 @@ void clustered_apic_check_x2apic(void)
 
 cpumask_t target_cpus_x2apic(void)
 {
-    /* Deliver interrupts only to CPU0 for now */
-    return cpumask_of_cpu(0);
+    return cpu_online_map;
+}
+
+cpumask_t vector_allocation_domain_x2apic(int cpu)
+{
+       return cpumask_of_cpu(cpu);
 }
 
 unsigned int cpu_mask_to_apicid_x2apic(cpumask_t cpumask)
diff -r 722c7e94e764 -r d33e9aae74c6 xen/arch/x86/hpet.c
--- a/xen/arch/x86/hpet.c       Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/arch/x86/hpet.c       Wed Aug 19 12:53:46 2009 +0100
@@ -287,6 +287,9 @@ static void hpet_msi_shutdown(unsigned i
 
 static void hpet_msi_ack(unsigned int irq)
 {
+    struct irq_desc *desc = irq_to_desc(irq);
+
+    irq_complete_move(&desc);
     ack_APIC_irq();
 }
 
@@ -298,24 +301,19 @@ static void hpet_msi_set_affinity(unsign
 {
     struct msi_msg msg;
     unsigned int dest;
-    cpumask_t tmp;
-    int vector = irq_to_vector(irq);
-
-    cpus_and(tmp, mask, cpu_online_map);
-    if ( cpus_empty(tmp) )
-        mask = TARGET_CPUS;
-
-    dest = cpu_mask_to_apicid(mask);
-
-    hpet_msi_read(vector, &msg);
-
+    struct irq_desc * desc = irq_to_desc(irq);
+    struct irq_cfg *cfg= desc->chip_data;
+
+    dest = set_desc_affinity(desc, mask);
+    if (dest == BAD_APICID)
+        return;
+
+    hpet_msi_read(irq, &msg);
     msg.data &= ~MSI_DATA_VECTOR_MASK;
-    msg.data |= MSI_DATA_VECTOR(vector);
+    msg.data |= MSI_DATA_VECTOR(cfg->vector);
     msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
     msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-    hpet_msi_write(vector, &msg);
-    irq_desc[irq].affinity = mask;
+    hpet_msi_write(irq, &msg);
 }
 
 /*
diff -r 722c7e94e764 -r d33e9aae74c6 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Wed Aug 19 12:53:46 2009 +0100
@@ -2061,13 +2061,14 @@ static void vmx_do_extint(struct cpu_use
 
     asmlinkage void do_IRQ(struct cpu_user_regs *);
     fastcall void smp_apic_timer_interrupt(struct cpu_user_regs *);
-    fastcall void smp_event_check_interrupt(void);
+    fastcall void smp_event_check_interrupt(struct cpu_user_regs *regs);
     fastcall void smp_invalidate_interrupt(void);
-    fastcall void smp_call_function_interrupt(void);
+    fastcall void smp_call_function_interrupt(struct cpu_user_regs *regs);
     fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs);
     fastcall void smp_error_interrupt(struct cpu_user_regs *regs);
     fastcall void smp_pmu_apic_interrupt(struct cpu_user_regs *regs);
     fastcall void smp_cmci_interrupt(struct cpu_user_regs *regs);
+    fastcall void smp_irq_move_cleanup_interrupt(struct cpu_user_regs *regs);
 #ifdef CONFIG_X86_MCE_THERMAL
     fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs);
 #endif
@@ -2080,17 +2081,20 @@ static void vmx_do_extint(struct cpu_use
 
     switch ( vector )
     {
+    case IRQ_MOVE_CLEANUP_VECTOR:
+        smp_irq_move_cleanup_interrupt(regs);
+        break;
     case LOCAL_TIMER_VECTOR:
         smp_apic_timer_interrupt(regs);
         break;
     case EVENT_CHECK_VECTOR:
-        smp_event_check_interrupt();
+        smp_event_check_interrupt(regs);
         break;
     case INVALIDATE_TLB_VECTOR:
         smp_invalidate_interrupt();
         break;
     case CALL_FUNCTION_VECTOR:
-        smp_call_function_interrupt();
+        smp_call_function_interrupt(regs);
         break;
     case SPURIOUS_APIC_VECTOR:
         smp_spurious_interrupt(regs);
diff -r 722c7e94e764 -r d33e9aae74c6 xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c      Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/arch/x86/i8259.c      Wed Aug 19 12:53:46 2009 +0100
@@ -58,6 +58,7 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BU
  * is no hardware IRQ pin equivalent for them, they are triggered
  * through the ICC by us (IPIs)
  */
+BUILD_SMP_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
 BUILD_SMP_INTERRUPT(event_check_interrupt,EVENT_CHECK_VECTOR)
 BUILD_SMP_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
 BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
@@ -374,7 +375,7 @@ static struct irqaction cascade = { no_a
 
 void __init init_IRQ(void)
 {
-    int i, vector;
+    int vector, irq, cpu = smp_processor_id();
 
     init_bsp_APIC();
 
@@ -389,15 +390,17 @@ void __init init_IRQ(void)
         set_intr_gate(vector, interrupt[vector]);
     }
 
-    for ( i = 0; i < 16; i++ )
-    {
-        vector_irq[LEGACY_VECTOR(i)] = i;
-        irq_desc[i].handler = &i8259A_irq_type;
-    }
-
-    /* Never allocate the hypercall vector or Linux/BSD fast-trap vector. */
-    vector_irq[HYPERCALL_VECTOR] = NEVER_ASSIGN_IRQ;
-    vector_irq[0x80] = NEVER_ASSIGN_IRQ;
+    for (irq = 0; irq < 16; irq++) {
+        struct irq_desc *desc = irq_to_desc(irq);
+        struct irq_cfg *cfg = desc->chip_data;
+        
+        desc->handler = &i8259A_irq_type;
+        per_cpu(vector_irq, cpu)[FIRST_LEGACY_VECTOR + irq] = irq;
+        cfg->domain = cpumask_of_cpu(cpu);
+        cfg->vector = FIRST_LEGACY_VECTOR + irq;
+    }
+    
+    per_cpu(vector_irq, cpu)[FIRST_HIPRIORITY_VECTOR] = 0;
 
     apic_intr_init();
 
diff -r 722c7e94e764 -r d33e9aae74c6 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/arch/x86/io_apic.c    Wed Aug 19 12:53:46 2009 +0100
@@ -30,7 +30,6 @@
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
 #include <xen/keyhandler.h>
-#include <asm/io.h>
 #include <asm/mc146818rtc.h>
 #include <asm/smp.h>
 #include <asm/desc.h>
@@ -85,7 +84,36 @@ static struct irq_pin_list {
     int apic, pin;
     unsigned int next;
 } *irq_2_pin;
+
+static int *pin_irq_map;
+
 static unsigned int irq_2_pin_free_entry;
+
+/* Use an array to record the pin-to-IRQ mapping */
+static int get_irq_from_apic_pin(int apic, int pin)
+{
+    int i, pin_base = 0;
+
+    ASSERT(apic < nr_ioapics);
+    
+    for (i = 0; i < apic; i++)
+        pin_base += nr_ioapic_registers[i];
+
+    return pin_irq_map[pin_base + pin];
+}
+
+static void set_irq_to_apic_pin(int apic, int pin, int irq)
+{
+    
+    int i, pin_base = 0;
+
+    ASSERT(apic < nr_ioapics);
+    
+    for (i = 0; i < apic; i++)
+        pin_base += nr_ioapic_registers[i];
+
+    pin_irq_map[pin_base + pin] = irq;
+}
 
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
@@ -100,7 +128,7 @@ static void add_pin_to_irq(unsigned int 
         BUG_ON((entry->apic == apic) && (entry->pin == pin));
         entry = irq_2_pin + entry->next;
     }
-
+    
     BUG_ON((entry->apic == apic) && (entry->pin == pin));
 
     if (entry->pin != -1) {
@@ -113,6 +141,8 @@ static void add_pin_to_irq(unsigned int 
     }
     entry->apic = apic;
     entry->pin = pin;
+
+    set_irq_to_apic_pin(apic, pin, irq);
 }
 
 static void remove_pin_at_irq(unsigned int irq, int apic, int pin)
@@ -145,14 +175,16 @@ static void remove_pin_at_irq(unsigned i
         entry->next = irq_2_pin_free_entry;
         irq_2_pin_free_entry = entry - irq_2_pin;
     }
+
+    set_irq_to_apic_pin(apic, pin, -1);
 }
 
 /*
  * Reroute an IRQ to a different pin.
  */
 static void __init replace_pin_at_irq(unsigned int irq,
-                                     int oldapic, int oldpin,
-                                     int newapic, int newpin)
+                      int oldapic, int oldpin,
+                      int newapic, int newpin)
 {
     struct irq_pin_list *entry = irq_2_pin + irq;
 
@@ -232,7 +264,7 @@ static void clear_IO_APIC_pin(unsigned i
 {
     struct IO_APIC_route_entry entry;
     unsigned long flags;
-       
+    
     /* Check delivery_mode to be sure we're not clearing an SMI pin */
     spin_lock_irqsave(&ioapic_lock, flags);
     *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
@@ -262,32 +294,160 @@ static void clear_IO_APIC (void)
 }
 
 #ifdef CONFIG_SMP
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
+fastcall void smp_irq_move_cleanup_interrupt(struct cpu_user_regs *regs)
+{
+    unsigned vector, me;
+    struct cpu_user_regs *old_regs = set_irq_regs(regs);
+
+    ack_APIC_irq();
+    irq_enter();
+
+    me = smp_processor_id();
+    for (vector = FIRST_DYNAMIC_VECTOR; vector < NR_VECTORS; vector++) {
+        unsigned int irq;
+        unsigned int irr;
+        struct irq_desc *desc;
+        struct irq_cfg *cfg;
+        irq = __get_cpu_var(vector_irq)[vector];
+
+        if (irq == -1)
+            continue;
+
+        desc = irq_to_desc(irq);
+        if (!desc)
+            continue;
+
+        cfg = desc->chip_data;
+        spin_lock(&desc->lock);
+        if (!cfg->move_cleanup_count)
+            goto unlock;
+
+        if (vector == cfg->vector && cpu_isset(me, cfg->domain))
+            goto unlock;
+
+        irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+        /*
+         * Check if the vector that needs to be cleaned up is
+         * registered in the CPU's IRR. If so, then this is not
+         * the best time to clean it up. Let's clean it up in the
+         * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
+         * to myself.
+         */
+        if (irr  & (1 << (vector % 32))) {
+            genapic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
+            goto unlock;
+        }
+        __get_cpu_var(vector_irq)[vector] = -1;
+        cfg->move_cleanup_count--;
+unlock:
+        spin_unlock(&desc->lock);
+    }
+
+    irq_exit();
+    set_irq_regs(old_regs);
+}
+
+static void send_cleanup_vector(struct irq_cfg *cfg)
+{
+    cpumask_t cleanup_mask;
+
+    cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+    cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+    genapic->send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+
+    cfg->move_in_progress = 0;
+}
+
+void irq_complete_move(struct irq_desc **descp)
+{
+    struct irq_desc *desc = *descp;
+    struct irq_cfg *cfg = desc->chip_data;
+    unsigned vector, me;
+
+    if (likely(!cfg->move_in_progress))
+        return;
+
+    vector = get_irq_regs()->entry_vector;
+    me = smp_processor_id();
+
+    if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+        send_cleanup_vector(cfg);
+}
+
+unsigned int set_desc_affinity(struct irq_desc *desc, cpumask_t mask)
+{
+    struct irq_cfg *cfg;
+    unsigned int irq;
+    int ret;
+    cpumask_t dest_mask;
+
+    if (!cpus_intersects(mask, cpu_online_map))
+        return BAD_APICID;
+
+    irq = desc->irq;
+    cfg = desc->chip_data;
+    
+    lock_vector_lock();   
+    ret = __assign_irq_vector(irq, cfg, mask);
+    unlock_vector_lock();
+    
+    if (ret < 0)
+        return BAD_APICID;
+
+    cpus_copy(desc->affinity, mask);
+    cpus_and(dest_mask, desc->affinity, cfg->domain);
+
+    return cpu_mask_to_apicid(dest_mask);
+}
+
+static void
+set_ioapic_affinity_irq_desc(struct irq_desc *desc,
+                                        const struct cpumask mask)
 {
     unsigned long flags;
-    int pin;
-    struct irq_pin_list *entry = irq_2_pin + irq;
-    unsigned int apicid_value;
-
-    cpus_and(cpumask, cpumask, cpu_online_map);
-    if (cpus_empty(cpumask))
-        cpumask = TARGET_CPUS;
-
-    apicid_value = cpu_mask_to_apicid(cpumask);
-    /* Prepare to do the io_apic_write */
-    apicid_value = apicid_value << 24;
+    unsigned int dest;
+    int pin, irq;
+    struct irq_cfg *cfg;
+    struct irq_pin_list *entry;
+
+    irq = desc->irq;
+    cfg = desc->chip_data;
+
     spin_lock_irqsave(&ioapic_lock, flags);
-    for (;;) {
-        pin = entry->pin;
-        if (pin == -1)
-            break;
-        io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
-        if (!entry->next)
-            break;
-        entry = irq_2_pin + entry->next;
-    }
-    set_irq_info(irq, cpumask);
+    dest = set_desc_affinity(desc, mask);
+    if (dest != BAD_APICID) {
+        /* Only the high 8 bits are valid. */
+        dest = SET_APIC_LOGICAL_ID(dest);
+        entry = irq_2_pin + irq;
+        for (;;) {
+            unsigned int data;
+            pin = entry->pin;
+            if (pin == -1)
+                break;
+
+            io_apic_write(entry->apic, 0x10 + 1 + pin*2, dest);
+            data = io_apic_read(entry->apic, 0x10 + pin*2);
+            data &= ~IO_APIC_REDIR_VECTOR_MASK;
+            data |= cfg->vector & 0xFF;
+            io_apic_modify(entry->apic, 0x10 + pin*2, data);
+
+            if (!entry->next)
+                break;
+            entry = irq_2_pin + entry->next;
+        }
+    }
     spin_unlock_irqrestore(&ioapic_lock, flags);
+
+}
+
+static void
+set_ioapic_affinity_irq(unsigned int irq, const struct cpumask mask)
+{
+    struct irq_desc *desc;
+
+    desc = irq_to_desc(irq);
+
+    set_ioapic_affinity_irq_desc(desc, mask);
 }
 #endif /* CONFIG_SMP */
 
@@ -373,6 +533,7 @@ void /*__init*/ setup_ioapic_dest(void)
 void /*__init*/ setup_ioapic_dest(void)
 {
     int pin, ioapic, irq, irq_entry;
+    struct irq_cfg *cfg;
 
     if (skip_ioapic_setup == 1)
         return;
@@ -383,7 +544,9 @@ void /*__init*/ setup_ioapic_dest(void)
             if (irq_entry == -1)
                 continue;
             irq = pin_2_irq(irq_entry, ioapic, pin);
-            set_ioapic_affinity_irq(irq, TARGET_CPUS);
+            cfg = irq_cfg(irq);
+            BUG_ON(cpus_empty(cfg->domain));
+            set_ioapic_affinity_irq(irq, cfg->domain);
         }
 
     }
@@ -409,7 +572,7 @@ static int EISA_ELCR(unsigned int irq)
  * EISA conforming in the MP table, that means its trigger type must
  * be read in from the ELCR */
 
-#define default_EISA_trigger(idx)      (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
+#define default_EISA_trigger(idx)    (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
 #define default_EISA_polarity(idx)     (0)
 
 /* ISA interrupts are always polarity zero edge triggered,
@@ -682,11 +845,12 @@ static void __init setup_IO_APIC_irqs(vo
     struct IO_APIC_route_entry entry;
     int apic, pin, idx, irq, first_notcon = 1, vector;
     unsigned long flags;
+    struct irq_cfg *cfg;
 
     apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
     for (apic = 0; apic < nr_ioapics; apic++) {
-       for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+        for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 
             /*
              * add it to the IO-APIC irq-routing table:
@@ -695,9 +859,7 @@ static void __init setup_IO_APIC_irqs(vo
 
             entry.delivery_mode = INT_DELIVERY_MODE;
             entry.dest_mode = INT_DEST_MODE;
-            entry.mask = 0;                            /* enable IRQ */
-            entry.dest.logical.logical_dest = 
-                cpu_mask_to_apicid(TARGET_CPUS);
+            entry.mask = 0;                /* enable IRQ */
 
             idx = find_irq_entry(apic,pin,mp_INT);
             if (idx == -1) {
@@ -736,12 +898,16 @@ static void __init setup_IO_APIC_irqs(vo
 
             if (IO_APIC_IRQ(irq)) {
                 vector = assign_irq_vector(irq);
+                BUG_ON(vector < 0);
                 entry.vector = vector;
                 ioapic_register_intr(irq, IOAPIC_AUTO);
 
                 if (!apic && (irq < 16))
                     disable_8259A_irq(irq);
             }
+            cfg = irq_cfg(irq);
+            entry.dest.logical.logical_dest = 
+                cpu_mask_to_apicid(cfg->domain);
             spin_lock_irqsave(&ioapic_lock, flags);
             io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
             io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
@@ -968,11 +1134,16 @@ static void __init enable_IO_APIC(void)
 
     /* Initialise dynamic irq_2_pin free list. */
     irq_2_pin = xmalloc_array(struct irq_pin_list, PIN_MAP_SIZE);
-    memset(irq_2_pin, 0, nr_irqs_gsi * sizeof(*irq_2_pin));
+    memset(irq_2_pin, 0, PIN_MAP_SIZE * sizeof(*irq_2_pin));
+    pin_irq_map = xmalloc_array(int, nr_irqs_gsi);
+    memset(pin_irq_map, 0, nr_irqs_gsi * sizeof(int));
+        
     for (i = 0; i < PIN_MAP_SIZE; i++)
         irq_2_pin[i].pin = -1;
     for (i = irq_2_pin_free_entry = nr_irqs_gsi; i < PIN_MAP_SIZE; i++)
         irq_2_pin[i].next = i + 1;
+    for (i = 0; i < nr_irqs_gsi; i++)
+        pin_irq_map[i] = -1;
 
     for(apic = 0; apic < nr_ioapics; apic++) {
         int pin;
@@ -1266,7 +1437,11 @@ static unsigned int startup_edge_ioapic_
  */
 static void ack_edge_ioapic_irq(unsigned int irq)
 {
-    if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
+    struct irq_desc *desc = irq_to_desc(irq);
+    
+    irq_complete_move(&desc);
+
+    if ((desc->status & (IRQ_PENDING | IRQ_DISABLED))
         == (IRQ_PENDING | IRQ_DISABLED))
         mask_IO_APIC_irq(irq);
     ack_APIC_irq();
@@ -1309,6 +1484,9 @@ static void mask_and_ack_level_ioapic_ir
 {
     unsigned long v;
     int i;
+    struct irq_desc *desc = irq_to_desc(irq);
+
+    irq_complete_move(&desc);
 
     if ( ioapic_ack_new )
         return;
@@ -1446,6 +1624,8 @@ static void ack_msi_irq(unsigned int irq
 {
     struct irq_desc *desc = irq_to_desc(irq);
 
+    irq_complete_move(&desc);
+
     if ( msi_maskable_irq(desc->msi_desc) )
         ack_APIC_irq(); /* ACKTYPE_NONE */
 }
@@ -1597,7 +1777,7 @@ static inline void check_timer(void)
 static inline void check_timer(void)
 {
     int apic1, pin1, apic2, pin2;
-    int vector;
+    int vector, ret;
     unsigned long flags;
 
     local_irq_save(flags);
@@ -1606,8 +1786,12 @@ static inline void check_timer(void)
      * get/set the timer IRQ vector:
      */
     disable_8259A_irq(0);
-    vector = assign_irq_vector(0);
-
+    vector = FIRST_HIPRIORITY_VECTOR;
+    clear_irq_vector(0);
+
+    if ((ret = bind_irq_vector(0, vector, (cpumask_t)CPU_MASK_ALL)))
+        printk(KERN_ERR"..IRQ0 is not set correctly with ioapic!!!, err:%d\n", 
ret);
+    
     irq_desc[0].depth  = 0;
     irq_desc[0].status &= ~IRQ_DISABLED;
     irq_desc[0].handler = &ioapic_edge_type;
@@ -1914,6 +2098,7 @@ int io_apic_set_pci_routing (int ioapic,
 {
     struct IO_APIC_route_entry entry;
     unsigned long flags;
+    int vector;
 
     if (!IO_APIC_IRQ(irq)) {
         printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
@@ -1942,7 +2127,10 @@ int io_apic_set_pci_routing (int ioapic,
     if (irq >= 16)
         add_pin_to_irq(irq, ioapic, pin);
 
-    entry.vector = assign_irq_vector(irq);
+    vector = assign_irq_vector(irq);
+    if (vector < 0)
+        return vector;
+    entry.vector = vector;
 
     apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
                "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
@@ -2014,7 +2202,6 @@ int ioapic_guest_write(unsigned long phy
 
     /* Write first half from guest; second half is target info. */
     *(u32 *)&new_rte = val;
-    new_rte.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
 
     /*
      * What about weird destination types?
@@ -2060,10 +2247,10 @@ int ioapic_guest_write(unsigned long phy
     }
 
     if ( old_rte.vector >= FIRST_DYNAMIC_VECTOR )
-        old_irq = vector_irq[old_rte.vector];
-
-    if ( new_rte.vector >= FIRST_DYNAMIC_VECTOR )
-        new_irq = vector_irq[new_rte.vector];
+        old_irq = get_irq_from_apic_pin(apic, pin);
+
+    /* FIXME: dirty hack to support per-cpu vector. */
+    new_irq = new_rte.vector;
 
     if ( (old_irq != new_irq) && (old_irq >= 0) && IO_APIC_IRQ(old_irq) )
     {
@@ -2096,6 +2283,8 @@ int ioapic_guest_write(unsigned long phy
 
         /* Mask iff level triggered. */
         new_rte.mask = new_rte.trigger;
+        /* Set the vector field to the real vector! */
+        new_rte.vector = irq_cfg[new_irq].vector;
     }
     else if ( !new_rte.mask )
     {
@@ -2104,6 +2293,8 @@ int ioapic_guest_write(unsigned long phy
         new_rte.mask = 1;
     }
 
+    new_rte.dest.logical.logical_dest =
+        cpu_mask_to_apicid(irq_cfg[new_irq].domain);
 
     io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&new_rte) + 0));
     io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&new_rte) + 1));
@@ -2144,11 +2335,12 @@ void dump_ioapic_irq_info(void)
 
             printk("vector=%u, delivery_mode=%u, dest_mode=%s, "
                    "delivery_status=%d, polarity=%d, irr=%d, "
-                   "trigger=%s, mask=%d\n",
+                   "trigger=%s, mask=%d, dest_id:%d\n",
                    rte.vector, rte.delivery_mode,
                    rte.dest_mode ? "logical" : "physical",
                    rte.delivery_status, rte.polarity, rte.irr,
-                   rte.trigger ? "level" : "edge", rte.mask);
+                   rte.trigger ? "level" : "edge", rte.mask,
+                   rte.dest.logical.logical_dest);
 
             if ( entry->next == 0 )
                 break;
diff -r 722c7e94e764 -r d33e9aae74c6 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/arch/x86/irq.c        Wed Aug 19 12:53:46 2009 +0100
@@ -20,6 +20,7 @@
 #include <asm/msi.h>
 #include <asm/current.h>
 #include <asm/flushtlb.h>
+#include <asm/mach-generic/mach_apic.h>
 #include <public/physdev.h>
 
 /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
@@ -38,12 +39,71 @@ int __read_mostly *irq_status = NULL;
 #define IRQ_USED        (1)
 #define IRQ_RSVD        (2)
 
+#define IRQ_VECTOR_UNASSIGNED (0)
+
+DECLARE_BITMAP(used_vectors, NR_VECTORS);
+
+struct irq_cfg __read_mostly *irq_cfg = NULL;
+
 static struct timer *irq_guest_eoi_timer;
 
 static DEFINE_SPINLOCK(vector_lock);
-int vector_irq[NR_VECTORS] __read_mostly = {
-    [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN_IRQ
+
+DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
+    [0 ... NR_VECTORS - 1] = -1
 };
+
+DEFINE_PER_CPU(struct cpu_user_regs *, __irq_regs);
+
+void lock_vector_lock(void)
+{
+    /* Used to ensure the online set of CPUs does not change
+     * during assign_irq_vector.
+     */
+    spin_lock(&vector_lock);
+}
+
+void unlock_vector_lock(void)
+{
+    spin_unlock(&vector_lock);
+}
+
+static int __bind_irq_vector(int irq, int vector, cpumask_t domain)
+{
+    cpumask_t mask;
+    int cpu;
+    struct irq_cfg *cfg = irq_cfg(irq);
+
+    BUG_ON((unsigned)irq >= nr_irqs);
+    BUG_ON((unsigned)vector >= NR_VECTORS);
+
+    cpus_and(mask, domain, cpu_online_map);
+    if (cpus_empty(mask))
+        return -EINVAL;
+    if ((cfg->vector == vector) && cpus_equal(cfg->domain, domain))
+        return 0;
+    if (cfg->vector != IRQ_VECTOR_UNASSIGNED) 
+        return -EBUSY;
+    for_each_cpu_mask(cpu, mask)
+        per_cpu(vector_irq, cpu)[vector] = irq;
+    cfg->vector = vector;
+    cfg->domain = domain;
+    irq_status[irq] = IRQ_USED;
+    if (IO_APIC_IRQ(irq))
+        irq_vector[irq] = vector;
+    return 0;
+}
+
+int bind_irq_vector(int irq, int vector, cpumask_t domain)
+{
+    unsigned long flags;
+    int ret;
+
+    spin_lock_irqsave(&vector_lock, flags);
+    ret = __bind_irq_vector(irq, vector, domain);
+    spin_unlock_irqrestore(&vector_lock, flags);
+    return ret;
+}
 
 static inline int find_unassigned_irq(void)
 {
@@ -69,7 +129,7 @@ int create_irq(void)
     irq = find_unassigned_irq();
     if (irq < 0)
          goto out;
-    ret = __assign_irq_vector(irq);
+    ret = __assign_irq_vector(irq, irq_cfg(irq), TARGET_CPUS);
     if (ret < 0)
         irq = ret;
 out:
@@ -81,8 +141,8 @@ void dynamic_irq_cleanup(unsigned int ir
 void dynamic_irq_cleanup(unsigned int irq)
 {
     struct irq_desc *desc = irq_to_desc(irq);
+    unsigned long flags;
     struct irqaction *action;
-    unsigned long flags;
 
     spin_lock_irqsave(&desc->lock, flags);
     desc->status  |= IRQ_DISABLED;
@@ -102,12 +162,39 @@ void dynamic_irq_cleanup(unsigned int ir
         xfree(action);
 }
 
+static void init_one_irq_status(int irq);
+
 static void __clear_irq_vector(int irq)
 {
-    int vector = irq_vector[irq];
-    vector_irq[vector] = FREE_TO_ASSIGN_IRQ;
-    irq_vector[irq] = 0;
-    irq_status[irq] = IRQ_UNUSED;
+    int cpu, vector;
+    cpumask_t tmp_mask;
+    struct irq_cfg *cfg = irq_cfg(irq);
+
+    BUG_ON(!cfg->vector);
+
+    vector = cfg->vector;
+    cpus_and(tmp_mask, cfg->domain, cpu_online_map);
+
+    for_each_cpu_mask(cpu, tmp_mask)
+        per_cpu(vector_irq, cpu)[vector] = -1;
+
+    cfg->vector = IRQ_VECTOR_UNASSIGNED;
+    cpus_clear(cfg->domain);
+    init_one_irq_status(irq);
+
+    if (likely(!cfg->move_in_progress))
+        return;
+    for_each_cpu_mask(cpu, tmp_mask) {
+        for (vector = FIRST_DYNAMIC_VECTOR; vector <= LAST_DYNAMIC_VECTOR;
+                                vector++) {
+            if (per_cpu(vector_irq, cpu)[vector] != irq)
+                continue;
+            per_cpu(vector_irq, cpu)[vector] = -1;
+             break;
+        }
+     }
+
+    cfg->move_in_progress = 0;
 }
 
 void clear_irq_vector(int irq)
@@ -121,6 +208,7 @@ void clear_irq_vector(int irq)
 
 void destroy_irq(unsigned int irq)
 {
+    BUG_ON(!MSI_IRQ(irq));
     dynamic_irq_cleanup(irq);
     clear_irq_vector(irq);
 }
@@ -128,12 +216,16 @@ int irq_to_vector(int irq)
 int irq_to_vector(int irq)
 {
     int vector = -1;
+    struct irq_cfg *cfg;
 
     BUG_ON(irq >= nr_irqs || irq < 0);
 
-    if (IO_APIC_IRQ(irq) || MSI_IRQ(irq))
+    if (IO_APIC_IRQ(irq))
         vector = irq_vector[irq];
-    else
+    else if(MSI_IRQ(irq)) {
+        cfg = irq_cfg(irq);
+        vector = cfg->vector;
+    } else
         vector = LEGACY_VECTOR(irq);
 
     return vector;
@@ -141,13 +233,13 @@ int irq_to_vector(int irq)
 
 static void init_one_irq_desc(struct irq_desc *desc)
 {
-        desc->status  = IRQ_DISABLED;
-        desc->handler = &no_irq_type;
-        desc->action  = NULL;
-        desc->depth   = 1;
-        desc->msi_desc = NULL;
-        spin_lock_init(&desc->lock);
-        cpus_setall(desc->affinity);
+    desc->status  = IRQ_DISABLED;
+    desc->handler = &no_irq_type;
+    desc->action  = NULL;
+    desc->depth   = 1;
+    desc->msi_desc = NULL;
+    spin_lock_init(&desc->lock);
+    cpus_setall(desc->affinity);
 }
 
 static void init_one_irq_status(int irq)
@@ -155,30 +247,51 @@ static void init_one_irq_status(int irq)
     irq_status[irq] = IRQ_UNUSED;
 }
 
+static void init_one_irq_cfg(struct irq_cfg *cfg)
+{
+    cfg->vector = IRQ_VECTOR_UNASSIGNED;
+    cpus_clear(cfg->domain);
+    cpus_clear(cfg->old_domain);
+}
+
 int init_irq_data(void)
 {
     struct irq_desc *desc;
+    struct irq_cfg *cfg;
     int irq;
 
     irq_desc = xmalloc_array(struct irq_desc, nr_irqs);
+    irq_cfg = xmalloc_array(struct irq_cfg, nr_irqs);
     irq_status = xmalloc_array(int, nr_irqs);
     irq_guest_eoi_timer = xmalloc_array(struct timer, nr_irqs);
-    irq_vector = xmalloc_array(u8, nr_irqs);
+    irq_vector = xmalloc_array(u8, nr_irqs_gsi);
     
-    if (!irq_desc || !irq_status ||! irq_vector || !irq_guest_eoi_timer)
-        return -1;
+    if (!irq_desc || !irq_cfg || !irq_status ||! irq_vector ||
+        !irq_guest_eoi_timer)
+        return -ENOMEM;
 
     memset(irq_desc, 0,  nr_irqs * sizeof(*irq_desc));
+    memset(irq_cfg, 0,  nr_irqs * sizeof(*irq_cfg));
     memset(irq_status, 0,  nr_irqs * sizeof(*irq_status));
-    memset(irq_vector, 0, nr_irqs * sizeof(*irq_vector));
+    memset(irq_vector, 0, nr_irqs_gsi * sizeof(*irq_vector));
     memset(irq_guest_eoi_timer, 0, nr_irqs * sizeof(*irq_guest_eoi_timer));
     
     for (irq = 0; irq < nr_irqs; irq++) {
         desc = irq_to_desc(irq);
+        cfg = irq_cfg(irq);
         desc->irq = irq;
+        desc->chip_data = cfg;
         init_one_irq_desc(desc);
+        init_one_irq_cfg(cfg);
         init_one_irq_status(irq);
     }
+
+    /* Never allocate the hypercall vector or Linux/BSD fast-trap vector. */
+    set_bit(LEGACY_SYSCALL_VECTOR, used_vectors);
+    set_bit(HYPERCALL_VECTOR, used_vectors);
+    
+    /* IRQ_MOVE_CLEANUP_VECTOR is used to clean up vectors */
+    set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
 
     return 0;
 }
@@ -210,54 +323,133 @@ struct hw_interrupt_type no_irq_type = {
 
 atomic_t irq_err_count;
 
-int __assign_irq_vector(int irq)
-{
-    static unsigned current_vector = FIRST_DYNAMIC_VECTOR;
-    unsigned vector;
-
-    BUG_ON(irq >= nr_irqs || irq < 0);
-
-    if ((irq_to_vector(irq) > 0)) 
-        return irq_to_vector(irq);
-
-    vector = current_vector;
-    while (vector_irq[vector] != FREE_TO_ASSIGN_IRQ) {
+int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
+{
+    /*
+     * NOTE! The local APIC isn't very good at handling
+     * multiple interrupts at the same interrupt level.
+     * As the interrupt level is determined by taking the
+     * vector number and shifting that right by 4, we
+     * want to spread these out a bit so that they don't
+     * all fall in the same interrupt level.
+     *
+     * Also, we've got to be careful not to trash gate
+     * 0x80, because int 0x80 is hm, kind of importantish. ;)
+     */
+    static int current_vector = FIRST_DYNAMIC_VECTOR, current_offset = 0;
+    unsigned int old_vector;
+    int cpu, err;
+    cpumask_t tmp_mask;
+
+    if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+        return -EBUSY;
+
+    old_vector = irq_to_vector(irq);
+    if (old_vector) {
+        cpus_and(tmp_mask, mask, cpu_online_map);
+        cpus_and(tmp_mask, cfg->domain, tmp_mask);
+        if (!cpus_empty(tmp_mask)) {
+            cfg->vector = old_vector;
+            return 0;
+        }
+    }
+
+    /* Only try and allocate irqs on cpus that are present */
+    cpus_and(mask, mask, cpu_online_map);
+
+    err = -ENOSPC;
+    for_each_cpu_mask(cpu, mask) {
+        int new_cpu;
+        int vector, offset;
+
+        tmp_mask = vector_allocation_domain(cpu);
+        cpus_and(tmp_mask, tmp_mask, cpu_online_map);
+
+        vector = current_vector;
+        offset = current_offset;
+next:
         vector += 8;
-        if (vector > LAST_DYNAMIC_VECTOR)
-            vector = FIRST_DYNAMIC_VECTOR + ((vector + 1) & 7);
-
-        if (vector == current_vector)
-            return -ENOSPC;
-    }
-
-    current_vector = vector;
-    vector_irq[vector] = irq;
-    irq_vector[irq] = vector;
-    irq_status[irq] = IRQ_USED;
-
-    return vector;
+        if (vector > LAST_DYNAMIC_VECTOR) {
+            /* If out of vectors on large boxen, must share them. */
+            offset = (offset + 1) % 8;
+            vector = FIRST_DYNAMIC_VECTOR + offset;
+        }
+        if (unlikely(current_vector == vector))
+            continue;
+
+        if (test_bit(vector, used_vectors))
+            goto next;
+
+        for_each_cpu_mask(new_cpu, tmp_mask)
+            if (per_cpu(vector_irq, new_cpu)[vector] != -1)
+                goto next;
+        /* Found one! */
+        current_vector = vector;
+        current_offset = offset;
+        if (old_vector) {
+            cfg->move_in_progress = 1;
+            cpus_copy(cfg->old_domain, cfg->domain);
+        }
+        for_each_cpu_mask(new_cpu, tmp_mask)
+            per_cpu(vector_irq, new_cpu)[vector] = irq;
+        cfg->vector = vector;
+        cpus_copy(cfg->domain, tmp_mask);
+
+        irq_status[irq] = IRQ_USED;
+        if (IO_APIC_IRQ(irq))
+            irq_vector[irq] = vector;
+        err = 0;
+        break;
+    }
+    return err;
 }
 
 int assign_irq_vector(int irq)
 {
     int ret;
     unsigned long flags;
+    struct irq_cfg *cfg = &irq_cfg[irq];
     
+    BUG_ON(irq >= nr_irqs || irq < 0);
+
     spin_lock_irqsave(&vector_lock, flags);
-    ret = __assign_irq_vector(irq);
+    ret = __assign_irq_vector(irq, cfg, TARGET_CPUS);
+    if (!ret)
+        ret = cfg->vector;
     spin_unlock_irqrestore(&vector_lock, flags);
-
     return ret;
 }
 
+/*
+ * Initialize vector_irq on a new cpu. This function must be called
+ * with vector_lock held.
+ */
+void __setup_vector_irq(int cpu)
+{
+    int irq, vector;
+    struct irq_cfg *cfg;
+
+    /* Clear vector_irq */
+    for (vector = 0; vector < NR_VECTORS; ++vector)
+        per_cpu(vector_irq, cpu)[vector] = -1;
+    /* Mark the inuse vectors */
+    for (irq = 0; irq < nr_irqs; ++irq) {
+        cfg = irq_cfg(irq);
+        if (!cpu_isset(cpu, cfg->domain))
+            continue;
+        vector = irq_to_vector(irq);
+        per_cpu(vector_irq, cpu)[vector] = irq;
+    }
+}
 
 asmlinkage void do_IRQ(struct cpu_user_regs *regs)
 {
     struct irqaction *action;
     uint32_t          tsc_in;
+    struct irq_desc  *desc;
     unsigned int      vector = regs->entry_vector;
-    int irq = vector_irq[vector];
-    struct irq_desc  *desc;
+    int irq = __get_cpu_var(vector_irq[vector]);
+    struct cpu_user_regs *old_regs = set_irq_regs(regs);
     
     perfc_incr(irqs);
 
@@ -265,6 +457,7 @@ asmlinkage void do_IRQ(struct cpu_user_r
         ack_APIC_irq();
         printk("%s: %d.%d No irq handler for vector (irq %d)\n",
                 __func__, smp_processor_id(), vector, irq);
+        set_irq_regs(old_regs);
         return;
     }
 
@@ -281,6 +474,7 @@ asmlinkage void do_IRQ(struct cpu_user_r
         TRACE_3D(TRC_TRACE_IRQ, irq, tsc_in, get_cycles());
         irq_exit();
         spin_unlock(&desc->lock);
+        set_irq_regs(old_regs);
         return;
     }
 
@@ -314,6 +508,7 @@ asmlinkage void do_IRQ(struct cpu_user_r
  out:
     desc->handler->end(irq);
     spin_unlock(&desc->lock);
+    set_irq_regs(old_regs);
 }
 
 int request_irq(unsigned int irq,
@@ -412,6 +607,7 @@ typedef struct {
 #define ACKTYPE_UNMASK 1     /* Unmask PIC hardware (from any CPU)   */
 #define ACKTYPE_EOI    2     /* EOI on the CPU that was interrupted  */
     cpumask_t cpu_eoi_map;   /* CPUs that need to EOI this interrupt */
+    u8 eoi_vector;           /* vector awaiting the EOI */
     struct domain *guest[IRQ_MAX_GUESTS];
 } irq_guest_action_t;
 
@@ -472,7 +668,7 @@ static void __do_IRQ_guest(int irq)
     struct domain      *d;
     int                 i, sp, already_pending = 0;
     struct pending_eoi *peoi = this_cpu(pending_eoi);
-    int vector = irq_to_vector(irq);
+    int vector = get_irq_regs()->entry_vector;
 
     if ( unlikely(action->nr_guests == 0) )
     {
@@ -492,6 +688,7 @@ static void __do_IRQ_guest(int irq)
         peoi[sp].ready = 0;
         pending_eoi_sp(peoi) = sp+1;
         cpu_set(smp_processor_id(), action->cpu_eoi_map);
+        action->eoi_vector = vector;
     }
 
     for ( i = 0; i < action->nr_guests; i++ )
@@ -583,7 +780,8 @@ static void flush_ready_eoi(void)
 
     while ( (--sp >= 0) && peoi[sp].ready )
     {
-        irq = vector_irq[peoi[sp].vector];
+        irq = __get_cpu_var(vector_irq[peoi[sp].vector]);
+        ASSERT(irq > 0);
         desc = irq_to_desc(irq);
         spin_lock(&desc->lock);
         desc->handler->end(irq);
@@ -607,9 +805,10 @@ static void __set_eoi_ready(struct irq_d
         return;
 
     sp = pending_eoi_sp(peoi);
+
     do {
         ASSERT(sp > 0);
-    } while ( peoi[--sp].vector != irq_to_vector(irq) );
+    } while ( peoi[--sp].vector != action->eoi_vector );
     ASSERT(!peoi[sp].ready);
     peoi[sp].ready = 1;
 }
@@ -1233,57 +1432,58 @@ extern void dump_ioapic_irq_info(void);
 
 static void dump_irqs(unsigned char key)
 {
-    int i, glob_irq, irq, vector;
+    int i, irq, pirq;
     struct irq_desc *desc;
+    struct irq_cfg *cfg;
     irq_guest_action_t *action;
     struct domain *d;
     unsigned long flags;
 
     printk("Guest interrupt information:\n");
 
-    for ( vector = 0; vector < NR_VECTORS; vector++ )
-    {
-
-        glob_irq = vector_to_irq(vector);
-        if (glob_irq < 0)
+    for ( irq = 0; irq < nr_irqs; irq++ )
+    {
+
+        desc = irq_to_desc(irq);
+        cfg = desc->chip_data;
+
+        if ( !desc->handler || desc->handler == &no_irq_type )
             continue;
 
-        desc = irq_to_desc(glob_irq);
-        if ( desc == NULL || desc->handler == &no_irq_type )
-            continue;
-
         spin_lock_irqsave(&desc->lock, flags);
 
         if ( !(desc->status & IRQ_GUEST) )
-            printk("   Vec%3d IRQ%3d: type=%-15s status=%08x "
-                   "mapped, unbound\n",
-                   vector, glob_irq, desc->handler->typename, desc->status);
+            /* Only show CPU0 - CPU31's affinity info. */
+            printk("   IRQ:%4d, IRQ affinity:0x%08x, Vec:%3d type=%-15s"
+                    " status=%08x mapped, unbound\n",
+                   irq, *(int*)cfg->domain.bits, cfg->vector,
+                    desc->handler->typename, desc->status);
         else
         {
             action = (irq_guest_action_t *)desc->action;
 
-            printk("   Vec%3d IRQ%3d: type=%-15s status=%08x "
-                   "in-flight=%d domain-list=",
-                   vector, glob_irq, desc->handler->typename,
-                   desc->status, action->in_flight);
+            printk("   IRQ:%4d, IRQ affinity:0x%08x, Vec:%3d type=%-15s "
+                    "status=%08x in-flight=%d domain-list=",
+                   irq, *(int*)cfg->domain.bits, cfg->vector,
+                   desc->handler->typename, desc->status, action->in_flight);
 
             for ( i = 0; i < action->nr_guests; i++ )
             {
                 d = action->guest[i];
-                irq = domain_irq_to_pirq(d, vector_irq[vector]);
+                pirq = domain_irq_to_pirq(d, irq);
                 printk("%u:%3d(%c%c%c%c)",
-                       d->domain_id, irq,
-                       (test_bit(d->pirq_to_evtchn[glob_irq],
+                       d->domain_id, pirq,
+                       (test_bit(d->pirq_to_evtchn[pirq],
                                  &shared_info(d, evtchn_pending)) ?
                         'P' : '-'),
-                       (test_bit(d->pirq_to_evtchn[glob_irq] /
+                       (test_bit(d->pirq_to_evtchn[pirq] /
                                  BITS_PER_EVTCHN_WORD(d),
                                  &vcpu_info(d->vcpu[0], evtchn_pending_sel)) ?
                         'S' : '-'),
-                       (test_bit(d->pirq_to_evtchn[glob_irq],
+                       (test_bit(d->pirq_to_evtchn[pirq],
                                  &shared_info(d, evtchn_mask)) ?
                         'M' : '-'),
-                       (test_bit(glob_irq, d->pirq_mask) ?
+                       (test_bit(pirq, d->pirq_mask) ?
                         'M' : '-'));
                 if ( i != action->nr_guests )
                     printk(",");
@@ -1315,53 +1515,69 @@ __initcall(setup_dump_irqs);
 #include <asm/mach-generic/mach_apic.h>
 #include <xen/delay.h>
 
-void fixup_irqs(cpumask_t map)
-{
-    unsigned int vector, sp;
+/* A cpu has been removed from cpu_online_mask.  Re-set irq affinities. */
+void fixup_irqs(void)
+{
+    unsigned int irq, sp;
     static int warned;
+    struct irq_desc *desc;
     irq_guest_action_t *action;
     struct pending_eoi *peoi;
-    irq_desc_t         *desc;
-    unsigned long       flags;
-
-    /* Direct all future interrupts away from this CPU. */
-    for ( vector = 0; vector < NR_VECTORS; vector++ )
-    {
-        cpumask_t mask;
-        if ( vector_to_irq(vector) == 2 )
+    for ( irq = 0; irq < nr_irqs; irq++ ) {
+        int break_affinity = 0;
+        int set_affinity = 1;
+        cpumask_t affinity;
+        if (irq == 2)
             continue;
-
-        desc = irq_to_desc(vector_to_irq(vector));
-
-        spin_lock_irqsave(&desc->lock, flags);
-
-        cpus_and(mask, desc->affinity, map);
-        if ( any_online_cpu(mask) == NR_CPUS )
+        desc = irq_to_desc(irq);
+        /* interrupts are disabled at this point */
+        spin_lock(&desc->lock);
+
+        affinity = desc->affinity;
+        if (!desc->action ||
+            cpus_equal(affinity, cpu_online_map)) {
+            spin_unlock(&desc->lock);
+            continue;
+        }
+
+        cpus_and(affinity, affinity, cpu_online_map);
+        if ( any_online_cpu(affinity) == NR_CPUS )
         {
-            printk("Breaking affinity for vector %u (irq %i)\n",
-                   vector, vector_to_irq(vector));
-            mask = map;
+            break_affinity = 1;
+            affinity = cpu_online_map;
         }
-        if ( desc->handler->set_affinity )
-            desc->handler->set_affinity(vector, mask);
-        else if ( desc->action && !(warned++) )
-            printk("Cannot set affinity for vector %u (irq %i)\n",
-                   vector, vector_to_irq(vector));
-
-        spin_unlock_irqrestore(&desc->lock, flags);
-    }
-
-    /* Service any interrupts that beat us in the re-direction race. */
+
+        if (desc->handler->disable)
+            desc->handler->disable(irq);
+
+        if (desc->handler->set_affinity)
+            desc->handler->set_affinity(irq, affinity);
+        else if (!(warned++))
+            set_affinity = 0;
+
+        if (desc->handler->enable)
+            desc->handler->enable(irq);
+
+        spin_unlock(&desc->lock);
+
+        if (break_affinity && set_affinity)
+            printk("Broke affinity for irq %i\n", irq);
+        else if (!set_affinity)
+            printk("Cannot set affinity for irq %i\n", irq);
+    }
+
+    /* That doesn't seem sufficient.  Give it 1ms. */
     local_irq_enable();
     mdelay(1);
     local_irq_disable();
 
     /* Clean up cpu_eoi_map of every interrupt to exclude this CPU. */
-    for ( vector = 0; vector < NR_VECTORS; vector++ )
-    {
-        if ( !(irq_desc[vector_to_irq(vector)].status & IRQ_GUEST) )
+    for ( irq = 0; irq < nr_irqs; irq++ )
+    {
+        desc = irq_to_desc(irq);
+        if ( !(desc->status & IRQ_GUEST) )
             continue;
-        action = (irq_guest_action_t *)irq_desc[vector_to_irq(vector)].action;
+        action = (irq_guest_action_t *)desc->action;
         cpu_clear(smp_processor_id(), action->cpu_eoi_map);
     }
 
diff -r 722c7e94e764 -r d33e9aae74c6 xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c        Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/arch/x86/msi.c        Wed Aug 19 12:53:46 2009 +0100
@@ -120,13 +120,19 @@ void msi_compose_msg(struct pci_dev *pde
                             struct msi_msg *msg)
 {
     unsigned dest;
-    cpumask_t tmp;
-    int vector = irq_to_vector(irq);
-
-    tmp = TARGET_CPUS;
-    if ( vector )
-    {
-        dest = cpu_mask_to_apicid(tmp);
+    cpumask_t domain;
+    struct irq_cfg *cfg = irq_cfg(irq);
+    int vector = cfg->vector;
+    domain = cfg->domain;
+
+    if ( cpus_empty( domain ) ) {
+        dprintk(XENLOG_ERR,"%s, compose msi message error!!\n", __func__);
+        return;
+    }
+
+    if ( vector ) {
+
+        dest = cpu_mask_to_apicid(domain);
 
         msg->address_hi = MSI_ADDR_BASE_HI;
         msg->address_lo =
@@ -274,11 +280,23 @@ static void write_msi_msg(struct msi_des
 
 void set_msi_affinity(unsigned int irq, cpumask_t mask)
 {
-    struct msi_desc *desc = irq_desc[irq].msi_desc;
     struct msi_msg msg;
     unsigned int dest;
+    struct irq_desc *desc = irq_to_desc(irq);
+    struct msi_desc *msi_desc = desc->msi_desc;
+    struct irq_cfg *cfg = desc->chip_data;
+
+    dest = set_desc_affinity(desc, mask);
+    if (dest == BAD_APICID || !msi_desc)
+        return;
+
+    ASSERT(spin_is_locked(&desc->lock));
 
     memset(&msg, 0, sizeof(msg));
+    read_msi_msg(msi_desc, &msg);
+
+    msg.data &= ~MSI_DATA_VECTOR_MASK;
+    msg.data |= MSI_DATA_VECTOR(cfg->vector);
     cpus_and(mask, mask, cpu_online_map);
     if ( cpus_empty(mask) )
         mask = TARGET_CPUS;
@@ -287,13 +305,16 @@ void set_msi_affinity(unsigned int irq, 
     if ( !desc )
         return;
 
-    ASSERT(spin_is_locked(&irq_desc[irq].lock));
-    read_msi_msg(desc, &msg);
+    ASSERT(spin_is_locked(&desc->lock));
+    read_msi_msg(msi_desc, &msg);
+
+    msg.data &= ~MSI_DATA_VECTOR_MASK;
+    msg.data |= MSI_DATA_VECTOR(cfg->vector);
 
     msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
     msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
-    write_msi_msg(desc, &msg);
+    write_msi_msg(msi_desc, &msg);
 }
 
 static void msi_set_enable(struct pci_dev *dev, int enable)
diff -r 722c7e94e764 -r d33e9aae74c6 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/arch/x86/physdev.c    Wed Aug 19 12:53:46 2009 +0100
@@ -329,6 +329,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
 
     case PHYSDEVOP_alloc_irq_vector: {
         struct physdev_irq irq_op;
+        int vector;
 
         ret = -EFAULT;
         if ( copy_from_guest(&irq_op, arg, 1) != 0 )
@@ -344,8 +345,16 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
 
         irq = irq_op.irq;
         ret = -EINVAL;
-
-        irq_op.vector = assign_irq_vector(irq);
+        
+        /* FIXME: Once dom0 breaks the GSI IRQ limit, the
+           limit here must be eliminated. */
+        BUG_ON(irq >= 256);
+        
+        vector = assign_irq_vector(irq);
+        if (vector >= FIRST_DYNAMIC_VECTOR)
+            irq_op.vector = irq;
+        else
+            irq_op.vector = -ENOSPC;
 
         spin_lock(&pcidevs_lock);
         spin_lock(&dom0->event_lock);
diff -r 722c7e94e764 -r d33e9aae74c6 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/arch/x86/setup.c      Wed Aug 19 12:53:46 2009 +0100
@@ -921,9 +921,9 @@ void __init __start_xen(unsigned long mb
 
     init_apic_mappings();
 
+    percpu_init_areas();
+
     init_IRQ();
-    
-    percpu_init_areas();
 
     xsm_init(&initrdidx, mbi, initial_images_start);
 
diff -r 722c7e94e764 -r d33e9aae74c6 xen/arch/x86/smp.c
--- a/xen/arch/x86/smp.c        Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/arch/x86/smp.c        Wed Aug 19 12:53:46 2009 +0100
@@ -26,7 +26,11 @@
  * send_IPI_mask(cpumask, vector): sends @vector IPI to CPUs in @cpumask,
  * excluding the local CPU. @cpumask may be empty.
  */
-#define send_IPI_mask (genapic->send_IPI_mask)
+
+void send_IPI_mask(const cpumask_t *mask, int vector)
+{
+    genapic->send_IPI_mask(mask, vector);
+}
 
 /*
  *     Some notes on x86 processor bugs affecting SMP operation:
@@ -89,6 +93,41 @@ void apic_wait_icr_idle(void)
         cpu_relax();
 }
 
+static void __default_send_IPI_shortcut(unsigned int shortcut, int vector,
+                                    unsigned int dest)
+{
+    unsigned int cfg;
+
+    /*
+     * Wait for idle.
+     */
+    apic_wait_icr_idle();
+
+    /*
+     * prepare target chip field
+     */
+    cfg = __prepare_ICR(shortcut, vector) | dest;
+    /*
+     * Send the IPI. The write to APIC_ICR fires this off.
+     */
+    apic_write_around(APIC_ICR, cfg);
+}
+
+void send_IPI_self_flat(int vector)
+{
+    __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+}
+
+void send_IPI_self_phys(int vector)
+{
+    __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+}
+
+void send_IPI_self_x2apic(int vector)
+{
+    apic_write(APIC_SELF_IPI, vector);    
+}
+
 void send_IPI_mask_flat(const cpumask_t *cpumask, int vector)
 {
     unsigned long mask = cpus_addr(*cpumask)[0];
@@ -337,8 +376,10 @@ void smp_send_nmi_allbutself(void)
 
 fastcall void smp_event_check_interrupt(struct cpu_user_regs *regs)
 {
+    struct cpu_user_regs *old_regs = set_irq_regs(regs);
     ack_APIC_irq();
     perfc_incr(ipis);
+    set_irq_regs(old_regs);
 }
 
 static void __smp_call_function_interrupt(void)
@@ -369,7 +410,10 @@ static void __smp_call_function_interrup
 
 fastcall void smp_call_function_interrupt(struct cpu_user_regs *regs)
 {
+    struct cpu_user_regs *old_regs = set_irq_regs(regs);
+
     ack_APIC_irq();
     perfc_incr(ipis);
     __smp_call_function_interrupt();
-}
+    set_irq_regs(old_regs);
+}
diff -r 722c7e94e764 -r d33e9aae74c6 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/arch/x86/smpboot.c    Wed Aug 19 12:53:46 2009 +0100
@@ -512,7 +512,12 @@ void __devinit start_secondary(void *unu
        set_cpu_sibling_map(raw_smp_processor_id());
        wmb();
 
+    /* Initialize vector_irq for this secondary CPU. */
+    lock_vector_lock();
+    __setup_vector_irq(smp_processor_id());
        cpu_set(smp_processor_id(), cpu_online_map);
+    unlock_vector_lock();
+
        per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
 
        init_percpu_time();
@@ -1232,10 +1237,9 @@ remove_siblinginfo(int cpu)
        cpu_clear(cpu, cpu_sibling_setup_map);
 }
 
-extern void fixup_irqs(cpumask_t map);
+extern void fixup_irqs(void);
 int __cpu_disable(void)
 {
-       cpumask_t map = cpu_online_map;
        int cpu = smp_processor_id();
 
        /*
@@ -1262,8 +1266,8 @@ int __cpu_disable(void)
 
        remove_siblinginfo(cpu);
 
-       cpu_clear(cpu, map);
-       fixup_irqs(map);
+       cpu_clear(cpu, cpu_online_map);
+       fixup_irqs();
        /* It's now safe to remove this processor from the online map */
        cpu_clear(cpu, cpu_online_map);
 
@@ -1477,14 +1481,13 @@ void __init smp_cpus_done(unsigned int m
 
 void __init smp_intr_init(void)
 {
-       int irq, seridx;
+       int irq, seridx, cpu = smp_processor_id();
 
        /*
         * IRQ0 must be given a fixed assignment and initialized,
         * because it's used before the IO-APIC is set up.
         */
        irq_vector[0] = FIRST_HIPRIORITY_VECTOR;
-       vector_irq[FIRST_HIPRIORITY_VECTOR] = 0;
 
        /*
         * Also ensure serial interrupts are high priority. We do not
@@ -1493,9 +1496,14 @@ void __init smp_intr_init(void)
        for (seridx = 0; seridx < 2; seridx++) {
                if ((irq = serial_irq(seridx)) < 0)
                        continue;
-               irq_vector[irq] = FIRST_HIPRIORITY_VECTOR + seridx + 1;
-               vector_irq[FIRST_HIPRIORITY_VECTOR + seridx + 1] = irq;
-       }
+        irq_vector[irq] = FIRST_HIPRIORITY_VECTOR + seridx + 1;
+        per_cpu(vector_irq, cpu)[FIRST_HIPRIORITY_VECTOR + seridx + 1] = irq;
+        irq_cfg[irq].vector = FIRST_HIPRIORITY_VECTOR + seridx + 1;
+        irq_cfg[irq].domain = (cpumask_t)CPU_MASK_ALL;
+       }
+
+    /* IPI for cleaning up vectors after an IRQ move */
+    set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
 
        /* IPI for event checking. */
        set_intr_gate(EVENT_CHECK_VECTOR, event_check_interrupt);
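
The ordering in start_secondary() is deliberate: the booting CPU fills its per-CPU vector table under vector_lock and only then sets itself in cpu_online_map, so a concurrent vector allocation can never target a CPU whose table is still stale. A rough standalone model of what __setup_vector_irq() has to do (sizes and the bitmask "domain" are illustrative stand-ins, not Xen's types):

    #include <string.h>

    #define NR_VECTORS 256
    #define NR_IRQS     64                 /* illustrative */
    #define NR_CPUS      8

    struct irq_cfg { int vector; unsigned long domain; };  /* bit per CPU */

    static struct irq_cfg cfg[NR_IRQS];
    static int vector_irq[NR_CPUS][NR_VECTORS]; /* models per_cpu(vector_irq) */

    /* Copy every IRQ whose allocation domain includes @cpu into that
     * CPU's private vector-to-irq table. */
    static void setup_vector_irq(int cpu)
    {
        int irq;

        memset(vector_irq[cpu], -1, sizeof(vector_irq[cpu]));
        for (irq = 0; irq < NR_IRQS; irq++)
            if (cfg[irq].vector && (cfg[irq].domain & (1UL << cpu)))
                vector_irq[cpu][cfg[irq].vector] = irq;
    }
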
diff -r 722c7e94e764 -r d33e9aae74c6 xen/drivers/passthrough/amd/iommu_init.c
--- a/xen/drivers/passthrough/amd/iommu_init.c  Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/drivers/passthrough/amd/iommu_init.c  Wed Aug 19 12:53:46 2009 +0100
@@ -26,6 +26,7 @@
 #include <asm/msi.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
 #include <asm-x86/fixmap.h>
+#include <mach_apic.h>
 
 static struct amd_iommu **irq_to_iommu;
 static int nr_amd_iommus;
@@ -303,40 +304,46 @@ static int amd_iommu_read_event_log(stru
     return -EFAULT;
 }
 
-static void amd_iommu_msi_data_init(struct amd_iommu *iommu)
-{
-    u32 msi_data;
+static void iommu_msi_set_affinity(unsigned int irq, cpumask_t mask)
+{
+    struct msi_msg msg;
+    unsigned int dest;
+    struct amd_iommu *iommu = irq_to_iommu[irq];
+    struct irq_desc *desc = irq_to_desc(irq);
+    struct irq_cfg *cfg = desc->chip_data;
     u8 bus = (iommu->bdf >> 8) & 0xff;
     u8 dev = PCI_SLOT(iommu->bdf & 0xff);
     u8 func = PCI_FUNC(iommu->bdf & 0xff);
-    int vector = irq_to_vector(iommu->irq);
-
-    msi_data = MSI_DATA_TRIGGER_EDGE |
-        MSI_DATA_LEVEL_ASSERT |
-        MSI_DATA_DELIVERY_FIXED |
-        MSI_DATA_VECTOR(vector);
+
+    dest = set_desc_affinity(desc, mask);
+    if (dest == BAD_APICID){
+        gdprintk(XENLOG_ERR, "Set iommu interrupt affinity error!\n");
+        return;
+    }
+
+    memset(&msg, 0, sizeof(msg)); 
+    msg.data = MSI_DATA_VECTOR(cfg->vector) & 0xff;
+    msg.data |= 1 << 14;
+    msg.data |= (INT_DELIVERY_MODE != dest_LowestPrio) ?
+        MSI_DATA_DELIVERY_FIXED:
+        MSI_DATA_DELIVERY_LOWPRI;
+
+    msg.address_hi =0;
+    msg.address_lo = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8)); 
+    msg.address_lo |= INT_DEST_MODE ? MSI_ADDR_DESTMODE_LOGIC:
+                    MSI_ADDR_DESTMODE_PHYS;
+    msg.address_lo |= (INT_DELIVERY_MODE != dest_LowestPrio) ?
+                    MSI_ADDR_REDIRECTION_CPU:
+                    MSI_ADDR_REDIRECTION_LOWPRI;
+    msg.address_lo |= MSI_ADDR_DEST_ID(dest & 0xff);
 
     pci_conf_write32(bus, dev, func,
-        iommu->msi_cap + PCI_MSI_DATA_64, msi_data);
-}
-
-static void amd_iommu_msi_addr_init(struct amd_iommu *iommu, int phy_cpu)
-{
-
-    int bus = (iommu->bdf >> 8) & 0xff;
-    int dev = PCI_SLOT(iommu->bdf & 0xff);
-    int func = PCI_FUNC(iommu->bdf & 0xff);
-
-    u32 address_hi = 0;
-    u32 address_lo = MSI_ADDR_HEADER |
-            MSI_ADDR_DESTMODE_PHYS |
-            MSI_ADDR_REDIRECTION_CPU |
-            MSI_ADDR_DEST_ID(phy_cpu);
-
+        iommu->msi_cap + PCI_MSI_DATA_64, msg.data);
     pci_conf_write32(bus, dev, func,
-        iommu->msi_cap + PCI_MSI_ADDRESS_LO, address_lo);
+        iommu->msi_cap + PCI_MSI_ADDRESS_LO, msg.address_lo);
     pci_conf_write32(bus, dev, func,
-        iommu->msi_cap + PCI_MSI_ADDRESS_HI, address_hi);
+        iommu->msi_cap + PCI_MSI_ADDRESS_HI, msg.address_hi);
+    
 }
 
 static void amd_iommu_msi_enable(struct amd_iommu *iommu, int flag)
@@ -373,6 +380,9 @@ static void iommu_msi_mask(unsigned int 
 {
     unsigned long flags;
     struct amd_iommu *iommu = irq_to_iommu[irq];
+    struct irq_desc *desc = irq_to_desc(irq);
+
+    irq_complete_move(&desc);
 
     /* FIXME: do not support mask bits at the moment */
     if ( iommu->maskbit )
@@ -395,11 +405,6 @@ static void iommu_msi_end(unsigned int i
     ack_APIC_irq();
 }
 
-static void iommu_msi_set_affinity(unsigned int irq, cpumask_t dest)
-{
-    struct amd_iommu *iommu = irq_to_iommu[irq];
-    amd_iommu_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest)));
-}
 
 static struct hw_interrupt_type iommu_msi_type = {
     .typename = "AMD_IOV_MSI",
@@ -485,7 +490,7 @@ static int set_iommu_interrupt_handler(s
         gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no irqs\n");
         return 0;
     }
-
+    
     irq_desc[irq].handler = &iommu_msi_type;
     irq_to_iommu[irq] = iommu;
     ret = request_irq(irq, amd_iommu_page_fault, 0,
@@ -524,8 +529,7 @@ void enable_iommu(struct amd_iommu *iomm
     register_iommu_event_log_in_mmio_space(iommu);
     register_iommu_exclusion_range(iommu);
 
-    amd_iommu_msi_data_init (iommu);
-    amd_iommu_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
+    iommu_msi_set_affinity(iommu->irq, cpu_online_map);
     amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
 
     set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_ENABLED);
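
Both IOMMU drivers now compose the complete MSI message inside their set_affinity handler rather than splitting data and address setup across two init-time helpers. A sketch of the data-word layout being composed (bit positions per the PCI MSI format; the macro names here are illustrative stand-ins, not Xen's):

    #include <stdint.h>

    #define MSI_VECTOR(v)       ((v) & 0xff)  /* bits 0-7: vector */
    #define MSI_DELIVERY_LOWPRI (1u << 8)     /* bits 8-10: delivery mode */
    #define MSI_LEVEL_ASSERT    (1u << 14)    /* the "1 << 14" in the patch */

    static uint32_t compose_msi_data(int vector, int lowest_prio)
    {
        uint32_t data = MSI_VECTOR(vector) | MSI_LEVEL_ASSERT;
        if (lowest_prio)
            data |= MSI_DELIVERY_LOWPRI;      /* else fixed delivery (000b) */
        return data;                          /* bit 15 clear: edge trigger */
    }
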
diff -r 722c7e94e764 -r d33e9aae74c6 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c       Wed Aug 19 12:53:46 2009 +0100
@@ -794,6 +794,9 @@ static void dma_msi_mask(unsigned int ir
 {
     unsigned long flags;
     struct iommu *iommu = irq_to_iommu[irq];
+    struct irq_desc *desc = irq_to_desc(irq);
+
+    irq_complete_move(&desc);
 
     /* mask it */
     spin_lock_irqsave(&iommu->register_lock, flags);
@@ -813,42 +816,45 @@ static void dma_msi_end(unsigned int irq
     ack_APIC_irq();
 }
 
-static void dma_msi_data_init(struct iommu *iommu, int irq)
-{
-    u32 msi_data = 0;
+static void dma_msi_set_affinity(unsigned int irq, cpumask_t mask)
+{
+    struct msi_msg msg;
+    unsigned int dest;
     unsigned long flags;
-    int vector = irq_to_vector(irq);
-
-    /* Fixed, edge, assert mode. Follow MSI setting */
-    msi_data |= vector & 0xff;
-    msi_data |= 1 << 14;
+
+    struct iommu *iommu = irq_to_iommu[irq];
+    struct irq_desc *desc = irq_to_desc(irq);
+    struct irq_cfg *cfg = desc->chip_data;
 
     spin_lock_irqsave(&iommu->register_lock, flags);
-    dmar_writel(iommu->reg, DMAR_FEDATA_REG, msi_data);
+    dest = set_desc_affinity(desc, mask);
+    if (dest == BAD_APICID){
+        gdprintk(XENLOG_ERR VTDPREFIX, "Set iommu interrupt affinity error!\n");
+        return;
+    }
+    
+    memset(&msg, 0, sizeof(msg)); 
+    msg.data = MSI_DATA_VECTOR(cfg->vector) & 0xff;
+    msg.data |= 1 << 14;
+    msg.data |= (INT_DELIVERY_MODE != dest_LowestPrio) ?
+        MSI_DATA_DELIVERY_FIXED:
+        MSI_DATA_DELIVERY_LOWPRI;
+
+    /* Follow MSI setting */
+    if (x2apic_enabled)
+        msg.address_hi = dest & 0xFFFFFF00;
+    msg.address_lo = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8)); 
+    msg.address_lo |= INT_DEST_MODE ? MSI_ADDR_DESTMODE_LOGIC:
+                    MSI_ADDR_DESTMODE_PHYS;
+    msg.address_lo |= (INT_DELIVERY_MODE != dest_LowestPrio) ?
+                    MSI_ADDR_REDIRECTION_CPU:
+                    MSI_ADDR_REDIRECTION_LOWPRI;
+    msg.address_lo |= MSI_ADDR_DEST_ID(dest & 0xff);
+
+    dmar_writel(iommu->reg, DMAR_FEDATA_REG, msg.data);
+    dmar_writel(iommu->reg, DMAR_FEADDR_REG, msg.address_lo);
+    dmar_writel(iommu->reg, DMAR_FEUADDR_REG, msg.address_hi);
     spin_unlock_irqrestore(&iommu->register_lock, flags);
-}
-
-static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu)
-{
-    u64 msi_address;
-    unsigned long flags;
-
-    /* Physical, dedicated cpu. Follow MSI setting */
-    msi_address = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8));
-    msi_address |= MSI_PHYSICAL_MODE << 2;
-    msi_address |= MSI_REDIRECTION_HINT_MODE << 3;
-    msi_address |= phy_cpu << MSI_TARGET_CPU_SHIFT;
-
-    spin_lock_irqsave(&iommu->register_lock, flags);
-    dmar_writel(iommu->reg, DMAR_FEADDR_REG, (u32)msi_address);
-    dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32));
-    spin_unlock_irqrestore(&iommu->register_lock, flags);
-}
-
-static void dma_msi_set_affinity(unsigned int irq, cpumask_t dest)
-{
-    struct iommu *iommu = irq_to_iommu[irq];
-    dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest)));
 }
 
 static struct hw_interrupt_type dma_msi_type = {
@@ -1584,6 +1590,7 @@ static int init_vtd_hw(void)
     int irq = -1;
     int ret;
     unsigned long flags;
+    struct irq_cfg *cfg;
 
     for_each_drhd_unit ( drhd )
     {
@@ -1598,8 +1605,10 @@ static int init_vtd_hw(void)
             }
             iommu->irq = irq;
         }
-        dma_msi_data_init(iommu, iommu->irq);
-        dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
+
+        cfg = irq_cfg(irq);
+        dma_msi_set_affinity(irq, cfg->domain);
+
         clear_fault_bits(iommu);
 
         spin_lock_irqsave(&iommu->register_lock, flags);
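
The x2apic_enabled branch above is needed because an x2APIC destination ID can be wider than eight bits: only the low byte fits the MSI_ADDR_DEST_ID field in address_lo, and the remaining bits travel in address_hi (written to DMAR_FEUADDR_REG). An illustrative helper, not a Xen function:

    #include <stdint.h>

    static void split_dest_id(uint32_t dest, uint32_t *lo_bits, uint32_t *hi)
    {
        *lo_bits = dest & 0xff;       /* goes into MSI_ADDR_DEST_ID() */
        *hi      = dest & 0xffffff00; /* nonzero only in x2APIC mode */
    }
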
diff -r 722c7e94e764 -r d33e9aae74c6 xen/include/asm-x86/apic.h
--- a/xen/include/asm-x86/apic.h        Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/include/asm-x86/apic.h        Wed Aug 19 12:53:46 2009 +0100
@@ -14,6 +14,12 @@
 #define APIC_QUIET   0
 #define APIC_VERBOSE 1
 #define APIC_DEBUG   2
+
+#define        SET_APIC_LOGICAL_ID(x)  (((x)<<24))
+
+#define IO_APIC_REDIR_VECTOR_MASK      0x000FF
+#define IO_APIC_REDIR_DEST_LOGICAL     0x00800
+#define IO_APIC_REDIR_DEST_PHYSICAL    0x00000
 
 extern int apic_verbosity;
 extern int x2apic_enabled;
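
The new IO_APIC_REDIR_* constants name fields in the low 32 bits of an I/O APIC redirection entry (vector in bits 0-7, destination mode in bit 11). A small usage sketch with illustrative helpers:

    #include <stdint.h>

    #define IO_APIC_REDIR_VECTOR_MASK  0x000FF
    #define IO_APIC_REDIR_DEST_LOGICAL 0x00800

    static int redir_vector(uint32_t lo)      /* bits 0-7 */
    {
        return lo & IO_APIC_REDIR_VECTOR_MASK;
    }

    static int redir_is_logical(uint32_t lo)  /* bit 11 */
    {
        return !!(lo & IO_APIC_REDIR_DEST_LOGICAL);
    }
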
diff -r 722c7e94e764 -r d33e9aae74c6 xen/include/asm-x86/apicdef.h
--- a/xen/include/asm-x86/apicdef.h     Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/include/asm-x86/apicdef.h     Wed Aug 19 12:53:46 2009 +0100
@@ -107,7 +107,7 @@
 #define                APIC_TDCR       0x3E0
 
 /* Only available in x2APIC mode */
-#define                APIC_SELF_IPI   0x400
+#define                APIC_SELF_IPI   0x3F0
 
 #define                        APIC_TDR_DIV_TMBASE     (1<<2)
 #define                        APIC_TDR_DIV_1          0xB
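
The 0x400 -> 0x3F0 correction matters because x2APIC register offsets map onto MSRs at 0x800 + (offset >> 4): the SELF IPI register is MSR 0x83F, i.e. offset 0x3F0, whereas 0x400 would have addressed the nonexistent MSR 0x840. The mapping in one line:

    #include <stdint.h>

    static uint32_t x2apic_msr(uint32_t reg_offset)
    {
        return 0x800u + (reg_offset >> 4);   /* 0x3F0 -> MSR 0x83F */
    }
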
diff -r 722c7e94e764 -r d33e9aae74c6 xen/include/asm-x86/genapic.h
--- a/xen/include/asm-x86/genapic.h     Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/include/asm-x86/genapic.h     Wed Aug 19 12:53:46 2009 +0100
@@ -34,8 +34,10 @@ struct genapic {
        void (*init_apic_ldr)(void);
        void (*clustered_apic_check)(void);
        cpumask_t (*target_cpus)(void);
+       cpumask_t (*vector_allocation_domain)(int cpu);
        unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
        void (*send_IPI_mask)(const cpumask_t *mask, int vector);
+    void (*send_IPI_self)(int vector);
 };
 
 #define APICFUNC(x) .x = x
@@ -53,41 +55,53 @@ cpumask_t target_cpus_flat(void);
 cpumask_t target_cpus_flat(void);
 unsigned int cpu_mask_to_apicid_flat(cpumask_t cpumask);
 void send_IPI_mask_flat(const cpumask_t *mask, int vector);
+void send_IPI_self_flat(int vector);
+cpumask_t vector_allocation_domain_flat(int cpu);
 #define GENAPIC_FLAT \
        .int_delivery_mode = dest_LowestPrio, \
        .int_dest_mode = 1 /* logical delivery */, \
        .init_apic_ldr = init_apic_ldr_flat, \
        .clustered_apic_check = clustered_apic_check_flat, \
        .target_cpus = target_cpus_flat, \
+       .vector_allocation_domain = vector_allocation_domain_flat, \
        .cpu_mask_to_apicid = cpu_mask_to_apicid_flat, \
-       .send_IPI_mask = send_IPI_mask_flat
+       .send_IPI_mask = send_IPI_mask_flat, \
+       .send_IPI_self = send_IPI_self_flat
 
 void init_apic_ldr_x2apic(void);
 void clustered_apic_check_x2apic(void);
 cpumask_t target_cpus_x2apic(void);
 unsigned int cpu_mask_to_apicid_x2apic(cpumask_t cpumask);
 void send_IPI_mask_x2apic(const cpumask_t *mask, int vector);
+void send_IPI_self_x2apic(int vector);
+cpumask_t vector_allocation_domain_x2apic(int cpu);
 #define GENAPIC_X2APIC \
        .int_delivery_mode = dest_Fixed, \
        .int_dest_mode = 0 /* physical delivery */, \
        .init_apic_ldr = init_apic_ldr_x2apic, \
        .clustered_apic_check = clustered_apic_check_x2apic, \
        .target_cpus = target_cpus_x2apic, \
+       .vector_allocation_domain = vector_allocation_domain_x2apic, \
        .cpu_mask_to_apicid = cpu_mask_to_apicid_x2apic, \
-       .send_IPI_mask = send_IPI_mask_x2apic
+       .send_IPI_mask = send_IPI_mask_x2apic,       \
+       .send_IPI_self = send_IPI_self_x2apic
 
 void init_apic_ldr_phys(void);
 void clustered_apic_check_phys(void);
 cpumask_t target_cpus_phys(void);
 unsigned int cpu_mask_to_apicid_phys(cpumask_t cpumask);
 void send_IPI_mask_phys(const cpumask_t *mask, int vector);
+void send_IPI_self_phys(int vector);
+cpumask_t vector_allocation_domain_phys(int cpu);
 #define GENAPIC_PHYS \
        .int_delivery_mode = dest_Fixed, \
        .int_dest_mode = 0 /* physical delivery */, \
        .init_apic_ldr = init_apic_ldr_phys, \
        .clustered_apic_check = clustered_apic_check_phys, \
        .target_cpus = target_cpus_phys, \
+       .vector_allocation_domain = vector_allocation_domain_phys, \
        .cpu_mask_to_apicid = cpu_mask_to_apicid_phys, \
-       .send_IPI_mask = send_IPI_mask_phys
+       .send_IPI_mask = send_IPI_mask_phys, \
+       .send_IPI_self = send_IPI_self_phys
 
 #endif
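
These two new genapic hooks carry the heart of the scheme: the vector allocation domain decides whether CPUs share one vector space (flat) or each get their own (physical/x2APIC), which is what multiplies the number of usable vectors. A standalone sketch of the two policies (a bitmask stands in for cpumask_t; the exact masks are illustrative):

    struct apic_ops {
        unsigned long (*vector_allocation_domain)(int cpu);
        void (*send_IPI_self)(int vector);
    };

    /* flat: all CPUs share one vector space */
    static unsigned long domain_flat(int cpu) { (void)cpu; return ~0UL; }

    /* phys/x2apic: each CPU allocates in its own private space */
    static unsigned long domain_phys(int cpu) { return 1UL << cpu; }

    static void self_ipi(int vector) { (void)vector; /* ICR or MSR write */ }

    static const struct apic_ops apic_flat = { domain_flat, self_ipi };
    static const struct apic_ops apic_phys = { domain_phys, self_ipi };
    static const struct apic_ops *genapic = &apic_flat;  /* chosen at boot */
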
diff -r 722c7e94e764 -r d33e9aae74c6 xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/include/asm-x86/irq.h Wed Aug 19 12:53:46 2009 +0100
@@ -5,7 +5,10 @@
 
 #include <xen/config.h>
 #include <asm/atomic.h>
+#include <xen/cpumask.h>
+#include <xen/smp.h>
 #include <irq_vectors.h>
+#include <asm/percpu.h>
 
 #define IO_APIC_IRQ(irq)    (((irq) >= 16 && (irq) < nr_irqs_gsi) \
         || (((irq) < 16) && (1<<(irq)) & io_apic_irqs))
@@ -22,10 +25,44 @@
 #define MAX_GSI_IRQS PAGE_SIZE * 8
 #define MAX_NR_IRQS (2 * MAX_GSI_IRQS)
 
-extern int vector_irq[NR_VECTORS];
+#define irq_cfg(irq)        &irq_cfg[(irq)]
+
+struct irq_cfg {
+        int  vector;
+        cpumask_t domain;
+        cpumask_t old_domain;
+        unsigned move_cleanup_count;
+        u8 move_in_progress : 1;
+};
+
+extern struct irq_cfg *irq_cfg;
+
+typedef int vector_irq_t[NR_VECTORS];
+DECLARE_PER_CPU(vector_irq_t, vector_irq);
+
 extern u8 *irq_vector;
 
-extern int irq_to_vector(int irq);
+/*
+ * Per-cpu current frame pointer - the location of the last exception frame on
+ * the stack
+ */
+DECLARE_PER_CPU(struct cpu_user_regs *, __irq_regs);
+
+static inline struct cpu_user_regs *get_irq_regs(void)
+{
+       return __get_cpu_var(__irq_regs);
+}
+
+static inline struct cpu_user_regs *set_irq_regs(struct cpu_user_regs *new_regs)
+{
+       struct cpu_user_regs *old_regs, **pp_regs = &__get_cpu_var(__irq_regs);
+
+       old_regs = *pp_regs;
+       *pp_regs = new_regs;
+       return old_regs;
+}
+
+
 #define platform_legacy_irq(irq)       ((irq) < 16)
 
 fastcall void event_check_interrupt(void);
@@ -37,6 +74,7 @@ fastcall void spurious_interrupt(void);
 fastcall void spurious_interrupt(void);
 fastcall void thermal_interrupt(void);
 fastcall void cmci_interrupt(void);
+fastcall void irq_move_cleanup_interrupt(void);
 
 void disable_8259A_irq(unsigned int irq);
 void enable_8259A_irq(unsigned int irq);
@@ -66,10 +104,24 @@ int  init_irq_data(void);
 int  init_irq_data(void);
 
 void clear_irq_vector(int irq);
-int __assign_irq_vector(int irq);
 
+int irq_to_vector(int irq);
 int create_irq(void);
 void destroy_irq(unsigned int irq);
+
+struct irq_desc;
+extern void irq_complete_move(struct irq_desc **descp);
+
+void lock_vector_lock(void);
+void unlock_vector_lock(void);
+
+void __setup_vector_irq(int cpu);
+
+void move_native_irq(int irq);
+
+int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask);
+
+int bind_irq_vector(int irq, int vector, cpumask_t domain);
 
 #define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq])
 #define domain_irq_to_pirq(d, irq) ((d)->arch.irq_pirq[irq])
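
The three migration fields in struct irq_cfg implement a two-phase move: the new vector goes live first, and the old vector is recycled only after every CPU in old_domain has handled the cleanup IPI. A toy model of the accounting (assumed flow, not the Xen implementation):

    #include <assert.h>

    struct cfg_model {
        unsigned move_cleanup_count;
        unsigned move_in_progress : 1;
    };

    /* irq_complete_move(): first interrupt arrives on the new vector,
     * so fan the cleanup IPI out to the old domain. */
    static void complete_move(struct cfg_model *c, unsigned old_domain_cpus)
    {
        if (!c->move_in_progress)
            return;
        c->move_cleanup_count = old_domain_cpus;
        c->move_in_progress = 0;
        /* IRQ_MOVE_CLEANUP_VECTOR would be sent to the old domain here */
    }

    /* per-CPU cleanup handler: release this CPU's vector_irq slot. */
    static void cleanup_one(struct cfg_model *c)
    {
        assert(c->move_cleanup_count > 0);
        if (--c->move_cleanup_count == 0)
            ;  /* old vector is now reusable everywhere */
    }
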
diff -r 722c7e94e764 -r d33e9aae74c6 xen/include/asm-x86/mach-default/irq_vectors.h
--- a/xen/include/asm-x86/mach-default/irq_vectors.h    Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/include/asm-x86/mach-default/irq_vectors.h    Wed Aug 19 12:53:46 2009 +0100
@@ -28,6 +28,9 @@
 /* Dynamically-allocated vectors available to any driver. */
 #define FIRST_DYNAMIC_VECTOR   0x20
 #define LAST_DYNAMIC_VECTOR    0xdf
+#define NR_DYNAMIC_VECTORS     (LAST_DYNAMIC_VECTOR - FIRST_DYNAMIC_VECTOR + 1)
+
+#define IRQ_MOVE_CLEANUP_VECTOR FIRST_DYNAMIC_VECTOR
 
 #define NR_VECTORS 256
 
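
IRQ_MOVE_CLEANUP_VECTOR aliases FIRST_DYNAMIC_VECTOR, so, assuming the allocator treats that slot as reserved, dynamic allocation effectively begins one vector higher:

    #define FIRST_DYNAMIC_VECTOR    0x20
    #define LAST_DYNAMIC_VECTOR     0xdf
    #define IRQ_MOVE_CLEANUP_VECTOR FIRST_DYNAMIC_VECTOR

    /* Assumption for illustration: the allocator skips the cleanup IPI slot. */
    static int first_allocatable_vector(void)
    {
        return IRQ_MOVE_CLEANUP_VECTOR + 1;
    }
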
diff -r 722c7e94e764 -r d33e9aae74c6 xen/include/asm-x86/mach-generic/mach_apic.h
--- a/xen/include/asm-x86/mach-generic/mach_apic.h      Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/include/asm-x86/mach-generic/mach_apic.h      Wed Aug 19 12:53:46 2009 +0100
@@ -14,6 +14,7 @@
 #define init_apic_ldr (genapic->init_apic_ldr)
 #define clustered_apic_check (genapic->clustered_apic_check) 
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define vector_allocation_domain(cpu) (genapic->vector_allocation_domain(cpu))
 
 static inline void enable_apic_mode(void)
 {
diff -r 722c7e94e764 -r d33e9aae74c6 xen/include/asm-x86/smp.h
--- a/xen/include/asm-x86/smp.h Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/include/asm-x86/smp.h Wed Aug 19 12:53:46 2009 +0100
@@ -36,6 +36,8 @@ DECLARE_PER_CPU(cpumask_t, cpu_core_map)
 DECLARE_PER_CPU(cpumask_t, cpu_core_map);
 
 void smp_send_nmi_allbutself(void);
+
+void  send_IPI_mask(const cpumask_t *mask, int vector);
 
 extern void (*mtrr_hook) (void);
 
diff -r 722c7e94e764 -r d33e9aae74c6 xen/include/xen/cpumask.h
--- a/xen/include/xen/cpumask.h Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/include/xen/cpumask.h Wed Aug 19 12:53:46 2009 +0100
@@ -79,7 +79,7 @@
 #include <xen/bitmap.h>
 #include <xen/kernel.h>
 
-typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
+typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
 
 #define cpu_set(cpu, dst) __cpu_set((cpu), &(dst))
 static inline void __cpu_set(int cpu, volatile cpumask_t *dstp)
@@ -112,6 +112,16 @@ static inline int __cpu_test_and_set(int
 static inline int __cpu_test_and_set(int cpu, cpumask_t *addr)
 {
        return test_and_set_bit(cpu, addr->bits);
+}
+
+/**
+ * cpumask_test_cpu - test for a cpu in a cpumask
+ */
+#define cpumask_test_cpu(cpu, cpumask) __cpu_test((cpu), &(cpumask))
+
+static inline int __cpu_test(int cpu, cpumask_t *addr)
+{
+       return test_bit(cpu, addr->bits);
 }
 
 #define cpu_test_and_clear(cpu, cpumask) __cpu_test_and_clear((cpu), &(cpumask))
@@ -193,6 +203,12 @@ static inline int __cpus_weight(const cp
 static inline int __cpus_weight(const cpumask_t *srcp, int nbits)
 {
        return bitmap_weight(srcp->bits, nbits);
+}
+
+#define cpus_copy(dest, src) __cpus_copy(&(dest), &(src))
+static inline void __cpus_copy(cpumask_t *dstp, cpumask_t *srcp)
+{
+       bitmap_copy(dstp->bits, srcp->bits, NR_CPUS);
 }
 
 #define cpus_shift_right(dst, src, n) \
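
A usage sketch of the two new cpumask helpers, with a plain struct standing in for cpumask_t (illustrative, not the Xen macros):

    #include <string.h>

    #define BITS_PER_LONG (8 * (int)sizeof(unsigned long))

    typedef struct { unsigned long bits[2]; } mask_model; /* ~cpumask_t */

    static int mask_test(int cpu, const mask_model *m)    /* ~cpumask_test_cpu */
    {
        return (m->bits[cpu / BITS_PER_LONG] >> (cpu % BITS_PER_LONG)) & 1;
    }

    static void mask_copy(mask_model *dst, const mask_model *src)  /* ~cpus_copy */
    {
        memcpy(dst->bits, src->bits, sizeof(dst->bits));
    }
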
diff -r 722c7e94e764 -r d33e9aae74c6 xen/include/xen/irq.h
--- a/xen/include/xen/irq.h     Wed Aug 19 12:53:04 2009 +0100
+++ b/xen/include/xen/irq.h     Wed Aug 19 12:53:46 2009 +0100
@@ -70,12 +70,15 @@ typedef struct irq_desc{
     struct msi_desc   *msi_desc;
     struct irqaction *action;  /* IRQ action list */
     unsigned int depth;                /* nested irq disables */
+#if defined(__i386__) || defined(__x86_64__)
+    struct irq_cfg *chip_data;
+#endif
     int irq;
     spinlock_t lock;
     cpumask_t affinity;
 } __cacheline_aligned irq_desc_t;
 
-#ifndef CONFIG_X86
+#if defined(__ia64__)
 extern irq_desc_t irq_desc[NR_VECTORS];
 
 #define setup_irq(irq, action) \
@@ -116,11 +119,13 @@ static inline void set_native_irq_info(u
 
 static inline void set_irq_info(int irq, cpumask_t mask)
 {
-#ifdef CONFIG_X86
+#if defined(__i386__) || defined(__x86_64__)
     set_native_irq_info(irq, mask);
 #else
     set_native_irq_info(irq_to_vector(irq), mask);
 #endif
 }
 
+unsigned int set_desc_affinity(struct irq_desc *desc, cpumask_t mask);
+
 #endif /* __XEN_IRQ_H__ */
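
set_desc_affinity() is the pivot shared by all the reworked set_affinity handlers in this patch: validate the requested mask, reassign the vector for the new domain, and return an APIC destination ID, or BAD_APICID on failure. An assumed flow in sketch form (not the Xen implementation; the types are stand-ins):

    #define BAD_APICID 0xffu

    struct desc_model { unsigned long affinity; int vector; };

    static unsigned int set_desc_affinity_model(struct desc_model *d,
                                                unsigned long mask,
                                                unsigned long online)
    {
        unsigned long target = mask & online;

        if (!target)
            return BAD_APICID;   /* no online CPU in the requested mask */
        /* the real code would call __assign_irq_vector(irq, cfg, target)
         * and then cpu_mask_to_apicid(cfg->domain) */
        d->affinity = target;
        return (unsigned int)__builtin_ctzl(target);  /* stand-in APIC ID */
    }
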

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
