diff -r 9f7602e3127c arch/x86/io_apic.c --- a/arch/x86/io_apic.c Thu Oct 13 16:25:28 2011 -0500 +++ b/arch/x86/io_apic.c Fri Oct 14 10:58:44 2011 -0500 @@ -569,6 +569,13 @@ } __get_cpu_var(vector_irq)[vector] = -1; cfg->move_cleanup_count--; + + if ( cfg->move_cleanup_count == 0 + && cfg->used_vectors ) + { + ASSERT(test_bit(vector, cfg->used_vectors)); + clear_bit(vector, cfg->used_vectors); + } unlock: spin_unlock(&desc->lock); } diff -r 9f7602e3127c arch/x86/irq.c --- a/arch/x86/irq.c Thu Oct 13 16:25:28 2011 -0500 +++ b/arch/x86/irq.c Fri Oct 14 10:58:44 2011 -0500 @@ -24,6 +24,8 @@ #include #include +static void parse_irq_vector_map_param(char *s); + /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */ bool_t __read_mostly opt_noirqbalance = 0; boolean_param("noirqbalance", opt_noirqbalance); @@ -31,6 +33,12 @@ unsigned int __read_mostly nr_irqs_gsi = 16; unsigned int __read_mostly nr_irqs; integer_param("nr_irqs", nr_irqs); + +/* This default may be changed by the AMD IOMMU code */ +int __read_mostly opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_DEFAULT; +custom_param("irq_vector_map", parse_irq_vector_map_param); + +vmask_t global_used_vector_map; u8 __read_mostly *irq_vector; struct irq_desc __read_mostly *irq_desc = NULL; @@ -59,6 +67,26 @@ /* irq_ratelimit: the max irq rate allowed in every 10ms, set 0 to disable */ static unsigned int __read_mostly irq_ratelimit_threshold = 10000; integer_param("irq_ratelimit", irq_ratelimit_threshold); + +static void __init parse_irq_vector_map_param(char *s) +{ + char *ss; + + do { + ss = strchr(s, ','); + if ( ss ) + *ss = '\0'; + + if ( !strcmp(s, "none")) + opt_irq_vector_map=OPT_IRQ_VECTOR_MAP_NONE; + else if ( !strcmp(s, "global")) + opt_irq_vector_map=OPT_IRQ_VECTOR_MAP_GLOBAL; + else if ( !strcmp(s, "per-device")) + opt_irq_vector_map=OPT_IRQ_VECTOR_MAP_PERDEV; + + s = ss + 1; + } while ( ss ); +} /* Must be called when irq disabled */ void lock_vector_lock(void) @@ -94,6 +122,11 @@ 
per_cpu(vector_irq, cpu)[vector] = irq; cfg->vector = vector; cfg->cpu_mask = online_mask; + if ( cfg->used_vectors ) + { + ASSERT(!test_bit(vector, cfg->used_vectors)); + set_bit(vector, cfg->used_vectors); + } irq_status[irq] = IRQ_USED; if (IO_APIC_IRQ(irq)) irq_vector[irq] = vector; @@ -158,6 +191,7 @@ desc->depth = 1; desc->msi_desc = NULL; desc->handler = &no_irq_type; + desc->chip_data->used_vectors=NULL; cpus_setall(desc->affinity); spin_unlock_irqrestore(&desc->lock, flags); @@ -190,6 +224,7 @@ if (likely(!cfg->move_in_progress)) return; + cpus_and(tmp_mask, cfg->old_cpu_mask, cpu_online_map); for_each_cpu_mask(cpu, tmp_mask) { for (vector = FIRST_DYNAMIC_VECTOR; vector <= LAST_DYNAMIC_VECTOR; @@ -200,6 +235,12 @@ break; } } + + if ( cfg->used_vectors ) + { + ASSERT(test_bit(vector, cfg->used_vectors)); + clear_bit(vector, cfg->used_vectors); + } cfg->move_in_progress = 0; } @@ -260,6 +301,7 @@ cfg->vector = IRQ_VECTOR_UNASSIGNED; cpus_clear(cfg->cpu_mask); cpus_clear(cfg->old_cpu_mask); + cfg->used_vectors = NULL; } int init_irq_data(void) @@ -329,6 +371,41 @@ end_none }; +static vmask_t *irq_get_used_vector_mask(int irq) +{ + vmask_t *ret = NULL; + + if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_GLOBAL ) + { + struct irq_desc *desc = irq_to_desc(irq); + + ret = &global_used_vector_map; + + if ( desc->chip_data->used_vectors ) + { + printk(XENLOG_INFO "%s: Strange, unassigned irq %d already has used_vectors!\n", + __func__, irq); + } + else + { + int vector; + + vector = irq_to_vector(irq); + if ( vector > 0 ) + { + printk(XENLOG_INFO "%s: Strange, irq %d already assigned vector %d!\n", + __func__, irq, vector); + + ASSERT(!test_bit(vector, ret)); + + set_bit(vector, ret); + } + } + } + + return ret; +} + int __assign_irq_vector(int irq, struct irq_cfg *cfg, const cpumask_t *mask) { /* @@ -347,6 +424,7 @@ int cpu, err; unsigned long flags; cpumask_t tmp_mask; + vmask_t *irq_used_vectors = NULL; old_vector = irq_to_vector(irq); if (old_vector) { @@ -361,6 
+439,17 @@ return -EAGAIN; err = -ENOSPC; + + /* This is the only place normal IRQs are ever marked + * as "in use". If they're not in use yet, check to see + * if we need to assign a global vector mask. */ + if ( irq_status[irq] == IRQ_USED ) + { + irq_used_vectors = cfg->used_vectors; + } + else + irq_used_vectors = irq_get_used_vector_mask(irq); + for_each_cpu_mask(cpu, *mask) { int new_cpu; int vector, offset; @@ -386,6 +475,10 @@ if (test_bit(vector, used_vectors)) goto next; + if (irq_used_vectors + && test_bit(vector, irq_used_vectors) ) + goto next; + for_each_cpu_mask(new_cpu, tmp_mask) if (per_cpu(vector_irq, new_cpu)[vector] != -1) goto next; @@ -403,8 +496,20 @@ cpus_copy(cfg->cpu_mask, tmp_mask); irq_status[irq] = IRQ_USED; - if (IO_APIC_IRQ(irq)) - irq_vector[irq] = vector; + ASSERT((cfg->used_vectors == NULL) + || (cfg->used_vectors == irq_used_vectors)); + cfg->used_vectors = irq_used_vectors; + + if (IO_APIC_IRQ(irq)) + irq_vector[irq] = vector; + + if ( cfg->used_vectors ) + { + ASSERT(!test_bit(vector, cfg->used_vectors)); + + set_bit(vector, cfg->used_vectors); + } + err = 0; local_irq_restore(flags); break; @@ -1505,7 +1610,7 @@ if ( !IS_PRIV(current->domain) && !(IS_PRIV_FOR(current->domain, d) && - irq_access_permitted(current->domain, pirq))) + irq_access_permitted(current->domain, pirq))) return -EPERM; if ( pirq < 0 || pirq >= d->nr_pirqs || irq < 0 || irq >= nr_irqs ) @@ -1553,8 +1658,22 @@ if ( desc->handler != &no_irq_type ) dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n", - d->domain_id, irq); + d->domain_id, irq); desc->handler = &pci_msi_type; + + if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV + && !desc->chip_data->used_vectors ) + { + desc->chip_data->used_vectors = &pdev->info.used_vectors; + if ( desc->chip_data->vector != IRQ_VECTOR_UNASSIGNED ) + { + int vector = desc->chip_data->vector; + ASSERT(!test_bit(vector, desc->chip_data->used_vectors)); + + set_bit(vector, desc->chip_data->used_vectors); + } + } + 
d->arch.pirq_irq[pirq] = irq; d->arch.irq_pirq[irq] = pirq; setup_msi_irq(pdev, msi_desc, irq); diff -r 9f7602e3127c drivers/passthrough/amd/pci_amd_iommu.c --- a/drivers/passthrough/amd/pci_amd_iommu.c Thu Oct 13 16:25:28 2011 -0500 +++ b/drivers/passthrough/amd/pci_amd_iommu.c Fri Oct 14 10:58:44 2011 -0500 @@ -24,6 +24,9 @@ #include #include #include + +extern bool_t __read_mostly opt_irq_perdev_vector_map; +extern bool_t __read_mostly iommu_amd_perdev_vector_map; extern unsigned short ivrs_bdf_entries; extern struct ivrs_mappings *ivrs_mappings; @@ -166,6 +169,35 @@ return -ENODEV; } + /* + * AMD IOMMUs don't distinguish between vectors destined for + * different cpus when doing interrupt remapping. This means + * that interrupts going through the same intremap table + * can't share the same vector. + * + * If irq_vector_map isn't specified, choose a sensible default: + * - If we're using per-device intremap tables, per-device + * vector non-sharing maps + * - If we're using a global intremap table, global vector + * non-sharing map + */ + if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_DEFAULT ) + { + if ( amd_iommu_perdev_intremap ) + { + printk("AMD-Vi: Enabling per-device vector maps\n"); + opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_PERDEV; + } + else + { + printk("AMD-Vi: Enabling global vector map\n"); + opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL; + } + } + else + { + printk("AMD-Vi: Not overriding irq_vector_map setting\n"); + } return scan_pci_devices(); } diff -r 9f7602e3127c include/asm-x86/irq.h --- a/include/asm-x86/irq.h Thu Oct 13 16:25:28 2011 -0500 +++ b/include/asm-x86/irq.h Fri Oct 14 10:58:44 2011 -0500 @@ -23,11 +23,16 @@ #define irq_to_desc(irq) (&irq_desc[irq]) #define irq_cfg(irq) (&irq_cfg[irq]) +typedef struct { + DECLARE_BITMAP(_bits,NR_VECTORS); +} vmask_t; + struct irq_cfg { int vector; cpumask_t cpu_mask; cpumask_t old_cpu_mask; unsigned move_cleanup_count; + vmask_t *used_vectors; u8 move_in_progress : 1; }; @@ -39,6 +44,13 @@
extern u8 *irq_vector; extern bool_t opt_noirqbalance; + +#define OPT_IRQ_VECTOR_MAP_DEFAULT 0 /* Do the default thing */ +#define OPT_IRQ_VECTOR_MAP_NONE 1 /* None */ +#define OPT_IRQ_VECTOR_MAP_GLOBAL 2 /* One global vector map (no vector sharing) */ +#define OPT_IRQ_VECTOR_MAP_PERDEV 3 /* Per-device vector map (no vector sharing w/in a device) */ + +extern int opt_irq_vector_map; /* * Per-cpu current frame pointer - the location of the last exception frame on diff -r 9f7602e3127c include/xen/pci.h --- a/include/xen/pci.h Thu Oct 13 16:25:28 2011 -0500 +++ b/include/xen/pci.h Fri Oct 14 10:58:44 2011 -0500 @@ -11,6 +11,7 @@ #include #include #include +#include /* * The PCI interface treats multi-function devices as independent * @@ -38,6 +39,7 @@ u8 bus; u8 devfn; } physfn; + vmask_t used_vectors; }; struct pci_dev {