allow efficient allocation of multiple CPU masks at once This is to better amortize the cost of storing an extra pointer when dynamically allocating CPU masks: If a certain entity (struct irq_desc being the first user) wants multiple, it can get away with holding just a single pointer, which is especially beneficial when a hypervisor with large NR_CPUS gets run on systems with a relatively small number of actual CPUs. If rationale, concept, and implementation are deemed reasonable, this could/should also be used for the three CPU masks that get allocated per vCPU. Signed-off-by: Jan Beulich --- 2011-10-18.orig/xen/arch/ia64/linux-xen/irq_ia64.c 2011-10-20 14:46:19.000000000 +0200 +++ 2011-10-18/xen/arch/ia64/linux-xen/irq_ia64.c 2011-10-18 18:23:19.000000000 +0200 @@ -303,6 +303,9 @@ int __init request_irq_vector(unsigned i void __init init_IRQ (void) { +#ifdef XEN + BUG_ON(init_irq_data()); +#endif register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL); #ifdef CONFIG_SMP register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction); --- 2011-10-18.orig/xen/arch/ia64/xen/irq.c 2011-10-18 11:20:02.000000000 +0200 +++ 2011-10-18/xen/arch/ia64/xen/irq.c 2011-10-18 18:22:27.000000000 +0200 @@ -74,17 +74,27 @@ unsigned int __ia64_local_vector_to_irq /* * Controller mappings for all interrupt sources: */ -irq_desc_t irq_desc[NR_IRQS] = { - [0 ... 
NR_IRQS-1] = { - .status = IRQ_DISABLED, - .handler = &no_irq_type, - .lock = SPIN_LOCK_UNLOCKED, - .arch = { - .vector = -1, - .cpu_mask = CPU_MASK_ALL, - } +irq_desc_t irq_desc[NR_IRQS]; + +int __init arch_init_one_irq_desc(struct irq_desc *desc) +{ + desc->arch.vector = -1; + cpumask_setall(irq_desc_cpu_mask(desc)); + + return 0; +} + +int __init init_irq_data(void) +{ + int irq, rc = 0; + + for (irq = 0; !rc && irq < NR_IRQS; irq++) { + struct irq_desc *desc = irq_to_desc(irq); + desc->irq = irq; + rc = init_one_irq_desc(desc); } -}; + return rc; +} void __do_IRQ_guest(int irq); --- 2011-10-18.orig/xen/arch/x86/i8259.c 2011-10-18 11:13:04.000000000 +0200 +++ 2011-10-18/xen/arch/x86/i8259.c 2011-10-18 15:25:25.000000000 +0200 @@ -398,7 +398,7 @@ void __init init_IRQ(void) desc->handler = &i8259A_irq_type; per_cpu(vector_irq, cpu)[FIRST_LEGACY_VECTOR + irq] = irq; - cpumask_copy(&desc->arch.cpu_mask, cpumask_of(cpu)); + cpumask_copy(irq_desc_cpu_mask(desc), cpumask_of(cpu)); desc->arch.vector = FIRST_LEGACY_VECTOR + irq; } --- 2011-10-18.orig/xen/arch/x86/io_apic.c 2011-10-18 13:32:32.000000000 +0200 +++ 2011-10-18/xen/arch/x86/io_apic.c 2011-10-20 14:47:44.000000000 +0200 @@ -648,20 +648,21 @@ static int pin_2_irq(int idx, int apic, void /*__init*/ setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; - struct irq_cfg *cfg; if (skip_ioapic_setup) return; for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { for (pin = 0; pin < nr_ioapic_entries[ioapic]; pin++) { + struct irq_desc *desc; + irq_entry = find_irq_entry(ioapic, pin, mp_INT); if (irq_entry == -1) continue; irq = pin_2_irq(irq_entry, ioapic, pin); - cfg = irq_cfg(irq); - BUG_ON(cpus_empty(cfg->cpu_mask)); - set_ioapic_affinity_irq(irq_to_desc(irq), &cfg->cpu_mask); + desc = irq_to_desc(irq); + BUG_ON(cpumask_empty(irq_desc_cpu_mask(desc))); + set_ioapic_affinity_irq(desc, irq_desc_cpu_mask(desc)); } } @@ -956,12 +957,12 @@ static void __init setup_IO_APIC_irqs(vo struct IO_APIC_route_entry entry; int apic, pin, idx, irq,
first_notcon = 1, vector; unsigned long flags; - struct irq_cfg *cfg; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); for (apic = 0; apic < nr_ioapics; apic++) { for (pin = 0; pin < nr_ioapic_entries[apic]; pin++) { + struct irq_desc *desc; /* * add it to the IO-APIC irq-routing table: @@ -1016,9 +1017,9 @@ static void __init setup_IO_APIC_irqs(vo if (!apic && platform_legacy_irq(irq)) disable_8259A_irq(irq_to_desc(irq)); } - cfg = irq_cfg(irq); + desc = irq_to_desc(irq); SET_DEST(entry.dest.dest32, entry.dest.logical.logical_dest, - cpu_mask_to_apicid(&cfg->cpu_mask)); + cpu_mask_to_apicid(irq_desc_cpu_mask(desc))); spin_lock_irqsave(&ioapic_lock, flags); __ioapic_write_entry(apic, pin, 0, entry); set_native_irq_info(irq, TARGET_CPUS); @@ -2372,7 +2373,7 @@ int ioapic_guest_write(unsigned long phy rte.vector = cfg->vector; SET_DEST(rte.dest.dest32, rte.dest.logical.logical_dest, - cpu_mask_to_apicid(&cfg->cpu_mask)); + cpu_mask_to_apicid(irq_desc_cpu_mask(desc))); io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&rte) + 0)); io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&rte) + 1)); --- 2011-10-18.orig/xen/arch/x86/irq.c 2011-10-18 13:41:41.000000000 +0200 +++ 2011-10-18/xen/arch/x86/irq.c 2011-10-18 17:36:39.000000000 +0200 @@ -25,6 +25,7 @@ #include static void parse_irq_vector_map_param(char *s); +static int __assign_irq_vector(int irq, struct irq_desc *, const cpumask_t *); /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. 
*/ bool_t __read_mostly opt_noirqbalance = 0; @@ -110,7 +111,7 @@ static int __init __bind_irq_vector(int { cpumask_t online_mask; int cpu; - struct irq_cfg *cfg = irq_cfg(irq); + struct irq_desc *desc = irq_to_desc(irq); BUG_ON((unsigned)irq >= nr_irqs); BUG_ON((unsigned)vector >= NR_VECTORS); @@ -118,21 +119,22 @@ static int __init __bind_irq_vector(int cpumask_and(&online_mask, cpu_mask, &cpu_online_map); if (cpumask_empty(&online_mask)) return -EINVAL; - if ((cfg->vector == vector) && cpumask_equal(&cfg->cpu_mask, &online_mask)) + if ( (desc->arch.vector == vector) && + cpumask_equal(irq_desc_cpu_mask(desc), &online_mask) ) return 0; - if (cfg->vector != IRQ_VECTOR_UNASSIGNED) + if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED ) return -EBUSY; trace_irq_mask(TRC_HW_IRQ_BIND_VECTOR, irq, vector, &online_mask); for_each_cpu_mask(cpu, online_mask) per_cpu(vector_irq, cpu)[vector] = irq; - cfg->vector = vector; - cpumask_copy(&cfg->cpu_mask, &online_mask); - if ( cfg->used_vectors ) + desc->arch.vector = vector; + cpumask_copy(irq_desc_cpu_mask(desc), &online_mask); + if ( desc->arch.used_vectors ) { - ASSERT(!test_bit(vector, cfg->used_vectors)); - set_bit(vector, cfg->used_vectors); + ASSERT(!test_bit(vector, desc->arch.used_vectors)); + set_bit(vector, desc->arch.used_vectors); } - cfg->used = IRQ_USED; + desc->arch.used = IRQ_USED; if (IO_APIC_IRQ(irq)) irq_vector[irq] = vector; return 0; @@ -166,14 +168,17 @@ int create_irq(void) { unsigned long flags; int irq, ret; - irq = -ENOSPC; + struct irq_desc *desc; spin_lock_irqsave(&vector_lock, flags); irq = find_unassigned_irq(); if (irq < 0) goto out; - ret = __assign_irq_vector(irq, irq_cfg(irq), TARGET_CPUS); + desc = irq_to_desc(irq); + ret = init_one_irq_desc(desc); + if (!ret) + ret = __assign_irq_vector(irq, desc, TARGET_CPUS); if (ret < 0) irq = ret; out: @@ -197,7 +202,7 @@ static void dynamic_irq_cleanup(unsigned desc->msi_desc = NULL; desc->handler = &no_irq_type; desc->arch.used_vectors = NULL; - 
cpumask_setall(&desc->affinity); + cpumask_setall(irq_desc_affinity(desc)); spin_unlock_irqrestore(&desc->lock, flags); /* Wait to make sure it's not being used on another CPU */ @@ -211,38 +216,38 @@ static void __clear_irq_vector(int irq) { int cpu, vector, old_vector; cpumask_t tmp_mask; - struct irq_cfg *cfg = irq_cfg(irq); + struct irq_desc *desc = irq_to_desc(irq); - BUG_ON(!cfg->vector); + BUG_ON(!desc->arch.vector); - /* Always clear cfg->vector */ - vector = cfg->vector; - cpumask_and(&tmp_mask, &cfg->cpu_mask, &cpu_online_map); + /* Always clear desc->arch.vector */ + vector = desc->arch.vector; + cpumask_and(&tmp_mask, irq_desc_cpu_mask(desc), &cpu_online_map); for_each_cpu_mask(cpu, tmp_mask) { ASSERT( per_cpu(vector_irq, cpu)[vector] == irq ); per_cpu(vector_irq, cpu)[vector] = -1; } - cfg->vector = IRQ_VECTOR_UNASSIGNED; - cpumask_clear(&cfg->cpu_mask); + desc->arch.vector = IRQ_VECTOR_UNASSIGNED; + cpumask_clear(irq_desc_cpu_mask(desc)); - if ( cfg->used_vectors ) + if ( desc->arch.used_vectors ) { - ASSERT(test_bit(vector, cfg->used_vectors)); - clear_bit(vector, cfg->used_vectors); + ASSERT(test_bit(vector, desc->arch.used_vectors)); + clear_bit(vector, desc->arch.used_vectors); } - cfg->used = IRQ_UNUSED; + desc->arch.used = IRQ_UNUSED; trace_irq_mask(TRC_HW_IRQ_CLEAR_VECTOR, irq, vector, &tmp_mask); - if (likely(!cfg->move_in_progress)) + if ( likely(!desc->arch.move_in_progress) ) return; - /* If we were in motion, also clear cfg->old_vector */ - old_vector = cfg->old_vector; - cpumask_and(&tmp_mask, &cfg->old_cpu_mask, &cpu_online_map); + /* If we were in motion, also clear desc->arch.old_vector */ + old_vector = desc->arch.old_vector; + cpumask_and(&tmp_mask, irq_desc_old_cpu_mask(desc), &cpu_online_map); for_each_cpu_mask(cpu, tmp_mask) { ASSERT( per_cpu(vector_irq, cpu)[old_vector] == irq ); @@ -250,16 +255,16 @@ static void __clear_irq_vector(int irq) per_cpu(vector_irq, cpu)[old_vector] = -1; } - cfg->old_vector = IRQ_VECTOR_UNASSIGNED; - 
cpumask_clear(&cfg->old_cpu_mask); + desc->arch.old_vector = IRQ_VECTOR_UNASSIGNED; + cpumask_clear(irq_desc_old_cpu_mask(desc)); - if ( cfg->used_vectors ) + if ( desc->arch.used_vectors ) { - ASSERT(test_bit(old_vector, cfg->used_vectors)); - clear_bit(old_vector, cfg->used_vectors); + ASSERT(test_bit(old_vector, desc->arch.used_vectors)); + clear_bit(old_vector, desc->arch.used_vectors); } - cfg->move_in_progress = 0; + desc->arch.move_in_progress = 0; } void clear_irq_vector(int irq) @@ -296,25 +301,14 @@ int irq_to_vector(int irq) return vector; } -static void __init init_one_irq_desc(struct irq_desc *desc) +int arch_init_one_irq_desc(struct irq_desc *desc) { - desc->status = IRQ_DISABLED; - desc->handler = &no_irq_type; - desc->action = NULL; - desc->msi_desc = NULL; - spin_lock_init(&desc->lock); - cpumask_setall(&desc->affinity); - INIT_LIST_HEAD(&desc->rl_link); -} + desc->arch.vector = IRQ_VECTOR_UNASSIGNED; + desc->arch.old_vector = IRQ_VECTOR_UNASSIGNED; + cpumask_clear(irq_desc_cpu_mask(desc)); + cpumask_clear(irq_desc_old_cpu_mask(desc)); -static void __init init_one_irq_cfg(struct irq_cfg *cfg) -{ - cfg->vector = IRQ_VECTOR_UNASSIGNED; - cfg->old_vector = IRQ_VECTOR_UNASSIGNED; - cpumask_clear(&cfg->cpu_mask); - cpumask_clear(&cfg->old_cpu_mask); - cfg->used_vectors = NULL; - cfg->used = IRQ_UNUSED; + return 0; } int __init init_irq_data(void) @@ -331,12 +325,14 @@ int __init init_irq_data(void) if ( !irq_desc || !irq_vector ) return -ENOMEM; - for (irq = 0; irq < nr_irqs; irq++) { + for (irq = 0; irq < nr_irqs_gsi; irq++) { desc = irq_to_desc(irq); desc->irq = irq; - init_one_irq_desc(desc); - init_one_irq_cfg(&desc->arch); + if ( init_one_irq_desc(desc) ) + return -ENOMEM; } + for (; irq < nr_irqs; irq++) + irq_to_desc(irq)->irq = irq; /* Never allocate the hypercall vector or Linux/BSD fast-trap vector.
*/ set_bit(LEGACY_SYSCALL_VECTOR, used_vectors); @@ -403,7 +398,8 @@ static vmask_t *irq_get_used_vector_mask return ret; } -int __assign_irq_vector(int irq, struct irq_cfg *cfg, const cpumask_t *mask) +static int __assign_irq_vector( + int irq, struct irq_desc *desc, const cpumask_t *mask) { /* * NOTE! The local APIC isn't very good at handling @@ -426,13 +422,13 @@ int __assign_irq_vector(int irq, struct old_vector = irq_to_vector(irq); if (old_vector) { cpumask_and(&tmp_mask, mask, &cpu_online_map); - if (cpumask_intersects(&tmp_mask, &cfg->cpu_mask)) { - cfg->vector = old_vector; + if (cpumask_intersects(&tmp_mask, irq_desc_cpu_mask(desc))) { + desc->arch.vector = old_vector; return 0; } } - if ((cfg->move_in_progress) || cfg->move_cleanup_count) + if ( desc->arch.move_in_progress || desc->arch.move_cleanup_count ) return -EAGAIN; err = -ENOSPC; @@ -440,9 +436,9 @@ int __assign_irq_vector(int irq, struct /* This is the only place normal IRQs are ever marked * as "in use". If they're not in use yet, check to see * if we need to assign a global vector mask. 
*/ - if ( cfg->used == IRQ_USED ) + if ( desc->arch.used == IRQ_USED ) { - irq_used_vectors = cfg->used_vectors; + irq_used_vectors = desc->arch.used_vectors; } else irq_used_vectors = irq_get_used_vector_mask(irq); @@ -485,29 +481,29 @@ next: current_offset = offset; local_irq_save(flags); if (old_vector) { - cfg->move_in_progress = 1; - cpumask_copy(&cfg->old_cpu_mask, &cfg->cpu_mask); - cfg->old_vector = cfg->vector; + desc->arch.move_in_progress = 1; + cpumask_copy(irq_desc_old_cpu_mask(desc), irq_desc_cpu_mask(desc)); + desc->arch.old_vector = desc->arch.vector; } trace_irq_mask(TRC_HW_IRQ_ASSIGN_VECTOR, irq, vector, &tmp_mask); for_each_cpu_mask(new_cpu, tmp_mask) per_cpu(vector_irq, new_cpu)[vector] = irq; - cfg->vector = vector; - cpumask_copy(&cfg->cpu_mask, &tmp_mask); + desc->arch.vector = vector; + cpumask_copy(irq_desc_cpu_mask(desc), &tmp_mask); - cfg->used = IRQ_USED; - ASSERT((cfg->used_vectors == NULL) - || (cfg->used_vectors == irq_used_vectors)); - cfg->used_vectors = irq_used_vectors; + desc->arch.used = IRQ_USED; + ASSERT((desc->arch.used_vectors == NULL) + || (desc->arch.used_vectors == irq_used_vectors)); + desc->arch.used_vectors = irq_used_vectors; if (IO_APIC_IRQ(irq)) irq_vector[irq] = vector; - if ( cfg->used_vectors ) + if ( desc->arch.used_vectors ) { - ASSERT(!test_bit(vector, cfg->used_vectors)); + ASSERT(!test_bit(vector, desc->arch.used_vectors)); - set_bit(vector, cfg->used_vectors); + set_bit(vector, desc->arch.used_vectors); } err = 0; @@ -521,16 +517,15 @@ int assign_irq_vector(int irq) { int ret; unsigned long flags; - struct irq_cfg *cfg = irq_cfg(irq); struct irq_desc *desc = irq_to_desc(irq); BUG_ON(irq >= nr_irqs || irq <0); spin_lock_irqsave(&vector_lock, flags); - ret = __assign_irq_vector(irq, cfg, TARGET_CPUS); + ret = __assign_irq_vector(irq, desc, TARGET_CPUS); if (!ret) { - ret = cfg->vector; - cpumask_copy(&desc->affinity, &cfg->cpu_mask); + ret = desc->arch.vector; + cpumask_copy(irq_desc_affinity(desc), 
irq_desc_cpu_mask(desc)); } spin_unlock_irqrestore(&vector_lock, flags); return ret; @@ -543,15 +538,16 @@ int assign_irq_vector(int irq) void __setup_vector_irq(int cpu) { int irq, vector; - struct irq_cfg *cfg; /* Clear vector_irq */ for (vector = 0; vector < NR_VECTORS; ++vector) per_cpu(vector_irq, cpu)[vector] = -1; /* Mark the inuse vectors */ for (irq = 0; irq < nr_irqs; ++irq) { - cfg = irq_cfg(irq); - if (!cpu_isset(cpu, cfg->cpu_mask)) + struct irq_desc *desc = irq_to_desc(irq); + + if (!irq_desc_initialized(desc) || + !cpumask_test_cpu(cpu, irq_desc_cpu_mask(desc))) continue; vector = irq_to_vector(irq); per_cpu(vector_irq, cpu)[vector] = irq; @@ -560,12 +556,14 @@ void __setup_vector_irq(int cpu) void move_masked_irq(struct irq_desc *desc) { + cpumask_t *pending_mask = irq_desc_pending_mask(desc); + if (likely(!(desc->status & IRQ_MOVE_PENDING))) return; desc->status &= ~IRQ_MOVE_PENDING; - if (unlikely(cpus_empty(desc->pending_mask))) + if (unlikely(cpumask_empty(pending_mask))) return; if (!desc->handler->set_affinity) @@ -580,10 +578,10 @@ void move_masked_irq(struct irq_desc *de * * For correct operation this depends on the caller masking the irqs. 
*/ - if (likely(cpus_intersects(desc->pending_mask, cpu_online_map))) - desc->handler->set_affinity(desc, &desc->pending_mask); + if ( likely(cpumask_intersects(pending_mask, &cpu_online_map)) ) + desc->handler->set_affinity(desc, pending_mask); - cpumask_clear(&desc->pending_mask); + cpumask_clear(pending_mask); } void move_native_irq(struct irq_desc *desc) @@ -626,7 +624,8 @@ fastcall void smp_irq_move_cleanup_inter if (!desc->arch.move_cleanup_count) goto unlock; - if (vector == desc->arch.vector && cpumask_test_cpu(me, &desc->arch.cpu_mask)) + if ( vector == desc->arch.vector && + cpumask_test_cpu(me, irq_desc_cpu_mask(desc)) ) goto unlock; irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); @@ -653,7 +652,7 @@ fastcall void smp_irq_move_cleanup_inter if ( desc->arch.move_cleanup_count == 0 ) { desc->arch.old_vector = IRQ_VECTOR_UNASSIGNED; - cpumask_clear(&desc->arch.old_cpu_mask); + cpumask_clear(irq_desc_old_cpu_mask(desc)); if ( desc->arch.used_vectors ) { @@ -673,7 +672,7 @@ static void send_cleanup_vector(struct i { cpumask_t cleanup_mask; - cpumask_and(&cleanup_mask, &desc->arch.old_cpu_mask, &cpu_online_map); + cpumask_and(&cleanup_mask, irq_desc_old_cpu_mask(desc), &cpu_online_map); desc->arch.move_cleanup_count = cpumask_weight(&cleanup_mask); genapic->send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); @@ -690,7 +689,8 @@ void irq_complete_move(struct irq_desc * vector = get_irq_regs()->entry_vector; me = smp_processor_id(); - if (vector == desc->arch.vector && cpumask_test_cpu(me, &desc->arch.cpu_mask)) + if ( vector == desc->arch.vector && + cpumask_test_cpu(me, irq_desc_cpu_mask(desc)) ) send_cleanup_vector(desc); } @@ -708,15 +708,15 @@ unsigned int set_desc_affinity(struct ir local_irq_save(flags); lock_vector_lock(); - ret = __assign_irq_vector(irq, &desc->arch, mask); + ret = __assign_irq_vector(irq, desc, mask); unlock_vector_lock(); local_irq_restore(flags); if (ret < 0) return BAD_APICID; - cpumask_copy(&desc->affinity, mask); - 
cpumask_and(&dest_mask, mask, &desc->arch.cpu_mask); + cpumask_copy(irq_desc_affinity(desc), mask); + cpumask_and(&dest_mask, mask, irq_desc_cpu_mask(desc)); return cpu_mask_to_apicid(&dest_mask); } @@ -730,7 +730,7 @@ void irq_set_affinity(struct irq_desc *d ASSERT(spin_is_locked(&desc->lock)); desc->status &= ~IRQ_MOVE_PENDING; wmb(); - cpumask_copy(&desc->pending_mask, mask); + cpumask_copy(irq_desc_pending_mask(desc), mask); wmb(); desc->status |= IRQ_MOVE_PENDING; } @@ -1992,13 +1992,13 @@ static void dump_irqs(unsigned char key) desc = irq_to_desc(irq); - if ( !desc->handler || desc->handler == &no_irq_type ) + if ( !irq_desc_initialized(desc) || desc->handler == &no_irq_type ) continue; spin_lock_irqsave(&desc->lock, flags); cpumask_scnprintf(keyhandler_scratch, sizeof(keyhandler_scratch), - &desc->affinity); + irq_desc_affinity(desc)); printk(" IRQ:%4d affinity:%s vec:%02x type=%-15s" " status=%08x ", irq, keyhandler_scratch, desc->arch.vector, @@ -2073,10 +2073,12 @@ void fixup_irqs(void) continue; desc = irq_to_desc(irq); + if ( !irq_desc_initialized(desc) ) + continue; spin_lock(&desc->lock); - cpumask_copy(&affinity, &desc->affinity); + cpumask_copy(&affinity, irq_desc_affinity(desc)); if ( !desc->action || cpumask_subset(&affinity, &cpu_online_map) ) { spin_unlock(&desc->lock); --- 2011-10-18.orig/xen/arch/x86/msi.c 2011-10-18 11:14:29.000000000 +0200 +++ 2011-10-18/xen/arch/x86/msi.c 2011-10-18 15:30:47.000000000 +0200 @@ -125,13 +125,13 @@ void msi_compose_msg(struct irq_desc *de unsigned dest; int vector = desc->arch.vector; - if ( cpumask_empty(&desc->arch.cpu_mask) ) { + if ( cpumask_empty(irq_desc_cpu_mask(desc)) ) { dprintk(XENLOG_ERR,"%s, compose msi message error!!\n", __func__); return; } if ( vector ) { - dest = cpu_mask_to_apicid(&desc->arch.cpu_mask); + dest = cpu_mask_to_apicid(irq_desc_cpu_mask(desc)); msg->address_hi = MSI_ADDR_BASE_HI; msg->address_lo = --- 2011-10-18.orig/xen/arch/x86/smpboot.c 2011-10-14 16:05:11.000000000 +0200 +++ 
2011-10-18/xen/arch/x86/smpboot.c 2011-10-18 15:41:44.000000000 +0200 @@ -1011,7 +1011,7 @@ void __init smp_intr_init(void) irq_vector[irq] = FIRST_HIPRIORITY_VECTOR + seridx + 1; per_cpu(vector_irq, cpu)[FIRST_HIPRIORITY_VECTOR + seridx + 1] = irq; irq_to_desc(irq)->arch.vector = FIRST_HIPRIORITY_VECTOR + seridx + 1; - cpumask_copy(&irq_to_desc(irq)->arch.cpu_mask, &cpu_online_map); + cpumask_copy(irq_desc_cpu_mask(irq_to_desc(irq)), &cpu_online_map); } /* IPI for cleanuping vectors after irq move */ --- 2011-10-18.orig/xen/common/Makefile 2011-10-20 14:46:19.000000000 +0200 +++ 2011-10-18/xen/common/Makefile 2011-10-18 15:43:44.000000000 +0200 @@ -5,6 +5,7 @@ obj-y += domctl.o obj-y += domain.o obj-y += event_channel.o obj-y += grant_table.o +obj-y += irq.o obj-y += kernel.o obj-y += keyhandler.o obj-y += kexec.o --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ 2011-10-18/xen/common/irq.c 2011-10-18 17:29:12.000000000 +0200 @@ -0,0 +1,18 @@ +#include +#include + +int init_one_irq_desc(struct irq_desc *desc) +{ + if (irq_desc_initialized(desc)) + return 0; + + desc->status = IRQ_DISABLED; + desc->handler = &no_irq_type; + spin_lock_init(&desc->lock); + if ( !alloc_cpumask_array(desc->cpumasks) ) + return -ENOMEM; + cpumask_setall(irq_desc_affinity(desc)); + INIT_LIST_HEAD(&desc->rl_link); + + return arch_init_one_irq_desc(desc); +} --- 2011-10-18.orig/xen/drivers/passthrough/vtd/iommu.c 2011-10-18 11:18:48.000000000 +0200 +++ 2011-10-18/xen/drivers/passthrough/vtd/iommu.c 2011-10-18 15:29:15.000000000 +0200 @@ -1965,17 +1965,18 @@ static int init_vtd_hw(void) struct iommu_flush *flush = NULL; int ret; unsigned long flags; - struct irq_cfg *cfg; /* * Basic VT-d HW init: set VT-d interrupt, clear VT-d faults. 
*/ for_each_drhd_unit ( drhd ) { + struct irq_desc *desc; + iommu = drhd->iommu; - cfg = irq_cfg(iommu->irq); - dma_msi_set_affinity(irq_to_desc(iommu->irq), &cfg->cpu_mask); + desc = irq_to_desc(iommu->irq); + dma_msi_set_affinity(desc, irq_desc_cpu_mask(desc)); clear_fault_bits(iommu); --- 2011-10-18.orig/xen/include/asm-ia64/linux-xen/asm/irq.h 2011-10-18 11:01:50.000000000 +0200 +++ 2011-10-18/xen/include/asm-ia64/linux-xen/asm/irq.h 2011-10-18 18:20:06.000000000 +0200 @@ -18,8 +18,12 @@ struct irq_cfg { #define arch_irq_desc irq_cfg int vector; - cpumask_t cpu_mask; }; + +#define ARCH_IRQ_DESC_NR_CPUMASKS 1 +#define irq_desc_cpu_mask(desc) arch_irq_desc_cpumask(desc, 0) + +int init_irq_data(void); #endif static __inline__ int --- 2011-10-18.orig/xen/include/asm-x86/irq.h 2011-10-18 11:01:23.000000000 +0200 +++ 2011-10-18/xen/include/asm-x86/irq.h 2011-10-18 15:38:38.000000000 +0200 @@ -33,14 +33,17 @@ struct irq_cfg { #define arch_irq_desc irq_cfg s16 vector; /* vector itself is only 8 bits, */ s16 old_vector; /* but we use -1 for unassigned */ - cpumask_t cpu_mask; - cpumask_t old_cpu_mask; unsigned move_cleanup_count; vmask_t *used_vectors; u8 move_in_progress : 1; u8 used: 1; }; +#define ARCH_IRQ_DESC_NR_CPUMASKS 3 +#define irq_desc_cpu_mask(desc) arch_irq_desc_cpumask(desc, 0) +#define irq_desc_old_cpu_mask(desc) arch_irq_desc_cpumask(desc, 1) +#define irq_desc_pending_mask(desc) arch_irq_desc_cpumask(desc, 2) + /* For use with irq_cfg.used */ #define IRQ_UNUSED (0) #define IRQ_USED (1) @@ -174,8 +177,6 @@ void __setup_vector_irq(int cpu); void move_native_irq(struct irq_desc *); void move_masked_irq(struct irq_desc *); -int __assign_irq_vector(int irq, struct irq_cfg *, const cpumask_t *); - int bind_irq_vector(int irq, int vector, const cpumask_t *); void irq_set_affinity(struct irq_desc *, const cpumask_t *mask); --- 2011-10-18.orig/xen/include/xen/cpumask.h 2011-10-19 17:30:43.000000000 +0200 +++ 2011-10-18/xen/include/xen/cpumask.h 2011-10-19 
17:30:48.000000000 +0200 @@ -369,6 +369,36 @@ static inline void free_cpumask_var(cpum { xfree(mask); } + +typedef cpumask_t (*cpumask_array_t)[]; +#define DECLARE_CPUMASK_ARRAY(name, dim) cpumask_t (*name)[dim] +#define CPUMASK_ARRAY_LONGS (nr_cpumask_bits / BITS_PER_LONG) +#define cpumask_array_index(arr, n) \ + container_of((*(arr))->bits + (n) * CPUMASK_ARRAY_LONGS, \ + typeof(**(arr)), bits[0]) + +#define alloc_cpumask_array(m) __alloc_cpumask_array(&(m), ARRAY_SIZE(*(m))) +static inline bool_t __alloc_cpumask_array(cpumask_array_t *mask, + unsigned int dim) +{ + *(void **)mask = xmalloc_array(unsigned long, + dim * CPUMASK_ARRAY_LONGS); + return *mask != NULL; +} + +#define zalloc_cpumask_array(m) __zalloc_cpumask_array(&(m), ARRAY_SIZE(*(m))) +static inline bool_t __zalloc_cpumask_array(cpumask_array_t *mask, + unsigned int dim) +{ + *(void **)mask = xzalloc_array(unsigned long, + dim * CPUMASK_ARRAY_LONGS); + return *mask != NULL; +} + +static inline void free_cpumask_array(cpumask_array_t mask) +{ + xfree(mask); +} #else typedef cpumask_t cpumask_var_t[1]; @@ -386,6 +416,32 @@ static inline bool_t zalloc_cpumask_var( static inline void free_cpumask_var(cpumask_var_t mask) { } + +typedef cpumask_t cpumask_array_t[]; +#define DECLARE_CPUMASK_ARRAY(name, dim) cpumask_t name[dim] +#define cpumask_array_index(arr, n) (&(arr)[n]) + +#define alloc_cpumask_array(m) __alloc_cpumask_array(&(m), ARRAY_SIZE(m)) +static inline bool_t __alloc_cpumask_array(cpumask_array_t *mask, + unsigned int dim) +{ + return 1; +} + +#define zalloc_cpumask_array(m) __zalloc_cpumask_array(&(m), ARRAY_SIZE(m)) +static inline bool_t __zalloc_cpumask_array(cpumask_array_t *mask, + unsigned int dim) +{ + cpumask_t *m; + + for (m = *mask; dim--; ++m) + cpumask_clear(m); + return 1; +} + +static inline void free_cpumask_array(cpumask_array_t mask) +{ +} #endif #if NR_CPUS > 1 --- 2011-10-18.orig/xen/include/xen/irq.h 2011-10-18 13:37:04.000000000 +0200 +++ 2011-10-18/xen/include/xen/irq.h 
2011-10-18 17:29:41.000000000 +0200 @@ -63,6 +63,17 @@ extern unsigned int nr_irqs; #endif struct msi_desc; + +#define ARCH_IRQ_DESC_FIRST_CPUMASK 1 +#ifndef ARCH_IRQ_DESC_NR_CPUMASKS +#define ARCH_IRQ_DESC_NR_CPUMASKS 0 +#endif + +#define irq_desc_cpumask(desc, idx) cpumask_array_index((desc)->cpumasks, idx) +#define irq_desc_affinity(desc) irq_desc_cpumask(desc, 0) +#define arch_irq_desc_cpumask(desc, idx) \ + irq_desc_cpumask(desc, ARCH_IRQ_DESC_FIRST_CPUMASK + (idx)) + /* * This is the "IRQ descriptor", which contains various information * about the irq, including what kind of hardware handling it has, @@ -76,8 +87,8 @@ typedef struct irq_desc { int irq; spinlock_t lock; struct arch_irq_desc arch; - cpumask_t affinity; - cpumask_t pending_mask; /* IRQ migration pending mask */ + DECLARE_CPUMASK_ARRAY(cpumasks, + ARCH_IRQ_DESC_FIRST_CPUMASK + ARCH_IRQ_DESC_NR_CPUMASKS); /* irq ratelimit */ s_time_t rl_quantum_start; @@ -85,6 +96,11 @@ typedef struct irq_desc { struct list_head rl_link; } __cacheline_aligned irq_desc_t; +int init_one_irq_desc(struct irq_desc *); +int arch_init_one_irq_desc(struct irq_desc *); + +#define irq_desc_initialized(desc) ((desc)->handler != NULL) + #if defined(__ia64__) extern irq_desc_t irq_desc[NR_VECTORS]; @@ -153,7 +169,7 @@ extern irq_desc_t *pirq_spin_lock_irq_de static inline void set_native_irq_info(unsigned int irq, const cpumask_t *mask) { - cpumask_copy(&irq_desc[irq].affinity, mask); + cpumask_copy(irq_desc_affinity(irq_to_desc(irq)), mask); } unsigned int set_desc_affinity(struct irq_desc *, const cpumask_t *);