
[Xen-devel] [PATCH v4 05/13] x86/IRQ: fix locking around vector management


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Jan Beulich <JBeulich@xxxxxxxx>
  • Date: Tue, 16 Jul 2019 07:39:58 +0000
  • Accept-language: en-US
  • Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>
  • Delivery-date: Tue, 16 Jul 2019 07:47:59 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>
  • Thread-index: AQHVO6mrVR2llFRiTk2C0NteVXbnZA==
  • Thread-topic: [PATCH v4 05/13] x86/IRQ: fix locking around vector management

All of __{assign,bind,clear}_irq_vector() manipulate struct irq_desc
fields, and hence ought to be called with the descriptor lock held in
addition to vector_lock. Currently this is the case only for
set_desc_affinity() (in the common case) and destroy_irq(), which also
clarifies what the nesting behavior between the two locks has to be.
Reflect the new expectation by having these functions all take a
descriptor as parameter instead of an interrupt number.
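
To make the intended nesting concrete, here is a minimal sketch (not
part of the patch itself; it merely mirrors the assign_irq_vector()
change further down) of the order callers are now expected to take the
two locks in, with the descriptor lock outermost and vector_lock nested
inside:

    /* Sketch only: descriptor lock outer, vector_lock inner. */
    spin_lock_irqsave(&desc->lock, flags);
    spin_lock(&vector_lock);
    ret = _assign_irq_vector(desc, mask);
    spin_unlock(&vector_lock);
    spin_unlock_irqrestore(&desc->lock, flags);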

Also take care of the two special cases of calls to set_desc_affinity():
set_ioapic_affinity_irq() and VT-d's dma_msi_set_affinity() get called
directly as well, and in these cases the descriptor lock had not been
acquired until now. For set_ioapic_affinity_irq() this means acquiring /
releasing of the IO-APIC lock can then be plain spin_{,un}lock().
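
For instance, the VT-d caller now acquires the descriptor lock around
the direct call (a sketch mirroring the adjust_irq_affinity() hunk
below):

    desc = irq_to_desc(drhd->iommu->msi.irq);
    spin_lock_irq(&desc->lock);
    dma_msi_set_affinity(desc, cpumask);
    spin_unlock_irq(&desc->lock);

With interrupts off while the descriptor lock is held, the IO-APIC path
likewise no longer needs the irqsave/irqrestore forms for ioapic_lock.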

Drop one of the two leading underscores from all three functions at
the same time.

There's one case left where descriptors get manipulated with just
vector_lock held: setup_vector_irq() expects its caller to have acquired
vector_lock, and hence can't itself acquire the descriptor locks (that
would be the wrong lock order). I don't currently see how to address this.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx> [VT-d]
Reviewed-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
v4: Adjust comment ahead of setup_vector_irq().
v3: Also drop one leading underscore from a comment. Re-base.
v2: Also adjust set_ioapic_affinity_irq() and VT-d's
     dma_msi_set_affinity().

--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -550,14 +550,14 @@ static void clear_IO_APIC (void)
  static void
  set_ioapic_affinity_irq(struct irq_desc *desc, const cpumask_t *mask)
  {
-    unsigned long flags;
      unsigned int dest;
      int pin, irq;
      struct irq_pin_list *entry;
  
      irq = desc->irq;
  
-    spin_lock_irqsave(&ioapic_lock, flags);
+    spin_lock(&ioapic_lock);
+
      dest = set_desc_affinity(desc, mask);
      if (dest != BAD_APICID) {
          if ( !x2apic_enabled )
@@ -580,8 +580,8 @@ set_ioapic_affinity_irq(struct irq_desc
              entry = irq_2_pin + entry->next;
          }
      }
-    spin_unlock_irqrestore(&ioapic_lock, flags);
  
+    spin_unlock(&ioapic_lock);
  }
  
  /*
@@ -674,16 +674,19 @@ void /*__init*/ setup_ioapic_dest(void)
      for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
          for (pin = 0; pin < nr_ioapic_entries[ioapic]; pin++) {
              struct irq_desc *desc;
+            unsigned long flags;
  
              irq_entry = find_irq_entry(ioapic, pin, mp_INT);
              if (irq_entry == -1)
                  continue;
              irq = pin_2_irq(irq_entry, ioapic, pin);
              desc = irq_to_desc(irq);
+
+            spin_lock_irqsave(&desc->lock, flags);
              BUG_ON(!cpumask_intersects(desc->arch.cpu_mask, &cpu_online_map));
              set_ioapic_affinity_irq(desc, desc->arch.cpu_mask);
+            spin_unlock_irqrestore(&desc->lock, flags);
          }
-
      }
  }
  
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -27,6 +27,7 @@
  #include <public/physdev.h>
  
  static int parse_irq_vector_map_param(const char *s);
+static void _clear_irq_vector(struct irq_desc *desc);
  
  /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
  bool __read_mostly opt_noirqbalance;
@@ -143,13 +144,12 @@ static void trace_irq_mask(uint32_t even
          _trace_irq_mask(event, irq, vector, mask);
  }
  
-static int __init __bind_irq_vector(int irq, int vector, const cpumask_t *cpu_mask)
+static int __init _bind_irq_vector(struct irq_desc *desc, int vector,
+                                   const cpumask_t *cpu_mask)
  {
      cpumask_t online_mask;
      int cpu;
-    struct irq_desc *desc = irq_to_desc(irq);
  
-    BUG_ON((unsigned)irq >= nr_irqs);
      BUG_ON((unsigned)vector >= NR_VECTORS);
  
      cpumask_and(&online_mask, cpu_mask, &cpu_online_map);
@@ -160,9 +160,9 @@ static int __init __bind_irq_vector(int
          return 0;
      if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
          return -EBUSY;
-    trace_irq_mask(TRC_HW_IRQ_BIND_VECTOR, irq, vector, &online_mask);
+    trace_irq_mask(TRC_HW_IRQ_BIND_VECTOR, desc->irq, vector, &online_mask);
      for_each_cpu(cpu, &online_mask)
-        per_cpu(vector_irq, cpu)[vector] = irq;
+        per_cpu(vector_irq, cpu)[vector] = desc->irq;
      desc->arch.vector = vector;
      cpumask_copy(desc->arch.cpu_mask, &online_mask);
      if ( desc->arch.used_vectors )
@@ -176,12 +176,18 @@ static int __init __bind_irq_vector(int
  
  int __init bind_irq_vector(int irq, int vector, const cpumask_t *cpu_mask)
  {
+    struct irq_desc *desc = irq_to_desc(irq);
      unsigned long flags;
      int ret;
  
-    spin_lock_irqsave(&vector_lock, flags);
-    ret = __bind_irq_vector(irq, vector, cpu_mask);
-    spin_unlock_irqrestore(&vector_lock, flags);
+    BUG_ON((unsigned)irq >= nr_irqs);
+
+    spin_lock_irqsave(&desc->lock, flags);
+    spin_lock(&vector_lock);
+    ret = _bind_irq_vector(desc, vector, cpu_mask);
+    spin_unlock(&vector_lock);
+    spin_unlock_irqrestore(&desc->lock, flags);
+
      return ret;
  }
  
@@ -266,18 +272,20 @@ void destroy_irq(unsigned int irq)
  
      spin_lock_irqsave(&desc->lock, flags);
      desc->handler = &no_irq_type;
-    clear_irq_vector(irq);
+    spin_lock(&vector_lock);
+    _clear_irq_vector(desc);
+    spin_unlock(&vector_lock);
      desc->arch.used_vectors = NULL;
      spin_unlock_irqrestore(&desc->lock, flags);
  
      xfree(action);
  }
  
-static void __clear_irq_vector(int irq)
+static void _clear_irq_vector(struct irq_desc *desc)
  {
-    int cpu, vector, old_vector;
+    unsigned int cpu;
+    int vector, old_vector, irq = desc->irq;
      cpumask_t tmp_mask;
-    struct irq_desc *desc = irq_to_desc(irq);
  
      BUG_ON(!desc->arch.vector);
  
@@ -323,11 +331,14 @@ static void __clear_irq_vector(int irq)
  
  void clear_irq_vector(int irq)
  {
+    struct irq_desc *desc = irq_to_desc(irq);
      unsigned long flags;
  
-    spin_lock_irqsave(&vector_lock, flags);
-    __clear_irq_vector(irq);
-    spin_unlock_irqrestore(&vector_lock, flags);
+    spin_lock_irqsave(&desc->lock, flags);
+    spin_lock(&vector_lock);
+    _clear_irq_vector(desc);
+    spin_unlock(&vector_lock);
+    spin_unlock_irqrestore(&desc->lock, flags);
  }
  
  int irq_to_vector(int irq)
@@ -462,8 +473,7 @@ static vmask_t *irq_get_used_vector_mask
      return ret;
  }
  
-static int __assign_irq_vector(
-    int irq, struct irq_desc *desc, const cpumask_t *mask)
+static int _assign_irq_vector(struct irq_desc *desc, const cpumask_t *mask)
  {
      /*
       * NOTE! The local APIC isn't very good at handling
@@ -477,7 +487,8 @@ static int __assign_irq_vector(
       * 0x80, because int 0x80 is hm, kind of importantish. ;)
       */
      static int current_vector = FIRST_DYNAMIC_VECTOR, current_offset = 0;
-    int cpu, err, old_vector;
+    unsigned int cpu;
+    int err, old_vector, irq = desc->irq;
      vmask_t *irq_used_vectors = NULL;
  
      old_vector = irq_to_vector(irq);
@@ -590,8 +601,12 @@ int assign_irq_vector(int irq, const cpu
      
      BUG_ON(irq >= nr_irqs || irq <0);
  
-    spin_lock_irqsave(&vector_lock, flags);
-    ret = __assign_irq_vector(irq, desc, mask ?: TARGET_CPUS);
+    spin_lock_irqsave(&desc->lock, flags);
+
+    spin_lock(&vector_lock);
+    ret = _assign_irq_vector(desc, mask ?: TARGET_CPUS);
+    spin_unlock(&vector_lock);
+
      if ( !ret )
      {
          ret = desc->arch.vector;
@@ -600,14 +615,16 @@ int assign_irq_vector(int irq, const cpu
          else
              cpumask_setall(desc->affinity);
      }
-    spin_unlock_irqrestore(&vector_lock, flags);
+
+    spin_unlock_irqrestore(&desc->lock, flags);
  
      return ret;
  }
  
  /*
   * Initialize vector_irq on a new cpu. This function must be called
- * with vector_lock held.
+ * with vector_lock held.  For this reason it may not itself acquire
+ * the IRQ descriptor locks, as lock nesting is the other way around.
   */
  void setup_vector_irq(unsigned int cpu)
  {
@@ -775,7 +792,6 @@ void irq_complete_move(struct irq_desc *
  
  unsigned int set_desc_affinity(struct irq_desc *desc, const cpumask_t *mask)
  {
-    unsigned int irq;
      int ret;
      unsigned long flags;
      cpumask_t dest_mask;
@@ -783,10 +799,8 @@ unsigned int set_desc_affinity(struct ir
      if (!cpumask_intersects(mask, &cpu_online_map))
          return BAD_APICID;
  
-    irq = desc->irq;
-
      spin_lock_irqsave(&vector_lock, flags);
-    ret = __assign_irq_vector(irq, desc, mask);
+    ret = _assign_irq_vector(desc, mask);
      spin_unlock_irqrestore(&vector_lock, flags);
  
      if (ret < 0)
@@ -2453,7 +2467,7 @@ void fixup_irqs(const cpumask_t *mask, b
  
          /*
           * In order for the affinity adjustment below to be successful, we
-         * need __assign_irq_vector() to succeed. This in particular means
+         * need _assign_irq_vector() to succeed. This in particular means
           * clearing desc->arch.move_in_progress if this would otherwise
           * prevent the function from succeeding. Since there's no way for the
           * flag to get cleared anymore when there's no possible destination
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2134,11 +2134,16 @@ static void adjust_irq_affinity(struct a
      unsigned int node = rhsa ? pxm_to_node(rhsa->proximity_domain)
                               : NUMA_NO_NODE;
      const cpumask_t *cpumask = &cpu_online_map;
+    struct irq_desc *desc;
  
      if ( node < MAX_NUMNODES && node_online(node) &&
           cpumask_intersects(&node_to_cpumask(node), cpumask) )
          cpumask = &node_to_cpumask(node);
-    dma_msi_set_affinity(irq_to_desc(drhd->iommu->msi.irq), cpumask);
+
+    desc = irq_to_desc(drhd->iommu->msi.irq);
+    spin_lock_irq(&desc->lock);
+    dma_msi_set_affinity(desc, cpumask);
+    spin_unlock_irq(&desc->lock);
  }
  
  static int adjust_vtd_irq_affinities(void)

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel
