[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v2] x86/passthrough: fix migration of MSI when using posted interrupts



Hi Roger & Jan,

I got my test env back, and back the patch to stable-4.12, run same
test, I still seen original issue, guest kernel printed error:

 kernel:do_IRQ: 20.114 No irq handler for vector (irq -1)

After that, pass through infiniband VF stopped to work.

My patch as below, please check:

diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
index a1a43cd792..2d175d2a00 100644
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -111,6 +111,12 @@ static void vlapic_clear_irr(int vector, struct vlapic 
*vlapic)
     vlapic_clear_vector(vector, &vlapic->regs->data[APIC_IRR]);
 }
 
+void vlapic_sync_pir_to_irr(struct vcpu *v)
+{
+    if ( hvm_funcs.sync_pir_to_irr )
+        hvm_funcs.sync_pir_to_irr(v);
+}
+
 static int vlapic_find_highest_irr(struct vlapic *vlapic)
 {
     if ( hvm_funcs.sync_pir_to_irr )
diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index 4290c7c710..b628adea4c 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -341,7 +341,7 @@ int pt_irq_create_bind(
     {
         uint8_t dest, delivery_mode;
         bool dest_mode;
-        int dest_vcpu_id;
+        int dest_vcpu_id, prev_vcpu_id = -1;
         const struct vcpu *vcpu;
         uint32_t gflags = pt_irq_bind->u.msi.gflags &
                           ~XEN_DOMCTL_VMSI_X86_UNMASKED;
@@ -411,6 +411,7 @@ int pt_irq_create_bind(
 
                 pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec;
                 pirq_dpci->gmsi.gflags = gflags;
+                prev_vcpu_id = pirq_dpci->gmsi.dest_vcpu_id;
             }
         }
         /* Calculate dest_vcpu_id for MSI-type pirq migration. */
@@ -432,7 +433,10 @@ int pt_irq_create_bind(
                 vcpu = vector_hashing_dest(d, dest, dest_mode,
                                            pirq_dpci->gmsi.gvec);
             if ( vcpu )
+            {
                 pirq_dpci->gmsi.posted = true;
+                pirq_dpci->gmsi.dest_vcpu_id = vcpu->vcpu_id;
+            }
         }
         if ( vcpu && iommu_enabled )
             hvm_migrate_pirq(pirq_dpci, vcpu);
@@ -440,7 +444,8 @@ int pt_irq_create_bind(
         /* Use interrupt posting if it is supported. */
         if ( iommu_intpost )
             pi_update_irte(vcpu ? &vcpu->arch.hvm.vmx.pi_desc : NULL,
-                           info, pirq_dpci->gmsi.gvec);
+                           info, pirq_dpci->gmsi.gvec,
+                           prev_vcpu_id >= 0 ? d->vcpu[prev_vcpu_id] : NULL);
 
         if ( pt_irq_bind->u.msi.gflags & XEN_DOMCTL_VMSI_X86_UNMASKED )
         {
@@ -729,7 +734,9 @@ int pt_irq_destroy_bind(
             what = "bogus";
     }
     else if ( pirq_dpci && pirq_dpci->gmsi.posted )
-        pi_update_irte(NULL, pirq, 0);
+        pi_update_irte(NULL, pirq, 0,
+                       pirq_dpci->gmsi.dest_vcpu_id >= 0
+                       ? d->vcpu[pirq_dpci->gmsi.dest_vcpu_id] : NULL);
 
     if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) &&
          list_empty(&pirq_dpci->digl_list) )
diff --git a/xen/drivers/passthrough/vtd/intremap.c 
b/xen/drivers/passthrough/vtd/intremap.c
index c9927e4706..d788a4b9e7 100644
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -961,12 +961,13 @@ void iommu_disable_x2apic_IR(void)
         disable_qinval(drhd->iommu);
 }
 
+#ifdef CONFIG_HVM
 /*
  * This function is used to update the IRTE for posted-interrupt
  * when guest changes MSI/MSI-X information.
  */
 int pi_update_irte(const struct pi_desc *pi_desc, const struct pirq *pirq,
-    const uint8_t gvec)
+    const uint8_t gvec, struct vcpu *prev)
 {
     struct irq_desc *desc;
     struct msi_desc *msi_desc;
@@ -979,8 +980,8 @@ int pi_update_irte(const struct pi_desc *pi_desc, const 
struct pirq *pirq,
     msi_desc = desc->msi_desc;
     if ( !msi_desc )
     {
-        rc = -ENODEV;
-        goto unlock_out;
+        spin_unlock_irq(&desc->lock);
+        return -ENODEV;
     }
     msi_desc->pi_desc = pi_desc;
     msi_desc->gvec = gvec;
@@ -989,10 +990,10 @@ int pi_update_irte(const struct pi_desc *pi_desc, const 
struct pirq *pirq,
 
     ASSERT(pcidevs_locked());
 
-    return msi_msg_write_remap_rte(msi_desc, &msi_desc->msg);
-
- unlock_out:
-    spin_unlock_irq(&desc->lock);
+    rc = msi_msg_write_remap_rte(msi_desc, &msi_desc->msg);
+    if ( !rc && prev )
+         vlapic_sync_pir_to_irr(prev);
 
     return rc;
 }
+#endif
diff --git a/xen/include/asm-x86/hvm/vlapic.h b/xen/include/asm-x86/hvm/vlapic.h
index dde66b4f0f..b0017d1dae 100644
--- a/xen/include/asm-x86/hvm/vlapic.h
+++ b/xen/include/asm-x86/hvm/vlapic.h
@@ -150,4 +150,6 @@ bool_t vlapic_match_dest(
     const struct vlapic *target, const struct vlapic *source,
     int short_hand, uint32_t dest, bool_t dest_mode);
 
+void vlapic_sync_pir_to_irr(struct vcpu *v);
+
 #endif /* __ASM_X86_HVM_VLAPIC_H__ */
diff --git a/xen/include/asm-x86/iommu.h b/xen/include/asm-x86/iommu.h
index 8dc392473d..32bfa23648 100644
--- a/xen/include/asm-x86/iommu.h
+++ b/xen/include/asm-x86/iommu.h
@@ -99,7 +99,7 @@ void iommu_disable_x2apic_IR(void);
 extern bool untrusted_msi;
 
 int pi_update_irte(const struct pi_desc *pi_desc, const struct pirq *pirq,
-                   const uint8_t gvec);
+                   const uint8_t gvec, struct vcpu *prev);
 
 #endif /* !__ARCH_X86_IOMMU_H__ */
 /*

Thanks,
Joe

On 10/9/19 5:52 AM, Roger Pau Monne wrote:
> When using posted interrupts and the guest migrates MSI from vCPUs Xen
> needs to flush any pending PIRR vectors on the previous vCPU, or else
> those vectors could get wrongly injected at a later point when the MSI
> fields are already updated.
> 
> Rename sync_pir_to_irr to vlapic_sync_pir_to_irr and export it so it
> can be called when updating the posted interrupt descriptor field in
> pi_update_irte. While there also remove the unlock_out from
> pi_update_irte, it's used in a single goto and removing it makes the
> function smaller.
> 
> Note that PIRR is synced to IRR both in pt_irq_destroy_bind and
> pt_irq_create_bind when the interrupt delivery data is being updated.
> 
> Also store the vCPU ID in multi-destination mode when using posted
> interrupts and the interrupt is bound to a single vCPU in order for
> posted interrupts to be used.
> 
> While there guard pi_update_irte with CONFIG_HVM since it's only used
> with HVM guests.
> 
> Reported-by: Joe Jin <joe.jin@xxxxxxxxxx>
> Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> ---
> Cc: Joe Jin <joe.jin@xxxxxxxxxx>
> Cc: Juergen Gross <jgross@xxxxxxxx>
> ---
> I would like to see a bug fix for this issue in 4.13. The fix here only
> affects posted interrupts, hence I think the risk of breaking anything
> else is low.
> ---
> Changes since v1:
>  - Store the vcpu id also in multi-dest mode if the interrupt is bound
>    to a vcpu for posted delivery.
>  - s/#if/#ifdef/.
> ---
>  xen/arch/x86/hvm/vlapic.c              |  6 +++---
>  xen/drivers/passthrough/io.c           | 13 ++++++++++---
>  xen/drivers/passthrough/vtd/intremap.c | 15 ++++++++-------
>  xen/include/asm-x86/hvm/vlapic.h       |  2 ++
>  xen/include/asm-x86/iommu.h            |  2 +-
>  5 files changed, 24 insertions(+), 14 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
> index 9466258d6f..d255ad8db7 100644
> --- a/xen/arch/x86/hvm/vlapic.c
> +++ b/xen/arch/x86/hvm/vlapic.c
> @@ -106,7 +106,7 @@ static void vlapic_clear_irr(int vector, struct vlapic 
> *vlapic)
>      vlapic_clear_vector(vector, &vlapic->regs->data[APIC_IRR]);
>  }
>  
> -static void sync_pir_to_irr(struct vcpu *v)
> +void vlapic_sync_pir_to_irr(struct vcpu *v)
>  {
>      if ( hvm_funcs.sync_pir_to_irr )
>          alternative_vcall(hvm_funcs.sync_pir_to_irr, v);
> @@ -114,7 +114,7 @@ static void sync_pir_to_irr(struct vcpu *v)
>  
>  static int vlapic_find_highest_irr(struct vlapic *vlapic)
>  {
> -    sync_pir_to_irr(vlapic_vcpu(vlapic));
> +    vlapic_sync_pir_to_irr(vlapic_vcpu(vlapic));
>  
>      return vlapic_find_highest_vector(&vlapic->regs->data[APIC_IRR]);
>  }
> @@ -1493,7 +1493,7 @@ static int lapic_save_regs(struct vcpu *v, 
> hvm_domain_context_t *h)
>      if ( !has_vlapic(v->domain) )
>          return 0;
>  
> -    sync_pir_to_irr(v);
> +    vlapic_sync_pir_to_irr(v);
>  
>      return hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, vcpu_vlapic(v)->regs);
>  }
> diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
> index b292e79382..5bf1877726 100644
> --- a/xen/drivers/passthrough/io.c
> +++ b/xen/drivers/passthrough/io.c
> @@ -341,7 +341,7 @@ int pt_irq_create_bind(
>      {
>          uint8_t dest, delivery_mode;
>          bool dest_mode;
> -        int dest_vcpu_id;
> +        int dest_vcpu_id, prev_vcpu_id = -1;
>          const struct vcpu *vcpu;
>          uint32_t gflags = pt_irq_bind->u.msi.gflags &
>                            ~XEN_DOMCTL_VMSI_X86_UNMASKED;
> @@ -411,6 +411,7 @@ int pt_irq_create_bind(
>  
>                  pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec;
>                  pirq_dpci->gmsi.gflags = gflags;
> +                prev_vcpu_id = pirq_dpci->gmsi.dest_vcpu_id;
>              }
>          }
>          /* Calculate dest_vcpu_id for MSI-type pirq migration. */
> @@ -432,7 +433,10 @@ int pt_irq_create_bind(
>                  vcpu = vector_hashing_dest(d, dest, dest_mode,
>                                             pirq_dpci->gmsi.gvec);
>              if ( vcpu )
> +            {
>                  pirq_dpci->gmsi.posted = true;
> +                pirq_dpci->gmsi.dest_vcpu_id = vcpu->vcpu_id;
> +            }
>          }
>          if ( vcpu && is_iommu_enabled(d) )
>              hvm_migrate_pirq(pirq_dpci, vcpu);
> @@ -440,7 +444,8 @@ int pt_irq_create_bind(
>          /* Use interrupt posting if it is supported. */
>          if ( iommu_intpost )
>              pi_update_irte(vcpu ? &vcpu->arch.hvm.vmx.pi_desc : NULL,
> -                           info, pirq_dpci->gmsi.gvec);
> +                           info, pirq_dpci->gmsi.gvec,
> +                           prev_vcpu_id >= 0 ? d->vcpu[prev_vcpu_id] : NULL);
>  
>          if ( pt_irq_bind->u.msi.gflags & XEN_DOMCTL_VMSI_X86_UNMASKED )
>          {
> @@ -729,7 +734,9 @@ int pt_irq_destroy_bind(
>              what = "bogus";
>      }
>      else if ( pirq_dpci && pirq_dpci->gmsi.posted )
> -        pi_update_irte(NULL, pirq, 0);
> +        pi_update_irte(NULL, pirq, 0,
> +                       pirq_dpci->gmsi.dest_vcpu_id >= 0
> +                       ? d->vcpu[pirq_dpci->gmsi.dest_vcpu_id] : NULL);
>  
>      if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) &&
>           list_empty(&pirq_dpci->digl_list) )
> diff --git a/xen/drivers/passthrough/vtd/intremap.c 
> b/xen/drivers/passthrough/vtd/intremap.c
> index bf846195c4..07c1c1627a 100644
> --- a/xen/drivers/passthrough/vtd/intremap.c
> +++ b/xen/drivers/passthrough/vtd/intremap.c
> @@ -946,12 +946,13 @@ void intel_iommu_disable_eim(void)
>          disable_qinval(drhd->iommu);
>  }
>  
> +#ifdef CONFIG_HVM
>  /*
>   * This function is used to update the IRTE for posted-interrupt
>   * when guest changes MSI/MSI-X information.
>   */
>  int pi_update_irte(const struct pi_desc *pi_desc, const struct pirq *pirq,
> -    const uint8_t gvec)
> +    const uint8_t gvec, struct vcpu *prev)
>  {
>      struct irq_desc *desc;
>      struct msi_desc *msi_desc;
> @@ -964,8 +965,8 @@ int pi_update_irte(const struct pi_desc *pi_desc, const 
> struct pirq *pirq,
>      msi_desc = desc->msi_desc;
>      if ( !msi_desc )
>      {
> -        rc = -ENODEV;
> -        goto unlock_out;
> +        spin_unlock_irq(&desc->lock);
> +        return -ENODEV;
>      }
>      msi_desc->pi_desc = pi_desc;
>      msi_desc->gvec = gvec;
> @@ -974,10 +975,10 @@ int pi_update_irte(const struct pi_desc *pi_desc, const 
> struct pirq *pirq,
>  
>      ASSERT(pcidevs_locked());
>  
> -    return msi_msg_write_remap_rte(msi_desc, &msi_desc->msg);
> -
> - unlock_out:
> -    spin_unlock_irq(&desc->lock);
> +    rc = msi_msg_write_remap_rte(msi_desc, &msi_desc->msg);
> +    if ( !rc && prev )
> +         vlapic_sync_pir_to_irr(prev);
>  
>      return rc;
>  }
> +#endif
> diff --git a/xen/include/asm-x86/hvm/vlapic.h 
> b/xen/include/asm-x86/hvm/vlapic.h
> index dde66b4f0f..b0017d1dae 100644
> --- a/xen/include/asm-x86/hvm/vlapic.h
> +++ b/xen/include/asm-x86/hvm/vlapic.h
> @@ -150,4 +150,6 @@ bool_t vlapic_match_dest(
>      const struct vlapic *target, const struct vlapic *source,
>      int short_hand, uint32_t dest, bool_t dest_mode);
>  
> +void vlapic_sync_pir_to_irr(struct vcpu *v);
> +
>  #endif /* __ASM_X86_HVM_VLAPIC_H__ */
> diff --git a/xen/include/asm-x86/iommu.h b/xen/include/asm-x86/iommu.h
> index 85741f7c96..314dcfbe47 100644
> --- a/xen/include/asm-x86/iommu.h
> +++ b/xen/include/asm-x86/iommu.h
> @@ -119,7 +119,7 @@ static inline void iommu_disable_x2apic(void)
>  extern bool untrusted_msi;
>  
>  int pi_update_irte(const struct pi_desc *pi_desc, const struct pirq *pirq,
> -                   const uint8_t gvec);
> +                   const uint8_t gvec, struct vcpu *prev);
>  
>  #endif /* !__ARCH_X86_IOMMU_H__ */
>  /*
> 


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.