[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC PATCH] hvm/vpt: fix inconsistent views of vIOAPIC in vmx_intr_assist()



When injecting a periodic timer interrupt in vmx_intr_assist(), multiple read
operations are performed during one event delivery, leading to inconsistent
views of the vIOAPIC. For example, if a periodic timer interrupt comes from
the PIT, then when setting the corresponding bit in the vIRR, the
corresponding RTE is accessed in pt_update_irq(). When this function returns,
it accesses the RTE again to get the vector it set in the vIRR. Between the
two accesses, the content of the RTE may have been changed by another CPU,
because no protection method is in use. This can trigger the assertion
failure in vmx_intr_assist(). Also, after a periodic timer interrupt is
injected successfully, pt_irq_posted() will decrease the count
(pending_intr_nr). But if the corresponding RTE has been changed,
pt_irq_posted() will not decrease the count, resulting in one extra timer
interrupt.

More discussion and history can be found in
1.https://lists.xenproject.org/archives/html/xen-devel/2017-03/msg00906.html
2.https://lists.xenproject.org/archives/html/xen-devel/2017-01/msg01019.html

This patch simply uses the irq_lock to avoid these inconsistent views of the
vIOAPIC. To avoid deadlock, locked versions of several functions are provided.

Signed-off-by: Chao Gao <chao.gao@xxxxxxxxx>
---
 xen/arch/x86/hvm/irq.c      | 22 ++++++++++++++++------
 xen/arch/x86/hvm/svm/intr.c | 21 +++++++++++++++------
 xen/arch/x86/hvm/vmx/intr.c |  9 +++++++++
 xen/arch/x86/hvm/vpic.c     | 20 ++++++++++++++------
 xen/arch/x86/hvm/vpt.c      |  4 ++--
 xen/include/xen/hvm/irq.h   |  4 ++++
 6 files changed, 60 insertions(+), 20 deletions(-)

diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
index 8625584..a1af61e 100644
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -126,37 +126,47 @@ void hvm_pci_intx_deassert(
     spin_unlock(&d->arch.hvm_domain.irq_lock);
 }
 
-void hvm_isa_irq_assert(
+void hvm_isa_irq_assert_locked(
     struct domain *d, unsigned int isa_irq)
 {
     struct hvm_irq *hvm_irq = hvm_domain_irq(d);
     unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
 
     ASSERT(isa_irq <= 15);
-
-    spin_lock(&d->arch.hvm_domain.irq_lock);
+    ASSERT(spin_is_locked(&d->arch.hvm_domain.irq_lock));
 
     if ( !__test_and_set_bit(isa_irq, &hvm_irq->isa_irq.i) &&
          (hvm_irq->gsi_assert_count[gsi]++ == 0) )
         assert_irq(d, gsi, isa_irq);
+}
 
+void hvm_isa_irq_assert(
+    struct domain *d, unsigned int isa_irq)
+{
+    spin_lock(&d->arch.hvm_domain.irq_lock);
+    hvm_isa_irq_assert_locked(d, isa_irq);
     spin_unlock(&d->arch.hvm_domain.irq_lock);
 }
 
-void hvm_isa_irq_deassert(
+void hvm_isa_irq_deassert_locked(
     struct domain *d, unsigned int isa_irq)
 {
     struct hvm_irq *hvm_irq = hvm_domain_irq(d);
     unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
 
     ASSERT(isa_irq <= 15);
-
-    spin_lock(&d->arch.hvm_domain.irq_lock);
+    ASSERT(spin_is_locked(&d->arch.hvm_domain.irq_lock));
 
     if ( __test_and_clear_bit(isa_irq, &hvm_irq->isa_irq.i) &&
          (--hvm_irq->gsi_assert_count[gsi] == 0) )
         deassert_irq(d, isa_irq);
+}
 
+void hvm_isa_irq_deassert(
+    struct domain *d, unsigned int isa_irq)
+{
+    spin_lock(&d->arch.hvm_domain.irq_lock);
+    hvm_isa_irq_deassert_locked(d, isa_irq);
     spin_unlock(&d->arch.hvm_domain.irq_lock);
 }
 
diff --git a/xen/arch/x86/hvm/svm/intr.c b/xen/arch/x86/hvm/svm/intr.c
index 8511ff0..d2a318c 100644
--- a/xen/arch/x86/hvm/svm/intr.c
+++ b/xen/arch/x86/hvm/svm/intr.c
@@ -137,18 +137,25 @@ void svm_intr_assist(void)
     struct hvm_intack intack;
     enum hvm_intblk intblk;
 
+    /*
+     * Avoid the vIOAPIC RTE being changed by another vCPU.
+     * Otherwise, pt_update_irq() may return a wrong vector which is not in
+     * vIRR and pt_irq_posted() may not recognize a timer interrupt has been
+     * injected.
+     */
+    spin_lock(&v->domain->arch.hvm_domain.irq_lock);
     /* Crank the handle on interrupt state. */
     pt_update_irq(v);
 
     do {
         intack = hvm_vcpu_has_pending_irq(v);
         if ( likely(intack.source == hvm_intsrc_none) )
-            return;
+            goto out;
 
         intblk = hvm_interrupt_blocked(v, intack);
         if ( intblk == hvm_intblk_svm_gif ) {
             ASSERT(nestedhvm_enabled(v->domain));
-            return;
+            goto out;
         }
 
         /* Interrupts for the nested guest are already
@@ -165,13 +172,13 @@ void svm_intr_assist(void)
             switch (rc) {
             case NSVM_INTR_NOTINTERCEPTED:
                 /* Inject interrupt into 2nd level guest directly. */
-                break; 
+                goto out;
             case NSVM_INTR_NOTHANDLED:
             case NSVM_INTR_FORCEVMEXIT:
-                return;
+                goto out;
             case NSVM_INTR_MASKED:
                 /* Guest already enabled an interrupt window. */
-                return;
+                goto out;
             default:
                 panic("%s: nestedsvm_vcpu_interrupt can't handle value %#x",
                     __func__, rc);
@@ -195,7 +202,7 @@ void svm_intr_assist(void)
         if ( unlikely(vmcb->eventinj.fields.v) || intblk )
         {
             svm_enable_intr_window(v, intack);
-            return;
+            goto out;
         }
 
         intack = hvm_vcpu_ack_pending_irq(v, intack);
@@ -216,6 +223,8 @@ void svm_intr_assist(void)
     intack = hvm_vcpu_has_pending_irq(v);
     if ( unlikely(intack.source != hvm_intsrc_none) )
         svm_enable_intr_window(v, intack);
+ out:
+    spin_unlock(&v->domain->arch.hvm_domain.irq_lock);
 }
 
 /*
diff --git a/xen/arch/x86/hvm/vmx/intr.c b/xen/arch/x86/hvm/vmx/intr.c
index 1eb9f38..8f3d1a7 100644
--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -234,6 +234,14 @@ void vmx_intr_assist(void)
         return;
     }
 
+    /*
+     * Avoid the vIOAPIC RTE being changed by another vCPU.
+     * Otherwise, pt_update_irq() may return a wrong vector which is not in
+     * vIRR and pt_irq_posted() may not recognize a timer interrupt has been
+     * injected.
+     */
+    spin_lock(&v->domain->arch.hvm_domain.irq_lock);
+
     /* Crank the handle on interrupt state. */
     if ( is_hvm_vcpu(v) )
         pt_vector = pt_update_irq(v);
@@ -396,6 +404,7 @@ void vmx_intr_assist(void)
     }
 
  out:
+    spin_unlock(&v->domain->arch.hvm_domain.irq_lock);
     if ( !nestedhvm_vcpu_in_guestmode(v) &&
          !cpu_has_vmx_virtual_intr_delivery &&
          cpu_has_vmx_tpr_shadow )
diff --git a/xen/arch/x86/hvm/vpic.c b/xen/arch/x86/hvm/vpic.c
index e160bbd..8d02e52 100644
--- a/xen/arch/x86/hvm/vpic.c
+++ b/xen/arch/x86/hvm/vpic.c
@@ -153,14 +153,13 @@ static void __vpic_intack(struct hvm_hw_vpic *vpic, int 
irq)
     vpic_update_int_output(vpic);
 }
 
-static int vpic_intack(struct hvm_hw_vpic *vpic)
+static int vpic_intack_locked(struct hvm_hw_vpic *vpic)
 {
     int irq = -1;
 
-    vpic_lock(vpic);
-
+    ASSERT(vpic_is_locked(vpic));
     if ( !vpic->int_output )
-        goto out;
+        return irq;
 
     irq = vpic_get_highest_priority_irq(vpic);
     BUG_ON(irq < 0);
@@ -175,7 +174,15 @@ static int vpic_intack(struct hvm_hw_vpic *vpic)
         irq += 8;
     }
 
- out:
+    return irq;
+}
+
+static int vpic_intack(struct hvm_hw_vpic *vpic)
+{
+    int irq;
+
+    vpic_lock(vpic);
+    irq = vpic_intack_locked(vpic);
     vpic_unlock(vpic);
     return irq;
 }
@@ -487,13 +494,14 @@ int vpic_ack_pending_irq(struct vcpu *v)
     struct hvm_hw_vpic *vpic = &v->domain->arch.hvm_domain.vpic[0];
 
     ASSERT(has_vpic(v->domain));
+    ASSERT(vpic_is_locked(vpic));
 
     TRACE_2D(TRC_HVM_EMUL_PIC_PEND_IRQ_CALL, vlapic_accept_pic_intr(v),
              vpic->int_output);
     if ( !vlapic_accept_pic_intr(v) || !vpic->int_output )
         return -1;
 
-    irq = vpic_intack(vpic);
+    irq = vpic_intack_locked(vpic);
     if ( irq == -1 )
         return -1;
 
diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c
index e3f2039..d048f26 100644
--- a/xen/arch/x86/hvm/vpt.c
+++ b/xen/arch/x86/hvm/vpt.c
@@ -296,8 +296,8 @@ int pt_update_irq(struct vcpu *v)
         vlapic_set_irq(vcpu_vlapic(v), irq, 0);
     else
     {
-        hvm_isa_irq_deassert(v->domain, irq);
-        hvm_isa_irq_assert(v->domain, irq);
+        hvm_isa_irq_deassert_locked(v->domain, irq);
+        hvm_isa_irq_assert_locked(v->domain, irq);
     }
 
     /*
diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h
index f041252..a8f36f8 100644
--- a/xen/include/xen/hvm/irq.h
+++ b/xen/include/xen/hvm/irq.h
@@ -119,8 +119,12 @@ void hvm_pci_intx_deassert(
 /* Modify state of an ISA device's IRQ wire. */
 void hvm_isa_irq_assert(
     struct domain *d, unsigned int isa_irq);
+void hvm_isa_irq_assert_locked(
+    struct domain *d, unsigned int isa_irq);
 void hvm_isa_irq_deassert(
     struct domain *d, unsigned int isa_irq);
+void hvm_isa_irq_deassert_locked(
+    struct domain *d, unsigned int isa_irq);
 
 int hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq);
 
-- 
1.8.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.