[PATCH v4 08/10] evtchn: closing of ports doesn't need to hold both domains' event locks



The local domain's lock is needed for the port freeing, but for the
remote side the per-channel lock is sufficient. Other logic then needs
rearranging, though, including the early dropping of both locks (and the
remote domain ref) in the ECS_PIRQ and ECS_VIRQ cases.
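
As an aside, the rearranged interdomain handling boils down to the usual
address-ordered "lock, re-check, retry" idiom for a pair of fine-grained
locks. A minimal, self-contained sketch of that idiom (generic pthread
rwlocks and made-up names, not the actual Xen code):

  #include <pthread.h>
  #include <stddef.h>

  /* Hypothetical stand-ins for the per-channel structures and locks. */
  enum ch_state { CH_FREE, CH_UNBOUND, CH_INTERDOMAIN };

  struct channel {
      pthread_rwlock_t lock;
      enum ch_state state;
      struct channel *remote;          /* peer while CH_INTERDOMAIN */
      unsigned int notify_vcpu_id;     /* used by the bind-vcpu sketch below */
  };

  /* Always take the lower-addressed lock first to avoid ABBA deadlock. */
  static void lock_pair(struct channel *a, struct channel *b)
  {
      pthread_rwlock_wrlock(a < b ? &a->lock : &b->lock);
      pthread_rwlock_wrlock(a < b ? &b->lock : &a->lock);
  }

  /*
   * Lock chn and, if it is interdomain, also its peer.  Whenever chn's lock
   * has to be dropped to honour the ordering, the state is re-checked, as
   * the channel may have been closed and re-bound in the meantime.
   */
  static struct channel *lock_with_peer(struct channel *chn)
  {
      struct channel *peer;

   again:
      pthread_rwlock_wrlock(&chn->lock);

      if ( chn->state != CH_INTERDOMAIN )
          return NULL;                 /* only chn's lock is held */

      peer = chn->remote;

      if ( chn > peer )
      {
          /* Wrong order: drop chn, take both in order, then re-check. */
          pthread_rwlock_unlock(&chn->lock);
          lock_pair(chn, peer);
          if ( chn->state != CH_INTERDOMAIN || chn->remote != peer )
          {
              pthread_rwlock_unlock(&chn->lock);
              pthread_rwlock_unlock(&peer->lock);
              goto again;
          }
      }
      else
          pthread_rwlock_wrlock(&peer->lock);

      return peer;                     /* both locks held */
  }

The patch itself doesn't retry indefinitely on that path: a port found to
have been re-bound fails with -EINVAL, the same error as if it had been
observed in ECS_FREE.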

Note in particular that there is no real race with evtchn_bind_vcpu():
ECS_INTERDOMAIN and ECS_UNBOUND get treated identically there, and
evtchn_close() doesn't care about the notification vCPU ID.
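
To make that concrete, re-using the struct channel sketch above, the
relevant shape (hypothetical names, not the verbatim evtchn_bind_vcpu()
code) is:

  /*
   * CH_UNBOUND and CH_INTERDOMAIN share one arm, which only updates the
   * notification vCPU - a field evtchn_close() never consults.  Hence a
   * concurrent close demoting INTERDOMAIN to UNBOUND doesn't change the
   * outcome here.
   */
  static int bind_vcpu_sketch(struct channel *chn, unsigned int vcpu_id)
  {
      switch ( chn->state )
      {
      case CH_UNBOUND:
      case CH_INTERDOMAIN:
          chn->notify_vcpu_id = vcpu_id;
          return 0;

      default:
          return -1;
      }
  }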

Note also that we can't use double_evtchn_unlock() or
evtchn_write_unlock() when releasing locks on the paths dealing with
possible races. See the respective code comments.
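
What keeps this safe is the reader-side pattern those comments refer to:
consumers take the per-channel lock via evtchn_read_trylock() and simply
bail when they lose the race. A generic illustration of that "trylock,
re-check, bail" pattern, again using the hypothetical types from the first
sketch (not the actual Xen helpers):

  #include <stdbool.h>

  /*
   * Reader side: try to take the per-channel lock; if that fails, or if the
   * channel turns out no longer to be in a usable state, just give up - the
   * event is either being handled already or has gone away.
   */
  static bool notify_sketch(struct channel *chn)
  {
      if ( pthread_rwlock_tryrdlock(&chn->lock) )
          return false;                /* writer active - bail */

      if ( chn->state == CH_INTERDOMAIN )
      {
          /* ... deliver the event ... */
      }
      /* else: closed (i.e. went through CH_FREE) meanwhile - nothing to do. */

      pthread_rwlock_unlock(&chn->lock);
      return true;
  }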

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v4: New.

--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -605,15 +605,15 @@ static long evtchn_bind_pirq(evtchn_bind
 int evtchn_close(struct domain *d1, int port1, bool guest)
 {
     struct domain *d2 = NULL;
-    struct evtchn *chn1 = _evtchn_from_port(d1, port1), *chn2;
+    struct evtchn *chn1 = _evtchn_from_port(d1, port1), *chn2 = NULL;
     long           rc = 0;
 
     if ( !chn1 )
         return -EINVAL;
 
- again:
     write_lock(&d1->event_lock);
 
+ again:
     /* Guest cannot close a Xen-attached event channel. */
     if ( unlikely(consumer_is_xen(chn1)) && guest )
     {
@@ -634,6 +634,22 @@ int evtchn_close(struct domain *d1, int
     case ECS_PIRQ: {
         struct pirq *pirq = pirq_info(d1, chn1->u.pirq.irq);
 
+        /*
+         * We don't require the per-channel lock here, so in case a race
+         * happened on the interdomain path below, better release both.
+         */
+        if ( unlikely(chn2) )
+        {
+            /*
+             * See evtchn_write_unlock() vs plain write_unlock() comment in
+             * ECS_INTERDOMAIN handling below.
+             */
+            write_unlock(&chn1->lock);
+            write_unlock(&chn2->lock);
+            put_domain(d2);
+            chn2 = NULL;
+        }
+
         if ( pirq )
         {
             if ( !is_hvm_domain(d1) )
@@ -653,6 +669,22 @@ int evtchn_close(struct domain *d1, int
         struct vcpu *v;
         unsigned long flags;
 
+        /*
+         * The per-channel locks nest inside the vIRQ ones, so we must release
+         * them if a race happened on the interdomain path below.
+         */
+        if ( unlikely(chn2) )
+        {
+            /*
+             * See evtchn_write_unlock() vs plain write_unlock() comment in
+             * ECS_INTERDOMAIN handling below.
+             */
+            write_unlock(&chn1->lock);
+            write_unlock(&chn2->lock);
+            put_domain(d2);
+            chn2 = NULL;
+        }
+
         v = d1->vcpu[virq_is_global(chn1->u.virq) ? 0 : chn1->notify_vcpu_id];
 
         write_lock_irqsave(&v->virq_lock, flags);
@@ -669,63 +701,87 @@ int evtchn_close(struct domain *d1, int
     case ECS_INTERDOMAIN:
         if ( d2 == NULL )
         {
-            d2 = chn1->u.interdomain.remote_dom;
+            evtchn_write_lock(chn1);
 
-            /* If we unlock d1 then we could lose d2. Must get a reference. */
-            if ( unlikely(!get_domain(d2)) )
-                BUG();
-
-            if ( d1 < d2 )
-                write_lock(&d2->event_lock);
-            else if ( d1 != d2 )
-            {
-                write_unlock(&d1->event_lock);
-                write_lock(&d2->event_lock);
-                goto again;
-            }
+            if ( chn1->state == ECS_INTERDOMAIN )
+                d2 = chn1->u.interdomain.remote_dom;
+            else
+                /* See comment further down. */
+                write_unlock(&chn1->lock);
         }
-        else if ( d2 != chn1->u.interdomain.remote_dom )
+
+        if ( d2 != chn1->u.interdomain.remote_dom )
         {
             /*
-             * We can only get here if the port was closed and re-bound after
-             * unlocking d1 but before locking d2 above. We could retry but
-             * it is easier to return the same error as if we had seen the
-             * port in ECS_FREE. It must have passed through that state for
-             * us to end up here, so it's a valid error to return.
+             * We can only get here if the port was closed and re-bound
+             * - before locking chn1 or
+             * - after unlocking chn1 but before locking both channels
+             * above.  We could retry but it is easier to return the same error
+             * as if we had seen the port in ECS_FREE.  It must have passed
+             * through that state for us to end up here, so it's a valid error
+             * to return.
              */
+            if ( d2 && !chn2 )
+                write_unlock(&chn1->lock);
             rc = -EINVAL;
             goto out;
         }
 
-        chn2 = _evtchn_from_port(d2, chn1->u.interdomain.remote_port);
-        BUG_ON(!chn2);
-        BUG_ON(chn2->state != ECS_INTERDOMAIN);
-        BUG_ON(chn2->u.interdomain.remote_dom != d1);
+        if ( !chn2 )
+        {
+            /* If we unlock chn1 then we could lose d2. Must get a reference. */
+            if ( unlikely(!get_domain(d2)) )
+                BUG();
 
-        double_evtchn_lock(chn1, chn2);
+            chn2 = _evtchn_from_port(d2, chn1->u.interdomain.remote_port);
+            BUG_ON(!chn2);
 
-        evtchn_free(d1, chn1);
+            if ( chn1 > chn2 )
+            {
+                /*
+                 * Cannot use evtchn_write_unlock() here, as its assertion
+                 * likely won't hold.  However, a race - as per the comment
+                 * below - indicates a transition through ECS_FREE must
+                 * have occurred, so the assumptions by users of
+                 * evtchn_read_trylock() still hold (i.e. they're similarly
+                 * fine to bail).
+                 */
+                write_unlock(&chn1->lock);
+                double_evtchn_lock(chn2, chn1);
+                goto again;
+            }
+
+            evtchn_write_lock(chn2);
+        }
+
+        BUG_ON(chn2->state != ECS_INTERDOMAIN);
+        BUG_ON(chn2->u.interdomain.remote_dom != d1);
 
         chn2->state = ECS_UNBOUND;
         chn2->u.unbound.remote_domid = d1->domain_id;
 
-        double_evtchn_unlock(chn1, chn2);
-
-        goto out;
+        break;
 
     default:
         BUG();
     }
 
-    evtchn_write_lock(chn1);
+    if ( !chn2 )
+        evtchn_write_lock(chn1);
     evtchn_free(d1, chn1);
-    evtchn_write_unlock(chn1);
+    if ( !chn2 )
+        evtchn_write_unlock(chn1);
 
  out:
-    if ( d2 != NULL )
+    if ( chn2 )
     {
-        if ( d1 != d2 )
-            write_unlock(&d2->event_lock);
+        /*
+         * See evtchn_write_unlock() vs plain write_unlock() comment in
+         * ECS_INTERDOMAIN handling above.  In principle we could use
+         * double_evtchn_unlock() on the ECS_INTERDOMAIN success path.
+         */
+        write_unlock(&chn1->lock);
+        write_unlock(&chn2->lock);
         put_domain(d2);
     }
 