
[PATCH v3 1/2] IOMMU/x86: disallow device assignment to PoD guests


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Tue, 4 Jan 2022 10:41:32 +0100
  • Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>, George Dunlap <george.dunlap@xxxxxxxxxx>, Paul Durrant <paul@xxxxxxx>, Tamas K Lengyel <tamas@xxxxxxxxxxxxx>, Petre Pircalabu <ppircalabu@xxxxxxxxxxxxxxx>, Alexandru Isaila <aisaila@xxxxxxxxxxxxxxx>
  • Delivery-date: Tue, 04 Jan 2022 09:41:35 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

While it is okay for IOMMU page tables to get set up for guests starting
in PoD mode, actual device assignment may only occur once all PoD
entries have been removed from the P2M. So far this was enforced only
for boot-time assignment, and only in the tool stack.
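
Purely for illustration, a minimal standalone sketch (not part of this
patch; pod_allows_assignment() is a made-up name used only here) of the
kind of check the libxl change below performs, based on the existing
xc_domain_get_pod_target() libxc call:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#include <xenctrl.h>

/*
 * Sketch only: refuse device assignment as long as any PoD entries
 * remain in the domain's P2M.
 */
static bool pod_allows_assignment(xc_interface *xch, uint32_t domid)
{
    uint64_t cache = 0, ents = 0;

    /* tot_pages isn't needed for this check, hence NULL. */
    if ( xc_domain_get_pod_target(xch, domid, NULL, &cache, &ents) )
    {
        fprintf(stderr, "cannot determine PoD status for d%u\n", domid);
        return false;
    }

    /* Any remaining entry (even with ents <= cache) means: refuse for now. */
    return ents == 0;
}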

Also use the new p2m_pod_active() to replace p2m_pod_entry_count(): the
latter's unlocked access to p2m->pod.entry_count wasn't really okay
(irrespective of the result being stale by the time the caller gets to
see it).

To allow the tool stack to see a consistent snapshot of PoD state, move
the tail of XENMEM_{get,set}_pod_target handling into a function, adding
proper locking there.

In libxl, take the liberty of also using the new local variable r for a
pre-existing call into libxc.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
If p2m->pod.entry_count == p2m->pod.count, it is in principle possible to
permit device assignment by actively resolving all remaining PoD entries.
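
Purely as an outline of what such active resolution might look like
(hypothetical sketch only: neither p2m_pod_resolve_all() nor
resolve_pod_entries() exist in the tree, the latter standing for a
preemptible sweep replacing every remaining PoD entry with a page taken
from the PoD cache):

static int p2m_pod_resolve_all(struct domain *d)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int rc = 0;

    pod_lock(p2m);

    if ( p2m->pod.entry_count > p2m->pod.count )
        rc = -EBUSY;    /* cache too small; domain needs to balloon down further */
    else if ( p2m->pod.entry_count )
        rc = resolve_pod_entries(p2m);    /* hypothetical helper */

    pod_unlock(p2m);

    return rc;
}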

Initially I thought this was introduced by f89f555827a6 ("remove late
(on-demand) construction of IOMMU page tables"), but without
arch_iommu_use_permitted() checking for PoD, I think the issue was
already there before that change.
---
v3: In p2m_pod_set_mem_target() move check down.
v2: New.

--- a/tools/libs/light/libxl_pci.c
+++ b/tools/libs/light/libxl_pci.c
@@ -1619,8 +1619,13 @@ void libxl__device_pci_add(libxl__egc *e
     pas->callback = device_pci_add_stubdom_done;
 
     if (libxl__domain_type(gc, domid) == LIBXL_DOMAIN_TYPE_HVM) {
-        rc = xc_test_assign_device(ctx->xch, domid, pci_encode_bdf(pci));
-        if (rc) {
+        int r;
+        uint64_t cache, ents;
+
+        rc = ERROR_FAIL;
+
+        r = xc_test_assign_device(ctx->xch, domid, pci_encode_bdf(pci));
+        if (r) {
             LOGD(ERROR, domid,
                  "PCI device %04x:%02x:%02x.%u %s?",
                  pci->domain, pci->bus, pci->dev, pci->func,
@@ -1628,6 +1633,22 @@ void libxl__device_pci_add(libxl__egc *e
                  : "already assigned to a different guest");
             goto out;
         }
+
+        r = xc_domain_get_pod_target(ctx->xch, domid, NULL, &cache, &ents);
+        if (r) {
+            LOGED(ERROR, domid, "Cannot determine PoD status");
+            goto out;
+        }
+        /*
+         * In principle it is sufficient for the domain to have ballooned down
+         * enough such that ents <= cache.  But any remaining entries would
+         * need resolving first.  Until such time when this gets effected,
+         * refuse assignment as long as any entries are left.
+         */
+        if (ents /* > cache */) {
+            LOGD(ERROR, domid, "Cannot assign device with PoD still active");
+            goto out;
+        }
     }
 
     rc = libxl__device_pci_setdefault(gc, domid, pci, !starting);
--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -20,6 +20,7 @@
  */
 
 #include <xen/event.h>
+#include <xen/iocap.h>
 #include <xen/ioreq.h>
 #include <xen/mm.h>
 #include <xen/sched.h>
@@ -359,7 +360,10 @@ p2m_pod_set_mem_target(struct domain *d,
 
     ASSERT( pod_target >= p2m->pod.count );
 
-    ret = p2m_pod_set_cache_target(p2m, pod_target, 1/*preemptible*/);
+    if ( has_arch_pdevs(d) || cache_flush_permitted(d) )
+        ret = -ENOTEMPTY;
+    else
+        ret = p2m_pod_set_cache_target(p2m, pod_target, 1/*preemptible*/);
 
 out:
     pod_unlock(p2m);
@@ -367,6 +371,23 @@ out:
     return ret;
 }
 
+void p2m_pod_get_mem_target(const struct domain *d, xen_pod_target_t *target)
+{
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+    ASSERT(is_hvm_domain(d));
+
+    pod_lock(p2m);
+    lock_page_alloc(p2m);
+
+    target->tot_pages       = domain_tot_pages(d);
+    target->pod_cache_pages = p2m->pod.count;
+    target->pod_entries     = p2m->pod.entry_count;
+
+    unlock_page_alloc(p2m);
+    pod_unlock(p2m);
+}
+
 int p2m_pod_empty_cache(struct domain *d)
 {
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
@@ -1384,6 +1405,9 @@ guest_physmap_mark_populate_on_demand(st
     if ( !paging_mode_translate(d) )
         return -EINVAL;
 
+    if ( has_arch_pdevs(d) || cache_flush_permitted(d) )
+        return -ENOTEMPTY;
+
     do {
         rc = mark_populate_on_demand(d, gfn, chunk_order);
 
@@ -1405,3 +1429,20 @@ void p2m_pod_init(struct p2m_domain *p2m
     for ( i = 0; i < ARRAY_SIZE(p2m->pod.mrp.list); ++i )
         p2m->pod.mrp.list[i] = gfn_x(INVALID_GFN);
 }
+
+bool p2m_pod_active(const struct domain *d)
+{
+    struct p2m_domain *p2m;
+    bool res;
+
+    if ( !is_hvm_domain(d) )
+        return false;
+
+    p2m = p2m_get_hostp2m(d);
+
+    pod_lock(p2m);
+    res = p2m->pod.entry_count | p2m->pod.count;
+    pod_unlock(p2m);
+
+    return res;
+}
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4778,7 +4778,6 @@ long arch_memory_op(unsigned long cmd, X
     {
         xen_pod_target_t target;
         struct domain *d;
-        struct p2m_domain *p2m;
 
         if ( copy_from_guest(&target, arg, 1) )
             return -EFAULT;
@@ -4787,7 +4786,9 @@ long arch_memory_op(unsigned long cmd, X
         if ( d == NULL )
             return -ESRCH;
 
-        if ( cmd == XENMEM_set_pod_target )
+        if ( !is_hvm_domain(d) )
+            rc = -EINVAL;
+        else if ( cmd == XENMEM_set_pod_target )
             rc = xsm_set_pod_target(XSM_PRIV, d);
         else
             rc = xsm_get_pod_target(XSM_PRIV, d);
@@ -4813,10 +4814,7 @@ long arch_memory_op(unsigned long cmd, X
         }
         else if ( rc >= 0 )
         {
-            p2m = p2m_get_hostp2m(d);
-            target.tot_pages       = domain_tot_pages(d);
-            target.pod_cache_pages = p2m->pod.count;
-            target.pod_entries     = p2m->pod.entry_count;
+            p2m_pod_get_mem_target(d, &target);
 
             if ( __copy_to_guest(arg, &target, 1) )
             {
--- a/xen/common/vm_event.c
+++ b/xen/common/vm_event.c
@@ -639,7 +639,7 @@ int vm_event_domctl(struct domain *d, st
 
             rc = -EXDEV;
             /* Disallow paging in a PoD guest */
-            if ( p2m_pod_entry_count(p2m_get_hostp2m(d)) )
+            if ( p2m_pod_active(d) )
                 break;
 
             /* domain_pause() not required here, see XSA-99 */
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -454,11 +454,12 @@ bool arch_iommu_use_permitted(const stru
 {
     /*
      * Prevent device assign if mem paging, mem sharing or log-dirty
-     * have been enabled for this domain.
+     * have been enabled for this domain, or if PoD is still in active use.
      */
     return d == dom_io ||
            (likely(!mem_sharing_enabled(d)) &&
             likely(!mem_paging_enabled(d)) &&
+            likely(!p2m_pod_active(d)) &&
             likely(!p2m_get_hostp2m(d)->global_logdirty));
 }
 
--- a/xen/arch/x86/include/asm/p2m.h
+++ b/xen/arch/x86/include/asm/p2m.h
@@ -661,6 +661,12 @@ int p2m_pod_empty_cache(struct domain *d
  * domain matches target */
 int p2m_pod_set_mem_target(struct domain *d, unsigned long target);
 
+/* Obtain a consistent snapshot of PoD related domain state. */
+void p2m_pod_get_mem_target(const struct domain *d, xen_pod_target_t *target);
+
+/* Check whether PoD is (still) active in a domain. */
+bool p2m_pod_active(const struct domain *d);
+
 /* Scan pod cache when offline/broken page triggered */
 int
 p2m_pod_offline_or_broken_hit(struct page_info *p);
@@ -669,11 +675,6 @@ p2m_pod_offline_or_broken_hit(struct pag
 void
 p2m_pod_offline_or_broken_replace(struct page_info *p);
 
-static inline long p2m_pod_entry_count(const struct p2m_domain *p2m)
-{
-    return p2m->pod.entry_count;
-}
-
 void p2m_pod_init(struct p2m_domain *p2m);
 
 #else
@@ -689,6 +690,11 @@ static inline int p2m_pod_empty_cache(st
     return 0;
 }
 
+static inline bool p2m_pod_active(const struct domain *d)
+{
+    return false;
+}
+
 static inline int p2m_pod_offline_or_broken_hit(struct page_info *p)
 {
     return 0;
@@ -699,11 +705,6 @@ static inline void p2m_pod_offline_or_br
     ASSERT_UNREACHABLE();
 }
 
-static inline long p2m_pod_entry_count(const struct p2m_domain *p2m)
-{
-    return 0;
-}
-
 static inline void p2m_pod_init(struct p2m_domain *p2m) {}
 
 #endif
