[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 10/10] x86/MSI-X: provide hypercall interface for mask-all control



Qemu shouldn't be fiddling with this bit directly, as the hypervisor
may (and now does) use it for its own purposes. Provide it with a
replacement interface, allowing the hypervisor to track host and guest
masking intentions independently (clearing the bit only when both want
it clear).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
Whether the permission check should really be an XSM_TARGET one still
needs to be determined: allowing the guest to issue these hypercalls on
itself would permit it to bypass the device model, and thus render
device model state stale.

--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -1807,6 +1807,17 @@ int xc_physdev_unmap_pirq(xc_interface *
                           int domid,
                           int pirq);
 
+int xc_physdev_msix_enable(xc_interface *xch,
+                           int segment,
+                           int bus,
+                           int devfn,
+                           int on);
+int xc_physdev_msix_mask_all(xc_interface *xch,
+                             int segment,
+                             int bus,
+                             int devfn,
+                             int mask);
+
 int xc_hvm_set_pci_intx_level(
     xc_interface *xch, domid_t dom,
     uint8_t domain, uint8_t bus, uint8_t device, uint8_t intx,
--- a/tools/libxc/xc_physdev.c
+++ b/tools/libxc/xc_physdev.c
@@ -112,3 +112,38 @@ int xc_physdev_unmap_pirq(xc_interface *
     return rc;
 }
 
+int xc_physdev_msix_enable(xc_interface *xch,
+                           int segment,
+                           int bus,
+                           int devfn,
+                           int on)
+{
+    struct physdev_pci_device dev = {
+        .seg = segment,
+        .bus = bus,
+        .devfn = devfn
+    };
+
+    return do_physdev_op(xch,
+                         on ? PHYSDEVOP_msix_enable
+                            : PHYSDEVOP_msix_disable,
+                         &dev, sizeof(dev));
+}
+
+int xc_physdev_msix_mask_all(xc_interface *xch,
+                             int segment,
+                             int bus,
+                             int devfn,
+                             int mask)
+{
+    struct physdev_pci_device dev = {
+        .seg = segment,
+        .bus = bus,
+        .devfn = devfn
+    };
+
+    return do_physdev_op(xch,
+                         mask ? PHYSDEVOP_msix_mask_all
+                              : PHYSDEVOP_msix_unmask_all,
+                         &dev, sizeof(dev));
+}
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -392,7 +392,7 @@ static bool_t msi_set_mask_bit(struct ir
     struct pci_dev *pdev;
     u16 seg, control;
     u8 bus, slot, func;
-    bool_t flag = host || guest;
+    bool_t flag = host || guest, maskall;
 
     ASSERT(spin_is_locked(&desc->lock));
     BUG_ON(!entry || !entry->dev);
@@ -415,13 +415,17 @@ static bool_t msi_set_mask_bit(struct ir
         }
         break;
     case PCI_CAP_ID_MSIX:
+        maskall = pdev->msix->host_maskall;
         control = pci_conf_read16(seg, bus, slot, func,
                                   msix_control_reg(entry->msi_attrib.pos));
         if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
+        {
+            pdev->msix->host_maskall = 1;
             pci_conf_write16(seg, bus, slot, func,
                              msix_control_reg(entry->msi_attrib.pos),
                              control | (PCI_MSIX_FLAGS_ENABLE |
                                         PCI_MSIX_FLAGS_MASKALL));
+        }
         if ( likely(memory_decoded(pdev)) )
         {
             writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
@@ -434,7 +438,7 @@ static bool_t msi_set_mask_bit(struct ir
         {
             domid_t domid = pdev->domain->domain_id;
 
-            control |= PCI_MSIX_FLAGS_MASKALL;
+            maskall = 1;
             if ( pdev->msix->warned != domid )
             {
                 pdev->msix->warned = domid;
@@ -444,6 +448,9 @@ static bool_t msi_set_mask_bit(struct ir
                        PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
             }
         }
+        pdev->msix->host_maskall = maskall;
+        if ( maskall || pdev->msix->guest_maskall )
+            control |= PCI_MSIX_FLAGS_MASKALL;
         pci_conf_write16(seg, bus, slot, func,
                          msix_control_reg(entry->msi_attrib.pos), control);
         return flag;
@@ -840,6 +847,7 @@ static int msix_capability_init(struct p
     u8 bus = dev->bus;
     u8 slot = PCI_SLOT(dev->devfn);
     u8 func = PCI_FUNC(dev->devfn);
+    bool_t maskall = msix->host_maskall;
 
     ASSERT(spin_is_locked(&pcidevs_lock));
 
@@ -850,6 +858,7 @@ static int msix_capability_init(struct p
      * to mask all the vectors to prevent interrupts coming in before they're
      * fully set up.
      */
+    msix->host_maskall = 1;
     pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
                      control | (PCI_MSIX_FLAGS_ENABLE |
                                 PCI_MSIX_FLAGS_MASKALL));
@@ -972,6 +981,10 @@ static int msix_capability_init(struct p
 
     if ( !msix->used_entries )
     {
+        maskall = 0;
+        msix->guest_maskall = 0;
+        control &= ~PCI_MSIX_FLAGS_MASKALL;
+
         if ( rangeset_add_range(mmio_ro_ranges, msix->table.first,
                                 msix->table.last) )
             WARN();
@@ -1002,6 +1015,7 @@ static int msix_capability_init(struct p
     ++msix->used_entries;
 
     /* Restore MSI-X enabled bits */
+    msix->host_maskall = maskall;
     pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
 
     return 0;
@@ -1142,11 +1156,15 @@ static void __pci_disable_msix(struct ms
                                            PCI_CAP_ID_MSIX);
     u16 control = pci_conf_read16(seg, bus, slot, func,
                                   msix_control_reg(entry->msi_attrib.pos));
+    bool_t maskall = dev->msix->host_maskall;
 
     if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
+    {
+        dev->msix->host_maskall = 1;
         pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
                          control | (PCI_MSIX_FLAGS_ENABLE |
                                     PCI_MSIX_FLAGS_MASKALL));
+    }
 
     BUG_ON(list_empty(&dev->msi_list));
 
@@ -1158,8 +1176,11 @@ static void __pci_disable_msix(struct ms
                "cannot disable IRQ %d: masking MSI-X on %04x:%02x:%02x.%u\n",
                entry->irq, dev->seg, dev->bus,
                PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
-        control |= PCI_MSIX_FLAGS_MASKALL;
+        maskall = 1;
     }
+    dev->msix->host_maskall = maskall;
+    if ( maskall || dev->msix->guest_maskall )
+        control |= PCI_MSIX_FLAGS_MASKALL;
     pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
 
     _pci_cleanup_msix(dev->msix);
@@ -1203,6 +1224,62 @@ int pci_prepare_msix(u16 seg, u8 bus, u8
     return rc;
 }
 
+int pci_msix_enable(u16 seg, u8 bus, u8 devfn, bool_t on)
+{
+    int rc;
+    struct pci_dev *pdev;
+
+    if ( !use_msi )
+        return -EOPNOTSUPP;
+
+    spin_lock(&pcidevs_lock);
+    pdev = pci_get_pdev(seg, bus, devfn);
+    if ( !pdev || !pdev->msix || !pdev->domain )
+        rc = -ENODEV;
+    else if ( !is_hvm_domain(pdev->domain) )
+        rc = -ENXIO;
+    else if ( (rc = xsm_manage_domain_pirq(XSM_TARGET, pdev->domain)) == 0 )
+        msix_set_enable(pdev, on);
+    spin_unlock(&pcidevs_lock);
+
+    return rc;
+}
+
+int pci_msix_maskall(u16 seg, u8 bus, u8 devfn, bool_t mask)
+{
+    int rc;
+    struct pci_dev *pdev;
+    u8 slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
+
+    if ( !use_msi )
+        return -EOPNOTSUPP;
+
+    spin_lock(&pcidevs_lock);
+    pdev = pci_get_pdev(seg, bus, devfn);
+    if ( !pdev || !pdev->msix || !pdev->domain )
+        rc = -ENODEV;
+    else if ( !is_hvm_domain(pdev->domain) )
+        rc = -ENXIO;
+    else if ( (rc = xsm_manage_domain_pirq(XSM_TARGET, pdev->domain)) == 0 )
+    {
+        unsigned int pos = pci_find_cap_offset(seg, bus, slot, func,
+                                               PCI_CAP_ID_MSIX);
+        u16 control = pci_conf_read16(seg, bus, slot, func,
+                                      msix_control_reg(pos));
+
+        BUG_ON(!pos);
+        pdev->msix->guest_maskall = mask;
+        if ( pdev->msix->host_maskall || mask )
+            control |= PCI_MSIX_FLAGS_MASKALL;
+        else
+            control &= ~PCI_MSIX_FLAGS_MASKALL;
+        pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
+    }
+    spin_unlock(&pcidevs_lock);
+
+    return rc;
+}
+
 /*
  * Notice: only construct the msi_desc
  * no change to irq_desc here, and the interrupt is masked
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -665,6 +665,30 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         break;
     }
 
+    case PHYSDEVOP_msix_enable:
+    case PHYSDEVOP_msix_disable: {
+        struct physdev_pci_device dev;
+
+        if ( copy_from_guest(&dev, arg, 1) )
+            ret = -EFAULT;
+        else
+            ret = pci_msix_enable(dev.seg, dev.bus, dev.devfn,
+                                  cmd == PHYSDEVOP_msix_enable);
+        break;
+    }
+
+    case PHYSDEVOP_msix_mask_all:
+    case PHYSDEVOP_msix_unmask_all: {
+        struct physdev_pci_device dev;
+
+        if ( copy_from_guest(&dev, arg, 1) )
+            ret = -EFAULT;
+        else
+            ret = pci_msix_maskall(dev.seg, dev.bus, dev.devfn,
+                                   cmd == PHYSDEVOP_msix_mask_all);
+        break;
+    }
+
     case PHYSDEVOP_pci_mmcfg_reserved: {
         struct physdev_pci_mmcfg_reserved info;
 
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -83,6 +83,8 @@ struct msi_desc;
 extern int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc);
 extern void pci_disable_msi(struct msi_desc *desc);
 extern int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off);
+extern int pci_msix_enable(u16 seg, u8 bus, u8 devfn, bool_t on);
+extern int pci_msix_maskall(u16 seg, u8 bus, u8 devfn, bool_t mask);
 extern void pci_cleanup_msi(struct pci_dev *pdev);
 extern int setup_msi_irq(struct irq_desc *, struct msi_desc *);
 extern int __setup_msi_irq(struct irq_desc *, struct msi_desc *,
@@ -233,6 +235,7 @@ struct arch_msix {
     int table_refcnt[MAX_MSIX_TABLE_PAGES];
     int table_idx[MAX_MSIX_TABLE_PAGES];
     spinlock_t table_lock;
+    bool_t host_maskall, guest_maskall;
     domid_t warned;
 };
 
--- a/xen/include/public/physdev.h
+++ b/xen/include/public/physdev.h
@@ -315,6 +315,14 @@ DEFINE_XEN_GUEST_HANDLE(physdev_pci_devi
  */
 #define PHYSDEVOP_prepare_msix          30
 #define PHYSDEVOP_release_msix          31
+/*
+ * The device model domain for a guest should be using these instead of
+ * fiddling with the respective flags in the MSI-X capability structure.
+ */
+#define PHYSDEVOP_msix_enable           32
+#define PHYSDEVOP_msix_disable          33
+#define PHYSDEVOP_msix_mask_all         34
+#define PHYSDEVOP_msix_unmask_all       35
 struct physdev_pci_device {
     /* IN */
     uint16_t seg;
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -462,6 +462,12 @@ static XSM_INLINE int xsm_map_domain_irq
     return xsm_default_action(action, current->domain, d);
 }
 
+static XSM_INLINE int xsm_manage_domain_pirq(XSM_DEFAULT_ARG struct domain *d)
+{
+    XSM_ASSERT_ACTION(XSM_TARGET);
+    return xsm_default_action(action, current->domain, d);
+}
+
 static XSM_INLINE int xsm_unmap_domain_pirq(XSM_DEFAULT_ARG struct domain *d)
 {
     XSM_ASSERT_ACTION(XSM_TARGET);
--- a/xen/include/xsm/xsm.h
+++ b/xen/include/xsm/xsm.h
@@ -105,6 +105,7 @@ struct xsm_operations {
     char *(*show_irq_sid) (int irq);
     int (*map_domain_pirq) (struct domain *d);
     int (*map_domain_irq) (struct domain *d, int irq, void *data);
+    int (*manage_domain_pirq) (struct domain *d);
     int (*unmap_domain_pirq) (struct domain *d);
     int (*unmap_domain_irq) (struct domain *d, int irq, void *data);
     int (*bind_pt_irq) (struct domain *d, struct xen_domctl_bind_pt_irq *bind);
@@ -424,6 +425,11 @@ static inline int xsm_map_domain_irq (xs
     return xsm_ops->map_domain_irq(d, irq, data);
 }
 
+static inline int xsm_manage_domain_pirq(xsm_default_t def, struct domain *d)
+{
+    return xsm_ops->manage_domain_pirq(d);
+}
+
 static inline int xsm_unmap_domain_pirq (xsm_default_t def, struct domain *d)
 {
     return xsm_ops->unmap_domain_pirq(d);
--- a/xen/xsm/dummy.c
+++ b/xen/xsm/dummy.c
@@ -79,6 +79,7 @@ void xsm_fixup_ops (struct xsm_operation
     set_to_dummy_if_null(ops, show_irq_sid);
     set_to_dummy_if_null(ops, map_domain_pirq);
     set_to_dummy_if_null(ops, map_domain_irq);
+    set_to_dummy_if_null(ops, manage_domain_pirq);
     set_to_dummy_if_null(ops, unmap_domain_pirq);
     set_to_dummy_if_null(ops, unmap_domain_irq);
     set_to_dummy_if_null(ops, bind_pt_irq);
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -881,6 +881,11 @@ static int flask_map_domain_irq (struct 
     return rc;
 }
 
+static int flask_manage_domain_pirq(struct domain *d)
+{
+    return current_has_perm(d, SECCLASS_RESOURCE, RESOURCE__USE);
+}
+
 static int flask_unmap_domain_pirq (struct domain *d)
 {
     return current_has_perm(d, SECCLASS_RESOURCE, RESOURCE__REMOVE);
@@ -1633,6 +1638,7 @@ static struct xsm_operations flask_ops =
 
     .map_domain_pirq = flask_map_domain_pirq,
     .map_domain_irq = flask_map_domain_irq,
+    .manage_domain_pirq = flask_manage_domain_pirq,
     .unmap_domain_pirq = flask_unmap_domain_pirq,
     .unmap_domain_irq = flask_unmap_domain_irq,
     .bind_pt_irq = flask_bind_pt_irq,


Attachment: x86-MSI-X-maskall.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.