x86/MSI-X: provide hypercall interface for mask-all control

Qemu shouldn't be fiddling with the MSI-X mask-all bit directly, as the
hypervisor may (and now does) use it for its own purposes. Provide it
with a replacement interface, allowing the hypervisor to track host and
guest masking intentions independently (clearing the bit only when both
want it clear).

Signed-off-by: Jan Beulich
---
Whether the permission check should really be an XSM_TARGET one remains
to be determined: XSM_TARGET allows the guest to issue the hypercalls on
itself, permitting it to bypass the device model and thus render device
model state stale. An illustrative (non-normative) consumer-side sketch
is appended after the patch.

--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -1807,6 +1807,17 @@ int xc_physdev_unmap_pirq(xc_interface *
                           int domid,
                           int pirq);
 
+int xc_physdev_msix_enable(xc_interface *xch,
+                           int segment,
+                           int bus,
+                           int devfn,
+                           int on);
+int xc_physdev_msix_mask_all(xc_interface *xch,
+                             int segment,
+                             int bus,
+                             int devfn,
+                             int mask);
+
 int xc_hvm_set_pci_intx_level(
     xc_interface *xch, domid_t dom,
     uint8_t domain, uint8_t bus, uint8_t device, uint8_t intx,
--- a/tools/libxc/xc_physdev.c
+++ b/tools/libxc/xc_physdev.c
@@ -112,3 +112,38 @@ int xc_physdev_unmap_pirq(xc_interface *
 
     return rc;
 }
+int xc_physdev_msix_enable(xc_interface *xch,
+                           int segment,
+                           int bus,
+                           int devfn,
+                           int on)
+{
+    struct physdev_pci_device dev = {
+        .seg = segment,
+        .bus = bus,
+        .devfn = devfn
+    };
+
+    return do_physdev_op(xch,
+                         on ? PHYSDEVOP_msix_enable
+                            : PHYSDEVOP_msix_disable,
+                         &dev, sizeof(dev));
+}
+
+int xc_physdev_msix_mask_all(xc_interface *xch,
+                             int segment,
+                             int bus,
+                             int devfn,
+                             int mask)
+{
+    struct physdev_pci_device dev = {
+        .seg = segment,
+        .bus = bus,
+        .devfn = devfn
+    };
+
+    return do_physdev_op(xch,
+                         mask ? PHYSDEVOP_msix_mask_all
+                              : PHYSDEVOP_msix_unmask_all,
+                         &dev, sizeof(dev));
+}
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -392,7 +392,7 @@ static bool_t msi_set_mask_bit(struct ir
     struct pci_dev *pdev;
     u16 seg, control;
     u8 bus, slot, func;
-    bool_t flag = host || guest;
+    bool_t flag = host || guest, maskall;
 
     ASSERT(spin_is_locked(&desc->lock));
     BUG_ON(!entry || !entry->dev);
@@ -415,13 +415,17 @@ static bool_t msi_set_mask_bit(struct ir
         }
         break;
     case PCI_CAP_ID_MSIX:
+        maskall = pdev->msix->host_maskall;
         control = pci_conf_read16(seg, bus, slot, func,
                                   msix_control_reg(entry->msi_attrib.pos));
         if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
+        {
+            pdev->msix->host_maskall = 1;
             pci_conf_write16(seg, bus, slot, func,
                              msix_control_reg(entry->msi_attrib.pos),
                              control | (PCI_MSIX_FLAGS_ENABLE |
                                         PCI_MSIX_FLAGS_MASKALL));
+        }
         if ( likely(memory_decoded(pdev)) )
         {
             writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
@@ -434,7 +438,7 @@ static bool_t msi_set_mask_bit(struct ir
         {
             domid_t domid = pdev->domain->domain_id;
 
-            control |= PCI_MSIX_FLAGS_MASKALL;
+            maskall = 1;
             if ( pdev->msix->warned != domid )
             {
                 pdev->msix->warned = domid;
@@ -444,6 +448,9 @@ static bool_t msi_set_mask_bit(struct ir
                        PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
             }
         }
+        pdev->msix->host_maskall = maskall;
+        if ( maskall || pdev->msix->guest_maskall )
+            control |= PCI_MSIX_FLAGS_MASKALL;
         pci_conf_write16(seg, bus, slot, func,
                          msix_control_reg(entry->msi_attrib.pos), control);
         return flag;
@@ -840,6 +847,7 @@ static int msix_capability_init(struct p
     u8 bus = dev->bus;
     u8 slot = PCI_SLOT(dev->devfn);
     u8 func = PCI_FUNC(dev->devfn);
+    bool_t maskall = msix->host_maskall;
 
     ASSERT(spin_is_locked(&pcidevs_lock));
 
@@ -850,6 +858,7 @@ static int msix_capability_init(struct p
      * to mask all the vectors to prevent interrupts coming in before they're
      * fully set up.
      */
+    msix->host_maskall = 1;
     pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
                      control | (PCI_MSIX_FLAGS_ENABLE |
                                 PCI_MSIX_FLAGS_MASKALL));
@@ -972,6 +981,10 @@ static int msix_capability_init(struct p
 
     if ( !msix->used_entries )
     {
+        maskall = 0;
+        msix->guest_maskall = 0;
+        control &= ~PCI_MSIX_FLAGS_MASKALL;
+
         if ( rangeset_add_range(mmio_ro_ranges, msix->table.first,
                                 msix->table.last) )
             WARN();
@@ -1002,6 +1015,7 @@ static int msix_capability_init(struct p
     ++msix->used_entries;
 
     /* Restore MSI-X enabled bits */
+    msix->host_maskall = maskall;
     pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
 
     return 0;
@@ -1142,11 +1156,15 @@ static void __pci_disable_msix(struct ms
                                                         PCI_CAP_ID_MSIX);
     u16 control = pci_conf_read16(seg, bus, slot, func,
                                   msix_control_reg(entry->msi_attrib.pos));
+    bool_t maskall = dev->msix->host_maskall;
 
     if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
+    {
+        dev->msix->host_maskall = 1;
         pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
                          control | (PCI_MSIX_FLAGS_ENABLE |
                                     PCI_MSIX_FLAGS_MASKALL));
+    }
 
     BUG_ON(list_empty(&dev->msi_list));
 
@@ -1158,8 +1176,11 @@ static void __pci_disable_msix(struct ms
                    "cannot disable IRQ %d: masking MSI-X on %04x:%02x:%02x.%u\n",
                    entry->irq, dev->seg, dev->bus,
                    PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
-        control |= PCI_MSIX_FLAGS_MASKALL;
+        maskall = 1;
     }
+    dev->msix->host_maskall = maskall;
+    if ( maskall || dev->msix->guest_maskall )
+        control |= PCI_MSIX_FLAGS_MASKALL;
     pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
 
     _pci_cleanup_msix(dev->msix);
@@ -1203,6 +1224,62 @@ int pci_prepare_msix(u16 seg, u8 bus, u8
 
     return rc;
 }
+int pci_msix_enable(u16 seg, u8 bus, u8 devfn, bool_t on)
+{
+    int rc;
+    struct pci_dev *pdev;
+
+    if ( !use_msi )
+        return -EOPNOTSUPP;
+
+    spin_lock(&pcidevs_lock);
+    pdev = pci_get_pdev(seg, bus, devfn);
+    if ( !pdev || !pdev->msix || !pdev->domain )
+        rc = -ENODEV;
+    else if ( !is_hvm_domain(pdev->domain) )
+        rc = -ENXIO;
+    else if ( (rc = xsm_manage_domain_pirq(XSM_TARGET, pdev->domain)) == 0 )
+        msix_set_enable(pdev, on);
+    spin_unlock(&pcidevs_lock);
+
+    return rc;
+}
+
+int pci_msix_maskall(u16 seg, u8 bus, u8 devfn, bool_t mask)
+{
+    int rc;
+    struct pci_dev *pdev;
+    u8 slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
+
+    if ( !use_msi )
+        return -EOPNOTSUPP;
+
+    spin_lock(&pcidevs_lock);
+    pdev = pci_get_pdev(seg, bus, devfn);
+    if ( !pdev || !pdev->msix || !pdev->domain )
+        rc = -ENODEV;
+    else if ( !is_hvm_domain(pdev->domain) )
+        rc = -ENXIO;
+    else if ( (rc = xsm_manage_domain_pirq(XSM_TARGET, pdev->domain)) == 0 )
+    {
+        unsigned int pos = pci_find_cap_offset(seg, bus, slot, func,
+                                               PCI_CAP_ID_MSIX);
+        u16 control = pci_conf_read16(seg, bus, slot, func,
+                                      msix_control_reg(pos));
+
+        BUG_ON(!pos);
+        pdev->msix->guest_maskall = mask;
+        if ( pdev->msix->host_maskall || mask )
+            control |= PCI_MSIX_FLAGS_MASKALL;
+        else
+            control &= ~PCI_MSIX_FLAGS_MASKALL;
+        pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
+    }
+    spin_unlock(&pcidevs_lock);
+
+    return rc;
+}
+
 /*
  * Notice: only construct the msi_desc
  * no change to irq_desc here, and the interrupt is masked
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -665,6 +665,30 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         break;
     }
 
+    case PHYSDEVOP_msix_enable:
+    case PHYSDEVOP_msix_disable: {
+        struct physdev_pci_device dev;
+
+        if ( copy_from_guest(&dev, arg, 1) )
+            ret = -EFAULT;
+        else
+            ret = pci_msix_enable(dev.seg, dev.bus, dev.devfn,
+                                  cmd == PHYSDEVOP_msix_enable);
+        break;
+    }
+
+    case PHYSDEVOP_msix_mask_all:
+    case PHYSDEVOP_msix_unmask_all: {
+        struct physdev_pci_device dev;
+
+        if ( copy_from_guest(&dev, arg, 1) )
+            ret = -EFAULT;
+        else
+            ret = pci_msix_maskall(dev.seg, dev.bus, dev.devfn,
+                                   cmd == PHYSDEVOP_msix_mask_all);
+        break;
+    }
+
     case PHYSDEVOP_pci_mmcfg_reserved: {
         struct physdev_pci_mmcfg_reserved info;
 
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -83,6 +83,8 @@ struct msi_desc;
 extern int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc);
 extern void pci_disable_msi(struct msi_desc *desc);
 extern int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off);
+extern int pci_msix_enable(u16 seg, u8 bus, u8 devfn, bool_t on);
+extern int pci_msix_maskall(u16 seg, u8 bus, u8 devfn, bool_t mask);
 extern void pci_cleanup_msi(struct pci_dev *pdev);
 extern int setup_msi_irq(struct irq_desc *, struct msi_desc *);
 extern int __setup_msi_irq(struct irq_desc *, struct msi_desc *,
@@ -233,6 +235,7 @@ struct arch_msix {
     int table_refcnt[MAX_MSIX_TABLE_PAGES];
     int table_idx[MAX_MSIX_TABLE_PAGES];
     spinlock_t table_lock;
+    bool_t host_maskall, guest_maskall;
     domid_t warned;
 };
--- a/xen/include/public/physdev.h
+++ b/xen/include/public/physdev.h
@@ -315,6 +315,14 @@ DEFINE_XEN_GUEST_HANDLE(physdev_pci_devi
  */
 #define PHYSDEVOP_prepare_msix          30
 #define PHYSDEVOP_release_msix          31
+/*
+ * The device model domain for a guest should be using these instead of
+ * fiddling with the respective flags in the MSI-X capability structure.
+ */
+#define PHYSDEVOP_msix_enable           32
+#define PHYSDEVOP_msix_disable          33
+#define PHYSDEVOP_msix_mask_all         34
+#define PHYSDEVOP_msix_unmask_all       35
 struct physdev_pci_device {
     /* IN */
     uint16_t seg;
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -462,6 +462,12 @@ static XSM_INLINE int xsm_map_domain_irq
     return xsm_default_action(action, current->domain, d);
 }
 
+static XSM_INLINE int xsm_manage_domain_pirq(XSM_DEFAULT_ARG struct domain *d)
+{
+    XSM_ASSERT_ACTION(XSM_TARGET);
+    return xsm_default_action(action, current->domain, d);
+}
+
 static XSM_INLINE int xsm_unmap_domain_pirq(XSM_DEFAULT_ARG struct domain *d)
 {
     XSM_ASSERT_ACTION(XSM_TARGET);
--- a/xen/include/xsm/xsm.h
+++ b/xen/include/xsm/xsm.h
@@ -105,6 +105,7 @@ struct xsm_operations {
     char *(*show_irq_sid) (int irq);
     int (*map_domain_pirq) (struct domain *d);
     int (*map_domain_irq) (struct domain *d, int irq, void *data);
+    int (*manage_domain_pirq) (struct domain *d);
     int (*unmap_domain_pirq) (struct domain *d);
     int (*unmap_domain_irq) (struct domain *d, int irq, void *data);
     int (*bind_pt_irq) (struct domain *d, struct xen_domctl_bind_pt_irq *bind);
@@ -424,6 +425,11 @@ static inline int xsm_map_domain_irq (xs
     return xsm_ops->map_domain_irq(d, irq, data);
 }
 
+static inline int xsm_manage_domain_pirq(xsm_default_t def, struct domain *d)
+{
+    return xsm_ops->manage_domain_pirq(d);
+}
+
 static inline int xsm_unmap_domain_pirq (xsm_default_t def, struct domain *d)
 {
     return xsm_ops->unmap_domain_pirq(d);
--- a/xen/xsm/dummy.c
+++ b/xen/xsm/dummy.c
@@ -79,6 +79,7 @@ void xsm_fixup_ops (struct xsm_operation
     set_to_dummy_if_null(ops, show_irq_sid);
     set_to_dummy_if_null(ops, map_domain_pirq);
     set_to_dummy_if_null(ops, map_domain_irq);
+    set_to_dummy_if_null(ops, manage_domain_pirq);
     set_to_dummy_if_null(ops, unmap_domain_pirq);
     set_to_dummy_if_null(ops, unmap_domain_irq);
     set_to_dummy_if_null(ops, bind_pt_irq);
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -881,6 +881,11 @@ static int flask_map_domain_irq (struct
     return rc;
 }
 
+static int flask_manage_domain_pirq(struct domain *d)
+{
+    return current_has_perm(d, SECCLASS_RESOURCE, RESOURCE__USE);
+}
+
 static int flask_unmap_domain_pirq (struct domain *d)
 {
     return current_has_perm(d, SECCLASS_RESOURCE, RESOURCE__REMOVE);
@@ -1633,6 +1638,7 @@ static struct xsm_operations flask_ops =
     .map_domain_pirq = flask_map_domain_pirq,
     .map_domain_irq = flask_map_domain_irq,
+    .manage_domain_pirq = flask_manage_domain_pirq,
     .unmap_domain_pirq = flask_unmap_domain_pirq,
     .unmap_domain_irq = flask_unmap_domain_irq,
     .bind_pt_irq = flask_bind_pt_irq,
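
For illustration only (not part of the patch): with the above in place, a
device model is expected to stop writing the MSI-X message control register
itself and instead report the guest's intentions via the new hypercalls. The
fragment below is a hedged sketch of that consumer side --
xc_physdev_msix_enable() and xc_physdev_msix_mask_all() are the libxc
wrappers added by this patch, while struct pt_dev and msix_ctrl_write() are
hypothetical stand-ins for the device model's own MSI-X emulation hooks.

#include <xenctrl.h>

/* Stand-in for the device model's per-device state. */
struct pt_dev {
    int seg, bus, devfn;
};

/* Invoked when the guest writes the MSI-X message control word. */
static int msix_ctrl_write(xc_interface *xch, const struct pt_dev *dev,
                           uint16_t new_ctrl)
{
    int rc;

    /* Xen owns the physical mask-all bit; only report the guest's view. */
    rc = xc_physdev_msix_mask_all(xch, dev->seg, dev->bus, dev->devfn,
                                  !!(new_ctrl & 0x4000 /* MASKALL */));
    if ( rc )
        return rc;

    /* Likewise hand the enable bit to Xen instead of poking it directly. */
    return xc_physdev_msix_enable(xch, dev->seg, dev->bus, dev->devfn,
                                  !!(new_ctrl & 0x8000 /* ENABLE */));
}

Whether mask-all wants forwarding before or after the enable bit depends on
the device model's existing write sequencing; the point is merely that both
bits now reach the hardware only via Xen, which can combine the guest's view
with its own host_maskall state.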