xen-devel

[Xen-devel] [PATCH 1/2] hvm: passthrough MSI-X mask bit acceleration

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH 1/2] hvm: passthrough MSI-X mask bit acceleration
From: Qing He <qing.he@xxxxxxxxx>
Date: Thu, 26 Feb 2009 17:28:05 +0800
In-reply-to: <1235640486-20584-1-git-send-email-qing.he@xxxxxxxxx>
References: <1235640486-20584-1-git-send-email-qing.he@xxxxxxxxx>
Add a new parameter to DOMCTL_bind_pt_irq to let Xen know the guest
physical address of the MSI-X table, and add a new MMIO intercept
handler for that gpa so that MSI-X vector mask bit operations can be
handled inside the hypervisor. This considerably reduces the load on
the device model when the guest masks and unmasks vectors frequently.
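
For illustration, a device model using the extended libxc call would
pass the table's gpa when binding the interrupt. A minimal caller
sketch (every name other than xc_domain_update_msi_irq() is
hypothetical):

    /* gpa of the device's MSI-X table as seen by the guest; where
     * this value comes from is device-model specific */
    uint64_t gtable = dev->msix_table_gpa;

    rc = xc_domain_update_msi_irq(xc_handle, domid, gvec, pirq,
                                  gflags, gtable);
    if ( rc )
        fprintf(stderr, "bind_pt_irq failed: %d\n", rc);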

Signed-off-by: Qing He <qing.he@xxxxxxxxx>
---
 tools/libxc/xc_domain.c          |    4 
 tools/libxc/xenctrl.h            |    3 
 xen/arch/x86/hvm/hvm.c           |    7 
 xen/arch/x86/hvm/intercept.c     |    6 
 xen/arch/x86/hvm/vmsi.c          |  280 +++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/msi.c               |   20 ++
 xen/drivers/passthrough/io.c     |    6 
 xen/include/asm-x86/hvm/domain.h |    4 
 xen/include/asm-x86/msi.h        |    2 
 xen/include/public/domctl.h      |    1 
 xen/include/xen/pci.h            |    3 
 11 files changed, 331 insertions(+), 5 deletions(-)
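
Note for reviewers: each MSI-X table entry is 16 bytes, with the
vector control word at offset 12, so the intercept decodes an access
address as in the sketch below (assuming PCI_MSIX_ENTRY_SIZE == 16
and PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET == 12, as in the Xen headers):

    nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE; /* vector index   */
    offset   = address & (PCI_MSIX_ENTRY_SIZE - 1);             /* field in entry */
    /* offset == 12 selects the vector control word, which may be handled
     * in Xen; writes to the address/data fields are forwarded to the
     * device model and flag the entry so that the following control
     * write goes there as well. */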

diff -r f8187a343ad2 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Fri Feb 20 17:02:36 2009 +0000
+++ b/tools/libxc/xc_domain.c   Thu Feb 26 13:29:10 2009 +0800
@@ -920,7 +920,8 @@ int xc_domain_update_msi_irq(
     uint32_t domid,
     uint32_t gvec,
     uint32_t pirq,
-    uint32_t gflags)
+    uint32_t gflags,
+    uint64_t gtable)
 {
     int rc;
     xen_domctl_bind_pt_irq_t *bind;
@@ -936,6 +937,7 @@ int xc_domain_update_msi_irq(
     bind->machine_irq = pirq;
     bind->u.msi.gvec = gvec;
     bind->u.msi.gflags = gflags;
+    bind->u.msi.gtable = gtable;
 
     rc = do_domctl(xc_handle, &domctl);
     return rc;
diff -r f8187a343ad2 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Fri Feb 20 17:02:36 2009 +0000
+++ b/tools/libxc/xenctrl.h     Thu Feb 26 13:29:10 2009 +0800
@@ -1092,7 +1092,8 @@ int xc_domain_update_msi_irq(
     uint32_t domid,
     uint32_t gvec,
     uint32_t pirq,
-    uint32_t gflags);
+    uint32_t gflags,
+    uint64_t gtable);
 
 int xc_domain_unbind_msi_irq(int xc_handle,
                              uint32_t domid,
diff -r f8187a343ad2 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/arch/x86/hvm/hvm.c    Thu Feb 26 13:29:10 2009 +0800
@@ -308,6 +308,9 @@ int hvm_domain_initialise(struct domain 
     spin_lock_init(&d->arch.hvm_domain.irq_lock);
     spin_lock_init(&d->arch.hvm_domain.uc_lock);
 
+    INIT_LIST_HEAD(&d->arch.hvm_domain.msixtbl_list);
+    spin_lock_init(&d->arch.hvm_domain.msixtbl_list_lock);
+
     hvm_init_guest_time(d);
 
     d->arch.hvm_domain.params[HVM_PARAM_HPET_ENABLED] = 1;
@@ -348,10 +351,14 @@ int hvm_domain_initialise(struct domain 
     return rc;
 }
 
+extern void msixtbl_pt_cleanup(struct domain *d);
+
 void hvm_domain_relinquish_resources(struct domain *d)
 {
     hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
     hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
+
+    msixtbl_pt_cleanup(d);
 
     /* Stop all asynchronous timer actions. */
     rtc_deinit(d);
diff -r f8187a343ad2 xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c      Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/arch/x86/hvm/intercept.c      Thu Feb 26 13:29:10 2009 +0800
@@ -35,14 +35,16 @@ extern struct hvm_mmio_handler hpet_mmio
 extern struct hvm_mmio_handler hpet_mmio_handler;
 extern struct hvm_mmio_handler vlapic_mmio_handler;
 extern struct hvm_mmio_handler vioapic_mmio_handler;
+extern struct hvm_mmio_handler msixtbl_mmio_handler;
 
-#define HVM_MMIO_HANDLER_NR 3
+#define HVM_MMIO_HANDLER_NR 4
 
 static struct hvm_mmio_handler *hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] =
 {
     &hpet_mmio_handler,
     &vlapic_mmio_handler,
-    &vioapic_mmio_handler
+    &vioapic_mmio_handler,
+    &msixtbl_mmio_handler
 };
 
 static int hvm_mmio_access(struct vcpu *v,
diff -r f8187a343ad2 xen/arch/x86/hvm/vmsi.c
--- a/xen/arch/x86/hvm/vmsi.c   Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/arch/x86/hvm/vmsi.c   Thu Feb 26 13:29:10 2009 +0800
@@ -193,3 +193,283 @@ int vmsi_deliver(struct domain *d, int p
     return 1;
 }
 
+/* MSI-X mask bit hypervisor interception */
+struct msixtbl_entry
+{
+    struct list_head list;
+    atomic_t refcnt;    /* how many bind_pt_irq called for the device */
+
+    /* TODO: resolve the potential race by destruction of pdev */
+    struct pci_dev *pdev;
+    unsigned long gtable;       /* gpa of msix table */
+    unsigned long table_len;
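+    /* one bit per MSI-X entry: set when the guest has written the
+     * entry's address/data fields, meaning the next vector control
+     * write must still go to the device model */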
+    unsigned long table_flags[MAX_MSIX_TABLE_ENTRIES / BITS_PER_LONG + 1];
+
+    struct rcu_head rcu;
+};
+
+static struct msixtbl_entry *msixtbl_find_entry(
+    struct vcpu *v, unsigned long addr)
+{
+    struct msixtbl_entry *entry;
+    struct domain *d = v->domain;
+
+    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
+        if ( addr >= entry->gtable &&
+             addr < entry->gtable + entry->table_len )
+            return entry;
+
+    return NULL;
+}
+
+static void __iomem *msixtbl_addr_to_virt(
+    struct msixtbl_entry *entry, unsigned long addr)
+{
+    int idx, nr_page;
+
+    if ( !entry )
+        return NULL;
+
+    nr_page = (addr >> PAGE_SHIFT) -
+              (entry->gtable >> PAGE_SHIFT);
+
+    if ( !entry->pdev )
+        return NULL;
+
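+    /* msix_table_idx[] records the fixmap slot backing each physical
+     * page of the MSI-X table; 0 means the page is not mapped */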
+    idx = entry->pdev->msix_table_idx[nr_page];
+    if ( !idx )
+        return NULL;
+
+    return (void *)(fix_to_virt(idx) +
+                    (addr & ((1UL << PAGE_SHIFT) - 1)));
+}
+
+static int msixtbl_read(
+    struct vcpu *v, unsigned long address,
+    unsigned long len, unsigned long *pval)
+{
+    unsigned long offset;
+    struct msixtbl_entry *entry;
+    void *virt;
+    int r = X86EMUL_UNHANDLEABLE;
+
+    rcu_read_lock();
+
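+    /* only 32-bit reads of the vector control word are handled here;
+     * anything else is left to the device model */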
+    if ( len != 4 )
+        goto out;
+
+    offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
+    if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
+        goto out;
+
+    entry = msixtbl_find_entry(v, address);
+    virt = msixtbl_addr_to_virt(entry, address);
+    if ( !virt )
+        goto out;
+
+    *pval = readl(virt);
+    r = X86EMUL_OKAY;
+
+out:
+    rcu_read_unlock();
+    return r;
+}
+
+static int msixtbl_write(struct vcpu *v, unsigned long address,
+                        unsigned long len, unsigned long val)
+{
+    unsigned long offset;
+    struct msixtbl_entry *entry;
+    void *virt;
+    int nr_entry;
+    int r = X86EMUL_UNHANDLEABLE;
+
+    rcu_read_lock();
+
+    if ( len != 4 )
+        goto out;
+
+    entry = msixtbl_find_entry(v, address);
+    if ( !entry )
+        goto out;
+    nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
+
+    offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
+    if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
+    {
+        set_bit(nr_entry, &entry->table_flags);
+        goto out;
+    }
+
+    /* exit to device model if address/data has been modified */
+    if ( test_and_clear_bit(nr_entry, &entry->table_flags) )
+        goto out;
+
+    virt = msixtbl_addr_to_virt(entry, address);
+    if ( !virt )
+        goto out;
+
+    writel(val, virt);
+    r = X86EMUL_OKAY;
+
+out:
+    rcu_read_unlock();
+    return r;
+}
+
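+/* check_handler: an access is ours iff it falls inside a registered
+ * MSI-X table whose backing page is currently mapped */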
+static int msixtbl_range(struct vcpu *v, unsigned long addr)
+{
+    struct msixtbl_entry *entry;
+    void *virt;
+
+    rcu_read_lock();
+
+    entry = msixtbl_find_entry(v, addr);
+    virt = msixtbl_addr_to_virt(entry, addr);
+
+    rcu_read_unlock();
+
+    return !!virt;
+}
+
+struct hvm_mmio_handler msixtbl_mmio_handler = {
+    .check_handler = msixtbl_range,
+    .read_handler = msixtbl_read,
+    .write_handler = msixtbl_write
+};
+
+static struct msixtbl_entry *add_msixtbl_entry(struct domain *d,
+                                               struct pci_dev *pdev,
+                                               uint64_t gtable)
+{
+    struct msixtbl_entry *entry;
+    u32 len;
+
+    entry = xmalloc(struct msixtbl_entry);
+    if ( !entry )
+        return NULL;
+
+    memset(entry, 0, sizeof(struct msixtbl_entry));
+
+    INIT_LIST_HEAD(&entry->list);
+    INIT_RCU_HEAD(&entry->rcu);
+    atomic_set(&entry->refcnt, 0);
+
+    len = pci_msix_get_table_len(pdev);
+    entry->table_len = len;
+    entry->pdev = pdev;
+    entry->gtable = (unsigned long) gtable;
+
+    list_add_rcu(&entry->list, &d->arch.hvm_domain.msixtbl_list);
+
+    return entry;
+}
+
+static void free_msixtbl_entry(struct rcu_head *rcu)
+{
+    struct msixtbl_entry *entry;
+
+    entry = container_of(rcu, struct msixtbl_entry, rcu);
+
+    xfree(entry);
+}
+
+static void del_msixtbl_entry(struct msixtbl_entry *entry)
+{
+    list_del_rcu(&entry->list);
+    call_rcu(&entry->rcu, free_msixtbl_entry);
+}
+
+int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable)
+{
+    irq_desc_t *irq_desc;
+    struct msi_desc *msi_desc;
+    struct pci_dev *pdev;
+    struct msixtbl_entry *entry;
+    int r = -EINVAL;
+
+    /* pcidevs_lock already held */
+    irq_desc = domain_spin_lock_irq_desc(d, pirq, NULL);
+
+    if ( irq_desc->handler != &pci_msi_type )
+        goto out;
+
+    msi_desc = irq_desc->msi_desc;
+    if ( !msi_desc )
+        goto out;
+
+    pdev = msi_desc->dev;
+
+    spin_lock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
+        if ( pdev == entry->pdev )
+            goto found;
+
+    entry = add_msixtbl_entry(d, pdev, gtable);
+    if ( !entry )
+    {
+        spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+        goto out;
+    }
+
+found:
+    atomic_inc(&entry->refcnt);
+
+    spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+    r = 0;
+
+out:
+    spin_unlock_irq(&irq_desc->lock);
+    return r;
+}
+
+void msixtbl_pt_unregister(struct domain *d, int pirq)
+{
+    irq_desc_t *irq_desc;
+    struct msi_desc *msi_desc;
+    struct pci_dev *pdev;
+    struct msixtbl_entry *entry;
+
+    /* pcidevs_lock already held */
+    irq_desc = domain_spin_lock_irq_desc(d, pirq, NULL);
+
+    if ( irq_desc->handler != &pci_msi_type )
+        goto out;
+
+    msi_desc = irq_desc->msi_desc;
+    if ( !msi_desc )
+        goto out;
+
+    pdev = msi_desc->dev;
+
+    spin_lock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
+        if ( pdev == entry->pdev )
+            goto found;
+
+    spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+out:
+    spin_unlock_irq(&irq_desc->lock);
+    return;
+
+found:
+    /* delete the entry once the last bind_pt_irq reference is gone */
+    if ( atomic_dec_and_test(&entry->refcnt) )
+        del_msixtbl_entry(entry);
+
+    spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+    spin_unlock_irq(&irq_desc->lock);
+}
+
+void msixtbl_pt_cleanup(struct domain *d)
+{
+    struct msixtbl_entry *entry, *temp;
+
+    spin_lock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+    list_for_each_entry_safe( entry, temp,
+                              &d->arch.hvm_domain.msixtbl_list, list )
+        del_msixtbl_entry(entry);
+
+    spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+}
diff -r f8187a343ad2 xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c        Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/arch/x86/msi.c        Thu Feb 26 13:29:10 2009 +0800
@@ -839,3 +839,23 @@ int pci_restore_msi_state(struct pci_dev
     return 0;
 }
 
+unsigned int pci_msix_get_table_len(struct pci_dev *pdev)
+{
+    int pos;
+    u16 control;
+    u8 bus, slot, func;
+    unsigned int len;
+
+    bus = pdev->bus;
+    slot = PCI_SLOT(pdev->devfn);
+    func = PCI_FUNC(pdev->devfn);
+
+    pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
+    if ( !pos )
+        return 0;
+
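+    /* the table size field in the MSI-X control word encodes N-1 entries */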
+    control = pci_conf_read16(bus, slot, func, msix_control_reg(pos));
+    len = msix_table_size(control) * PCI_MSIX_ENTRY_SIZE;
+
+    return len;
+}
diff -r f8187a343ad2 xen/drivers/passthrough/io.c
--- a/xen/drivers/passthrough/io.c      Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/drivers/passthrough/io.c      Thu Feb 26 13:29:10 2009 +0800
@@ -58,6 +58,9 @@ static void pt_irq_time_out(void *data)
     pirq_guest_eoi(irq_map->dom, machine_gsi);
 }
 
+extern int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable);
+extern void msixtbl_pt_unregister(struct domain *d, int pirq);
+
 int pt_irq_create_bind_vtd(
     struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
 {
@@ -115,6 +118,8 @@ int pt_irq_create_bind_vtd(
                 spin_unlock(&d->event_lock);
                 return rc;
             }
+            if ( pt_irq_bind->u.msi.gtable )
+                msixtbl_pt_register(d, pirq, pt_irq_bind->u.msi.gtable);
         }
         else if (hvm_irq_dpci->mirq[pirq].gmsi.gvec != pt_irq_bind->u.msi.gvec
                 ||hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] != pirq)
@@ -259,6 +264,7 @@ int pt_irq_destroy_bind_vtd(
         if ( list_empty(&hvm_irq_dpci->mirq[machine_gsi].digl_list) )
         {
             pirq_guest_unbind(d, machine_gsi);
+            msixtbl_pt_unregister(d, machine_gsi);
             if ( pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) )
                 kill_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, machine_gsi)]);
             hvm_irq_dpci->mirq[machine_gsi].dom   = NULL;
diff -r f8187a343ad2 xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h  Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/include/asm-x86/hvm/domain.h  Thu Feb 26 13:29:10 2009 +0800
@@ -75,6 +75,10 @@ struct hvm_domain {
     /* Pass-through */
     struct hvm_iommu       hvm_iommu;
 
+    /* MSI-X tables intercepted by the hypervisor */
+    struct list_head       msixtbl_list;
+    spinlock_t             msixtbl_list_lock;
+
     struct viridian_domain viridian;
 
     bool_t                 hap_enabled;
diff -r f8187a343ad2 xen/include/asm-x86/msi.h
--- a/xen/include/asm-x86/msi.h Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/include/asm-x86/msi.h Thu Feb 26 13:29:10 2009 +0800
@@ -80,6 +80,8 @@ extern void teardown_msi_vector(int vect
 extern void teardown_msi_vector(int vector);
 extern int msi_free_vector(struct msi_desc *entry);
 extern int pci_restore_msi_state(struct pci_dev *pdev);
+
+extern unsigned int pci_msix_get_table_len(struct pci_dev *pdev);
 
 struct msi_desc {
        struct {
diff -r f8187a343ad2 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h       Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/include/public/domctl.h       Thu Feb 26 13:29:10 2009 +0800
@@ -485,6 +485,7 @@ struct xen_domctl_bind_pt_irq {
         struct {
             uint8_t gvec;
             uint32_t gflags;
+            uint64_t gtable; /* guest physical address of the MSI-X table */
         } msi;
     } u;
 };
diff -r f8187a343ad2 xen/include/xen/pci.h
--- a/xen/include/xen/pci.h     Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/include/xen/pci.h     Thu Feb 26 13:29:10 2009 +0800
@@ -29,7 +29,8 @@
 #define PCI_BDF(b,d,f)  ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
 #define PCI_BDF2(b,df)  ((((b) & 0xff) << 8) | ((df) & 0xff))
 
-#define MAX_MSIX_TABLE_PAGES    8    /* 2048 entries */
+#define MAX_MSIX_TABLE_ENTRIES  2048
+#define MAX_MSIX_TABLE_PAGES    8
 struct pci_dev {
     struct list_head alldevs_list;
     struct list_head domain_list;
