# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1235989597 0
# Node ID 9bc5799566be5aaa96f2b78c3170452e3584765a
# Parent d0df93e627bcdbfc77ed82ef4273577c24178b68
hvm: passthrough MSI-X mask bit acceleration
Add a new parameter to DOMCTL_bind_pt_irq to let Xen know the guest
physical address of the MSI-X table. Also add a new MMIO intercept
handler that intercepts that gpa in order to handle MSI-X vector mask
bit operations in the hypervisor. This considerably reduces the load on
the device model if the guest masks and unmasks frequently.
Signed-off-by: Qing He <qing.he@xxxxxxxxx>
---
tools/libxc/xc_domain.c | 4
tools/libxc/xenctrl.h | 3
xen/arch/x86/hvm/hvm.c | 7
xen/arch/x86/hvm/intercept.c | 8 -
xen/arch/x86/hvm/vmsi.c | 280 +++++++++++++++++++++++++++++++++++++++
xen/arch/x86/msi.c | 20 ++
xen/drivers/passthrough/io.c | 6
xen/include/asm-x86/hvm/domain.h | 4
xen/include/asm-x86/msi.h | 2
xen/include/public/domctl.h | 1
xen/include/xen/pci.h | 3
11 files changed, 332 insertions(+), 6 deletions(-)
diff -r d0df93e627bc -r 9bc5799566be tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c Mon Mar 02 10:23:50 2009 +0000
+++ b/tools/libxc/xc_domain.c Mon Mar 02 10:26:37 2009 +0000
@@ -920,7 +920,8 @@ int xc_domain_update_msi_irq(
uint32_t domid,
uint32_t gvec,
uint32_t pirq,
- uint32_t gflags)
+ uint32_t gflags,
+ uint64_t gtable)
{
int rc;
xen_domctl_bind_pt_irq_t *bind;
@@ -936,6 +937,7 @@ int xc_domain_update_msi_irq(
bind->machine_irq = pirq;
bind->u.msi.gvec = gvec;
bind->u.msi.gflags = gflags;
+ bind->u.msi.gtable = gtable;
rc = do_domctl(xc_handle, &domctl);
return rc;
diff -r d0df93e627bc -r 9bc5799566be tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Mon Mar 02 10:23:50 2009 +0000
+++ b/tools/libxc/xenctrl.h Mon Mar 02 10:26:37 2009 +0000
@@ -1092,7 +1092,8 @@ int xc_domain_update_msi_irq(
uint32_t domid,
uint32_t gvec,
uint32_t pirq,
- uint32_t gflags);
+ uint32_t gflags,
+ uint64_t gtable);
int xc_domain_unbind_msi_irq(int xc_handle,
uint32_t domid,
diff -r d0df93e627bc -r 9bc5799566be xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Mon Mar 02 10:23:50 2009 +0000
+++ b/xen/arch/x86/hvm/hvm.c Mon Mar 02 10:26:37 2009 +0000
@@ -308,6 +308,9 @@ int hvm_domain_initialise(struct domain
spin_lock_init(&d->arch.hvm_domain.irq_lock);
spin_lock_init(&d->arch.hvm_domain.uc_lock);
+ INIT_LIST_HEAD(&d->arch.hvm_domain.msixtbl_list);
+ spin_lock_init(&d->arch.hvm_domain.msixtbl_list_lock);
+
hvm_init_guest_time(d);
d->arch.hvm_domain.params[HVM_PARAM_HPET_ENABLED] = 1;
@@ -348,10 +351,14 @@ int hvm_domain_initialise(struct domain
return rc;
}
+extern void msixtbl_pt_cleanup(struct domain *d);
+
void hvm_domain_relinquish_resources(struct domain *d)
{
hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
+
+ msixtbl_pt_cleanup(d);
/* Stop all asynchronous timer actions. */
rtc_deinit(d);
diff -r d0df93e627bc -r 9bc5799566be xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c Mon Mar 02 10:23:50 2009 +0000
+++ b/xen/arch/x86/hvm/intercept.c Mon Mar 02 10:26:37 2009 +0000
@@ -35,14 +35,16 @@ extern struct hvm_mmio_handler hpet_mmio
extern struct hvm_mmio_handler hpet_mmio_handler;
extern struct hvm_mmio_handler vlapic_mmio_handler;
extern struct hvm_mmio_handler vioapic_mmio_handler;
-
-#define HVM_MMIO_HANDLER_NR 3
+extern struct hvm_mmio_handler msixtbl_mmio_handler;
+
+#define HVM_MMIO_HANDLER_NR 4
static struct hvm_mmio_handler *hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] =
{
&hpet_mmio_handler,
&vlapic_mmio_handler,
- &vioapic_mmio_handler
+ &vioapic_mmio_handler,
+ &msixtbl_mmio_handler
};
static int hvm_mmio_access(struct vcpu *v,
diff -r d0df93e627bc -r 9bc5799566be xen/arch/x86/hvm/vmsi.c
--- a/xen/arch/x86/hvm/vmsi.c Mon Mar 02 10:23:50 2009 +0000
+++ b/xen/arch/x86/hvm/vmsi.c Mon Mar 02 10:26:37 2009 +0000
@@ -193,3 +193,283 @@ int vmsi_deliver(struct domain *d, int p
return 1;
}
+/* MSI-X mask bit hypervisor interception */
+/*
+ * Bookkeeping for one intercepted guest MSI-X table.  Entries are linked on
+ * d->arch.hvm_domain.msixtbl_list (one per passed-through pci_dev) and are
+ * freed through an RCU callback.
+ */
+struct msixtbl_entry
+{
+ struct list_head list;
+ atomic_t refcnt; /* how many bind_pt_irq called for the device */
+
+ /* TODO: resolve the potential race by destruction of pdev */
+ struct pci_dev *pdev;
+ unsigned long gtable; /* gpa of msix table */
+ unsigned long table_len; /* table size in bytes, from pci_msix_get_table_len() */
+ /*
+  * Bitmap: a bit is set by msixtbl_write() when the guest writes a field
+  * other than vector control, and test-and-cleared on the next vector
+  * control write so that access is left to the device model.
+  */
+ unsigned long table_flags[MAX_MSIX_TABLE_ENTRIES / BITS_PER_LONG + 1];
+
+ struct rcu_head rcu; /* handed to call_rcu() by del_msixtbl_entry() */
+};
+
+/*
+ * Find the intercepted MSI-X table of v's domain that contains guest
+ * physical address addr.  Returns the matching entry, or NULL when no
+ * registered table covers the address.
+ */
+static struct msixtbl_entry *msixtbl_find_entry(
+    struct vcpu *v, unsigned long addr)
+{
+    struct list_head *head = &v->domain->arch.hvm_domain.msixtbl_list;
+    struct msixtbl_entry *cur;
+
+    list_for_each_entry( cur, head, list )
+    {
+        /* Skip tables whose [gtable, gtable + table_len) misses addr. */
+        if ( addr < cur->gtable )
+            continue;
+        if ( addr >= cur->gtable + cur->table_len )
+            continue;
+
+        return cur;
+    }
+
+    return NULL;
+}
+
+/*
+ * Translate guest physical address addr, which lies inside entry's MSI-X
+ * table, into the hypervisor virtual address of the same table location.
+ * Returns NULL if entry is NULL, has no pdev, or the page has no fixmap
+ * index recorded in pdev->msix_table_idx[].
+ */
+static void __iomem *msixtbl_addr_to_virt(
+    struct msixtbl_entry *entry, unsigned long addr)
+{
+    int page, fixmap_idx;
+
+    if ( !entry || !entry->pdev )
+        return NULL;
+
+    /* Index of the table page that addr falls into. */
+    page = (addr >> PAGE_SHIFT) - (entry->gtable >> PAGE_SHIFT);
+
+    fixmap_idx = entry->pdev->msix_table_idx[page];
+    if ( !fixmap_idx )
+        return NULL;
+
+    /* Keep the offset within the page. */
+    return (void *)(fix_to_virt(fixmap_idx) +
+                    (addr & ((1UL << PAGE_SHIFT) - 1)));
+}
+
+/*
+ * MMIO read handler for intercepted MSI-X tables.  Only 4-byte reads of an
+ * entry's vector control word are served here (X86EMUL_OKAY, value stored
+ * in *pval); every other access returns X86EMUL_UNHANDLEABLE.
+ */
+static int msixtbl_read(
+    struct vcpu *v, unsigned long address,
+    unsigned long len, unsigned long *pval)
+{
+    int rc = X86EMUL_UNHANDLEABLE;
+    void *va;
+
+    rcu_read_lock();
+
+    if ( (len == 4) &&
+         ((address & (PCI_MSIX_ENTRY_SIZE - 1)) ==
+          PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) )
+    {
+        va = msixtbl_addr_to_virt(msixtbl_find_entry(v, address), address);
+        if ( va )
+        {
+            *pval = readl(va);
+            rc = X86EMUL_OKAY;
+        }
+    }
+
+    rcu_read_unlock();
+    return rc;
+}
+
+/*
+ * MMIO write handler for intercepted MSI-X tables.
+ *
+ * A 4-byte write to an entry's vector control word is applied directly to
+ * the table (X86EMUL_OKAY), unless one of that entry's other fields was
+ * written since the last vector control write; in that case, and for every
+ * other access, X86EMUL_UNHANDLEABLE is returned.
+ */
+static int msixtbl_write(struct vcpu *v, unsigned long address,
+                         unsigned long len, unsigned long val)
+{
+    unsigned long offset;
+    struct msixtbl_entry *entry;
+    void *virt;
+    int nr_entry;
+    int r = X86EMUL_UNHANDLEABLE;
+
+    rcu_read_lock();
+
+    if ( len != 4 )
+        goto out;
+
+    /* The entry may have been removed since the range check. */
+    entry = msixtbl_find_entry(v, address);
+    if ( !entry )
+        goto out;
+
+    /* Index of the table entry, not the byte offset inside it. */
+    nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
+
+    offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
+    if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
+    {
+        /* Remember that this entry's address/data fields were touched. */
+        set_bit(nr_entry, &entry->table_flags);
+        goto out;
+    }
+
+    /* exit to device model if address/data has been modified */
+    if ( test_and_clear_bit(nr_entry, &entry->table_flags) )
+        goto out;
+
+    virt = msixtbl_addr_to_virt(entry, address);
+    if ( !virt )
+        goto out;
+
+    writel(val, virt);
+    r = X86EMUL_OKAY;
+
+out:
+    rcu_read_unlock();
+    return r;
+}
+
+/*
+ * Range check for the MMIO intercept: returns 1 when addr lies in a
+ * registered MSI-X table that can be translated to a hypervisor virtual
+ * address, 0 otherwise.
+ */
+static int msixtbl_range(struct vcpu *v, unsigned long addr)
+{
+    int claimed;
+
+    rcu_read_lock();
+    claimed = (msixtbl_addr_to_virt(msixtbl_find_entry(v, addr), addr) != NULL);
+    rcu_read_unlock();
+
+    return claimed;
+}
+
+/*
+ * MMIO intercept callbacks for guest accesses to passed-through MSI-X
+ * tables; referenced from the hvm_mmio_handlers[] array in intercept.c.
+ */
+struct hvm_mmio_handler msixtbl_mmio_handler = {
+ .check_handler = msixtbl_range,
+ .read_handler = msixtbl_read,
+ .write_handler = msixtbl_write
+};
+
+/*
+ * Allocate a tracking entry for pdev's MSI-X table, located at guest
+ * physical address gtable, and link it onto d's msixtbl_list.  The new
+ * entry starts with a zero reference count.  Returns the entry, or NULL
+ * if the allocation fails.
+ */
+static struct msixtbl_entry *add_msixtbl_entry(struct domain *d,
+                                               struct pci_dev *pdev,
+                                               uint64_t gtable)
+{
+    struct msixtbl_entry *new_entry = xmalloc(struct msixtbl_entry);
+
+    if ( new_entry == NULL )
+        return NULL;
+
+    memset(new_entry, 0, sizeof(*new_entry));
+
+    new_entry->pdev = pdev;
+    new_entry->gtable = (unsigned long)gtable;
+    new_entry->table_len = pci_msix_get_table_len(pdev);
+    atomic_set(&new_entry->refcnt, 0);
+    INIT_RCU_HEAD(&new_entry->rcu);
+    INIT_LIST_HEAD(&new_entry->list);
+
+    /* Link the entry in only after every field has been filled in. */
+    list_add_rcu(&new_entry->list, &d->arch.hvm_domain.msixtbl_list);
+
+    return new_entry;
+}
+
+/* RCU callback: free the msixtbl_entry that embeds the given rcu_head. */
+static void free_msixtbl_entry(struct rcu_head *rcu)
+{
+    xfree(container_of(rcu, struct msixtbl_entry, rcu));
+}
+
+/*
+ * Unlink entry from its list and hand it to call_rcu() so that
+ * free_msixtbl_entry() releases it later.  The callers in this file hold
+ * msixtbl_list_lock around the call.
+ */
+static void del_msixtbl_entry(struct msixtbl_entry *entry)
+{
+ list_del_rcu(&entry->list);
+ call_rcu(&entry->rcu, free_msixtbl_entry);
+}
+
+/*
+ * Start intercepting the MSI-X table of the device that owns pirq.
+ *
+ * d:      domain the pirq is bound to
+ * pirq:   physical IRQ; must be an MSI interrupt with an msi_desc
+ * gtable: guest physical address of the device's MSI-X table
+ *
+ * The first registration for a device creates its tracking entry; each
+ * registration takes one reference on it.  Returns 0 on success, -EINVAL
+ * if pirq is not a usable MSI interrupt, -ENOMEM if the entry cannot be
+ * allocated.
+ */
+int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable)
+{
+    irq_desc_t *irq_desc;
+    struct msi_desc *msi_desc;
+    struct pci_dev *pdev;
+    struct msixtbl_entry *entry;
+    int r = -EINVAL;
+
+    /* pcidevs_lock already held */
+    irq_desc = domain_spin_lock_irq_desc(d, pirq, NULL);
+    /* Guard against a failed descriptor lookup before dereferencing it. */
+    if ( !irq_desc )
+        return r;
+
+    if ( irq_desc->handler != &pci_msi_type )
+        goto out;
+
+    msi_desc = irq_desc->msi_desc;
+    if ( !msi_desc )
+        goto out;
+
+    pdev = msi_desc->dev;
+
+    spin_lock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
+        if ( pdev == entry->pdev )
+            goto found;
+
+    entry = add_msixtbl_entry(d, pdev, gtable);
+    if ( !entry )
+    {
+        r = -ENOMEM;
+        spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+        goto out;
+    }
+
+found:
+    atomic_inc(&entry->refcnt);
+    r = 0;
+
+    spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+out:
+    spin_unlock_irq(&irq_desc->lock);
+    return r;
+}
+
+/*
+ * Drop the reference that msixtbl_pt_register() took for pirq.  When the
+ * last reference on the device's tracking entry goes away, the entry is
+ * unlinked and freed via RCU.  Does nothing if pirq is not an MSI
+ * interrupt or the device has no tracking entry.
+ */
+void msixtbl_pt_unregister(struct domain *d, int pirq)
+{
+    irq_desc_t *irq_desc;
+    struct msi_desc *msi_desc;
+    struct pci_dev *pdev;
+    struct msixtbl_entry *entry;
+
+    /* pcidevs_lock already held */
+    irq_desc = domain_spin_lock_irq_desc(d, pirq, NULL);
+    /* Guard against a failed descriptor lookup before dereferencing it. */
+    if ( !irq_desc )
+        return;
+
+    if ( irq_desc->handler != &pci_msi_type )
+        goto out;
+
+    msi_desc = irq_desc->msi_desc;
+    if ( !msi_desc )
+        goto out;
+
+    pdev = msi_desc->dev;
+
+    spin_lock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
+        if ( pdev == entry->pdev )
+        {
+            /* Remove the entry only once the last reference is dropped. */
+            if ( atomic_dec_and_test(&entry->refcnt) )
+                del_msixtbl_entry(entry);
+            break;
+        }
+
+    spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+out:
+    /* Same unlock variant as msixtbl_pt_register() for the same lock call. */
+    spin_unlock_irq(&irq_desc->lock);
+}
+
+/*
+ * Remove every MSI-X table tracking entry of domain d, regardless of its
+ * reference count.  Called from hvm_domain_relinquish_resources().
+ */
+void msixtbl_pt_cleanup(struct domain *d)
+{
+    struct msixtbl_entry *entry, *temp;
+
+    spin_lock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+    /* _safe variant: entries are unlinked while the list is walked. */
+    list_for_each_entry_safe( entry, temp,
+                              &d->arch.hvm_domain.msixtbl_list, list )
+        del_msixtbl_entry(entry);
+
+    spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+}
diff -r d0df93e627bc -r 9bc5799566be xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c Mon Mar 02 10:23:50 2009 +0000
+++ b/xen/arch/x86/msi.c Mon Mar 02 10:26:37 2009 +0000
@@ -839,3 +839,23 @@ int pci_restore_msi_state(struct pci_dev
return 0;
}
+/*
+ * Return the size in bytes of pdev's MSI-X table, computed from the table
+ * size field of its MSI-X control register, or 0 if the device has no
+ * MSI-X capability.
+ */
+unsigned int pci_msix_get_table_len(struct pci_dev *pdev)
+{
+    u8 bus = pdev->bus;
+    u8 slot = PCI_SLOT(pdev->devfn);
+    u8 func = PCI_FUNC(pdev->devfn);
+    int cap = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
+    u16 ctrl;
+
+    if ( cap == 0 )
+        return 0;
+
+    ctrl = pci_conf_read16(bus, slot, func, msix_control_reg(cap));
+
+    return msix_table_size(ctrl) * PCI_MSIX_ENTRY_SIZE;
+}
diff -r d0df93e627bc -r 9bc5799566be xen/drivers/passthrough/io.c
--- a/xen/drivers/passthrough/io.c Mon Mar 02 10:23:50 2009 +0000
+++ b/xen/drivers/passthrough/io.c Mon Mar 02 10:26:37 2009 +0000
@@ -58,6 +58,9 @@ static void pt_irq_time_out(void *data)
pirq_guest_eoi(irq_map->dom, machine_gsi);
}
+extern int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable);
+extern void msixtbl_pt_unregister(struct domain *d, int pirq);
+
int pt_irq_create_bind_vtd(
struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
{
@@ -115,6 +118,8 @@ int pt_irq_create_bind_vtd(
spin_unlock(&d->event_lock);
return rc;
}
+ if ( pt_irq_bind->u.msi.gtable )
+ msixtbl_pt_register(d, pirq, pt_irq_bind->u.msi.gtable);
}
else if (hvm_irq_dpci->mirq[pirq].gmsi.gvec != pt_irq_bind->u.msi.gvec
||hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] != pirq)
@@ -259,6 +264,7 @@ int pt_irq_destroy_bind_vtd(
if ( list_empty(&hvm_irq_dpci->mirq[machine_gsi].digl_list) )
{
pirq_guest_unbind(d, machine_gsi);
+ msixtbl_pt_unregister(d, machine_gsi);
if ( pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) )
kill_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d,
machine_gsi)]);
hvm_irq_dpci->mirq[machine_gsi].dom = NULL;
diff -r d0df93e627bc -r 9bc5799566be xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h Mon Mar 02 10:23:50 2009 +0000
+++ b/xen/include/asm-x86/hvm/domain.h Mon Mar 02 10:26:37 2009 +0000
@@ -75,6 +75,10 @@ struct hvm_domain {
/* Pass-through */
struct hvm_iommu hvm_iommu;
+ /* hypervisor intercepted msix table */
+ struct list_head msixtbl_list;
+ spinlock_t msixtbl_list_lock;
+
struct viridian_domain viridian;
bool_t hap_enabled;
diff -r d0df93e627bc -r 9bc5799566be xen/include/asm-x86/msi.h
--- a/xen/include/asm-x86/msi.h Mon Mar 02 10:23:50 2009 +0000
+++ b/xen/include/asm-x86/msi.h Mon Mar 02 10:26:37 2009 +0000
@@ -80,6 +80,8 @@ extern void teardown_msi_vector(int vect
extern void teardown_msi_vector(int vector);
extern int msi_free_vector(struct msi_desc *entry);
extern int pci_restore_msi_state(struct pci_dev *pdev);
+
+extern unsigned int pci_msix_get_table_len(struct pci_dev *pdev);
struct msi_desc {
struct {
diff -r d0df93e627bc -r 9bc5799566be xen/include/public/domctl.h
--- a/xen/include/public/domctl.h Mon Mar 02 10:23:50 2009 +0000
+++ b/xen/include/public/domctl.h Mon Mar 02 10:26:37 2009 +0000
@@ -485,6 +485,7 @@ struct xen_domctl_bind_pt_irq {
struct {
uint8_t gvec;
uint32_t gflags;
+ uint64_t gtable;
} msi;
} u;
};
diff -r d0df93e627bc -r 9bc5799566be xen/include/xen/pci.h
--- a/xen/include/xen/pci.h Mon Mar 02 10:23:50 2009 +0000
+++ b/xen/include/xen/pci.h Mon Mar 02 10:26:37 2009 +0000
@@ -29,7 +29,8 @@
#define PCI_BDF(b,d,f) ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
#define PCI_BDF2(b,df) ((((b) & 0xff) << 8) | ((df) & 0xff))
-#define MAX_MSIX_TABLE_PAGES 8 /* 2048 entries */
+#define MAX_MSIX_TABLE_ENTRIES 2048
+#define MAX_MSIX_TABLE_PAGES 8
struct pci_dev {
struct list_head alldevs_list;
struct list_head domain_list;
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|