[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 2/6] ioemu:passthrough: MSI-INTx interrupt translation support



passthrough: MSI-INTx interrupt translation support
    
This patch lets Xen use MSI as the underlying (physical) interrupt
source for MSI-capable passthrough devices even when the guest itself
does not enable MSI. The guest still sees an IO-APIC based INTx
interrupt, which Xen translates from the physical MSI irq.

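If the guest enables MSI or MSI-X for the passthrough device, this
translation is automatically turned off. It can also be disabled in
the config file at domain startup time, either for all passthrough
devices or per device, through the msitranslate option (translation
is on by default).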

Signed-off-by: Qing He <qing.he@xxxxxxxxx>
---

diff --git a/hw/pass-through.c b/hw/pass-through.c
index d280ff6..625e644 100644
--- a/hw/pass-through.c
+++ b/hw/pass-through.c
@@ -34,6 +34,7 @@ struct php_dev {
     uint8_t r_bus;
     uint8_t r_dev;
     uint8_t r_func;
+    char *opt;
 };
 struct dpci_infos {
 
@@ -492,7 +493,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = {
         .size       = 2,
         .init_val   = 0x0000,
         .ro_mask    = 0x018E,
-        .emu_mask   = 0xFFFE,
+        .emu_mask   = 0xFFFF,
         .init       = pt_msgctrl_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_msgctrl_reg_write,
@@ -692,7 +693,7 @@ static int token_value(char *token)
     return strtol(token, NULL, 16);
 }
 
-static int next_bdf(char **str, int *seg, int *bus, int *dev, int *func)
+static int next_bdf(char **str, int *seg, int *bus, int *dev, int *func, char **opt)
 {
     char *token;
     const char *delim = ":.-";
@@ -711,18 +712,60 @@ static int next_bdf(char **str, int *seg, int *bus, int *dev, int *func)
     *dev  = token_value(token);
 
     token  = strsep(str, delim);
+    *opt = strchr(token, ',');
+    if (*opt)
+        *(*opt)++ = '\0';
+
     *func  = token_value(token);
 
     return 1;
 }
 
+static int get_next_keyval(char **option, char **key, char **val)
+{
+    char *opt, *k, *v;
+
+    k = *option;
+    opt = strchr(k, ',');
+    if (opt)
+        *opt++ = '\0';
+    v = strchr(k, '=');
+    if (!v)
+        return -1;
+    *v++ = '\0';
+
+    *key = k;
+    *val = v;
+    *option = opt;
+
+    return 0;
+}
+
+static void msi_set_enable(struct pt_dev *ptdev, int en)
+{
+    uint16_t val;
+    uint32_t address;
+    if (!ptdev->msi)
+        return;
+
+    address = ptdev->msi->ctrl_offset;
+    if (!address)
+        return;
+
+    val = pci_read_word(ptdev->pci_dev, address);
+    val &= ~PCI_MSI_FLAGS_ENABLE;
+    val |= en & PCI_MSI_FLAGS_ENABLE;
+    pci_write_word(ptdev->pci_dev, address, val);
+}
+
 /* Insert a new pass-through device into a specific pci slot.
  * input  dom:bus:dev.func@slot, chose free one if slot == 0
  * return -1: required slot not available
  *         0: no free hotplug slots, but normal slot should okay
  *        >0: the new hotplug slot
  */
-static int __insert_to_pci_slot(int bus, int dev, int func, int slot)
+static int __insert_to_pci_slot(int bus, int dev, int func, int slot,
+                                char *opt)
 {
     int i, php_slot;
 
@@ -759,6 +802,7 @@ found:
     dpci_infos.php_devs[php_slot].r_bus  = bus;
     dpci_infos.php_devs[php_slot].r_dev  = dev;
     dpci_infos.php_devs[php_slot].r_func = func;
+    dpci_infos.php_devs[php_slot].opt = opt;
     return PHP_TO_PCI_SLOT(php_slot);
 }
 
@@ -768,19 +812,19 @@ found:
 int insert_to_pci_slot(char *bdf_slt)
 {
     int seg, bus, dev, func, slot;
-    char *bdf_str, *slt_str;
+    char *bdf_str, *slt_str, *opt;
     const char *delim="@";
 
     bdf_str = strsep(&bdf_slt, delim);
     slt_str = bdf_slt;
     slot = token_value(slt_str);
 
-    if ( !next_bdf(&bdf_str, &seg, &bus, &dev, &func))
+    if ( !next_bdf(&bdf_str, &seg, &bus, &dev, &func, &opt))
     {
         return -1;
     }
 
-    return __insert_to_pci_slot(bus, dev, func, slot);
+    return __insert_to_pci_slot(bus, dev, func, slot, opt);
 
 }
 
@@ -807,8 +851,9 @@ int test_pci_slot(int slot)
 int bdf_to_slot(char *bdf_str)
 {
     int seg, bus, dev, func, i;
+    char *opt;
 
-    if ( !next_bdf(&bdf_str, &seg, &bus, &dev, &func))
+    if ( !next_bdf(&bdf_str, &seg, &bus, &dev, &func, &opt))
     {
         return -1;
     }
@@ -1960,9 +2005,15 @@ static uint32_t pt_msgctrl_reg_init(struct pt_dev *ptdev,
         pci_write_word(pdev, real_offset, reg_field & ~PCI_MSI_FLAGS_ENABLE);
     }
     ptdev->msi->flags |= (reg_field | MSI_FLAG_UNINIT);
+    ptdev->msi->ctrl_offset = real_offset;
     
     /* All register is 0 after reset, except first 4 byte */
     reg_field &= reg->ro_mask;
+
+    if (ptdev->msi_trans_cap) {
+        PT_LOG("Turning on MSI-INTx translation\n");
+        ptdev->msi_trans_en = 1;
+    }
     
     return reg_field;
 }
@@ -2673,6 +2724,34 @@ static int pt_linkctrl2_reg_write(struct pt_dev *ptdev,
     return 0;
 }
 
+static void pt_unmap_msi_translate(struct pt_dev *ptdev)
+{
+    uint16_t e_device, e_intx;
+    int rc;
+
+    /* MSI_ENABLE bit should be disabled until the new handler is set */
+    msi_set_enable(ptdev, 0);
+
+    e_device = (ptdev->dev.devfn >> 3) & 0x1f;
+    /* fix virtual interrupt pin to INTA# */
+    e_intx = 0;
+    rc = xc_domain_unbind_pt_irq(xc_handle, domid, ptdev->msi->pirq,
+                                 PT_IRQ_TYPE_MSI_TRANSLATE, 0,
+                                 e_device, e_intx, 0);
+    if (rc < 0)
+        PT_LOG("Error: Unbinding pt irq for MSI-INTx failed! rc=%d\n", rc);
+
+    if (ptdev->machine_irq)
+    {
+        rc = xc_domain_bind_pt_pci_irq(xc_handle, domid, ptdev->machine_irq,
+                                       0, e_device, e_intx);
+        if ( rc < 0 )
+            PT_LOG("Error: Rebinding of interrupt failed! rc=%d\n", rc);
+    }
+
+    ptdev->msi_trans_en = 0;
+}
+
 /* write Message Control register */
 static int pt_msgctrl_reg_write(struct pt_dev *ptdev, 
     struct pt_reg_tbl *cfg_entry, 
@@ -2682,7 +2761,9 @@ static int pt_msgctrl_reg_write(struct pt_dev *ptdev,
     uint16_t writable_mask = 0;
     uint16_t throughable_mask = 0;
     uint16_t old_ctrl = cfg_entry->data;
+    uint8_t e_device, e_intx;
     PCIDevice *pd = (PCIDevice *)ptdev;
+    uint16_t val;
 
     /* Currently no support for multi-vector */
     if ((*value & PCI_MSI_FLAGS_QSIZE) != 0x0)
@@ -2699,21 +2780,29 @@ static int pt_msgctrl_reg_write(struct pt_dev *ptdev,
     PT_LOG("old_ctrl:%04xh new_ctrl:%04xh\n", old_ctrl, cfg_entry->data);
     
     /* create value for writing to I/O device register */
+    val = *value;
     throughable_mask = ~reg->emu_mask & valid_mask;
     *value = ((*value & throughable_mask) | (dev_value & ~throughable_mask));
 
     /* update MSI */
-    if (*value & PCI_MSI_FLAGS_ENABLE)
+    if (val & PCI_MSI_FLAGS_ENABLE)
     {
         /* setup MSI pirq for the first time */
         if (ptdev->msi->flags & MSI_FLAG_UNINIT)
         {
-            /* Init physical one */
-            PT_LOG("setup msi for dev %x\n", pd->devfn);
-            if (pt_msi_setup(ptdev))
+            if (ptdev->msi_trans_en) {
+                PT_LOG("guest enabling MSI, disable MSI-INTx translation\n");
+                pt_unmap_msi_translate(ptdev);
+            }
+            else
             {
-                PT_LOG("pt_msi_setup error!!!\n");
-                return -1;
+                /* Init physical one */
+                PT_LOG("setup msi for dev %x\n", pd->devfn);
+                if (pt_msi_setup(ptdev))
+                {
+                    PT_LOG("pt_msi_setup error!!!\n");
+                    return -1;
+                }
             }
             pt_msi_update(ptdev);
 
@@ -2725,6 +2814,12 @@ static int pt_msgctrl_reg_write(struct pt_dev *ptdev,
     else
         ptdev->msi->flags &= ~PCI_MSI_FLAGS_ENABLE;
 
+    /* pass through MSI_ENABLE bit when no MSI-INTx translation */
+    if (!ptdev->msi_trans_en) {
+        *value &= ~PCI_MSI_FLAGS_ENABLE;
+        *value |= val & PCI_MSI_FLAGS_ENABLE;
+    }
+
     return 0;
 }
 
@@ -2870,7 +2965,13 @@ static int pt_msixctrl_reg_write(struct pt_dev *ptdev,
 
     /* update MSI-X */
     if ((*value & PCI_MSIX_ENABLE) && !(*value & PCI_MSIX_MASK))
+    {
+        if (ptdev->msi_trans_en) {
+            PT_LOG("guest enabling MSI-X, disable MSI-INTx translation\n");
+            pt_unmap_msi_translate(ptdev);
+        }
         pt_msix_update(ptdev);
+    }
 
     ptdev->msix->enabled = !!(*value & PCI_MSIX_ENABLE);
 
@@ -2879,7 +2980,8 @@ static int pt_msixctrl_reg_write(struct pt_dev *ptdev,
 
 struct pt_dev * register_real_device(PCIBus *e_bus,
         const char *e_dev_name, int e_devfn, uint8_t r_bus, uint8_t r_dev,
-        uint8_t r_func, uint32_t machine_irq, struct pci_access *pci_access)
+        uint8_t r_func, uint32_t machine_irq, struct pci_access *pci_access,
+        char *opt)
 {
     int rc = -1, i;
     struct pt_dev *assigned_device = NULL;
@@ -2887,6 +2989,8 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
     uint8_t e_device, e_intx;
     struct pci_config_cf8 machine_bdf;
     int free_pci_slot = -1;
+    char *key, *val;
+    int msi_translate;
 
     PT_LOG("Assigning real physical device %02x:%02x.%x ...\n",
         r_bus, r_dev, r_func);
@@ -2908,13 +3012,41 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
 
     if ( e_devfn == PT_VIRT_DEVFN_AUTO ) {
         /*indicate a static assignment(not hotplug), so find a free PCI hot plug slot */
-        free_pci_slot = __insert_to_pci_slot(r_bus, r_dev, r_func, 0);
+        free_pci_slot = __insert_to_pci_slot(r_bus, r_dev, r_func, 0, NULL);
         if ( free_pci_slot > 0 )
             e_devfn = free_pci_slot  << 3;
         else
             PT_LOG("Error: no free virtual PCI hot plug slot, thus no live migration.\n");
     }
 
+    msi_translate = direct_pci_msitranslate;
+    while (opt) {
+        if (get_next_keyval(&opt, &key, &val)) {
+            PT_LOG("Error: unrecognized PCI assignment option \"%s\"\n", opt);
+            break;
+        }
+
+        if (strcmp(key, "msitranslate") == 0)
+        {
+            if (strcmp(val, "0") == 0 || strcmp(val, "no") == 0)
+            {
+                PT_LOG("Disable MSI translation via per device option\n");
+                msi_translate = 0;
+            }
+            else if (strcmp(val, "1") == 0 || strcmp(val, "yes") == 0)
+            {
+                PT_LOG("Enable MSI translation via per device option\n");
+                msi_translate = 1;
+            }
+            else
+                PT_LOG("Error: unrecognized value for msitranslate=\n");
+        }
+        else
+            PT_LOG("Error: unrecognized PCI assignment option \"%s=%s\"\n", key, val);
+
+    }
+
+
     /* Register device */
     assigned_device = (struct pt_dev *) pci_register_device(e_bus, e_dev_name,
                                 sizeof(struct pt_dev), e_devfn,
@@ -2929,6 +3061,7 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
         dpci_infos.php_devs[PCI_TO_PHP_SLOT(free_pci_slot)].pt_dev = assigned_device;
 
     assigned_device->pci_dev = pci_dev;
+    assigned_device->msi_trans_cap = msi_translate;
 
     /* Assign device */
     machine_bdf.reg = 0;
@@ -2960,6 +3093,28 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
     /* fix virtual interrupt pin to INTA# */
     e_intx = 0;
 
+    while (assigned_device->msi_trans_en)
+    {
+        if (pt_msi_setup(assigned_device))
+        {
+            PT_LOG("Error: MSI-INTx translation MSI setup failed, fallback\n");
+            assigned_device->msi_trans_en = 0;
+            break;
+        }
+
+        rc = xc_domain_bind_pt_irq(xc_handle, domid, assigned_device->msi->pirq,
+                                   PT_IRQ_TYPE_MSI_TRANSLATE, 0,
+                                   e_device, e_intx, 0);
+        if ( rc < 0)
+        {
+            PT_LOG("Error: MSI-INTx translation bind failed, fallback\n");
+            assigned_device->msi_trans_en = 0;
+            break;
+        }
+        msi_set_enable(assigned_device, 1);
+        break;
+    }
+
     if ( PT_MACHINE_IRQ_AUTO == machine_irq )
     {
         int pirq = pci_dev->irq;
@@ -2973,9 +3125,15 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
             PT_LOG("Error: Mapping irq failed, rc = %d\n", rc);
         }
         else
+        {
             machine_irq = pirq;
+            assigned_device->machine_irq = pirq;
+        }
     }
 
+    if (assigned_device->msi_trans_en)
+        goto out;
+
     /* bind machine_irq to device */
     if ( 0 != machine_irq )
     {
@@ -2995,8 +3153,9 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
     }
 
 out:
-    PT_LOG("Real physical device %02x:%02x.%x registered successfuly!\n", 
-        r_bus, r_dev, r_func);
+    PT_LOG("Real physical device %02x:%02x.%x registered successfuly!\n"
+           "IRQ type = %s\n", r_bus, r_dev, r_func,
+           assigned_device->msi_trans_en? "MSI-INTx":"INTx");
 
     return assigned_device;
 }
@@ -3029,9 +3188,9 @@ int unregister_real_device(int php_slot)
     e_device = (assigned_device->dev.devfn >> 3) & 0x1f;
     /* fix virtual interrupt pin to INTA# */
     e_intx = 0;
-    machine_irq = pci_dev->irq;
+    machine_irq = assigned_device->machine_irq;
 
-    if ( machine_irq != 0 ) {
+    if ( assigned_device->msi_trans_en == 0 && machine_irq ) {
         rc = xc_domain_unbind_pt_irq(xc_handle, domid, machine_irq, PT_IRQ_TYPE_PCI, 0,
                                        e_device, e_intx, 0);
         if ( rc < 0 )
@@ -3040,6 +3199,16 @@ int unregister_real_device(int php_slot)
             PT_LOG("Error: Unbinding of interrupt failed! rc=%d\n", rc);
         }
     }
+    else if (assigned_device->msi_trans_en)
+    {
+        rc = xc_domain_unbind_pt_irq(xc_handle, domid, assigned_device->msi->pirq,
+                                     PT_IRQ_TYPE_MSI_TRANSLATE, 0,
+                                     e_device, e_intx, 0);
+        if (rc < 0)
+            PT_LOG("Error: Unbinding pt irq for MSI-INTx failed! rc=%d\n", rc);
+    }
+
+    /* TODO: unmap passthrough MSI and MSI-X irqs */
 
     /* delete all emulated config registers */
     pt_config_delete(assigned_device);
@@ -3075,7 +3244,10 @@ int power_on_php_slot(int php_slot)
             php_dev->r_dev,
             php_dev->r_func,
             PT_MACHINE_IRQ_AUTO,
-            dpci_infos.pci_access);
+            dpci_infos.pci_access,
+            php_dev->opt);
+
+    php_dev->opt = NULL;
 
     php_dev->pt_dev = pt_dev;
 
@@ -3097,6 +3269,7 @@ int pt_init(PCIBus *e_bus, const char *direct_pci)
     char slot_str[8];
     char *direct_pci_head = NULL;
     char *direct_pci_p = NULL;
+    char *opt;
 
     /* Initialize libpci */
     pci_access = pci_alloc();
@@ -3125,11 +3298,11 @@ int pt_init(PCIBus *e_bus, const char *direct_pci)
     vslots = qemu_mallocz ( strlen(direct_pci) / 3 );
 
     /* Assign given devices to guest */
-    while ( next_bdf(&direct_pci_p, &seg, &b, &d, &f) )
+    while ( next_bdf(&direct_pci_p, &seg, &b, &d, &f, &opt) )
     {
         /* Register real device with the emulated bus */
         pt_dev = register_real_device(e_bus, "DIRECT PCI", PT_VIRT_DEVFN_AUTO,
-            b, d, f, PT_MACHINE_IRQ_AUTO, pci_access);
+            b, d, f, PT_MACHINE_IRQ_AUTO, pci_access, opt);
         if ( pt_dev == NULL )
         {
             PT_LOG("Error: Registration failed (%02x:%02x.%x)\n", b, d, f);
diff --git a/hw/pass-through.h b/hw/pass-through.h
index 8aa664b..a7d2727 100644
--- a/hw/pass-through.h
+++ b/hw/pass-through.h
@@ -121,6 +121,7 @@ struct pt_region {
 
 struct pt_msi_info {
     uint32_t flags;
+    uint32_t ctrl_offset; /* saved control offset */
     int pirq;          /* guest pirq corresponding */
     uint32_t addr_lo;  /* guest message address */
     uint32_t addr_hi;  /* guest message upper address */
@@ -158,6 +159,10 @@ struct pt_dev {
                                                 /* emul reg group list */
     struct pt_msi_info *msi;                    /* MSI virtualization */
     struct pt_msix_info *msix;                  /* MSI-X virtualization */
+    int machine_irq;                            /* saved pirq */
+    /* Physical MSI to guest INTx translation when possible */
+    int msi_trans_cap;
+    int msi_trans_en;
 };
 
 /* Used for formatting PCI BDF into cf8 format */
diff --git a/hw/pci.h b/hw/pci.h
index 4adc4d7..a527a39 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -64,6 +64,7 @@ struct PCIDevice {
 };
 
 extern char direct_pci_str[];
+extern int direct_pci_msitranslate;
 
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
                                int instance_size, int devfn,
diff --git a/xenstore.c b/xenstore.c
index 86e8b63..ff3d023 100644
--- a/xenstore.c
+++ b/xenstore.c
@@ -290,8 +290,10 @@ const char *xenstore_get_guest_uuid(void) {
 #endif
 }
 
-#define DIRECT_PCI_STR_LEN 160
+#define DIRECT_PCI_STR_LEN 512
+#define PT_PCI_MSITRANSLATE_DEFAULT 1
 char direct_pci_str[DIRECT_PCI_STR_LEN];
+int direct_pci_msitranslate;
 void xenstore_parse_domain_config(int hvm_domid)
 {
     char **e_danger = NULL;
@@ -556,20 +558,50 @@ void xenstore_parse_domain_config(int hvm_domid)
             free(dev);
             dev = xs_read(xsh, XBT_NULL, buf, &len);
 
-            if ( strlen(dev) + strlen(direct_pci_str) > DIRECT_PCI_STR_LEN ) {
+            if ( strlen(dev) + strlen(direct_pci_str) > DIRECT_PCI_STR_LEN - 1) {
                 fprintf(stderr, "qemu: too many pci pass-through devices\n");
                 memset(direct_pci_str, 0, DIRECT_PCI_STR_LEN);
                 goto out;
             }
 
+
             /* append to direct_pci_str */
+            if ( !dev )
+                continue;
+
+            strcat(direct_pci_str, dev);
+
+            if (pasprintf(&buf, "/local/domain/0/backend/pci/%u/%u/opts-%d",
+                          hvm_domid, pci_devid, i) != -1) {
+                free(dev);
+                dev = xs_read(xsh, XBT_NULL, buf, &len);
+            }
             if ( dev ) {
+                if ( strlen(dev) + strlen(direct_pci_str) > DIRECT_PCI_STR_LEN - 2) {
+                    fprintf(stderr, "qemu: too many pci pass-through devices\n");
+                    memset(direct_pci_str, 0, DIRECT_PCI_STR_LEN);
+                    goto out;
+                }
+                strcat(direct_pci_str, ",");
                 strcat(direct_pci_str, dev);
-                strcat(direct_pci_str, "-");
             }
+
+            strcat(direct_pci_str, "-");
         }
     }
 
+    /* get the pci pass-through parameter */
+    if (pasprintf(&buf, "/local/domain/0/backend/pci/%u/%u/msitranslate",
+                  hvm_domid, pci_devid) == -1)
+        goto out;
+
+    free(params);
+    params = xs_read(xsh, XBT_NULL, buf, &len);
+    if (params)
+        direct_pci_msitranslate = atoi(params);
+    else
+        direct_pci_msitranslate = PT_PCI_MSITRANSLATE_DEFAULT;
+
  out:
     free(danger_type);
     free(params);

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.