WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH v2 2/2] ioemu: make management of PCI D-states by guest optional

To: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH v2 2/2] ioemu: make management of PCI D-states by guest optional
From: Kouya Shimura <kouya@xxxxxxxxxxxxxx>
Date: Thu, 12 Mar 2009 15:05:38 +0900
Cc: Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxx
Delivery-date: Wed, 11 Mar 2009 23:06:08 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <7kfxhrfcmv.fsf@xxxxxxxxxxxxxxxxxxxxxxxxxx> <7kd4cvfche.fsf@xxxxxxxxxxxxxxxxxxxxxxxxxx> <20090311171304.973D.27C06F64@xxxxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
This is an updated patch per Shimada-san's advice.

Commit 8c771eb6294afc5b3754a9e3de51568d4e5986c2 enables the guest OS
to program the D0-D3hot states of the assigned device. However, the
D3hot state in some PCI devices causes domain creation/destruction
to fail.

With this patch, we can configure, for each PCI passthrough device,
whether or not the guest manages the PCI D-states.

A corresponding change is committed to xen-unstable.hg.
    changeset: 19279:ec671455fb05ca6714deeaca78aacb1026ca4752

Signed-off-by: Kouya Shimura <kouya@xxxxxxxxxxxxxx>

diff --git a/hw/pass-through.c b/hw/pass-through.c
index 4a86309..78a8e8f 100644
--- a/hw/pass-through.c
+++ b/hw/pass-through.c
@@ -105,6 +105,9 @@ static int pt_long_reg_read(struct pt_dev *ptdev,
 static int pt_bar_reg_read(struct pt_dev *ptdev,
     struct pt_reg_tbl *cfg_entry,
     uint32_t *value, uint32_t valid_mask);
+static int pt_pmcsr_reg_read(struct pt_dev *ptdev,
+    struct pt_reg_tbl *cfg_entry,
+    uint16_t *value, uint16_t valid_mask);
 static int pt_byte_reg_write(struct pt_dev *ptdev,
     struct pt_reg_tbl *cfg_entry,
     uint8_t *value, uint8_t dev_value, uint8_t valid_mask);
@@ -407,7 +410,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pm_tbl[] = {
         .ro_mask    = 0xE1FC,
         .emu_mask   = 0x8100,
         .init       = pt_pmcsr_reg_init,
-        .u.w.read   = pt_word_reg_read,
+        .u.w.read   = pt_pmcsr_reg_read,
         .u.w.write  = pt_pmcsr_reg_write,
         .u.w.restore  = pt_pmcsr_reg_restore,
     },
@@ -2341,6 +2344,9 @@ static uint32_t pt_pmc_reg_init(struct pt_dev *ptdev,
 {
     PCIDevice *d = &ptdev->dev;
 
+    if (!ptdev->power_mgmt)
+        return reg->init_val;
+
     /* set Power Management Capabilities register */
     ptdev->pm_state->pmc_field = *(uint16_t *)(d->config + real_offset);
 
@@ -2354,6 +2360,9 @@ static uint32_t pt_pmcsr_reg_init(struct pt_dev *ptdev,
     PCIDevice *d = &ptdev->dev;
     uint16_t cap_ver  = 0;
 
+    if (!ptdev->power_mgmt)
+        return reg->init_val;
+
     /* check PCI Power Management support version */
     cap_ver = ptdev->pm_state->pmc_field & PCI_PM_CAP_VER_MASK;
 
@@ -2553,6 +2562,9 @@ static uint8_t pt_reg_grp_size_init(struct pt_dev *ptdev,
 static uint8_t pt_pm_size_init(struct pt_dev *ptdev,
         struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset)
 {
+    if (!ptdev->power_mgmt)
+        return grp_reg->grp_size;
+
     ptdev->pm_state = qemu_mallocz(sizeof(struct pt_pm_info));
     if (!ptdev->pm_state)
     {
@@ -2806,6 +2818,25 @@ static int pt_bar_reg_read(struct pt_dev *ptdev,
    return 0;
 }
 
+
+/* read Power Management Control/Status register */
+static int pt_pmcsr_reg_read(struct pt_dev *ptdev,
+        struct pt_reg_tbl *cfg_entry,
+        uint16_t *value, uint16_t valid_mask)
+{
+    struct pt_reg_info_tbl *reg = cfg_entry->reg;
+    uint16_t valid_emu_mask = reg->emu_mask;
+
+    if (!ptdev->power_mgmt)
+        valid_emu_mask |= PCI_PM_CTRL_STATE_MASK | PCI_PM_CTRL_NO_SOFT_RESET;
+
+    valid_emu_mask = valid_emu_mask & valid_mask ;
+    *value = PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask);
+
+    return 0;
+}
+
+
 /* write byte size emulate register */
 static int pt_byte_reg_write(struct pt_dev *ptdev,
         struct pt_reg_tbl *cfg_entry,
@@ -3077,19 +3108,26 @@ static int pt_pmcsr_reg_write(struct pt_dev *ptdev,
 {
     struct pt_reg_info_tbl *reg = cfg_entry->reg;
     PCIDevice *d = &ptdev->dev;
+    uint16_t emu_mask = reg->emu_mask;
     uint16_t writable_mask = 0;
     uint16_t throughable_mask = 0;
     struct pt_pm_info *pm_state = ptdev->pm_state;
     uint16_t read_val = 0;
 
+    if (!ptdev->power_mgmt)
+        emu_mask |= PCI_PM_CTRL_STATE_MASK | PCI_PM_CTRL_NO_SOFT_RESET;
+
     /* modify emulate register */
-    writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
+    writable_mask = emu_mask & ~reg->ro_mask & valid_mask;
     cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask);
 
     /* create value for writing to I/O device register */
-    throughable_mask = ~reg->emu_mask & valid_mask;
+    throughable_mask = ~emu_mask & valid_mask;
     *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask);
 
+    if (!ptdev->power_mgmt)
+        return 0;
+
     /* set I/O device power state */
     pm_state->cur_state = (dev_value & PCI_PM_CTRL_STATE_MASK);
 
@@ -3564,7 +3602,7 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
     struct pci_config_cf8 machine_bdf;
     int free_pci_slot = -1;
     char *key, *val;
-    int msi_translate;
+    int msi_translate, power_mgmt;
 
     PT_LOG("Assigning real physical device %02x:%02x.%x ...\n",
         r_bus, r_dev, r_func);
@@ -3597,6 +3635,7 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
     }
 
     msi_translate = direct_pci_msitranslate;
+    power_mgmt = direct_pci_power_mgmt;
     while (opt) {
         if (get_next_keyval(&opt, &key, &val)) {
             PT_LOG("Error: unrecognized PCI assignment option \"%s\"\n", opt);
@@ -3618,6 +3657,21 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
             else
                 PT_LOG("Error: unrecognized value for msitranslate=\n");
         }
+        else if (strcmp(key, "power_mgmt") == 0)
+        {
+            if (strcmp(val, "0") == 0)
+            {
+                PT_LOG("Disable PCI Power Management\n");
+                power_mgmt = 0;
+            }
+            else if (strcmp(val, "1") == 0)
+            {
+                PT_LOG("Enable PCI Power Management\n");
+                power_mgmt = 1;
+            }
+            else
+                PT_LOG("Error: unrecognized value for power_mgmt=\n");
+        }
         else
             PT_LOG("Error: unrecognized PCI assignment option \"%s=%s\"\n", 
key, val);
 
@@ -3639,6 +3693,7 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
 
     assigned_device->pci_dev = pci_dev;
     assigned_device->msi_trans_cap = msi_translate;
+    assigned_device->power_mgmt = power_mgmt;
 
     /* Assign device */
     machine_bdf.reg = 0;
diff --git a/hw/pass-through.h b/hw/pass-through.h
index e86d311..b7b5a79 100644
--- a/hw/pass-through.h
+++ b/hw/pass-through.h
@@ -217,6 +217,7 @@ struct pt_dev {
     /* Physical MSI to guest INTx translation when possible */
     int msi_trans_cap;
     int msi_trans_en;
+    int power_mgmt;
     struct pt_pm_info *pm_state;                /* PM virtualization */
 };
 
diff --git a/hw/pci.h b/hw/pci.h
index 2800499..10fa601 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -65,6 +65,7 @@ struct PCIDevice {
 
 extern char direct_pci_str[];
 extern int direct_pci_msitranslate;
+extern int direct_pci_power_mgmt;
 
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
                                int instance_size, int devfn,
diff --git a/xenstore.c b/xenstore.c
index 928e950..4ee6ceb 100644
--- a/xenstore.c
+++ b/xenstore.c
@@ -303,8 +303,10 @@ const char *xenstore_get_guest_uuid(void)
 
 #define DIRECT_PCI_STR_LEN 512
 #define PT_PCI_MSITRANSLATE_DEFAULT 1
+#define PT_PCI_POWER_MANAGEMENT_DEFAULT 0
 char direct_pci_str[DIRECT_PCI_STR_LEN];
 int direct_pci_msitranslate;
+int direct_pci_power_mgmt;
 void xenstore_parse_domain_config(int hvm_domid)
 {
     char **e_danger = NULL;
@@ -603,15 +605,26 @@ void xenstore_parse_domain_config(int hvm_domid)
 
     /* get the pci pass-through parameter */
     if (pasprintf(&buf, "/local/domain/0/backend/pci/%u/%u/msitranslate",
-                  hvm_domid, pci_devid) == -1)
-        goto out;
+                  hvm_domid, pci_devid) != -1)
+    {
+        free(params);
+        params = xs_read(xsh, XBT_NULL, buf, &len);
+        if (params)
+            direct_pci_msitranslate = atoi(params);
+        else
+            direct_pci_msitranslate = PT_PCI_MSITRANSLATE_DEFAULT;
+    }
 
-    free(params);
-    params = xs_read(xsh, XBT_NULL, buf, &len);
-    if (params)
-        direct_pci_msitranslate = atoi(params);
-    else
-        direct_pci_msitranslate = PT_PCI_MSITRANSLATE_DEFAULT;
+    if (pasprintf(&buf, "/local/domain/0/backend/pci/%u/%u/power_mgmt",
+                  hvm_domid, pci_devid) != -1)
+    {
+        free(params);
+        params = xs_read(xsh, XBT_NULL, buf, &len);
+        if (params)
+            direct_pci_power_mgmt = atoi(params);
+        else
+            direct_pci_power_mgmt = PT_PCI_POWER_MANAGEMENT_DEFAULT;
+    }
 
  out:
     free(danger_type);
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel