[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 2/2] ioemu: Enable guest OS to program D0-D3hot states of an assigned device



This patch enables the guest OS to program the D0-D3hot power states of
an assigned device.

Thanks,
--
Yuji Shimada.


Signed-off-by: Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx>

diff --git a/hw/pass-through.c b/hw/pass-through.c
index e76a3c3..ca9037d 100644
--- a/hw/pass-through.c
+++ b/hw/pass-through.c
@@ -27,6 +27,7 @@
 #include "pci/pci.h"
 #include "pt-msi.h"
 #include "qemu-xen.h"
+#include <unistd.h>
 
 struct php_dev {
     struct pt_dev *pt_dev;
@@ -60,6 +61,10 @@ static uint32_t pt_irqpin_reg_init(struct pt_dev *ptdev,
     struct pt_reg_info_tbl *reg, uint32_t real_offset);
 static uint32_t pt_bar_reg_init(struct pt_dev *ptdev,
     struct pt_reg_info_tbl *reg, uint32_t real_offset);
+static uint32_t pt_pmc_reg_init(struct pt_dev *ptdev,
+    struct pt_reg_info_tbl *reg, uint32_t real_offset);
+static uint32_t pt_pmcsr_reg_init(struct pt_dev *ptdev,
+    struct pt_reg_info_tbl *reg, uint32_t real_offset);
 static uint32_t pt_linkctrl_reg_init(struct pt_dev *ptdev,
     struct pt_reg_info_tbl *reg, uint32_t real_offset);
 static uint32_t pt_devctrl2_reg_init(struct pt_dev *ptdev,
@@ -78,6 +83,8 @@ static uint32_t pt_msixctrl_reg_init(struct pt_dev *ptdev,
     struct pt_reg_info_tbl *reg, uint32_t real_offset);
 static uint8_t pt_reg_grp_size_init(struct pt_dev *ptdev,
     struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset);
+static uint8_t pt_pm_size_init(struct pt_dev *ptdev,
+    struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset);
 static uint8_t pt_msi_size_init(struct pt_dev *ptdev,
     struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset);
 static uint8_t pt_msix_size_init(struct pt_dev *ptdev,
@@ -146,6 +153,24 @@ static int pt_msgdata_reg_write(struct pt_dev *ptdev,
 static int pt_msixctrl_reg_write(struct pt_dev *ptdev, 
     struct pt_reg_tbl *cfg_entry, 
     uint16_t *value, uint16_t dev_value, uint16_t valid_mask);
+static int pt_byte_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint8_t dev_value, uint8_t *value);
+static int pt_word_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint16_t dev_value, uint16_t *value);
+static int pt_long_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint32_t dev_value, uint32_t *value);
+static int pt_cmd_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint16_t dev_value, uint16_t *value);
+static int pt_pmcsr_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint16_t dev_value, uint16_t *value);
+static int pt_bar_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint32_t dev_value, uint32_t *value);
 
 /* pt_reg_info_tbl declaration
  * - only for emulated register (either a part or whole bit).
@@ -166,6 +191,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_vendor_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_word_reg_write,
+        .u.w.restore  = NULL,
     },
     /* Device ID reg */
     {
@@ -177,6 +203,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_device_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_word_reg_write,
+        .u.w.restore  = NULL,
     },
     /* Command reg */
     {
@@ -188,6 +215,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_common_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_cmd_reg_write,
+        .u.w.restore  = pt_cmd_reg_restore,
     },
     /* Capabilities Pointer reg */
     {
@@ -199,6 +227,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_ptr_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     /* Status reg */
     /* use emulated Cap Ptr value to initialize, 
@@ -213,6 +242,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_status_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_word_reg_write,
+        .u.w.restore  = NULL,
     },
     /* Cache Line Size reg */
     {
@@ -224,6 +254,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_common_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = pt_byte_reg_restore,
     },
     /* Latency Timer reg */
     {
@@ -235,6 +266,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_common_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = pt_byte_reg_restore,
     },
     /* Header Type reg */
     {
@@ -246,6 +278,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_common_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     /* Interrupt Line reg */
     {
@@ -257,6 +290,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_common_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     /* Interrupt Pin reg */
     {
@@ -268,6 +302,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_irqpin_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     /* BAR 0 reg */
     /* mask of BAR need to be decided later, depends on IO/MEM type */
@@ -278,6 +313,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_bar_reg_init,
         .u.dw.read  = pt_bar_reg_read,
         .u.dw.write = pt_bar_reg_write,
+        .u.dw.restore = pt_bar_reg_restore,
     },
     /* BAR 1 reg */
     {
@@ -287,6 +323,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_bar_reg_init,
         .u.dw.read  = pt_bar_reg_read,
         .u.dw.write = pt_bar_reg_write,
+        .u.dw.restore = pt_bar_reg_restore,
     },
     /* BAR 2 reg */
     {
@@ -296,6 +333,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_bar_reg_init,
         .u.dw.read  = pt_bar_reg_read,
         .u.dw.write = pt_bar_reg_write,
+        .u.dw.restore = pt_bar_reg_restore,
     },
     /* BAR 3 reg */
     {
@@ -305,6 +343,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_bar_reg_init,
         .u.dw.read  = pt_bar_reg_read,
         .u.dw.write = pt_bar_reg_write,
+        .u.dw.restore = pt_bar_reg_restore,
     },
     /* BAR 4 reg */
     {
@@ -314,6 +353,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_bar_reg_init,
         .u.dw.read  = pt_bar_reg_read,
         .u.dw.write = pt_bar_reg_write,
+        .u.dw.restore = pt_bar_reg_restore,
     },
     /* BAR 5 reg */
     {
@@ -323,6 +363,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_bar_reg_init,
         .u.dw.read  = pt_bar_reg_read,
         .u.dw.write = pt_bar_reg_write,
+        .u.dw.restore = pt_bar_reg_restore,
     },
     /* Expansion ROM BAR reg */
     {
@@ -334,6 +375,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_bar_reg_init,
         .u.dw.read  = pt_long_reg_read,
         .u.dw.write = pt_exp_rom_bar_reg_write,
+        .u.dw.restore = pt_long_reg_restore,
     },
     {
         .size = 0,
@@ -352,6 +394,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pm_tbl[] = {
         .init       = pt_ptr_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     /* Power Management Capabilities reg */
     {
@@ -359,10 +402,11 @@ static struct pt_reg_info_tbl pt_emu_reg_pm_tbl[] = {
         .size       = 2,
         .init_val   = 0x0000,
         .ro_mask    = 0xFFFF,
-        .emu_mask   = 0xFFE8,
-        .init       = pt_common_reg_init,
+        .emu_mask   = 0xF9C8,
+        .init       = pt_pmc_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_word_reg_write,
+        .u.w.restore  = NULL,
     },
     /* PCI Power Management Control/Status reg */
     {
@@ -370,21 +414,11 @@ static struct pt_reg_info_tbl pt_emu_reg_pm_tbl[] = {
         .size       = 2,
         .init_val   = 0x0008,
         .ro_mask    = 0x60FC,
-        .emu_mask   = 0xFF0B,
-        .init       = pt_common_reg_init,
+        .emu_mask   = 0x8100,
+        .init       = pt_pmcsr_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_pmcsr_reg_write,
-    },
-    /* Data reg */
-    {
-        .offset     = PCI_PM_DATA_REGISTER,
-        .size       = 1,
-        .init_val   = 0x00,
-        .ro_mask    = 0xFF,
-        .emu_mask   = 0xFF,
-        .init       = pt_common_reg_init,
-        .u.b.read   = pt_byte_reg_read,
-        .u.b.write  = pt_byte_reg_write,
+        .u.w.restore  = pt_pmcsr_reg_restore,
     },
     {
         .size = 0,
@@ -403,6 +437,7 @@ static struct pt_reg_info_tbl pt_emu_reg_vpd_tbl[] = {
         .init       = pt_ptr_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     {
         .size = 0,
@@ -421,6 +456,7 @@ static struct pt_reg_info_tbl pt_emu_reg_vendor_tbl[] = {
         .init       = pt_ptr_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     {
         .size = 0,
@@ -439,6 +475,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pcie_tbl[] = {
         .init       = pt_ptr_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     /* Device Capabilities reg */
     {
@@ -450,6 +487,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pcie_tbl[] = {
         .init       = pt_common_reg_init,
         .u.dw.read  = pt_long_reg_read,
         .u.dw.write = pt_long_reg_write,
+        .u.dw.restore = NULL,
     },
     /* Device Control reg */
     {
@@ -461,6 +499,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pcie_tbl[] = {
         .init       = pt_common_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_devctrl_reg_write,
+        .u.w.restore  = pt_word_reg_restore,
     },
     /* Link Control reg */
     {
@@ -472,6 +511,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pcie_tbl[] = {
         .init       = pt_linkctrl_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_linkctrl_reg_write,
+        .u.w.restore  = pt_word_reg_restore,
     },
     /* Device Control 2 reg */
     {
@@ -483,6 +523,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pcie_tbl[] = {
         .init       = pt_devctrl2_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_devctrl2_reg_write,
+        .u.w.restore  = pt_word_reg_restore,
     },
     /* Link Control 2 reg */
     {
@@ -494,6 +535,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pcie_tbl[] = {
         .init       = pt_linkctrl2_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_linkctrl2_reg_write,
+        .u.w.restore  = pt_word_reg_restore,
     },
     {
         .size = 0,
@@ -512,6 +554,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = {
         .init       = pt_ptr_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     /* Message Control reg */
     {
@@ -523,6 +566,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = {
         .init       = pt_msgctrl_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_msgctrl_reg_write,
+        .u.w.restore  = NULL,
     },
     /* Message Address reg */
     {
@@ -534,6 +578,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = {
         .init       = pt_msgaddr32_reg_init,
         .u.dw.read  = pt_long_reg_read,
         .u.dw.write = pt_msgaddr32_reg_write,
+        .u.dw.restore = NULL,
     },
     /* Message Upper Address reg (if PCI_MSI_FLAGS_64BIT set) */
     {
@@ -545,6 +590,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = {
         .init       = pt_msgaddr64_reg_init,
         .u.dw.read  = pt_long_reg_read,
         .u.dw.write = pt_msgaddr64_reg_write,
+        .u.dw.restore = NULL,
     },
     /* Message Data reg (16 bits of data for 32-bit devices) */
     {
@@ -556,6 +602,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = {
         .init       = pt_msgdata_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_msgdata_reg_write,
+        .u.w.restore  = NULL,
     },
     /* Message Data reg (16 bits of data for 64-bit devices) */
     {
@@ -567,6 +614,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = {
         .init       = pt_msgdata_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_msgdata_reg_write,
+        .u.w.restore  = NULL,
     },
     {
         .size = 0,
@@ -585,6 +633,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msix_tbl[] = {
         .init       = pt_ptr_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     /* Message Control reg */
     {
@@ -596,6 +645,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msix_tbl[] = {
         .init       = pt_msixctrl_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_msixctrl_reg_write,
+        .u.w.restore  = NULL,
     },
     {
         .size = 0,
@@ -624,7 +674,7 @@ static const struct pt_reg_grp_info_tbl 
pt_emu_reg_grp_tbl[] = {
         .grp_id     = PCI_CAP_ID_PM,
         .grp_type   = GRP_TYPE_EMU,
         .grp_size   = PCI_PM_SIZEOF,
-        .size_init  = pt_reg_grp_size_init,
+        .size_init  = pt_pm_size_init,
         .emu_reg_tbl= pt_emu_reg_pm_tbl,
     },
     /* AGP Capability Structure reg group */
@@ -777,23 +827,6 @@ static int get_next_keyval(char **option, char **key, char 
**val)
     return 0;
 }
 
-static void msi_set_enable(struct pt_dev *ptdev, int en)
-{
-    uint16_t val;
-    uint32_t address;
-    if (!ptdev->msi)
-        return;
-
-    address = ptdev->msi->ctrl_offset;
-    if (!address)
-        return;
-
-    val = pci_read_word(ptdev->pci_dev, address);
-    val &= ~PCI_MSI_FLAGS_ENABLE;
-    val |= en & PCI_MSI_FLAGS_ENABLE;
-    pci_write_word(ptdev->pci_dev, address, val);
-}
-
 /* Insert a new pass-through device into a specific pci slot.
  * input  dom:bus:dev.func@slot, chose free one if slot == 0
  * return -1: required slot not available
@@ -1084,6 +1117,7 @@ static void pt_pci_write_config(PCIDevice *d, uint32_t 
address, uint32_t val,
 {
     struct pt_dev *assigned_device = (struct pt_dev *)d;
     struct pci_dev *pci_dev = assigned_device->pci_dev;
+    struct pt_pm_info *pm_state = assigned_device->pm_state;
     struct pt_reg_grp_tbl *reg_grp_entry = NULL;
     struct pt_reg_grp_info_tbl *reg_grp = NULL;
     struct pt_reg_tbl *reg_entry = NULL;
@@ -1144,6 +1178,13 @@ static void pt_pci_write_config(PCIDevice *d, uint32_t 
address, uint32_t val,
             (d->devfn & 0x7), address, len);
     }
 
+    /* check power state transition flags.
+     * pm_state is only allocated by pt_pm_size_init() and is treated as
+     * optional by pt_config_delete(), so it may be NULL here.
+     */
+    if (pm_state && (pm_state->flags & PT_FLAG_TRANSITING))
+        /* a new access can't be accepted until the previous power state
+         * transition is completed, so finish the pending request here.
+         */
+        qemu_run_one_timer(pm_state->pm_timer);
+
     /* find register group entry */
     reg_grp_entry = pt_find_reg_grp(assigned_device, address);
     if (reg_grp_entry)
@@ -1274,6 +1315,11 @@ out:
         break;
     }
 
+    /* pm_state is only allocated by pt_pm_size_init(); it may be NULL */
+    if (pm_state && (pm_state->flags & PT_FLAG_TRANSITING))
+        /* set QEMUTimer: the register write has been issued above, so the
+         * transition delay starts counting from now.
+         */
+        qemu_mod_timer(pm_state->pm_timer,
+            (qemu_get_clock(rt_clock) + pm_state->pm_delay));
+
 exit:
     return;
 }
@@ -1282,6 +1328,7 @@ static uint32_t pt_pci_read_config(PCIDevice *d, uint32_t 
address, int len)
 {
     struct pt_dev *assigned_device = (struct pt_dev *)d;
     struct pci_dev *pci_dev = assigned_device->pci_dev;
+    struct pt_pm_info *pm_state = assigned_device->pm_state;
     uint32_t val = 0xFFFFFFFF;
     struct pt_reg_grp_tbl *reg_grp_entry = NULL;
     struct pt_reg_grp_info_tbl *reg_grp = NULL;
@@ -1324,6 +1371,13 @@ static uint32_t pt_pci_read_config(PCIDevice *d, 
uint32_t address, int len)
         goto exit;
     }
 
+    /* check power state transition flags.
+     * pm_state is only allocated by pt_pm_size_init() and is treated as
+     * optional by pt_config_delete(), so it may be NULL here.
+     */
+    if (pm_state && (pm_state->flags & PT_FLAG_TRANSITING))
+        /* a new access can't be accepted until the previous power state
+         * transition is completed, so finish the pending request here.
+         */
+        qemu_run_one_timer(pm_state->pm_timer);
+
     /* find register group entry */
     reg_grp_entry = pt_find_reg_grp(assigned_device, address);
     if (reg_grp_entry)
@@ -1643,6 +1697,35 @@ uint8_t find_cap_offset(struct pci_dev *pci_dev, uint8_t 
cap)
     return 0;
 }
 
+/* Search the extended capability list of a device for capability ID `cap`.
+ *
+ * The walk starts at config offset 0x100 and follows the next pointer of
+ * each capability header.
+ *
+ * Returns the config-space offset of the matching capability header, or 0
+ * when the capability is not present.
+ */
+uint32_t find_ext_cap_offset(struct pci_dev *pci_dev, uint32_t cap)
+{
+    uint32_t header = 0;
+    /* upper bound on visited headers so a malformed (looping) list
+     * terminates; presumably (4096 - 256) / 8.
+     */
+    int max_cap = 480;
+    int pos = 0x100;
+
+    do
+    {
+        header = pci_read_long(pci_dev, pos);
+        /*
+         * If we have no capabilities, this is indicated by cap ID,
+         * cap version and next pointer all being 0.
+         * An all-ones value means the read returned no data (extended
+         * config space absent or not accessible), so there is no list
+         * to walk either.
+         */
+        if (header == 0 || header == 0xFFFFFFFF)
+            break;
+
+        if (PCI_EXT_CAP_ID(header) == cap)
+            return pos;
+
+        /* a next pointer below 0x100 terminates the list */
+        pos = PCI_EXT_CAP_NEXT(header);
+        if (pos < 0x100)
+            break;
+
+        max_cap--;
+    }while (max_cap > 0);
+
+    return 0;
+}
+
 /* parse BAR */
 static int pt_bar_reg_parse(
         struct pt_dev *ptdev, struct pt_reg_info_tbl *reg)
@@ -1751,6 +1834,298 @@ static void pt_bar_mapping(struct pt_dev *ptdev, int 
io_enable, int mem_enable)
     return;
 }
 
+/* Verify that the device reached the power state requested by the guest.
+ *
+ * Reads the power management control/status register of the physical
+ * device and compares its state field with pm_state->req_state.
+ *
+ * Returns 0 when both match; logs an error and returns -1 otherwise.
+ */
+int check_power_state(struct pt_dev *ptdev)
+{
+    struct pt_pm_info *pm = ptdev->pm_state;
+    PCIDevice *dev = &ptdev->dev;
+    uint16_t pmcsr;
+    uint16_t dev_state;
+
+    /* state field of the value currently held by the device */
+    pmcsr = pci_read_word(ptdev->pci_dev, (pm->pm_base + PCI_PM_CTRL));
+    dev_state = pmcsr & PCI_PM_CTRL_STATE_MASK;
+
+    if (pm->req_state == dev_state)
+        return 0;
+
+    PT_LOG("Error: Failed to change power state. "
+        "[%02x:%02x.%x][requested state:%d][current state:%d]\n",
+        pci_bus_num(dev->bus), ((dev->devfn >> 3) & 0x1F), (dev->devfn & 0x7),
+        pm->req_state, dev_state);
+    return -1;
+}
+
+/* Save the AER registers that have to be written back after a reset.
+ *
+ * Copies PCI_ERR_UNCOR_MASK, PCI_ERR_UNCOR_SEVER, PCI_ERR_COR_MASK and
+ * PCI_ERR_CAP from the physical device into the virtual config space at
+ * the same offsets (relative to pm_state->aer_base).
+ */
+static void pt_aer_reg_save(struct pt_dev *ptdev)
+{
+    PCIDevice *dev = &ptdev->dev;
+    uint32_t base = ptdev->pm_state->aer_base;
+    /* Root Port and Root Complex Event Collector need size expansion */
+    int aer_len = 0x2c;
+    int off;
+
+    for (off = 0; off < aer_len; off += 4)
+    {
+        /* every other register is left alone */
+        if (off != PCI_ERR_UNCOR_MASK && off != PCI_ERR_UNCOR_SEVER &&
+            off != PCI_ERR_COR_MASK && off != PCI_ERR_CAP)
+            continue;
+
+        /* NOTE(review): aer_base comes from find_ext_cap_offset(), i.e. it
+         * is >= 0x100 - confirm that dev->config is large enough for it.
+         */
+        *(uint32_t*)(dev->config + (base + off))
+             = pci_read_long(ptdev->pci_dev, (base + off));
+    }
+}
+
+/* Write the saved AER registers back to the physical device.
+ *
+ * Counterpart of pt_aer_reg_save(): PCI_ERR_UNCOR_MASK,
+ * PCI_ERR_UNCOR_SEVER, PCI_ERR_COR_MASK and PCI_ERR_CAP are taken from
+ * the virtual config space and written to the device.  Other AER
+ * registers should not be reconfigured after reset without a reason and
+ * are therefore skipped.
+ */
+static void pt_aer_reg_restore(struct pt_dev *ptdev)
+{
+    PCIDevice *dev = &ptdev->dev;
+    uint32_t base = ptdev->pm_state->aer_base;
+    /* Root Port and Root Complex Event Collector need size expansion */
+    int aer_len = 0x2c;
+    uint32_t saved = 0;
+    int off;
+
+    for (off = 0; off < aer_len; off += 4)
+    {
+        if (off != PCI_ERR_UNCOR_MASK && off != PCI_ERR_UNCOR_SEVER &&
+            off != PCI_ERR_COR_MASK && off != PCI_ERR_CAP)
+            continue;
+
+        /* NOTE(review): aer_base is >= 0x100 - confirm that dev->config
+         * is large enough to hold the saved copy.
+         */
+        saved = *(uint32_t*)(dev->config + (base + off));
+        pci_write_long(ptdev->pci_dev, (base + off), saved);
+    }
+}
+
+/* Reset the interrupt setup and the I/O resource mapping of a device.
+ *
+ * Disables MSI and MSI-X when the device has them, marks every virtual
+ * region address as unassigned (-1) and then calls pt_bar_mapping() with
+ * both I/O and memory disabled to drop the BAR mappings.
+ */
+void pt_reset_interrupt_and_io_mapping(struct pt_dev *ptdev)
+{
+    PCIDevice *dev = &ptdev->dev;
+    int region;
+
+    /* disable MSI/MSI-X and MSI-INTx translation */
+    if (ptdev->msi)
+        pt_msi_disable(ptdev);
+    if (ptdev->msix)
+        pt_msix_disable(ptdev);
+
+    /* clear all virtual region address */
+    for (region = 0; region < PCI_NUM_REGIONS; region++)
+        dev->io_regions[region].addr = -1;
+
+    /* unmapping BAR */
+    pt_bar_mapping(ptdev, 0, 0);
+}
+
+/* Restore a part of the physical device's configuration registers.
+ *
+ * For every emulated register that has a restore handler: read the
+ * current device value, let the handler compute the value to write, and
+ * write that value to the device.  Afterwards the saved AER registers are
+ * written back when the device has an AER capability.
+ *
+ * A negative handler return value is treated as an internal error and
+ * terminates the I/O emulator.
+ */
+static void pt_config_restore(struct pt_dev *ptdev)
+{
+    struct pt_reg_grp_tbl *reg_grp_entry = NULL;
+    struct pt_reg_tbl *reg_entry = NULL;
+    struct pt_reg_info_tbl *reg = NULL;
+    uint32_t real_offset = 0;
+    uint32_t read_val = 0;
+    uint32_t val = 0;
+    uint16_t val_w = 0;
+    uint8_t val_b = 0;
+    int ret = 0;
+    PCIDevice *d = &ptdev->dev;
+
+    /* find emulate register group entry */
+    for (reg_grp_entry = ptdev->reg_grp_tbl_head.lh_first; reg_grp_entry;
+        reg_grp_entry = reg_grp_entry->entries.le_next)
+    {
+        /* find emulate register entry */
+        for (reg_entry = reg_grp_entry->reg_tbl_head.lh_first; reg_entry;
+            reg_entry = reg_entry->entries.le_next)
+        {
+            reg = reg_entry->reg;
+
+            /* check whether restoring is needed.
+             * NOTE(review): u.b.restore is tested for every register
+             * size; this presumably relies on the b/w/dw members of `u`
+             * overlaying each other - confirm against the declaration.
+             */
+            if (!reg->u.b.restore)
+                continue;
+
+            real_offset = (reg_grp_entry->base_offset + reg->offset);
+
+            /* read I/O device register value */
+            switch (reg->size) {
+            case 1:
+                read_val = pci_read_byte(ptdev->pci_dev, real_offset);
+                break;
+            case 2:
+                read_val = pci_read_word(ptdev->pci_dev, real_offset);
+                break;
+            case 4:
+                read_val = pci_read_long(ptdev->pci_dev, real_offset);
+                break;
+            }
+
+            val = 0;
+
+            /* restore based on register size.
+             * Each handler gets a buffer of its own width instead of a
+             * casted pointer into the 32-bit `val`, so the result does
+             * not depend on byte order or on type punning.
+             */
+            switch (reg->size) {
+            case 1:
+                /* byte register */
+                val_b = 0;
+                ret = reg->u.b.restore(ptdev, reg_entry, real_offset,
+                           (uint8_t)read_val, &val_b);
+                val = val_b;
+                break;
+            case 2:
+                /* word register */
+                val_w = 0;
+                ret = reg->u.w.restore(ptdev, reg_entry, real_offset,
+                           (uint16_t)read_val, &val_w);
+                val = val_w;
+                break;
+            case 4:
+                /* double word register */
+                ret = reg->u.dw.restore(ptdev, reg_entry, real_offset,
+                           read_val, &val);
+                break;
+            }
+
+            /* restoring error */
+            if (ret < 0)
+            {
+                /* exit I/O emulator */
+                PT_LOG("Internal error: Invalid restoring "
+                    "return value[%d]. I/O emulator exit.\n", ret);
+                exit(1);
+            }
+
+#ifdef PT_DEBUG_PCI_CONFIG_ACCESS
+            PT_LOG("[%02x:%02x.%x]: address=%04x val=0x%08x len=%d\n",
+                pci_bus_num(d->bus), (d->devfn >> 3) & 0x1F, (d->devfn & 0x7),
+                real_offset, val, reg->size);
+#endif
+
+            /* write the value chosen by the handler to the device */
+            switch (reg->size) {
+            case 1:
+                pci_write_byte(ptdev->pci_dev, real_offset, val);
+                break;
+            case 2:
+                pci_write_word(ptdev->pci_dev, real_offset, val);
+                break;
+            case 4:
+                pci_write_long(ptdev->pci_dev, real_offset, val);
+                break;
+            }
+        }
+    }
+
+    /* if AER supported, restore it */
+    if (ptdev->pm_state->aer_base)
+        pt_aer_reg_restore(ptdev);
+}
+
+/* Re-run the init handler of every emulated register.
+ *
+ * Walks all register groups and their register entries; for each entry
+ * with an init handler the handler's return value becomes the new
+ * emulated register contents (reg_entry->data).
+ */
+static void pt_config_reinit(struct pt_dev *ptdev)
+{
+    struct pt_reg_grp_tbl *grp;
+    struct pt_reg_tbl *entry;
+
+    for (grp = ptdev->reg_grp_tbl_head.lh_first; grp != NULL;
+        grp = grp->entries.le_next)
+    {
+        for (entry = grp->reg_tbl_head.lh_first; entry != NULL;
+            entry = entry->entries.le_next)
+        {
+            struct pt_reg_info_tbl *info = entry->reg;
+
+            /* registers without an init handler keep their data */
+            if (!info->init)
+                continue;
+
+            entry->data = info->init(ptdev, info,
+                              (grp->base_offset + info->offset));
+        }
+    }
+}
+
+/* Finish a D3hot -> D0 transition of a device that resets itself on that
+ * transition.
+ *
+ * `opaque` is the struct pt_dev of the device (presumably passed in as the
+ * pm_timer callback argument - confirm at the qemu_new_timer() call).
+ *
+ * If the device reached the requested state, the physical registers that
+ * have restore handlers are restored, all emulated registers are
+ * reinitialized and the interrupt setup is redone (MSI-INTx translation,
+ * or rebinding of machine_irq when translation is not enabled).
+ * In every case the transition flag is cleared and the timer is released.
+ */
+void pt_from_d3hot_to_d0_with_reset(void *opaque)
+{
+    struct pt_dev *ptdev = opaque;
+    PCIDevice *d = &ptdev->dev;
+    struct pt_pm_info *pm_state = ptdev->pm_state;
+    uint8_t e_device = 0;
+    uint8_t e_intx = 0;
+    int ret = 0;
+
+    /* check power state */
+    ret = check_power_state(ptdev);
+
+    if (ret < 0)
+        goto out;
+
+    PT_LOG("Reinitialize PCI configuration registers "
+        "due to power state transition with internal reset. [%02x:%02x.%x]\n",
+        pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7));
+
+    /* restore a part of I/O device register */
+    pt_config_restore(ptdev);
+
+    /* reinitialize all emulate register */
+    pt_config_reinit(ptdev);
+
+    /* setup MSI-INTx translation if support */
+    ret = pt_enable_msi_translate(ptdev);
+
+    /* rebind machine_irq to device */
+    if (ret < 0 && ptdev->machine_irq != 0)
+    {
+        e_device = (ptdev->dev.devfn >> 3) & 0x1f;
+        /* fix virtual interrupt pin to INTA# */
+        e_intx = 0;
+
+        ret = xc_domain_bind_pt_pci_irq(xc_handle, domid, ptdev->machine_irq,
+                                       0, e_device, e_intx);
+        if (ret < 0)
+            PT_LOG("Error: Rebinding of interrupt failed! ret=%d\n", ret);
+    }
+
+out:
+    /* power state transition flags off */
+    pm_state->flags &= ~PT_FLAG_TRANSITING;
+
+    /* release the timer and clear the pointer: pt_config_delete() frees
+     * pm_timer whenever it is non-NULL, so a stale pointer would be freed
+     * a second time.
+     */
+    qemu_free_timer(pm_state->pm_timer);
+    pm_state->pm_timer = NULL;
+}
+
+/* Finish a power state transition that needs no register restoring.
+ *
+ * `opaque` is the struct pt_dev of the device (presumably passed in as the
+ * pm_timer callback argument - confirm at the qemu_new_timer() call).
+ *
+ * Checks that the device reached the requested state (a mismatch is only
+ * logged by check_power_state()), clears the transition flag and releases
+ * the timer.
+ */
+void pt_default_power_transition(void *opaque)
+{
+    struct pt_dev *ptdev = opaque;
+    struct pt_pm_info *pm_state = ptdev->pm_state;
+
+    /* check power state */
+    check_power_state(ptdev);
+
+    /* power state transition flags off */
+    pm_state->flags &= ~PT_FLAG_TRANSITING;
+
+    /* release the timer and clear the pointer: pt_config_delete() frees
+     * pm_timer whenever it is non-NULL, so a stale pointer would be freed
+     * a second time.
+     */
+    qemu_free_timer(pm_state->pm_timer);
+    pm_state->pm_timer = NULL;
+}
+
 /* initialize emulate register */
 static int pt_config_reg_init(struct pt_dev *ptdev,
         struct pt_reg_grp_tbl *reg_grp,
@@ -1878,6 +2253,15 @@ static void pt_config_delete(struct pt_dev *ptdev)
     if (ptdev->msi)
         free(ptdev->msi);
 
+    /* free Power Management info table */
+    if (ptdev->pm_state)
+    {
+        if (ptdev->pm_state->pm_timer)
+            qemu_free_timer(ptdev->pm_state->pm_timer);
+
+        free(ptdev->pm_state);
+    }
+
     /* free all register group entry */
     while ((reg_grp_entry = ptdev->reg_grp_tbl_head.lh_first) != NULL)
     {
@@ -2027,6 +2411,36 @@ static uint32_t pt_bar_reg_init(struct pt_dev *ptdev,
     return reg_field;
 }
 
+/* Init handler of the Power Management Capabilities register.
+ *
+ * Caches the 16-bit value found in the virtual config space at
+ * real_offset in pm_state->pmc_field and returns the table's initial
+ * value as emulated register contents.
+ */
+static uint32_t pt_pmc_reg_init(struct pt_dev *ptdev,
+        struct pt_reg_info_tbl *reg, uint32_t real_offset)
+{
+    PCIDevice *dev = &ptdev->dev;
+    uint16_t pmc = *(uint16_t *)(dev->config + real_offset);
+
+    /* kept for the capability checks done on PMCSR accesses */
+    ptdev->pm_state->pmc_field = pmc;
+
+    return reg->init_val;
+}
+
+/* Init handler of the PCI Power Management Control/Status register.
+ *
+ * When the capability version stored in pm_state->pmc_field is above 2,
+ * the No Soft Reset bit is taken from the virtual config space and kept
+ * in pm_state->no_soft_reset; otherwise that field is left untouched.
+ * Returns the table's initial value as emulated register contents.
+ */
+static uint32_t pt_pmcsr_reg_init(struct pt_dev *ptdev,
+        struct pt_reg_info_tbl *reg, uint32_t real_offset)
+{
+    PCIDevice *dev = &ptdev->dev;
+    struct pt_pm_info *pm = ptdev->pm_state;
+    uint16_t version = pm->pmc_field & PCI_PM_CAP_VER_MASK;
+    uint8_t low_byte;
+
+    if (version > 2)
+    {
+        /* only the low byte of the register is looked at */
+        low_byte = *(uint8_t *)(dev->config + real_offset);
+        pm->no_soft_reset = (low_byte & (uint8_t)PCI_PM_CTRL_NO_SOFT_RESET);
+    }
+
+    return reg->init_val;
+}
+
 /* initialize Link Control register */
 static uint32_t pt_linkctrl_reg_init(struct pt_dev *ptdev,
         struct pt_reg_info_tbl *reg, uint32_t real_offset)
@@ -2108,11 +2522,6 @@ static uint32_t pt_msgctrl_reg_init(struct pt_dev *ptdev,
     /* All register is 0 after reset, except first 4 byte */
     reg_field &= reg->ro_mask;
 
-    if (ptdev->msi_trans_cap) {
-        PT_LOG("Turning on MSI-INTx translation\n");
-        ptdev->msi_trans_en = 1;
-    }
-    
     return reg_field;
 }
 
@@ -2180,7 +2589,9 @@ static uint32_t pt_msixctrl_reg_init(struct pt_dev *ptdev,
         pci_write_word(pdev, real_offset, reg_field & ~PCI_MSIX_ENABLE);
         reg_field &= ~(PCI_MSIX_ENABLE | PCI_MSIX_MASK);
     }
-    
+
+    ptdev->msix->ctrl_offset = real_offset;
+
     return reg_field;
 }
 
@@ -2191,6 +2602,32 @@ static uint8_t pt_reg_grp_size_init(struct pt_dev *ptdev,
     return grp_reg->grp_size;
 }
 
+/* Size handler of the Power Management capability register group.
+ *
+ * Besides returning the fixed group size it allocates the zeroed
+ * pt_pm_info of the device, records the PM capability base offset, looks
+ * up the AER extended capability and, when one is found, saves its
+ * registers.  The I/O emulator exits if the allocation fails.
+ */
+static uint8_t pt_pm_size_init(struct pt_dev *ptdev,
+        struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset)
+{
+    struct pt_pm_info *pm = qemu_mallocz(sizeof(struct pt_pm_info));
+
+    ptdev->pm_state = pm;
+    if (pm == NULL)
+    {
+        /* exit I/O emulator */
+        PT_LOG("Error: Allocating pt_pm_info failed. I/O emulator exit.\n");
+        exit(1);
+    }
+
+    /* Power Management Capability base offset */
+    pm->pm_base = base_offset;
+
+    /* AER Capability base offset, 0 when the device has none */
+    pm->aer_base = find_ext_cap_offset(ptdev->pci_dev,
+        (uint32_t)PCI_EXT_CAP_ID_AER);
+
+    if (pm->aer_base)
+        pt_aer_reg_save(ptdev);
+
+    return grp_reg->grp_size;
+}
+
 /* get MSI Capability Structure register group size */
 static uint8_t pt_msi_size_init(struct pt_dev *ptdev,
         struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset)
@@ -2215,7 +2652,8 @@ static uint8_t pt_msi_size_init(struct pt_dev *ptdev,
         exit(1);
     }
     memset(ptdev->msi, 0, sizeof(struct pt_msi_info));
-    
+    ptdev->msi->pirq = -1;
+
     return msi_size;
 }
 
@@ -2705,18 +3143,17 @@ static int pt_pmcsr_reg_write(struct pt_dev *ptdev,
         uint16_t *value, uint16_t dev_value, uint16_t valid_mask)
 {
     struct pt_reg_info_tbl *reg = cfg_entry->reg;
+    PCIDevice *d = &ptdev->dev;
     uint16_t writable_mask = 0;
     uint16_t throughable_mask = 0;
     uint16_t pmcsr_mask = (PCI_PM_CTRL_PME_ENABLE | 
                            PCI_PM_CTRL_DATA_SEL_MASK |
                            PCI_PM_CTRL_PME_STATUS);
+    struct pt_pm_info *pm_state = ptdev->pm_state;
+    uint16_t read_val = 0;
 
     /* modify emulate register */
     writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask & ~pmcsr_mask;
-    /* ignore it when the requested state neither D3 nor D0 */
-    if (((*value & PCI_PM_CTRL_STATE_MASK) != PCI_PM_CTRL_STATE_MASK) &&
-        ((*value & PCI_PM_CTRL_STATE_MASK) != 0))
-        writable_mask &= ~PCI_PM_CTRL_STATE_MASK;
 
     cfg_entry->data = ((*value & writable_mask) |
                        (cfg_entry->data & ~writable_mask));
@@ -2726,6 +3163,100 @@ static int pt_pmcsr_reg_write(struct pt_dev *ptdev,
     *value = ((*value & throughable_mask) |
               (dev_value & ~throughable_mask));
 
+    /* set I/O device power state */
+    pm_state->cur_state = (dev_value & PCI_PM_CTRL_STATE_MASK);
+
+    /* set Guest requested PowerState */
+    pm_state->req_state = (*value & PCI_PM_CTRL_STATE_MASK);
+
+    /* check power state transition or not */
+    if (pm_state->cur_state == pm_state->req_state)
+        /* not power state transition */
+        return 0;
+
+    /* check enable power state transition */
+    if ((pm_state->req_state != 0) &&
+        (pm_state->cur_state > pm_state->req_state))
+    {
+        PT_LOG("Error: Invalid power transition. "
+            "[%02x:%02x.%x][requested state:%d][current state:%d]\n",
+            pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
+            pm_state->req_state, pm_state->cur_state);
+
+        return 0;
+    }
+
+    /* check if this device supports the requested power state */
+    if (((pm_state->req_state == 1) && !(pm_state->pmc_field & PCI_PM_CAP_D1))
+        || ((pm_state->req_state == 2) &&
+        !(pm_state->pmc_field & PCI_PM_CAP_D2)))
+    {
+        PT_LOG("Error: Invalid power transition. "
+            "[%02x:%02x.%x][requested state:%d][current state:%d]\n",
+            pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
+            pm_state->req_state, pm_state->cur_state);
+
+        return 0;
+    }
+
+    /* in case of transition related to D3hot, it's necessary to wait 10 ms.
+     * But because writing to register will be performed later on actually,
+     * don't start QEMUTimer right now, just alloc and init QEMUTimer here.
+     */
+    if ((pm_state->cur_state == 3) || (pm_state->req_state == 3))
+    {
+        if (pm_state->req_state == 0)
+        {
+            /* alloc and init QEMUTimer */
+            if (!pm_state->no_soft_reset)
+            {
+                pm_state->pm_timer = qemu_new_timer(rt_clock,
+                    pt_from_d3hot_to_d0_with_reset, ptdev);
+
+                /* reset Interrupt and I/O resource mapping */
+                pt_reset_interrupt_and_io_mapping(ptdev);
+            }
+            else
+                pm_state->pm_timer = qemu_new_timer(rt_clock,
+                    pt_default_power_transition, ptdev);
+        }
+        else
+            /* alloc and init QEMUTimer */
+            pm_state->pm_timer = qemu_new_timer(rt_clock,
+                pt_default_power_transition, ptdev);
+
+        /* set power state transition delay */
+        pm_state->pm_delay = 10;
+
+        /* power state transition flags on */
+        pm_state->flags |= PT_FLAG_TRANSITING;
+    }
+    /* in case of transition related to D0, D1 and D2,
+     * no need to use QEMUTimer.
+     * So, we perform writing to register here and then read it back.
+     */
+    else
+    {
+        /* write power state to I/O device register */
+        pci_write_word(ptdev->pci_dev,
+                        (pm_state->pm_base + PCI_PM_CTRL), *value);
+
+        /* in case of transition related to D2,
+         * it's necessary to wait 200 usec.
+         * But because QEMUTimer does not support microsecond units right now,
+         * we do the wait ourselves here.
+         */
+        if ((pm_state->cur_state == 2) || (pm_state->req_state == 2))
+            usleep(200);
+
+        /* check power state */
+        check_power_state(ptdev);
+
+        /* recreate value for writing to I/O device register */
+        *value = pci_read_word(ptdev->pci_dev,
+                                (pm_state->pm_base + PCI_PM_CTRL));
+    }
+
     return 0;
 }
 
@@ -2760,8 +3291,7 @@ static int pt_linkctrl_reg_write(struct pt_dev *ptdev,
     struct pt_reg_info_tbl *reg = cfg_entry->reg;
     uint16_t writable_mask = 0;
     uint16_t throughable_mask = 0;
-    uint16_t linkctrl_mask = (PCI_EXP_LNKCTL_ASPM | 0x04 |
-                              PCI_EXP_LNKCTL_DISABLE |
+    uint16_t linkctrl_mask = (0x04 | PCI_EXP_LNKCTL_DISABLE |
                               PCI_EXP_LNKCTL_RETRAIN | 
                               0x0400 | 0x0800 | 0xF000);
 
@@ -2825,34 +3355,6 @@ static int pt_linkctrl2_reg_write(struct pt_dev *ptdev,
     return 0;
 }
 
-static void pt_unmap_msi_translate(struct pt_dev *ptdev)
-{
-    uint16_t e_device, e_intx;
-    int rc;
-
-    /* MSI_ENABLE bit should be disabed until the new handler is set */
-    msi_set_enable(ptdev, 0);
-
-    e_device = (ptdev->dev.devfn >> 3) & 0x1f;
-    /* fix virtual interrupt pin to INTA# */
-    e_intx = 0;
-    rc = xc_domain_unbind_pt_irq(xc_handle, domid, ptdev->msi->pirq,
-                                 PT_IRQ_TYPE_MSI_TRANSLATE, 0,
-                                 e_device, e_intx, 0);
-    if (rc < 0)
-        PT_LOG("Error: Unbinding pt irq for MSI-INTx failed! rc=%d\n", rc);
-
-    if (ptdev->machine_irq)
-    {
-        rc = xc_domain_bind_pt_pci_irq(xc_handle, domid, ptdev->machine_irq,
-                                       0, e_device, e_intx);
-        if ( rc < 0 )
-            PT_LOG("Error: Rebinding of interrupt failed! rc=%d\n", rc);
-    }
-
-    ptdev->msi_trans_en = 0;
-}
-
 /* write Message Control register */
 static int pt_msgctrl_reg_write(struct pt_dev *ptdev, 
     struct pt_reg_tbl *cfg_entry, 
@@ -2893,7 +3395,7 @@ static int pt_msgctrl_reg_write(struct pt_dev *ptdev,
         {
             if (ptdev->msi_trans_en) {
                 PT_LOG("guest enabling MSI, disable MSI-INTx translation\n");
-                pt_unmap_msi_translate(ptdev);
+                pt_disable_msi_translate(ptdev);
             }
             else
             {
@@ -3075,7 +3577,7 @@ static int pt_msixctrl_reg_write(struct pt_dev *ptdev,
     {
         if (ptdev->msi_trans_en) {
             PT_LOG("guest enabling MSI-X, disable MSI-INTx translation\n");
-            pt_unmap_msi_translate(ptdev);
+            pt_disable_msi_translate(ptdev);
         }
         pt_msix_update(ptdev);
     }
@@ -3085,6 +3587,141 @@ static int pt_msixctrl_reg_write(struct pt_dev *ptdev,
     return 0;
 }
 
+/* restore byte size emulate register */
+static int pt_byte_reg_restore(struct pt_dev *ptdev, 
+        struct pt_reg_tbl *cfg_entry, 
+        uint32_t real_offset, uint8_t dev_value, uint8_t *value)
+{
+    struct pt_reg_info_tbl *reg = cfg_entry->reg;
+    PCIDevice *d = &ptdev->dev;
+
+    /* use I/O device register's value as restore value */
+    *value = *(uint8_t *)(d->config + real_offset);
+
+    /* create value for restoring to I/O device register */
+    *value = PT_MERGE_VALUE(*value, dev_value, reg->emu_mask);
+
+    return 0;
+}
+
+/* restore word size emulate register */
+static int pt_word_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint16_t dev_value, uint16_t *value)
+{
+    struct pt_reg_info_tbl *reg = cfg_entry->reg;
+    PCIDevice *d = &ptdev->dev;
+
+    /* use I/O device register's value as restore value */
+    *value = *(uint16_t *)(d->config + real_offset);
+
+    /* create value for restoring to I/O device register */
+    *value = PT_MERGE_VALUE(*value, dev_value, reg->emu_mask);
+
+    return 0;
+}
+
+/* restore long size emulate register */
+static int pt_long_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint32_t dev_value, uint32_t *value)
+{
+    struct pt_reg_info_tbl *reg = cfg_entry->reg;
+    PCIDevice *d = &ptdev->dev;
+
+    /* use I/O device register's value as restore value */
+    *value = *(uint32_t *)(d->config + real_offset);
+
+    /* create value for restoring to I/O device register */
+    *value = PT_MERGE_VALUE(*value, dev_value, reg->emu_mask);
+
+    return 0;
+}
+
+/* restore Command register */
+static int pt_cmd_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint16_t dev_value, uint16_t *value)
+{
+    struct pt_reg_info_tbl *reg = cfg_entry->reg;
+    PCIDevice *d = &ptdev->dev;
+    uint16_t restorable_mask = 0;
+
+    /* use I/O device register's value as restore value */
+    *value = *(uint16_t *)(d->config + real_offset);
+
+    /* create value for restoring to I/O device register
+     * but do not include Fast Back-to-Back Enable bit.
+     */
+    restorable_mask = reg->emu_mask & ~PCI_COMMAND_FAST_BACK;
+    *value = PT_MERGE_VALUE(*value, dev_value, restorable_mask);
+
+    return 0;
+}
+
+/* restore BAR */
+static int pt_bar_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint32_t dev_value, uint32_t *value)
+{
+    struct pt_reg_info_tbl *reg = cfg_entry->reg;
+    uint32_t bar_emu_mask = 0;
+    int index = 0;
+
+    /* get BAR index */
+    index = pt_bar_offset_to_index(reg->offset);
+    if (index < 0)
+    {
+        /* exit I/O emulator */
+        PT_LOG("Internal error: Invalid BAR index[%d]. "
+            "I/O emulator exit.\n", index);
+        exit(1);
+    }
+
+    /* use value from kernel sysfs */
+    if (ptdev->bases[index].bar_flag == PT_BAR_FLAG_UPPER)
+        *value = ptdev->pci_dev->base_addr[index-1] >> 32;
+    else
+        *value = ptdev->pci_dev->base_addr[index];
+
+    /* set emulate mask depend on BAR flag */
+    switch (ptdev->bases[index].bar_flag)
+    {
+    case PT_BAR_FLAG_MEM:
+        bar_emu_mask = PT_BAR_MEM_EMU_MASK;
+        break;
+    case PT_BAR_FLAG_IO:
+        bar_emu_mask = PT_BAR_IO_EMU_MASK;
+        break;
+    case PT_BAR_FLAG_UPPER:
+        bar_emu_mask = PT_BAR_ALLF;
+        break;
+    default:
+        break;
+    }
+
+    /* create value for restoring to I/O device register */
+    *value = PT_MERGE_VALUE(*value, dev_value, bar_emu_mask);
+
+    return 0;
+}
+
+/* restore Power Management Control/Status register */
+static int pt_pmcsr_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint16_t dev_value, uint16_t *value)
+{
+    struct pt_reg_info_tbl *reg = cfg_entry->reg;
+
+    /* create value for restoring to I/O device register
+     * No need to restore, just clear PME Enable and PME Status bit
+     * Note: register type of PME Status bit is RW1C, so clear by writing 1b
+     */
+    *value = (dev_value & ~PCI_PM_CTRL_PME_ENABLE) | PCI_PM_CTRL_PME_STATUS;
+
+    return 0;
+}
+
 struct pt_dev * register_real_device(PCIBus *e_bus,
         const char *e_dev_name, int e_devfn, uint8_t r_bus, uint8_t r_dev,
         uint8_t r_func, uint32_t machine_irq, struct pci_access *pci_access,
@@ -3197,32 +3834,6 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
     if (!assigned_device->dev.config[0x3d])
         goto out;
 
-    e_device = (assigned_device->dev.devfn >> 3) & 0x1f;
-    /* fix virtual interrupt pin to INTA# */
-    e_intx = 0;
-
-    while (assigned_device->msi_trans_en)
-    {
-        if (pt_msi_setup(assigned_device))
-        {
-            PT_LOG("Error: MSI-INTx translation MSI setup failed, fallback\n");
-            assigned_device->msi_trans_en = 0;
-            break;
-        }
-
-        rc = xc_domain_bind_pt_irq(xc_handle, domid, assigned_device->msi->pirq,
-                                   PT_IRQ_TYPE_MSI_TRANSLATE, 0,
-                                   e_device, e_intx, 0);
-        if ( rc < 0)
-        {
-            PT_LOG("Error: MSI-INTx translation bind failed, fallback\n");
-            assigned_device->msi_trans_en = 0;
-            break;
-        }
-        msi_set_enable(assigned_device, 1);
-        break;
-    }
-
     if ( PT_MACHINE_IRQ_AUTO == machine_irq )
     {
         int pirq = pci_dev->irq;
@@ -3242,12 +3853,16 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
         }
     }
 
-    if (assigned_device->msi_trans_en)
-        goto out;
+    /* set up MSI-INTx translation if supported */
+    rc = pt_enable_msi_translate(assigned_device);
 
     /* bind machine_irq to device */
-    if ( 0 != machine_irq )
+    if (rc < 0 && machine_irq != 0)
     {
+        e_device = (assigned_device->dev.devfn >> 3) & 0x1f;
+        /* fix virtual interrupt pin to INTA# */
+        e_intx = 0;
+
         rc = xc_domain_bind_pt_pci_irq(xc_handle, domid, machine_irq, 0,
                                        e_device, e_intx);
         if ( rc < 0 )
diff --git a/hw/pass-through.h b/hw/pass-through.h
index 7a623be..4704d83 100644
--- a/hw/pass-through.h
+++ b/hw/pass-through.h
@@ -24,6 +24,7 @@
 #include "pci/pci.h"
 #include "exec-all.h"
 #include "sys-queue.h"
+#include "qemu-timer.h"
 
 /* Log acesss */
 #define PT_LOGGING_ENABLED
@@ -59,6 +60,12 @@
 #define PCI_CAP_ID_SSVID        0x0D
 #endif
 
+#ifdef PCI_PM_CTRL_NO_SOFT_RESET
+#undef PCI_PM_CTRL_NO_SOFT_RESET
+#endif
+/* No Soft Reset for D3hot->D0 */
+#define PCI_PM_CTRL_NO_SOFT_RESET 0x0008
+
 #ifndef PCI_MSI_FLAGS_MASK_BIT
 /* interrupt masking & reporting supported */
 #define PCI_MSI_FLAGS_MASK_BIT  0x0100
@@ -79,6 +86,19 @@
 #define PCI_EXP_TYPE_ROOT_EC     0xa
 #endif
 
+#ifndef PCI_EXT_CAP_ID
+/* Extended Capabilities (PCI-X 2.0 and PCI Express) */
+#define PCI_EXT_CAP_ID(header)   (header & 0x0000ffff)
+#endif
+
+#ifndef PCI_EXT_CAP_NEXT
+/* Extended Capabilities (PCI-X 2.0 and PCI Express) */
+#define PCI_EXT_CAP_NEXT(header) ((header >> 20) & 0xffc)
+#endif
+
+/* power state transition */
+#define PT_FLAG_TRANSITING 0x0001
+
 #define PT_INVALID_REG          0xFFFFFFFF      /* invalid register value */
 #define PT_BAR_ALLF             0xFFFFFFFF      /* BAR ALLF value */
 #define PT_BAR_MEM_RO_MASK      0x0000000F      /* BAR ReadOnly mask(Memory) */
@@ -102,6 +122,8 @@ enum {
     }\
 } while(0)
 
+#define PT_MERGE_VALUE(value, data, val_mask) \
+    (((value) & (val_mask)) | ((data) & ~(val_mask)))
 
 struct pt_region {
     /* Virtual phys base & size */
@@ -135,6 +157,7 @@ struct msix_entry_info {
 };
 
 struct pt_msix_info {
+    uint32_t ctrl_offset;
     int enabled;
     int total_entries;
     int bar_index;
@@ -147,6 +170,18 @@ struct pt_msix_info {
     struct msix_entry_info msix_entry[0];
 };
 
+struct pt_pm_info {
+    QEMUTimer *pm_timer;  /* QEMUTimer struct */
+    int no_soft_reset;    /* No Soft Reset flags */
+    uint16_t flags;       /* power state transition flags */
+    uint16_t pmc_field;   /* Power Management Capabilities field */
+    int pm_delay;         /* power state transition delay */
+    uint16_t cur_state;   /* current power state */
+    uint16_t req_state;   /* requested power state */
+    uint32_t pm_base;     /* Power Management Capability reg base offset */
+    uint32_t aer_base;    /* AER Capability reg base offset */
+};
+
 /*
     This structure holds the context of the mapping functions
     and data that is relevant for qemu device management.
@@ -163,6 +198,7 @@ struct pt_dev {
     /* Physical MSI to guest INTx translation when possible */
     int msi_trans_cap;
     int msi_trans_en;
+    struct pt_pm_info *pm_state;                /* PM virtualization */
 };
 
 /* Used for formatting PCI BDF into cf8 format */
@@ -260,6 +296,24 @@ typedef int (*conf_byte_read) (struct pt_dev *ptdev,
                                struct pt_reg_tbl *cfg_entry, 
                                uint8_t *value,
                                uint8_t valid_mask);
+/* emul reg long restore method */
+typedef int (*conf_dword_restore) (struct pt_dev *ptdev,
+                                   struct pt_reg_tbl *cfg_entry, 
+                                   uint32_t real_offset,
+                                   uint32_t dev_value,
+                                   uint32_t *value);
+/* emul reg word restore method */
+typedef int (*conf_word_restore) (struct pt_dev *ptdev,
+                                  struct pt_reg_tbl *cfg_entry, 
+                                  uint32_t real_offset,
+                                  uint16_t dev_value,
+                                  uint16_t *value);
+/* emul reg byte restore method */
+typedef int (*conf_byte_restore) (struct pt_dev *ptdev,
+                                  struct pt_reg_tbl *cfg_entry, 
+                                  uint32_t real_offset,
+                                  uint8_t dev_value,
+                                  uint8_t *value);
 
 /* emul reg infomation table */
 struct pt_reg_info_tbl {
@@ -281,18 +335,24 @@ struct pt_reg_info_tbl {
             conf_dword_write write;
             /* emul reg long read method */
             conf_dword_read read;
+            /* emul reg long restore method */
+            conf_dword_restore restore;
         } dw;
         struct {
             /* emul reg word write method */
             conf_word_write write;
             /* emul reg word read method */
             conf_word_read read;
+            /* emul reg word restore method */
+            conf_word_restore restore;
         } w;
         struct {
             /* emul reg byte write method */
             conf_byte_write write;
             /* emul reg byte read method */
             conf_byte_read read;
+            /* emul reg byte restore method */
+            conf_byte_restore restore;
         } b;
     } u;
 };
diff --git a/hw/pci.h b/hw/pci.h
index a527a39..2800499 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -44,7 +44,7 @@ typedef struct PCIIORegion {
 
 struct PCIDevice {
     /* PCI config space */
-    uint8_t config[256];
+    uint8_t config[4096];
 
     /* the following fields are read only */
     PCIBus *bus;
diff --git a/hw/pt-msi.c b/hw/pt-msi.c
index 9898763..c7a8f22 100644
--- a/hw/pt-msi.c
+++ b/hw/pt-msi.c
@@ -22,6 +22,41 @@
 #include "pt-msi.h"
 #include <sys/mman.h>
 
+/* Set (en != 0) or clear (en == 0) the MSI Enable bit on the device. */
+static void msi_set_enable(struct pt_dev *dev, int en)
+{
+    uint32_t address = dev->msi ? dev->msi->ctrl_offset : 0;
+    uint16_t val = 0;
+
+    /* nothing to do without MSI state or a known control offset */
+    if (!address)
+        return;
+
+    /* any non-zero 'en' enables, consistent with msix_set_enable() */
+    val = pci_read_word(dev->pci_dev, address) & ~PCI_MSI_FLAGS_ENABLE;
+    if (en)
+        val |= PCI_MSI_FLAGS_ENABLE;
+    pci_write_word(dev->pci_dev, address, val);
+}
+
+static void msix_set_enable(struct pt_dev *dev, int en)
+{
+    uint16_t val = 0;
+    uint32_t address = 0;
+    if (!dev->msix)
+        return;
+
+    address = dev->msix->ctrl_offset;
+    if (!address)
+        return;
+
+    val = pci_read_word(dev->pci_dev, address);
+    val &= ~PCI_MSIX_ENABLE;
+    if (en)
+        val |= PCI_MSIX_ENABLE;
+    pci_write_word(dev->pci_dev, address, val);
+}
+
 /* MSI virtuailization functions */
 
 /*
@@ -95,6 +130,141 @@ int pt_msi_update(struct pt_dev *d)
                                      d->msi->pirq, gflags);
 }
 
+void pt_msi_disable(struct pt_dev *dev)
+{
+    PCIDevice *d = &dev->dev;
+    uint8_t gvec = 0;
+    uint32_t gflags = 0;
+    uint64_t addr = 0;
+    uint8_t e_device = 0;
+    uint8_t e_intx = 0;
+
+    msi_set_enable(dev, 0);
+
+    e_device = (dev->dev.devfn >> 3) & 0x1f;
+    /* fix virtual interrupt pin to INTA# */
+    e_intx = 0;
+
+    if (dev->msi_trans_en)
+    {
+        if (xc_domain_unbind_pt_irq(xc_handle, domid, dev->msi->pirq,
+                                    PT_IRQ_TYPE_MSI_TRANSLATE, 0,
+                                    e_device, e_intx, 0))
+        {
+            PT_LOG("Error: Unbinding pt irq for MSI-INTx failed!\n");
+            goto out;
+        }
+    }
+    else if (!(dev->msi->flags & MSI_FLAG_UNINIT))
+    {
+        /* get vector, address, flags info, etc. */
+        gvec = dev->msi->data & 0xFF;
+        addr = (uint64_t)dev->msi->addr_hi << 32 | dev->msi->addr_lo;
+        gflags = __get_msi_gflags(dev->msi->data, addr);
+
+        PT_LOG("Unbind msi with pirq %x, gvec %x\n",
+                dev->msi->pirq, gvec);
+
+        if (xc_domain_unbind_msi_irq(xc_handle, domid, gvec,
+                                        dev->msi->pirq, gflags))
+        {
+            PT_LOG("Error: Unbinding of MSI failed. [%02x:%02x.%x]\n", 
+                pci_bus_num(d->bus), 
+                ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7));
+            goto out;
+        }
+    }
+
+    if (dev->msi->pirq != -1)
+    {
+        PT_LOG("Unmap msi with pirq %x\n", dev->msi->pirq);
+
+        if (xc_physdev_unmap_pirq(xc_handle, domid, dev->msi->pirq))
+        {
+            PT_LOG("Error: Unmapping of MSI failed. [%02x:%02x.%x]\n", 
+               pci_bus_num(d->bus), 
+               ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7));
+            goto out;
+        }
+    }
+    /* unbind INTx */
+    if (dev->msi_trans_cap && !dev->msi_trans_en)
+    {
+        if (xc_domain_unbind_pt_irq(xc_handle, domid, dev->machine_irq,
+                        PT_IRQ_TYPE_PCI, 0, e_device, e_intx, 0))
+            PT_LOG("Error: Unbinding of interrupt failed!\n");
+    }
+
+out:
+    /* clear msi info */
+    dev->msi->flags = 0;
+    dev->msi->pirq = -1;
+    dev->msi_trans_en = 0;
+}
+
+/* MSI-INTx translation virtualization functions */
+int pt_enable_msi_translate(struct pt_dev* dev)
+{
+    uint8_t e_device = 0;
+    uint8_t e_intx = 0;
+
+    if (!(dev->msi && dev->msi_trans_cap))
+        return -1;
+
+    msi_set_enable(dev, 0);
+    dev->msi_trans_en = 0;
+
+    if (pt_msi_setup(dev))
+    {
+        PT_LOG("Error: MSI-INTx translation MSI setup failed, fallback\n");
+        return -1;
+    }
+
+    e_device = (dev->dev.devfn >> 3) & 0x1f;
+    /* fix virtual interrupt pin to INTA# */
+    e_intx = 0;
+
+    if (xc_domain_bind_pt_irq(xc_handle, domid, dev->msi->pirq,
+                               PT_IRQ_TYPE_MSI_TRANSLATE, 0,
+                               e_device, e_intx, 0))
+    {
+        PT_LOG("Error: MSI-INTx translation bind failed, fallback\n");
+        return -1;
+    }
+
+    msi_set_enable(dev, 1);
+    dev->msi_trans_en = 1;
+
+    return 0;
+}
+
+void pt_disable_msi_translate(struct pt_dev *dev)
+{
+    uint8_t e_device = 0;
+    uint8_t e_intx = 0;
+
+    /* MSI_ENABLE bit should be disabled until the new handler is set */
+    msi_set_enable(dev, 0);
+
+    e_device = (dev->dev.devfn >> 3) & 0x1f;
+    /* fix virtual interrupt pin to INTA# */
+    e_intx = 0;
+
+    if (xc_domain_unbind_pt_irq(xc_handle, domid, dev->msi->pirq,
+                                 PT_IRQ_TYPE_MSI_TRANSLATE, 0,
+                                 e_device, e_intx, 0))
+        PT_LOG("Error: Unbinding pt irq for MSI-INTx failed!\n");
+
+    if (dev->machine_irq)
+    {
+        if (xc_domain_bind_pt_pci_irq(xc_handle, domid, dev->machine_irq,
+                                       0, e_device, e_intx))
+            PT_LOG("Error: Rebinding of interrupt failed!\n");
+    }
+
+    dev->msi_trans_en = 0;
+}
+
 /* MSI-X virtulization functions */
 static void mask_physical_msix_entry(struct pt_dev *dev, int entry_nr, int mask)
 {
@@ -159,6 +329,52 @@ int pt_msix_update(struct pt_dev *dev)
     return 0;
 }
 
+void pt_msix_disable(struct pt_dev *dev)
+{
+    PCIDevice *d = &dev->dev;
+    uint8_t gvec = 0;
+    uint32_t gflags = 0;
+    uint64_t addr = 0;
+    int i = 0;
+    struct msix_entry_info *entry = NULL;
+
+    msix_set_enable(dev, 0);
+
+    for ( i = 0; i < dev->msix->total_entries; i++ )
+    {
+        entry = &dev->msix->msix_entry[i];
+
+        if (entry->pirq == -1)
+            continue;
+
+        gvec = entry->io_mem[2] & 0xff;
+        addr = *(uint64_t *)&entry->io_mem[0];
+        gflags = __get_msi_gflags(entry->io_mem[2], addr);
+
+        PT_LOG("Unbind msix with pirq %x, gvec %x\n",
+                entry->pirq, gvec);
+
+        if (xc_domain_unbind_msi_irq(xc_handle, domid, gvec,
+                                        entry->pirq, gflags))
+            PT_LOG("Error: Unbinding of MSI-X failed. [%02x:%02x.%x]\n", 
+                pci_bus_num(d->bus), 
+                ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7));
+        else
+        {
+            PT_LOG("Unmap msix with pirq %x\n", entry->pirq);
+
+            if (xc_physdev_unmap_pirq(xc_handle,
+                                         domid, entry->pirq))
+                PT_LOG("Error: Unmapping of MSI-X failed. [%02x:%02x.%x]\n",
+                    pci_bus_num(d->bus),
+                    ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7));
+        }
+        /* clear msi-x info */
+        entry->pirq = -1;
+        entry->flags = 0;
+    }
+}
+
 static void pci_msix_invalid_write(void *opaque, target_phys_addr_t addr,
                                    uint32_t val)
 {
diff --git a/hw/pt-msi.h b/hw/pt-msi.h
index a8632d5..dea0848 100644
--- a/hw/pt-msi.h
+++ b/hw/pt-msi.h
@@ -85,9 +85,21 @@ __get_msi_gflags(uint32_t data, uint64_t addr);
 int
 pt_msi_update(struct pt_dev *d);
 
+void
+pt_msi_disable(struct pt_dev *dev);
+
+int
+pt_enable_msi_translate(struct pt_dev* dev);
+
+void
+pt_disable_msi_translate(struct pt_dev *dev);
+
 int
 pt_msix_update(struct pt_dev *dev);
 
+void
+pt_msix_disable(struct pt_dev *dev);
+
 int
 remove_msix_mapping(struct pt_dev *dev, int bar_index);
 
diff --git a/qemu-timer.h b/qemu-timer.h
index 7408edc..181428f 100644
--- a/qemu-timer.h
+++ b/qemu-timer.h
@@ -31,6 +31,8 @@ extern int64_t ticks_per_sec;
 void qemu_get_timer(QEMUFile *f, QEMUTimer *ts);
 void qemu_put_timer(QEMUFile *f, QEMUTimer *ts);
 
+void qemu_run_one_timer(QEMUTimer *ts);
+
 /* ptimer.c */
 typedef struct ptimer_state ptimer_state;
 typedef void (*ptimer_cb)(void *opaque);
diff --git a/vl.c b/vl.c
index dd5d155..8539f6d 100644
--- a/vl.c
+++ b/vl.c
@@ -1286,6 +1286,22 @@ void qemu_get_timer(QEMUFile *f, QEMUTimer *ts)
     }
 }
 
+/* run the specified timer */
+void qemu_run_one_timer(QEMUTimer *ts)
+{
+    uint64_t current_time;
+
+    /* remove timer from the list before calling the callback */
+    qemu_del_timer(ts);
+
+    while ((current_time = qemu_get_clock(rt_clock)) < ts->expire_time)
+        /* sleep until the expire time */
+        usleep((ts->expire_time - current_time) * 1000);
+
+    /* run the callback */
+    ts->cb(ts->opaque);
+}
+
 static void timer_save(QEMUFile *f, void *opaque)
 {
     if (cpu_ticks_enabled) {


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.