WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg (staging)

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] merge with xen-unstable.hg (staging)
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Thu, 22 Nov 2007 12:00:55 -0800
Delivery-date: Thu, 22 Nov 2007 12:04:32 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1195661526 25200
# Node ID 53dc1cf505060a06e5b34a4812fce4312743ca26
# Parent  9a9ddc04eea2cac0ccfe8be2b9259b4edea5ec9d
# Parent  05cbf512b82b2665d407395bac73b9cca0c396b4
merge with xen-unstable.hg (staging)
---
 xen/include/asm-powerpc/smpboot.h   |   21 
 xen/include/asm-x86/smpboot.h       |   16 
 tools/ioemu/Makefile.target         |    2 
 tools/ioemu/hw/e100.c               | 2464 ++++++++++++++++++++++++++++++++++++
 tools/ioemu/hw/pci.c                |    2 
 xen/arch/x86/hvm/hpet.c             |    8 
 xen/arch/x86/irq.c                  |    1 
 xen/arch/x86/mm.c                   |   15 
 xen/arch/x86/physdev.c              |    1 
 xen/arch/x86/smp.c                  |    1 
 xen/arch/x86/traps.c                |    3 
 xen/arch/x86/x86_32/seg_fixup.c     |  183 +-
 xen/include/asm-x86/desc.h          |   63 
 xen/include/asm-x86/system.h        |  365 +----
 xen/include/asm-x86/x86_32/system.h |  114 +
 xen/include/asm-x86/x86_64/system.h |   68 
 16 files changed, 2897 insertions(+), 430 deletions(-)

diff -r 9a9ddc04eea2 -r 53dc1cf50506 tools/ioemu/Makefile.target
--- a/tools/ioemu/Makefile.target       Tue Nov 20 11:53:44 2007 -0700
+++ b/tools/ioemu/Makefile.target       Wed Nov 21 09:12:06 2007 -0700
@@ -399,7 +399,7 @@ VL_OBJS+= usb.o usb-hub.o usb-linux.o us
 VL_OBJS+= usb.o usb-hub.o usb-linux.o usb-hid.o usb-ohci.o usb-msd.o
 
 # PCI network cards
-VL_OBJS+= ne2000.o rtl8139.o pcnet.o
+VL_OBJS+= ne2000.o rtl8139.o pcnet.o e100.o
 
 ifeq ($(TARGET_BASE_ARCH), i386)
 # Hardware support
diff -r 9a9ddc04eea2 -r 53dc1cf50506 tools/ioemu/hw/e100.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ioemu/hw/e100.c     Wed Nov 21 09:12:06 2007 -0700
@@ -0,0 +1,2464 @@
+/*
+ * QEMU E100(i82557) ethernet card emulation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * Copyright (c) 2006-2007 Stefan Weil
+ * Copyright (c) 2006-2007 Zhang Xin(xing.z.zhang@xxxxxxxxx)
+ *
+ * Support OS:
+ *      x86 linux and windows
+ *      PAE linux and windows
+ *      x86_64 linux and windows
+ *      IA64 linux and windows
+ *
+ * Untested:
+ *      Big-endian machine
+ *
+ * References:
+ *
+ * Intel 8255x 10/100 Mbps Ethernet Controller Family
+ * Open Source Software Developer Manual
+ */
+
+#include <assert.h>
+#include "vl.h"
+
+enum
+{
+    E100_PCI_VENDOR_ID = 0x00,        /* 16 bits */
+    E100_PCI_DEVICE_ID = 0x02,        /* 16 bits */
+    E100_PCI_COMMAND = 0x04,        /* 16 bits */
+    E100_PCI_STATUS = 0x06,            /* 16 bits */
+    E100_PCI_REVISION_ID = 0x08,    /* 8 bits */
+    E100_PCI_CLASS_CODE = 0x0b,        /* 8 bits */
+    E100_PCI_SUBCLASS_CODE = 0x0a,    /* 8 bits */
+    E100_PCI_HEADER_TYPE = 0x0e,    /* 8 bits */
+    E100_PCI_BASE_ADDRESS_0 = 0x10,    /* 32 bits */
+    E100_PCI_BASE_ADDRESS_1 = 0x14,    /* 32 bits */
+    E100_PCI_BASE_ADDRESS_2 = 0x18,    /* 32 bits */
+    E100_PCI_BASE_ADDRESS_3 = 0x1c,    /* 32 bits */
+    E100_PCI_BASE_ADDRESS_4 = 0x20,    /* 32 bits */
+    E100_PCI_BASE_ADDRESS_5 = 0x24    /* 32 bits */
+}PCI_CONFIGURE_SPACE;
+
+#define PCI_CONFIG_8(offset, value) \
+    (*(uint8_t *)&pci_conf[offset] = (value))
+#define PCI_CONFIG_16(offset, value) \
+    (*(uint16_t *)&pci_conf[offset] = cpu_to_le16(value))
+#define PCI_CONFIG_32(offset, value) \
+    (*(uint32_t *)&pci_conf[offset] = cpu_to_le32(value))
+
+// Alias for Control/Status register read/write
+#define CSR_STATUS  scb_status
+#define CSR_CMD scb_cmd
+#define CSR_POINTER scb_pointer
+#define CSR_PORT port
+#define CSR_EEPROM eeprom_ctrl
+#define CSR_MDI mdi_ctrl
+#define CSR_PM pm_reg
+
+#define CSR(class, field)   \
+    (s->pci_mem.csr.class.u.field)
+#define CSR_VAL(class)  \
+    (s->pci_mem.csr.class.val)
+
+#define CSR_READ(x, type)    \
+    ({  \
+        type t; \
+        memcpy(&t, &s->pci_mem.mem[x], sizeof(type)); \
+        t;  \
+     })
+
+#define CSR_WRITE(x, val, type)    \
+    ({  \
+        type t = val; \
+        memcpy(&s->pci_mem.mem[x], &t, sizeof(type)); \
+     })
+
+#define SET_CU_STATE(val)    \
+    (CSR(CSR_STATUS, cus) = val)
+#define GET_CU_STATE    \
+    (CSR(CSR_STATUS, cus))
+
+#define SET_RU_STATE(val)    \
+    (CSR(CSR_STATUS, rus) = val)
+#define GET_RU_STATE    \
+    (CSR(CSR_STATUS, rus))
+
+#define KiB 1024
+
+#define EEPROM_SIZE     64
+
+#define BIT(n) (1U << (n))
+
+/* debug E100 card */
+//#define DEBUG_E100
+
+/*
+ * logout(): debug trace helper.
+ * With DEBUG_E100 defined it writes the "EE100" tag, the name of the calling
+ * function and the formatted message to stderr.  Otherwise it expands to a
+ * no-op expression and its arguments are not evaluated.
+ */
+#ifdef DEBUG_E100
+#define logout(fmt, args...) fprintf(stderr, "EE100\t%-28s" fmt, __func__, ##args)
+#else
+#define logout(fmt, args...) ((void)0)
+#endif
+
+#define MAX_ETH_FRAME_SIZE 1514
+
+/* This driver supports several different devices which are declared here. */
+#define i82551          0x82551
+#define i82557B         0x82557b
+#define i82557C         0x82557c
+#define i82558B         0x82558b
+#define i82559C         0x82559c
+#define i82559ER        0x82559e
+#define i82562          0x82562
+
+#define PCI_MEM_SIZE            (4 * KiB)
+#define PCI_IO_SIZE             (64)
+#define PCI_FLASH_SIZE          (128 * KiB)
+
+enum
+{
+    OP_READ,
+    OP_WRITE,
+} OPERTAION_DIRECTION;
+
+/* The SCB accepts the following controls for the Tx and Rx units: */
+enum
+{
+    CU_NOP = 0x0000,        /* No operation */
+    CU_START = 0x0010,        /* CU start     */
+    CU_RESUME = 0x0020,        /* CU resume    */
+    CU_STATSADDR = 0x0040,    /* Load dump counters address */
+    CU_SHOWSTATS = 0x0050,    /* Dump statistical counters */
+    CU_CMD_BASE = 0x0060,    /* Load CU base address */
+    CU_DUMPSTATS = 0x0070,    /* Dump and reset statistical counters */
+    CU_S_RESUME = 0x00a0    /* CU static resume */
+}CONTROL_UNIT_COMMAND;
+
+enum
+{
+    RU_NOP = 0x0000,
+    RU_START = 0x0001,
+    RU_RESUME = 0x0002,
+    RU_DMA_REDIRECT = 0x0003,
+    RU_ABORT = 0x0004,
+    RU_LOAD_HDS = 0x0005,
+    RU_ADDR_LOAD = 0x0006,
+    RU_RESUMENR = 0x0007,
+}RECEIVE_UNIT_COMMAND;
+
+/* SCB status word descriptions */
+enum
+{
+    CU_IDLE = 0,
+    CU_SUSPENDED = 1,
+    CU_LPQ_ACTIVE = 2,
+    CU_HQP_ACTIVE = 3
+} CONTROL_UINT_STATE;
+
+enum
+{
+    RU_IDLE = 0,
+    RU_SUSPENDED = 1,
+    RU_NO_RESOURCES =2,
+    RU_READY = 4
+} RECEIVE_UNIT_STATE;
+
+enum
+{
+    PORT_SOFTWARE_RESET = 0,
+    PORT_SELF_TEST = 1,
+    PORT_SELECTIVE_RESET = 2,
+    PORT_DUMP = 3,
+    PORT_DUMP_WAKE_UP = 7,
+}SCB_PORT_SELECTION_FUNCTION;
+
+enum
+{
+    CBL_NOP = 0,
+    CBL_IASETUP = 1,
+    CBL_CONFIGURE = 2,
+    CBL_MULTCAST_ADDR_SETUP = 3,
+    CBL_TRANSMIT = 4,
+    CBL_LOAD_MICROCODE = 5,
+    CBL_DUMP = 6,
+    CBL_DIAGNOSE = 7,
+}CBL_COMMAND;
+
+/*
+ * Offsets of the individual registers from the start of the CSR block.
+ * NOTE(review): the trailing identifier declares a file-scope variable of
+ * this anonymous enum type, it does not name the type.
+ */
+enum
+{
+    SCB_STATUS = 0,            /* SCB base + 0x00h, RU states + CU states + STAT/ACK */
+    SCB_ACK = 1,            /* SCB ack/stat */
+    SCB_CMD = 2,            /* RU command + CU command + S bit + M bit */
+    SCB_INTERRUPT_MASK = 3, /* Interrupts mask bits */
+    SCB_POINTER = 4,        /* SCB general pointer, depending on command type */
+    SCB_PORT = 8,            /* SCB port register */
+    SCB_EEPROM = 0xe,        /* SCB eeprom control register */
+    SCB_MDI =0x10,            /* SCB MDI control register */
+} CSR_OFFSETS;
+
+enum
+{
+    EEPROM_SK = 0x01,
+    EEPROM_CS = 0x02,
+    EEPROM_DI = 0x04,
+    EEPROM_DO = 0x08,
+} EEPROM_CONTROL_REGISTER;
+
+enum
+{
+    EEPROM_READ = 0x2,
+    EEPROM_WRITE = 0x1,
+    EEPROM_ERASE = 0x3,
+} EEPROM_OPCODE;
+
+enum
+{
+    MDI_WRITE = 0x1,
+    MDI_READ = 0x2,
+} MDI_OPCODE;
+
+enum
+{
+    INT_FCP = BIT(8),
+    INT_SWI = BIT(10),
+    INT_MDI = BIT(11),
+    INT_RNR = BIT(12),
+    INT_CNA = BIT(13),
+    INT_FR = BIT(14),
+    INT_CX_TNO = BIT(15),
+} E100_INTERRUPT;
+
+enum
+{
+    CSR_MEMORY_BASE,
+    CSR_IO_BASE,
+    FLASH_MEMORY_BASE,
+    REGION_NUM
+}E100_PCI_MEMORY_REGION;
+
+/*
+ * Statistical counters kept by the emulated card.  All members are 32-bit
+ * and the structure is packed, so the counters lie back to back in the
+ * order listed, followed by the completion word.
+ */
+typedef struct {
+    uint32_t tx_good_frames,        // Good frames transmitted
+             tx_max_collisions,     // Fatal frames -- had max collisions
+             tx_late_collisions,    // Fatal frames -- had a late coll.
+             tx_underruns,          // Transmit underruns (fatal or re-transmit)
+             tx_lost_crs,           // Frames transmitted without CRS
+             tx_deferred,           // Deferred transmits
+             tx_single_collisions,  // Transmits that had 1 and only 1 coll.
+             tx_multiple_collisions,// Transmits that had multiple coll.
+             tx_total_collisions,   // Transmits that had 1+ collisions.
+
+             rx_good_frames,        // Good frames received
+             rx_crc_errors,         // Aligned frames that had a CRC error
+             rx_alignment_errors,   // Receives that had alignment errors
+             rx_resource_errors,    // Good frame dropped due to lack of resources
+             rx_overrun_errors,     // Overrun errors - bus was busy
+             rx_cdt_errors,         // Received frames that encountered coll.
+             rx_short_frame_errors, // Received frames that were too short
+
+             complete_word;         // A005h indicates dump cmd completion,
+                                    // A007h indicates dump and reset cmd completion.
+
+// TODO: Add specific field for i82558, i82559
+} __attribute__ ((packed)) e100_stats_t;
+
+#define EEPROM_I82557_ADDRBIT 6
+/*
+ * Default EEPROM image: 64 16-bit words.
+ * Below data is dumped from a real I82557 card.
+ */
+static const uint16_t eeprom_i82557[] =
+{
+    0x300, 0xe147, 0x2fa4, 0x203, 0x0, 0x201, 0x4701, 0x0, 0x7414, 0x6207,
+    0x4082, 0xb, 0x8086, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x128, 0x0, 0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc374,
+};
+
+/*
+ * Initial image of the 256-byte PCI configuration space, 16 bytes per row.
+ * pci_reset() copies this table over the device's config array.
+ */
+static const uint8_t e100_pci_configure[] =
+{
+    0x86, 0x80, 0x29, 0x12, 0x17, 0x00, 0x90, 0x02, 0x08, 0x00, 0x00, 0x02, 0x10, 0x20, 0x00, 0x00, /* 0x00 */
+    0x00, 0x00, 0x10, 0x50, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, /* 0x10 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x80, 0x0b, 0x00, /* 0x20 */
+    0x00, 0x00, 0xf0, 0xff, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x01, 0x08, 0x38, /* 0x30 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x22, 0xfe, /* 0xd0 */
+    0x00, 0x40, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0 */
+};
+
+typedef struct
+{
+#define OPCODE      0xb
+#define ADDR        0xc
+#define DATA        0xd
+#define NOP         0xe
+
+#define EEPROM_RESET_ALL      0xfe
+#define EEPROM_SELECT_RESET   0xff
+    uint8_t  start_bit;
+    uint8_t  opcode;
+    uint8_t  address;
+    uint16_t data;  //This must be 16 bit represents a register in eeprom
+
+    uint32_t val;
+    uint32_t val_len;
+    uint8_t  val_type;  // What data type is in DI. opcode?address?data?
+
+    uint8_t cs;
+    uint8_t sk;
+
+    // This two fileds only be reset when device init
+    uint16_t addr_len;
+    uint16_t contents[256]; // 256 is enough to all device(i82557 ... i82559)
+} eeprom_t;
+
+// Control/Status register structure
+typedef struct
+{
+    /* SCB status word */
+    union
+    {
+        uint16_t val;
+        struct
+        {
+            uint8_t rs1:2;  // Reserved
+            uint8_t rus:4;  // RU status
+            uint8_t cus:2;  // CU status
+            uint8_t stat_ack; // Stat/ACK
+        }u;
+    }scb_status;
+
+    /* SCB command word */
+    union
+    {
+        uint16_t val;
+        struct
+        {
+            uint8_t ru_cmd:3;   // RU command
+            uint8_t rs1:1;      // Reserved
+            uint8_t cu_cmd:4;   // CU command
+            uint8_t m:1;        // Interrup mask bit(1:mask all interrupt)
+            uint8_t si:1;       // Use for software cause interrupt
+            uint8_t simb:6;     // Specific interrupt mask bit
+        }u;
+    }scb_cmd;
+
+    /* SCB general pointer */
+    union
+    {
+        uint32_t val;
+        struct
+        {
+            uint32_t scb_ptr;
+        }u;
+    }scb_pointer;
+
+    /* Port interface */
+    union
+    {
+        uint32_t val;
+        struct
+        {
+            uint8_t opcode:4;   // Op code for function selection
+            uint32_t ptr:28;    // Result pointer
+        }u;
+    }port;
+
+    uint16_t rs1;               // Reserved
+
+    /* EEPROM control register */
+    union
+    {
+        uint16_t val;
+        struct
+        {
+            uint8_t eesk:1;      // Serial clock
+            uint8_t eecs:1;      // Chip select
+            uint8_t eedi:1;      // Serial data in
+            uint8_t eedo:1;      // Serial data out
+            uint8_t rs1:4;       // Reserved
+            uint8_t data;
+        }u;
+    }eeprom_ctrl;
+
+    /* MDI control register */
+    union
+    {
+        uint32_t val;
+        struct
+        {
+            uint16_t data;       // Data
+            uint8_t regaddr:5;   // PHY register address
+            uint8_t phyaddr:5;   // PHY address
+            uint8_t opcode:2;    // Opcode
+            uint8_t r:1;         // Ready
+            uint8_t ie:1;        // Interrup enable
+            uint8_t rs1:2;       // Reserved
+        }u;
+    } mdi_ctrl;
+
+    /* Receive byte counter register */
+    uint32_t rx_byte_counter;
+
+    /* Early receive interrupt register */
+    uint8_t early_interrupt;
+
+    /* Flow control register */
+    union
+    {
+        uint16_t val;
+    }flow_ctrl;
+
+    /* Power management driver register */
+    union
+    {
+        uint8_t val;
+        struct
+        {
+            uint8_t pme_s:1;     // PME status
+            uint8_t tco_r:1;     // TCO request
+            uint8_t f_tco_i:1;   // Force TCO indication
+            uint8_t tco_re:1;    // TCO ready
+            uint8_t rs1:1;       // Reserved
+            uint8_t isp:1;       // Intersting packet
+            uint8_t mg:1;        // Magic packet
+            uint8_t lsci:1;      // Link status change indication
+        }u;
+    }pm_reg;
+
+    /* General control register */
+    uint8_t gen_ctrl;
+
+    /* General status register */
+    uint8_t gen_status;
+
+    /* These are reserved or we don't support register */
+    uint8_t others[30];
+} __attribute__ ((packed)) csr_t;
+
+typedef struct
+{
+    uint8_t byte_count;
+    uint8_t rx_fifo_limit:4;
+    uint8_t tx_fifo_limit:4;
+    uint8_t adpt_inf_spacing;
+    uint8_t rs1;
+    uint8_t rx_dma_max_bytes;
+    uint8_t tx_dma_max_bytes:7;
+    uint8_t dmbc_en:1;
+    uint8_t late_scb:1,
+            rs2:1,
+            tno_intr:1,
+            ci_intr:1,
+            rs3:1,
+            rs4:1,
+            dis_overrun_rx:1,
+            save_bad_frame:1;
+    uint8_t dis_short_rx:1,
+            underrun_retry:2,
+            rs5:5;
+    uint8_t mii:1,
+            rs6:7;
+    uint8_t rs7;
+    uint8_t rs8:3,
+            nsai:1,
+            preamble_len:2,
+            loopback:2;
+    uint8_t linear_prio:3,
+            rs9:5;
+    uint8_t pri_mode:1,
+            rs10:3,
+            interframe_spacing:4;
+    uint16_t rs11;
+    uint8_t promiscuous:1,
+            broadcast_dis:1,
+            rs12:5,
+            crs_cdt:1;
+    uint16_t rs13;
+    uint8_t strip:1,
+            padding:1,
+            rx_crc:1,
+            rs14:5;
+    uint8_t rs15:6,
+            force_fdx:1,
+            fdx_en:1;
+    uint8_t rs16:6,
+            mul_ia:2;
+    uint8_t rs17:3,
+            mul_all:1,
+            rs18:4;
+} __attribute__ ((packed)) i82557_cfg_t;
+
+/*
+ * Per-device emulation state: host-side handles, EEPROM and PHY register
+ * shadows, CU/RU bookkeeping, the transmit staging buffer and the register
+ * window (pci_mem), which can be viewed either as raw bytes or as csr_t.
+ */
+typedef struct {
+    VLANClientState *vc;
+    PCIDevice *pci_dev;
+    int mmio_index;
+    uint8_t scb_stat;           /* SCB stat/ack byte */
+    uint32_t region_base_addr[REGION_NUM];         /* PCI region addresses */
+    uint8_t macaddr[6];
+    uint16_t mdimem[32];
+    eeprom_t eeprom;
+    uint32_t device;            /* device variant */
+
+    uint8_t mult_list[8];       /* Multicast address list */
+    int is_multcast_enable;
+
+    /* (cu_base + cu_offset) address the next command block in the command block list. */
+    uint32_t cu_base;           /* CU base address */
+    uint32_t cu_offset;         /* CU address offset */
+    uint32_t cu_next;           /* Point to next command when CU go to suspend */
+
+    /* (ru_base + ru_offset) address the RFD in the Receive Frame Area. */
+    uint32_t ru_base;           /* RU base address */
+    uint32_t ru_offset;         /* RU address offset */
+
+    uint32_t statsaddr;         /* pointer to e100_stats_t */
+
+    e100_stats_t statistics;        /* statistical counters */
+
+    /* Configuration bytes. */
+    i82557_cfg_t config;
+
+    /* FIFO buffer of card. The packet that need to be sent buffered in it */
+    uint8_t pkt_buf[MAX_ETH_FRAME_SIZE+4];
+    /* Data length in FIFO buffer */
+    int pkt_buf_len;
+
+    /* Data in mem is always in the byte order of the controller (le). */
+    union
+    {
+        csr_t csr;
+        uint8_t mem[PCI_MEM_SIZE];
+    }pci_mem;
+
+} E100State;
+
+/* CB structure, filled by device driver
+ * This is a common structure of CB. In some
+ * special case such as TRANSMIT command, the
+ * reserved field will be used.
+ *
+ * The first 16 bits carry the status flags, the next 16 bits the command
+ * flags, followed by the 32-bit link address.
+ */
+struct  control_block
+{
+    uint16_t rs1:13;            /* reserved */
+    uint8_t ok:1;               /* 1:command executed without error, otherwise 0 */
+    uint8_t rs2:1;
+    uint8_t c:1;                /* execution status. set by device, cleared by software */
+    uint8_t cmd:3;              /* command */
+    uint16_t rs3:10;            /* most time equal to 0 */
+    uint8_t i:1;                /* whether trigger interrupt after execution. 1:yes; 0:no */
+    uint8_t s:1;                /* suspend */
+    uint8_t el:1;               /* end flag */
+    uint32_t link_addr;
+} __attribute__ ((packed));
+
+/* Transmit-specific fields: descriptor array address, byte count, flags, threshold and TBD count. */
+typedef struct
+{
+    uint32_t tx_desc_addr;      /* transmit buffer descriptor array address. */
+    uint16_t tcb_bytes:14;         /* transmit command block byte count (in lower 14 bits)*/
+    uint8_t rs1:1;
+    uint8_t eof:1;
+    uint8_t tx_threshold;       /* transmit threshold */
+    uint8_t tbd_num;          /* TBD number */
+} __attribute__ ((packed)) tbd_t;
+
+/* Receive frame descriptor structure */
+typedef struct
+{
+    uint16_t status:13;     // Result of receive operation
+    uint8_t ok:1;           // 1:receive without error, otherwise 0
+    uint8_t rs1:1;
+    uint8_t c:1;            // 1:receive complete
+    uint8_t rs2:3;
+    uint8_t sf:1;           // 0:simplified mode
+    uint8_t h:1;            // 1:header RFD
+    uint16_t rs3:9;
+    uint8_t s:1;            // 1:go to suspend
+    uint8_t el:1;           // 1:last RFD
+    uint32_t link_addr;     // Add on RU base point to next RFD
+    uint32_t rs4;
+    uint16_t count:14;      // Number of bytes written into data area
+    uint8_t f:1;            // Set by device when count field update
+    uint8_t eof:1;          // Set by device when placing data into data area complete
+    uint16_t size:14;       // Buffer size (even number)
+    uint8_t rs5:2;
+} __attribute__ ((packed)) rfd_t;
+
+enum
+{
+    RX_COLLISION = BIT(0),  // 1:Receive collision detected
+    RX_IA_MATCH = BIT(1),      // 0:Receive frame match individual address
+    RX_NO_MATCH = BIT(2), // 1:Receive frame match no address
+    RX_ERR = BIT(4),        // 1:Receive frame error
+    RX_TYPE = BIT(5),       // 1:Receive frame is a type frame
+    RX_SHORT = BIT(7),      // 1:Receive frame is too short
+    RX_DMA_ERR = BIT(8),
+    RX_LARGE = BIT(9),      // 1:Receive frame is too large
+    RX_CRC_ERR = BIT(10),
+} RFD_STATUS;
+
+typedef struct PCIE100State {
+    PCIDevice dev;
+    E100State e100;
+} PCIE100State;
+
+/* Default values for MDI (PHY) registers */
+static const uint16_t e100_mdi_default[] = {
+    /* MDI Registers 0 - 6, 7 */
+    0x3000, 0x780d, 0x02a8, 0x0154, 0x05e1, 0x0000, 0x0000, 0x0000,
+    /* MDI Registers 8 - 15 */
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    /* MDI Registers 16 - 31 */
+    0x0003, 0x0000, 0x0001, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+};
+
+static const uint8_t broadcast_macaddr[6] =
+    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+/* Debugging codes */
+#ifdef  DEBUG_E100
+
+/*
+ * Debug helper: print `comment` followed by the first `len` bytes of `info`
+ * in hexadecimal on a single stderr line.  Does nothing when either
+ * pointer is NULL.
+ */
+static void e100_dump(char *comment, uint8_t *info, int len)
+{
+    int idx = 0;
+
+    if ( comment == NULL || info == NULL )
+        return;
+
+    fprintf(stderr, "EE100\t%-24s%s", __func__, comment);
+
+    while ( idx < len )
+    {
+        fprintf(stderr, "%x ", info[idx]);
+        idx++;
+    }
+
+    fprintf(stderr, "\n");
+}
+
+/*
+ * Printable names of the CSR registers, indexed by register offset.
+ * Offsets that have no designated entry are left NULL by the initializer.
+ */
+static const char *regname[] =
+{
+    [0] = "SCB Status", [1] = "SCB Ack",
+    [2] = "SCB Cmd", [3] = "SCB Interrupt Mask",
+    [4] = "SCB Pointer", [8] = "SCB Port",
+    [0xc] = "SCB Flash", [0xe] = "SCB Eeprom",
+    [0x10] = "SCB Ctrl MDI", [0x14] = "SCB Early RX",
+};
+/*
+ * Name for register offset x.  Falls back to the "Unknown" string both for
+ * offsets beyond the table and for the NULL gaps inside it, so the result
+ * is always safe to pass to a "%s" conversion.
+ */
+#define SCBNAME(x)    \
+    ( ((x) < (sizeof(regname) / sizeof(regname[0])) && regname[(x)]) ? regname[(x)] : "Unknown SCB Register" )
+
+/* Printable names of the command block commands, indexed by CBL_* value. */
+static const char *cb_cmd_name[] =
+{
+    [CBL_NOP] = "NOP", [CBL_IASETUP] = "Individual address setup",
+    [CBL_CONFIGURE] = "Configure", [CBL_MULTCAST_ADDR_SETUP] = "Set Multcast address list",
+    [CBL_TRANSMIT] = "Transmit", [CBL_LOAD_MICROCODE] = "Load microcode",
+    [CBL_DUMP] = "Dump", [CBL_DIAGNOSE] = "Diagnose",
+};
+/* Name for command x, or a fixed "Unknown" string when x is outside the table. */
+#define CB_CMD_NAME(x)  \
+    ( (x) < (sizeof(cb_cmd_name) / sizeof(cb_cmd_name[0])) ? cb_cmd_name[(x)] : "Unknown CB command" )
+
+static const char *eeprom_opcode_name[] =
+{
+    [0] = "Unknow", [EEPROM_WRITE] = "Write",
+    [EEPROM_READ] = "Read", [EEPROM_ERASE] = "Erase",
+};
+#define EEPROM_OPCODE_NAME(x)   \
+    ( (x) < (sizeof(eeprom_opcode_name) / sizeof(eeprom_opcode_name[0])) ?  \
+      eeprom_opcode_name[(x)] : "Unknown" )
+
+static struct eeprom_trace_data
+{
+    uint8_t eedo[256];
+    uint8_t di[256];
+    int op;
+    int i;
+    uint32_t data;
+}etd = {.op = NOP};
+
+/*
+ * Debug helper that records the EEPROM serial traffic in `etd`.
+ *
+ *   eedo, di  - the two bit values to record for this step
+ *   dir       - compared against EEPROM_READ to decide which of the two
+ *               bits is shifted into the accumulated data word
+ *   next_op   - phase stored in etd.op after a flush
+ *   clr       - non-zero: print the trace collected so far (unless the
+ *               current phase is NOP), reset etd and start phase next_op
+ */
+static void eeprom_trace(int eedo, int di, int dir, int next_op, int clr)
+{
+    int i;
+
+    if ( clr )
+    {
+        const char *opname = NULL;
+
+        switch ( etd.op )
+        {
+            case NOP:
+                break;
+            case OPCODE:
+                opname = "opcode";
+                break;
+            case ADDR:
+                opname = "address";
+                break;
+            case DATA:
+                opname = "data transfer";
+                break;
+            default:
+                opname = "Unknown";
+        }
+
+        if ( opname )
+        {
+            logout("EEPROM trace:\n");
+            fprintf(stderr, "\toperation: %s\n", opname);
+            fprintf(stderr, "\tDI track:");
+            for ( i=0; i<etd.i; i++ )
+                fprintf(stderr, "%x ", etd.di[i]);
+            fprintf(stderr, "\n\tDO track:");
+            for ( i=0; i<etd.i; i++ )
+                fprintf(stderr, "%x ", etd.eedo[i]);
+            fprintf(stderr, "\n\tData:%#x\n", etd.data);
+        }
+
+
+        memset(&etd, 0x0, sizeof(etd));
+        etd.op = next_op;
+
+        return;
+    }
+
+    /* The per-bit history is a fixed-size buffer: once it is full, stop
+     * recording samples instead of writing past the end of the arrays.
+     */
+    if ( etd.i < (int)sizeof(etd.di) )
+    {
+        etd.eedo[etd.i] = eedo;
+        etd.di[etd.i] = di;
+        etd.i ++;
+    }
+
+    if ( dir == EEPROM_READ && etd.op == DATA )
+        etd.data = (etd.data << 1) | eedo;
+    else
+        etd.data = (etd.data << 1) | di;
+}
+
+#define INT_NAME(x) \
+    ({  \
+     char *name = NULL; \
+     switch (x) \
+     {  \
+     case INT_FCP:  \
+            name = "FCP";   \
+            break;  \
+     case INT_SWI:  \
+            name = "SWI";   \
+            break;  \
+     case INT_MDI:  \
+            name = "MDI";   \
+            break;  \
+     case INT_RNR:  \
+            name = "RNR";   \
+            break;  \
+     case INT_CNA:  \
+            name = "CNA";   \
+            break;  \
+     case INT_FR:   \
+            name = "FR";    \
+            break;  \
+     case INT_CX_TNO:   \
+            name ="CX/TNO"; \
+            break;  \
+     default:   \
+            name ="Unknown"; \
+     }  \
+     name;  \
+     })
+
+#else
+/* Non-debug build: the tracing helpers are empty stubs. */
+static void e100_dump(char *comment, uint8_t *info, int len)
+{
+}
+
+static void eeprom_trace(int eedo, int di, int dir, int next_op, int clr)
+{
+}
+#endif
+
+/*
+ * Load the default PCI configuration image into the device's config space,
+ * then override the revision ID and clear the byte at offset 0x3c.
+ */
+static void pci_reset(E100State * s)
+{
+    uint8_t *cfg = s->pci_dev->config;
+
+    memcpy(cfg, e100_pci_configure, sizeof(e100_pci_configure));
+    logout("%p\n", s);
+
+    /* I82557 */
+    cfg[E100_PCI_REVISION_ID] = 0x01;
+
+    /* presumably the PCI Interrupt Line register -- confirm against the PCI spec */
+    cfg[0x3c] = 0x0;
+}
+
+/*
+ * Selective reset: zero the register window, put CU and RU into the idle
+ * state, clear the CU/RU offsets and drop any pending interrupt status.
+ */
+static void e100_selective_reset(E100State * s)
+{
+    memset(s->pci_mem.mem, 0, sizeof(s->pci_mem.mem));
+
+    /* Both units start out idle */
+    SET_CU_STATE(CU_IDLE);
+    SET_RU_STATE(RU_IDLE);
+    logout("CU and RU go to idle\n");
+
+    s->cu_next = 0;
+    s->cu_offset = 0;
+    s->ru_offset = 0;
+
+    /* For 82557, special interrupt bits are all 1 */
+    CSR(CSR_CMD, simb) = 0x3f;
+
+    /* Set PHY to 1 */
+    CSR_VAL(CSR_MDI) |= BIT(21);
+
+    /* EEDO starts at 1 because the driver looks for a dummy 0 on that
+     * bit; starting from 1 is the safe choice.
+     */
+    CSR(CSR_EEPROM, eedo) = 1;
+
+    /* No interrupt is pending after a reset */
+    s->scb_stat = 0;
+}
+
+/*
+ * Full software reset: clear the register window, multicast list, FIFO
+ * buffer and statistics, restore the MDI registers to their defaults,
+ * re-enable multicast and finish with a selective reset.
+ */
+static void e100_software_reset(E100State *s)
+{
+    /* Wipe every buffer that holds run-time state */
+    memset(s->pci_mem.mem, 0, sizeof(s->pci_mem.mem));
+    memset(s->mult_list, 0, sizeof(s->mult_list));
+    memset(s->pkt_buf, 0, sizeof(s->pkt_buf));
+    memset(&s->statistics, 0, sizeof(s->statistics));
+    s->pkt_buf_len = 0;
+
+    /* MDI registers go back to their default values */
+    memcpy(s->mdimem, e100_mdi_default, sizeof(s->mdimem));
+    s->is_multcast_enable = 1;
+
+    e100_selective_reset(s);
+}
+
+/* Reset entry point taking an untyped pointer: performs a software reset on the E100State it points to. */
+static void e100_reset(void *opaque)
+{
+    E100State *state = opaque;
+
+    logout("%p\n", state);
+    e100_software_reset(state);
+}
+
+
+/* Save hook: not implemented yet, nothing is written to `f`. */
+static void e100_save(QEMUFile * f, void *opaque)
+{
+    //TODO
+}
+
+/* Load hook: not implemented yet, reads nothing and always returns 0. */
+static int e100_load(QEMUFile * f, void *opaque, int version_id)
+{
+    int rc = 0;
+
+    //TODO
+    return rc;
+}
+
+/* Interrupt functions */
+/*
+ * Raise an interrupt of kind `int_type` (one of the INT_* bits).
+ * When the M bit of the SCB command word is set, nothing happens.
+ * Otherwise the bit is OR-ed into the SCB status word, the stat/ack byte is
+ * mirrored into s->scb_stat and the PCI interrupt line is asserted.
+ */
+static void e100_interrupt(E100State *s, uint16_t int_type)
+{
+    //TODO: Add another i8255x card supported mask bit
+    if ( CSR(CSR_CMD,m) )
+        return;
+
+    /* Record the cause in stat/ack so the driver can tell what happened */
+    CSR_VAL(CSR_STATUS) |= int_type;
+    s->scb_stat = CSR(CSR_STATUS, stat_ack);
+
+    logout("Trigger an interrupt(type = %s(%#x), SCB Status = %#x)\n",
+            INT_NAME(int_type), int_type, CSR_VAL(CSR_STATUS));
+    pci_set_irq(s->pci_dev, 0, 1);
+}
+
+/*
+ * Handle a driver write to the stat/ack byte.
+ * `ack` holds the bits the driver wants to acknowledge.  An ack of 0, or
+ * one that matches no pending bit in s->scb_stat, is ignored and the
+ * register is restored from s->scb_stat.  Otherwise the acknowledged bits
+ * are cleared, and the interrupt line is de-asserted once none remain.
+ */
+static void e100_interrupt_ack(E100State * s, uint8_t ack)
+{
+
+    /* Ignore acknowledge if driver writes 0 to ack or
+     * the corresponding interrupt bit is not set
+     */
+    if ( !ack || !(s->scb_stat & ack) )
+    {
+        logout("Illegal interrupt ack(ack=%#x, SCB Stat/Ack=%#x), ignore it\n",
+                ack, s->scb_stat);
+        // Due to we do write operation before e100_execute(), so
+        // we must restore value of ack field here
+        CSR(CSR_STATUS, stat_ack) = s->scb_stat;
+        return;
+    }
+
+    s->scb_stat &= ~ack;
+    CSR(CSR_STATUS, stat_ack) = s->scb_stat;
+
+    logout("Interrupt ack(name=%s,val=%#x)\n", INT_NAME(({uint16_t bit = ack<<8;bit;})),ack);
+    if ( !s->scb_stat )
+    {
+        logout("All interrupts are acknowledeged, de-assert interrupt line\n");
+        pci_set_irq(s->pci_dev, 0, 0);
+    }
+}
+
+/*
+ * Emulated self test: write an 8-byte result block to guest physical
+ * address `res_addr`.  The signature is all ones and the result word is 0,
+ * i.e. the test is always reported as passed.
+ */
+static void e100_self_test(uint32_t res_addr)
+{
+    struct
+    {
+        uint32_t st_sign;           /* Self Test Signature */
+        uint32_t st_result;         /* Self Test Results */
+    } test_res;
+
+    test_res.st_sign = (uint32_t)-1;
+    test_res.st_result = 0; // Our self test always success
+    cpu_physical_memory_write(res_addr, (uint8_t *)&test_res, sizeof(test_res));
+
+    logout("Write self test result to %#x\n", res_addr);
+}
+
+/*
+ * Execute a command written to the SCB PORT register.
+ *
+ *   val - register value: the low 4 bits select the function; for the self
+ *         test the remaining bits are passed on as the result address
+ *   dir - not used by this function
+ *
+ * Software reset, self test and selective reset are carried out; the two
+ * dump functions are only logged.
+ */
+static void scb_port_func(E100State *s, uint32_t val, int dir)
+{
+#define PORT_SELECTION_MASK 0xfU
+
+    uint32_t sel = val & PORT_SELECTION_MASK;
+
+    switch ( sel )
+    {
+        case PORT_SOFTWARE_RESET:
+            logout("do PORT_SOFTWARE_RESET!\n");
+            e100_software_reset(s);
+            break;
+        case PORT_SELF_TEST:
+            e100_self_test(val & ~PORT_SELECTION_MASK);
+            logout("do PORT_SELF_TEST!\n");
+            break;
+        case PORT_SELECTIVE_RESET:
+            logout("do PORT_SELECTIVE_RESET!\n");
+            e100_selective_reset(s);
+            break;
+        case PORT_DUMP:
+            /* Not implemented: only logged */
+            logout("do PORT_DUMP!\n");
+            break;
+        case PORT_DUMP_WAKE_UP:
+            /* Not implemented: only logged */
+            logout("do PORT_DUMP_WAKE_UP!\n");
+            break;
+        default:
+            logout("Unkonw SCB port command(selection function = %#x)\n", sel);
+    }
+}
+
+/*
+ * Execute a guest write to the MDI control register.
+ *
+ * val is split into interrupt enable (bit 29), opcode (bits 27:26), PHY
+ * address (bits 25:21), PHY register address (bits 20:16) and data
+ * (bits 15:0).  Only PHY 1 is emulated: an access to any other PHY is
+ * completed at once without touching the register file.  An opcode that
+ * is neither MDI_WRITE nor MDI_READ is logged and dropped without
+ * completing the transaction.
+ *
+ * On completion bit 28 (ready) is set, the low 16 bits are replaced by
+ * the resulting data, the value is stored in the MDI CSR and, when the
+ * interrupt enable bit was set, an MDI interrupt is raised.
+ */
+static void e100_write_mdi(E100State *s, uint32_t val)
+{
+    uint32_t irq_en = (val >> 29) & 0x1;
+    uint32_t op     = (val >> 26) & 0x3;
+    uint32_t phy    = (val >> 21) & 0x1f;
+    uint32_t reg    = (val >> 16) & 0x1f;
+    uint32_t data   = val & 0x0000ffff;
+
+    logout("Write MDI:\n"
+           "\topcode:%#x\n"
+           "\tphy address:%#x\n"
+           "\treg address:%#x\n"
+           "\tie:%#x\n"
+           "\tdata:%#x\n",
+           op, phy, reg, irq_en, data);
+
+    if ( phy != 1 )
+    {
+        /* Only the default PHY (PHY1) exists.  Pretend that an access to
+         * any other PHY finished so that the driver carries on.
+         */
+        logout("Unsupport PHY address(phy = %#x)\n", phy);
+    }
+    else
+    {
+        if ( op != MDI_WRITE && op != MDI_READ )
+        {
+            logout("Invalid Opcode(opcode = %#x)\n", op);
+            return;
+        }
+
+        /* Only the generic MDI registers are handled specially.
+         * NOTE(review): a higher register index is only logged, the
+         * access below still indexes s->mdimem[reg] -- confirm that
+         * mdimem has room for all 32 register addresses.
+         */
+        if ( reg > 6 )
+        {
+            logout("Invalid phy register index( phy register addr = %#x)\n", reg);
+        }
+
+        if ( op == MDI_WRITE )
+        {
+            if ( reg == 0 )
+            {
+                /* Control Register */
+                if ( data & 0x8000 )
+                {
+                    /* Reset: control and status go back to their defaults */
+                    s->mdimem[0] = e100_mdi_default[0];
+                    s->mdimem[1] = e100_mdi_default[1];
+                    data = s->mdimem[0];
+                }
+                else
+                {
+                    /* Restart Auto Configuration = Normal Operation */
+                    data &= ~0x0200;
+                }
+            }
+            else if ( reg == 1 )
+            {
+                /* Status Register is read only: keep the current value */
+                logout("Invalid write on readonly register(opcode = %#x)\n", op);
+                data = s->mdimem[1];
+            }
+            /* Every other register simply stores the written data */
+
+            s->mdimem[reg] = data;
+            logout("MDI WRITE: reg = %#x, data = %#x\n", reg, data);
+        }
+        else
+        {
+            /* MDI read */
+            switch ( reg )
+            {
+                case 0:
+                    /* Control Register: the reset bit of the command word
+                     * also restores the defaults
+                     */
+                    if ( data & 0x8000 )
+                    {
+                        s->mdimem[0] = e100_mdi_default[0];
+                        s->mdimem[1] = e100_mdi_default[1];
+                    }
+                    break;
+                case 1:
+                    /* Status Register: Auto Negotiation complete, set
+                     * sticky bit to 1
+                     */
+                    s->mdimem[1] |= 0x0026;
+                    break;
+                case 5:
+                    /* Auto-Negotiation Link Partner Ability Register */
+                    s->mdimem[5] = 0x41fe;
+                    break;
+                case 6:
+                    /* Auto-Negotiation Expansion Register */
+                    s->mdimem[6] = 0x0001;
+                    break;
+                default:
+                    /* PHY Identification words and the rest: read as is */
+                    break;
+            }
+
+            data = s->mdimem[reg];
+            logout("MDI READ: reg = %#x, data = %#x\n", reg, data);
+        }
+    }
+
+    /* Emulation takes no time to finish an MDI transaction, so flag it
+     * as ready right away and hand the data back in the low half.
+     */
+    val |= BIT(28);
+    val = (val & 0xffff0000) + data;
+    CSR_WRITE(SCB_MDI, val, uint32_t);
+
+    if ( irq_en )
+        e100_interrupt(s, (uint16_t)INT_MDI);
+}
+
+/*
+ * Dispatch an access to the MDI control register.  A read only sets the
+ * ready bit (bit 28) in the CSR; a write is decoded and executed by
+ * e100_write_mdi().  Any other direction is logged and ignored.
+ */
+static void scb_mdi_func(E100State *s, uint32_t val, int dir)
+{
+    if ( dir == OP_READ )
+    {
+        // Nothing to emulate on a read, just tell the driver we are ready
+        CSR_VAL(CSR_MDI) |= BIT(28);
+        return;
+    }
+
+    if ( dir == OP_WRITE )
+    {
+        e100_write_mdi(s, val);
+        return;
+    }
+
+    logout("Invalid operation direction(dir=%x)\n", dir);
+}
+
+/*
+ * Reset the emulated EEPROM.
+ *
+ * EEPROM_RESET_ALL zeroes the complete eeprom_t and leaves it in the NOP
+ * state.  Any other type performs a select reset: EEDO is driven high
+ * and the per-command state (start bit, opcode, address, data, shift
+ * value, CS and SK levels) is cleared; fields not listed here keep
+ * their values.
+ */
+static void eeprom_reset(E100State *s, int type)
+{
+    eeprom_t *rom = &s->eeprom;
+
+    if ( type != EEPROM_RESET_ALL )
+    {
+        CSR(CSR_EEPROM, eedo) = 1;
+
+        /* Forget the command that was being shifted in */
+        rom->start_bit = 0;
+        rom->opcode = 0;
+        rom->address = 0;
+        rom->data = 0;
+
+        rom->val = 0;
+        rom->val_len = 0;
+        rom->val_type = NOP;
+
+        /* Both control lines are considered low again */
+        rom->cs = 0;
+        rom->sk = 0;
+        logout("EEPROM select reset\n");
+        return;
+    }
+
+    memset(rom, 0x0, sizeof(*rom));
+    rom->val_type = NOP;
+    logout("EEPROM reset all\n");
+}
+
+/*
+ * Advance the emulated serial EEPROM by one write to the EEPROM control
+ * register.
+ *
+ * cs, sk and di are the new levels of chip select, serial clock and data
+ * in.  They are compared with the levels remembered in e to find edges:
+ *   - CS and remembered CS both low: the access is rejected.
+ *   - CS rising: a command frame begins, nothing else is done.
+ *   - CS falling: a pending EEPROM_WRITE is committed to e->contents and
+ *     the per-command state is cleared with a select reset.
+ *   - SK falling while CS is high: one bit is processed.  The first
+ *     DI == 1 is the start bit, followed by a 2-bit opcode, e->addr_len
+ *     address bits and then the data phase.  For EEPROM_READ the data
+ *     word is driven on EEDO starting with the most significant bit.
+ *
+ * dir is not used.
+ */
+static void do_eeprom_op(E100State *s, eeprom_t *e, int cs, int sk, int di, int dir)
+{
+    /* Edges are derived from the old levels, before they are updated */
+    int assert_cs = (cs == 1 && e->cs == 0);
+    int de_assert_cs = (cs == 0 && e->cs == 1);
+    int de_assert_sk = (sk == 0 && e->sk == 1);
+
+    // Chip select is not enabled
+    if ( cs == 0 && e->cs == 0 )
+    {
+        logout("Invalid EECS signal\n");
+        return;
+    }
+
+    // update state
+    e->cs = cs;
+    e->sk = sk;
+
+    // Selecting the chip does nothing by itself
+    if ( assert_cs )
+    {
+        logout("EECS assert\n");
+        return;
+    }
+
+    // Complete one command
+    if ( de_assert_cs )
+    {
+        if ( e->val_type == DATA && e->opcode == EEPROM_WRITE )
+        {
+            e->data = e->val;
+            /* Address the contents exactly as the read path does, so
+             * that a word written here is the word read back later.
+             */
+            memcpy((void *)(e->contents + e->address),
+                    &e->data, sizeof(e->data));
+            logout("EEPROM write complete(data=%#x)\n", e->data);
+        }
+        eeprom_trace(0,0,0,NOP,1);
+        eeprom_reset(s, EEPROM_SELECT_RESET);
+        logout("EECS de-asserted\n");
+        return;
+    }
+
+    // Chip is selected and the serial clock fell, so the operation is valid
+    if ( cs == 1 && de_assert_sk == 1)
+    {
+        // Set start bit
+        if ( e->start_bit == 0 && di == 1 )
+        {
+             e->start_bit = di;
+             e->val_len = 0;
+             e->val = 0;
+             e->val_type = OPCODE;
+
+             eeprom_trace(0,0,0,OPCODE,1);
+             logout("EEPROM start bit set\n");
+             return;
+        }
+        // Data in DI is valid
+        else if ( e->start_bit == 1 )
+        {
+            // If current operation is eeprom read, ignore DI
+            if ( !(e->val_type == DATA && e->opcode == EEPROM_READ) )
+            {
+                e->val = (e->val << 1) | di;
+                e->val_len ++;
+            }
+
+            switch ( e->val_type )
+            {
+                // Get the opcode.
+                case OPCODE:
+                    eeprom_trace(CSR(CSR_EEPROM, eedo), di, e->opcode, 0, 0);
+                    if ( e->val_len  == 2 )
+                    {
+                        e->opcode = e->val;
+                        e->val = 0;
+                        e->val_len = 0;
+                        e->val_type = ADDR;
+
+                        eeprom_trace(0,0,0,ADDR,1);
+                        logout("EEPROM get opcode(opcode name=%s,opcode=%#x )\n",
+                                EEPROM_OPCODE_NAME(e->opcode), e->opcode);
+                    }
+                    break;
+                // Get address
+                case ADDR:
+                    eeprom_trace(CSR(CSR_EEPROM, eedo), di, e->opcode, 0, 0);
+                    if ( e->val_len == e->addr_len )
+                    {
+                        e->address = e->val;
+                        e->val = 0;
+                        e->val_len = 0;
+                        e->val_type = DATA;
+
+                        // Prepare the data early for the later read operation
+                        if ( e->opcode == EEPROM_READ )
+                        {
+                            memcpy(&e->data, (void *)(e->contents + e->address),
+                                    sizeof(e->data));
+                            logout("EEPROM prepare data to read(addr=%#x,data=%#x)\n",
+                                    e->address, e->data);
+                        }
+
+                        // Drive a dummy 0 on EEDO to tell the driver that
+                        // the address has been received completely
+                        CSR(CSR_EEPROM, eedo) = 0;
+                        eeprom_trace(0,0,0,DATA,1);
+                        logout("EEPROM get address(addr=%#x)\n", e->address);
+                    }
+                    break;
+                // Only do data out operation
+                case DATA:
+                    if ( e->opcode == EEPROM_READ )
+                    {
+                        // Start from the most significant bit
+                        uint16_t t = !!(e->data & (0x8000U >> e->val_len));
+
+                        CSR(CSR_EEPROM, eedo) = t;
+
+                        logout("EEPROM read(reg address=%#x, reg val=%#x, do=%#x, len=%#x)\n",
+                                e->address, e->data, t, e->val_len);
+
+                        if ( e->val_len > sizeof(e->data)*8 )
+                        {
+                            /* Driver may do more write op to de-assert EESK,
+                             * so we let the EEPROM go idle after a register
+                             * has been read completely
+                             */
+                            e->val_type = NOP;
+                            logout("Read complete\n");
+
+                            break;
+                        }
+
+                        e->val_len ++;
+                    }
+                    eeprom_trace(CSR(CSR_EEPROM, eedo), di, e->opcode, 0, 0);
+                    // The EEPROM write itself is done when CS is de-asserted
+                    break;
+                default:
+                    break;
+            }
+        }
+    }
+
+    return;
+}
+
+
+/*
+ * Handle an access to the EEPROM control register: extract the chip
+ * select, serial clock and data-in levels from val, log them next to
+ * the previously stored CS/SK levels and feed them to do_eeprom_op().
+ */
+static void scb_eeprom_func(E100State *s, uint32_t val, int dir)
+{
+    eeprom_t *rom = &s->eeprom;
+    int cs = (val & EEPROM_CS) ? 1 : 0;
+    int sk = (val & EEPROM_SK) ? 1 : 0;
+    int di = (val & EEPROM_DI) ? 1 : 0;
+
+    logout("EEPROM: Old(cs=%#x, sk=%#x), New(cs=%#x, sk=%#x, di=%#x)\n", 
+            rom->cs, rom->sk, cs, sk, di);
+
+    do_eeprom_op(s, rom, cs, sk, di, dir);
+}
+
+/*
+ * Execute one receive unit (RU) command.
+ *
+ * RU start makes the RU ready and latches the SCB general pointer as the
+ * RFD offset, RU resume makes a suspended RU ready again, RU address
+ * load latches the general pointer as the RU base and RU abort raises an
+ * RNR interrupt and idles the RU.  DMA redirect and load HDS are only
+ * logged.  RU_NOP and unknown values are ignored.
+ */
+static void e100_ru_command(E100State *s, uint8_t val)
+{
+    if ( val == RU_START )
+    {
+        SET_RU_STATE(RU_READY);
+        logout("RU is set to ready\n");
+        s->ru_offset = CSR_VAL(CSR_POINTER);
+        logout("RFD offset is at %#x\n", s->ru_offset);
+    }
+    else if ( val == RU_RESUME )
+    {
+        /* Only a suspended RU changes state, the message is logged anyway */
+        if ( GET_RU_STATE == RU_SUSPENDED )
+        {
+            SET_RU_STATE(RU_READY);
+        }
+        logout("RU resume to ready\n");
+    }
+    else if ( val == RU_ADDR_LOAD )
+    {
+        s->ru_base = CSR_VAL(CSR_POINTER);
+        logout("Load RU base address at %#x\n", s->ru_base);
+    }
+    else if ( val == RU_DMA_REDIRECT )
+    {
+        logout("RU DMA redirect not implemented\n");
+    }
+    else if ( val == RU_ABORT )
+    {
+        e100_interrupt(s, INT_RNR);
+        SET_RU_STATE(RU_IDLE);
+        logout("RU abort, go to idle\n");
+    }
+    else if ( val == RU_LOAD_HDS )
+    {
+        logout("RU load header data size(HDS) not implemented\n");
+    }
+    /* RU_NOP (never expected here) and anything else: nothing to do */
+}
+
+/*
+ * Execute the command block (CB) list of the command unit (CU).
+ *
+ * When is_resume is zero the list starts at the offset held in the SCB
+ * general pointer; otherwise the caller has already stored the offset of
+ * the next CB in s->cu_offset.  Each CB is fetched from guest memory at
+ * s->cu_base + s->cu_offset, executed, and its first two bytes (holding
+ * the C and OK bits) are written back.  Execution stops at the first CB
+ * that has the S bit set (CU becomes suspended, link address saved in
+ * s->cu_next) or the EL bit set (CU becomes idle).
+ *
+ * This function changes the CU state, so CU start and CU resume must
+ * set the CU state before calling it.
+ *
+ * NOTE(review): the loop only ends on an S or EL bit; a guest CB list
+ * that links back to itself without either bit keeps it running.
+ */
+static void e100_execute_cb_list(E100State *s, int is_resume)
+{
+
+    struct control_block cb = {0};
+    uint32_t cb_addr;
+
+    if ( !is_resume )
+        s->cu_offset = CSR_VAL(CSR_POINTER);
+
+    /* If call from CU resume, cu_offset has been set */
+
+    while (1)
+    {
+        cb_addr = s->cu_base + s->cu_offset;
+        cpu_physical_memory_read(cb_addr, (uint8_t *)&cb, sizeof(cb));
+
+
+        switch ( cb.cmd )
+        {
+            case CBL_NOP:
+                /* Do nothing */
+                break;
+            case CBL_IASETUP:
+                cpu_physical_memory_read(cb_addr + 8, &s->macaddr[0], sizeof(s->macaddr));
+                e100_dump("Setup Individual Address:", &s->macaddr[0], 6);
+                break;
+            case CBL_CONFIGURE:
+                {
+                    i82557_cfg_t *cfg = &s->config;
+
+                    assert(sizeof(s->config) == 22);
+                    cpu_physical_memory_read(cb_addr + 8, (uint8_t *)cfg, sizeof(s->config));
+                    logout("Setup card configuration:"
+                            "\tbyte count:%d\n"
+                            "\tRx FIFO limit:%d\n"
+                            "\tTx FIFO limit:%d\n"
+                            "\tAdaptive interframe spacing:%d\n"
+                            "\tRx DMA max:%d\n"
+                            "\tTX DMA max:%d\n"
+                            "\tDMBC enable:%d\n"
+                            "\tLate SCB:%d\n"
+                            "\tTNO:%d\n"
+                            "\tCI:%d\n"
+                            "\tDiscard overrun RX:%d\n"
+                            "\tSave bad frame:%d\n"
+                            "\tDiscard short RX:%d\n"
+                            "\tunderrun retry:%d\n"
+                            "\tMII:%d\n"
+                            "\tNSAI:%d\n"
+                            "\tPreamble len:%d\n"
+                            "\tloopback:%d\n"
+                            "\tliner pro:%d\n"
+                            "\tPRI mode:%d\n"
+                            "\tinterframe spacing:%d\n"
+                            "\tpromiscuous:%d\n"
+                            "\tbroadcast dis:%d\n"
+                            "\tCRS CDT:%d\n"
+                            "\tstripping:%d\n"
+                            "\tpadding:%d\n"
+                            "\tRX crc:%d\n"
+                            "\tforce fdx:%d\n"
+                            "\tfdx enable:%d\n"
+                            "\tmultiple IA:%d\n"
+                            "\tmulticast all:%d\n",
+                        cfg->byte_count, cfg->rx_fifo_limit, cfg->tx_fifo_limit,
+                        cfg->adpt_inf_spacing, cfg->rx_dma_max_bytes, cfg->tx_dma_max_bytes,
+                        cfg->dmbc_en, cfg->late_scb, cfg->tno_intr, cfg->ci_intr,
+                        cfg->dis_overrun_rx, cfg->save_bad_frame, cfg->dis_short_rx,
+                        cfg->underrun_retry, cfg->mii, cfg->nsai, cfg->preamble_len,
+                        cfg->loopback, cfg->linear_prio, cfg->pri_mode, cfg->interframe_spacing,
+                        cfg->promiscuous, cfg->broadcast_dis, cfg->crs_cdt, cfg->strip,
+                        cfg->padding, cfg->rx_crc, cfg->force_fdx, cfg->fdx_en,
+                        cfg->mul_ia, cfg->mul_all);
+                }
+                break;
+            case CBL_MULTCAST_ADDR_SETUP:
+                {
+                    uint16_t mult_list_count = 0;
+                    uint16_t size = 0;
+
+                    cpu_physical_memory_read(cb_addr + 8, (uint8_t *)&mult_list_count, 2);
+                    /* Keep the low 14 bits only.  A shift pair on a
+                     * uint16_t does not do this, because the operand is
+                     * promoted to int before it is shifted.
+                     */
+                    mult_list_count &= 0x3fff;
+
+                    if ( !mult_list_count )
+                    {
+                        logout("Multcast disabled(multicast count=0)\n");
+                        s->is_multcast_enable = 0;
+                        memset(s->mult_list, 0x0, sizeof(s->mult_list));
+                        break;
+                    }
+                    /* Never read more than the local list can hold */
+                    size = mult_list_count > sizeof(s->mult_list) ?
+                        sizeof(s->mult_list) : mult_list_count;
+                    cpu_physical_memory_read(cb_addr + 12, &s->mult_list[0], size);
+
+                    e100_dump("Setup Multicast list: ", &s->mult_list[0], size);
+                    break;
+                }
+            case CBL_TRANSMIT:
+                {
+                    struct
+                    {
+                        struct control_block cb;
+                        tbd_t tbd;
+                    } __attribute__ ((packed)) tx;
+
+                    /* One transmit buffer descriptor as read from the guest */
+                    struct
+                    {
+                        uint32_t addr;
+                        uint16_t size;
+                        uint16_t is_el_set;
+                    } tx_buf = {0};
+
+                    uint32_t tbd_array;
+                    uint16_t tcb_bytes;
+                    uint8_t sf;
+                    int len = s->pkt_buf_len;
+
+                    assert( len < sizeof(s->pkt_buf));
+
+                    cpu_physical_memory_read(cb_addr, (uint8_t *)&tx, sizeof(tx));
+                    tbd_array = le32_to_cpu(tx.tbd.tx_desc_addr);
+                    tcb_bytes = le16_to_cpu(tx.tbd.tcb_bytes);
+                    // Indicate use what mode to transmit(simple or flexible)
+                    sf = tx.cb.rs3 & 0x1;
+
+                    logout("Get a TBD:\n"
+                            "\tTBD array address:%#x\n"
+                            "\tTCB byte count:%#x\n"
+                            "\tEOF:%#x\n"
+                            "\tTransmit Threshold:%#x\n"
+                            "\tTBD number:%#x\n"
+                            "\tUse %s mode to send frame\n",
+                            tbd_array, tcb_bytes, tx.tbd.eof,
+                            tx.tbd.tx_threshold, tx.tbd.tbd_num,
+                            sf ? "Flexible" : "Simple");
+
+                    if ( !sf || tbd_array == (uint32_t)-1 )
+                    {
+                        /* Simple mode */
+
+                        /* For simple mode, TCB bytes should not be zero.
+                         * But we still check here for safety
+                         */
+                        if ( !tcb_bytes || tcb_bytes > sizeof(s->pkt_buf) )
+                            break;
+
+                        cpu_physical_memory_read(cb_addr+16, &s->pkt_buf[0], tcb_bytes);
+                        len = tcb_bytes;
+                        logout("simple mode(size=%d)\n", len);
+
+                    }
+                    else
+                    {
+                        /* Flexible mode */
+
+                        /* For flexible mode, TBD num should not be zero.
+                         * But we still check here for safety
+                         */
+                        if ( !tx.tbd.tbd_num )
+                            break;
+
+                        // I82557 don't support extend TCB
+                        if ( s->device == i82557C || s->device == i82557B )
+                        {
+                            /* Standard TCB mode */
+
+                            int i;
+
+                            for ( i=0; i<tx.tbd.tbd_num; i++ )
+                            {
+
+                                cpu_physical_memory_read(tbd_array, (uint8_t *)&tx_buf,
+                                        sizeof(tx_buf));
+                                tx_buf.is_el_set &= 0x1;
+                                tx_buf.size &= 0x7fff;
+                                tbd_array += 8;
+
+                                if ( tx_buf.size > sizeof(s->pkt_buf) - len )
+                                {
+                                    logout("Warning: Get a too big TBD, ignore it"
+                                            "(buf addr %#x, size %d, el:%#x)\n",
+                                            tx_buf.addr, tx_buf.size, tx_buf.is_el_set);
+                                    continue;
+                                }
+
+                                cpu_physical_memory_read(tx_buf.addr, &s->pkt_buf[len],
+                                        tx_buf.size);
+
+                                logout("TBD (standard mode): buf addr %#x, size %d, el:%#x\n",
+                                        tx_buf.addr, tx_buf.size, tx_buf.is_el_set);
+                                len += tx_buf.size;
+
+                                if ( tx_buf.is_el_set )
+                                    break;
+                            }
+
+                        }
+                        //FIXME: Extend mode is not be tested
+                        else
+                        {
+                            /* Extend TCB mode */
+
+                            /* A standard TCB followed by two TBDs */
+                            uint32_t tbd_addr = cb_addr+16;
+                            int i = 0;
+
+
+                            for ( ; i<2 && i<tx.tbd.tbd_num; i++ )
+                            {
+
+                                /* The first two TBDs live right behind the
+                                 * TCB, so walk tbd_addr and not the TBD
+                                 * array pointer
+                                 */
+                                cpu_physical_memory_read(tbd_addr, (uint8_t *)&tx_buf,
+                                        sizeof(tx_buf));
+                                tx_buf.is_el_set &= 0x1;
+                                tbd_addr += 8;
+
+                                /* From Intel's spec, size of TBD equal to zero
+                                 * has same effect with EL bit set
+                                 */
+                                if ( tx_buf.size == 0 )
+                                {
+                                    tx_buf.is_el_set = 1;
+                                    break;
+                                }
+
+                                if ( tx_buf.size + len > sizeof(s->pkt_buf) )
+                                {
+                                    logout("TX frame is too large, discarding it"
+                                            "(buf addr=%#x, size=%#x)\n", tx_buf.addr,
+                                            tx_buf.size);
+                                    //continue;
+                                    break;
+                                }
+
+                                logout("TBD (extended mode): buf addr %#08x, size %#04x, el:%#x\n",
+                                        tx_buf.addr, tx_buf.size, tx_buf.is_el_set);
+                                cpu_physical_memory_read(tx_buf.addr, &s->pkt_buf[len],
+                                        tx_buf.size);
+
+                                len += tx_buf.size;
+
+                                if ( tx_buf.is_el_set )
+                                    break;
+                            }
+
+                            /* In extend TCB mode, TBD array points to the
+                             * third TBD if it is not NULL(0xffffffff) and
+                             * the EL bit of the first two TBDs is not set
+                             */
+                            if ( tbd_array != (uint32_t)-1 && !tx_buf.is_el_set )
+                            {
+                                tbd_addr = tbd_array;
+
+                                /* TBD number includes first two TBDs, so don't
+                                 * initialize i here
+                                 */
+                                for ( ; i<tx.tbd.tbd_num; i++ )
+                                {
+                                    cpu_physical_memory_read(tbd_addr, (uint8_t *)&tx_buf,
+                                            sizeof(tx_buf));
+                                    tx_buf.is_el_set &= 0x1;
+                                    tbd_addr += 8;
+
+                                    /* The size is supplied by the guest:
+                                     * never copy past the end of pkt_buf
+                                     */
+                                    if ( tx_buf.size > sizeof(s->pkt_buf) - len )
+                                    {
+                                        logout("TX frame is too large, discarding it"
+                                                "(buf addr=%#x, size=%#x)\n", tx_buf.addr,
+                                                tx_buf.size);
+                                        break;
+                                    }
+
+                                    cpu_physical_memory_read(tx_buf.addr, &s->pkt_buf[len],
+                                            tx_buf.size);
+                                    logout("TBD (extended mode): buf addr 0x%#08x, size 0x%#04x\n",
+                                            tx_buf.addr, tx_buf.size);
+
+                                    len += tx_buf.size;
+
+                                    if ( tx_buf.is_el_set )
+                                        break;
+                                }
+                            }
+                        }
+                    }
+
+
+                    s->pkt_buf_len = len;
+
+/* Below codes are used for Threshold. But with these logic, network of guest
+ * getting bad performance. So I comment it and leave codes here to hope anyone
+ * fix it
+ */
+#if 0
+                    /* If threshold is set, only send packet when threshold
+                     * bytes are read
+                     */
+                    if ( tx.tbd.tx_threshold && s->pkt_buf_len < tx.tbd.tx_threshold * 8 )
+                    {
+                        logout("Current data length in FIFO buffer:%d\n", s->pkt_buf_len);
+                        break;
+                    }
+#endif
+
+                    if ( s->pkt_buf_len )
+                    {
+                        qemu_send_packet(s->vc, s->pkt_buf, s->pkt_buf_len);
+                        s->statistics.tx_good_frames ++;
+                        logout("Send out frame successful(size=%d,"
+                                "already sent %d frames)\n", s->pkt_buf_len,
+                                s->statistics.tx_good_frames);
+                        s->pkt_buf_len = 0;
+                    }
+
+                    /* Ethernet header: destination 0-5, source 6-11 and
+                     * the type field at bytes 12-13
+                     */
+                    e100_dump("Dest addr:", (uint8_t *)s->pkt_buf, 6);
+                    e100_dump("Src addr:", (uint8_t *)(s->pkt_buf+6), 6);
+                    e100_dump("type:", (uint8_t *)(s->pkt_buf+12), 2);
+
+                    break;
+                }
+            case CBL_LOAD_MICROCODE:
+#ifdef DEBUG_E100
+                {
+                    /* Don't support load micro code, just dump it */
+                    #define MICRO_CODE_LEN 256
+                    uint8_t micro_code[MICRO_CODE_LEN] = {0};
+                    cpu_physical_memory_read(cb_addr+8, micro_code, MICRO_CODE_LEN);
+                    e100_dump("Load micro code:", micro_code, MICRO_CODE_LEN);
+                }
+#endif
+                break;
+            case CBL_DUMP:
+                logout("Control block dump\n");
+                break;
+            case CBL_DIAGNOSE:
+                logout("Control block diagnose\n");
+                break;
+            default:
+                logout("Unknown Control block command(val=%#x)\n", cb.cmd);
+                break;
+        }
+
+        /* Now, we finished executing a command, update status of CB.
+         * We always success
+         */
+        cb.c = 1;
+        cb.ok = 1;
+        // Only update C bit and OK bit field in TCB
+        cpu_physical_memory_write(cb_addr, (uint8_t *)&cb, 2);
+
+        logout("Finished a command from CB list:\n"
+                "\tok:%d\n"
+                "\tc:%d\n"
+                "\tcommand name:%s(cmd=%#x)\n"
+                "\ti:%d\n"
+                "\ts:%d\n"
+                "\tel:%d\n"
+                "\tlink address:%#x\n",
+                cb.ok, cb.c, CB_CMD_NAME(cb.cmd), cb.cmd,
+                cb.i, cb.s, cb.el, cb.link_addr);
+
+        if ( cb.i )
+            e100_interrupt(s, (uint16_t)INT_CX_TNO);
+
+        // Suspend CU
+        if ( cb.s )
+        {
+            logout("CU go to suspend\n");
+            SET_CU_STATE(CU_SUSPENDED);
+            /* Save the next CB offset for CU resume, converted the same
+             * way as when the list is followed directly below
+             */
+            s->cu_next = le32_to_cpu(cb.link_addr);
+
+            // Trigger CNA interrupt only when CNA mode is configured
+            if ( !(s->config.ci_intr) && cb.i )
+                e100_interrupt(s, (uint16_t)INT_CNA);
+
+            return;
+        }
+
+        // This is last command in CB list, CU go back to IDLE
+        if ( cb.el )
+        {
+            logout("Command block list is empty, CU go to idle\n");
+            SET_CU_STATE(CU_IDLE);
+            /* Either in CNA mode or CI mode, interrupt need be triggered
+             * when CU go to idle.
+             */
+            if ( cb.i )
+                e100_interrupt(s, (uint16_t)INT_CNA);
+
+            return;
+        }
+
+        s->cu_offset = le32_to_cpu(cb.link_addr); // get next CB offset
+    }
+}
+
+/*
+ * Store complete_word in the statistics block and copy the whole block
+ * to guest physical memory at s->statsaddr.  Most counters are never
+ * changed by the emulation and therefore stay 0.
+ */
+static void dump_statistics(E100State * s, uint32_t complete_word)
+{
+    uint8_t *counters = (uint8_t *)&s->statistics;
+
+    s->statistics.complete_word = complete_word;
+    cpu_physical_memory_write(s->statsaddr, counters, sizeof(s->statistics));
+}
+
+/*
+ * Execute one command unit (CU) command.
+ *
+ * CU start and CU resume run the command block list, the statistics
+ * commands load the dump address or dump (and optionally clear) the
+ * counters, and CU command base latches the SCB general pointer as the
+ * CU base.  CU static resume is only logged.
+ */
+static void e100_cu_command(E100State *s, uint8_t val)
+{
+    switch ( val )
+    {
+        case CU_NOP:
+            /* Will not be here */
+            break;
+
+        case CU_START:
+            /* Following Intel's spec, start is only legal from idle or
+             * suspended
+             */
+            if ( GET_CU_STATE == CU_IDLE || GET_CU_STATE == CU_SUSPENDED )
+            {
+                SET_CU_STATE(CU_LPQ_ACTIVE);
+                logout("CU start\n");
+
+                e100_execute_cb_list(s, 0);
+            }
+            else
+            {
+                logout("Illegal CU start command. Device is not idle or suspend\n");
+            }
+            break;
+
+        case CU_RESUME:
+            {
+                /* Address of the CB that was executed last */
+                uint32_t last_cb_addr = s->cu_base + s->cu_offset;
+                struct control_block last_cb;
+
+                /* FIXME:From Intel's spec, CU resume from idle is
+                 * forbidden, but the e100 driver in linux does it
+                 * anyway, so it is only logged.
+                 */
+                if ( GET_CU_STATE == CU_IDLE )
+                {
+                    logout("Illegal resume form IDLE\n");
+                }
+
+                cpu_physical_memory_read(last_cb_addr, (uint8_t *)&last_cb,
+                                        sizeof(last_cb));
+
+                //FIXME: Need any special handling when CU is active ?
+
+                if ( last_cb.s )
+                {
+                    /* The driver must clear the S bit of the previous CB
+                     * when it issues CU resume
+                     */
+                    logout("CU still in suspend\n");
+                }
+                else
+                {
+                    SET_CU_STATE(CU_LPQ_ACTIVE);
+
+                    if ( last_cb.el )
+                    {
+                        logout("CB list is empty, CU just go to active\n");
+                    }
+                    else
+                    {
+                        // Continue with the command saved at suspend time
+                        s->cu_offset = s->cu_next;
+
+                        e100_execute_cb_list(s, 1);
+
+                        logout("CU resume\n");
+                    }
+                }
+            }
+            break;
+
+        case CU_STATSADDR:
+            /* Load dump counters address */
+            s->statsaddr = CSR_VAL(CSR_POINTER);
+            logout("Load Stats address at %#x\n", s->statsaddr);
+            break;
+
+        case CU_SHOWSTATS:
+            /* Dump statistical counters */
+            dump_statistics(s, 0xa005);
+            logout("Execute dump statistics\n");
+            break;
+
+        case CU_CMD_BASE:
+            /* Load CU base */
+            s->cu_base = CSR_VAL(CSR_POINTER);
+            logout("Load CU base at %x\n", s->cu_base);
+            break;
+
+        case CU_DUMPSTATS:
+            /* Dump statistical counters, then clear them */
+            dump_statistics(s, 0xa007);
+            memset(&s->statistics, 0x0, sizeof(s->statistics));
+            logout("Execute dump and reset statistics\n");
+            break;
+
+        case CU_S_RESUME:
+            /* CU static resume */
+            logout("CU static resume is not implemented\n");
+            break;
+
+        default:
+            logout("Unknown CU command(val=%#x)\n", val);
+            break;
+    }
+}
+
+/*
+ * Handle a write to the SCB command byte.  A non-zero low nibble is
+ * executed as an RU command; otherwise a non-zero high nibble is
+ * executed as a CU command.  The matching command field of the CSR is
+ * cleared afterwards.  A value with both nibbles zero (NOP) is ignored.
+ */
+static void scb_cmd_func(E100State *s, uint16_t val, int dir)
+{
+    uint8_t ru = val & 0x0f;
+    uint8_t cu = val & 0xf0;
+
+    if ( ru )
+    {
+        e100_ru_command(s, ru);
+        CSR(CSR_CMD, ru_cmd) = 0;
+        return;
+    }
+
+    /* A CU command is only looked at when no RU command was given */
+    if ( cu )
+    {
+        e100_cu_command(s, cu);
+        CSR(CSR_CMD, cu_cmd) = 0;
+    }
+}
+
+enum               /* Access kind passed to e100_execute() as its 'bytes' argument */
+{
+    WRITEB,         /* driver issued a writeb */
+    WRITEW,         /* driver issued a writew */
+    WRITEL,         /* driver issued a writel */
+    OP_IS_READ,     /* NOTE(review): presumably marks a read access -- confirm against callers */
+} WRITE_BYTES;     /* NOTE(review): this declares an object named WRITE_BYTES, not a type name */
+
+/* The driver may issue a command by writing one 32-bit entry,
+ * two 16-bit entries or four 8-bit entries. In the latter two cases we
+ * must wait until the driver has finished writing the highest byte. The
+ * parameter 'bytes' indicates the write action used by the driver
+ * (writeb, writew, writel).
+ */
+static void e100_execute(E100State *s, uint32_t addr_offset,
+        uint32_t val, int dir, int bytes)
+{
+
+    switch ( addr_offset )
+    {
+        case SCB_STATUS:
+            if ( bytes == WRITEB )
+                break;
+        case SCB_ACK:
+            if ( dir == OP_WRITE )
+            {
+                uint8_t _val = 0;
+                if ( bytes == WRITEB )
+                    _val = (uint8_t)val;
+                else if ( bytes == WRITEW )
+                    _val = ((uint16_t)val) >> 8;
+                else if ( bytes == WRITEL)
+                {
+                    // This should not be happen
+                    _val = ((uint16_t)val) >> 8;
+                    logout("WARNNING: Drvier write 4 bytes to CSR register at 
offset %d,"
+                           "emulator may do things wrong!!!\n", addr_offset);
+                }
+
+                e100_interrupt_ack(s, _val);
+            }
+            break;
+        case SCB_CMD:
+            if ( dir == OP_WRITE )
+                scb_cmd_func(s, val, dir);
+
+/* I don't know whether there is any driver writes command words and
+ * interrupt mask at same time by two bytes. This is not a regular operation.
+ * but if we meet the case, below codes could copy with it. As far
+ * as I know. windows's and linux's driver don't do this thing.
+ */
+#if 0
+            if ( bytes == WRITEW && (val&0xff00) != 0 )
+                ;
+            else
+                break;
+#endif
+            break;
+        case SCB_INTERRUPT_MASK:
+            if ( dir == OP_WRITE )
+            {
+                uint8_t _val = 0;
+                if ( bytes == WRITEB )
+                    _val = (uint8_t)val;
+                else if ( bytes == WRITEW )
+                    _val = (val & 0xff00) >> 8;
+                else
+                    logout("WARNNING: Drvier write 4 bytes to CSR register at 
offset %d,"
+                           "emulator may do things wrong!!!\n", addr_offset);
+
+                // Driver generates a software interrupt
+                if ( _val & BIT(1) )
+                    e100_interrupt(s, INT_SWI);
+            }
+            break;
+        case SCB_PORT ... SCB_PORT + 3:
+            if ( dir == OP_WRITE )
+            {
+                // Waitting for driver write to the highest byte
+                if ( (bytes == WRITEB && addr_offset != SCB_PORT + 3) ||
+                     (bytes == WRITEW && addr_offset != SCB_PORT + 2) )
+                    break;
+
+                scb_port_func(s, CSR_VAL(CSR_PORT), dir);
+            }
+            break;
+        case SCB_MDI ... SCB_MDI + 3:
+            if ( dir == OP_WRITE )
+            {
+                // Waitting for driver write to the highest byte
+                if ( (bytes == WRITEB && addr_offset != SCB_MDI + 3) ||
+                     (bytes == WRITEW && addr_offset != SCB_MDI + 2) )
+                    break;
+            }
+
+            scb_mdi_func(s, CSR_VAL(CSR_MDI), dir);
+            break;
+        case SCB_EEPROM:
+            if ( dir == OP_WRITE )
+                scb_eeprom_func(s, val, dir);
+            // Nothing need do when driver read EEPROM registers of CSR
+            break;
+        case SCB_POINTER:
+            break;
+        default:
+            logout("Driver operate on CSR reg(offset=%#x,dir=%s,val=%#x)\n",
+                    addr_offset, dir==OP_WRITE?"write":"read", val);
+    }
+
+}
+
+/* MMIO access functions */
+/* Read one byte of the CSR block at addr_offset.
+ * Runs e100_execute() for the read side effects first, then fetches the
+ * byte. An access that does not fit inside s->pci_mem.mem is logged and
+ * answered with all ones.
+ */
+static uint8_t e100_read1(E100State * s, uint32_t addr_offset)
+{
+    uint8_t val = -1;
+
+    /* Written as a subtraction so the test cannot wrap, and so the last
+     * byte of the block stays accessible. */
+    if ( addr_offset > sizeof(s->pci_mem.mem) - sizeof(val) )
+    {
+        logout("Invaild read, beyond memory boundary(addr:%#x)\n", addr_offset
+                + s->region_base_addr[CSR_MEMORY_BASE]);
+        return val;
+    }
+
+    e100_execute(s, addr_offset, val, OP_READ, OP_IS_READ);
+    val = CSR_READ(addr_offset, uint8_t);
+    logout("READ1: Register name = %s, addr_offset = %#x, val=%#x\n",
+            SCBNAME(addr_offset), addr_offset, val);
+
+    return val;
+}
+
+/* Read two bytes of the CSR block starting at addr_offset.
+ * Runs e100_execute() for the read side effects first, then fetches the
+ * value. An access that does not fit inside s->pci_mem.mem is logged and
+ * answered with all ones.
+ */
+static uint16_t e100_read2(E100State * s, uint32_t addr_offset)
+{
+    uint16_t val = -1;
+
+    /* Written as a subtraction so the test cannot wrap, and so the last
+     * two bytes of the block stay accessible. */
+    if ( addr_offset > sizeof(s->pci_mem.mem) - sizeof(val) )
+    {
+        logout("Invaild read, beyond memory boundary(addr:%#x)\n", addr_offset
+                + s->region_base_addr[CSR_MEMORY_BASE]);
+        return val;
+    }
+
+    e100_execute(s, addr_offset, val, OP_READ, OP_IS_READ);
+    val = CSR_READ(addr_offset, uint16_t);
+    logout("READ2: Register name = %s, addr_offset = %#x, val=%#x\n",
+            SCBNAME(addr_offset), addr_offset, val);
+
+    return val;
+}
+
+/* Read four bytes of the CSR block starting at addr_offset.
+ * Runs e100_execute() for the read side effects first, then fetches the
+ * value. An access that does not fit inside s->pci_mem.mem is logged and
+ * answered with all ones.
+ */
+static uint32_t e100_read4(E100State * s, uint32_t addr_offset)
+{
+    uint32_t val = -1;
+
+    /* Written as a subtraction so the test cannot wrap, and so the last
+     * four bytes of the block stay accessible. */
+    if ( addr_offset > sizeof(s->pci_mem.mem) - sizeof(val) )
+    {
+        logout("Invaild read, beyond memory boundary(addr:%#x)\n", addr_offset
+                + s->region_base_addr[CSR_MEMORY_BASE]);
+        return val;
+    }
+
+    e100_execute(s, addr_offset, val, OP_READ, OP_IS_READ);
+    val = CSR_READ(addr_offset, uint32_t);
+    logout("READ4: Register name = %s, addr_offset = %#x, val=%#x\n",
+            SCBNAME(addr_offset), addr_offset, val);
+
+    return val;
+}
+
+/* MMIO byte read: rebase 'addr' against region_base_addr[CSR_MEMORY_BASE]
+ * and forward the resulting offset to e100_read1(). */
+static uint32_t pci_mmio_readb(void *opaque, target_phys_addr_t addr)
+{
+    E100State *s = opaque;
+
+    return e100_read1(s, addr - s->region_base_addr[CSR_MEMORY_BASE]);
+}
+
+/* MMIO 2-byte read: rebase 'addr' against region_base_addr[CSR_MEMORY_BASE]
+ * and forward the resulting offset to e100_read2(). */
+static uint32_t pci_mmio_readw(void *opaque, target_phys_addr_t addr)
+{
+    E100State *s = opaque;
+
+    return e100_read2(s, addr - s->region_base_addr[CSR_MEMORY_BASE]);
+}
+
+/* MMIO 4-byte read: rebase 'addr' against region_base_addr[CSR_MEMORY_BASE]
+ * and forward the resulting offset to e100_read4(). */
+static uint32_t pci_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+    E100State *s = opaque;
+
+    return e100_read4(s, addr - s->region_base_addr[CSR_MEMORY_BASE]);
+}
+
+/* MMIO read handlers for 1-, 2- and 4-byte accesses, in that order.
+ * Handed to cpu_register_io_memory() by e100_init(). */
+static CPUReadMemoryFunc *pci_mmio_read[] = {
+    pci_mmio_readb,
+    pci_mmio_readw,
+    pci_mmio_readl
+};
+
+/* Store one byte into the CSR block at addr_offset.
+ * - An access that does not fit inside s->pci_mem.mem is logged and dropped.
+ * - A write to SCB_STATUS is dropped (read-only byte).
+ * - A write to SCB_EEPROM is stored, then the EEDO field is restored from
+ *   the value it had before the write (read-only bit).
+ * Only the storage is done here; side effects are left to e100_execute().
+ */
+static void e100_write1(E100State * s, uint32_t addr_offset, uint8_t val)
+{
+    /* Written as a subtraction so the test cannot wrap, and so the last
+     * byte of the block stays accessible. */
+    if ( addr_offset > sizeof(s->pci_mem.mem) - sizeof(val) )
+    {
+        logout("Invaild write, beyond memory boundary(addr = %#x, val = %#x\n",
+                addr_offset + s->region_base_addr[CSR_MEMORY_BASE], val);
+        return;
+    }
+
+    // SCB status is a read-only byte, it can not be written directly
+    if ( addr_offset == SCB_STATUS )
+    {
+        return;
+    }
+    // EEDO bit of the EEPROM register is read-only, it can not be written
+    else if ( addr_offset == SCB_EEPROM )
+    {
+        int eedo = BIT(3) & CSR_VAL(CSR_EEPROM);
+        CSR_WRITE(addr_offset, val, uint8_t);
+        CSR(CSR_EEPROM, eedo) = !!(eedo & EEPROM_DO);
+
+        logout("WRITE1: Register name = %s, addr_offset = %#x, val = %#x\n",
+                SCBNAME(addr_offset),addr_offset, (uint8_t)CSR_VAL(CSR_EEPROM));
+        return;
+    }
+    else
+    {
+        CSR_WRITE(addr_offset, val, uint8_t);
+    }
+
+    logout("WRITE1: Register name = %s, addr_offset = %#x, val = %#x\n",
+            SCBNAME(addr_offset),addr_offset, val);
+    return;
+}
+
+/* Store two bytes into the CSR block starting at addr_offset.
+ * - An access that does not fit inside s->pci_mem.mem is logged and dropped.
+ * - At SCB_STATUS only the upper byte is stored (at addr_offset+1); the
+ *   byte at SCB_STATUS itself is read-only and left untouched.
+ * - At SCB_EEPROM the value is stored, then the EEDO field is restored from
+ *   the value it had before the write (read-only bit).
+ * Only the storage is done here; side effects are left to e100_execute().
+ */
+static void e100_write2(E100State * s, uint32_t addr_offset, uint16_t val)
+{
+    /* Written as a subtraction so the test cannot wrap, and so the last
+     * two bytes of the block stay accessible. */
+    if ( addr_offset > sizeof(s->pci_mem.mem) - sizeof(val) )
+    {
+        logout("Invaild write, beyond memory boundary(addr = %#x, val = %#x\n",
+                addr_offset + s->region_base_addr[CSR_MEMORY_BASE], val);
+        return;
+    }
+
+    // SCB status is a read-only byte, it can not be written directly
+    if ( addr_offset == SCB_STATUS )
+    {
+        uint8_t hi_byte = val >> 8;
+        CSR_WRITE(addr_offset+1, hi_byte, uint8_t);
+    }
+    // EEDO bit of the EEPROM register is read-only, it can not be written
+    else if ( addr_offset == SCB_EEPROM )
+    {
+        int eedo = BIT(3) & CSR_VAL(CSR_EEPROM);
+        CSR_WRITE(addr_offset, val, uint16_t);
+        CSR(CSR_EEPROM, eedo) = !!(eedo & EEPROM_DO);
+
+        /* Label fixed: this is the 2-byte path, it used to log "WRITE1". */
+        logout("WRITE2: Register name = %s, addr_offset = %#x, val = %#x\n",
+                SCBNAME(addr_offset),addr_offset, CSR_VAL(CSR_EEPROM));
+        return;
+    }
+    else
+    {
+        CSR_WRITE(addr_offset, val, uint16_t);
+    }
+
+    logout("WRITE2: Register name = %s, addr_offset = %#x, val = %#x\n",
+            SCBNAME(addr_offset),addr_offset, val);
+    return;
+}
+
+/* Store four bytes into the CSR block starting at addr_offset.
+ * - An access that does not fit inside s->pci_mem.mem is logged and dropped.
+ * - At SCB_STATUS the byte at SCB_STATUS itself is skipped (read-only);
+ *   the following three bytes are stored individually.
+ * - No special case exists for the EEPROM register on this path.
+ * Only the storage is done here; side effects are left to e100_execute().
+ */
+static void e100_write4(E100State * s, uint32_t addr_offset, uint32_t val)
+{
+    /* Written as a subtraction so the test cannot wrap, and so the last
+     * four bytes of the block stay accessible. */
+    if ( addr_offset > sizeof(s->pci_mem.mem) - sizeof(val) )
+    {
+        logout("Invaild write, beyond memory boundary(addr = %#x, val = %#x\n",
+                addr_offset + s->region_base_addr[CSR_MEMORY_BASE], val);
+        return;
+    }
+
+    // SCB status is a read-only byte, it can not be written directly
+    if ( addr_offset == SCB_STATUS )
+    {
+        uint8_t bytes[4] = {0};
+
+        /* memcpy keeps the host byte order of 'val', exactly like the
+         * pointer cast it replaces, without the aliasing/alignment hazard. */
+        memcpy(bytes, &val, sizeof(bytes));
+
+        CSR_WRITE(addr_offset+1, bytes[1], uint8_t);
+        CSR_WRITE(addr_offset+2, bytes[2], uint8_t);
+        CSR_WRITE(addr_offset+3, bytes[3], uint8_t);
+    }
+    /* No write4 operation on EEPROM register */
+    else
+    {
+        CSR_WRITE(addr_offset, val, uint32_t);
+    }
+
+    logout("WRITE4: Register name = %s, addr_offset = %#x, val = %#x\n",
+            SCBNAME(addr_offset),addr_offset, val);
+    return;
+}
+
+/* MMIO byte write: rebase 'addr' against region_base_addr[CSR_MEMORY_BASE],
+ * store the value with e100_write1(), then run the side effects through
+ * e100_execute(). */
+static void pci_mmio_writeb(void *opaque, target_phys_addr_t addr,
+                            uint32_t val)
+{
+    E100State *s = opaque;
+    addr -= s->region_base_addr[CSR_MEMORY_BASE];
+    e100_write1(s, addr, val);
+    e100_execute(s, addr, val, OP_WRITE, WRITEB);
+}
+
+/* MMIO 2-byte write: rebase 'addr' against
+ * region_base_addr[CSR_MEMORY_BASE], store the value with e100_write2(),
+ * then run the side effects through e100_execute(). */
+static void pci_mmio_writew(void *opaque, target_phys_addr_t addr,
+                            uint32_t val)
+{
+    E100State *s = opaque;
+    addr -= s->region_base_addr[CSR_MEMORY_BASE];
+    e100_write2(s, addr, val);
+    e100_execute(s, addr, val, OP_WRITE, WRITEW);
+}
+
+/* MMIO 4-byte write: rebase 'addr' against
+ * region_base_addr[CSR_MEMORY_BASE], store the value with e100_write4(),
+ * then run the side effects through e100_execute(). */
+static void pci_mmio_writel(void *opaque, target_phys_addr_t addr,
+                            uint32_t val)
+{
+    E100State *s = opaque;
+    addr -= s->region_base_addr[CSR_MEMORY_BASE];
+    e100_write4(s, addr, val);
+    e100_execute(s, addr, val, OP_WRITE, WRITEL);
+}
+
+/* MMIO write handlers for 1-, 2- and 4-byte accesses, in that order.
+ * Handed to cpu_register_io_memory() by e100_init(). */
+static CPUWriteMemoryFunc *pci_mmio_write[] = {
+    pci_mmio_writeb,
+    pci_mmio_writew,
+    pci_mmio_writel
+};
+
+/* Mapping callback for the memory regions registered by e100_init().
+ * Only region CSR_MEMORY_BASE is acted on: it is bound to the device's
+ * MMIO handlers and its base address is remembered for later offset
+ * calculation. Any other region number is only logged.
+ */
+static void pci_mmio_map(PCIDevice * pci_dev, int region_num,
+                         uint32_t addr, uint32_t size, int type)
+{
+    PCIE100State *d = (PCIE100State *) pci_dev;
+
+    logout("region %d, addr=0x%08x, size=0x%08x, type=%d\n",
+           region_num, addr, size, type);
+
+    if ( region_num != CSR_MEMORY_BASE )
+        return;
+
+    /* Map control / status registers. */
+    cpu_register_physical_memory(addr, size, d->e100.mmio_index);
+    d->e100.region_base_addr[region_num] = addr;
+}
+
+/* IO access functions */
+/* Port I/O byte write: rebase 'addr' against region_base_addr[CSR_IO_BASE],
+ * store the value, then run the side effects. */
+static void ioport_write1(void *opaque, uint32_t addr, uint32_t val)
+{
+    E100State *s = opaque;
+    uint32_t offset = addr - s->region_base_addr[CSR_IO_BASE];
+
+    e100_write1(s, offset, val);
+    e100_execute(s, offset, val, OP_WRITE, WRITEB);
+}
+
+/* Port I/O 2-byte write: rebase 'addr' against
+ * region_base_addr[CSR_IO_BASE], store the value, then run the side
+ * effects. */
+static void ioport_write2(void *opaque, uint32_t addr, uint32_t val)
+{
+    E100State *s = opaque;
+    uint32_t offset = addr - s->region_base_addr[CSR_IO_BASE];
+
+    e100_write2(s, offset, val);
+    e100_execute(s, offset, val, OP_WRITE, WRITEW);
+}
+
+/* Port I/O 4-byte write: rebase 'addr' against
+ * region_base_addr[CSR_IO_BASE], store the value, then run the side
+ * effects. */
+static void ioport_write4(void *opaque, uint32_t addr, uint32_t val)
+{
+    E100State *s = opaque;
+    uint32_t offset = addr - s->region_base_addr[CSR_IO_BASE];
+
+    e100_write4(s, offset, val);
+    e100_execute(s, offset, val, OP_WRITE, WRITEL);
+}
+
+/* Port I/O byte read: rebase 'addr' against region_base_addr[CSR_IO_BASE]
+ * and forward the resulting offset to e100_read1(). */
+static uint32_t ioport_read1(void *opaque, uint32_t addr)
+{
+    E100State *s = opaque;
+
+    return e100_read1(s, addr - s->region_base_addr[CSR_IO_BASE]);
+}
+
+/* Port I/O 2-byte read: rebase 'addr' against region_base_addr[CSR_IO_BASE]
+ * and forward the resulting offset to e100_read2(). */
+static uint32_t ioport_read2(void *opaque, uint32_t addr)
+{
+    E100State *s = opaque;
+
+    return e100_read2(s, addr - s->region_base_addr[CSR_IO_BASE]);
+}
+
+/* Port I/O 4-byte read: rebase 'addr' against region_base_addr[CSR_IO_BASE]
+ * and forward the resulting offset to e100_read4(). */
+static uint32_t ioport_read4(void *opaque, uint32_t addr)
+{
+    E100State *s = opaque;
+
+    return e100_read4(s, addr - s->region_base_addr[CSR_IO_BASE]);
+}
+
+/* Mapping callback for the I/O region registered by e100_init().
+ * For region 1 the 1-, 2- and 4-byte port handlers are installed over
+ * [addr, addr+size) and the base address is remembered; any other region
+ * number is logged and ignored.
+ */
+static void pci_ioport_map(PCIDevice * pci_dev, int region_num,
+                    uint32_t addr, uint32_t size, int type)
+{
+    E100State *s = &((PCIE100State *) pci_dev)->e100;
+
+    logout("region %d, addr=0x%08x, size=0x%08x, type=%d\n",
+           region_num, addr, size, type);
+
+    if ( region_num == 1 )
+    {
+        register_ioport_write(addr, size, 1, ioport_write1, s);
+        register_ioport_read(addr, size, 1, ioport_read1, s);
+        register_ioport_write(addr, size, 2, ioport_write2, s);
+        register_ioport_read(addr, size, 2, ioport_read2, s);
+        register_ioport_write(addr, size, 4, ioport_write4, s);
+        register_ioport_read(addr, size, 4, ioport_read4, s);
+
+        s->region_base_addr[region_num] = addr;
+        return;
+    }
+
+    logout("Invaid region number!\n");
+}
+
+/* From FreeBSD.
+ * Hash a 6-byte Ethernet address: each byte is fed least significant bit
+ * first into a 32-bit CRC register that starts at all ones, and the top
+ * six bits of the result are returned (0..63). The low bit that the
+ * POLYNOMIAL constant lacks is OR-ed in whenever the polynomial is applied.
+ */
+#define POLYNOMIAL 0x04c11db6
+static int compute_mcast_idx(const uint8_t *ep)
+{
+    const uint8_t *end = ep + 6;
+    uint32_t crc = 0xffffffff;
+
+    for (; ep != end; ep++) {
+        uint8_t octet = *ep;
+        int bit;
+
+        for (bit = 0; bit < 8; bit++, octet >>= 1) {
+            int carry = (int)((crc >> 31) & 1) ^ (octet & 0x01);
+
+            crc <<= 1;
+            if (carry)
+                crc = (crc ^ POLYNOMIAL) | 1;
+        }
+    }
+    return (crc >> 26);
+}
+
+/* EEPro100 receive functions */
+/* Receive-readiness callback handed to qemu_new_vlan_client().
+ * Returns 1 when GET_RU_STATE equals RU_READY, 0 otherwise, and logs the
+ * outcome.
+ */
+static int e100_can_receive(void *opaque)
+{
+    E100State *s = opaque;
+    int ready = 0;
+
+    if ( GET_RU_STATE == RU_READY )
+        ready = 1;
+
+    logout("%s\n", ready ? "EEPro100 receiver is ready"
+            : "EEPro100 receiver is not ready");
+    return ready;
+}
+
+/* Receive callback handed to qemu_new_vlan_client(): deliver one frame.
+ *
+ *  opaque - the E100State of the device
+ *  buf    - frame data
+ *  size   - frame length in bytes
+ *
+ * The frame is dropped unless the receive unit is in RU_READY. Otherwise
+ * the RFD at ru_base + ru_offset is read, the frame is run through the
+ * filter chain below (first match wins), and an accepted frame is written
+ * behind the updated RFD. ru_offset then advances to the RFD's link
+ * address, INT_FR is raised, and the RU is suspended (with INT_RNR) when
+ * the RFD has its EL or S bit set.
+ *
+ * NOTE(review): the address tests read the first 6 bytes of 'buf' without
+ * checking 'size', and the copy does not consult rfd.size - confirm
+ * callers/guests cannot violate either assumption.
+ */
+static void e100_receive(void *opaque, const uint8_t * buf, int size)
+{
+    E100State *s = opaque;
+    uint32_t rfd_addr = 0;
+    rfd_t rfd = {0};
+
+
+    if ( GET_RU_STATE != RU_READY )
+    {
+        //logout("RU is not ready. Begin discarding frame(state=%x)\n", GET_RU_STATE);
+        return;
+    }
+
+    rfd_addr = s->ru_base + s->ru_offset;
+    cpu_physical_memory_read(rfd_addr, (uint8_t *)&rfd, sizeof(rfd_t));
+
+    if ( (size > MAX_ETH_FRAME_SIZE+4) )
+    {
+        /* Long frame and configuration byte 18/3 (long receive ok) not set:
+         * Long frames are discarded. */
+        logout("Discard long frame(size=%d)\n", size);
+
+        return;
+    }
+    else if ( !memcmp(buf, s->macaddr, sizeof(s->macaddr)) )
+    {
+        /* The frame is for me */
+        logout("Receive a frame for me(size=%d)\n", size);
+        e100_dump("FRAME:", (uint8_t *)buf, size);
+    }
+    else if ( !memcmp(buf, broadcast_macaddr, sizeof(broadcast_macaddr)) )
+    {
+        if ( s->config.broadcast_dis && !s->config.promiscuous )
+        {
+            logout("Discard a broadcast frame\n");
+            return;
+        }
+
+        /* Broadcast frame */
+        rfd.status |= RX_IA_MATCH;
+        logout("Receive a broadcast frame(size=%d)\n", size);
+    }
+    else if ( s->is_multcast_enable && buf[0] & 0x1 )
+    {
+        /* Group address: accept only if its hash bit is set in mult_list. */
+        int mcast_idx = compute_mcast_idx(buf);
+        if ( !(s->mult_list[mcast_idx >> 3] & (1 << (mcast_idx & 7))) )
+        {
+            logout("Multicast address mismatch, discard\n");
+            return;
+        }
+        logout("Receive a multicast frame(size=%d)\n", size);
+    }
+    else if ( size < 64 && (s->config.dis_short_rx) )
+    {
+        /* From Intel's spec, short frames should be discarded
+         * when configuration byte 7/0 (discard short receive) is set.
+         * But this would lose frames such as ICMP and ARP frames.
+         * So we check whether the frame is for me before discarding
+         * short frames.
+         */
+
+        /* Save Bad Frame bit. Only the local RFD copy and the statistics
+         * counter are touched; nothing is written back before returning. */
+        if ( s->config.save_bad_frame )
+        {
+            rfd.status |= RX_SHORT;
+            s->statistics.rx_short_frame_errors ++;
+        }
+        logout("Receive a short frame(size=%d), discard it\n", size);
+        return;
+    }
+    else if ( s->config.promiscuous )
+    {
+        /* Promiscuous: receive all. No address match */
+        logout("Received frame in promiscuous mode(size=%d)\n", size);
+        rfd.status |= RX_NO_MATCH;
+    }
+    else
+    {
+        e100_dump("Unknown frame, MAC = ", (uint8_t *)buf, 6);
+        return;
+    }
+    e100_dump("Get frame, MAC = ", (uint8_t *)buf, 6);
+
+    /* Frame accepted: mark the RFD complete and record the byte count. */
+    rfd.c = 1;
+    rfd.ok = 1;
+    rfd.f = 1;
+    rfd.eof = 1;
+    rfd.status &= ~RX_COLLISION;
+    rfd.count = size;
+
+    logout("Get a RFD configure:\n"
+            "\tstatus:%#x\n"
+            "\tok:%#x\n" "\tc:%#x\n" "\tsf:%#x\n"
+            "\th:%#x\n" "\ts:%#x\n" "\tel:%#x\n"
+            "\tlink add:%#x\n" "\tactual count:%#x\n"
+            "\tf:%#x\n" "\teof:%#x\n" "\tsize:%#x\n",
+            rfd.status, rfd.ok, rfd.c, rfd.sf, rfd.h,
+            rfd.s, rfd.el, rfd.link_addr, rfd.count,
+            rfd.f, rfd.eof, rfd.size);
+
+    /* Write the RFD back, followed immediately by the frame data. */
+    cpu_physical_memory_write(rfd_addr, (uint8_t *)&rfd, sizeof(rfd));
+    cpu_physical_memory_write(rfd_addr + sizeof(rfd_t), buf, size);
+    s->statistics.rx_good_frames ++;
+    s->ru_offset = le32_to_cpu(rfd.link_addr);
+
+    e100_interrupt(s, INT_FR);
+
+    if ( rfd.el || rfd.s )
+    {
+        /* Go to suspend */
+        SET_RU_STATE(RU_SUSPENDED);
+        e100_interrupt(s, INT_RNR);
+        logout("RFD met S or EL bit set, RU go to suspend\n");
+        return;
+    }
+
+    logout("Complete a frame receive(size = %d)\n", size);
+    return;
+}
+
+/* Fill the emulated EEPROM.
+ * The EEPROM is reset, sized with EEPROM_I82557_ADDRBIT address bits and
+ * loaded from the eeprom_i82557 template; the device MAC address is then
+ * copied over the start of the contents, where the driver fetches it from.
+ * Finally the last entry is set so that it equals 0xBABA minus the sum of
+ * all preceding entries.
+ */
+static void eeprom_init(E100State *s)
+{
+    int word;
+    int last_word;
+    int sum = 0;
+
+    /* Add 64 * 2 EEPROM. i82557 and i82558 support a 64 word EEPROM,
+     * i82559 and later support 64 or 256 word EEPROM. */
+    eeprom_reset(s, EEPROM_RESET_ALL);
+    s->eeprom.addr_len = EEPROM_I82557_ADDRBIT;
+    memcpy(s->eeprom.contents, eeprom_i82557, sizeof(eeprom_i82557));
+    memcpy((uint8_t *)s->eeprom.contents, s->macaddr, sizeof(s->macaddr));
+
+    /* The MAC overwrite invalidated the template's checksum entry;
+     * recompute it over everything but the last entry. */
+    last_word = (1 << s->eeprom.addr_len) - 1;
+    for ( word = 0; word < last_word; word++ )
+        sum += s->eeprom.contents[word];
+    s->eeprom.contents[last_word] = 0xBABA - sum;
+}
+
+/* Create one e100 NIC on 'bus' and wire it into the emulator.
+ *
+ *  bus    - PCI bus the device is registered on
+ *  nd     - NIC description; supplies the MAC address and the VLAN
+ *  name   - device name, used for PCI registration and for savevm
+ *  device - device variant, stored in s->device
+ *
+ * NOTE(review): the result of pci_register_device() is used without a
+ * NULL check - confirm it cannot fail.
+ */
+static void e100_init(PCIBus * bus, NICInfo * nd,
+        const char *name, uint32_t device)
+{
+    PCIE100State *d;
+    E100State *s;
+
+    logout("\n");
+
+    d = (PCIE100State *) pci_register_device(bus, name,
+            sizeof(PCIE100State), -1,
+            NULL, NULL);
+
+    s = &d->e100;
+    s->device = device;
+    s->pci_dev = &d->dev;
+
+    pci_reset(s);
+
+
+    /* Handler for memory-mapped I/O */
+    d->e100.mmio_index =
+        cpu_register_io_memory(0, pci_mmio_read, pci_mmio_write, s);
+
+    //CSR Memory mapped base
+    pci_register_io_region(&d->dev, 0, PCI_MEM_SIZE,
+            PCI_ADDRESS_SPACE_MEM | PCI_ADDRESS_SPACE_MEM_PREFETCH,
+            pci_mmio_map);
+    //CSR I/O mapped base
+    pci_register_io_region(&d->dev, 1, PCI_IO_SIZE, PCI_ADDRESS_SPACE_IO,
+            pci_ioport_map);
+    //Flash memory mapped base
+    /* Shares pci_mmio_map with region 0; that callback only maps the
+     * region whose number equals CSR_MEMORY_BASE. */
+    pci_register_io_region(&d->dev, 2, PCI_FLASH_SIZE, PCI_ADDRESS_SPACE_MEM,
+            pci_mmio_map);
+
+    memcpy(s->macaddr, nd->macaddr, 6);
+    e100_dump("MAC ADDR", (uint8_t *)&s->macaddr[0], 6);
+
+    /* Must follow the MAC copy above: eeprom_init() stores s->macaddr in
+     * the EEPROM contents. */
+    eeprom_init(s);
+
+    e100_reset(s);
+
+    s->vc = qemu_new_vlan_client(nd->vlan, e100_receive, e100_can_receive, s);
+
+    snprintf(s->vc->info_str, sizeof(s->vc->info_str),
+            "e100 pci macaddr=%02x:%02x:%02x:%02x:%02x:%02x",
+            s->macaddr[0],
+            s->macaddr[1],
+            s->macaddr[2], s->macaddr[3], s->macaddr[4], s->macaddr[5]);
+
+    qemu_register_reset(e100_reset, s);
+
+    register_savevm(name, 0, 3, e100_save, e100_load, s);
+}
+
+/* Public entry point: create an "e100" NIC of variant i82557C on 'bus'
+ * for the NIC description 'nd'. */
+void pci_e100_init(PCIBus *bus, NICInfo *nd)
+{
+    const char *model_name = "e100";
+
+    e100_init(bus, nd, model_name, i82557C);
+}
+
diff -r 9a9ddc04eea2 -r 53dc1cf50506 tools/ioemu/hw/pci.c
--- a/tools/ioemu/hw/pci.c      Tue Nov 20 11:53:44 2007 -0700
+++ b/tools/ioemu/hw/pci.c      Wed Nov 21 09:12:06 2007 -0700
@@ -565,6 +565,8 @@ void pci_nic_init(PCIBus *bus, NICInfo *
         pci_rtl8139_init(bus, nd, devfn);
     } else if (strcmp(nd->model, "pcnet") == 0) {
         pci_pcnet_init(bus, nd, devfn);
+    } else if (strcmp(nd->model, "e100") == 0) {
+        pci_e100_init(bus, nd);
     } else {
         fprintf(stderr, "qemu: Unsupported NIC: %s\n", nd->model);
         exit (1);
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/hvm/hpet.c
--- a/xen/arch/x86/hvm/hpet.c   Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/hvm/hpet.c   Wed Nov 21 09:12:06 2007 -0700
@@ -127,9 +127,13 @@ static inline int hpet_check_access_leng
 {
     if ( (addr & (len - 1)) || (len > 8) )
     {
-        gdprintk(XENLOG_ERR, "HPET: access across register boundary: "
+        /*
+         * According to ICH9 specification, unaligned accesses may result
+         * in unexpected behaviour or master abort, but should not crash/hang.
+         * Hence we read all-ones, drop writes, and log a warning.
+         */
+        gdprintk(XENLOG_WARNING, "HPET: access across register boundary: "
                  "%lx %lx\n", addr, len);
-        domain_crash(current->domain);
         return -EINVAL;
     }
 
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/irq.c        Wed Nov 21 09:12:06 2007 -0700
@@ -15,7 +15,6 @@
 #include <xen/keyhandler.h>
 #include <xen/compat.h>
 #include <asm/current.h>
-#include <asm/smpboot.h>
 #include <asm/iommu.h>
 
 /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/mm.c Wed Nov 21 09:12:06 2007 -0700
@@ -3007,7 +3007,8 @@ long set_gdt(struct vcpu *v,
         return -EINVAL;
 
     /* Check the pages in the new GDT. */
-    for ( i = 0; i < nr_pages; i++ ) {
+    for ( i = 0; i < nr_pages; i++ )
+    {
         mfn = frames[i] = gmfn_to_mfn(d, frames[i]);
         if ( !mfn_valid(mfn) ||
              !get_page_and_type(mfn_to_page(mfn), d, PGT_gdt_page) )
@@ -3073,23 +3074,15 @@ long do_update_descriptor(u64 pa, u64 de
 
     *(u64 *)&d = desc;
 
-    LOCK_BIGLOCK(dom);
-
     mfn = gmfn_to_mfn(dom, gmfn);
     if ( (((unsigned int)pa % sizeof(struct desc_struct)) != 0) ||
          !mfn_valid(mfn) ||
          !check_descriptor(dom, &d) )
-    {
-        UNLOCK_BIGLOCK(dom);
         return -EINVAL;
-    }
 
     page = mfn_to_page(mfn);
     if ( unlikely(!get_page(page, dom)) )
-    {
-        UNLOCK_BIGLOCK(dom);
         return -EINVAL;
-    }
 
     /* Check if the given frame is in use in an unsafe context. */
     switch ( page->u.inuse.type_info & PGT_type_mask )
@@ -3112,7 +3105,7 @@ long do_update_descriptor(u64 pa, u64 de
 
     /* All is good so make the update. */
     gdt_pent = map_domain_page(mfn);
-    memcpy(&gdt_pent[offset], &d, 8);
+    atomic_write64((uint64_t *)&gdt_pent[offset], *(uint64_t *)&d);
     unmap_domain_page(gdt_pent);
 
     put_page_type(page);
@@ -3121,8 +3114,6 @@ long do_update_descriptor(u64 pa, u64 de
 
  out:
     put_page(page);
-
-    UNLOCK_BIGLOCK(dom);
 
     return ret;
 }
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/physdev.c    Wed Nov 21 09:12:06 2007 -0700
@@ -8,7 +8,6 @@
 #include <xen/event.h>
 #include <xen/guest_access.h>
 #include <asm/current.h>
-#include <asm/smpboot.h>
 #include <asm/hypercall.h>
 #include <public/xen.h>
 #include <public/physdev.h>
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/smp.c
--- a/xen/arch/x86/smp.c        Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/smp.c        Wed Nov 21 09:12:06 2007 -0700
@@ -18,7 +18,6 @@
 #include <asm/smp.h>
 #include <asm/mc146818rtc.h>
 #include <asm/flushtlb.h>
-#include <asm/smpboot.h>
 #include <asm/hardirq.h>
 #include <asm/ipi.h>
 #include <asm/hvm/support.h>
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/traps.c      Wed Nov 21 09:12:06 2007 -0700
@@ -2583,7 +2583,10 @@ void set_system_gate(unsigned int n, voi
 
 void set_task_gate(unsigned int n, unsigned int sel)
 {
+    idt_table[n].b = 0;
+    wmb(); /* disable gate /then/ rewrite */
     idt_table[n].a = sel << 16;
+    wmb(); /* rewrite /then/ enable gate */
     idt_table[n].b = 0x8500;
 }
 
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/x86_32/seg_fixup.c
--- a/xen/arch/x86/x86_32/seg_fixup.c   Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/x86_32/seg_fixup.c   Wed Nov 21 09:12:06 2007 -0700
@@ -42,7 +42,7 @@
 #define O  OPCODE_BYTE
 #define M  HAS_MODRM
 
-static unsigned char insn_decode[256] = {
+static const unsigned char insn_decode[256] = {
     /* 0x00 - 0x0F */
     O|M, O|M, O|M, O|M, X, X, X, X,
     O|M, O|M, O|M, O|M, X, X, X, X,
@@ -69,7 +69,7 @@ static unsigned char insn_decode[256] = 
     X, X, X, X, X, X, X, X,
     /* 0x80 - 0x8F */
     O|M|1, O|M|4, O|M|1, O|M|1, O|M, O|M, O|M, O|M,
-    O|M, O|M, O|M, O|M, O|M, O|M, O|M, X,
+    O|M, O|M, O|M, O|M, O|M, X|M, O|M, O|M,
     /* 0x90 - 0x9F */
     X, X, X, X, X, X, X, X,
     X, X, X, X, X, X, X, X,
@@ -89,17 +89,17 @@ static unsigned char insn_decode[256] = 
     X, X, X, X, X, X, X, X,
     X, X, X, X, X, X, X, X,
     /* 0xF0 - 0xFF */
-    X, X, X, X, X, X, X, X,
+    X, X, X, X, X, X, O|M, O|M,
     X, X, X, X, X, X, O|M, O|M
 };
 
-static unsigned char twobyte_decode[256] = {
+static const unsigned char twobyte_decode[256] = {
     /* 0x00 - 0x0F */
     X, X, X, X, X, X, X, X,
     X, X, X, X, X, X, X, X,
     /* 0x10 - 0x1F */
     X, X, X, X, X, X, X, X,
-    X, X, X, X, X, X, X, X,
+    O|M, X, X, X, X, X, X, X,
     /* 0x20 - 0x2F */
     X, X, X, X, X, X, X, X,
     X, X, X, X, X, X, X, X,
@@ -122,16 +122,16 @@ static unsigned char twobyte_decode[256]
     X, X, X, X, X, X, X, X,
     X, X, X, X, X, X, X, X,
     /* 0x90 - 0x9F */
-    X, X, X, X, X, X, X, X,
-    X, X, X, X, X, X, X, X,
+    O|M, O|M, O|M, O|M, O|M, O|M, O|M, O|M,
+    O|M, O|M, O|M, O|M, O|M, O|M, O|M, O|M,
     /* 0xA0 - 0xAF */
-    X, X, X, X, X, X, X, X,
-    X, X, X, X, X, X, X, X,
+    X, X, X, O|M, O|M|1, O|M, O|M, X,
+    X, X, X, O|M, O|M|1, O|M, X, O|M,
     /* 0xB0 - 0xBF */
-    X, X, X, X, X, X, X, X,
-    X, X, X, X, X, X, X, X,
+    X, X, X, O|M, X, X, O|M, O|M,
+    X, X, O|M|1, O|M, O|M, O|M, O|M, O|M,
     /* 0xC0 - 0xCF */
-    X, X, X, X, X, X, X, X,
+    O|M, O|M, X, O|M, X, X, X, O|M,
     X, X, X, X, X, X, X, X,
     /* 0xD0 - 0xDF */
     X, X, X, X, X, X, X, X,
@@ -153,24 +153,24 @@ static unsigned char twobyte_decode[256]
  *  @base  (OUT): Decoded linear base address.
  *  @limit (OUT): Decoded segment limit, in bytes. 0 == unlimited (4GB).
  */
-int get_baselimit(u16 seg, unsigned long *base, unsigned long *limit)
+static int get_baselimit(u16 seg, unsigned long *base, unsigned long *limit)
 {
-    struct vcpu *d = current;
-    unsigned long *table, a, b;
-    int            ldt = !!(seg & 4);
-    int            idx = (seg >> 3) & 8191;
+    struct vcpu *curr = current;
+    uint32_t    *table, a, b;
+    int          ldt = !!(seg & 4);
+    int          idx = (seg >> 3) & 8191;
 
     /* Get base and check limit. */
     if ( ldt )
     {
-        table = (unsigned long *)LDT_VIRT_START(d);
-        if ( idx >= d->arch.guest_context.ldt_ents )
+        table = (uint32_t *)LDT_VIRT_START(curr);
+        if ( idx >= curr->arch.guest_context.ldt_ents )
             goto fail;
     }
     else /* gdt */
     {
-        table = (unsigned long *)GDT_VIRT_START(d);
-        if ( idx >= d->arch.guest_context.gdt_ents )
+        table = (uint32_t *)GDT_VIRT_START(curr);
+        if ( idx >= curr->arch.guest_context.gdt_ents )
             goto fail;
     }
 
@@ -204,7 +204,7 @@ int get_baselimit(u16 seg, unsigned long
 }
 
 /* Turn a segment+offset into a linear address. */
-int linearise_address(u16 seg, unsigned long off, unsigned long *linear)
+static int linearise_address(u16 seg, unsigned long off, unsigned long *linear)
 {
     unsigned long base, limit;
 
@@ -219,31 +219,31 @@ int linearise_address(u16 seg, unsigned 
     return 1;
 }
 
-int fixup_seg(u16 seg, unsigned long offset)
+static int fixup_seg(u16 seg, unsigned long offset)
 {
-    struct vcpu *d = current;
-    unsigned long *table, a, b, base, limit;
-    int            ldt = !!(seg & 4);
-    int            idx = (seg >> 3) & 8191;
+    struct vcpu *curr = current;
+    uint32_t    *table, a, b, base, limit;
+    int          ldt = !!(seg & 4);
+    int          idx = (seg >> 3) & 8191;
 
     /* Get base and check limit. */
     if ( ldt )
     {
-        table = (unsigned long *)LDT_VIRT_START(d);
-        if ( idx >= d->arch.guest_context.ldt_ents )
+        table = (uint32_t *)LDT_VIRT_START(curr);
+        if ( idx >= curr->arch.guest_context.ldt_ents )
         {
             dprintk(XENLOG_DEBUG, "Segment %04x out of LDT range (%ld)\n",
-                    seg, d->arch.guest_context.ldt_ents);
+                    seg, curr->arch.guest_context.ldt_ents);
             goto fail;
         }
     }
     else /* gdt */
     {
-        table = (unsigned long *)GDT_VIRT_START(d);
-        if ( idx >= d->arch.guest_context.gdt_ents )
+        table = (uint32_t *)GDT_VIRT_START(curr);
+        if ( idx >= curr->arch.guest_context.gdt_ents )
         {
             dprintk(XENLOG_DEBUG, "Segment %04x out of GDT range (%ld)\n",
-                    seg, d->arch.guest_context.gdt_ents);
+                    seg, curr->arch.guest_context.gdt_ents);
             goto fail;
         }
     }
@@ -261,7 +261,7 @@ int fixup_seg(u16 seg, unsigned long off
                _SEGMENT_G|_SEGMENT_CODE|_SEGMENT_DPL)) != 
          (_SEGMENT_P|_SEGMENT_S|_SEGMENT_DB|_SEGMENT_G|_SEGMENT_DPL) )
     {
-        dprintk(XENLOG_DEBUG, "Bad segment %08lx:%08lx\n", a, b);
+        dprintk(XENLOG_DEBUG, "Bad segment %08x:%08x\n", a, b);
         goto fail;
     }
 
@@ -291,8 +291,7 @@ int fixup_seg(u16 seg, unsigned long off
         }
     }
 
-    dprintk(XENLOG_DEBUG, "None of the above! "
-            "(%08lx:%08lx, %08lx, %08lx, %08lx)\n",
+    dprintk(XENLOG_DEBUG, "None of the above! (%08x:%08x, %08x, %08x, %08x)\n",
             a, b, base, limit, base+limit);
 
  fail:
@@ -303,9 +302,8 @@ int fixup_seg(u16 seg, unsigned long off
     a &= ~0x0ffff; a |= limit & 0x0ffff;
     b &= ~0xf0000; b |= limit & 0xf0000;
     b ^= _SEGMENT_EC; /* grows-up <-> grows-down */
-    /* NB. These can't fault. Checked readable above; must also be writable. */
-    table[2*idx+0] = a;
-    table[2*idx+1] = b;
+    /* NB. This can't fault. Checked readable above; must also be writable. */
+    atomic_write64((uint64_t *)&table[2*idx], ((uint64_t)b<<32) | a);
     return 1;
 }
 
@@ -315,18 +313,15 @@ int fixup_seg(u16 seg, unsigned long off
  */
 int gpf_emulate_4gb(struct cpu_user_regs *regs)
 {
-    struct vcpu *d = current;
-    struct trap_info   *ti;
-    struct trap_bounce *tb;
-    u8            modrm, mod, reg, rm, decode;
-    void         *memreg;
-    unsigned long offset;
-    u8            disp8;
-    u32           disp32 = 0;
+    struct vcpu   *curr = current;
+    u8             modrm, mod, rm, decode;
+    const u32     *base, *index = NULL;
+    unsigned long  offset;
+    s8             disp8;
+    s32            disp32 = 0;
     u8            *eip;         /* ptr to instruction start */
     u8            *pb, b;       /* ptr into instr. / current instr. byte */
-    int            gs_override = 0;
-    int            twobyte = 0;
+    int            gs_override = 0, scale = 0, twobyte = 0;
 
     /* WARNING: We only work for ring-3 segments. */
     if ( unlikely(vm86_mode(regs)) || unlikely(!ring_3(regs)) )
@@ -356,6 +351,9 @@ int gpf_emulate_4gb(struct cpu_user_regs
                     "legal instruction\n");
             goto fail;
         }
+
+        if ( twobyte )
+            break;
 
         switch ( b )
         {
@@ -375,6 +373,9 @@ int gpf_emulate_4gb(struct cpu_user_regs
         case 0x65: /* GS override */
             gs_override = 1;
             break;
+        case 0x0f: /* Not really a prefix byte */
+            twobyte = 1;
+            break;
         default: /* Not a prefix byte */
             goto done_prefix;
         }
@@ -387,32 +388,10 @@ int gpf_emulate_4gb(struct cpu_user_regs
         goto fail;
     }
 
-    decode = insn_decode[b]; /* opcode byte */
+    decode = (!twobyte ? insn_decode : twobyte_decode)[b];
     pb++;
-    if ( decode == 0 && b == 0x0f )
-    {
-        twobyte = 1;
-
-        if ( get_user(b, pb) )
-        {
-            dprintk(XENLOG_DEBUG,
-                    "Fault while accessing byte %ld of instruction\n",
-                    (long)(pb-eip));
-            goto page_fault;
-        }
-
-        if ( (pb - eip) >= 15 )
-        {
-            dprintk(XENLOG_DEBUG, "Too many opcode bytes for a "
-                    "legal instruction\n");
-            goto fail;
-        }
-
-        decode = twobyte_decode[b];
-        pb++;
-    }
-
-    if ( decode == 0 )
+
+    if ( !(decode & OPCODE_BYTE) )
     {
         dprintk(XENLOG_DEBUG, "Unsupported %sopcode %02x\n",
                 twobyte ? "two byte " : "", b);
@@ -422,12 +401,12 @@ int gpf_emulate_4gb(struct cpu_user_regs
     if ( !(decode & HAS_MODRM) )
     {
         /* Must be a <disp32>, or bail. */
-        if ( (decode & 7) != 4 )
+        if ( (decode & INSN_SUFFIX_BYTES) != 4 )
             goto fail;
 
         if ( get_user(offset, (u32 *)pb) )
         {
-            dprintk(XENLOG_DEBUG, "Fault while extracting <disp32>.\n");
+            dprintk(XENLOG_DEBUG, "Fault while extracting <moffs32>.\n");
             goto page_fault;
         }
         pb += 4;
@@ -448,29 +427,39 @@ int gpf_emulate_4gb(struct cpu_user_regs
     pb++;
 
     mod = (modrm >> 6) & 3;
-    reg = (modrm >> 3) & 7;
     rm  = (modrm >> 0) & 7;
 
     if ( rm == 4 )
     {
-        dprintk(XENLOG_DEBUG, "FIXME: Add decoding for the SIB byte.\n");
-        goto fixme;
+        u8 sib;
+
+        if ( get_user(sib, pb) )
+        {
+            dprintk(XENLOG_DEBUG, "Fault while extracting sib byte\n");
+            goto page_fault;
+        }
+
+        pb++;
+
+        rm = sib & 7;
+        if ( (sib & 0x38) != 0x20 )
+            index = decode_register((sib >> 3) & 7, regs, 0);
+        scale = sib >> 6;
     }
 
     /* Decode R/M field. */
-    memreg = decode_register(rm,  regs, 0);
+    base = decode_register(rm, regs, 0);
 
     /* Decode Mod field. */
-    switch ( modrm >> 6 )
+    switch ( mod )
     {
     case 0:
-        disp32 = 0;
         if ( rm == 5 ) /* disp32 rather than (EBP) */
         {
-            memreg = NULL;
+            base = NULL;
             if ( get_user(disp32, (u32 *)pb) )
             {
-                dprintk(XENLOG_DEBUG, "Fault while extracting <disp8>.\n");
+                dprintk(XENLOG_DEBUG, "Fault while extracting <base32>.\n");
                 goto page_fault;
             }
             pb += 4;
@@ -484,13 +473,13 @@ int gpf_emulate_4gb(struct cpu_user_regs
             goto page_fault;
         }
         pb++;
-        disp32 = (disp8 & 0x80) ? (disp8 | ~0xff) : disp8;;
+        disp32 = disp8;
         break;
 
     case 2:
         if ( get_user(disp32, (u32 *)pb) )
         {
-            dprintk(XENLOG_DEBUG, "Fault while extracting <disp8>.\n");
+            dprintk(XENLOG_DEBUG, "Fault while extracting <disp32>.\n");
             goto page_fault;
         }
         pb += 4;
@@ -502,8 +491,10 @@ int gpf_emulate_4gb(struct cpu_user_regs
     }
 
     offset = disp32;
-    if ( memreg != NULL )
-        offset += *(u32 *)memreg;
+    if ( base != NULL )
+        offset += *base;
+    if ( index != NULL )
+        offset += *index << scale;
 
  skip_modrm:
     if ( !fixup_seg((u16)regs->gs, offset) )
@@ -513,10 +504,11 @@ int gpf_emulate_4gb(struct cpu_user_regs
     perfc_incr(seg_fixups);
 
     /* If requested, give a callback on otherwise unused vector 15. */
-    if ( VM_ASSIST(d->domain, VMASST_TYPE_4gb_segments_notify) )
-    {
-        ti  = &d->arch.guest_context.trap_ctxt[15];
-        tb  = &d->arch.trap_bounce;
+    if ( VM_ASSIST(curr->domain, VMASST_TYPE_4gb_segments_notify) )
+    {
+        struct trap_info   *ti  = &curr->arch.guest_context.trap_ctxt[15];
+        struct trap_bounce *tb  = &curr->arch.trap_bounce;
+
         tb->flags      = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
         tb->error_code = pb - eip;
         tb->cs         = ti->cs;
@@ -527,13 +519,6 @@ int gpf_emulate_4gb(struct cpu_user_regs
 
     return EXCRET_fault_fixed;
 
- fixme:
-    dprintk(XENLOG_DEBUG, "Undecodable instruction "
-            "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x "
-            "caused GPF(0) at %04x:%08x\n",
-            eip[0], eip[1], eip[2], eip[3],
-            eip[4], eip[5], eip[6], eip[7],
-            regs->cs, regs->eip);
  fail:
     return 0;
 
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/include/asm-powerpc/smpboot.h
--- a/xen/include/asm-powerpc/smpboot.h Tue Nov 20 11:53:44 2007 -0700
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- * Copyright (C) IBM Corp. 2005
- *
- * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
- */
-
-#include "../asm-x86/smpboot.h"
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/include/asm-x86/desc.h
--- a/xen/include/asm-x86/desc.h        Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/include/asm-x86/desc.h        Wed Nov 21 09:12:06 2007 -0700
@@ -143,6 +143,11 @@ typedef struct {
 
 #define _set_gate(gate_addr,type,dpl,addr)               \
 do {                                                     \
+    (gate_addr)->a = 0;                                  \
+    wmb(); /* disable gate /then/ rewrite */             \
+    (gate_addr)->b =                                     \
+        ((unsigned long)(addr) >> 32);                   \
+    wmb(); /* rewrite /then/ enable gate */              \
     (gate_addr)->a =                                     \
         (((unsigned long)(addr) & 0xFFFF0000UL) << 32) | \
         ((unsigned long)(dpl) << 45) |                   \
@@ -150,49 +155,53 @@ do {                                    
         ((unsigned long)(addr) & 0xFFFFUL) |             \
         ((unsigned long)__HYPERVISOR_CS64 << 16) |       \
         (1UL << 47);                                     \
-    (gate_addr)->b =                                     \
-        ((unsigned long)(addr) >> 32);                   \
 } while (0)
 
 #define _set_tssldt_desc(desc,addr,limit,type)           \
 do {                                                     \
+    (desc)[0].b = (desc)[1].b = 0;                       \
+    wmb(); /* disable entry /then/ rewrite */            \
     (desc)[0].a =                                        \
         ((u32)(addr) << 16) | ((u32)(limit) & 0xFFFF);   \
+    (desc)[1].a = (u32)(((unsigned long)(addr)) >> 32);  \
+    wmb(); /* rewrite /then/ enable entry */             \
     (desc)[0].b =                                        \
         ((u32)(addr) & 0xFF000000U) |                    \
         ((u32)(type) << 8) | 0x8000U |                   \
         (((u32)(addr) & 0x00FF0000U) >> 16);             \
-    (desc)[1].a = (u32)(((unsigned long)(addr)) >> 32);  \
-    (desc)[1].b = 0;                                     \
 } while (0)
 
 #elif defined(__i386__)
 
 typedef struct desc_struct idt_entry_t;
 
-#define _set_gate(gate_addr,type,dpl,addr) \
-do { \
-  int __d0, __d1; \
-  __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
- "movw %4,%%dx\n\t" \
- "movl %%eax,%0\n\t" \
- "movl %%edx,%1" \
- :"=m" (*((long *) (gate_addr))), \
-  "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
- :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
-  "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
-} while (0)
-
-#define _set_tssldt_desc(n,addr,limit,type) \
-__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
- "movw %%ax,2(%2)\n\t" \
- "rorl $16,%%eax\n\t" \
- "movb %%al,4(%2)\n\t" \
- "movb %4,5(%2)\n\t" \
- "movb $0,6(%2)\n\t" \
- "movb %%ah,7(%2)\n\t" \
- "rorl $16,%%eax" \
- : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type|0x80))
+#define _set_gate(gate_addr,type,dpl,addr)               \
+do {                                                     \
+    (gate_addr)->b = 0;                                  \
+    wmb(); /* disable gate /then/ rewrite */             \
+    (gate_addr)->a =                                     \
+        ((unsigned long)(addr) & 0xFFFFUL) |             \
+        ((unsigned long)__HYPERVISOR_CS << 16);          \
+    wmb(); /* rewrite /then/ enable gate */              \
+    (gate_addr)->b =                                     \
+        ((unsigned long)(addr) & 0xFFFF0000UL) |         \
+        ((unsigned long)(dpl) << 13) |                   \
+        ((unsigned long)(type) << 8) |                   \
+        (1UL << 15);                                     \
+} while (0)
+
+#define _set_tssldt_desc(desc,addr,limit,type)           \
+do {                                                     \
+    (desc)->b = 0;                                       \
+    wmb(); /* disable entry /then/ rewrite */            \
+    (desc)->a =                                          \
+        ((u32)(addr) << 16) | ((u32)(limit) & 0xFFFF);   \
+    wmb(); /* rewrite /then/ enable entry */             \
+    (desc)->b =                                          \
+        ((u32)(addr) & 0xFF000000U) |                    \
+        ((u32)(type) << 8) | 0x8000U |                   \
+        (((u32)(addr) & 0x00FF0000U) >> 16);             \
+} while (0)
 
 #endif
 
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/include/asm-x86/smpboot.h
--- a/xen/include/asm-x86/smpboot.h     Tue Nov 20 11:53:44 2007 -0700
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,16 +0,0 @@
-#ifndef __ASM_SMPBOOT_H
-#define __ASM_SMPBOOT_H
-
-static inline unsigned long apicid_to_phys_cpu_present(int apicid)
-{
-       return 1UL << apicid;
-}
-
-extern volatile int logical_apicid_2_cpu[];
-extern volatile int cpu_2_logical_apicid[];
-extern volatile int physical_apicid_2_cpu[];
-extern volatile int cpu_2_physical_apicid[];
-
-#define boot_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
-
-#endif
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/include/asm-x86/system.h
--- a/xen/include/asm-x86/system.h      Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/include/asm-x86/system.h      Wed Nov 21 09:12:06 2007 -0700
@@ -5,69 +5,78 @@
 #include <xen/types.h>
 #include <asm/bitops.h>
 
-#define read_segment_register(name)                                     \
-({  u16 __sel;                                                          \
-    __asm__ __volatile__ ( "movw %%" STR(name) ",%0" : "=r" (__sel) );  \
-    __sel;                                                              \
+#define read_segment_register(name)                             \
+({  u16 __sel;                                                  \
+    asm volatile ( "movw %%" STR(name) ",%0" : "=r" (__sel) );  \
+    __sel;                                                      \
 })
 
 #define wbinvd() \
-       __asm__ __volatile__ ("wbinvd": : :"memory");
+    asm volatile ( "wbinvd" : : : "memory" )
 
 #define clflush(a) \
-       __asm__ __volatile__ ("clflush (%0)": :"r"(a));
+    asm volatile ( "clflush (%0)" : : "r"(a) )
 
-#define nop() __asm__ __volatile__ ("nop")
+#define nop() \
+    asm volatile ( "nop" )
 
-#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+#define xchg(ptr,v) \
+    ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
 
 struct __xchg_dummy { unsigned long a[100]; };
 #define __xg(x) ((volatile struct __xchg_dummy *)(x))
 
+#if defined(__i386__)
+# include <asm/x86_32/system.h>
+#elif defined(__x86_64__)
+# include <asm/x86_64/system.h>
+#endif
 
 /*
  * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
  * Note 2: xchg has side effect, so that attribute volatile is necessary,
  *   but generally the primitive is invalid, *ptr is output argument. --ANK
  */
-static always_inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
+static always_inline unsigned long __xchg(
+    unsigned long x, volatile void *ptr, int size)
 {
-       switch (size) {
-               case 1:
-                       __asm__ __volatile__("xchgb %b0,%1"
-                               :"=q" (x)
-                               :"m" (*__xg((volatile void *)ptr)), "0" (x)
-                               :"memory");
-                       break;
-               case 2:
-                       __asm__ __volatile__("xchgw %w0,%1"
-                               :"=r" (x)
-                               :"m" (*__xg((volatile void *)ptr)), "0" (x)
-                               :"memory");
-                       break;
+    switch ( size )
+    {
+    case 1:
+        asm volatile ( "xchgb %b0,%1"
+                       : "=q" (x)
+                       : "m" (*__xg((volatile void *)ptr)), "0" (x)
+                       : "memory" );
+        break;
+    case 2:
+        asm volatile ( "xchgw %w0,%1"
+                       : "=r" (x)
+                       : "m" (*__xg((volatile void *)ptr)), "0" (x)
+                       : "memory" );
+        break;
 #if defined(__i386__)
-               case 4:
-                       __asm__ __volatile__("xchgl %0,%1"
-                               :"=r" (x)
-                               :"m" (*__xg((volatile void *)ptr)), "0" (x)
-                               :"memory");
-                       break;
+    case 4:
+        asm volatile ( "xchgl %0,%1"
+                       : "=r" (x)
+                       : "m" (*__xg((volatile void *)ptr)), "0" (x)
+                       : "memory" );
+        break;
 #elif defined(__x86_64__)
-               case 4:
-                       __asm__ __volatile__("xchgl %k0,%1"
-                               :"=r" (x)
-                               :"m" (*__xg((volatile void *)ptr)), "0" (x)
-                               :"memory");
-                       break;
-               case 8:
-                       __asm__ __volatile__("xchgq %0,%1"
-                               :"=r" (x)
-                               :"m" (*__xg((volatile void *)ptr)), "0" (x)
-                               :"memory");
-                       break;
+    case 4:
+        asm volatile ( "xchgl %k0,%1"
+                       : "=r" (x)
+                       : "m" (*__xg((volatile void *)ptr)), "0" (x)
+                       : "memory" );
+        break;
+    case 8:
+        asm volatile ( "xchgq %0,%1"
+                       : "=r" (x)
+                       : "m" (*__xg((volatile void *)ptr)), "0" (x)
+                       : "memory" );
+        break;
 #endif
-       }
-       return x;
+    }
+    return x;
 }
 
 /*
@@ -79,230 +88,88 @@ static always_inline unsigned long __cmp
 static always_inline unsigned long __cmpxchg(
     volatile void *ptr, unsigned long old, unsigned long new, int size)
 {
-       unsigned long prev;
-       switch (size) {
-       case 1:
-               __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
-                                    : "=a"(prev)
-                                    : "q"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
-                                    : "memory");
-               return prev;
-       case 2:
-               __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
-                                    : "=a"(prev)
-                                    : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
-                                    : "memory");
-               return prev;
+    unsigned long prev;
+    switch ( size )
+    {
+    case 1:
+        asm volatile ( LOCK_PREFIX "cmpxchgb %b1,%2"
+                       : "=a" (prev)
+                       : "q" (new), "m" (*__xg((volatile void *)ptr)),
+                       "0" (old)
+                       : "memory" );
+        return prev;
+    case 2:
+        asm volatile ( LOCK_PREFIX "cmpxchgw %w1,%2"
+                       : "=a" (prev)
+                       : "r" (new), "m" (*__xg((volatile void *)ptr)),
+                       "0" (old)
+                       : "memory" );
+        return prev;
 #if defined(__i386__)
-       case 4:
-               __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
-                                    : "=a"(prev)
-                                    : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
-                                    : "memory");
-               return prev;
+    case 4:
+        asm volatile ( LOCK_PREFIX "cmpxchgl %1,%2"
+                       : "=a" (prev)
+                       : "r" (new), "m" (*__xg((volatile void *)ptr)),
+                       "0" (old)
+                       : "memory" );
+        return prev;
 #elif defined(__x86_64__)
-       case 4:
-               __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %k1,%2"
-                                    : "=a"(prev)
-                                    : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
-                                    : "memory");
-               return prev;
-       case 8:
-               __asm__ __volatile__(LOCK_PREFIX "cmpxchgq %1,%2"
-                                    : "=a"(prev)
-                                    : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
-                                    : "memory");
-               return prev;
+    case 4:
+        asm volatile ( LOCK_PREFIX "cmpxchgl %k1,%2"
+                       : "=a" (prev)
+                       : "r" (new), "m" (*__xg((volatile void *)ptr)),
+                       "0" (old)
+                       : "memory" );
+        return prev;
+    case 8:
+        asm volatile ( LOCK_PREFIX "cmpxchgq %1,%2"
+                       : "=a" (prev)
+                       : "r" (new), "m" (*__xg((volatile void *)ptr)),
+                       "0" (old)
+                       : "memory" );
+        return prev;
 #endif
-       }
-       return old;
+    }
+    return old;
 }
 
 #define __HAVE_ARCH_CMPXCHG
 
-#if BITS_PER_LONG == 64
-
-#define cmpxchg(ptr,o,n)                                                \
-    ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),            \
-                                   (unsigned long)(n),sizeof(*(ptr))))
-#else
-
-static always_inline unsigned long long __cmpxchg8b(
-    volatile void *ptr, unsigned long long old, unsigned long long new)
-{
-    unsigned long long prev;
-    __asm__ __volatile__ (
-        LOCK_PREFIX "cmpxchg8b %3"
-        : "=A" (prev)
-        : "c" ((u32)(new>>32)), "b" ((u32)new),
-          "m" (*__xg((volatile void *)ptr)), "0" (old)
-        : "memory" );
-    return prev;
-}
-
-#define cmpxchg(ptr,o,n)                                \
-({                                                      \
-    __typeof__(*(ptr)) __prev;                          \
-    switch ( sizeof(*(ptr)) ) {                         \
-    case 8:                                             \
-        __prev = ((__typeof__(*(ptr)))__cmpxchg8b(      \
-            (ptr),                                      \
-            (unsigned long long)(o),                    \
-            (unsigned long long)(n)));                  \
-        break;                                          \
-    default:                                            \
-        __prev = ((__typeof__(*(ptr)))__cmpxchg(        \
-            (ptr),                                      \
-            (unsigned long)(o),                         \
-            (unsigned long)(n),                         \
-            sizeof(*(ptr))));                           \
-        break;                                          \
-    }                                                   \
-    __prev;                                             \
-})
-
-#endif
-
-
 /*
- * This function causes value _o to be changed to _n at location _p.
- * If this access causes a fault then we return 1, otherwise we return 0.
- * If no fault occurs then _o is updated to the value we saw at _p. If this
- * is the same as the initial value of _o then _n is written to location _p.
+ * Both Intel and AMD agree that, from a programmer's viewpoint:
+ *  Loads cannot be reordered relative to other loads.
+ *  Stores cannot be reordered relative to other stores.
+ * 
+ * Intel64 Architecture Memory Ordering White Paper
+ * <http://developer.intel.com/products/processor/manuals/318147.pdf>
+ * 
+ * AMD64 Architecture Programmer's Manual, Volume 2: System Programming
+ * <http://www.amd.com/us-en/assets/content_type/\
+ *  white_papers_and_tech_docs/24593.pdf>
  */
-#ifdef __i386__
-#define __cmpxchg_user(_p,_o,_n,_isuff,_oppre,_regtype)                 \
-    __asm__ __volatile__ (                                              \
-        "1: " LOCK_PREFIX "cmpxchg"_isuff" %"_oppre"2,%3\n"             \
-        "2:\n"                                                          \
-        ".section .fixup,\"ax\"\n"                                      \
-        "3:     movl $1,%1\n"                                           \
-        "       jmp 2b\n"                                               \
-        ".previous\n"                                                   \
-        ".section __ex_table,\"a\"\n"                                   \
-        "       .align 4\n"                                             \
-        "       .long 1b,3b\n"                                          \
-        ".previous"                                                     \
-        : "=a" (_o), "=r" (_rc)                                         \
-        : _regtype (_n), "m" (*__xg((volatile void *)_p)), "0" (_o), "1" (0) \
-        : "memory");
-#define cmpxchg_user(_p,_o,_n)                                          \
-({                                                                      \
-    int _rc;                                                            \
-    switch ( sizeof(*(_p)) ) {                                          \
-    case 1:                                                             \
-        __cmpxchg_user(_p,_o,_n,"b","b","q");                           \
-        break;                                                          \
-    case 2:                                                             \
-        __cmpxchg_user(_p,_o,_n,"w","w","r");                           \
-        break;                                                          \
-    case 4:                                                             \
-        __cmpxchg_user(_p,_o,_n,"l","","r");                            \
-        break;                                                          \
-    case 8:                                                             \
-        __asm__ __volatile__ (                                          \
-            "1: " LOCK_PREFIX "cmpxchg8b %4\n"                          \
-            "2:\n"                                                      \
-            ".section .fixup,\"ax\"\n"                                  \
-            "3:     movl $1,%1\n"                                       \
-            "       jmp 2b\n"                                           \
-            ".previous\n"                                               \
-            ".section __ex_table,\"a\"\n"                               \
-            "       .align 4\n"                                         \
-            "       .long 1b,3b\n"                                      \
-            ".previous"                                                 \
-            : "=A" (_o), "=r" (_rc)                                     \
-            : "c" ((u32)((u64)(_n)>>32)), "b" ((u32)(_n)),              \
-              "m" (*__xg((volatile void *)(_p))), "0" (_o), "1" (0)     \
-            : "memory");                                                \
-        break;                                                          \
-    }                                                                   \
-    _rc;                                                                \
-})
-#else
-#define __cmpxchg_user(_p,_o,_n,_isuff,_oppre,_regtype)                 \
-    __asm__ __volatile__ (                                              \
-        "1: " LOCK_PREFIX "cmpxchg"_isuff" %"_oppre"2,%3\n"             \
-        "2:\n"                                                          \
-        ".section .fixup,\"ax\"\n"                                      \
-        "3:     movl $1,%1\n"                                           \
-        "       jmp 2b\n"                                               \
-        ".previous\n"                                                   \
-        ".section __ex_table,\"a\"\n"                                   \
-        "       .align 8\n"                                             \
-        "       .quad 1b,3b\n"                                          \
-        ".previous"                                                     \
-        : "=a" (_o), "=r" (_rc)                                         \
-        : _regtype (_n), "m" (*__xg((volatile void *)_p)), "0" (_o), "1" (0) \
-        : "memory");
-#define cmpxchg_user(_p,_o,_n)                                          \
-({                                                                      \
-    int _rc;                                                            \
-    switch ( sizeof(*(_p)) ) {                                          \
-    case 1:                                                             \
-        __cmpxchg_user(_p,_o,_n,"b","b","q");                           \
-        break;                                                          \
-    case 2:                                                             \
-        __cmpxchg_user(_p,_o,_n,"w","w","r");                           \
-        break;                                                          \
-    case 4:                                                             \
-        __cmpxchg_user(_p,_o,_n,"l","k","r");                           \
-        break;                                                          \
-    case 8:                                                             \
-        __cmpxchg_user(_p,_o,_n,"q","","r");                            \
-        break;                                                          \
-    }                                                                   \
-    _rc;                                                                \
-})
-#endif
-
-#if defined(__i386__)
-#define mb()   __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
-#define rmb()  __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
-#elif defined(__x86_64__)
-#define mb()    __asm__ __volatile__ ("mfence":::"memory")
-#define rmb()   __asm__ __volatile__ ("lfence":::"memory")
-#endif
-#define wmb()  __asm__ __volatile__ ("": : :"memory")
+#define rmb()           barrier()
+#define wmb()           barrier()
 
 #ifdef CONFIG_SMP
-#define smp_mb()       mb()
-#define smp_rmb()      rmb()
-#define smp_wmb()      wmb()
+#define smp_mb()        mb()
+#define smp_rmb()       rmb()
+#define smp_wmb()       wmb()
 #else
-#define smp_mb()       barrier()
-#define smp_rmb()      barrier()
-#define smp_wmb()      barrier()
+#define smp_mb()        barrier()
+#define smp_rmb()       barrier()
+#define smp_wmb()       barrier()
 #endif
 
 #define set_mb(var, value) do { xchg(&var, value); } while (0)
 #define set_wmb(var, value) do { var = value; wmb(); } while (0)
 
-/* interrupt control.. */
-#if defined(__i386__)
-#define __save_flags(x)                __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */)
-#define __restore_flags(x)     __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc")
-#elif defined(__x86_64__)
-#define __save_flags(x)                do { __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0)
-#define __restore_flags(x)     __asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
-#endif
-#define __cli()                __asm__ __volatile__("cli": : :"memory")
-#define __sti()                        __asm__ __volatile__("sti": : :"memory")
+#define local_irq_disable()     asm volatile ( "cli" : : : "memory" )
+#define local_irq_enable()      asm volatile ( "sti" : : : "memory" )
+
 /* used in the idle loop; sti takes one instruction cycle to complete */
-#define safe_halt()            __asm__ __volatile__("sti; hlt": : :"memory")
+#define safe_halt()     asm volatile ( "sti; hlt" : : : "memory" )
 /* used when interrupts are already enabled or to shutdown the processor */
-#define halt()                 __asm__ __volatile__("hlt": : :"memory")
-
-/* For spinlocks etc */
-#if defined(__i386__)
-#define local_irq_save(x)      __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
-#define local_irq_restore(x)   __restore_flags(x)
-#elif defined(__x86_64__)
-#define local_irq_save(x)      do { __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
-#define local_irq_restore(x)   __asm__ __volatile__("# local_irq_restore \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory")
-#endif
-#define local_irq_disable()    __cli()
-#define local_irq_enable()     __sti()
+#define halt()          asm volatile ( "hlt" : : : "memory" )
 
 static inline int local_irq_is_enabled(void)
 {
@@ -311,8 +178,8 @@ static inline int local_irq_is_enabled(v
     return !!(flags & (1<<9)); /* EFLAGS_IF */
 }
 
-#define BROKEN_ACPI_Sx         0x0001
-#define BROKEN_INIT_AFTER_S1   0x0002
+#define BROKEN_ACPI_Sx          0x0001
+#define BROKEN_INIT_AFTER_S1    0x0002
 
 void trap_init(void);
 void percpu_traps_init(void);
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/include/asm-x86/x86_32/system.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/x86_32/system.h       Wed Nov 21 09:12:06 2007 -0700
@@ -0,0 +1,114 @@
+#ifndef __X86_32_SYSTEM_H__
+#define __X86_32_SYSTEM_H__
+
+static always_inline unsigned long long __cmpxchg8b(
+    volatile void *ptr, unsigned long long old, unsigned long long new)
+{
+    unsigned long long prev;
+    asm volatile (
+        LOCK_PREFIX "cmpxchg8b %3"
+        : "=A" (prev)
+        : "c" ((u32)(new>>32)), "b" ((u32)new),
+          "m" (*__xg((volatile void *)ptr)), "0" (old)
+        : "memory" );
+    return prev;
+}
+
+#define cmpxchg(ptr,o,n)                                \
+({                                                      \
+    __typeof__(*(ptr)) __prev;                          \
+    switch ( sizeof(*(ptr)) ) {                         \
+    case 8:                                             \
+        __prev = ((__typeof__(*(ptr)))__cmpxchg8b(      \
+            (ptr),                                      \
+            (unsigned long long)(o),                    \
+            (unsigned long long)(n)));                  \
+        break;                                          \
+    default:                                            \
+        __prev = ((__typeof__(*(ptr)))__cmpxchg(        \
+            (ptr),                                      \
+            (unsigned long)(o),                         \
+            (unsigned long)(n),                         \
+            sizeof(*(ptr))));                           \
+        break;                                          \
+    }                                                   \
+    __prev;                                             \
+})
+
+/*
+ * Compare-and-exchange at location _p: _n is stored only if _p holds _o.
+ * _rc is set to 1 if the access faults and to 0 otherwise. If no fault occurs
+ * then _o is updated to the value we saw at _p; the store took place iff this
+ * is the same as the initial value of _o.
+ */
+#define __cmpxchg_user(_p,_o,_n,_isuff,_oppre,_regtype)                 \
+    asm volatile (                                                      \
+        "1: " LOCK_PREFIX "cmpxchg"_isuff" %"_oppre"2,%3\n"             \
+        "2:\n"                                                          \
+        ".section .fixup,\"ax\"\n"                                      \
+        "3:     movl $1,%1\n"                                           \
+        "       jmp 2b\n"                                               \
+        ".previous\n"                                                   \
+        ".section __ex_table,\"a\"\n"                                   \
+        "       .align 4\n"                                             \
+        "       .long 1b,3b\n"                                          \
+        ".previous"                                                     \
+        : "=a" (_o), "=r" (_rc)                                         \
+        : _regtype (_n), "m" (*__xg((volatile void *)(_p))), "0" (_o), "1" (0) \
+        : "memory");
+
+#define cmpxchg_user(_p,_o,_n)                                          \
+({                                                                      \
+    int _rc;                                                            \
+    switch ( sizeof(*(_p)) ) {                                          \
+    case 1:                                                             \
+        __cmpxchg_user(_p,_o,_n,"b","b","q");                           \
+        break;                                                          \
+    case 2:                                                             \
+        __cmpxchg_user(_p,_o,_n,"w","w","r");                           \
+        break;                                                          \
+    case 4:                                                             \
+        __cmpxchg_user(_p,_o,_n,"l","","r");                            \
+        break;                                                          \
+    case 8:                                                             \
+        asm volatile (                                                  \
+            "1: " LOCK_PREFIX "cmpxchg8b %4\n"                          \
+            "2:\n"                                                      \
+            ".section .fixup,\"ax\"\n"                                  \
+            "3:     movl $1,%1\n"                                       \
+            "       jmp 2b\n"                                           \
+            ".previous\n"                                               \
+            ".section __ex_table,\"a\"\n"                               \
+            "       .align 4\n"                                         \
+            "       .long 1b,3b\n"                                      \
+            ".previous"                                                 \
+            : "=A" (_o), "=r" (_rc)                                     \
+            : "c" ((u32)((u64)(_n)>>32)), "b" ((u32)(_n)),              \
+              "m" (*__xg((volatile void *)(_p))), "0" (_o), "1" (0)     \
+            : "memory");                                                \
+        break;                                                          \
+    }                                                                   \
+    _rc;                                                                \
+})
+
+static inline void atomic_write64(uint64_t *p, uint64_t v)
+{
+    uint64_t w = *p, x;
+    while ( (x = __cmpxchg8b(p, w, v)) != w )
+        w = x;
+}
+
+#define mb()                    \
+    asm volatile ( "lock; addl $0,0(%%esp)" : : : "memory" )
+
+#define __save_flags(x)         \
+    asm volatile ( "pushfl ; popl %0" : "=g" (x) : )
+#define __restore_flags(x)      \
+    asm volatile ( "pushl %0 ; popfl" : : "g" (x) : "memory", "cc" )
+
+#define local_irq_save(x)       \
+    asm volatile ( "pushfl ; popl %0 ; cli" : "=g" (x) : : "memory" )
+#define local_irq_restore(x)    \
+    __restore_flags(x)
+
+#endif /* __X86_32_SYSTEM_H__ */
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/include/asm-x86/x86_64/system.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/x86_64/system.h       Wed Nov 21 09:12:06 2007 -0700
@@ -0,0 +1,68 @@
+#ifndef __X86_64_SYSTEM_H__
+#define __X86_64_SYSTEM_H__
+
+#define cmpxchg(ptr,o,n)                                                \
+    ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),            \
+                                   (unsigned long)(n),sizeof(*(ptr))))
+
+/*
+ * Compare-and-exchange at location _p: _n is stored only if _p holds _o.
+ * _rc is set to 1 if the access faults and to 0 otherwise. If no fault occurs
+ * then _o is updated to the value we saw at _p; the store took place iff this
+ * is the same as the initial value of _o.
+ */
+#define __cmpxchg_user(_p,_o,_n,_isuff,_oppre,_regtype)                 \
+    asm volatile (                                                      \
+        "1: " LOCK_PREFIX "cmpxchg"_isuff" %"_oppre"2,%3\n"             \
+        "2:\n"                                                          \
+        ".section .fixup,\"ax\"\n"                                      \
+        "3:     movl $1,%1\n"                                           \
+        "       jmp 2b\n"                                               \
+        ".previous\n"                                                   \
+        ".section __ex_table,\"a\"\n"                                   \
+        "       .align 8\n"                                             \
+        "       .quad 1b,3b\n"                                          \
+        ".previous"                                                     \
+        : "=a" (_o), "=r" (_rc)                                         \
+        : _regtype (_n), "m" (*__xg((volatile void *)(_p))), "0" (_o), "1" (0) \
+        : "memory");
+
+#define cmpxchg_user(_p,_o,_n)                                          \
+({                                                                      \
+    int _rc;                                                            \
+    switch ( sizeof(*(_p)) ) {                                          \
+    case 1:                                                             \
+        __cmpxchg_user(_p,_o,_n,"b","b","q");                           \
+        break;                                                          \
+    case 2:                                                             \
+        __cmpxchg_user(_p,_o,_n,"w","w","r");                           \
+        break;                                                          \
+    case 4:                                                             \
+        __cmpxchg_user(_p,_o,_n,"l","k","r");                           \
+        break;                                                          \
+    case 8:                                                             \
+        __cmpxchg_user(_p,_o,_n,"q","","r");                            \
+        break;                                                          \
+    }                                                                   \
+    _rc;                                                                \
+})
+
+static inline void atomic_write64(uint64_t *p, uint64_t v)
+{
+    *p = v;
+}
+
+#define mb()                    \
+    asm volatile ( "mfence" : : : "memory" )
+
+#define __save_flags(x)         \
+    asm volatile ( "pushfq ; popq %q0" : "=g" (x) : :"memory" )
+#define __restore_flags(x)      \
+    asm volatile ( "pushq %0 ; popfq" : : "g" (x) : "memory", "cc" )
+
+#define local_irq_save(x)       \
+    asm volatile ( "pushfq ; popq %0 ; cli" : "=g" (x) : : "memory" )
+#define local_irq_restore(x)    \
+    __restore_flags(x)
+
+#endif /* __X86_64_SYSTEM_H__ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>