# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1195661526 25200
# Node ID 53dc1cf505060a06e5b34a4812fce4312743ca26
# Parent 9a9ddc04eea2cac0ccfe8be2b9259b4edea5ec9d
# Parent 05cbf512b82b2665d407395bac73b9cca0c396b4
merge with xen-unstable.hg (staging)
---
xen/include/asm-powerpc/smpboot.h | 21
xen/include/asm-x86/smpboot.h | 16
tools/ioemu/Makefile.target | 2
tools/ioemu/hw/e100.c | 2464 ++++++++++++++++++++++++++++++++++++
tools/ioemu/hw/pci.c | 2
xen/arch/x86/hvm/hpet.c | 8
xen/arch/x86/irq.c | 1
xen/arch/x86/mm.c | 15
xen/arch/x86/physdev.c | 1
xen/arch/x86/smp.c | 1
xen/arch/x86/traps.c | 3
xen/arch/x86/x86_32/seg_fixup.c | 183 +-
xen/include/asm-x86/desc.h | 63
xen/include/asm-x86/system.h | 365 +----
xen/include/asm-x86/x86_32/system.h | 114 +
xen/include/asm-x86/x86_64/system.h | 68
16 files changed, 2897 insertions(+), 430 deletions(-)
diff -r 9a9ddc04eea2 -r 53dc1cf50506 tools/ioemu/Makefile.target
--- a/tools/ioemu/Makefile.target Tue Nov 20 11:53:44 2007 -0700
+++ b/tools/ioemu/Makefile.target Wed Nov 21 09:12:06 2007 -0700
@@ -399,7 +399,7 @@ VL_OBJS+= usb.o usb-hub.o usb-linux.o us
VL_OBJS+= usb.o usb-hub.o usb-linux.o usb-hid.o usb-ohci.o usb-msd.o
# PCI network cards
-VL_OBJS+= ne2000.o rtl8139.o pcnet.o
+VL_OBJS+= ne2000.o rtl8139.o pcnet.o e100.o
ifeq ($(TARGET_BASE_ARCH), i386)
# Hardware support
diff -r 9a9ddc04eea2 -r 53dc1cf50506 tools/ioemu/hw/e100.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ioemu/hw/e100.c Wed Nov 21 09:12:06 2007 -0700
@@ -0,0 +1,2464 @@
+/*
+ * QEMU E100(i82557) ethernet card emulation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Copyright (c) 2006-2007 Stefan Weil
+ * Copyright (c) 2006-2007 Zhang Xin(xing.z.zhang@xxxxxxxxx)
+ *
+ * Support OS:
+ * x86 linux and windows
+ * PAE linux and windows
+ * x86_64 linux and windows
+ * IA64 linux and windows
+ *
+ * Untested:
+ * Big-endian machine
+ *
+ * References:
+ *
+ * Intel 8255x 10/100 Mbps Ethernet Controller Family
+ * Open Source Software Developer Manual
+ */
+
+#include <assert.h>
+#include "vl.h"
+
+/*
+ * Byte offsets of the PCI configuration-space registers named by this file.
+ * The trailing comment on each entry gives the register width.
+ * NOTE(review): the identifier after the closing brace declares a file-scope
+ * variable of this anonymous enum type; it is not a type name.
+ */
+enum
+{
+ E100_PCI_VENDOR_ID = 0x00, /* 16 bits */
+ E100_PCI_DEVICE_ID = 0x02, /* 16 bits */
+ E100_PCI_COMMAND = 0x04, /* 16 bits */
+ E100_PCI_STATUS = 0x06, /* 16 bits */
+ E100_PCI_REVISION_ID = 0x08, /* 8 bits */
+ E100_PCI_CLASS_CODE = 0x0b, /* 8 bits */
+ E100_PCI_SUBCLASS_CODE = 0x0a, /* 8 bits */
+ E100_PCI_HEADER_TYPE = 0x0e, /* 8 bits */
+ E100_PCI_BASE_ADDRESS_0 = 0x10, /* 32 bits */
+ E100_PCI_BASE_ADDRESS_1 = 0x14, /* 32 bits */
+ E100_PCI_BASE_ADDRESS_2 = 0x18, /* 32 bits */
+ E100_PCI_BASE_ADDRESS_3 = 0x1c, /* 32 bits */
+ E100_PCI_BASE_ADDRESS_4 = 0x20, /* 32 bits */
+ E100_PCI_BASE_ADDRESS_5 = 0x24 /* 32 bits */
+}PCI_CONFIGURE_SPACE;
+
+/*
+ * Store helpers for PCI configuration space. Each expands to an assignment
+ * through a byte pointer that must be in scope under the name `pci_conf`.
+ * The 16- and 32-bit forms pass the value through cpu_to_le16() /
+ * cpu_to_le32() before storing it.
+ */
+#define PCI_CONFIG_8(offset, value) \
+ (*(uint8_t *)&pci_conf[offset] = (value))
+#define PCI_CONFIG_16(offset, value) \
+ (*(uint16_t *)&pci_conf[offset] = cpu_to_le16(value))
+#define PCI_CONFIG_32(offset, value) \
+ (*(uint32_t *)&pci_conf[offset] = cpu_to_le32(value))
+
+// Aliases mapping the CSR_* register names onto the member names of csr_t
+#define CSR_STATUS scb_status
+#define CSR_CMD scb_cmd
+#define CSR_POINTER scb_pointer
+#define CSR_PORT port
+#define CSR_EEPROM eeprom_ctrl
+#define CSR_MDI mdi_ctrl
+#define CSR_PM pm_reg
+
+// CSR(class, field): bit-field `field` of register `class` in s->pci_mem.csr.
+// CSR_VAL(class): the same register accessed as a whole through `.val`.
+// Both expect a variable `s` pointing at the device state to be in scope.
+#define CSR(class, field) \
+ (s->pci_mem.csr.class.u.field)
+#define CSR_VAL(class) \
+ (s->pci_mem.csr.class.val)
+
+// CSR_READ(x, type): copy sizeof(type) bytes starting at byte offset x of
+// s->pci_mem.mem into a temporary and yield it (GNU statement expression).
+#define CSR_READ(x, type) \
+ ({ \
+ type t; \
+ memcpy(&t, &s->pci_mem.mem[x], sizeof(type)); \
+ t; \
+ })
+
+// CSR_WRITE(x, val, type): convert val to `type` and copy it to byte offset x
+// of s->pci_mem.mem.
+#define CSR_WRITE(x, val, type) \
+ ({ \
+ type t = val; \
+ memcpy(&s->pci_mem.mem[x], &t, sizeof(type)); \
+ })
+
+// Accessors for the CU state field of the SCB status word
+#define SET_CU_STATE(val) \
+ (CSR(CSR_STATUS, cus) = val)
+#define GET_CU_STATE \
+ (CSR(CSR_STATUS, cus))
+
+// Accessors for the RU state field of the SCB status word
+#define SET_RU_STATE(val) \
+ (CSR(CSR_STATUS, rus) = val)
+#define GET_RU_STATE \
+ (CSR(CSR_STATUS, rus))
+
+#define KiB 1024
+
+/* Number of 16-bit words in the emulated EEPROM image */
+#define EEPROM_SIZE 64
+
+#define BIT(n) (1U << (n))
+
+/* debug E100 card */
+//#define DEBUG_E100
+
+/*
+ * logout(): debug trace. With DEBUG_E100 defined it prints the calling
+ * function name followed by the formatted message on stderr; otherwise it
+ * expands to a no-op and its arguments are not evaluated.
+ */
+#ifdef DEBUG_E100
+#define logout(fmt, args...) \
+    fprintf(stderr, "EE100\t%-28s" fmt, __func__, ##args)
+#else
+#define logout(fmt, args...) ((void)0)
+#endif
+
+#define MAX_ETH_FRAME_SIZE 1514
+
+/* This driver supports several different devices which are declared here. */
+#define i82551 0x82551
+#define i82557B 0x82557b
+#define i82557C 0x82557c
+#define i82558B 0x82558b
+#define i82559C 0x82559c
+#define i82559ER 0x82559e
+#define i82562 0x82562
+
+/* Sizes of the three PCI regions (see E100_PCI_MEMORY_REGION) */
+#define PCI_MEM_SIZE (4 * KiB)
+#define PCI_IO_SIZE (64)
+#define PCI_FLASH_SIZE (128 * KiB)
+
+/* Direction of a register access, as passed in the `dir` argument of the
+ * scb_*_func() handlers. */
+enum
+{
+ OP_READ,
+ OP_WRITE,
+} OPERTAION_DIRECTION;
+
+/* The SCB accepts the following controls for the Tx and Rx units: */
+/* Command Unit (CU) commands.
+ * NOTE(review): the values look pre-shifted to bits 4-7, which is where csr_t
+ * declares scb_cmd.u.cu_cmd -- confirm against the code that decodes them. */
+enum
+{
+ CU_NOP = 0x0000, /* No operation */
+ CU_START = 0x0010, /* CU start */
+ CU_RESUME = 0x0020, /* CU resume */
+ CU_STATSADDR = 0x0040, /* Load dump counters address */
+ CU_SHOWSTATS = 0x0050, /* Dump statistical counters */
+ CU_CMD_BASE = 0x0060, /* Load CU base address */
+ CU_DUMPSTATS = 0x0070, /* Dump and reset statistical counters */
+ CU_S_RESUME = 0x00a0 /* CU static resume */
+}CONTROL_UNIT_COMMAND;
+
+/* Receive Unit (RU) commands; the values fit the 3-bit scb_cmd.u.ru_cmd
+ * field declared in csr_t. */
+enum
+{
+ RU_NOP = 0x0000,
+ RU_START = 0x0001,
+ RU_RESUME = 0x0002,
+ RU_DMA_REDIRECT = 0x0003,
+ RU_ABORT = 0x0004,
+ RU_LOAD_HDS = 0x0005,
+ RU_ADDR_LOAD = 0x0006,
+ RU_RESUMENR = 0x0007,
+}RECEIVE_UNIT_COMMAND;
+
+/* SCB status word descriptions */
+/* Command Unit states, stored in scb_status.u.cus via SET_CU_STATE(). */
+enum
+{
+ CU_IDLE = 0,
+ CU_SUSPENDED = 1,
+ CU_LPQ_ACTIVE = 2,
+ CU_HQP_ACTIVE = 3
+} CONTROL_UINT_STATE;
+
+/* Receive Unit states, stored in scb_status.u.rus via SET_RU_STATE(). */
+enum
+{
+ RU_IDLE = 0,
+ RU_SUSPENDED = 1,
+ RU_NO_RESOURCES =2,
+ RU_READY = 4
+} RECEIVE_UNIT_STATE;
+
+/* Function selectors for the SCB PORT register. scb_port_func() extracts
+ * them from the low four bits of the written value. */
+enum
+{
+ PORT_SOFTWARE_RESET = 0,
+ PORT_SELF_TEST = 1,
+ PORT_SELECTIVE_RESET = 2,
+ PORT_DUMP = 3,
+ PORT_DUMP_WAKE_UP = 7,
+}SCB_PORT_SELECTION_FUNCTION;
+
+/* Command codes of a command block in the command block list (CBL).
+ * NOTE(review): presumably the value carried in control_block.cmd -- the
+ * decoding code is not visible here, confirm. */
+enum
+{
+ CBL_NOP = 0,
+ CBL_IASETUP = 1,
+ CBL_CONFIGURE = 2,
+ CBL_MULTCAST_ADDR_SETUP = 3,
+ CBL_TRANSMIT = 4,
+ CBL_LOAD_MICROCODE = 5,
+ CBL_DUMP = 6,
+ CBL_DIAGNOSE = 7,
+}CBL_COMMAND;
+
+/*
+ * Byte offsets of the SCB registers inside the CSR window; used as the
+ * offset argument of CSR_READ()/CSR_WRITE().
+ */
+enum
+{
+    /* SCB base + 0x00h, RU states + CU states + STAT/ACK */
+    SCB_STATUS = 0,
+    SCB_ACK = 1,            /* SCB ack/stat */
+    SCB_CMD = 2,            /* RU command + CU command + S bit + M bit */
+    SCB_INTERRUPT_MASK = 3, /* Interrupts mask bits */
+    /* SCB general pointer, depending on command type */
+    SCB_POINTER = 4,
+    SCB_PORT = 8,           /* SCB port register */
+    SCB_EEPROM = 0xe,       /* SCB eeprom control register */
+    SCB_MDI =0x10,          /* SCB MDI control register */
+} CSR_OFFSETS;
+
+/* Bit masks of the serial-EEPROM signals in the EEPROM control register;
+ * they match the eesk/eecs/eedi/eedo bit-field order in csr_t. */
+enum
+{
+ EEPROM_SK = 0x01,
+ EEPROM_CS = 0x02,
+ EEPROM_DI = 0x04,
+ EEPROM_DO = 0x08,
+} EEPROM_CONTROL_REGISTER;
+
+/* Two-bit opcodes clocked into the EEPROM after the start bit
+ * (compared against eeprom_t.opcode). */
+enum
+{
+ EEPROM_READ = 0x2,
+ EEPROM_WRITE = 0x1,
+ EEPROM_ERASE = 0x3,
+} EEPROM_OPCODE;
+
+/* Opcode field of the MDI control register (bits 26-27, as decoded in
+ * e100_write_mdi()). */
+enum
+{
+ MDI_WRITE = 0x1,
+ MDI_READ = 0x2,
+} MDI_OPCODE;
+
+/* Interrupt cause bits. e100_interrupt() ORs them into the 16-bit SCB status
+ * word, so bits 8-15 land in its stat/ack byte. */
+enum
+{
+ INT_FCP = BIT(8),
+ INT_SWI = BIT(10),
+ INT_MDI = BIT(11),
+ INT_RNR = BIT(12),
+ INT_CNA = BIT(13),
+ INT_FR = BIT(14),
+ INT_CX_TNO = BIT(15),
+} E100_INTERRUPT;
+
+/* Indices of the device's PCI regions; REGION_NUM sizes
+ * E100State.region_base_addr[]. */
+enum
+{
+ CSR_MEMORY_BASE,
+ CSR_IO_BASE,
+ FLASH_MEMORY_BASE,
+ REGION_NUM
+}E100_PCI_MEMORY_REGION;
+
+/*
+ * Statistical counters kept by the device model: seventeen consecutive
+ * 32-bit words, the last being the completion word for a dump command.
+ */
+typedef struct {
+    uint32_t tx_good_frames,        // Good frames transmitted
+             tx_max_collisions,     // Fatal frames -- had max collisions
+             tx_late_collisions,    // Fatal frames -- had a late coll.
+             tx_underruns,          // Transmit underruns (fatal or re-transmit)
+             tx_lost_crs,           // Frames transmitted without CRS
+             tx_deferred,           // Deferred transmits
+             tx_single_collisions,  // Transmits that had 1 and only 1 coll.
+             tx_multiple_collisions,// Transmits that had multiple coll.
+             tx_total_collisions,   // Transmits that had 1+ collisions.
+
+             rx_good_frames,        // Good frames received
+             rx_crc_errors,         // Aligned frames that had a CRC error
+             rx_alignment_errors,   // Receives that had alignment errors
+             rx_resource_errors,    // Good frame dropped due to lack of resources
+             rx_overrun_errors,     // Overrun errors - bus was busy
+             rx_cdt_errors,         // Received frames that encountered coll.
+             rx_short_frame_errors, // Received frames that were too short
+
+             complete_word;         // A005h indicates dump cmd completion,
+                                    // A007h indicates dump and reset cmd
+                                    // completion.
+
+// TODO: Add specific field for i82558, i82559
+} __attribute__ ((packed)) e100_stats_t;
+
+/* Number of address bits clocked in per EEPROM command on the i82557 */
+#define EEPROM_I82557_ADDRBIT 6
+/* Below data is dumped from a real I82557 card */
+static const uint16_t eeprom_i82557[] =
+{
+    0x300, 0xe147, 0x2fa4, 0x203, 0x0, 0x201, 0x4701, 0x0, 0x7414, 0x6207,
+    0x4082, 0xb, 0x8086, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x128, 0x0, 0x0, 0x0, 0x0,
+    0x0,
+    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc374,
+};
+
+/*
+ * Default image of the 256-byte PCI configuration space; pci_reset() copies
+ * it into the device's config area. Two lines below hold one 16-byte row.
+ */
+static const uint8_t e100_pci_configure[] =
+{
+    /* 0x00 */
+    0x86, 0x80, 0x29, 0x12, 0x17, 0x00, 0x90, 0x02,
+    0x08, 0x00, 0x00, 0x02, 0x10, 0x20, 0x00, 0x00,
+    /* 0x10 */
+    0x00, 0x00, 0x10, 0x50, 0x01, 0x10, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x00,
+    /* 0x20 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x86, 0x80, 0x0b, 0x00,
+    /* 0x30 */
+    0x00, 0x00, 0xf0, 0xff, 0xdc, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x09, 0x01, 0x08, 0x38,
+    /* 0x40 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    /* 0x50 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    /* 0x60 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    /* 0x70 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    /* 0x80 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    /* 0x90 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    /* 0xa0 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    /* 0xb0 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    /* 0xc0 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    /* 0xd0 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x22, 0xfe,
+    /* 0xe0 */
+    0x00, 0x40, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    /* 0xf0 */
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+/*
+ * State of the bit-banged serial EEPROM: the command currently being shifted
+ * in, the last seen chip-select/clock levels, and the EEPROM contents.
+ */
+typedef struct
+{
+/* Values of val_type: which part of a command the bits in `val` belong to */
+#define OPCODE 0xb
+#define ADDR 0xc
+#define DATA 0xd
+#define NOP 0xe
+
+/* `type` argument of eeprom_reset() */
+#define EEPROM_RESET_ALL 0xfe
+#define EEPROM_SELECT_RESET 0xff
+ uint8_t start_bit;
+ uint8_t opcode;
+ uint8_t address;
+ uint16_t data; // Must be 16 bits wide: it holds one EEPROM register
+
+ uint32_t val; // Bits shifted in so far
+ uint32_t val_len; // Number of bits held in val
+ uint8_t val_type; // What data type is in DI. opcode?address?data?
+
+ uint8_t cs; // Last chip-select level
+ uint8_t sk; // Last serial-clock level
+
+ // These two fields are left alone by EEPROM_SELECT_RESET
+ uint16_t addr_len;
+ uint16_t contents[256]; // 256 words is enough for every device (i82557 ... i82559)
+} eeprom_t;
+
+// Control/Status register structure
+//
+// Overlays the start of the CSR window (it shares a union with
+// E100State.pci_mem.mem). Each register is a union of the raw value `val`
+// and a bit-field view `u`, reached through CSR_VAL() and CSR().
+// NOTE(review): the bit-field views depend on the compiler's bit-field
+// allocation order, and only csr_t itself carries the packed attribute, not
+// the nested structs. In mdi_ctrl the 5-bit phyaddr follows the 5-bit regaddr
+// in uint8_t fields, so the nested struct may be laid out wider than 4 bytes
+// and shift the members that follow -- verify sizeof(mdi_ctrl) and the
+// offsets of later fields on the target compiler.
+typedef struct
+{
+ /* SCB status word */
+ union
+ {
+ uint16_t val;
+ struct
+ {
+ uint8_t rs1:2; // Reserved
+ uint8_t rus:4; // RU status
+ uint8_t cus:2; // CU status
+ uint8_t stat_ack; // Stat/ACK
+ }u;
+ }scb_status;
+
+ /* SCB command word */
+ union
+ {
+ uint16_t val;
+ struct
+ {
+ uint8_t ru_cmd:3; // RU command
+ uint8_t rs1:1; // Reserved
+ uint8_t cu_cmd:4; // CU command
+ uint8_t m:1; // Interrupt mask bit (1: mask all interrupts)
+ uint8_t si:1; // Used by software to cause an interrupt
+ uint8_t simb:6; // Specific interrupt mask bits
+ }u;
+ }scb_cmd;
+
+ /* SCB general pointer */
+ union
+ {
+ uint32_t val;
+ struct
+ {
+ uint32_t scb_ptr;
+ }u;
+ }scb_pointer;
+
+ /* Port interface */
+ union
+ {
+ uint32_t val;
+ struct
+ {
+ uint8_t opcode:4; // Op code for function selection
+ uint32_t ptr:28; // Result pointer
+ }u;
+ }port;
+
+ uint16_t rs1; // Reserved
+
+ /* EEPROM control register */
+ union
+ {
+ uint16_t val;
+ struct
+ {
+ uint8_t eesk:1; // Serial clock
+ uint8_t eecs:1; // Chip select
+ uint8_t eedi:1; // Serial data in
+ uint8_t eedo:1; // Serial data out
+ uint8_t rs1:4; // Reserved
+ uint8_t data;
+ }u;
+ }eeprom_ctrl;
+
+ /* MDI control register */
+ union
+ {
+ uint32_t val;
+ struct
+ {
+ uint16_t data; // Data
+ uint8_t regaddr:5; // PHY register address
+ uint8_t phyaddr:5; // PHY address
+ uint8_t opcode:2; // Opcode
+ uint8_t r:1; // Ready
+ uint8_t ie:1; // Interrupt enable
+ uint8_t rs1:2; // Reserved
+ }u;
+ } mdi_ctrl;
+
+ /* Receive byte counter register */
+ uint32_t rx_byte_counter;
+
+ /* Early receive interrupt register */
+ uint8_t early_interrupt;
+
+ /* Flow control register */
+ union
+ {
+ uint16_t val;
+ }flow_ctrl;
+
+ /* Power management driver register */
+ union
+ {
+ uint8_t val;
+ struct
+ {
+ uint8_t pme_s:1; // PME status
+ uint8_t tco_r:1; // TCO request
+ uint8_t f_tco_i:1; // Force TCO indication
+ uint8_t tco_re:1; // TCO ready
+ uint8_t rs1:1; // Reserved
+ uint8_t isp:1; // Interesting packet
+ uint8_t mg:1; // Magic packet
+ uint8_t lsci:1; // Link status change indication
+ }u;
+ }pm_reg;
+
+ /* General control register */
+ uint8_t gen_ctrl;
+
+ /* General status register */
+ uint8_t gen_status;
+
+ /* Registers that are reserved or not supported by this model */
+ uint8_t others[30];
+} __attribute__ ((packed)) csr_t;
+
+/*
+ * Bit-field view of the device's configuration bytes (stored in
+ * E100State.config). Each group of declarations that shares one uint8_t /
+ * uint16_t statement describes one configuration byte or reserved word;
+ * fields named rsN are reserved.
+ * NOTE(review): the mapping of bit-fields to bit positions relies on the
+ * compiler allocating bit-fields from the least significant bit -- confirm
+ * for the target ABI.
+ */
+typedef struct
+{
+ uint8_t byte_count;
+ uint8_t rx_fifo_limit:4;
+ uint8_t tx_fifo_limit:4;
+ uint8_t adpt_inf_spacing;
+ uint8_t rs1;
+ uint8_t rx_dma_max_bytes;
+ uint8_t tx_dma_max_bytes:7;
+ uint8_t dmbc_en:1;
+ uint8_t late_scb:1,
+ rs2:1,
+ tno_intr:1,
+ ci_intr:1,
+ rs3:1,
+ rs4:1,
+ dis_overrun_rx:1,
+ save_bad_frame:1;
+ uint8_t dis_short_rx:1,
+ underrun_retry:2,
+ rs5:5;
+ uint8_t mii:1,
+ rs6:7;
+ uint8_t rs7;
+ uint8_t rs8:3,
+ nsai:1,
+ preamble_len:2,
+ loopback:2;
+ uint8_t linear_prio:3,
+ rs9:5;
+ uint8_t pri_mode:1,
+ rs10:3,
+ interframe_spacing:4;
+ uint16_t rs11;
+ uint8_t promiscuous:1,
+ broadcast_dis:1,
+ rs12:5,
+ crs_cdt:1;
+ uint16_t rs13;
+ uint8_t strip:1,
+ padding:1,
+ rx_crc:1,
+ rs14:5;
+ uint8_t rs15:6,
+ force_fdx:1,
+ fdx_en:1;
+ uint8_t rs16:6,
+ mul_ia:2;
+ uint8_t rs17:3,
+ mul_all:1,
+ rs18:4;
+} __attribute__ ((packed)) i82557_cfg_t;
+
+/*
+ * Complete state of one emulated E100 device: backend handles, pending
+ * interrupt causes, PHY and EEPROM state, CU/RU pointers, statistics,
+ * configuration bytes, the packet FIFO and the register window.
+ */
+typedef struct {
+    VLANClientState *vc;
+    PCIDevice *pci_dev;
+    int mmio_index;
+    uint8_t scb_stat;           /* SCB stat/ack byte */
+    uint32_t region_base_addr[REGION_NUM];  /* PCI region addresses */
+    uint8_t macaddr[6];
+    uint16_t mdimem[32];        /* MDI (PHY) registers */
+    eeprom_t eeprom;
+    uint32_t device;            /* device variant */
+
+    uint8_t mult_list[8];       /* Multicast address list */
+    int is_multcast_enable;
+
+    /* (cu_base + cu_offset) address the next command block in the command
+     * block list. */
+    uint32_t cu_base;           /* CU base address */
+    uint32_t cu_offset;         /* CU address offset */
+    uint32_t cu_next;           /* Point to next command when CU go to suspend */
+
+    /* (ru_base + ru_offset) address the RFD in the Receive Frame Area. */
+    uint32_t ru_base;           /* RU base address */
+    uint32_t ru_offset;         /* RU address offset */
+
+    uint32_t statsaddr;         /* pointer to e100_stats_t */
+
+    e100_stats_t statistics;    /* statistical counters */
+
+    /* Configuration bytes. */
+    i82557_cfg_t config;
+
+    /* FIFO buffer of card. The packet that need to be sent buffered in it */
+    uint8_t pkt_buf[MAX_ETH_FRAME_SIZE+4];
+    /* Data length in FIFO buffer */
+    int pkt_buf_len;
+
+    /* Data in mem is always in the byte order of the controller (le). */
+    union
+    {
+        csr_t csr;
+        uint8_t mem[PCI_MEM_SIZE];
+    }pci_mem;
+
+} E100State;
+
+/* CB structure, filled by device driver
+ * This is a common structure of CB. In some
+ * special case such as TRANSMIT command, the
+ * reserved field will be used.
+ *
+ * Layout: a 16-bit status word, a 16-bit command word and a 32-bit link
+ * address (8 bytes when the bit-fields are packed).
+ */
+struct control_block
+{
+    uint16_t rs1:13;    /* reserved */
+    uint8_t ok:1;       /* 1:command executed without error, otherwise 0 */
+    uint8_t rs2:1;
+    uint8_t c:1;        /* execution status. set by device, clean by software */
+    uint8_t cmd:3;      /* command */
+    uint16_t rs3:10;    /* most time equal to 0 */
+    /* whether trigger interrupt after execution. 1:yes; 0:no */
+    uint8_t i:1;
+    uint8_t s:1;        /* suspend */
+    uint8_t el:1;       /* end flag */
+    uint32_t link_addr;
+} __attribute__ ((packed));
+
+/*
+ * Transmit-specific fields: buffer descriptor array address, byte count with
+ * EOF flag, transmit threshold and number of buffer descriptors.
+ * NOTE(review): presumably these follow the common control_block header of a
+ * TRANSMIT command -- confirm against the transmit path.
+ */
+typedef struct
+{
+    uint32_t tx_desc_addr;  /* transmit buffer decsriptor array address. */
+    /* transmit command block byte count (in lower 14 bits) */
+    uint16_t tcb_bytes:14;
+    uint8_t rs1:1;
+    uint8_t eof:1;
+    uint8_t tx_threshold;   /* transmit threshold */
+    uint8_t tbd_num;        /* TBD number */
+} __attribute__ ((packed)) tbd_t;
+
+/* Receive frame descriptor structure: status word, command word, link
+ * address, a reserved word, then the actual count and buffer size words. */
+typedef struct
+{
+    uint16_t status:13; // Result of receive operation
+    uint8_t ok:1;       // 1:receive without error, otherwise 0
+    uint8_t rs1:1;
+    uint8_t c:1;        // 1:receive complete
+    uint8_t rs2:3;
+    uint8_t sf:1;       // 0:simplified mode
+    uint8_t h:1;        // 1:header RFD
+    uint16_t rs3:9;
+    uint8_t s:1;        // 1:go to suspend
+    uint8_t el:1;       // 1:last RFD
+    uint32_t link_addr; // Add on RU base point to next RFD
+    uint32_t rs4;
+    uint16_t count:14;  // Number of bytes written into data area
+    uint8_t f:1;        // Set by device when count field update
+    // Set by device when placing data into data area complete
+    uint8_t eof:1;
+    uint16_t size:14;   // Buffer size (even number)
+    uint8_t rs5:2;
+} __attribute__ ((packed)) rfd_t;
+
+/* Bits of the 13-bit rfd_t.status field reporting the result of a receive. */
+enum
+{
+ RX_COLLISION = BIT(0), // 1:Receive collision detected
+ RX_IA_MATCH = BIT(1), // 0:Receive frame match individual address
+ RX_NO_MATCH = BIT(2), // 1:Receive frame match no address
+ RX_ERR = BIT(4), // 1:Receive frame error
+ RX_TYPE = BIT(5), // 1:Receive frame is a type frame
+ RX_SHORT = BIT(7), // 1:Receive frame is too short
+ RX_DMA_ERR = BIT(8),
+ RX_LARGE = BIT(9), // 1:Receive frame is too large
+ RX_CRC_ERR = BIT(10),
+} RFD_STATUS;
+
+/* PCI wrapper: the generic PCIDevice comes first, followed by the
+ * E100-specific state. */
+typedef struct PCIE100State {
+ PCIDevice dev;
+ E100State e100;
+} PCIE100State;
+
+/* Default values for MDI (PHY) registers; e100_software_reset() copies the
+ * whole table into E100State.mdimem. */
+static const uint16_t e100_mdi_default[] = {
+ /* MDI Registers 0 - 6, 7 */
+ 0x3000, 0x780d, 0x02a8, 0x0154, 0x05e1, 0x0000, 0x0000, 0x0000,
+ /* MDI Registers 8 - 15 */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ /* MDI Registers 16 - 31 */
+ 0x0003, 0x0000, 0x0001, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+};
+
+/* The all-ones Ethernet broadcast address */
+static const uint8_t broadcast_macaddr[6] =
+ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+/* Debugging codes */
+#ifdef DEBUG_E100
+
+/*
+ * Debug helper: print `comment` followed by the first `len` bytes of `info`
+ * in hex on stderr, terminated by a newline. Does nothing when either
+ * pointer is NULL.
+ */
+static void e100_dump(char *comment, uint8_t *info, int len)
+{
+    int idx = 0;
+
+    if ( comment == NULL || info == NULL )
+        return;
+
+    fprintf(stderr, "EE100\t%-24s%s", __func__, comment);
+    while ( idx < len )
+    {
+        fprintf(stderr, "%x ", info[idx]);
+        idx++;
+    }
+
+    fprintf(stderr, "\n");
+}
+
+/* Names of the SCB registers, indexed by byte offset in the CSR window.
+ * The table is sparse: offsets without an initializer hold NULL. */
+static const char *regname[] =
+{
+    [0] = "SCB Status", [1] = "SCB Ack",
+    [2] = "SCB Cmd", [3] = "SCB Interrupt Mask",
+    [4] = "SCB Pointer", [8] = "SCB Port",
+    [0xc] = "SCB Flash", [0xe] = "SCB Eeprom",
+    [0x10] = "SCB Ctrl MDI", [0x14] = "SCB Early RX",
+};
+/* Map a CSR offset to a printable name. Offsets past the table and the
+ * unnamed holes inside it both yield the fallback string, so the result is
+ * never NULL and is always safe to pass to "%s". */
+#define SCBNAME(x) \
+    ( ((x) < (sizeof(regname) / sizeof(regname[0])) && regname[(x)]) ? \
+      regname[(x)] : "Unknown SCB Register" )
+
+/* Printable names of the command-block commands, indexed by CBL_* code. */
+static const char *cb_cmd_name[] =
+{
+    [CBL_NOP] = "NOP",
+    [CBL_IASETUP] = "Individual address setup",
+    [CBL_CONFIGURE] = "Configure",
+    [CBL_MULTCAST_ADDR_SETUP] = "Set Multcast address list",
+    [CBL_TRANSMIT] = "Transmit",
+    [CBL_LOAD_MICROCODE] = "Load microcode",
+    [CBL_DUMP] = "Dump",
+    [CBL_DIAGNOSE] = "Diagnose",
+};
+/* Map a command code to its name; codes past the table yield a fallback. */
+#define CB_CMD_NAME(x) \
+    ( (x) < (sizeof(cb_cmd_name) / sizeof(cb_cmd_name[0])) ? \
+      cb_cmd_name[(x)] : "Unknown CB command" )
+
+/* Printable names of the two-bit EEPROM opcodes, indexed by opcode value. */
+static const char *eeprom_opcode_name[] =
+{
+ [0] = "Unknow", [EEPROM_WRITE] = "Write",
+ [EEPROM_READ] = "Read", [EEPROM_ERASE] = "Erase",
+};
+/* Map an opcode to its name; values past the table yield "Unknown". */
+#define EEPROM_OPCODE_NAME(x) \
+ ( (x) < (sizeof(eeprom_opcode_name) / sizeof(eeprom_opcode_name[0])) ? \
+ eeprom_opcode_name[(x)] : "Unknown" )
+
+/* Accumulator used by eeprom_trace(): the DO and DI levels sampled during the
+ * current phase (`op`), how many samples were taken (`i`), and the value
+ * assembled from them (`data`). Starts in the NOP phase. */
+static struct eeprom_trace_data
+{
+ uint8_t eedo[256];
+ uint8_t di[256];
+ int op;
+ int i;
+ uint32_t data;
+}etd = {.op = NOP};
+
+/*
+ * Debug helper that records the EEPROM serial lines bit by bit.
+ *
+ * eedo, di:  levels of the data-out and data-in lines for this sample
+ * dir:       EEPROM opcode in effect; with EEPROM_READ during the DATA phase
+ *            the assembled value is taken from eedo, otherwise from di
+ * next_op:   phase (NOP/OPCODE/ADDR/DATA) to start after a flush
+ * clr:       non-zero flushes: the recorded phase is printed (unless it was
+ *            NOP), the accumulator is cleared and `next_op` becomes current;
+ *            the other arguments are ignored in that case
+ */
+static void eeprom_trace(int eedo, int di, int dir, int next_op, int clr)
+{
+    int i;
+
+    if ( clr )
+    {
+        const char *opname = NULL;
+
+        switch ( etd.op )
+        {
+            case NOP:
+                break;
+            case OPCODE:
+                opname = "opcode";
+                break;
+            case ADDR:
+                opname = "address";
+                break;
+            case DATA:
+                opname = "data transfer";
+                break;
+            default:
+                opname = "Unknown";
+                break;
+        }
+
+        if ( opname )
+        {
+            logout("EEPROM trace:\n");
+            fprintf(stderr, "\toperation: %s\n", opname);
+            fprintf(stderr, "\tDI track:");
+            for ( i=0; i<etd.i; i++ )
+                fprintf(stderr, "%x ", etd.di[i]);
+            fprintf(stderr, "\n\tDO track:");
+            for ( i=0; i<etd.i; i++ )
+                fprintf(stderr, "%x ", etd.eedo[i]);
+            fprintf(stderr, "\n\tData:%#x\n", etd.data);
+        }
+
+        memset(&etd, 0x0, sizeof(etd));
+        etd.op = next_op;
+
+        return;
+    }
+
+    /* The number of clocks in one phase is driven by the guest, so stop
+     * recording samples once the fixed-size trace arrays are full instead of
+     * writing past their end. */
+    if ( etd.i >= 0 && etd.i < (int)(sizeof(etd.eedo) / sizeof(etd.eedo[0])) )
+    {
+        etd.eedo[etd.i] = eedo;
+        etd.di[etd.i] = di;
+        etd.i ++;
+    }
+
+    if ( dir == EEPROM_READ && etd.op == DATA )
+        etd.data = (etd.data << 1) | eedo;
+    else
+        etd.data = (etd.data << 1) | di;
+}
+
+/* Map one INT_* interrupt bit to a short printable name (GNU statement
+ * expression); any other value yields "Unknown". Only defined in DEBUG_E100
+ * builds, so it may only appear inside logout() arguments. */
+#define INT_NAME(x) \
+ ({ \
+ char *name = NULL; \
+ switch (x) \
+ { \
+ case INT_FCP: \
+ name = "FCP"; \
+ break; \
+ case INT_SWI: \
+ name = "SWI"; \
+ break; \
+ case INT_MDI: \
+ name = "MDI"; \
+ break; \
+ case INT_RNR: \
+ name = "RNR"; \
+ break; \
+ case INT_CNA: \
+ name = "CNA"; \
+ break; \
+ case INT_FR: \
+ name = "FR"; \
+ break; \
+ case INT_CX_TNO: \
+ name ="CX/TNO"; \
+ break; \
+ default: \
+ name ="Unknown"; \
+ } \
+ name; \
+ })
+
+#else
+/* Builds without DEBUG_E100: the tracing helpers are empty functions. */
+static void e100_dump(char *comment, uint8_t *info, int len) {}
+static void eeprom_trace(int eedo, int di, int dir, int next_op, int clr) {}
+#endif
+
+/*
+ * Reset PCI configuration space: load the default image, then overwrite the
+ * revision ID and the byte at offset 0x3c.
+ */
+static void pci_reset(E100State * s)
+{
+    uint8_t *pci_conf = s->pci_dev->config;
+
+    memcpy(pci_conf, e100_pci_configure, sizeof(e100_pci_configure));
+    logout("%p\n", s);
+
+    /* I82557 */
+    pci_conf[E100_PCI_REVISION_ID] = 0x01;
+
+    /* NOTE(review): 0x3c is the Interrupt Line register in the standard PCI
+     * header; it is cleared here. */
+    pci_conf[0x3c] = 0x0;
+}
+
+/*
+ * Selective reset: clear the whole register window, put both units in the
+ * idle state, rewind the CU/RU offsets and drop any pending interrupt cause.
+ * The CU/RU base addresses and everything outside pci_mem are left alone.
+ */
+static void e100_selective_reset(E100State * s)
+{
+    memset(s->pci_mem.mem, 0x0, sizeof(s->pci_mem.mem));
+
+    /* Both units idle */
+    CSR(CSR_STATUS, cus) = CU_IDLE;
+    CSR(CSR_STATUS, rus) = RU_IDLE;
+    logout("CU and RU go to idle\n");
+
+    s->cu_next = 0;
+    s->cu_offset = 0;
+    s->ru_offset = 0;
+
+    /* For 82557, the specific interrupt mask bits are all 1 */
+    CSR(CSR_CMD, simb) = 0x3f;
+
+    /* Bit 21 is the lowest bit of the PHY address field: PHY address 1 */
+    CSR_VAL(CSR_MDI) |= BIT(21);
+
+    /* Start with EEDO high, because a driver would otherwise detect the
+     * dummy 0 bit on EEDO right away. */
+    CSR(CSR_EEPROM, eedo) = 1;
+
+    /* No pending interrupts */
+    s->scb_stat = 0;
+}
+
+/*
+ * Full software reset: in addition to everything e100_selective_reset()
+ * does, clear the multicast list, restore the PHY register defaults, empty
+ * the packet FIFO and zero the statistics.
+ */
+static void e100_software_reset(E100State *s)
+{
+    /* Register window */
+    memset(s->pci_mem.mem, 0x0, sizeof(s->pci_mem.mem));
+
+    /* Multicast list emptied, multicast flag set */
+    memset(s->mult_list, 0x0, sizeof(s->mult_list));
+    s->is_multcast_enable = 1;
+
+    /* MDI registers back to their default values */
+    memcpy(s->mdimem, e100_mdi_default, sizeof(s->mdimem));
+
+    /* Packet FIFO and counters */
+    s->pkt_buf_len = 0;
+    memset(s->pkt_buf, 0x0, sizeof(s->pkt_buf));
+    memset(&s->statistics, 0x0, sizeof(s->statistics));
+
+    e100_selective_reset(s);
+}
+
+/* Reset entry point taking an untyped pointer: `opaque` is the E100State to
+ * reset; performs a full software reset. */
+static void e100_reset(void *opaque)
+{
+    E100State *s = opaque;
+
+    logout("%p\n", s);
+    e100_software_reset(s);
+}
+
+
+/* Save-state handler. Not implemented yet: nothing is written to `f`. */
+static void e100_save(QEMUFile * f, void *opaque)
+{
+    //TODO
+    (void)f;
+    (void)opaque;
+}
+
+/* Load-state handler. Not implemented yet: nothing is read from `f` and 0 is
+ * always returned. */
+static int e100_load(QEMUFile * f, void *opaque, int version_id)
+{
+    //TODO
+    (void)f;
+    (void)opaque;
+    (void)version_id;
+
+    return 0;
+}
+
+/* Interrupt functions */
+static void e100_interrupt(E100State *s, uint16_t int_type)
+{
+
+ //TODO: Add another i8255x card supported mask bit
+ if ( !CSR(CSR_CMD,m) )
+ {
+ //Set bit in stat/ack, so driver can no what interrupt happen
+ CSR_VAL(CSR_STATUS) |= int_type;
+ s->scb_stat = CSR(CSR_STATUS, stat_ack);
+
+ /* SCB maske and SCB Bit M do not disable interrupt. */
+ logout("Trigger an interrupt(type = %s(%#x), SCB Status = %#x)\n",
+ INT_NAME(int_type), int_type, CSR_VAL(CSR_STATUS));
+ pci_set_irq(s->pci_dev, 0, 1);
+ }
+}
+
+/*
+ * Handle a driver write to the stat/ack byte. `ack` holds the cause bits
+ * being acknowledged. An ack of zero, or one that matches no pending cause,
+ * is ignored and the register byte is restored; otherwise the acknowledged
+ * causes are cleared and the IRQ line is de-asserted once none remain.
+ */
+static void e100_interrupt_ack(E100State * s, uint8_t ack)
+{
+
+    /* Ignore acknowledege if driver write 0 to ack or
+     * according interrupt bit is not set
+     */
+    if ( !ack || !(s->scb_stat & ack) )
+    {
+        logout("Illegal interrupt ack(ack=%#x, SCB Stat/Ack=%#x), ignore it\n",
+               ack, s->scb_stat);
+        // The register write has already happened before we get here, so
+        // the value of the ack field must be restored.
+        CSR(CSR_STATUS, stat_ack) = s->scb_stat;
+        return;
+    }
+
+    s->scb_stat &= ~ack;
+    CSR(CSR_STATUS, stat_ack) = s->scb_stat;
+
+    // INT_* values are bits of the 16-bit status word, hence the shift by 8
+    logout("Interrupt ack(name=%s,val=%#x)\n",
+           INT_NAME((uint16_t)(ack << 8)), ack);
+    if ( !s->scb_stat )
+    {
+        logout("All interrupts are acknowledeged, de-assert interrupt line\n");
+        pci_set_irq(s->pci_dev, 0, 0);
+    }
+}
+
+/*
+ * PORT self-test: write the two-word result block to guest physical address
+ * `res_addr`. The signature word is all ones and the result word is zero,
+ * i.e. the emulated self test always succeeds.
+ */
+static void e100_self_test(uint32_t res_addr)
+{
+    struct
+    {
+        uint32_t st_sign;   /* Self Test Signature */
+        uint32_t st_result; /* Self Test Results */
+    } test_res;
+
+    test_res.st_sign = (uint32_t)-1;
+    test_res.st_result = 0; // Our self test always success
+    cpu_physical_memory_write(res_addr, (uint8_t *)&test_res,
+                              sizeof(test_res));
+
+    logout("Write self test result to %#x\n", res_addr);
+}
+
+/*
+ * Handle an access to the SCB PORT register.
+ *
+ * val: the 32-bit value; the low four bits select the function and, for the
+ *      self test, the remaining bits are the result address
+ * dir: access direction; not examined here
+ *
+ * The two dump functions are recognised but do nothing beyond logging.
+ */
+static void scb_port_func(E100State *s, uint32_t val, int dir)
+{
+#define PORT_SELECTION_MASK 0xfU
+
+    uint32_t sel = val & PORT_SELECTION_MASK;
+
+    switch ( sel )
+    {
+        case PORT_SOFTWARE_RESET:
+            logout("do PORT_SOFTWARE_RESET!\n");
+            e100_software_reset(s);
+            break;
+        case PORT_SELF_TEST:
+            e100_self_test(val & ~PORT_SELECTION_MASK);
+            logout("do PORT_SELF_TEST!\n");
+            break;
+        case PORT_SELECTIVE_RESET:
+            logout("do PORT_SELECTIVE_RESET!\n");
+            e100_selective_reset(s);
+            break;
+        case PORT_DUMP:
+            logout("do PORT_DUMP!\n");
+            break;
+        case PORT_DUMP_WAKE_UP:
+            logout("do PORT_DUMP_WAKE_UP!\n");
+            break;
+        default:
+            logout("Unknown SCB port command(selection function = %#x)\n", sel);
+            break;
+    }
+}
+
+/*
+ * Handle a driver write of `val` to the MDI control register.
+ *
+ * The value is decoded into interrupt-enable (bit 29), opcode (bits 26-27),
+ * PHY address (bits 21-25), PHY register address (bits 16-20) and data
+ * (bits 0-15). Only PHY 1 is emulated; accesses to other PHYs are reported
+ * as complete without touching any state. An invalid opcode is dropped
+ * without updating the register. Otherwise the access is applied to
+ * s->mdimem[], the Ready bit (bit 28) is set, the data field is replaced by
+ * the resulting register value, and an MDI interrupt is raised when the
+ * interrupt-enable bit was set.
+ */
+static void e100_write_mdi(E100State *s, uint32_t val)
+{
+    uint32_t ie = (val & 0x20000000) >> 29;
+    uint32_t opcode = (val & 0x0c000000) >> 26;
+    uint32_t phyaddr = (val & 0x03e00000) >> 21;
+    uint32_t regaddr = (val & 0x001f0000) >> 16;
+    uint32_t data = val & 0x0000ffff;
+
+    logout("Write MDI:\n"
+           "\topcode:%#x\n"
+           "\tphy address:%#x\n"
+           "\treg address:%#x\n"
+           "\tie:%#x\n"
+           "\tdata:%#x\n",
+           opcode, phyaddr, regaddr, ie, data);
+
+    /* We use default value --- PHY1
+     * If driver operate on other PHYs, do nothing and
+     * deceive it that the operation is finished
+     */
+    if ( phyaddr != 1 )
+    {
+        logout("Unsupport PHY address(phy = %#x)\n", phyaddr);
+        goto done;
+    }
+
+    // 1: MDI write
+    // 2: MDI read
+    if ( opcode != MDI_WRITE && opcode != MDI_READ )
+    {
+        logout("Invalid Opcode(opcode = %#x)\n", opcode);
+        return;
+    }
+
+    // Only the generic registers 0-6 get special handling. Higher indices
+    // are logged but still accessed as plain storage; regaddr is a 5-bit
+    // field, so it always stays inside the 32-entry mdimem[].
+    if ( regaddr > 6 )
+    {
+        logout("Invalid phy register index( phy register addr = %#x)\n",
+               regaddr);
+    }
+
+    if ( opcode == MDI_WRITE )
+    {
+        // MDI write
+        switch ( regaddr )
+        {
+            case 0: // Control Register
+                if ( data & 0x8000 ) // Reset
+                {
+                    /* Reset status and control registers to default. */
+                    s->mdimem[0] = e100_mdi_default[0];
+                    s->mdimem[1] = e100_mdi_default[1];
+                    data = s->mdimem[regaddr];
+                }
+                else
+                {
+                    /* Restart Auto Configuration = Normal Operation */
+                    data &= ~0x0200;
+                }
+                break;
+            case 1: // Status Register
+                logout("Invalid write on readonly register(opcode = %#x)\n",
+                       opcode);
+                // Keep the current contents: the store below becomes a no-op
+                data = s->mdimem[regaddr];
+                break;
+            case 2:
+            case 3:
+            case 4:
+            case 5:
+            case 6:
+                break;
+            default:
+                break;
+        }
+        s->mdimem[regaddr] = data;
+        logout("MDI WRITE: reg = %#x, data = %#x\n", regaddr, data);
+    }
+    else if ( opcode == MDI_READ )
+    {
+        // MDI read
+        switch ( regaddr )
+        {
+            case 0: // Control Register
+                if ( data & 0x8000 ) // Reset
+                {
+                    /* Reset status and control registers to default. */
+                    s->mdimem[0] = e100_mdi_default[0];
+                    s->mdimem[1] = e100_mdi_default[1];
+                }
+                break;
+            case 1: // Status Register
+                // Auto Negotiation complete, set sticky bit to 1
+                s->mdimem[regaddr] |= 0x0026;
+                break;
+            case 2: // PHY Identification Register (Word 1)
+            case 3: // PHY Identification Register (Word 2)
+                break;
+            case 5: // Auto-Negotiation Link Partner Ability Register
+                s->mdimem[regaddr] = 0x41fe;
+                break;
+            case 6: // Auto-Negotiation Expansion Register
+                s->mdimem[regaddr] = 0x0001;
+                break;
+            default:
+                break;
+        }
+        data = s->mdimem[regaddr];
+        logout("MDI READ: reg = %#x, data = %#x\n", regaddr, data);
+    }
+
+    /* Emulation takes no time to finish MDI transaction.
+     * Set the Ready bit (bit 28) of the MDI control register and return the
+     * resulting data in its low 16 bits. */
+done:
+    val |= BIT(28);
+    val = (val & 0xffff0000) + data;
+    CSR_WRITE(SCB_MDI, val, uint32_t);
+
+    if ( ie )
+        e100_interrupt(s, (uint16_t)INT_MDI);
+}
+
+/*
+ * Handle an access to the MDI control register. A read only sets the Ready
+ * bit (bit 28) in the stored register value; a write is passed to
+ * e100_write_mdi(); any other direction is logged and ignored.
+ */
+static void scb_mdi_func(E100State *s, uint32_t val, int dir)
+{
+    switch ( dir )
+    {
+        case OP_READ:
+            // Do nothing, just tell driver we are ready
+            CSR_VAL(CSR_MDI) |= BIT(28);
+            break;
+        case OP_WRITE:
+            e100_write_mdi(s, val);
+            break;
+        default:
+            logout("Invalid operation direction(dir=%x)\n", dir);
+            break;
+    }
+}
+
+/*
+ * Reset the EEPROM state machine.
+ *
+ * EEPROM_RESET_ALL zeroes the whole eeprom_t, including addr_len and the
+ * stored contents, and leaves val_type at NOP. Any other `type` is treated
+ * as a select reset: only the in-flight command state is cleared, EEDO is
+ * driven high, and addr_len and contents are kept.
+ */
+static void eeprom_reset(E100State *s, int type)
+{
+    eeprom_t *rom = &s->eeprom;
+
+    if ( type != EEPROM_RESET_ALL )
+    {
+        CSR(CSR_EEPROM, eedo) = 1;
+
+        rom->start_bit = 0;
+        rom->opcode = 0;
+        rom->address = 0;
+        rom->data = 0;
+
+        rom->val = 0;
+        rom->val_len = 0;
+        rom->val_type = NOP;
+
+        rom->cs = 0;
+        rom->sk = 0;
+        logout("EEPROM select reset\n");
+        return;
+    }
+
+    memset(rom, 0x0, sizeof(*rom));
+    rom->val_type = NOP;
+    logout("EEPROM reset all\n");
+}
+
+/* Advance the serial EEPROM state machine for one write to the EEPROM
+ * control register.
+ *
+ * s:   device state (EEDO is reported through the CSR EEPROM register)
+ * e:   EEPROM state; e->cs / e->sk hold the previously seen signal levels
+ * cs:  new chip-select level
+ * sk:  new serial-clock level
+ * di:  data-in bit presented by the driver
+ * dir: unused
+ *
+ * A command is: start bit (DI == 1), 2 opcode bits, e->addr_len address
+ * bits, then data.  Bits are sampled on the falling edge of SK while CS is
+ * high.  A write is committed when CS is de-asserted; a read shifts
+ * e->data out on EEDO, most significant bit first.
+ */
+static void do_eeprom_op(E100State *s, eeprom_t *e, int cs, int sk, int di,
+                         int dir)
+{
+    int assert_cs = (cs == 1 && e->cs == 0);
+    int de_assert_cs = (cs == 0 && e->cs == 1);
+    int de_assert_sk = (sk == 0 && e->sk == 1);
+
+    // Chip select is not enabled
+    if ( cs == 0 && e->cs == 0 )
+    {
+        logout("Invalid EECS signal\n");
+        return;
+    }
+
+    // update state
+    e->cs = cs;
+    e->sk = sk;
+
+    // Rising CS only selects the chip, nothing else to do
+    if ( assert_cs )
+    {
+        logout("EECS assert\n");
+        return;
+    }
+
+    // Falling CS completes one command
+    if ( de_assert_cs )
+    {
+        if ( e->val_type == DATA && e->opcode == EEPROM_WRITE )
+        {
+            e->data = e->val;
+            /* Index contents by element, exactly like the read path
+             * below.  The previous code added e->address to the base
+             * address as a byte count, so a written value was not
+             * stored where a later read of the same address looks. */
+            memcpy(&e->contents[e->address], &e->data, sizeof(e->data));
+            logout("EEPROM write complete(data=%#x)\n", e->data);
+        }
+        eeprom_trace(0,0,0,NOP,1);
+        eeprom_reset(s, EEPROM_SELECT_RESET);
+        logout("EECS de-asserted\n");
+        return;
+    }
+
+    // Chip is selected and serial clock fell, so the operation is valid
+    if ( cs == 1 && de_assert_sk == 1 )
+    {
+        // Set start bit
+        if ( e->start_bit == 0 && di == 1 )
+        {
+            e->start_bit = di;
+            e->val_len = 0;
+            e->val = 0;
+            e->val_type = OPCODE;
+
+            eeprom_trace(0,0,0,OPCODE,1);
+            logout("EEPROM start bit set\n");
+            return;
+        }
+        // Data in DI is valid
+        else if ( e->start_bit == 1 )
+        {
+            // If current operation is eeprom read, ignore DI
+            if ( !(e->val_type == DATA && e->opcode == EEPROM_READ) )
+            {
+                e->val = (e->val << 1) | di;
+                e->val_len ++;
+            }
+
+            switch ( e->val_type )
+            {
+            // Get the opcode.
+            case OPCODE:
+                eeprom_trace(CSR(CSR_EEPROM, eedo), di, e->opcode, 0, 0);
+                if ( e->val_len == 2 )
+                {
+                    e->opcode = e->val;
+                    e->val = 0;
+                    e->val_len = 0;
+                    e->val_type = ADDR;
+
+                    eeprom_trace(0,0,0,ADDR,1);
+                    logout("EEPROM get opcode(opcode name=%s,opcode=%#x )\n",
+                           EEPROM_OPCODE_NAME(e->opcode), e->opcode);
+                }
+                break;
+            // Get address
+            case ADDR:
+                eeprom_trace(CSR(CSR_EEPROM, eedo), di, e->opcode, 0, 0);
+                if ( e->val_len == e->addr_len )
+                {
+                    e->address = e->val;
+                    e->val = 0;
+                    e->val_len = 0;
+                    e->val_type = DATA;
+
+                    // Fetch the data early for the read that follows
+                    if ( e->opcode == EEPROM_READ )
+                    {
+                        memcpy(&e->data, &e->contents[e->address],
+                               sizeof(e->data));
+                        logout("EEPROM prepare data to read(addr=%#x,data=%#x)\n",
+                               e->address, e->data);
+                    }
+
+                    // Drive a dummy 0 on EEDO to tell the driver that the
+                    // address has been completely written
+                    CSR(CSR_EEPROM, eedo) = 0;
+                    eeprom_trace(0,0,0,DATA,1);
+                    logout("EEPROM get address(addr=%#x)\n", e->address);
+                }
+                break;
+            // Only do data out operation
+            case DATA:
+                if ( e->opcode == EEPROM_READ )
+                {
+                    // Start from the most significant bit
+                    uint16_t t = !!(e->data & (0x8000U >> e->val_len));
+
+                    CSR(CSR_EEPROM, eedo) = t;
+
+                    logout("EEPROM read(reg address=%#x, reg val=%#x, do=%#x, len=%#x)\n",
+                           e->address, e->data, t, e->val_len);
+
+                    if ( e->val_len > sizeof(e->data)*8 )
+                    {
+                        /* Driver may do more write op to de-assert EESK,
+                         * so we let EEPROM go to idle after a register
+                         * has been read completely
+                         */
+                        e->val_type = NOP;
+                        logout("Read complete\n");
+
+                        break;
+                    }
+
+                    e->val_len ++;
+                }
+                eeprom_trace(CSR(CSR_EEPROM, eedo), di, e->opcode, 0, 0);
+                // The eeprom write itself happens when CS is de-asserted
+                break;
+            default:
+                break;
+            }
+        }
+    }
+
+    return;
+}
+
+
+/* Decode a write to the CSR EEPROM control register into its CS, SK and DI
+ * signal levels and feed them to the EEPROM state machine.
+ */
+static void scb_eeprom_func(E100State *s, uint32_t val, int dir)
+{
+    int eecs = (val & EEPROM_CS) ? 1 : 0;
+    int eesk = (val & EEPROM_SK) ? 1 : 0;
+    int eedi = (val & EEPROM_DI) ? 1 : 0;
+
+    /* Log the old levels before do_eeprom_op() overwrites them. */
+    logout("EEPROM: Old(cs=%#x, sk=%#x), New(cs=%#x, sk=%#x, di=%#x)\n",
+           s->eeprom.cs, s->eeprom.sk, eecs, eesk, eedi);
+
+    do_eeprom_op(s, &s->eeprom, eecs, eesk, eedi, dir);
+}
+
+/* Execute one receive unit (RU) command taken from the SCB command byte.
+ *
+ * s:   device state
+ * val: RU command code (the caller passes the low nibble of the command)
+ */
+static void e100_ru_command(E100State *s, uint8_t val)
+{
+    switch ( val )
+    {
+    case RU_START:
+        /* RU start: become ready and latch the RFD offset */
+        SET_RU_STATE(RU_READY);
+        logout("RU is set to ready\n");
+        s->ru_offset = CSR_VAL(CSR_POINTER);
+        logout("RFD offset is at %#x\n", s->ru_offset);
+        break;
+
+    case RU_RESUME:
+        /* RU resume: only a suspended RU changes state */
+        if ( GET_RU_STATE == RU_SUSPENDED )
+        {
+            SET_RU_STATE(RU_READY);
+        }
+        logout("RU resume to ready\n");
+        break;
+
+    case RU_ADDR_LOAD:
+        /* Load RU base */
+        s->ru_base = CSR_VAL(CSR_POINTER);
+        logout("Load RU base address at %#x\n", s->ru_base);
+        break;
+
+    case RU_ABORT:
+        e100_interrupt(s, INT_RNR);
+        SET_RU_STATE(RU_IDLE);
+        logout("RU abort, go to idle\n");
+        break;
+
+    case RU_DMA_REDIRECT:
+        logout("RU DMA redirect not implemented\n");
+        break;
+
+    case RU_LOAD_HDS:
+        logout("RU load header data size(HDS) not implemented\n");
+        break;
+
+    case RU_NOP:
+        /* Will not be here */
+    default:
+        break;
+    }
+}
+
+/* Walk the command unit's command block (CB) list and execute each entry
+ * until one has its S (suspend) or EL (end of list) bit set.
+ *
+ * s:         device state; cu_base + cu_offset locate the current CB
+ * is_resume: zero for CU start (cu_offset is reloaded from the SCB general
+ *            pointer); non-zero for CU resume (the caller set cu_offset)
+ *
+ * This function changes the CU state, so CU start and CU resume must set
+ * the CU state before calling it.  Every executed CB is completed with the
+ * C and OK bits set.
+ *
+ * NOTE(review): link addresses come from guest memory and the walk has no
+ * iteration limit, so a CB ring without S/EL bits never leaves this loop.
+ */
+static void e100_execute_cb_list(E100State *s, int is_resume)
+{
+    struct control_block cb = {0};
+    uint32_t cb_addr;
+
+    /* If called from CU resume, cu_offset has already been set */
+    if ( !is_resume )
+        s->cu_offset = CSR_VAL(CSR_POINTER);
+
+    while (1)
+    {
+        cb_addr = s->cu_base + s->cu_offset;
+        cpu_physical_memory_read(cb_addr, (uint8_t *)&cb, sizeof(cb));
+
+        switch ( cb.cmd )
+        {
+        case CBL_NOP:
+            /* Do nothing */
+            break;
+        case CBL_IASETUP:
+            cpu_physical_memory_read(cb_addr + 8, &s->macaddr[0],
+                                     sizeof(s->macaddr));
+            e100_dump("Setup Individual Address:", &s->macaddr[0], 6);
+            break;
+        case CBL_CONFIGURE:
+            {
+                i82557_cfg_t *cfg = &s->config;
+
+                assert(sizeof(s->config) == 22);
+                cpu_physical_memory_read(cb_addr + 8, (uint8_t *)cfg,
+                                         sizeof(s->config));
+                logout("Setup card configuration:"
+                       "\tbyte count:%d\n"
+                       "\tRx FIFO limit:%d\n"
+                       "\tTx FIFO limit:%d\n"
+                       "\tAdaptive interframe spacing:%d\n"
+                       "\tRx DMA max:%d\n"
+                       "\tTX DMA max:%d\n"
+                       "\tDMBC enable:%d\n"
+                       "\tLate SCB:%d\n"
+                       "\tTNO:%d\n"
+                       "\tCI:%d\n"
+                       "\tDiscard overrun RX:%d\n"
+                       "\tSave bad frame:%d\n"
+                       "\tDiscard short RX:%d\n"
+                       "\tunderrun retry:%d\n"
+                       "\tMII:%d\n"
+                       "\tNSAI:%d\n"
+                       "\tPreamble len:%d\n"
+                       "\tloopback:%d\n"
+                       "\tliner pro:%d\n"
+                       "\tPRI mode:%d\n"
+                       "\tinterframe spacing:%d\n"
+                       "\tpromiscuous:%d\n"
+                       "\tbroadcast dis:%d\n"
+                       "\tCRS CDT:%d\n"
+                       "\tstripping:%d\n"
+                       "\tpadding:%d\n"
+                       "\tRX crc:%d\n"
+                       "\tforce fdx:%d\n"
+                       "\tfdx enable:%d\n"
+                       "\tmultiple IA:%d\n"
+                       "\tmulticast all:%d\n",
+                       cfg->byte_count, cfg->rx_fifo_limit,
+                       cfg->tx_fifo_limit,
+                       cfg->adpt_inf_spacing, cfg->rx_dma_max_bytes,
+                       cfg->tx_dma_max_bytes,
+                       cfg->dmbc_en, cfg->late_scb, cfg->tno_intr,
+                       cfg->ci_intr,
+                       cfg->dis_overrun_rx, cfg->save_bad_frame,
+                       cfg->dis_short_rx,
+                       cfg->underrun_retry, cfg->mii, cfg->nsai,
+                       cfg->preamble_len,
+                       cfg->loopback, cfg->linear_prio, cfg->pri_mode,
+                       cfg->interframe_spacing,
+                       cfg->promiscuous, cfg->broadcast_dis, cfg->crs_cdt,
+                       cfg->strip,
+                       cfg->padding, cfg->rx_crc, cfg->force_fdx, cfg->fdx_en,
+                       cfg->mul_ia, cfg->mul_all);
+            }
+            break;
+        case CBL_MULTCAST_ADDR_SETUP:
+            {
+                uint16_t mult_list_count = 0;
+                uint16_t size = 0;
+
+                cpu_physical_memory_read(cb_addr + 8,
+                                         (uint8_t *)&mult_list_count, 2);
+                /* Drop the two top bits.  The former "(x << 2) >> 2" did
+                 * nothing: x is promoted to int before the shifts, so no
+                 * bit was ever shifted out. */
+                mult_list_count &= 0x3fff;
+
+                if ( !mult_list_count )
+                {
+                    logout("Multcast disabled(multicast count=0)\n");
+                    s->is_multcast_enable = 0;
+                    memset(s->mult_list, 0x0, sizeof(s->mult_list));
+                    break;
+                }
+                /* Never read more than the local list can hold */
+                size = mult_list_count > sizeof(s->mult_list) ?
+                    sizeof(s->mult_list) : mult_list_count;
+                cpu_physical_memory_read(cb_addr + 12, &s->mult_list[0],
+                                         size);
+
+                e100_dump("Setup Multicast list: ", &s->mult_list[0],
+                          size);
+                break;
+            }
+        case CBL_TRANSMIT:
+            {
+                struct
+                {
+                    struct control_block cb;
+                    tbd_t tbd;
+                } __attribute__ ((packed)) tx;
+
+                struct
+                {
+                    uint32_t addr;
+                    uint16_t size;
+                    uint16_t is_el_set;
+                } tx_buf = {0};
+
+                uint32_t tbd_array;
+                uint16_t tcb_bytes;
+                uint8_t sf;
+                int len = s->pkt_buf_len;
+
+                assert( len < sizeof(s->pkt_buf));
+
+                cpu_physical_memory_read(cb_addr, (uint8_t *)&tx,
+                                         sizeof(tx));
+                tbd_array = le32_to_cpu(tx.tbd.tx_desc_addr);
+                tcb_bytes = le16_to_cpu(tx.tbd.tcb_bytes);
+                // Indicate use what mode to transmit(simple or flexible)
+                sf = tx.cb.rs3 & 0x1;
+
+                logout("Get a TBD:\n"
+                       "\tTBD array address:%#x\n"
+                       "\tTCB byte count:%#x\n"
+                       "\tEOF:%#x\n"
+                       "\tTransmit Threshold:%#x\n"
+                       "\tTBD number:%#x\n"
+                       "\tUse %s mode to send frame\n",
+                       tbd_array, tcb_bytes, tx.tbd.eof,
+                       tx.tbd.tx_threshold, tx.tbd.tbd_num,
+                       sf ? "Flexible" : "Simple");
+
+                if ( !sf || tbd_array == (uint32_t)-1 )
+                {
+                    /* Simple mode */
+
+                    /* For simple mode, TCB bytes should not be zero.
+                     * But we still check here for safety
+                     */
+                    if ( !tcb_bytes || tcb_bytes > sizeof(s->pkt_buf) )
+                        break;
+
+                    cpu_physical_memory_read(cb_addr+16, &s->pkt_buf[0],
+                                             tcb_bytes);
+                    len = tcb_bytes;
+                    logout("simple mode(size=%d)\n", len);
+
+                }
+                else
+                {
+                    /* Flexible mode */
+
+                    /* For flexible mode, TBD num should not be zero.
+                     * But we still check here for safety
+                     */
+                    if ( !tx.tbd.tbd_num )
+                        break;
+
+                    // I82557 don't support extend TCB
+                    if ( s->device == i82557C || s->device == i82557B )
+                    {
+                        /* Standard TCB mode */
+
+                        int i;
+
+                        for ( i=0; i<tx.tbd.tbd_num; i++ )
+                        {
+
+                            cpu_physical_memory_read(tbd_array,
+                                                     (uint8_t *)&tx_buf,
+                                                     sizeof(tx_buf));
+                            tx_buf.is_el_set &= 0x1;
+                            tx_buf.size &= 0x7fff;
+                            tbd_array += 8;
+
+                            if ( tx_buf.size > sizeof(s->pkt_buf) - len )
+                            {
+                                logout("Warning: Get a too big TBD, ignore it"
+                                       "(buf addr %#x, size %d, el:%#x)\n",
+                                       tx_buf.addr, tx_buf.size,
+                                       tx_buf.is_el_set);
+                                continue;
+                            }
+
+                            cpu_physical_memory_read(tx_buf.addr,
+                                                     &s->pkt_buf[len],
+                                                     tx_buf.size);
+
+                            logout("TBD (standard mode): buf addr %#x, size %d, el:%#x\n",
+                                   tx_buf.addr, tx_buf.size,
+                                   tx_buf.is_el_set);
+                            len += tx_buf.size;
+
+                            if ( tx_buf.is_el_set )
+                                break;
+                        }
+
+                    }
+                    //FIXME: Extend mode is not be tested
+                    else
+                    {
+                        /* Extend TCB mode */
+
+                        /* A standard TCB followed by two TBDs */
+                        uint32_t tbd_addr = cb_addr+16;
+                        int i = 0;
+
+
+                        for ( ; i<2 && i<tx.tbd.tbd_num; i++ )
+                        {
+                            /* Read the in-line TBD that tbd_addr walks
+                             * over.  This used to read tbd_array, i.e.
+                             * the same external descriptor twice, while
+                             * tbd_addr was advanced but never used. */
+                            cpu_physical_memory_read(tbd_addr,
+                                                     (uint8_t *)&tx_buf,
+                                                     sizeof(tx_buf));
+                            tx_buf.is_el_set &= 0x1;
+                            tbd_addr += 8;
+
+                            /* From Intel's spec, size of TBD equal to zero
+                             * has same effect with EL bit set
+                             */
+                            if ( tx_buf.size == 0 )
+                            {
+                                tx_buf.is_el_set = 1;
+                                break;
+                            }
+
+                            if ( tx_buf.size + len > sizeof(s->pkt_buf) )
+                            {
+                                logout("TX frame is too large, discarding it"
+                                       "(buf addr=%#x, size=%#x)\n",
+                                       tx_buf.addr,
+                                       tx_buf.size);
+                                //continue;
+                                break;
+                            }
+
+                            logout("TBD (extended mode): buf addr %#08x, size %#04x, el:%#x\n",
+                                   tx_buf.addr, tx_buf.size,
+                                   tx_buf.is_el_set);
+                            cpu_physical_memory_read(tx_buf.addr,
+                                                     &s->pkt_buf[len],
+                                                     tx_buf.size);
+
+                            len += tx_buf.size;
+
+                            if ( tx_buf.is_el_set )
+                                break;
+                        }
+
+                        /* In extend TCB mode, TBD array points to the
+                         * third TBD if it is not NULL(0xffffffff) and the
+                         * EL bit of the first two TBDs is not set
+                         */
+                        if ( tbd_array != (uint32_t)-1 &&
+                             !tx_buf.is_el_set )
+                        {
+                            tbd_addr = tbd_array;
+
+                            /* TBD number includes first two TBDs, so don't
+                             * initialize i here
+                             */
+                            for ( ; i<tx.tbd.tbd_num; i++ )
+                            {
+                                cpu_physical_memory_read(tbd_addr,
+                                                         (uint8_t *)&tx_buf,
+                                                         sizeof(tx_buf));
+                                tx_buf.is_el_set &= 0x1;
+                                tbd_addr += 8;
+
+                                /* The size is guest-controlled: refuse a
+                                 * fragment that does not fit in what is
+                                 * left of pkt_buf instead of writing
+                                 * past its end. */
+                                if ( tx_buf.size > sizeof(s->pkt_buf) - len )
+                                {
+                                    logout("TX frame is too large, discarding it"
+                                           "(buf addr=%#x, size=%#x)\n",
+                                           tx_buf.addr,
+                                           tx_buf.size);
+                                    break;
+                                }
+
+                                cpu_physical_memory_read(tx_buf.addr,
+                                                         &s->pkt_buf[len],
+                                                         tx_buf.size);
+                                logout("TBD (extended mode): buf addr 0x%#08x, size 0x%#04x\n",
+                                       tx_buf.addr, tx_buf.size);
+
+                                len += tx_buf.size;
+
+                                if ( tx_buf.is_el_set )
+                                    break;
+                            }
+                        }
+                    }
+                }
+
+
+                s->pkt_buf_len = len;
+
+/* Below codes are used for Threshold. But with these logic, network of guest
+ * getting bad performance. So I comment it and leave codes here to hope anyone
+ * fix it
+ */
+#if 0
+                /* If threshold is set, only send packet when threshold
+                 * bytes are read
+                 */
+                if ( tx.tbd.tx_threshold &&
+                     s->pkt_buf_len < tx.tbd.tx_threshold * 8 )
+                {
+                    logout("Current data length in FIFO buffer:%d\n",
+                           s->pkt_buf_len);
+                    break;
+                }
+#endif
+
+                if ( s->pkt_buf_len )
+                {
+                    qemu_send_packet(s->vc, s->pkt_buf, s->pkt_buf_len);
+                    s->statistics.tx_good_frames ++;
+                    logout("Send out frame successful(size=%d,"
+                           "already sent %d frames)\n", s->pkt_buf_len,
+                           s->statistics.tx_good_frames);
+                    s->pkt_buf_len = 0;
+                }
+
+                /* Ethernet header: destination at 0, source at 6 and
+                 * type at 12 (the type used to be dumped from offset 8,
+                 * which is inside the source address). */
+                e100_dump("Dest addr:", (uint8_t *)s->pkt_buf, 6);
+                e100_dump("Src addr:", (uint8_t *)(s->pkt_buf+6), 6);
+                e100_dump("type:", (uint8_t *)(s->pkt_buf+12), 2);
+
+                break;
+            }
+        case CBL_LOAD_MICROCODE:
+#ifdef DEBUG_E100
+            {
+                /* Loading micro code is not supported, just dump it */
+                #define MICRO_CODE_LEN 256
+                uint8_t micro_code[MICRO_CODE_LEN] = {0};
+                cpu_physical_memory_read(cb_addr+8, micro_code,
+                                         MICRO_CODE_LEN);
+                e100_dump("Load micro code:", micro_code, MICRO_CODE_LEN);
+            }
+#endif
+            break;
+        case CBL_DUMP:
+            logout("Control block dump\n");
+            break;
+        case CBL_DIAGNOSE:
+            logout("Control block diagnose\n");
+            break;
+        default:
+            logout("Unknown Control block command(val=%#x)\n", cb.cmd);
+            break;
+        }
+
+        /* Now, we finished executing a command, update status of CB.
+         * We always succeed
+         */
+        cb.c = 1;
+        cb.ok = 1;
+        // Only write back the first two bytes of the CB (C and OK bits)
+        cpu_physical_memory_write(cb_addr, (uint8_t *)&cb, 2);
+
+        logout("Finished a command from CB list:\n"
+               "\tok:%d\n"
+               "\tc:%d\n"
+               "\tcommand name:%s(cmd=%#x)\n"
+               "\ti:%d\n"
+               "\ts:%d\n"
+               "\tel:%d\n"
+               "\tlink address:%#x\n",
+               cb.ok, cb.c, CB_CMD_NAME(cb.cmd), cb.cmd,
+               cb.i, cb.s, cb.el, cb.link_addr);
+
+        if ( cb.i )
+            e100_interrupt(s, (uint16_t)INT_CX_TNO);
+
+        // Suspend CU
+        if ( cb.s )
+        {
+            logout("CU go to suspend\n");
+            SET_CU_STATE(CU_SUSPENDED);
+            /* Saved so that CU resume can continue from the next CB.
+             * Converted like cu_offset at the bottom of the loop. */
+            s->cu_next = le32_to_cpu(cb.link_addr);
+
+            // Trigger CNA interrupt only when CNA mode is configured
+            if ( !(s->config.ci_intr) && cb.i )
+                e100_interrupt(s, (uint16_t)INT_CNA);
+
+            return;
+        }
+
+        // This is last command in CB list, CU go back to IDLE
+        if ( cb.el )
+        {
+            logout("Command block list is empty, CU go to idle\n");
+            SET_CU_STATE(CU_IDLE);
+            /* Either in CNA mode or CI mode, interrupt need be triggered
+             * when CU go to idle.
+             */
+            if ( cb.i )
+                e100_interrupt(s, (uint16_t)INT_CNA);
+
+            return;
+        }
+
+        s->cu_offset = le32_to_cpu(cb.link_addr); // get next CB offset
+    }
+}
+
+/* Copy the statistical counters to guest memory at s->statsaddr.
+ *
+ * complete_word is stored in the counters first, so it is written out
+ * together with them.  Most counters are never changed by the emulation
+ * and stay 0.
+ */
+static void dump_statistics(E100State * s, uint32_t complete_word)
+{
+    uint8_t *counters = (uint8_t *)&s->statistics;
+
+    s->statistics.complete_word = complete_word;
+    cpu_physical_memory_write(s->statsaddr, counters, sizeof(s->statistics));
+}
+
+/* Execute one command unit (CU) command.
+ *
+ * s:   device state
+ * val: CU command code; the caller (scb_cmd_func) passes the SCB command
+ *      byte masked with 0xf0
+ *
+ * CU start and CU resume run the command block list through
+ * e100_execute_cb_list(); the remaining commands load the CU base or the
+ * statistics address from the SCB general pointer, or dump the counters.
+ */
+static void e100_cu_command(E100State *s, uint8_t val)
+{
+
+ switch ( val )
+ {
+ case CU_NOP:
+ /* Will not be here */
+ break;
+ case CU_START:
+ /* This strictly follow Intel's spec */
+ if ( GET_CU_STATE != CU_IDLE && GET_CU_STATE != CU_SUSPENDED )
+ {
+ logout("Illegal CU start command. Device is not idle or
suspend\n");
+ return;
+ }
+
+ /* The state must be set before the list is walked: the walk itself
+ * moves the CU to suspended or idle when it finishes. */
+ SET_CU_STATE(CU_LPQ_ACTIVE);
+ logout("CU start\n");
+
+ e100_execute_cb_list(s, 0);
+ break;
+ case CU_RESUME:
+ {
+ /* Address of the CB at the current cu_offset */
+ uint32_t previous_cb = s->cu_base + s->cu_offset;
+ struct control_block cb;
+
+ /* Resume from suspend */
+
+ /* FIXME:From Intel's spec, CU resume from idle is
+ * forbidden, but e100 drive in linux
+ * indeed do this.
+ */
+ if ( GET_CU_STATE == CU_IDLE )
+ {
+ /* Only logged; the resume is still carried out below. */
+ logout("Illegal resume form IDLE\n");
+ }
+
+ cpu_physical_memory_read(previous_cb, (uint8_t *)&cb,
+ sizeof(cb));
+
+ //FIXME: Need any special handling when CU is active ?
+
+ /* Driver must clean S bit in previous CB when
+ * it issue CU resume command
+ */
+ if ( cb.s )
+ {
+ logout("CU still in suspend\n");
+ break;
+ }
+
+ SET_CU_STATE(CU_LPQ_ACTIVE);
+ /* EL set on that CB: nothing follows it, so there is nothing to run */
+ if ( cb.el )
+ {
+ logout("CB list is empty, CU just go to active\n");
+ break;
+ }
+
+ // Continue next command
+ /* cu_next is the link address saved when the CU was suspended */
+ s->cu_offset = s->cu_next;
+
+ e100_execute_cb_list(s, 1);
+
+ logout("CU resume\n");
+ }
+ break;
+ case CU_STATSADDR:
+ /* Load dump counters address */
+ s->statsaddr = CSR_VAL(CSR_POINTER);
+ logout("Load Stats address at %#x\n", s->statsaddr);
+ break;
+ case CU_SHOWSTATS:
+ /* Dump statistical counters */
+ dump_statistics(s, 0xa005);
+ logout("Execute dump statistics\n");
+ break;
+ case CU_CMD_BASE:
+ /* Load CU base */
+ s->cu_base = CSR_VAL(CSR_POINTER);
+ logout("Load CU base at %x\n", s->cu_base);
+ break;
+ case CU_DUMPSTATS:
+ /* Dump statistical counters and reset counters. */
+ dump_statistics(s, 0xa007);
+ memset(&s->statistics, 0x0, sizeof(s->statistics));
+ logout("Execute dump and reset statistics\n");
+ break;
+ case CU_S_RESUME:
+ /* CU static resume */
+ logout("CU static resume is not implemented\n");
+ break;
+ default:
+ logout("Unknown CU command(val=%#x)\n", val);
+ break;
+ }
+
+}
+
+/* Execute the commands written to the SCB command byte.
+ *
+ * s:   device state
+ * val: value written; the low nibble is handed to the receive unit, the
+ *      high nibble (still in place, i.e. masked with 0xf0) to the command
+ *      unit
+ * dir: unused
+ *
+ * Both nibbles are examined independently, so one write can carry an RU and
+ * a CU command.  The previous "else if" silently dropped the CU command
+ * whenever an RU command was present.  A zero nibble is a NOP and is
+ * ignored.  The RU command runs first, as before.
+ */
+static void scb_cmd_func(E100State *s, uint16_t val, int dir)
+{
+    uint8_t ru_op = val & 0x0f;
+    uint8_t cu_op = val & 0xf0;
+
+    if ( ru_op )
+    {
+        e100_ru_command(s, ru_op);
+        /* command accepted: clear it in the CSR */
+        CSR(CSR_CMD, ru_cmd) = 0;
+    }
+
+    if ( cu_op )
+    {
+        e100_cu_command(s, cu_op);
+        /* command accepted: clear it in the CSR */
+        CSR(CSR_CMD, cu_cmd) = 0;
+    }
+}
+
+/* Access width passed as the 'bytes' argument of e100_execute().
+ * WRITEB/WRITEW/WRITEL are passed by the 1-, 2- and 4-byte write handlers,
+ * OP_IS_READ by the read helpers.
+ *
+ * NOTE(review): as written this defines an object named WRITE_BYTES of an
+ * unnamed enum type, not an enum tag or typedef - confirm that is intended.
+ */
+enum
+{
+ WRITEB,
+ WRITEW,
+ WRITEL,
+ OP_IS_READ,
+} WRITE_BYTES;
+
+/* Carry out the side effects of a CSR access.
+ *
+ * s:           device state
+ * addr_offset: offset of the access inside the CSR
+ * val:         value written (the read helpers pass their -1 placeholder)
+ * dir:         OP_READ or OP_WRITE
+ * bytes:       WRITEB, WRITEW or WRITEL for writes, OP_IS_READ for reads
+ *
+ * Driver may issue a command by writing one 32bit-entry,
+ * two 16bit-entries or four 8bit-entries. In the latter two cases, we
+ * must wait until the driver has finished writing the highest byte. The
+ * parameter 'bytes' means write action of driver(writeb, writew, writel)
+ */
+static void e100_execute(E100State *s, uint32_t addr_offset,
+ uint32_t val, int dir, int bytes)
+{
+
+ switch ( addr_offset )
+ {
+ case SCB_STATUS:
+ /* A byte access to the status byte has no side effect; wider accesses
+ * fall through and are handled like an access to SCB_ACK. */
+ if ( bytes == WRITEB )
+ break;
+ case SCB_ACK:
+ if ( dir == OP_WRITE )
+ {
+ /* _val ends up holding the byte used to acknowledge interrupts:
+ * the value itself for a byte write, bits 15:8 for wider writes. */
+ uint8_t _val = 0;
+ if ( bytes == WRITEB )
+ _val = (uint8_t)val;
+ else if ( bytes == WRITEW )
+ _val = ((uint16_t)val) >> 8;
+ else if ( bytes == WRITEL)
+ {
+ // This should not be happen
+ _val = ((uint16_t)val) >> 8;
+ logout("WARNNING: Drvier write 4 bytes to CSR register at
offset %d,"
+ "emulator may do things wrong!!!\n", addr_offset);
+ }
+
+ e100_interrupt_ack(s, _val);
+ }
+ break;
+ case SCB_CMD:
+ if ( dir == OP_WRITE )
+ scb_cmd_func(s, val, dir);
+
+/* I don't know whether there is any driver writes command words and
+ * interrupt mask at same time by two bytes. This is not a regular operation.
+ * but if we meet the case, below codes could copy with it. As far
+ * as I know. windows's and linux's driver don't do this thing.
+ */
+#if 0
+ if ( bytes == WRITEW && (val&0xff00) != 0 )
+ ;
+ else
+ break;
+#endif
+ break;
+ case SCB_INTERRUPT_MASK:
+ if ( dir == OP_WRITE )
+ {
+ /* For a 4-byte write _val stays 0, so no software interrupt is
+ * raised in that case. */
+ uint8_t _val = 0;
+ if ( bytes == WRITEB )
+ _val = (uint8_t)val;
+ else if ( bytes == WRITEW )
+ _val = (val & 0xff00) >> 8;
+ else
+ logout("WARNNING: Drvier write 4 bytes to CSR register at
offset %d,"
+ "emulator may do things wrong!!!\n", addr_offset);
+
+ // Driver generates a software interrupt
+ if ( _val & BIT(1) )
+ e100_interrupt(s, INT_SWI);
+ }
+ break;
+ case SCB_PORT ... SCB_PORT + 3:
+ if ( dir == OP_WRITE )
+ {
+ // Waiting for driver write to the highest byte
+ if ( (bytes == WRITEB && addr_offset != SCB_PORT + 3) ||
+ (bytes == WRITEW && addr_offset != SCB_PORT + 2) )
+ break;
+
+ /* The complete register is taken from the CSR, not from val */
+ scb_port_func(s, CSR_VAL(CSR_PORT), dir);
+ }
+ break;
+ case SCB_MDI ... SCB_MDI + 3:
+ if ( dir == OP_WRITE )
+ {
+ // Waiting for driver write to the highest byte
+ if ( (bytes == WRITEB && addr_offset != SCB_MDI + 3) ||
+ (bytes == WRITEW && addr_offset != SCB_MDI + 2) )
+ break;
+ }
+
+ /* Reached for reads as well as for completed writes */
+ scb_mdi_func(s, CSR_VAL(CSR_MDI), dir);
+ break;
+ case SCB_EEPROM:
+ if ( dir == OP_WRITE )
+ scb_eeprom_func(s, val, dir);
+ // Nothing need do when driver read EEPROM registers of CSR
+ break;
+ case SCB_POINTER:
+ break;
+ default:
+ logout("Driver operate on CSR reg(offset=%#x,dir=%s,val=%#x)\n",
+ addr_offset, dir==OP_WRITE?"write":"read", val);
+ }
+
+}
+
+/* MMIO access functions */
+/* Read one byte of the CSR at addr_offset.
+ *
+ * Returns the register byte after e100_execute() has applied the read side
+ * effects, or 0xff when the access fails the bounds check.
+ */
+static uint8_t e100_read1(E100State * s, uint32_t addr_offset)
+{
+    uint8_t val = -1;
+
+    if ( addr_offset + sizeof(val) < sizeof(s->pci_mem.mem) )
+    {
+        e100_execute(s, addr_offset, val, OP_READ, OP_IS_READ);
+        val = CSR_READ(addr_offset, uint8_t);
+        logout("READ1: Register name = %s, addr_offset = %#x, val=%#x\n",
+               SCBNAME(addr_offset), addr_offset, val);
+    }
+    else
+    {
+        logout("Invaild read, beyond memory boundary(addr:%#x)\n", addr_offset
+               + s->region_base_addr[CSR_MEMORY_BASE]);
+    }
+
+    return val;
+}
+
+/* Read a 16-bit word of the CSR at addr_offset.
+ *
+ * Returns the register word after e100_execute() has applied the read side
+ * effects, or 0xffff when the access fails the bounds check.
+ */
+static uint16_t e100_read2(E100State * s, uint32_t addr_offset)
+{
+    uint16_t val = -1;
+
+    if ( addr_offset + sizeof(val) < sizeof(s->pci_mem.mem) )
+    {
+        e100_execute(s, addr_offset, val, OP_READ, OP_IS_READ);
+        val = CSR_READ(addr_offset, uint16_t);
+        logout("READ2: Register name = %s, addr_offset = %#x, val=%#x\n",
+               SCBNAME(addr_offset), addr_offset, val);
+    }
+    else
+    {
+        logout("Invaild read, beyond memory boundary(addr:%#x)\n", addr_offset
+               + s->region_base_addr[CSR_MEMORY_BASE]);
+    }
+
+    return val;
+}
+
+/* Read a 32-bit word of the CSR at addr_offset.
+ *
+ * Returns the register value after e100_execute() has applied the read side
+ * effects, or 0xffffffff when the access fails the bounds check.
+ */
+static uint32_t e100_read4(E100State * s, uint32_t addr_offset)
+{
+    uint32_t val = -1;
+
+    if ( addr_offset + sizeof(val) < sizeof(s->pci_mem.mem) )
+    {
+        e100_execute(s, addr_offset, val, OP_READ, OP_IS_READ);
+        val = CSR_READ(addr_offset, uint32_t);
+        logout("READ4: Register name = %s, addr_offset = %#x, val=%#x\n",
+               SCBNAME(addr_offset), addr_offset, val);
+    }
+    else
+    {
+        logout("Invaild read, beyond memory boundary(addr:%#x)\n", addr_offset
+               + s->region_base_addr[CSR_MEMORY_BASE]);
+    }
+
+    return val;
+}
+
+/* MMIO byte read: turn the physical address into a CSR offset and read it. */
+static uint32_t pci_mmio_readb(void *opaque, target_phys_addr_t addr)
+{
+    E100State *state = opaque;
+    uint32_t offset = addr - state->region_base_addr[CSR_MEMORY_BASE];
+
+    return e100_read1(state, offset);
+}
+
+/* MMIO 16-bit read: turn the physical address into a CSR offset and read it. */
+static uint32_t pci_mmio_readw(void *opaque, target_phys_addr_t addr)
+{
+    E100State *state = opaque;
+    uint32_t offset = addr - state->region_base_addr[CSR_MEMORY_BASE];
+
+    return e100_read2(state, offset);
+}
+
+/* MMIO 32-bit read: turn the physical address into a CSR offset and read it. */
+static uint32_t pci_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+    E100State *state = opaque;
+    uint32_t offset = addr - state->region_base_addr[CSR_MEMORY_BASE];
+
+    return e100_read4(state, offset);
+}
+
+/* MMIO read handlers in byte, word, long order; handed to
+ * cpu_register_io_memory() by e100_init(). */
+static CPUReadMemoryFunc *pci_mmio_read[] = {
+ pci_mmio_readb,
+ pci_mmio_readw,
+ pci_mmio_readl
+};
+
+/* Store one byte written by the driver into the CSR image.
+ *
+ * Only the register image is updated here; the side effects of the write
+ * are carried out by e100_execute(), which the MMIO and I/O port handlers
+ * call right after this function.
+ *
+ * A write that fails the bounds check is logged and dropped.
+ */
+static void e100_write1(E100State * s, uint32_t addr_offset, uint8_t val)
+{
+ if ( addr_offset + sizeof(val) >= sizeof(s->pci_mem.mem) )
+ {
+ logout("Invaild write, beyond memory boundary(addr = %#x, val =
%#x\n", addr_offset
+ + s->region_base_addr[CSR_MEMORY_BASE], val);
+ return;
+ }
+
+ // SCB status is a read-only word and can not be written directly
+ if ( addr_offset == SCB_STATUS )
+ {
+ return;
+ }
+ // EEDO bit of eeprom register is read-only, can not be written;
+ else if ( addr_offset == SCB_EEPROM )
+ {
+ /* Remember EEDO (bit 3), store the new value, then put EEDO back */
+ int eedo = BIT(3) & CSR_VAL(CSR_EEPROM);
+ CSR_WRITE(addr_offset, val, uint8_t);
+ CSR(CSR_EEPROM, eedo) = !!(eedo & EEPROM_DO);
+
+ logout("WRITE1: Register name = %s, addr_offset = %#x, val = %#x\n",
SCBNAME(addr_offset),addr_offset, (uint8_t)CSR_VAL(CSR_EEPROM));
+ return;
+ }
+ else
+ {
+ CSR_WRITE(addr_offset, val, uint8_t);
+ }
+
+ logout("WRITE1: Register name = %s, addr_offset = %#x, val = %#x\n",
SCBNAME(addr_offset),addr_offset, val);
+ return;
+}
+
+/* Store a 16-bit word written by the driver into the CSR image.
+ *
+ * Only the register image is updated here; the side effects of the write
+ * are carried out by e100_execute(), which the MMIO and I/O port handlers
+ * call right after this function.
+ *
+ * A write that fails the bounds check is logged and dropped.
+ */
+static void e100_write2(E100State * s, uint32_t addr_offset, uint16_t val)
+{
+    if ( addr_offset + sizeof(val) >= sizeof(s->pci_mem.mem) )
+    {
+        logout("Invaild write, beyond memory boundary(addr = %#x, val = %#x\n",
+               addr_offset + s->region_base_addr[CSR_MEMORY_BASE], val);
+        return;
+    }
+
+    // SCB status byte is read-only: only the byte above it is stored
+    if ( addr_offset == SCB_STATUS )
+    {
+        uint8_t high_byte = val >> 8;
+        CSR_WRITE(addr_offset+1, high_byte, uint8_t);
+    }
+    // EEDO bit of eeprom register is read-only, can not be written;
+    else if ( addr_offset == SCB_EEPROM )
+    {
+        /* Remember EEDO (bit 3), store the new value, then put EEDO back */
+        int eedo = BIT(3) & CSR_VAL(CSR_EEPROM);
+        CSR_WRITE(addr_offset, val, uint16_t);
+        CSR(CSR_EEPROM, eedo) = !!(eedo & EEPROM_DO);
+
+        /* This is a 2-byte write; the label used to say WRITE1. */
+        logout("WRITE2: Register name = %s, addr_offset = %#x, val = %#x\n",
+               SCBNAME(addr_offset),addr_offset, CSR_VAL(CSR_EEPROM));
+        return;
+    }
+    else
+    {
+        CSR_WRITE(addr_offset, val, uint16_t);
+    }
+
+    logout("WRITE2: Register name = %s, addr_offset = %#x, val = %#x\n",
+           SCBNAME(addr_offset),addr_offset, val);
+    return;
+}
+
+/* Store a 32-bit word written by the driver into the CSR image.
+ *
+ * Only the register image is updated here; the side effects of the write
+ * are carried out by e100_execute(), which the MMIO and I/O port handlers
+ * call right after this function.
+ *
+ * A write that fails the bounds check is logged and dropped.
+ */
+static void e100_write4(E100State * s, uint32_t addr_offset, uint32_t val)
+{
+    if ( addr_offset + sizeof(val) >= sizeof(s->pci_mem.mem) )
+    {
+        logout("Invaild write, beyond memory boundary(addr = %#x, val = %#x\n",
+               addr_offset + s->region_base_addr[CSR_MEMORY_BASE], val);
+        return;
+    }
+
+    // SCB status byte is read-only: store the three bytes above it only
+    if ( addr_offset == SCB_STATUS )
+    {
+        uint8_t bytes[sizeof(val)];
+
+        /* Split val through memcpy.  The former store through a
+         * uint32_t pointer into a uint8_t array was an aliasing and
+         * alignment hazard; the resulting bytes are the same. */
+        memcpy(bytes, &val, sizeof(val));
+
+        CSR_WRITE(addr_offset+1, bytes[1], uint8_t);
+        CSR_WRITE(addr_offset+2, bytes[2], uint8_t);
+        CSR_WRITE(addr_offset+3, bytes[3], uint8_t);
+    }
+    /* No write4 operation on EEPROM register */
+    else
+    {
+        CSR_WRITE(addr_offset, val, uint32_t);
+    }
+
+    logout("WRITE4: Register name = %s, addr_offset = %#x, val = %#x\n",
+           SCBNAME(addr_offset),addr_offset, val);
+    return;
+}
+
+/* MMIO byte write: update the CSR image, then run the side effects. */
+static void pci_mmio_writeb(void *opaque, target_phys_addr_t addr,
+                            uint32_t val)
+{
+    E100State *state = opaque;
+    uint32_t offset = addr - state->region_base_addr[CSR_MEMORY_BASE];
+
+    e100_write1(state, offset, val);
+    e100_execute(state, offset, val, OP_WRITE, WRITEB);
+}
+
+/* MMIO 16-bit write: update the CSR image, then run the side effects. */
+static void pci_mmio_writew(void *opaque, target_phys_addr_t addr,
+                            uint32_t val)
+{
+    E100State *state = opaque;
+    uint32_t offset = addr - state->region_base_addr[CSR_MEMORY_BASE];
+
+    e100_write2(state, offset, val);
+    e100_execute(state, offset, val, OP_WRITE, WRITEW);
+}
+
+/* MMIO 32-bit write: update the CSR image, then run the side effects. */
+static void pci_mmio_writel(void *opaque, target_phys_addr_t addr,
+                            uint32_t val)
+{
+    E100State *state = opaque;
+    uint32_t offset = addr - state->region_base_addr[CSR_MEMORY_BASE];
+
+    e100_write4(state, offset, val);
+    e100_execute(state, offset, val, OP_WRITE, WRITEL);
+}
+
+/* MMIO write handlers in byte, word, long order; handed to
+ * cpu_register_io_memory() by e100_init(). */
+static CPUWriteMemoryFunc *pci_mmio_write[] = {
+ pci_mmio_writeb,
+ pci_mmio_writew,
+ pci_mmio_writel
+};
+
+/* PCI map callback for the memory regions.
+ *
+ * Only the region numbered CSR_MEMORY_BASE is mapped: it is attached to the
+ * MMIO handlers and its base address is recorded.  Any other region is
+ * only logged.
+ */
+static void pci_mmio_map(PCIDevice * pci_dev, int region_num,
+                         uint32_t addr, uint32_t size, int type)
+{
+    E100State *state = &((PCIE100State *) pci_dev)->e100;
+
+    logout("region %d, addr=0x%08x, size=0x%08x, type=%d\n",
+           region_num, addr, size, type);
+
+    if ( region_num != CSR_MEMORY_BASE )
+        return;
+
+    /* Map control / status registers. */
+    cpu_register_physical_memory(addr, size, state->mmio_index);
+    state->region_base_addr[region_num] = addr;
+}
+
+/* IO access functions */
+
+/* I/O port byte write: update the CSR image, then run the side effects. */
+static void ioport_write1(void *opaque, uint32_t addr, uint32_t val)
+{
+    E100State *state = opaque;
+    uint32_t offset = addr - state->region_base_addr[CSR_IO_BASE];
+
+    e100_write1(state, offset, val);
+    e100_execute(state, offset, val, OP_WRITE, WRITEB);
+}
+
+/* I/O port 16-bit write: update the CSR image, then run the side effects. */
+static void ioport_write2(void *opaque, uint32_t addr, uint32_t val)
+{
+    E100State *state = opaque;
+    uint32_t offset = addr - state->region_base_addr[CSR_IO_BASE];
+
+    e100_write2(state, offset, val);
+    e100_execute(state, offset, val, OP_WRITE, WRITEW);
+}
+
+/* I/O port 32-bit write: update the CSR image, then run the side effects. */
+static void ioport_write4(void *opaque, uint32_t addr, uint32_t val)
+{
+    E100State *state = opaque;
+    uint32_t offset = addr - state->region_base_addr[CSR_IO_BASE];
+
+    e100_write4(state, offset, val);
+    e100_execute(state, offset, val, OP_WRITE, WRITEL);
+}
+
+/* I/O port byte read: turn the port number into a CSR offset and read it. */
+static uint32_t ioport_read1(void *opaque, uint32_t addr)
+{
+    E100State *state = opaque;
+    uint32_t offset = addr - state->region_base_addr[CSR_IO_BASE];
+
+    return e100_read1(state, offset);
+}
+
+/* I/O port 16-bit read: turn the port number into a CSR offset and read it. */
+static uint32_t ioport_read2(void *opaque, uint32_t addr)
+{
+    E100State *state = opaque;
+    uint32_t offset = addr - state->region_base_addr[CSR_IO_BASE];
+
+    return e100_read2(state, offset);
+}
+
+/* I/O port 32-bit read: turn the port number into a CSR offset and read it. */
+static uint32_t ioport_read4(void *opaque, uint32_t addr)
+{
+    E100State *state = opaque;
+    uint32_t offset = addr - state->region_base_addr[CSR_IO_BASE];
+
+    return e100_read4(state, offset);
+}
+
+/* PCI map callback for the I/O region.
+ *
+ * Registers the 1-, 2- and 4-byte port handlers over [addr, addr + size)
+ * and records the base address.  Only region 1 is accepted; any other
+ * region number is logged and ignored.
+ */
+static void pci_ioport_map(PCIDevice * pci_dev, int region_num,
+                           uint32_t addr, uint32_t size, int type)
+{
+    E100State *state = &((PCIE100State *) pci_dev)->e100;
+
+    logout("region %d, addr=0x%08x, size=0x%08x, type=%d\n",
+           region_num, addr, size, type);
+
+    if ( region_num == 1 )
+    {
+        register_ioport_write(addr, size, 1, ioport_write1, state);
+        register_ioport_read(addr, size, 1, ioport_read1, state);
+        register_ioport_write(addr, size, 2, ioport_write2, state);
+        register_ioport_read(addr, size, 2, ioport_read2, state);
+        register_ioport_write(addr, size, 4, ioport_write4, state);
+        register_ioport_read(addr, size, 4, ioport_read4, state);
+
+        state->region_base_addr[region_num] = addr;
+        return;
+    }
+
+    logout("Invaid region number!\n");
+}
+
+/* From FreeBSD */
+#define POLYNOMIAL 0x04c11db6
+
+/* Hash a 6-byte Ethernet address into a multicast filter index.
+ *
+ * Runs a bit-serial CRC over the address, least significant bit of each
+ * byte first, and returns the top 6 bits of the result (0..63).
+ */
+static int compute_mcast_idx(const uint8_t *ep)
+{
+    uint32_t crc = 0xffffffff;
+    const uint8_t *end = ep + 6;
+
+    while ( ep < end )
+    {
+        uint8_t octet = *ep++;
+        int bit = 8;
+
+        while ( bit-- > 0 )
+        {
+            /* feedback = top CRC bit XOR next input bit */
+            int carry = (int)((crc >> 31) & 1) ^ (octet & 0x01);
+
+            crc <<= 1;
+            octet >>= 1;
+            if ( carry )
+                crc = ((crc ^ POLYNOMIAL) | carry);
+        }
+    }
+
+    return (crc >> 26);
+}
+
+/* Eerpro100 receive functions */
+
+/* Tell the network layer whether a frame can be accepted right now.
+ * Returns 1 when the receive unit is in the RU_READY state, 0 otherwise.
+ */
+static int e100_can_receive(void *opaque)
+{
+    E100State *s = opaque;
+
+    if ( GET_RU_STATE == RU_READY )
+    {
+        logout("%s\n", "EEPro100 receiver is ready");
+        return 1;
+    }
+
+    logout("%s\n", "EEPro100 receiver is not ready");
+    return 0;
+}
+
+/* Deliver one received frame to the guest.
+ *
+ * opaque: the E100State
+ * buf:    frame data
+ * size:   frame length in bytes
+ *
+ * The frame is dropped unless the RU is ready and one of the address
+ * filters below accepts it.  An accepted frame is written to guest memory
+ * right behind the current receive frame descriptor (RFD), the RFD is
+ * marked complete, an FR interrupt is raised and the RU moves on to the
+ * RFD's link address.
+ */
+static void e100_receive(void *opaque, const uint8_t * buf, int size)
+{
+ E100State *s = opaque;
+ uint32_t rfd_addr = 0;
+ rfd_t rfd = {0};
+
+
+ if ( GET_RU_STATE != RU_READY )
+ {
+ //logout("RU is not ready. Begin discarding frame(state=%x)\n",
GET_RU_STATE);
+ return;
+ }
+
+ /* Fetch the current RFD from guest memory */
+ rfd_addr = s->ru_base + s->ru_offset;
+ cpu_physical_memory_read(rfd_addr, (uint8_t *)&rfd, sizeof(rfd_t));
+
+ /* The branches below are tried in order; the first match decides */
+ if ( (size > MAX_ETH_FRAME_SIZE+4) )
+ {
+ /* Long frame and configuration byte 18/3 (long receive ok) not set:
+ * Long frames are discarded. */
+ logout("Discard long frame(size=%d)\n", size);
+
+ return;
+ }
+ else if ( !memcmp(buf, s->macaddr, sizeof(s->macaddr)) )
+ {
+ /* The frame is for me */
+ logout("Receive a frame for me(size=%d)\n", size);
+ e100_dump("FRAME:", (uint8_t *)buf, size);
+ }
+ else if ( !memcmp(buf, broadcast_macaddr, sizeof(broadcast_macaddr)) )
+ {
+ if ( s->config.broadcast_dis && !s->config.promiscuous )
+ {
+ logout("Discard a broadcast frame\n");
+ return;
+ }
+
+ /* Broadcast frame */
+ rfd.status |= RX_IA_MATCH;
+ logout("Receive a broadcast frame(size=%d)\n", size);
+ }
+ else if ( s->is_multcast_enable && buf[0] & 0x1 )
+ {
+ /* mult_list is used as a bitmap indexed by the address hash here.
+ * NOTE(review): nothing visible in this part of the file sets
+ * is_multcast_enable to non-zero - confirm where it is enabled. */
+ int mcast_idx = compute_mcast_idx(buf);
+ if ( !(s->mult_list[mcast_idx >> 3] & (1 << (mcast_idx & 7))) )
+ {
+ logout("Multicast address mismatch, discard\n");
+ return;
+ }
+ logout("Receive a multicast frame(size=%d)\n", size);
+ }
+ else if ( size < 64 && (s->config.dis_short_rx) )
+ {
+ /* From Intel's spec, short frame should be discarded
+ * when configuration byte 7/0 (discard short receive) set.
+ * But this will cause frame lossing such as ICMP frame, ARP frame.
+ * So we check is the frame for me before discarding short frame
+ */
+
+ /* Save Bad Frame bit */
+ if ( s->config.save_bad_frame )
+ {
+ /* rfd is a local copy and this branch returns below, so the status
+ * bit never reaches the guest; only the counter is kept. */
+ rfd.status |= RX_SHORT;
+ s->statistics.rx_short_frame_errors ++;
+ }
+ logout("Receive a short frame(size=%d), discard it\n", size);
+ return;
+ }
+ else if ( s->config.promiscuous )
+ {
+ /* Promiscuous: receive all. No address match */
+ logout("Received frame in promiscuous mode(size=%d)\n", size);
+ rfd.status |= RX_NO_MATCH;
+ }
+ else
+ {
+ e100_dump("Unknown frame, MAC = ", (uint8_t *)buf, 6);
+ return;
+ }
+ e100_dump("Get frame, MAC = ", (uint8_t *)buf, 6);
+
+ /* Mark the RFD as completed and record the frame length */
+ rfd.c = 1;
+ rfd.ok = 1;
+ rfd.f = 1;
+ rfd.eof = 1;
+ rfd.status &= ~RX_COLLISION;
+ rfd.count = size;
+
+ logout("Get a RFD configure:\n"
+ "\tstatus:%#x\n"
+ "\tok:%#x\n" "\tc:%#x\n" "\tsf:%#x\n"
+ "\th:%#x\n" "\ts:%#x\n" "\tel:%#x\n"
+ "\tlink add:%#x\n" "\tactual count:%#x\n"
+ "\tf:%#x\n" "\teof:%#x\n" "\tsize:%#x\n",
+ rfd.status, rfd.ok, rfd.c, rfd.sf, rfd.h,
+ rfd.s, rfd.el, rfd.link_addr, rfd.count,
+ rfd.f, rfd.eof, rfd.size);
+
+ /* Write back the RFD, then the frame data directly behind it.
+ * NOTE(review): size is not compared with rfd.size before the data is
+ * written - confirm the guest buffer is always large enough. */
+ cpu_physical_memory_write(rfd_addr, (uint8_t *)&rfd, sizeof(rfd));
+ cpu_physical_memory_write(rfd_addr + sizeof(rfd_t), buf, size);
+ s->statistics.rx_good_frames ++;
+ s->ru_offset = le32_to_cpu(rfd.link_addr);
+
+ e100_interrupt(s, INT_FR);
+
+ if ( rfd.el || rfd.s )
+ {
+ /* Go to suspend */
+ SET_RU_STATE(RU_SUSPENDED);
+ e100_interrupt(s, INT_RNR);
+ logout("RFD met S or EL bit set, RU go to suspend\n");
+ return;
+ }
+
+ logout("Complete a frame receive(size = %d)\n", size);
+ return;
+}
+
+/* Build the initial EEPROM image.
+ *
+ * Resets the EEPROM, loads the eeprom_i82557 template, overwrites its start
+ * with the MAC address (the driver reads the MAC from the EEPROM) and
+ * recomputes the checksum kept in the last word.
+ *
+ * i82557 and i82558 support a 64 word EEPROM, i82559 and later support a
+ * 64 or 256 word EEPROM.
+ */
+static void eeprom_init(E100State *s)
+{
+    int word;
+    int last_word;
+    int sum = 0;
+
+    eeprom_reset(s, EEPROM_RESET_ALL);
+    s->eeprom.addr_len = EEPROM_I82557_ADDRBIT;
+
+    memcpy(s->eeprom.contents, eeprom_i82557, sizeof(eeprom_i82557));
+    memcpy((uint8_t *)s->eeprom.contents, s->macaddr, sizeof(s->macaddr));
+
+    /* The last word is chosen so that all words add up to 0xBABA; it has
+     * to be recomputed because the MAC address was just replaced. */
+    last_word = (1 << s->eeprom.addr_len) - 1;
+    for ( word = 0; word < last_word; word++ )
+        sum += s->eeprom.contents[word];
+    s->eeprom.contents[last_word] = 0xBABA - sum;
+}
+
+/* Create one E100 NIC on the given PCI bus.
+ *
+ * bus:    PCI bus to attach to
+ * nd:     NIC description (MAC address, VLAN)
+ * name:   device name used for the PCI registration and for savevm
+ * device: chip variant stored in the device state
+ *
+ * Registers the PCI device, its three regions (CSR memory, CSR I/O, flash),
+ * the MMIO handlers, the VLAN client, the reset handler and the savevm
+ * handlers.
+ */
+static void e100_init(PCIBus * bus, NICInfo * nd,
+                      const char *name, uint32_t device)
+{
+    PCIE100State *pci_state;
+    E100State *s;
+
+    logout("\n");
+
+    pci_state = (PCIE100State *) pci_register_device(bus, name,
+                                                     sizeof(PCIE100State), -1,
+                                                     NULL, NULL);
+
+    s = &pci_state->e100;
+    s->device = device;
+    s->pci_dev = &pci_state->dev;
+
+    pci_reset(s);
+
+    /* Handler for memory-mapped I/O */
+    s->mmio_index =
+        cpu_register_io_memory(0, pci_mmio_read, pci_mmio_write, s);
+
+    /* Region 0: CSR, memory mapped */
+    pci_register_io_region(&pci_state->dev, 0, PCI_MEM_SIZE,
+                           PCI_ADDRESS_SPACE_MEM | PCI_ADDRESS_SPACE_MEM_PREFETCH,
+                           pci_mmio_map);
+    /* Region 1: CSR, I/O mapped */
+    pci_register_io_region(&pci_state->dev, 1, PCI_IO_SIZE,
+                           PCI_ADDRESS_SPACE_IO, pci_ioport_map);
+    /* Region 2: flash, memory mapped */
+    pci_register_io_region(&pci_state->dev, 2, PCI_FLASH_SIZE,
+                           PCI_ADDRESS_SPACE_MEM, pci_mmio_map);
+
+    /* The MAC address must be in place before the EEPROM image is built. */
+    memcpy(s->macaddr, nd->macaddr, 6);
+    e100_dump("MAC ADDR", (uint8_t *)&s->macaddr[0], 6);
+
+    eeprom_init(s);
+
+    e100_reset(s);
+
+    s->vc = qemu_new_vlan_client(nd->vlan, e100_receive, e100_can_receive, s);
+
+    snprintf(s->vc->info_str, sizeof(s->vc->info_str),
+             "e100 pci macaddr=%02x:%02x:%02x:%02x:%02x:%02x",
+             s->macaddr[0], s->macaddr[1], s->macaddr[2],
+             s->macaddr[3], s->macaddr[4], s->macaddr[5]);
+
+    qemu_register_reset(e100_reset, s);
+
+    register_savevm(name, 0, 3, e100_save, e100_load, s);
+}
+
+/* Public entry point: create an "e100" NIC emulating the i82557C. */
+void pci_e100_init(PCIBus * bus, NICInfo * nd)
+{
+    static const char model_name[] = "e100";
+
+    e100_init(bus, nd, model_name, i82557C);
+}
+
diff -r 9a9ddc04eea2 -r 53dc1cf50506 tools/ioemu/hw/pci.c
--- a/tools/ioemu/hw/pci.c Tue Nov 20 11:53:44 2007 -0700
+++ b/tools/ioemu/hw/pci.c Wed Nov 21 09:12:06 2007 -0700
@@ -565,6 +565,8 @@ void pci_nic_init(PCIBus *bus, NICInfo *
pci_rtl8139_init(bus, nd, devfn);
} else if (strcmp(nd->model, "pcnet") == 0) {
pci_pcnet_init(bus, nd, devfn);
+ } else if (strcmp(nd->model, "e100") == 0) {
+ pci_e100_init(bus, nd);
} else {
fprintf(stderr, "qemu: Unsupported NIC: %s\n", nd->model);
exit (1);
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/hvm/hpet.c
--- a/xen/arch/x86/hvm/hpet.c Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/hvm/hpet.c Wed Nov 21 09:12:06 2007 -0700
@@ -127,9 +127,13 @@ static inline int hpet_check_access_leng
{
if ( (addr & (len - 1)) || (len > 8) )
{
- gdprintk(XENLOG_ERR, "HPET: access across register boundary: "
+ /*
+ * According to ICH9 specification, unaligned accesses may result
+ * in unexpected behaviour or master abort, but should not crash/hang.
+ * Hence we read all-ones, drop writes, and log a warning.
+ */
+ gdprintk(XENLOG_WARNING, "HPET: access across register boundary: "
"%lx %lx\n", addr, len);
- domain_crash(current->domain);
return -EINVAL;
}
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/irq.c Wed Nov 21 09:12:06 2007 -0700
@@ -15,7 +15,6 @@
#include <xen/keyhandler.h>
#include <xen/compat.h>
#include <asm/current.h>
-#include <asm/smpboot.h>
#include <asm/iommu.h>
/* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/mm.c Wed Nov 21 09:12:06 2007 -0700
@@ -3007,7 +3007,8 @@ long set_gdt(struct vcpu *v,
return -EINVAL;
/* Check the pages in the new GDT. */
- for ( i = 0; i < nr_pages; i++ ) {
+ for ( i = 0; i < nr_pages; i++ )
+ {
mfn = frames[i] = gmfn_to_mfn(d, frames[i]);
if ( !mfn_valid(mfn) ||
!get_page_and_type(mfn_to_page(mfn), d, PGT_gdt_page) )
@@ -3073,23 +3074,15 @@ long do_update_descriptor(u64 pa, u64 de
*(u64 *)&d = desc;
- LOCK_BIGLOCK(dom);
-
mfn = gmfn_to_mfn(dom, gmfn);
if ( (((unsigned int)pa % sizeof(struct desc_struct)) != 0) ||
!mfn_valid(mfn) ||
!check_descriptor(dom, &d) )
- {
- UNLOCK_BIGLOCK(dom);
return -EINVAL;
- }
page = mfn_to_page(mfn);
if ( unlikely(!get_page(page, dom)) )
- {
- UNLOCK_BIGLOCK(dom);
return -EINVAL;
- }
/* Check if the given frame is in use in an unsafe context. */
switch ( page->u.inuse.type_info & PGT_type_mask )
@@ -3112,7 +3105,7 @@ long do_update_descriptor(u64 pa, u64 de
/* All is good so make the update. */
gdt_pent = map_domain_page(mfn);
- memcpy(&gdt_pent[offset], &d, 8);
+ atomic_write64((uint64_t *)&gdt_pent[offset], *(uint64_t *)&d);
unmap_domain_page(gdt_pent);
put_page_type(page);
@@ -3121,8 +3114,6 @@ long do_update_descriptor(u64 pa, u64 de
out:
put_page(page);
-
- UNLOCK_BIGLOCK(dom);
return ret;
}
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/physdev.c Wed Nov 21 09:12:06 2007 -0700
@@ -8,7 +8,6 @@
#include <xen/event.h>
#include <xen/guest_access.h>
#include <asm/current.h>
-#include <asm/smpboot.h>
#include <asm/hypercall.h>
#include <public/xen.h>
#include <public/physdev.h>
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/smp.c
--- a/xen/arch/x86/smp.c Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/smp.c Wed Nov 21 09:12:06 2007 -0700
@@ -18,7 +18,6 @@
#include <asm/smp.h>
#include <asm/mc146818rtc.h>
#include <asm/flushtlb.h>
-#include <asm/smpboot.h>
#include <asm/hardirq.h>
#include <asm/ipi.h>
#include <asm/hvm/support.h>
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/traps.c Wed Nov 21 09:12:06 2007 -0700
@@ -2583,7 +2583,10 @@ void set_system_gate(unsigned int n, voi
void set_task_gate(unsigned int n, unsigned int sel)
{
+ idt_table[n].b = 0;
+ wmb(); /* disable gate /then/ rewrite */
idt_table[n].a = sel << 16;
+ wmb(); /* rewrite /then/ enable gate */
idt_table[n].b = 0x8500;
}
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/x86_32/seg_fixup.c
--- a/xen/arch/x86/x86_32/seg_fixup.c Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/x86_32/seg_fixup.c Wed Nov 21 09:12:06 2007 -0700
@@ -42,7 +42,7 @@
#define O OPCODE_BYTE
#define M HAS_MODRM
-static unsigned char insn_decode[256] = {
+static const unsigned char insn_decode[256] = {
/* 0x00 - 0x0F */
O|M, O|M, O|M, O|M, X, X, X, X,
O|M, O|M, O|M, O|M, X, X, X, X,
@@ -69,7 +69,7 @@ static unsigned char insn_decode[256] =
X, X, X, X, X, X, X, X,
/* 0x80 - 0x8F */
O|M|1, O|M|4, O|M|1, O|M|1, O|M, O|M, O|M, O|M,
- O|M, O|M, O|M, O|M, O|M, O|M, O|M, X,
+ O|M, O|M, O|M, O|M, O|M, X|M, O|M, O|M,
/* 0x90 - 0x9F */
X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X,
@@ -89,17 +89,17 @@ static unsigned char insn_decode[256] =
X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X,
/* 0xF0 - 0xFF */
- X, X, X, X, X, X, X, X,
+ X, X, X, X, X, X, O|M, O|M,
X, X, X, X, X, X, O|M, O|M
};
-static unsigned char twobyte_decode[256] = {
+static const unsigned char twobyte_decode[256] = {
/* 0x00 - 0x0F */
X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X,
/* 0x10 - 0x1F */
X, X, X, X, X, X, X, X,
- X, X, X, X, X, X, X, X,
+ O|M, X, X, X, X, X, X, X,
/* 0x20 - 0x2F */
X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X,
@@ -122,16 +122,16 @@ static unsigned char twobyte_decode[256]
X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X,
/* 0x90 - 0x9F */
- X, X, X, X, X, X, X, X,
- X, X, X, X, X, X, X, X,
+ O|M, O|M, O|M, O|M, O|M, O|M, O|M, O|M,
+ O|M, O|M, O|M, O|M, O|M, O|M, O|M, O|M,
/* 0xA0 - 0xAF */
- X, X, X, X, X, X, X, X,
- X, X, X, X, X, X, X, X,
+ X, X, X, O|M, O|M|1, O|M, O|M, X,
+ X, X, X, O|M, O|M|1, O|M, X, O|M,
/* 0xB0 - 0xBF */
- X, X, X, X, X, X, X, X,
- X, X, X, X, X, X, X, X,
+ X, X, X, O|M, X, X, O|M, O|M,
+ X, X, O|M|1, O|M, O|M, O|M, O|M, O|M,
/* 0xC0 - 0xCF */
- X, X, X, X, X, X, X, X,
+ O|M, O|M, X, O|M, X, X, X, O|M,
X, X, X, X, X, X, X, X,
/* 0xD0 - 0xDF */
X, X, X, X, X, X, X, X,
@@ -153,24 +153,24 @@ static unsigned char twobyte_decode[256]
* @base (OUT): Decoded linear base address.
* @limit (OUT): Decoded segment limit, in bytes. 0 == unlimited (4GB).
*/
-int get_baselimit(u16 seg, unsigned long *base, unsigned long *limit)
+static int get_baselimit(u16 seg, unsigned long *base, unsigned long *limit)
{
- struct vcpu *d = current;
- unsigned long *table, a, b;
- int ldt = !!(seg & 4);
- int idx = (seg >> 3) & 8191;
+ struct vcpu *curr = current;
+ uint32_t *table, a, b;
+ int ldt = !!(seg & 4);
+ int idx = (seg >> 3) & 8191;
/* Get base and check limit. */
if ( ldt )
{
- table = (unsigned long *)LDT_VIRT_START(d);
- if ( idx >= d->arch.guest_context.ldt_ents )
+ table = (uint32_t *)LDT_VIRT_START(curr);
+ if ( idx >= curr->arch.guest_context.ldt_ents )
goto fail;
}
else /* gdt */
{
- table = (unsigned long *)GDT_VIRT_START(d);
- if ( idx >= d->arch.guest_context.gdt_ents )
+ table = (uint32_t *)GDT_VIRT_START(curr);
+ if ( idx >= curr->arch.guest_context.gdt_ents )
goto fail;
}
@@ -204,7 +204,7 @@ int get_baselimit(u16 seg, unsigned long
}
/* Turn a segment+offset into a linear address. */
-int linearise_address(u16 seg, unsigned long off, unsigned long *linear)
+static int linearise_address(u16 seg, unsigned long off, unsigned long *linear)
{
unsigned long base, limit;
@@ -219,31 +219,31 @@ int linearise_address(u16 seg, unsigned
return 1;
}
-int fixup_seg(u16 seg, unsigned long offset)
+static int fixup_seg(u16 seg, unsigned long offset)
{
- struct vcpu *d = current;
- unsigned long *table, a, b, base, limit;
- int ldt = !!(seg & 4);
- int idx = (seg >> 3) & 8191;
+ struct vcpu *curr = current;
+ uint32_t *table, a, b, base, limit;
+ int ldt = !!(seg & 4);
+ int idx = (seg >> 3) & 8191;
/* Get base and check limit. */
if ( ldt )
{
- table = (unsigned long *)LDT_VIRT_START(d);
- if ( idx >= d->arch.guest_context.ldt_ents )
+ table = (uint32_t *)LDT_VIRT_START(curr);
+ if ( idx >= curr->arch.guest_context.ldt_ents )
{
dprintk(XENLOG_DEBUG, "Segment %04x out of LDT range (%ld)\n",
- seg, d->arch.guest_context.ldt_ents);
+ seg, curr->arch.guest_context.ldt_ents);
goto fail;
}
}
else /* gdt */
{
- table = (unsigned long *)GDT_VIRT_START(d);
- if ( idx >= d->arch.guest_context.gdt_ents )
+ table = (uint32_t *)GDT_VIRT_START(curr);
+ if ( idx >= curr->arch.guest_context.gdt_ents )
{
dprintk(XENLOG_DEBUG, "Segment %04x out of GDT range (%ld)\n",
- seg, d->arch.guest_context.gdt_ents);
+ seg, curr->arch.guest_context.gdt_ents);
goto fail;
}
}
@@ -261,7 +261,7 @@ int fixup_seg(u16 seg, unsigned long off
_SEGMENT_G|_SEGMENT_CODE|_SEGMENT_DPL)) !=
(_SEGMENT_P|_SEGMENT_S|_SEGMENT_DB|_SEGMENT_G|_SEGMENT_DPL) )
{
- dprintk(XENLOG_DEBUG, "Bad segment %08lx:%08lx\n", a, b);
+ dprintk(XENLOG_DEBUG, "Bad segment %08x:%08x\n", a, b);
goto fail;
}
@@ -291,8 +291,7 @@ int fixup_seg(u16 seg, unsigned long off
}
}
- dprintk(XENLOG_DEBUG, "None of the above! "
- "(%08lx:%08lx, %08lx, %08lx, %08lx)\n",
+ dprintk(XENLOG_DEBUG, "None of the above! (%08x:%08x, %08x, %08x, %08x)\n",
a, b, base, limit, base+limit);
fail:
@@ -303,9 +302,8 @@ int fixup_seg(u16 seg, unsigned long off
a &= ~0x0ffff; a |= limit & 0x0ffff;
b &= ~0xf0000; b |= limit & 0xf0000;
b ^= _SEGMENT_EC; /* grows-up <-> grows-down */
- /* NB. These can't fault. Checked readable above; must also be writable. */
- table[2*idx+0] = a;
- table[2*idx+1] = b;
+ /* NB. This can't fault. Checked readable above; must also be writable. */
+ atomic_write64((uint64_t *)&table[2*idx], ((uint64_t)b<<32) | a);
return 1;
}
@@ -315,18 +313,15 @@ int fixup_seg(u16 seg, unsigned long off
*/
int gpf_emulate_4gb(struct cpu_user_regs *regs)
{
- struct vcpu *d = current;
- struct trap_info *ti;
- struct trap_bounce *tb;
- u8 modrm, mod, reg, rm, decode;
- void *memreg;
- unsigned long offset;
- u8 disp8;
- u32 disp32 = 0;
+ struct vcpu *curr = current;
+ u8 modrm, mod, rm, decode;
+ const u32 *base, *index = NULL;
+ unsigned long offset;
+ s8 disp8;
+ s32 disp32 = 0;
u8 *eip; /* ptr to instruction start */
u8 *pb, b; /* ptr into instr. / current instr. byte */
- int gs_override = 0;
- int twobyte = 0;
+ int gs_override = 0, scale = 0, twobyte = 0;
/* WARNING: We only work for ring-3 segments. */
if ( unlikely(vm86_mode(regs)) || unlikely(!ring_3(regs)) )
@@ -356,6 +351,9 @@ int gpf_emulate_4gb(struct cpu_user_regs
"legal instruction\n");
goto fail;
}
+
+ if ( twobyte )
+ break;
switch ( b )
{
@@ -375,6 +373,9 @@ int gpf_emulate_4gb(struct cpu_user_regs
case 0x65: /* GS override */
gs_override = 1;
break;
+ case 0x0f: /* Not really a prefix byte */
+ twobyte = 1;
+ break;
default: /* Not a prefix byte */
goto done_prefix;
}
@@ -387,32 +388,10 @@ int gpf_emulate_4gb(struct cpu_user_regs
goto fail;
}
- decode = insn_decode[b]; /* opcode byte */
+ decode = (!twobyte ? insn_decode : twobyte_decode)[b];
pb++;
- if ( decode == 0 && b == 0x0f )
- {
- twobyte = 1;
-
- if ( get_user(b, pb) )
- {
- dprintk(XENLOG_DEBUG,
- "Fault while accessing byte %ld of instruction\n",
- (long)(pb-eip));
- goto page_fault;
- }
-
- if ( (pb - eip) >= 15 )
- {
- dprintk(XENLOG_DEBUG, "Too many opcode bytes for a "
- "legal instruction\n");
- goto fail;
- }
-
- decode = twobyte_decode[b];
- pb++;
- }
-
- if ( decode == 0 )
+
+ if ( !(decode & OPCODE_BYTE) )
{
dprintk(XENLOG_DEBUG, "Unsupported %sopcode %02x\n",
twobyte ? "two byte " : "", b);
@@ -422,12 +401,12 @@ int gpf_emulate_4gb(struct cpu_user_regs
if ( !(decode & HAS_MODRM) )
{
/* Must be a <disp32>, or bail. */
- if ( (decode & 7) != 4 )
+ if ( (decode & INSN_SUFFIX_BYTES) != 4 )
goto fail;
if ( get_user(offset, (u32 *)pb) )
{
- dprintk(XENLOG_DEBUG, "Fault while extracting <disp32>.\n");
+ dprintk(XENLOG_DEBUG, "Fault while extracting <moffs32>.\n");
goto page_fault;
}
pb += 4;
@@ -448,29 +427,39 @@ int gpf_emulate_4gb(struct cpu_user_regs
pb++;
mod = (modrm >> 6) & 3;
- reg = (modrm >> 3) & 7;
rm = (modrm >> 0) & 7;
if ( rm == 4 )
{
- dprintk(XENLOG_DEBUG, "FIXME: Add decoding for the SIB byte.\n");
- goto fixme;
+ u8 sib;
+
+ if ( get_user(sib, pb) )
+ {
+ dprintk(XENLOG_DEBUG, "Fault while extracting sib byte\n");
+ goto page_fault;
+ }
+
+ pb++;
+
+ rm = sib & 7;
+ if ( (sib & 0x38) != 0x20 )
+ index = decode_register((sib >> 3) & 7, regs, 0);
+ scale = sib >> 6;
}
/* Decode R/M field. */
- memreg = decode_register(rm, regs, 0);
+ base = decode_register(rm, regs, 0);
/* Decode Mod field. */
- switch ( modrm >> 6 )
+ switch ( mod )
{
case 0:
- disp32 = 0;
if ( rm == 5 ) /* disp32 rather than (EBP) */
{
- memreg = NULL;
+ base = NULL;
if ( get_user(disp32, (u32 *)pb) )
{
- dprintk(XENLOG_DEBUG, "Fault while extracting <disp8>.\n");
+ dprintk(XENLOG_DEBUG, "Fault while extracting <base32>.\n");
goto page_fault;
}
pb += 4;
@@ -484,13 +473,13 @@ int gpf_emulate_4gb(struct cpu_user_regs
goto page_fault;
}
pb++;
- disp32 = (disp8 & 0x80) ? (disp8 | ~0xff) : disp8;;
+ disp32 = disp8;
break;
case 2:
if ( get_user(disp32, (u32 *)pb) )
{
- dprintk(XENLOG_DEBUG, "Fault while extracting <disp8>.\n");
+ dprintk(XENLOG_DEBUG, "Fault while extracting <disp32>.\n");
goto page_fault;
}
pb += 4;
@@ -502,8 +491,10 @@ int gpf_emulate_4gb(struct cpu_user_regs
}
offset = disp32;
- if ( memreg != NULL )
- offset += *(u32 *)memreg;
+ if ( base != NULL )
+ offset += *base;
+ if ( index != NULL )
+ offset += *index << scale;
skip_modrm:
if ( !fixup_seg((u16)regs->gs, offset) )
@@ -513,10 +504,11 @@ int gpf_emulate_4gb(struct cpu_user_regs
perfc_incr(seg_fixups);
/* If requested, give a callback on otherwise unused vector 15. */
- if ( VM_ASSIST(d->domain, VMASST_TYPE_4gb_segments_notify) )
- {
- ti = &d->arch.guest_context.trap_ctxt[15];
- tb = &d->arch.trap_bounce;
+ if ( VM_ASSIST(curr->domain, VMASST_TYPE_4gb_segments_notify) )
+ {
+ struct trap_info *ti = &curr->arch.guest_context.trap_ctxt[15];
+ struct trap_bounce *tb = &curr->arch.trap_bounce;
+
tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
tb->error_code = pb - eip;
tb->cs = ti->cs;
@@ -527,13 +519,6 @@ int gpf_emulate_4gb(struct cpu_user_regs
return EXCRET_fault_fixed;
- fixme:
- dprintk(XENLOG_DEBUG, "Undecodable instruction "
- "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x "
- "caused GPF(0) at %04x:%08x\n",
- eip[0], eip[1], eip[2], eip[3],
- eip[4], eip[5], eip[6], eip[7],
- regs->cs, regs->eip);
fail:
return 0;
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/include/asm-powerpc/smpboot.h
--- a/xen/include/asm-powerpc/smpboot.h Tue Nov 20 11:53:44 2007 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Copyright (C) IBM Corp. 2005
- *
- * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
- */
-
-#include "../asm-x86/smpboot.h"
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/include/asm-x86/desc.h
--- a/xen/include/asm-x86/desc.h Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/include/asm-x86/desc.h Wed Nov 21 09:12:06 2007 -0700
@@ -143,6 +143,11 @@ typedef struct {
#define _set_gate(gate_addr,type,dpl,addr) \
do { \
+ (gate_addr)->a = 0; \
+ wmb(); /* disable gate /then/ rewrite */ \
+ (gate_addr)->b = \
+ ((unsigned long)(addr) >> 32); \
+ wmb(); /* rewrite /then/ enable gate */ \
(gate_addr)->a = \
(((unsigned long)(addr) & 0xFFFF0000UL) << 32) | \
((unsigned long)(dpl) << 45) | \
@@ -150,49 +155,53 @@ do {
((unsigned long)(addr) & 0xFFFFUL) | \
((unsigned long)__HYPERVISOR_CS64 << 16) | \
(1UL << 47); \
- (gate_addr)->b = \
- ((unsigned long)(addr) >> 32); \
} while (0)
#define _set_tssldt_desc(desc,addr,limit,type) \
do { \
+ (desc)[0].b = (desc)[1].b = 0; \
+ wmb(); /* disable entry /then/ rewrite */ \
(desc)[0].a = \
((u32)(addr) << 16) | ((u32)(limit) & 0xFFFF); \
+ (desc)[1].a = (u32)(((unsigned long)(addr)) >> 32); \
+ wmb(); /* rewrite /then/ enable entry */ \
(desc)[0].b = \
((u32)(addr) & 0xFF000000U) | \
((u32)(type) << 8) | 0x8000U | \
(((u32)(addr) & 0x00FF0000U) >> 16); \
- (desc)[1].a = (u32)(((unsigned long)(addr)) >> 32); \
- (desc)[1].b = 0; \
} while (0)
#elif defined(__i386__)
typedef struct desc_struct idt_entry_t;
-#define _set_gate(gate_addr,type,dpl,addr) \
-do { \
- int __d0, __d1; \
- __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
- "movw %4,%%dx\n\t" \
- "movl %%eax,%0\n\t" \
- "movl %%edx,%1" \
- :"=m" (*((long *) (gate_addr))), \
- "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
- :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
- "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
-} while (0)
-
-#define _set_tssldt_desc(n,addr,limit,type) \
-__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
- "movw %%ax,2(%2)\n\t" \
- "rorl $16,%%eax\n\t" \
- "movb %%al,4(%2)\n\t" \
- "movb %4,5(%2)\n\t" \
- "movb $0,6(%2)\n\t" \
- "movb %%ah,7(%2)\n\t" \
- "rorl $16,%%eax" \
- : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type|0x80))
+#define _set_gate(gate_addr,type,dpl,addr) \
+do { \
+ (gate_addr)->b = 0; \
+ wmb(); /* disable gate /then/ rewrite */ \
+ (gate_addr)->a = \
+ ((unsigned long)(addr) & 0xFFFFUL) | \
+ ((unsigned long)__HYPERVISOR_CS << 16); \
+ wmb(); /* rewrite /then/ enable gate */ \
+ (gate_addr)->b = \
+ ((unsigned long)(addr) & 0xFFFF0000UL) | \
+ ((unsigned long)(dpl) << 13) | \
+ ((unsigned long)(type) << 8) | \
+ (1UL << 15); \
+} while (0)
+
+#define _set_tssldt_desc(desc,addr,limit,type) \
+do { \
+ (desc)->b = 0; \
+ wmb(); /* disable entry /then/ rewrite */ \
+ (desc)->a = \
+ ((u32)(addr) << 16) | ((u32)(limit) & 0xFFFF); \
+ wmb(); /* rewrite /then/ enable entry */ \
+ (desc)->b = \
+ ((u32)(addr) & 0xFF000000U) | \
+ ((u32)(type) << 8) | 0x8000U | \
+ (((u32)(addr) & 0x00FF0000U) >> 16); \
+} while (0)
#endif
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/include/asm-x86/smpboot.h
--- a/xen/include/asm-x86/smpboot.h Tue Nov 20 11:53:44 2007 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,16 +0,0 @@
-#ifndef __ASM_SMPBOOT_H
-#define __ASM_SMPBOOT_H
-
-static inline unsigned long apicid_to_phys_cpu_present(int apicid)
-{
- return 1UL << apicid;
-}
-
-extern volatile int logical_apicid_2_cpu[];
-extern volatile int cpu_2_logical_apicid[];
-extern volatile int physical_apicid_2_cpu[];
-extern volatile int cpu_2_physical_apicid[];
-
-#define boot_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
-
-#endif
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/include/asm-x86/system.h
--- a/xen/include/asm-x86/system.h Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/include/asm-x86/system.h Wed Nov 21 09:12:06 2007 -0700
@@ -5,69 +5,78 @@
#include <xen/types.h>
#include <asm/bitops.h>
-#define read_segment_register(name) \
-({ u16 __sel; \
- __asm__ __volatile__ ( "movw %%" STR(name) ",%0" : "=r" (__sel) ); \
- __sel; \
+#define read_segment_register(name) \
+({ u16 __sel; \
+ asm volatile ( "movw %%" STR(name) ",%0" : "=r" (__sel) ); \
+ __sel; \
})
#define wbinvd() \
- __asm__ __volatile__ ("wbinvd": : :"memory");
+ asm volatile ( "wbinvd" : : : "memory" )
#define clflush(a) \
- __asm__ __volatile__ ("clflush (%0)": :"r"(a));
+ asm volatile ( "clflush (%0)" : : "r"(a) )
-#define nop() __asm__ __volatile__ ("nop")
+#define nop() \
+ asm volatile ( "nop" )
-#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+#define xchg(ptr,v) \
+ ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
struct __xchg_dummy { unsigned long a[100]; };
#define __xg(x) ((volatile struct __xchg_dummy *)(x))
+#if defined(__i386__)
+# include <asm/x86_32/system.h>
+#elif defined(__x86_64__)
+# include <asm/x86_64/system.h>
+#endif
/*
* Note: no "lock" prefix even on SMP: xchg always implies lock anyway
* Note 2: xchg has side effect, so that attribute volatile is necessary,
* but generally the primitive is invalid, *ptr is output argument. --ANK
*/
-static always_inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
+static always_inline unsigned long __xchg(
+ unsigned long x, volatile void *ptr, int size)
{
- switch (size) {
- case 1:
- __asm__ __volatile__("xchgb %b0,%1"
- :"=q" (x)
- :"m" (*__xg((volatile void *)ptr)), "0" (x)
- :"memory");
- break;
- case 2:
- __asm__ __volatile__("xchgw %w0,%1"
- :"=r" (x)
- :"m" (*__xg((volatile void *)ptr)), "0" (x)
- :"memory");
- break;
+ switch ( size )
+ {
+ case 1:
+ asm volatile ( "xchgb %b0,%1"
+ : "=q" (x)
+ : "m" (*__xg((volatile void *)ptr)), "0" (x)
+ : "memory" );
+ break;
+ case 2:
+ asm volatile ( "xchgw %w0,%1"
+ : "=r" (x)
+ : "m" (*__xg((volatile void *)ptr)), "0" (x)
+ : "memory" );
+ break;
#if defined(__i386__)
- case 4:
- __asm__ __volatile__("xchgl %0,%1"
- :"=r" (x)
- :"m" (*__xg((volatile void *)ptr)), "0" (x)
- :"memory");
- break;
+ case 4:
+ asm volatile ( "xchgl %0,%1"
+ : "=r" (x)
+ : "m" (*__xg((volatile void *)ptr)), "0" (x)
+ : "memory" );
+ break;
#elif defined(__x86_64__)
- case 4:
- __asm__ __volatile__("xchgl %k0,%1"
- :"=r" (x)
- :"m" (*__xg((volatile void *)ptr)), "0" (x)
- :"memory");
- break;
- case 8:
- __asm__ __volatile__("xchgq %0,%1"
- :"=r" (x)
- :"m" (*__xg((volatile void *)ptr)), "0" (x)
- :"memory");
- break;
+ case 4:
+ asm volatile ( "xchgl %k0,%1"
+ : "=r" (x)
+ : "m" (*__xg((volatile void *)ptr)), "0" (x)
+ : "memory" );
+ break;
+ case 8:
+ asm volatile ( "xchgq %0,%1"
+ : "=r" (x)
+ : "m" (*__xg((volatile void *)ptr)), "0" (x)
+ : "memory" );
+ break;
#endif
- }
- return x;
+ }
+ return x;
}
/*
@@ -79,230 +88,88 @@ static always_inline unsigned long __cmp
static always_inline unsigned long __cmpxchg(
volatile void *ptr, unsigned long old, unsigned long new, int size)
{
- unsigned long prev;
- switch (size) {
- case 1:
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
- : "=a"(prev)
- : "q"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
- : "memory");
- return prev;
- case 2:
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
- : "memory");
- return prev;
+ unsigned long prev;
+ switch ( size )
+ {
+ case 1:
+ asm volatile ( LOCK_PREFIX "cmpxchgb %b1,%2"
+ : "=a" (prev)
+ : "q" (new), "m" (*__xg((volatile void *)ptr)),
+ "0" (old)
+ : "memory" );
+ return prev;
+ case 2:
+ asm volatile ( LOCK_PREFIX "cmpxchgw %w1,%2"
+ : "=a" (prev)
+ : "r" (new), "m" (*__xg((volatile void *)ptr)),
+ "0" (old)
+ : "memory" );
+ return prev;
#if defined(__i386__)
- case 4:
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
- : "memory");
- return prev;
+ case 4:
+ asm volatile ( LOCK_PREFIX "cmpxchgl %1,%2"
+ : "=a" (prev)
+ : "r" (new), "m" (*__xg((volatile void *)ptr)),
+ "0" (old)
+ : "memory" );
+ return prev;
#elif defined(__x86_64__)
- case 4:
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %k1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
- : "memory");
- return prev;
- case 8:
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgq %1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
- : "memory");
- return prev;
+ case 4:
+ asm volatile ( LOCK_PREFIX "cmpxchgl %k1,%2"
+ : "=a" (prev)
+ : "r" (new), "m" (*__xg((volatile void *)ptr)),
+ "0" (old)
+ : "memory" );
+ return prev;
+ case 8:
+ asm volatile ( LOCK_PREFIX "cmpxchgq %1,%2"
+ : "=a" (prev)
+ : "r" (new), "m" (*__xg((volatile void *)ptr)),
+ "0" (old)
+ : "memory" );
+ return prev;
#endif
- }
- return old;
+ }
+ return old;
}
#define __HAVE_ARCH_CMPXCHG
-#if BITS_PER_LONG == 64
-
-#define cmpxchg(ptr,o,n) \
- ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o), \
- (unsigned long)(n),sizeof(*(ptr))))
-#else
-
-static always_inline unsigned long long __cmpxchg8b(
- volatile void *ptr, unsigned long long old, unsigned long long new)
-{
- unsigned long long prev;
- __asm__ __volatile__ (
- LOCK_PREFIX "cmpxchg8b %3"
- : "=A" (prev)
- : "c" ((u32)(new>>32)), "b" ((u32)new),
- "m" (*__xg((volatile void *)ptr)), "0" (old)
- : "memory" );
- return prev;
-}
-
-#define cmpxchg(ptr,o,n) \
-({ \
- __typeof__(*(ptr)) __prev; \
- switch ( sizeof(*(ptr)) ) { \
- case 8: \
- __prev = ((__typeof__(*(ptr)))__cmpxchg8b( \
- (ptr), \
- (unsigned long long)(o), \
- (unsigned long long)(n))); \
- break; \
- default: \
- __prev = ((__typeof__(*(ptr)))__cmpxchg( \
- (ptr), \
- (unsigned long)(o), \
- (unsigned long)(n), \
- sizeof(*(ptr)))); \
- break; \
- } \
- __prev; \
-})
-
-#endif
-
-
/*
- * This function causes value _o to be changed to _n at location _p.
- * If this access causes a fault then we return 1, otherwise we return 0.
- * If no fault occurs then _o is updated to the value we saw at _p. If this
- * is the same as the initial value of _o then _n is written to location _p.
+ * Both Intel and AMD agree that, from a programmer's viewpoint:
+ * Loads cannot be reordered relative to other loads.
+ * Stores cannot be reordered relative to other stores.
+ *
+ * Intel64 Architecture Memory Ordering White Paper
+ * <http://developer.intel.com/products/processor/manuals/318147.pdf>
+ *
+ * AMD64 Architecture Programmer's Manual, Volume 2: System Programming
+ * <http://www.amd.com/us-en/assets/content_type/\
+ * white_papers_and_tech_docs/24593.pdf>
*/
-#ifdef __i386__
-#define __cmpxchg_user(_p,_o,_n,_isuff,_oppre,_regtype) \
- __asm__ __volatile__ ( \
- "1: " LOCK_PREFIX "cmpxchg"_isuff" %"_oppre"2,%3\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl $1,%1\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 1b,3b\n" \
- ".previous" \
- : "=a" (_o), "=r" (_rc) \
- : _regtype (_n), "m" (*__xg((volatile void *)_p)), "0" (_o), "1" (0) \
- : "memory");
-#define cmpxchg_user(_p,_o,_n) \
-({ \
- int _rc; \
- switch ( sizeof(*(_p)) ) { \
- case 1: \
- __cmpxchg_user(_p,_o,_n,"b","b","q"); \
- break; \
- case 2: \
- __cmpxchg_user(_p,_o,_n,"w","w","r"); \
- break; \
- case 4: \
- __cmpxchg_user(_p,_o,_n,"l","","r"); \
- break; \
- case 8: \
- __asm__ __volatile__ ( \
- "1: " LOCK_PREFIX "cmpxchg8b %4\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl $1,%1\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 1b,3b\n" \
- ".previous" \
- : "=A" (_o), "=r" (_rc) \
- : "c" ((u32)((u64)(_n)>>32)), "b" ((u32)(_n)), \
- "m" (*__xg((volatile void *)(_p))), "0" (_o), "1" (0) \
- : "memory"); \
- break; \
- } \
- _rc; \
-})
-#else
-#define __cmpxchg_user(_p,_o,_n,_isuff,_oppre,_regtype) \
- __asm__ __volatile__ ( \
- "1: " LOCK_PREFIX "cmpxchg"_isuff" %"_oppre"2,%3\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl $1,%1\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 8\n" \
- " .quad 1b,3b\n" \
- ".previous" \
- : "=a" (_o), "=r" (_rc) \
- : _regtype (_n), "m" (*__xg((volatile void *)_p)), "0" (_o), "1" (0) \
- : "memory");
-#define cmpxchg_user(_p,_o,_n) \
-({ \
- int _rc; \
- switch ( sizeof(*(_p)) ) { \
- case 1: \
- __cmpxchg_user(_p,_o,_n,"b","b","q"); \
- break; \
- case 2: \
- __cmpxchg_user(_p,_o,_n,"w","w","r"); \
- break; \
- case 4: \
- __cmpxchg_user(_p,_o,_n,"l","k","r"); \
- break; \
- case 8: \
- __cmpxchg_user(_p,_o,_n,"q","","r"); \
- break; \
- } \
- _rc; \
-})
-#endif
-
-#if defined(__i386__)
-#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
-#define rmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
-#elif defined(__x86_64__)
-#define mb() __asm__ __volatile__ ("mfence":::"memory")
-#define rmb() __asm__ __volatile__ ("lfence":::"memory")
-#endif
-#define wmb() __asm__ __volatile__ ("": : :"memory")
+#define rmb() barrier()
+#define wmb() barrier()
#ifdef CONFIG_SMP
-#define smp_mb() mb()
-#define smp_rmb() rmb()
-#define smp_wmb() wmb()
+#define smp_mb() mb()
+#define smp_rmb() rmb()
+#define smp_wmb() wmb()
#else
-#define smp_mb() barrier()
-#define smp_rmb() barrier()
-#define smp_wmb() barrier()
+#define smp_mb() barrier()
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
#endif
#define set_mb(var, value) do { xchg(&var, value); } while (0)
#define set_wmb(var, value) do { var = value; wmb(); } while (0)
-/* interrupt control.. */
-#if defined(__i386__)
-#define __save_flags(x) __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */)
-#define __restore_flags(x) __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc")
-#elif defined(__x86_64__)
-#define __save_flags(x) do { __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0)
-#define __restore_flags(x) __asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
-#endif
-#define __cli() __asm__ __volatile__("cli": : :"memory")
-#define __sti() __asm__ __volatile__("sti": : :"memory")
+#define local_irq_disable() asm volatile ( "cli" : : : "memory" )
+#define local_irq_enable() asm volatile ( "sti" : : : "memory" )
+
/* used in the idle loop; sti takes one instruction cycle to complete */
-#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
+#define safe_halt() asm volatile ( "sti; hlt" : : : "memory" )
/* used when interrupts are already enabled or to shutdown the processor */
-#define halt() __asm__ __volatile__("hlt": : :"memory")
-
-/* For spinlocks etc */
-#if defined(__i386__)
-#define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
-#define local_irq_restore(x) __restore_flags(x)
-#elif defined(__x86_64__)
-#define local_irq_save(x) do { __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
-#define local_irq_restore(x) __asm__ __volatile__("# local_irq_restore \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory")
-#endif
-#define local_irq_disable() __cli()
-#define local_irq_enable() __sti()
+#define halt() asm volatile ( "hlt" : : : "memory" )
static inline int local_irq_is_enabled(void)
{
@@ -311,8 +178,8 @@ static inline int local_irq_is_enabled(v
return !!(flags & (1<<9)); /* EFLAGS_IF */
}
-#define BROKEN_ACPI_Sx 0x0001
-#define BROKEN_INIT_AFTER_S1 0x0002
+#define BROKEN_ACPI_Sx 0x0001
+#define BROKEN_INIT_AFTER_S1 0x0002
void trap_init(void);
void percpu_traps_init(void);
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/include/asm-x86/x86_32/system.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/x86_32/system.h Wed Nov 21 09:12:06 2007 -0700
@@ -0,0 +1,114 @@
+#ifndef __X86_32_SYSTEM_H__
+#define __X86_32_SYSTEM_H__
+
+static always_inline unsigned long long __cmpxchg8b( /* 64-bit compare-and-exchange on *ptr; returns the value found there */
+ volatile void *ptr, unsigned long long old, unsigned long long new)
+{
+ unsigned long long prev; /* receives the asm result and is returned unchanged */
+ asm volatile (
+ LOCK_PREFIX "cmpxchg8b %3" /* %3 is the "m" operand below, i.e. the object at ptr */
+ : "=A" (prev) /* %0: 64-bit result in the EDX:EAX register pair */
+ : "c" ((u32)(new>>32)), "b" ((u32)new), /* %1/%2: new value, high half in ECX, low half in EBX */
+ "m" (*__xg((volatile void *)ptr)), "0" (old) /* "0": old is passed in the same registers as operand 0 */
+ : "memory" );
+ return prev;
+}
+
+#define cmpxchg(ptr,o,n) /* compare-and-exchange dispatched on sizeof(*(ptr)); yields a value of the pointee type */ \
+({ \
+ __typeof__(*(ptr)) __prev; /* result of the helper, cast back to the pointee type */ \
+ switch ( sizeof(*(ptr)) ) { \
+ case 8: /* 8-byte objects go through __cmpxchg8b */ \
+ __prev = ((__typeof__(*(ptr)))__cmpxchg8b( \
+ (ptr), \
+ (unsigned long long)(o), \
+ (unsigned long long)(n))); \
+ break; \
+ default: /* every other size is handed to __cmpxchg, with the size as last argument */ \
+ __prev = ((__typeof__(*(ptr)))__cmpxchg( \
+ (ptr), \
+ (unsigned long)(o), \
+ (unsigned long)(n), \
+ sizeof(*(ptr)))); \
+ break; \
+ } \
+ __prev; /* value of the whole statement expression */ \
+})
+
+/*
+ * This function causes value _o to be changed to _n at location _p.
+ * If this access causes a fault then we return 1, otherwise we return 0.
+ * If no fault occurs then _o is updated to the value we saw at _p. If this
+ * is the same as the initial value of _o then _n is written to location _p.
+ */
+#define __cmpxchg_user(_p,_o,_n,_isuff,_oppre,_regtype) /* helper for cmpxchg_user(); needs '_rc' in the caller's scope */ \
+    asm volatile ( \
+        "1: " LOCK_PREFIX "cmpxchg"_isuff" %"_oppre"2,%3\n" /* 1: the access that may fault */ \
+        "2:\n" /* 2: common continuation point */ \
+        ".section .fixup,\"ax\"\n" \
+        "3: movl $1,%1\n" /* 3: fixup code, sets _rc to 1 and resumes at 2 */ \
+        " jmp 2b\n" \
+        ".previous\n" \
+        ".section __ex_table,\"a\"\n" \
+        " .align 4\n" \
+        " .long 1b,3b\n" /* table entry pairing label 1 with label 3 */ \
+        ".previous" \
+        : "=a" (_o), "=r" (_rc) /* %0: _o in the A register, %1: _rc */ \
+        : _regtype (_n), "m" (*__xg((volatile void *)(_p))), "0" (_o), "1" (0) /* (_p): cast the whole argument; _rc starts as 0 */ \
+        : "memory");
+
+#define cmpxchg_user(_p,_o,_n) /* size-dispatching wrapper; the expression evaluates to _rc */ \
+({ \
+ int _rc; /* set by the asm of the selected case; stays unset if sizeof(*(_p)) is not 1, 2, 4 or 8 (no default) */ \
+ switch ( sizeof(*(_p)) ) { \
+ case 1: /* byte: "b" suffix and operand modifier, "q" register constraint */ \
+ __cmpxchg_user(_p,_o,_n,"b","b","q"); \
+ break; \
+ case 2: /* 16-bit: "w" suffix and operand modifier */ \
+ __cmpxchg_user(_p,_o,_n,"w","w","r"); \
+ break; \
+ case 4: /* 32-bit: "l" suffix, no operand modifier */ \
+ __cmpxchg_user(_p,_o,_n,"l","","r"); \
+ break; \
+ case 8: /* 64-bit: open-coded cmpxchg8b using the same label/fixup layout */ \
+ asm volatile ( \
+ "1: " LOCK_PREFIX "cmpxchg8b %4\n" /* %4 is the "m" operand, i.e. the object at _p */ \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl $1,%1\n" /* fixup code: _rc = 1, then resume at label 2 */ \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,3b\n" \
+ ".previous" \
+ : "=A" (_o), "=r" (_rc) /* %0: _o in EDX:EAX, %1: _rc */ \
+ : "c" ((u32)((u64)(_n)>>32)), "b" ((u32)(_n)), /* new value: high half in ECX, low half in EBX */ \
+ "m" (*__xg((volatile void *)(_p))), "0" (_o), "1" (0) /* "1" (0): _rc starts as 0 */ \
+ : "memory"); \
+ break; \
+ } \
+ _rc; \
+})
+
+static inline void atomic_write64(uint64_t *p, uint64_t v)
+{
+    uint64_t expect, seen; /* expect: what we believe *p holds; seen: what __cmpxchg8b reported */
+    for ( expect = *p; (seen = __cmpxchg8b(p, expect, v)) != expect; expect = seen )
+        continue; /* the guess was stale: go round again with the value just reported */
+}
+
+#define mb() /* memory barrier: locked add of 0 to the word at the top of the stack */ \
+ asm volatile ( "lock; addl $0,0(%%esp)" : : : "memory" )
+
+#define __save_flags(x) /* copy EFLAGS into x via push/pop; NOTE(review): no "memory" clobber here, unlike the x86_64 variant - confirm intended */ \
+ asm volatile ( "pushfl ; popl %0" : "=g" (x) : )
+#define __restore_flags(x) /* load EFLAGS from x via push/pop; clobbers "memory" and "cc" */ \
+ asm volatile ( "pushl %0 ; popfl" : : "g" (x) : "memory", "cc" )
+
+#define local_irq_save(x) /* store the current EFLAGS in x, then cli */ \
+ asm volatile ( "pushfl ; popl %0 ; cli" : "=g" (x) : : "memory" )
+#define local_irq_restore(x) /* plain alias for __restore_flags(x) */ \
+ __restore_flags(x)
+
+#endif /* __X86_32_SYSTEM_H__ */
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/include/asm-x86/x86_64/system.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/x86_64/system.h Wed Nov 21 09:12:06 2007 -0700
@@ -0,0 +1,68 @@
+#ifndef __X86_64_SYSTEM_H__
+#define __X86_64_SYSTEM_H__
+
+#define cmpxchg(ptr,o,n) /* forwards to __cmpxchg with sizeof(*(ptr)); result is cast back to the pointee type */ \
+ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o), /* o and n are passed as unsigned long */ \
+ (unsigned long)(n),sizeof(*(ptr))))
+
+/*
+ * This function causes value _o to be changed to _n at location _p.
+ * If this access causes a fault then we return 1, otherwise we return 0.
+ * If no fault occurs then _o is updated to the value we saw at _p. If this
+ * is the same as the initial value of _o then _n is written to location _p.
+ */
+#define __cmpxchg_user(_p,_o,_n,_isuff,_oppre,_regtype) /* helper for cmpxchg_user(); needs '_rc' in the caller's scope */ \
+    asm volatile ( \
+        "1: " LOCK_PREFIX "cmpxchg"_isuff" %"_oppre"2,%3\n" /* 1: the access that may fault */ \
+        "2:\n" /* 2: common continuation point */ \
+        ".section .fixup,\"ax\"\n" \
+        "3: movl $1,%1\n" /* 3: fixup code, sets _rc to 1 and resumes at 2 */ \
+        " jmp 2b\n" \
+        ".previous\n" \
+        ".section __ex_table,\"a\"\n" \
+        " .align 8\n" \
+        " .quad 1b,3b\n" /* table entry pairing label 1 with label 3 */ \
+        ".previous" \
+        : "=a" (_o), "=r" (_rc) /* %0: _o in the A register, %1: _rc */ \
+        : _regtype (_n), "m" (*__xg((volatile void *)(_p))), "0" (_o), "1" (0) /* (_p): cast the whole argument; _rc starts as 0 */ \
+        : "memory");
+
+#define cmpxchg_user(_p,_o,_n) /* size-dispatching wrapper around __cmpxchg_user; the expression evaluates to _rc */ \
+({ \
+ int _rc; /* set by the asm of the selected case; stays unset if sizeof(*(_p)) is not 1, 2, 4 or 8 (no default) */ \
+ switch ( sizeof(*(_p)) ) { \
+ case 1: /* byte: "b" suffix and operand modifier, "q" register constraint */ \
+ __cmpxchg_user(_p,_o,_n,"b","b","q"); \
+ break; \
+ case 2: /* 16-bit: "w" suffix and operand modifier */ \
+ __cmpxchg_user(_p,_o,_n,"w","w","r"); \
+ break; \
+ case 4: /* 32-bit: "l" suffix, "k" operand modifier */ \
+ __cmpxchg_user(_p,_o,_n,"l","k","r"); \
+ break; \
+ case 8: /* 64-bit: "q" suffix, no operand modifier */ \
+ __cmpxchg_user(_p,_o,_n,"q","","r"); \
+ break; \
+ } \
+ _rc; \
+})
+
+static inline void atomic_write64(uint64_t *p, uint64_t v)
+{
+ *p = v; /* plain C store; NOTE(review): atomicity presumably relies on p being naturally aligned - confirm */
+}
+
+#define mb() /* memory barrier: mfence plus a compiler-level "memory" clobber */ \
+ asm volatile ( "mfence" : : : "memory" )
+
+#define __save_flags(x) /* copy RFLAGS into x via push/pop; %q0 names the operand in its 64-bit form */ \
+ asm volatile ( "pushfq ; popq %q0" : "=g" (x) : :"memory" )
+#define __restore_flags(x) /* load RFLAGS from x via push/pop; clobbers "memory" and "cc" */ \
+ asm volatile ( "pushq %0 ; popfq" : : "g" (x) : "memory", "cc" )
+
+#define local_irq_save(x) /* store the current RFLAGS in x, then cli */ \
+ asm volatile ( "pushfq ; popq %0 ; cli" : "=g" (x) : : "memory" )
+#define local_irq_restore(x) /* plain alias for __restore_flags(x) */ \
+ __restore_flags(x)
+
+#endif /* __X86_64_SYSTEM_H__ */
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|