WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH] non-flat protected mode HVM support

To: <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH] non-flat protected mode HVM support
From: "Jan Beulich" <jbeulich@xxxxxxxxxx>
Date: Tue, 28 Nov 2006 10:01:32 +0000
Delivery-date: Tue, 28 Nov 2006 02:00:14 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
This is now the full set of changes needed to eliminate the assumption that
segments in protected mode always have zero base addresses.
At once, this further simplifies the instruction length determination code
used for MMIO of HVM domains.

Functionality tested on 3.0.3, compile tested on -unstable.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

Index: 2006-11-27/xen/arch/x86/hvm/instrlen.c
===================================================================
--- 2006-11-27.orig/xen/arch/x86/hvm/instrlen.c 2006-11-28 09:22:06.000000000 
+0100
+++ 2006-11-27/xen/arch/x86/hvm/instrlen.c      2006-11-28 09:24:15.000000000 
+0100
@@ -20,7 +20,6 @@
 #include <xen/config.h>
 #include <xen/sched.h>
 #include <xen/mm.h>
-#include <asm/regs.h>
 #include <asm-x86/x86_emulate.h>
 
 /* read from guest memory */
@@ -121,9 +120,7 @@ static uint8_t opcode_table[256] = {
     ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
     ByteOp|ImplicitOps, ImplicitOps,
     /* 0xB0 - 0xBF */
-    SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, 
-    SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, 
-    0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     /* 0xC0 - 0xC7 */
     ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, 0, 0,
     0, 0, ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
@@ -195,54 +192,52 @@ static uint8_t twobyte_table[256] = {
 };
 
 /* 
- * insn_fetch - fetch the next 1 to 4 bytes from instruction stream 
- * @_type:   u8, u16, u32, s8, s16, or s32
- * @_size:   1, 2, or 4 bytes
+ * insn_fetch - fetch the next byte from instruction stream
  */
-#define insn_fetch(_type, _size)                                        \
-({ unsigned long _x, _ptr = _regs.eip;                                  \
-   if ( mode == X86EMUL_MODE_REAL ) _ptr += _regs.cs << 4;              \
-   rc = inst_copy_from_guest((unsigned char *)(&(_x)), _ptr, _size);    \
-   if ( rc != _size ) goto done;                                        \
-   _regs.eip += (_size);                                                \
-   length += (_size);                                                   \
-   (_type)_x;                                                           \
+#define insn_fetch()                                                    \
+({ uint8_t _x;                                                          \
+   if ( length >= 15 )                                                  \
+       return -1;                                                       \
+   if ( inst_copy_from_guest(&_x, pc, 1) != 1 ) {                       \
+       gdprintk(XENLOG_WARNING,                                         \
+                "Cannot read from address %lx (eip %lx, mode %d)\n",    \
+                pc, org_pc, mode);                                      \
+       return -1;                                                       \
+   }                                                                    \
+   pc += 1;                                                             \
+   length += 1;                                                         \
+   _x;                                                                  \
 })
 
 /**
  * hvm_instruction_length - returns the current instructions length
  *
- * @regs: guest register state
+ * @org_pc: guest instruction pointer
  * @mode: guest operating mode
  *
  * EXTERNAL this routine calculates the length of the current instruction
- * pointed to by eip.  The guest state is _not_ changed by this routine.
+ * pointed to by org_pc.  The guest state is _not_ changed by this routine.
  */
-int hvm_instruction_length(struct cpu_user_regs *regs, int mode)
+int hvm_instruction_length(unsigned long org_pc, int mode)
 {
-    uint8_t b, d, twobyte = 0, rex_prefix = 0;
-    uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
-    unsigned int op_bytes, ad_bytes, i;
-    int rc = 0;
+    uint8_t b, d, twobyte = 0, rex_prefix = 0, modrm_reg = 0;
+    unsigned int op_default, op_bytes, ad_default, ad_bytes, tmp;
     int length = 0;
-    unsigned int tmp;
-
-    /* Shadow copy of register state. Committed on successful emulation. */
-    struct cpu_user_regs _regs = *regs;
+    unsigned long pc = org_pc;
 
     switch ( mode )
     {
     case X86EMUL_MODE_REAL:
     case X86EMUL_MODE_PROT16:
-        op_bytes = ad_bytes = 2;
+        op_bytes = op_default = ad_bytes = ad_default = 2;
         break;
     case X86EMUL_MODE_PROT32:
-        op_bytes = ad_bytes = 4;
+        op_bytes = op_default = ad_bytes = ad_default = 4;
         break;
 #ifdef __x86_64__
     case X86EMUL_MODE_PROT64:
-        op_bytes = 4;
-        ad_bytes = 8;
+        op_bytes = op_default = 4;
+        ad_bytes = ad_default = 8;
         break;
 #endif
     default:
@@ -250,18 +245,18 @@ int hvm_instruction_length(struct cpu_us
     }
 
     /* Legacy prefixes. */
-    for ( i = 0; i < 8; i++ )
+    for ( ; ; )
     {
-        switch ( b = insn_fetch(uint8_t, 1) )
+        switch ( b = insn_fetch() )
         {
         case 0x66: /* operand-size override */
-            op_bytes ^= 6;      /* switch between 2/4 bytes */
+            op_bytes = op_default ^ 6;      /* switch between 2/4 bytes */
             break;
         case 0x67: /* address-size override */
             if ( mode == X86EMUL_MODE_PROT64 )
-                ad_bytes ^= 12; /* switch between 4/8 bytes */
+                ad_bytes = ad_default ^ 12; /* switch between 4/8 bytes */
             else
-                ad_bytes ^= 6;  /* switch between 2/4 bytes */
+                ad_bytes = ad_default ^ 6;  /* switch between 2/4 bytes */
             break;
         case 0x2e: /* CS override */
         case 0x3e: /* DS override */
@@ -273,22 +268,26 @@ int hvm_instruction_length(struct cpu_us
         case 0xf3: /* REP/REPE/REPZ */
         case 0xf2: /* REPNE/REPNZ */
             break;
+#ifdef __x86_64__
+        case 0x40 ... 0x4f:
+            if ( mode == X86EMUL_MODE_PROT64 )
+            {
+                rex_prefix = b;
+                continue;
+            }
+            /* FALLTHRU */
+#endif
         default:
             goto done_prefixes;
         }
+        rex_prefix = 0;
     }
 done_prefixes:
 
     /* REX prefix. */
-    if ( (mode == X86EMUL_MODE_PROT64) && ((b & 0xf0) == 0x40) )
-    {
-        rex_prefix = b;
-        if ( b & 8 )
-            op_bytes = 8;          /* REX.W */
-        modrm_reg = (b & 4) << 1;  /* REX.R */
-        /* REX.B and REX.X do not need to be decoded. */
-        b = insn_fetch(uint8_t, 1);
-    }
+    if ( rex_prefix & 8 )
+        op_bytes = 8;                   /* REX.W */
+    /* REX.B, REX.R, and REX.X do not need to be decoded. */
 
     /* Opcode byte(s). */
     d = opcode_table[b];
@@ -298,7 +297,7 @@ done_prefixes:
         if ( b == 0x0f )
         {
             twobyte = 1;
-            b = insn_fetch(uint8_t, 1);
+            b = insn_fetch();
             d = twobyte_table[b];
         }
 
@@ -310,11 +309,11 @@ done_prefixes:
     /* ModRM and SIB bytes. */
     if ( d & ModRM )
     {
-        modrm = insn_fetch(uint8_t, 1);
-        modrm_mod |= (modrm & 0xc0) >> 6;
-        modrm_reg |= (modrm & 0x38) >> 3;
-        modrm_rm  |= (modrm & 0x07);
+        uint8_t modrm = insn_fetch();
+        uint8_t modrm_mod = (modrm & 0xc0) >> 6;
+        uint8_t modrm_rm  = (modrm & 0x07);
 
+        modrm_reg = (modrm & 0x38) >> 3;
         if ( modrm_mod == 3 )
         {
             gdprintk(XENLOG_WARNING, "Cannot parse ModRM.mod == 3.\n");
@@ -330,16 +329,16 @@ done_prefixes:
                 if ( modrm_rm == 6 ) 
                 {
                     length += 2;
-                    _regs.eip += 2; /* skip disp16 */
+                    pc += 2; /* skip disp16 */
                 }
                 break;
             case 1:
                 length += 1;
-                _regs.eip += 1; /* skip disp8 */
+                pc += 1; /* skip disp8 */
                 break;
             case 2:
                 length += 2;
-                _regs.eip += 2; /* skip disp16 */
+                pc += 2; /* skip disp16 */
                 break;
             }
         }
@@ -350,33 +349,34 @@ done_prefixes:
             {
             case 0:
                 if ( (modrm_rm == 4) && 
-                     (((insn_fetch(uint8_t, 1)) & 7) 
-                        == 5) )
+                     ((insn_fetch() & 7) == 5) )
                 {
                     length += 4;
-                    _regs.eip += 4; /* skip disp32 specified by SIB.base */
+                    pc += 4; /* skip disp32 specified by SIB.base */
                 }
                 else if ( modrm_rm == 5 )
                 {
                     length += 4;
-                    _regs.eip += 4; /* skip disp32 */
+                    pc += 4; /* skip disp32 */
                 }
                 break;
             case 1:
                 if ( modrm_rm == 4 )
                 {
-                    insn_fetch(uint8_t, 1);
+                    length += 1;
+                    pc += 1;
                 }
                 length += 1;
-                _regs.eip += 1; /* skip disp8 */
+                pc += 1; /* skip disp8 */
                 break;
             case 2:
                 if ( modrm_rm == 4 )
                 {
-                    insn_fetch(uint8_t, 1);
+                    length += 1;
+                    pc += 1;
                 }
                 length += 4;
-                _regs.eip += 4; /* skip disp32 */
+                pc += 4; /* skip disp32 */
                 break;
             }
         }
@@ -397,15 +397,12 @@ done_prefixes:
         tmp = (d & ByteOp) ? 1 : op_bytes;
         if ( tmp == 8 ) tmp = 4;
         /* NB. Immediates are sign-extended as necessary. */
-        switch ( tmp )
-        {
-        case 1: insn_fetch(int8_t,  1); break;
-        case 2: insn_fetch(int16_t, 2); break;
-        case 4: insn_fetch(int32_t, 4); break;
-        }
+        length += tmp;
+        pc += tmp;
         break;
     case SrcImmByte:
-        insn_fetch(int8_t,  1);
+        length += 1;
+        pc += 1;
         break;
     }
 
@@ -414,13 +411,9 @@ done_prefixes:
 
     switch ( b )
     {
-    case 0xa0 ... 0xa1: /* mov */
-        length += ad_bytes;
-        _regs.eip += ad_bytes; /* skip src displacement */
-        break;
-    case 0xa2 ... 0xa3: /* mov */
+    case 0xa0 ... 0xa3: /* mov */
         length += ad_bytes;
-        _regs.eip += ad_bytes; /* skip dst displacement */
+        pc += ad_bytes; /* skip src/dst displacement */
         break;
     case 0xf6 ... 0xf7: /* Grp3 */
         switch ( modrm_reg )
@@ -429,23 +422,19 @@ done_prefixes:
             /* Special case in Grp3: test has an immediate source operand. */
             tmp = (d & ByteOp) ? 1 : op_bytes;
             if ( tmp == 8 ) tmp = 4;
-            switch ( tmp )
-            {
-            case 1: insn_fetch(int8_t,  1); break;
-            case 2: insn_fetch(int16_t, 2); break;
-            case 4: insn_fetch(int32_t, 4); break;
-            }
-            goto done;
+            length += tmp;
+            pc += tmp;
+            break;
         }
         break;
     }
 
 done:
-    return length;
+    return length < 16 ? length : -1;
 
 cannot_emulate:
     gdprintk(XENLOG_WARNING,
-            "Cannot emulate %02x at address %lx (eip %lx, mode %d)\n",
-            b, (unsigned long)_regs.eip, (unsigned long)regs->eip, mode);
+            "Cannot emulate %02x at address %lx (%lx, mode %d)\n",
+            b, pc - 1, org_pc, mode);
     return -1;
 }
Index: 2006-11-27/xen/arch/x86/hvm/platform.c
===================================================================
--- 2006-11-27.orig/xen/arch/x86/hvm/platform.c 2006-11-28 09:22:06.000000000 
+0100
+++ 2006-11-27/xen/arch/x86/hvm/platform.c      2006-11-28 09:39:05.000000000 
+0100
@@ -28,6 +28,7 @@
 #include <xen/trace.h>
 #include <xen/sched.h>
 #include <asm/regs.h>
+#include <asm/x86_emulate.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/support.h>
 #include <asm/hvm/io.h>
@@ -168,13 +169,15 @@ long get_reg_value(int size, int index, 
 
 static inline unsigned char *check_prefix(unsigned char *inst,
                                           struct hvm_io_op *mmio_op,
+                                          unsigned char *ad_size,
                                           unsigned char *op_size,
+                                          unsigned char *seg_sel,
                                           unsigned char *rex_p)
 {
     while ( 1 ) {
         switch ( *inst ) {
             /* rex prefix for em64t instructions */
-        case 0x40 ... 0x4e:
+        case 0x40 ... 0x4f:
             *rex_p = *inst;
             break;
         case 0xf3: /* REPZ */
@@ -191,12 +194,13 @@ static inline unsigned char *check_prefi
         case 0x26: /* ES */
         case 0x64: /* FS */
         case 0x65: /* GS */
-            //mmio_op->seg_sel = *inst;
+            *seg_sel = *inst;
             break;
         case 0x66: /* 32bit->16bit */
             *op_size = WORD;
             break;
         case 0x67:
+            *ad_size = WORD;
             break;
         default:
             return inst;
@@ -205,7 +209,7 @@ static inline unsigned char *check_prefi
     }
 }
 
-static inline unsigned long get_immediate(int op16, const unsigned char *inst, 
int op_size)
+static inline unsigned long get_immediate(int ad_size, const unsigned char 
*inst, int op_size)
 {
     int mod, reg, rm;
     unsigned long val = 0;
@@ -216,16 +220,19 @@ static inline unsigned long get_immediat
     rm = *inst & 7;
 
     inst++; //skip ModR/M byte
-    if ( mod != 3 && rm == 4 ) {
+    if ( ad_size != WORD && mod != 3 && rm == 4 ) {
+        rm = *inst & 7;
         inst++; //skip SIB byte
     }
 
     switch ( mod ) {
     case 0:
-        if ( rm == 5 || rm == 4 ) {
-            if ( op16 )
+        if ( ad_size == WORD ) {
+            if ( rm == 6 )
                 inst = inst + 2; //disp16, skip 2 bytes
-            else
+        }
+        else {
+            if ( rm == 5 )
                 inst = inst + 4; //disp32, skip 4 bytes
         }
         break;
@@ -233,7 +240,7 @@ static inline unsigned long get_immediat
         inst++; //disp8, skip 1 byte
         break;
     case 2:
-        if ( op16 )
+        if ( ad_size == WORD )
             inst = inst + 2; //disp16, skip 2 bytes
         else
             inst = inst + 4; //disp32, skip 4 bytes
@@ -276,7 +283,6 @@ static void init_instruction(struct hvm_
     mmio_op->instr = 0;
 
     mmio_op->flags = 0;
-    //mmio_op->seg_sel = 0;
 
     mmio_op->operand[0] = 0;
     mmio_op->operand[1] = 0;
@@ -346,25 +352,52 @@ static int reg_mem(unsigned char size, u
     return DECODE_success;
 }
 
-static int mmio_decode(int realmode, unsigned char *opcode,
-                      struct hvm_io_op *mmio_op, unsigned char *op_size)
+static int mmio_decode(int mode, unsigned char *opcode,
+                       struct hvm_io_op *mmio_op,
+                       unsigned char *ad_size, unsigned char *op_size,
+                       unsigned char *seg_sel)
 {
     unsigned char size_reg = 0;
     unsigned char rex = 0;
     int index;
 
+    *ad_size = 0;
     *op_size = 0;
+    *seg_sel = 0;
     init_instruction(mmio_op);
 
-    opcode = check_prefix(opcode, mmio_op, op_size, &rex);
+    opcode = check_prefix(opcode, mmio_op, ad_size, op_size, seg_sel, &rex);
 
-    if ( realmode ) { /* meaning is reversed */
+    switch ( mode ) {
+    case X86EMUL_MODE_REAL: /* meaning is reversed */
+    case X86EMUL_MODE_PROT16:
         if ( *op_size == WORD )
             *op_size = LONG;
         else if ( *op_size == LONG )
             *op_size = WORD;
         else if ( *op_size == 0 )
             *op_size = WORD;
+        if ( *ad_size == WORD )
+            *ad_size = LONG;
+        else if ( *ad_size == LONG )
+            *ad_size = WORD;
+        else if ( *ad_size == 0 )
+            *ad_size = WORD;
+        break;
+    case X86EMUL_MODE_PROT32:
+        if ( *op_size == 0 )
+            *op_size = LONG;
+        if ( *ad_size == 0 )
+            *ad_size = LONG;
+        break;
+#ifdef __x86_64__
+    case X86EMUL_MODE_PROT64:
+        if ( *op_size == 0 )
+            *op_size = rex & 0x8 ? QUAD : LONG;
+        if ( *ad_size == 0 )
+            *ad_size = QUAD;
+        break;
+#endif
     }
 
     /* the operands order in comments conforms to AT&T convention */
@@ -471,10 +504,10 @@ static int mmio_decode(int realmode, uns
         /* opcode 0x83 always has a single byte operand */
         if ( opcode[0] == 0x83 )
             mmio_op->immediate =
-                (signed char)get_immediate(realmode, opcode + 1, BYTE);
+                (signed char)get_immediate(*ad_size, opcode + 1, BYTE);
         else
             mmio_op->immediate =
-                get_immediate(realmode, opcode + 1, *op_size);
+                get_immediate(*ad_size, opcode + 1, *op_size);
 
         mmio_op->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE);
         mmio_op->operand[1] = mk_operand(size_reg, 0, 0, MEMORY);
@@ -598,7 +631,7 @@ static int mmio_decode(int realmode, uns
 
             mmio_op->operand[0] = mk_operand(*op_size, 0, 0, IMMEDIATE);
             mmio_op->immediate  =
-                    get_immediate(realmode, opcode + 1, *op_size);
+                    get_immediate(*ad_size, opcode + 1, *op_size);
             mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY);
 
             return DECODE_success;
@@ -612,7 +645,7 @@ static int mmio_decode(int realmode, uns
 
             mmio_op->operand[0] = mk_operand(*op_size, 0, 0, IMMEDIATE);
             mmio_op->immediate =
-                    get_immediate(realmode, opcode + 1, *op_size);
+                    get_immediate(*ad_size, opcode + 1, *op_size);
             mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY);
 
             return DECODE_success;
@@ -634,7 +667,7 @@ static int mmio_decode(int realmode, uns
 
             mmio_op->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE);
             mmio_op->immediate =
-                    get_immediate(realmode, opcode + 1, *op_size);
+                    get_immediate(*ad_size, opcode + 1, *op_size);
             mmio_op->operand[1] = mk_operand(size_reg, 0, 0, MEMORY);
 
             return DECODE_success;
@@ -697,7 +730,7 @@ static int mmio_decode(int realmode, uns
             GET_OP_SIZE_FOR_NONEBYTE(*op_size);
             mmio_op->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE);
             mmio_op->immediate =
-                    (signed char)get_immediate(realmode, opcode + 1, BYTE);
+                    (signed char)get_immediate(*ad_size, opcode + 1, BYTE);
             mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY);
             return DECODE_success;
         }
@@ -866,15 +899,15 @@ static void mmio_operands(int type, unsi
 }
 
 #define GET_REPEAT_COUNT() \
-     (mmio_op->flags & REPZ ? (realmode ? regs->ecx & 0xFFFF : regs->ecx) : 1)
+     (mmio_op->flags & REPZ ? (ad_size == WORD ? regs->ecx & 0xFFFF : 
regs->ecx) : 1)
 
 void handle_mmio(unsigned long gpa)
 {
     unsigned long inst_addr;
     struct hvm_io_op *mmio_op;
     struct cpu_user_regs *regs;
-    unsigned char inst[MAX_INST_LEN], op_size;
-    int i, realmode, df, inst_len;
+    unsigned char inst[MAX_INST_LEN], ad_size, op_size, seg_sel;
+    int i, mode, df, inst_len;
     struct vcpu *v = current;
 
     mmio_op = &v->arch.hvm_vcpu.io_op;
@@ -886,27 +919,23 @@ void handle_mmio(unsigned long gpa)
 
     df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
 
-    inst_len = hvm_instruction_length(regs, hvm_guest_x86_mode(v));
+    mode = hvm_guest_x86_mode(v);
+    inst_addr = hvm_get_segment_base(v, seg_cs) + regs->eip;
+    inst_len = hvm_instruction_length(inst_addr, mode);
     if ( inst_len <= 0 )
     {
         printk("handle_mmio: failed to get instruction length\n");
         domain_crash_synchronous();
     }
 
-    realmode = hvm_realmode(v);
-    if ( realmode )
-        inst_addr = regs->cs << 4;
-    else
-        inst_addr = hvm_get_segment_base(current, seg_cs);
-    inst_addr += regs->eip;
-
     memset(inst, 0, MAX_INST_LEN);
     if ( inst_copy_from_guest(inst, inst_addr, inst_len) != inst_len ) {
         printk("handle_mmio: failed to copy instruction\n");
         domain_crash_synchronous();
     }
 
-    if ( mmio_decode(realmode, inst, mmio_op, &op_size) == DECODE_failure ) {
+    if ( mmio_decode(mode, inst, mmio_op, &ad_size, &op_size, &seg_sel)
+         == DECODE_failure ) {
         printk("handle_mmio: failed to decode instruction\n");
         printk("mmio opcode: gpa 0x%lx, len %d:", gpa, inst_len);
         for ( i = 0; i < inst_len; i++ )
@@ -926,29 +955,39 @@ void handle_mmio(unsigned long gpa)
     {
         unsigned long count = GET_REPEAT_COUNT();
         int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
-        unsigned long addr = 0;
+        unsigned long addr;
         int dir, size = op_size;
 
         ASSERT(count);
 
         /* determine non-MMIO address */
-        if ( realmode ) {
-            if ( ((regs->es << 4) + (regs->edi & 0xFFFF)) == gpa ) {
-                dir = IOREQ_WRITE;
-                addr = (regs->ds << 4) + (regs->esi & 0xFFFF);
-            } else {
-                dir = IOREQ_READ;
-                addr = (regs->es << 4) + (regs->edi & 0xFFFF);
-            }
-        } else {
-            if ( gpa == regs->edi ) {
-                dir = IOREQ_WRITE;
-                addr = regs->esi;
-            } else {
-                dir = IOREQ_READ;
-                addr = regs->edi;
+        addr = regs->edi;
+        if ( ad_size == WORD )
+            addr &= 0xFFFF;
+        addr += hvm_get_segment_base(v, seg_es);
+        if ( addr == gpa )
+        {
+            enum segment seg;
+
+            dir = IOREQ_WRITE;
+            addr = regs->esi;
+            if ( ad_size == WORD )
+                addr &= 0xFFFF;
+            switch ( seg_sel )
+            {
+            case 0x26: seg = seg_es; break;
+            case 0x2e: seg = seg_cs; break;
+            case 0x36: seg = seg_ss; break;
+            case 0:
+            case 0x3e: seg = seg_ds; break;
+            case 0x64: seg = seg_fs; break;
+            case 0x65: seg = seg_gs; break;
+            default: domain_crash_synchronous();
             }
+            addr += hvm_get_segment_base(v, seg);
         }
+        else
+            dir = IOREQ_READ;
 
         if ( addr & (size - 1) )
             gdprintk(XENLOG_WARNING,
Index: 2006-11-27/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- 2006-11-27.orig/xen/arch/x86/hvm/svm/svm.c  2006-11-28 09:22:06.000000000 
+0100
+++ 2006-11-27/xen/arch/x86/hvm/svm/svm.c       2006-11-28 09:43:47.000000000 
+0100
@@ -511,18 +511,24 @@ unsigned long svm_get_ctrl_reg(struct vc
 
 static unsigned long svm_get_segment_base(struct vcpu *v, enum segment seg)
 {
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    int long_mode = 0;
+
+#ifdef __x86_64__
+    long_mode = vmcb->cs.attributes.fields.l && (vmcb->efer & EFER_LMA);
+#endif
     switch ( seg )
     {
-    case seg_cs: return v->arch.hvm_svm.vmcb->cs.base;
-    case seg_ds: return v->arch.hvm_svm.vmcb->ds.base;
-    case seg_es: return v->arch.hvm_svm.vmcb->es.base;
-    case seg_fs: return v->arch.hvm_svm.vmcb->fs.base;
-    case seg_gs: return v->arch.hvm_svm.vmcb->gs.base;
-    case seg_ss: return v->arch.hvm_svm.vmcb->ss.base;
-    case seg_tr: return v->arch.hvm_svm.vmcb->tr.base;
-    case seg_gdtr: return v->arch.hvm_svm.vmcb->gdtr.base;
-    case seg_idtr: return v->arch.hvm_svm.vmcb->idtr.base;
-    case seg_ldtr: return v->arch.hvm_svm.vmcb->ldtr.base;
+    case seg_cs: return long_mode ? 0 : vmcb->cs.base;
+    case seg_ds: return long_mode ? 0 : vmcb->ds.base;
+    case seg_es: return long_mode ? 0 : vmcb->es.base;
+    case seg_fs: return vmcb->fs.base;
+    case seg_gs: return vmcb->gs.base;
+    case seg_ss: return long_mode ? 0 : vmcb->ss.base;
+    case seg_tr: return vmcb->tr.base;
+    case seg_gdtr: return vmcb->gdtr.base;
+    case seg_idtr: return vmcb->idtr.base;
+    case seg_ldtr: return vmcb->ldtr.base;
     }
     BUG();
     return 0;
@@ -832,7 +838,6 @@ int start_svm(void)
     hvm_funcs.store_cpu_guest_regs = svm_store_cpu_guest_regs;
     hvm_funcs.load_cpu_guest_regs = svm_load_cpu_guest_regs;
 
-    hvm_funcs.realmode = svm_realmode;
     hvm_funcs.paging_enabled = svm_paging_enabled;
     hvm_funcs.long_mode_enabled = svm_long_mode_enabled;
     hvm_funcs.pae_enabled = svm_pae_enabled;
@@ -925,7 +930,7 @@ static void svm_do_general_protection_fa
         printk("Huh? We got a GP Fault with an invalid IDTR!\n");
         svm_dump_vmcb(__func__, vmcb);
         svm_dump_regs(__func__, regs);
-        svm_dump_inst(vmcb->rip);
+        svm_dump_inst(svm_rip2pointer(vmcb));
         domain_crash(v->domain);
         return;
     }
@@ -1223,22 +1228,21 @@ static void svm_get_prefix_info(
 
 /* Get the address of INS/OUTS instruction */
 static inline int svm_get_io_address(
-    struct vcpu *v, 
-    struct cpu_user_regs *regs, unsigned int dir, 
+    struct vcpu *v, struct cpu_user_regs *regs,
+    unsigned int size, ioio_info_t info,
     unsigned long *count, unsigned long *addr)
 {
     unsigned long        reg;
     unsigned int         asize = 0;
     unsigned int         isize;
-    int                  long_mode;
-    ioio_info_t          info;
+    int                  long_mode = 0;
     segment_selector_t  *seg = NULL;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
-    info.bytes = vmcb->exitinfo1;
-
+#ifdef __x86_64__
     /* If we're in long mode, we shouldn't check the segment presence & limit 
*/
     long_mode = vmcb->cs.attributes.fields.l && vmcb->efer & EFER_LMA;
+#endif
 
     /* d field of cs.attributes is 1 for 32-bit, 0 for 16 or 64 bit. 
      * l field combined with EFER_LMA -> longmode says whether it's 16 or 64 
bit. 
@@ -1256,11 +1260,9 @@ static inline int svm_get_io_address(
         isize --;
 
     if (isize > 1) 
-        svm_get_prefix_info(vmcb, dir, &seg, &asize);
+        svm_get_prefix_info(vmcb, info.fields.type, &seg, &asize);
 
-    ASSERT(dir == IOREQ_READ || dir == IOREQ_WRITE);
-
-    if (dir == IOREQ_WRITE)
+    if (info.fields.type == IOREQ_WRITE)
     {
         reg = regs->esi;
         if (!seg)               /* If no prefix, used DS. */
@@ -1289,6 +1291,8 @@ static inline int svm_get_io_address(
         *addr = reg;
         *count = regs->ecx;
     }
+    if (!info.fields.rep)
+        *count = 1;
 
     if (!long_mode) {
         if (*addr > seg->limit) 
@@ -1301,7 +1305,8 @@ static inline int svm_get_io_address(
             *addr += seg->base;
         }
     }
-    
+    else if (seg == &vmcb->fs || seg == &vmcb->gs)
+        *addr += seg->base;
 
     return 1;
 }
@@ -1351,7 +1356,7 @@ static void svm_io_instruction(struct vc
         unsigned long addr, count;
         int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
 
-        if (!svm_get_io_address(v, regs, dir, &count, &addr)) 
+        if (!svm_get_io_address(v, regs, size, info, &count, &addr))
         {
             /* We failed to get a valid address, so don't do the IO operation -
              * it would just get worse if we do! Hopefully the guest is handing
@@ -1365,10 +1370,6 @@ static void svm_io_instruction(struct vc
         {
             pio_opp->flags |= REPZ;
         }
-        else 
-        {
-            count = 1;
-        }
 
         /*
          * Handle string pio instructions that cross pages or that
Index: 2006-11-27/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- 2006-11-27.orig/xen/arch/x86/hvm/vmx/vmx.c  2006-11-28 09:22:06.000000000 
+0100
+++ 2006-11-27/xen/arch/x86/hvm/vmx/vmx.c       2006-11-28 10:34:07.000000000 
+0100
@@ -503,23 +503,32 @@ static unsigned long vmx_get_ctrl_reg(st
 
 static unsigned long vmx_get_segment_base(struct vcpu *v, enum segment seg)
 {
-    unsigned long base;
+    unsigned long base = 0;
+    int long_mode = 0;
+
+    ASSERT(v == current);
+
+#ifdef __x86_64__
+    if ( vmx_long_mode_enabled(v) &&
+         (__vmread(GUEST_CS_AR_BYTES) & (1u<<13)) )
+        long_mode = 1;
+#endif
 
-    BUG_ON(v != current);
     switch ( seg )
     {
-    case seg_cs: base = __vmread(GUEST_CS_BASE); break;
-    case seg_ds: base = __vmread(GUEST_DS_BASE); break;
-    case seg_es: base = __vmread(GUEST_ES_BASE); break;
+    case seg_cs: if ( !long_mode ) base = __vmread(GUEST_CS_BASE); break;
+    case seg_ds: if ( !long_mode ) base = __vmread(GUEST_DS_BASE); break;
+    case seg_es: if ( !long_mode ) base = __vmread(GUEST_ES_BASE); break;
     case seg_fs: base = __vmread(GUEST_FS_BASE); break;
     case seg_gs: base = __vmread(GUEST_GS_BASE); break;
-    case seg_ss: base = __vmread(GUEST_SS_BASE); break;
+    case seg_ss: if ( !long_mode ) base = __vmread(GUEST_SS_BASE); break;
     case seg_tr: base = __vmread(GUEST_TR_BASE); break;
     case seg_gdtr: base = __vmread(GUEST_GDTR_BASE); break;
     case seg_idtr: base = __vmread(GUEST_IDTR_BASE); break;
     case seg_ldtr: base = __vmread(GUEST_LDTR_BASE); break;
-    default: BUG(); base = 0; break;
+    default: BUG(); break;
     }
+
     return base;
 }
 
@@ -635,7 +644,6 @@ static void vmx_setup_hvm_funcs(void)
     hvm_funcs.store_cpu_guest_regs = vmx_store_cpu_guest_regs;
     hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs;
 
-    hvm_funcs.realmode = vmx_realmode;
     hvm_funcs.paging_enabled = vmx_paging_enabled;
     hvm_funcs.long_mode_enabled = vmx_long_mode_enabled;
     hvm_funcs.pae_enabled = vmx_pae_enabled;
@@ -949,63 +957,100 @@ static void vmx_do_invlpg(unsigned long 
 }
 
 
-static int check_for_null_selector(unsigned long eip, int inst_len, int dir)
+static int vmx_check_descriptor(int long_mode, unsigned long eip, int inst_len,
+                                enum segment seg, unsigned long *base)
 {
-    unsigned char inst[MAX_INST_LEN];
-    unsigned long sel;
-    int i;
-    int inst_copy_from_guest(unsigned char *, unsigned long, int);
+    enum vmcs_field ar_field, base_field;
+    u32 ar_bytes;
 
-    /* INS can only use ES segment register, and it can't be overridden */
-    if ( dir == IOREQ_READ )
+    *base = 0;
+    if ( seg != seg_es )
     {
-        sel = __vmread(GUEST_ES_SELECTOR);
-        return sel == 0 ? 1 : 0;
+        unsigned char inst[MAX_INST_LEN];
+        int i;
+        extern int inst_copy_from_guest(unsigned char *, unsigned long, int);
+
+        if ( !long_mode )
+            eip += __vmread(GUEST_CS_BASE);
+        memset(inst, 0, MAX_INST_LEN);
+        if ( inst_copy_from_guest(inst, eip, inst_len) != inst_len )
+        {
+            gdprintk(XENLOG_ERR, "Get guest instruction failed\n");
+            domain_crash(current->domain);
+            return 0;
+        }
+
+        for ( i = 0; i < inst_len; i++ )
+        {
+            switch ( inst[i] )
+            {
+            case 0xf3: /* REPZ */
+            case 0xf2: /* REPNZ */
+            case 0xf0: /* LOCK */
+            case 0x66: /* data32 */
+            case 0x67: /* addr32 */
+#ifdef __x86_64__
+            case 0x40 ... 0x4f: /* REX */
+#endif
+                continue;
+            case 0x2e: /* CS */
+                seg = seg_cs;
+                continue;
+            case 0x36: /* SS */
+                seg = seg_ss;
+                continue;
+            case 0x26: /* ES */
+                seg = seg_es;
+                continue;
+            case 0x64: /* FS */
+                seg = seg_fs;
+                continue;
+            case 0x65: /* GS */
+                seg = seg_gs;
+                continue;
+            case 0x3e: /* DS */
+                seg = seg_ds;
+                continue;
+            }
+        }
     }
 
-    memset(inst, 0, MAX_INST_LEN);
-    if ( inst_copy_from_guest(inst, eip, inst_len) != inst_len )
+    switch ( seg )
     {
-        gdprintk(XENLOG_ERR, "Get guest instruction failed\n");
-        domain_crash(current->domain);
+    case seg_cs:
+        ar_field = GUEST_CS_AR_BYTES;
+        base_field = GUEST_CS_BASE;
+        break;
+    case seg_ds:
+        ar_field = GUEST_DS_AR_BYTES;
+        base_field = GUEST_DS_BASE;
+        break;
+    case seg_es:
+        ar_field = GUEST_ES_AR_BYTES;
+        base_field = GUEST_ES_BASE;
+        break;
+    case seg_fs:
+        ar_field = GUEST_FS_AR_BYTES;
+        base_field = GUEST_FS_BASE;
+        break;
+    case seg_gs:
+        ar_field = GUEST_FS_AR_BYTES;
+        base_field = GUEST_FS_BASE;
+        break;
+    case seg_ss:
+        ar_field = GUEST_GS_AR_BYTES;
+        base_field = GUEST_GS_BASE;
+        break;
+    default:
+        BUG();
         return 0;
     }
 
-    for ( i = 0; i < inst_len; i++ )
-    {
-        switch ( inst[i] )
-        {
-        case 0xf3: /* REPZ */
-        case 0xf2: /* REPNZ */
-        case 0xf0: /* LOCK */
-        case 0x66: /* data32 */
-        case 0x67: /* addr32 */
-            continue;
-        case 0x2e: /* CS */
-            sel = __vmread(GUEST_CS_SELECTOR);
-            break;
-        case 0x36: /* SS */
-            sel = __vmread(GUEST_SS_SELECTOR);
-            break;
-        case 0x26: /* ES */
-            sel = __vmread(GUEST_ES_SELECTOR);
-            break;
-        case 0x64: /* FS */
-            sel = __vmread(GUEST_FS_SELECTOR);
-            break;
-        case 0x65: /* GS */
-            sel = __vmread(GUEST_GS_SELECTOR);
-            break;
-        case 0x3e: /* DS */
-            /* FALLTHROUGH */
-        default:
-            /* DS is the default */
-            sel = __vmread(GUEST_DS_SELECTOR);
-        }
-        return sel == 0 ? 1 : 0;
-    }
+    if ( !long_mode || seg == seg_fs || seg == seg_gs )
+        *base = __vmread(base_field);
+    ar_bytes = __vmread(ar_field);
 
-    return 0;
+    return !(ar_bytes & 0x10000);
 }
 
 static void vmx_io_instruction(unsigned long exit_qualification,
@@ -1013,7 +1058,7 @@ static void vmx_io_instruction(unsigned 
 {
     struct cpu_user_regs *regs;
     struct hvm_io_op *pio_opp;
-    unsigned long port, size;
+    unsigned int port, size;
     int dir, df, vm86;
 
     pio_opp = &current->arch.hvm_vcpu.io_op;
@@ -1044,21 +1089,32 @@ static void vmx_io_instruction(unsigned 
     dir = test_bit(3, &exit_qualification); /* direction */
 
     if ( test_bit(4, &exit_qualification) ) { /* string instruction */
-        unsigned long addr, count = 1;
+        unsigned long addr, count = 1, base;
+        u32 ar_bytes;
         int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
+        int long_mode = 0;
 
+        ar_bytes = __vmread(GUEST_CS_AR_BYTES);
+#ifdef __x86_64__
+        if ( vmx_long_mode_enabled(current) && (ar_bytes & (1u<<13)) )
+            long_mode = 1;
+#endif
         addr = __vmread(GUEST_LINEAR_ADDRESS);
 
         /*
          * In protected mode, guest linear address is invalid if the
          * selector is null.
          */
-        if ( !vm86 && check_for_null_selector(regs->eip, inst_len, dir) )
-            addr = dir == IOREQ_WRITE ? regs->esi : regs->edi;
+        if ( !vmx_check_descriptor(long_mode, regs->eip, inst_len,
+                                   dir == IOREQ_WRITE ? seg_ds : seg_es,
+                                   &base) )
+            addr = dir == IOREQ_WRITE ? base + regs->esi : regs->edi;
 
         if ( test_bit(5, &exit_qualification) ) { /* "rep" prefix */
             pio_opp->flags |= REPZ;
-            count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
+            count = regs->ecx;
+            if ( !long_mode && (vm86 || !(ar_bytes & (1u<<14))) )
+                count &= 0xFFFF;
         }
 
         /*
Index: 2006-11-27/xen/include/asm-x86/hvm/hvm.h
===================================================================
--- 2006-11-27.orig/xen/include/asm-x86/hvm/hvm.h       2006-11-28 
09:22:06.000000000 +0100
+++ 2006-11-27/xen/include/asm-x86/hvm/hvm.h    2006-11-28 09:24:15.000000000 
+0100
@@ -62,12 +62,13 @@ struct hvm_function_table {
         struct vcpu *v, struct cpu_user_regs *r);
     /*
      * Examine specifics of the guest state:
-     * 1) determine whether the guest is in real or vm8086 mode,
-     * 2) determine whether paging is enabled,
-     * 3) return the current guest control-register value
-     * 4) return the current guest segment descriptor base
+     * 1) determine whether paging is enabled,
+     * 2) determine whether long mode is enabled,
+     * 3) determine whether PAE paging is enabled,
+     * 4) determine the mode the guest is running in,
+     * 5) return the current guest control-register value
+     * 6) return the current guest segment descriptor base
      */
-    int (*realmode)(struct vcpu *v);
     int (*paging_enabled)(struct vcpu *v);
     int (*long_mode_enabled)(struct vcpu *v);
     int (*pae_enabled)(struct vcpu *v);
@@ -128,12 +129,6 @@ hvm_load_cpu_guest_regs(struct vcpu *v, 
 }
 
 static inline int
-hvm_realmode(struct vcpu *v)
-{
-    return hvm_funcs.realmode(v);
-}
-
-static inline int
 hvm_paging_enabled(struct vcpu *v)
 {
     return hvm_funcs.paging_enabled(v);
@@ -157,7 +152,7 @@ hvm_guest_x86_mode(struct vcpu *v)
     return hvm_funcs.guest_x86_mode(v);
 }
 
-int hvm_instruction_length(struct cpu_user_regs *regs, int mode);
+int hvm_instruction_length(unsigned long pc, int mode);
 
 static inline void
 hvm_update_host_cr3(struct vcpu *v)


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel