[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3] SVM: use generic instruction decoding



... instead of custom handling. To facilitate this break out init code
from _hvm_emulate_one() into the new hvm_emulate_init(), and make
hvmemul_insn_fetch( globally available.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
v3: Add comment explaining debug/non-debug build differences. Use
    unsigned int for opc_tab[] index variable.
v2: Add comment to caller field. Rename REG_POISON to PTR_POISON. Align
    opc_tab[] initializer lines.

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -835,7 +835,7 @@ static int hvmemul_read(
         container_of(ctxt, struct hvm_emulate_ctxt, ctxt));
 }
 
-static int hvmemul_insn_fetch(
+int hvmemul_insn_fetch(
     enum x86_segment seg,
     unsigned long offset,
     void *p_data,
@@ -1766,15 +1766,14 @@ static const struct x86_emulate_ops hvm_
     .vmfunc        = hvmemul_vmfunc,
 };
 
-static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
-    const struct x86_emulate_ops *ops)
+void hvm_emulate_init(
+    struct hvm_emulate_ctxt *hvmemul_ctxt,
+    const unsigned char *insn_buf,
+    unsigned int insn_bytes)
 {
-    struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs;
     struct vcpu *curr = current;
-    uint32_t new_intr_shadow, pfec = PFEC_page_present;
-    struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
+    unsigned int pfec = PFEC_page_present;
     unsigned long addr;
-    int rc;
 
     if ( hvm_long_mode_enabled(curr) &&
          hvmemul_ctxt->seg_reg[x86_seg_cs].attr.fields.l )
@@ -1792,14 +1791,14 @@ static int _hvm_emulate_one(struct hvm_e
     if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
         pfec |= PFEC_user_mode;
 
-    hvmemul_ctxt->insn_buf_eip = regs->eip;
-    if ( !vio->mmio_insn_bytes )
+    hvmemul_ctxt->insn_buf_eip = hvmemul_ctxt->ctxt.regs->eip;
+    if ( !insn_bytes )
     {
         hvmemul_ctxt->insn_buf_bytes =
             hvm_get_insn_bytes(curr, hvmemul_ctxt->insn_buf) ?:
             (hvm_virtual_to_linear_addr(x86_seg_cs,
                                         &hvmemul_ctxt->seg_reg[x86_seg_cs],
-                                        regs->eip,
+                                        hvmemul_ctxt->insn_buf_eip,
                                         sizeof(hvmemul_ctxt->insn_buf),
                                         hvm_access_insn_fetch,
                                         hvmemul_ctxt->ctxt.addr_size,
@@ -1811,11 +1810,24 @@ static int _hvm_emulate_one(struct hvm_e
     }
     else
     {
-        hvmemul_ctxt->insn_buf_bytes = vio->mmio_insn_bytes;
-        memcpy(hvmemul_ctxt->insn_buf, vio->mmio_insn, vio->mmio_insn_bytes);
+        hvmemul_ctxt->insn_buf_bytes = insn_bytes;
+        memcpy(hvmemul_ctxt->insn_buf, insn_buf, insn_bytes);
     }
 
     hvmemul_ctxt->exn_pending = 0;
+}
+
+static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
+    const struct x86_emulate_ops *ops)
+{
+    const struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs;
+    struct vcpu *curr = current;
+    uint32_t new_intr_shadow;
+    struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
+    int rc;
+
+    hvm_emulate_init(hvmemul_ctxt, vio->mmio_insn, vio->mmio_insn_bytes);
+
     vio->mmio_retry = 0;
 
     if ( cpu_has_vmx )
--- a/xen/arch/x86/hvm/svm/emulate.c
+++ b/xen/arch/x86/hvm/svm/emulate.c
@@ -15,7 +15,7 @@
  * this program; If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <xen/config.h>
+#include <xen/err.h>
 #include <xen/init.h>
 #include <xen/lib.h>
 #include <xen/trace.h>
@@ -26,41 +26,6 @@
 #include <asm/hvm/svm/vmcb.h>
 #include <asm/hvm/svm/emulate.h>
 
-static unsigned int is_prefix(u8 opc)
-{
-    switch ( opc )
-    {
-    case 0x66:
-    case 0x67:
-    case 0x2E:
-    case 0x3E:
-    case 0x26:
-    case 0x64:
-    case 0x65:
-    case 0x36:
-    case 0xF0:
-    case 0xF3:
-    case 0xF2:
-    case 0x40 ... 0x4f:
-        return 1;
-    }
-    return 0;
-}
-
-static unsigned long svm_rip2pointer(struct vcpu *v, unsigned long *limit)
-{
-    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-    unsigned long p = vmcb->cs.base + vmcb->rip;
-
-    if ( !(vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v)) )
-    {
-        *limit = vmcb->cs.limit;
-        return (u32)p; /* mask to 32 bits */
-    }
-    *limit = ~0UL;
-    return p;
-}
-
 static unsigned long svm_nextrip_insn_length(struct vcpu *v)
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
@@ -89,141 +54,104 @@ static unsigned long svm_nextrip_insn_le
     return vmcb->nextrip - vmcb->rip;
 }
 
-/* First byte: Length. Following bytes: Opcode bytes. */
-#define MAKE_INSTR(nm, ...) static const u8 OPCODE_##nm[] = { __VA_ARGS__ }
-MAKE_INSTR(INVD,   2, 0x0f, 0x08);
-MAKE_INSTR(WBINVD, 2, 0x0f, 0x09);
-MAKE_INSTR(CPUID,  2, 0x0f, 0xa2);
-MAKE_INSTR(RDMSR,  2, 0x0f, 0x32);
-MAKE_INSTR(WRMSR,  2, 0x0f, 0x30);
-MAKE_INSTR(VMCALL, 3, 0x0f, 0x01, 0xd9);
-MAKE_INSTR(HLT,    1, 0xf4);
-MAKE_INSTR(INT3,   1, 0xcc);
-MAKE_INSTR(RDTSC,  2, 0x0f, 0x31);
-MAKE_INSTR(PAUSE,  1, 0x90);
-MAKE_INSTR(XSETBV, 3, 0x0f, 0x01, 0xd1);
-MAKE_INSTR(VMRUN,  3, 0x0f, 0x01, 0xd8);
-MAKE_INSTR(VMLOAD, 3, 0x0f, 0x01, 0xda);
-MAKE_INSTR(VMSAVE, 3, 0x0f, 0x01, 0xdb);
-MAKE_INSTR(STGI,   3, 0x0f, 0x01, 0xdc);
-MAKE_INSTR(CLGI,   3, 0x0f, 0x01, 0xdd);
-MAKE_INSTR(INVLPGA,3, 0x0f, 0x01, 0xdf);
-
-static const u8 *const opc_bytes[INSTR_MAX_COUNT] =
-{
-    [INSTR_INVD]   = OPCODE_INVD,
-    [INSTR_WBINVD] = OPCODE_WBINVD,
-    [INSTR_CPUID]  = OPCODE_CPUID,
-    [INSTR_RDMSR]  = OPCODE_RDMSR,
-    [INSTR_WRMSR]  = OPCODE_WRMSR,
-    [INSTR_VMCALL] = OPCODE_VMCALL,
-    [INSTR_HLT]    = OPCODE_HLT,
-    [INSTR_INT3]   = OPCODE_INT3,
-    [INSTR_RDTSC]  = OPCODE_RDTSC,
-    [INSTR_PAUSE]  = OPCODE_PAUSE,
-    [INSTR_XSETBV] = OPCODE_XSETBV,
-    [INSTR_VMRUN]  = OPCODE_VMRUN,
-    [INSTR_VMLOAD] = OPCODE_VMLOAD,
-    [INSTR_VMSAVE] = OPCODE_VMSAVE,
-    [INSTR_STGI]   = OPCODE_STGI,
-    [INSTR_CLGI]   = OPCODE_CLGI,
-    [INSTR_INVLPGA] = OPCODE_INVLPGA,
+static const struct {
+    unsigned int opcode;
+    struct {
+        unsigned int rm:3;
+        unsigned int reg:3;
+        unsigned int mod:2;
+#define MODRM(mod, reg, rm) { rm, reg, mod }
+    } modrm;
+} const opc_tab[INSTR_MAX_COUNT] = {
+    [INSTR_PAUSE]   = { X86EMUL_OPC_F3(0, 0x90) },
+    [INSTR_INT3]    = { X86EMUL_OPC(   0, 0xcc) },
+    [INSTR_HLT]     = { X86EMUL_OPC(   0, 0xf4) },
+    [INSTR_XSETBV]  = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 2, 1) },
+    [INSTR_VMRUN]   = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 0) },
+    [INSTR_VMCALL]  = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 1) },
+    [INSTR_VMLOAD]  = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 2) },
+    [INSTR_VMSAVE]  = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 3) },
+    [INSTR_STGI]    = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 4) },
+    [INSTR_CLGI]    = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 5) },
+    [INSTR_INVLPGA] = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 7) },
+    [INSTR_INVD]    = { X86EMUL_OPC(0x0f, 0x08) },
+    [INSTR_WBINVD]  = { X86EMUL_OPC(0x0f, 0x09) },
+    [INSTR_WRMSR]   = { X86EMUL_OPC(0x0f, 0x30) },
+    [INSTR_RDTSC]   = { X86EMUL_OPC(0x0f, 0x31) },
+    [INSTR_RDMSR]   = { X86EMUL_OPC(0x0f, 0x32) },
+    [INSTR_CPUID]   = { X86EMUL_OPC(0x0f, 0xa2) },
 };
 
-static bool_t fetch(const struct vmcb_struct *vmcb, u8 *buf,
-                    unsigned long addr, unsigned int len)
-{
-    uint32_t pfec = (vmcb_get_cpl(vmcb) == 3) ? PFEC_user_mode : 0;
-
-    switch ( hvm_fetch_from_guest_virt(buf, addr, len, pfec) )
-    {
-    case HVMCOPY_okay:
-        break;
-    case HVMCOPY_bad_gva_to_gfn:
-        /* OK just to give up; we'll have injected #PF already */
-        return 0;
-    default:
-        /* Not OK: fetches from non-RAM pages are not supportable. */
-        gdprintk(XENLOG_WARNING, "Bad instruction fetch at %#lx (%#lx)\n",
-                 vmcb->rip, addr);
-        hvm_inject_hw_exception(TRAP_gp_fault, 0);
-        return 0;
-    }
-    return 1;
-}
-
 int __get_instruction_length_from_list(struct vcpu *v,
         const enum instruction_index *list, unsigned int list_count)
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-    unsigned int i, j, inst_len = 0;
-    enum instruction_index instr = 0;
-    u8 buf[MAX_INST_LEN];
-    const u8 *opcode = NULL;
-    unsigned long fetch_addr, fetch_limit;
-    unsigned int fetch_len, max_len;
-
+    struct hvm_emulate_ctxt ctxt;
+    struct x86_emulate_state *state;
+    unsigned int inst_len, j, modrm_rm, modrm_reg;
+    int modrm_mod;
+
+    /*
+     * In debug builds, always use x86_decode_insn() and compare with
+     * hardware.
+     */
+#ifdef NDEBUG
     if ( (inst_len = svm_nextrip_insn_length(v)) != 0 )
         return inst_len;
 
     if ( vmcb->exitcode == VMEXIT_IOIO )
         return vmcb->exitinfo2 - vmcb->rip;
+#endif
 
-    /* Fetch up to the next page break; we'll fetch from the next page
-     * later if we have to. */
-    fetch_addr = svm_rip2pointer(v, &fetch_limit);
-    if ( vmcb->rip > fetch_limit )
-        return 0;
-    max_len = min(fetch_limit - vmcb->rip + 1, MAX_INST_LEN + 0UL);
-    fetch_len = min_t(unsigned int, max_len,
-                      PAGE_SIZE - (fetch_addr & ~PAGE_MASK));
-    if ( !fetch(vmcb, buf, fetch_addr, fetch_len) )
+    ASSERT(v == current);
+    hvm_emulate_prepare(&ctxt, guest_cpu_user_regs());
+    hvm_emulate_init(&ctxt, NULL, 0);
+    state = x86_decode_insn(&ctxt.ctxt, hvmemul_insn_fetch);
+    if ( IS_ERR_OR_NULL(state) )
         return 0;
 
-    while ( (inst_len < max_len) && is_prefix(buf[inst_len]) )
-    {
-        inst_len++;
-        if ( inst_len >= fetch_len )
-        {
-            if ( !fetch(vmcb, buf + fetch_len, fetch_addr + fetch_len,
-                        max_len - fetch_len) )
-                return 0;
-            fetch_len = max_len;
-        }
+    inst_len = x86_insn_length(state, &ctxt.ctxt);
+    modrm_mod = x86_insn_modrm(state, &modrm_rm, &modrm_reg);
+    x86_emulate_free_state(state);
+#ifndef NDEBUG
+    if ( vmcb->exitcode == VMEXIT_IOIO )
+        j = vmcb->exitinfo2 - vmcb->rip;
+    else
+        j = svm_nextrip_insn_length(v);
+    if ( j && j != inst_len )
+    {
+        gprintk(XENLOG_WARNING, "insn-len[%02x]=%u (exp %u)\n",
+                ctxt.ctxt.opcode, inst_len, j);
+        return j;
     }
+#endif
 
     for ( j = 0; j < list_count; j++ )
     {
-        instr = list[j];
-        opcode = opc_bytes[instr];
+        unsigned int instr = list[j];
 
-        for ( i = 0; (i < opcode[0]) && ((inst_len + i) < max_len); i++ )
+        if ( instr >= ARRAY_SIZE(opc_tab) )
+        {
+            ASSERT_UNREACHABLE();
+            break;
+        }
+        if ( opc_tab[instr].opcode == ctxt.ctxt.opcode )
         {
-            if ( (inst_len + i) >= fetch_len ) 
-            {
-                if ( !fetch(vmcb, buf + fetch_len, fetch_addr + fetch_len,
-                            max_len - fetch_len) )
-                    return 0;
-                fetch_len = max_len;
-            }
+            if ( !opc_tab[instr].modrm.mod )
+                return inst_len;
 
-            if ( buf[inst_len+i] != opcode[i+1] )
-                goto mismatch;
+            if ( modrm_mod == opc_tab[instr].modrm.mod &&
+                 (modrm_rm & 7) == opc_tab[instr].modrm.rm &&
+                 (modrm_reg & 7) == opc_tab[instr].modrm.reg )
+                return inst_len;
         }
-        goto done;
-    mismatch: ;
     }
 
     gdprintk(XENLOG_WARNING,
-             "%s: Mismatch between expected and actual instruction bytes: "
+             "%s: Mismatch between expected and actual instruction: "
              "eip = %lx\n",  __func__, (unsigned long)vmcb->rip);
     hvm_inject_hw_exception(TRAP_gp_fault, 0);
     return 0;
-
- done:
-    inst_len += opcode[0];
-    ASSERT(inst_len <= max_len);
-    return inst_len;
 }
 
 /*
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -382,9 +382,9 @@ struct operand {
     } mem;
 };
 #ifdef __x86_64__
-#define REG_POISON ((unsigned long *) 0x8086000000008086UL) /* non-canonical */
+#define PTR_POISON ((void *)0x8086000000008086UL) /* non-canonical */
 #else
-#define REG_POISON NULL /* 32-bit builds are for user-space, so NULL is OK. */
+#define PTR_POISON NULL /* 32-bit builds are for user-space, so NULL is OK. */
 #endif
 
 typedef union {
@@ -1646,6 +1646,14 @@ struct x86_emulate_state {
 
     unsigned long eip;
     struct cpu_user_regs *regs;
+
+#ifndef NDEBUG
+    /*
+     * Track caller of x86_decode_insn() to spot missing as well as
+     * premature calls to x86_emulate_free_state().
+     */
+    void *caller;
+#endif
 };
 
 /* Helper definitions. */
@@ -1673,6 +1681,11 @@ x86_decode_onebyte(
 
     switch ( ctxt->opcode )
     {
+    case 0x90: /* nop / pause */
+        if ( repe_prefix() )
+            ctxt->opcode |= X86EMUL_OPC_F3(0, 0);
+        break;
+
     case 0x9a: /* call (far, absolute) */
     case 0xea: /* jmp (far, absolute) */
         generate_exception_if(mode_64bit(), EXC_UD, -1);
@@ -1780,7 +1793,7 @@ x86_decode(
     override_seg = -1;
     ea.type = OP_MEM;
     ea.mem.seg = x86_seg_ds;
-    ea.reg = REG_POISON;
+    ea.reg = PTR_POISON;
     state->regs = ctxt->regs;
     state->eip = ctxt->regs->eip;
 
@@ -2267,8 +2280,8 @@ x86_emulate(
     struct x86_emulate_state state;
     int rc;
     uint8_t b, d;
-    struct operand src = { .reg = REG_POISON };
-    struct operand dst = { .reg = REG_POISON };
+    struct operand src = { .reg = PTR_POISON };
+    struct operand dst = { .reg = PTR_POISON };
     enum x86_swint_type swint_type;
     struct x86_emulate_stub stub = {};
     DECLARE_ALIGNED(mmval_t, mmval);
@@ -2861,8 +2874,9 @@ x86_emulate(
         break;
 
     case 0x90: /* nop / xchg %%r8,%%rax */
+    case X86EMUL_OPC_F3(0, 0x90): /* pause / xchg %%r8,%%rax */
         if ( !(rex_prefix & 1) )
-            break; /* nop */
+            break; /* nop / pause */
         /* fall through */
 
     case 0x91 ... 0x97: /* xchg reg,%%rax */
@@ -5199,3 +5213,89 @@ x86_emulate(
 #undef vex
 #undef override_seg
 #undef ea
+
+#ifdef __XEN__
+
+#include <xen/err.h>
+
+struct x86_emulate_state *
+x86_decode_insn(
+    struct x86_emulate_ctxt *ctxt,
+    int (*insn_fetch)(
+        enum x86_segment seg, unsigned long offset,
+        void *p_data, unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt))
+{
+    static DEFINE_PER_CPU(struct x86_emulate_state, state);
+    struct x86_emulate_state *state = &this_cpu(state);
+    const struct x86_emulate_ops ops = {
+        .insn_fetch = insn_fetch,
+        .read       = x86emul_unhandleable_rw,
+        .write      = PTR_POISON,
+        .cmpxchg    = PTR_POISON,
+    };
+    int rc = x86_decode(state, ctxt, &ops);
+
+    if ( unlikely(rc != X86EMUL_OKAY) )
+        return ERR_PTR(-rc);
+
+#ifndef NDEBUG
+    /*
+     * While we avoid memory allocation (by use of per-CPU data) above,
+     * nevertheless make sure callers properly release the state structure
+     * for forward compatibility.
+     */
+    if ( state->caller )
+    {
+        printk(XENLOG_ERR "Unreleased emulation state acquired by %ps\n",
+               state->caller);
+        dump_execution_state();
+    }
+    state->caller = __builtin_return_address(0);
+#endif
+
+    return state;
+}
+
+static inline void check_state(const struct x86_emulate_state *state)
+{
+#ifndef NDEBUG
+    ASSERT(state->caller);
+#endif
+}
+
+#ifndef NDEBUG
+void x86_emulate_free_state(struct x86_emulate_state *state)
+{
+    check_state(state);
+    state->caller = NULL;
+}
+#endif
+
+int
+x86_insn_modrm(const struct x86_emulate_state *state,
+               unsigned int *rm, unsigned int *reg)
+{
+    check_state(state);
+
+    if ( !(state->desc & ModRM) )
+        return -EINVAL;
+
+    if ( rm )
+        *rm = state->modrm_rm;
+    if ( reg )
+        *reg = state->modrm_reg;
+
+    return state->modrm_mod;
+}
+
+unsigned int
+x86_insn_length(const struct x86_emulate_state *state,
+                const struct x86_emulate_ctxt *ctxt)
+{
+    check_state(state);
+
+    return state->eip - ctxt->regs->eip;
+}
+
+#endif
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -532,4 +532,29 @@ x86emul_unhandleable_rw(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt);
 
+#ifdef __XEN__
+
+struct x86_emulate_state *
+x86_decode_insn(
+    struct x86_emulate_ctxt *ctxt,
+    int (*insn_fetch)(
+        enum x86_segment seg, unsigned long offset,
+        void *p_data, unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt));
+
+int
+x86_insn_modrm(const struct x86_emulate_state *state,
+               unsigned int *rm, unsigned int *reg);
+unsigned int
+x86_insn_length(const struct x86_emulate_state *state,
+                const struct x86_emulate_ctxt *ctxt);
+
+#ifdef NDEBUG
+static inline void x86_emulate_free_state(struct x86_emulate_state *state) {}
+#else
+void x86_emulate_free_state(struct x86_emulate_state *state);
+#endif
+
+#endif
+
 #endif /* __X86_EMULATE_H__ */
--- a/xen/include/asm-x86/hvm/emulate.h
+++ b/xen/include/asm-x86/hvm/emulate.h
@@ -54,6 +54,10 @@ void hvm_mem_access_emulate_one(enum emu
 void hvm_emulate_prepare(
     struct hvm_emulate_ctxt *hvmemul_ctxt,
     struct cpu_user_regs *regs);
+void hvm_emulate_init(
+    struct hvm_emulate_ctxt *hvmemul_ctxt,
+    const unsigned char *insn_buf,
+    unsigned int insn_bytes);
 void hvm_emulate_writeback(
     struct hvm_emulate_ctxt *hvmemul_ctxt);
 struct segment_register *hvmemul_get_seg_reg(
@@ -61,6 +65,11 @@ struct segment_register *hvmemul_get_seg
     struct hvm_emulate_ctxt *hvmemul_ctxt);
 int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla);
 
+int hvmemul_insn_fetch(enum x86_segment seg,
+                       unsigned long offset,
+                       void *p_data,
+                       unsigned int bytes,
+                       struct x86_emulate_ctxt *ctxt);
 int hvmemul_do_pio_buffer(uint16_t port,
                           unsigned int size,
                           uint8_t dir,


Attachment: SVM-use-generic-decode.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.