WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

Re: [Xen-devel] [PATCH 2/2] x86/emulator: generalize movq emulation (SSE2 and AVX variants)

To: Jan Beulich <JBeulich@xxxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: Re: [Xen-devel] [PATCH 2/2] x86/emulator: generalize movq emulation (SSE2 and AVX variants)
From: Keir Fraser <keir@xxxxxxx>
Date: Wed, 16 Nov 2011 16:25:59 +0000
Cc:
Delivery-date: Wed, 16 Nov 2011 08:27:43 -0800
Dkim-signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=sender:user-agent:date:subject:from:to:message-id:thread-topic :thread-index:in-reply-to:mime-version:content-type :content-transfer-encoding; bh=V2QdqzepkW1th0bjr7BMkLh5flCNP2OScE+XllJzLEg=; b=jtdA19CaEabckbd6vWaTrL/vk+WFv5TlK5zoVMiGxRihShq05RcgkJ6H9PYSWicvfh Bt4o3iPDOX7FCi3fJfN6TnqUMgLnxMaLmyToV4i+5WXkHl5Y7ukEpIkgz2B9Wbcj/SjB nUbQkQ4IXFCUkMlNQ0rhQQV9gnqgOGjgev9GY=
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <4EC3D00602000078000614FC@xxxxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Thread-index: AcykfGx+PpuAFAnEI0mY+7BYvqEBYA==
Thread-topic: [Xen-devel] [PATCH 2/2] x86/emulator: generalize movq emulation (SSE2 and AVX variants)
User-agent: Microsoft-Entourage/12.31.0.110725
On 16/11/2011 14:00, "Jan Beulich" <JBeulich@xxxxxxxx> wrote:

> Extend the existing movq emulation to also support its SSE2 and AVX
> variants, the latter implying the addition of VEX decoding. Fold the
> read and write cases (as most of the logic is identical), and add
> movntq and variants (as they're very similar).
> 
> Extend the testing code to also exercise these instructions.

I checked in your other patches, although I split them up and revised them
in some cases.

This one is broadly okay too, but:

 1. Don't import vm86_mode(). x86_emulate already does eflags&EFLG_VM in
some places. And that's fairly self documenting so just carry on with that.

 2. Don't import DEFINE_PER_CPU/this_cpu. I understand it works around a
critical issue but it's *so* nasty. I would rather define a nasty private
macro for declaring aligned space on the stack, like, for example:
   char __mmval[64], *mmval = (__mmval + 31) & ~31;
(suitably cleaned up, macroised, and made compilable of course ;-)

 3. There's a XXX'ed chunk of code in the middle of the patch. No
explanation. Remove it, or comment it, or something.

Note that I changed the vcpu_must_have stuff when I checked it in, so those
bits will need fixup in this patch too. In particular, I don't bother
importing cpufeature.h -- the leaf/reg are already open coded with no macro
abstraction, so I see no harm in open-coding the bit number either. They
won't change and the vcpu_must_have_xxx macro name is sufficient
documentation in itself.

 -- Keir

> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> 
> --- a/tools/tests/x86_emulator/test_x86_emulator.c
> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
> @@ -1,3 +1,5 @@
> +#include <errno.h>
> +#include <stdbool.h>
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <string.h>
> @@ -53,11 +55,84 @@ static int cmpxchg(
>      return X86EMUL_OKAY;
>  }
>  
> +static int cpuid(
> +    unsigned int *eax,
> +    unsigned int *ebx,
> +    unsigned int *ecx,
> +    unsigned int *edx,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    asm ("cpuid" : "+a" (*eax), "+c" (*ecx), "=d" (*edx), "=b" (*ebx));
> +    return X86EMUL_OKAY;
> +}
> +
> +#define cpu_has_mmx ({ \
> +    unsigned int eax = 1, ecx = 0, edx; \
> +    cpuid(&eax, &ecx, &ecx, &edx, NULL); \
> +    (edx & (1U << 23)) != 0; \
> +})
> +
> +#define cpu_has_sse ({ \
> +    unsigned int eax = 1, ecx = 0, edx; \
> +    cpuid(&eax, &ecx, &ecx, &edx, NULL); \
> +    (edx & (1U << 25)) != 0; \
> +})
> +
> +#define cpu_has_sse2 ({ \
> +    unsigned int eax = 1, ecx = 0, edx; \
> +    cpuid(&eax, &ecx, &ecx, &edx, NULL); \
> +    (edx & (1U << 26)) != 0; \
> +})
> +
> +static inline uint64_t xgetbv(uint32_t xcr)
> +{
> +    uint64_t res;
> +
> +    asm ( ".byte 0x0f, 0x01, 0xd0" : "=A" (res) : "c" (xcr) );
> +
> +    return res;
> +}
> +
> +#define cpu_has_avx ({ \
> +    unsigned int eax = 1, ecx = 0, edx; \
> +    cpuid(&eax, &edx, &ecx, &edx, NULL); \
> +    if ( !(ecx & (1U << 27)) || (xgetbv(0) & 6) != 6 ) \
> +        ecx = 0; \
> +    (ecx & (1U << 28)) != 0; \
> +})
> +
> +int get_fpu(
> +    void (*exception_callback)(void *, struct cpu_user_regs *),
> +    void *exception_callback_arg,
> +    enum x86_emulate_fpu_type type,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    switch ( type )
> +    {
> +    case X86EMUL_FPU_fpu:
> +        break;
> +    case X86EMUL_FPU_ymm:
> +        if ( cpu_has_avx )
> +            break;
> +    case X86EMUL_FPU_xmm:
> +        if ( cpu_has_sse )
> +            break;
> +    case X86EMUL_FPU_mmx:
> +        if ( cpu_has_mmx )
> +            break;
> +    default:
> +        return X86EMUL_UNHANDLEABLE;
> +    }
> +    return X86EMUL_OKAY;
> +}
> +
>  static struct x86_emulate_ops emulops = {
>      .read       = read,
>      .insn_fetch = read,
>      .write      = write,
>      .cmpxchg    = cmpxchg,
> +    .cpuid      = cpuid,
> +    .get_fpu    = get_fpu,
>  };
>  
>  int main(int argc, char **argv)
> @@ -66,6 +141,8 @@ int main(int argc, char **argv)
>      struct cpu_user_regs regs;
>      char *instr;
>      unsigned int *res, i, j;
> +    unsigned long sp;
> +    bool stack_exec;
>      int rc;
>  #ifndef __x86_64__
>      unsigned int bcdres_native, bcdres_emul;
> @@ -85,6 +162,16 @@ int main(int argc, char **argv)
>      }
>      instr = (char *)res + 0x100;
>  
> +#ifdef __x86_64__
> +    asm ("movq %%rsp, %0" : "=g" (sp));
> +#else
> +    asm ("movl %%esp, %0" : "=g" (sp));
> +#endif
> +    stack_exec = mprotect((void *)(sp & -0x1000L) - (MMAP_SZ - 0x1000),
> +                          MMAP_SZ, PROT_READ|PROT_WRITE|PROT_EXEC) == 0;
> +    if ( !stack_exec )
> +        printf("Warning: Stack could not be made executable (%d).\n", errno);
> +
>      printf("%-40s", "Testing addl %%ecx,(%%eax)...");
>      instr[0] = 0x01; instr[1] = 0x08;
>      regs.eflags = 0x200;
> @@ -442,6 +529,108 @@ int main(int argc, char **argv)
>      printf("skipped\n");
>  #endif
>  
> +    printf("%-40s", "Testing movq %mm3,(%ecx)...");
> +    if ( stack_exec && cpu_has_mmx )
> +    {
> +        extern const unsigned char movq_to_mem[];
> +
> +        asm volatile ( "pcmpeqb %%mm3, %%mm3\n"
> +                       ".pushsection .test, \"a\", @progbits\n"
> +                       "movq_to_mem: movq %%mm3, (%0)\n"
> +                       ".popsection" :: "c" (NULL) );
> +
> +        memcpy(instr, movq_to_mem, 15);
> +        memset(res, 0x33, 64);
> +        memset(res + 8, 0xff, 8);
> +        regs.eip    = (unsigned long)&instr[0];
> +        regs.ecx    = (unsigned long)res;
> +        rc = x86_emulate(&ctxt, &emulops);
> +        if ( (rc != X86EMUL_OKAY) ||
> +             memcmp(res, res + 8, 32) )
> +            goto fail;
> +        printf("okay\n");
> +    }
> +    else
> +        printf("skipped\n");
> +
> +    printf("%-40s", "Testing movq (%edx),%mm5...");
> +    if ( stack_exec && cpu_has_mmx )
> +    {
> +        extern const unsigned char movq_from_mem[];
> +
> +        asm volatile ( "pcmpgtb %%mm5, %%mm5\n"
> +                       ".pushsection .test, \"a\", @progbits\n"
> +                       "movq_from_mem: movq (%0), %%mm5\n"
> +                       ".popsection" :: "d" (NULL) );
> +
> +        memcpy(instr, movq_from_mem, 15);
> +        regs.eip    = (unsigned long)&instr[0];
> +        regs.ecx    = 0;
> +        regs.edx    = (unsigned long)res;
> +        rc = x86_emulate(&ctxt, &emulops);
> +        if ( rc != X86EMUL_OKAY )
> +            goto fail;
> +        asm ( "pcmpeqb %%mm3, %%mm3\n\t"
> +              "pcmpeqb %%mm5, %%mm3\n\t"
> +              "pmovmskb %%mm3, %0" : "=r" (rc) );
> +        if ( rc != 0xff )
> +            goto fail;
> +        printf("okay\n");
> +    }
> +    else
> +        printf("skipped\n");
> +
> +    printf("%-40s", "Testing movdqu %xmm2,(%ecx)...");
> +    if ( stack_exec && cpu_has_sse2 )
> +    {
> +        extern const unsigned char movdqu_to_mem[];
> +
> +        asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n"
> +                       ".pushsection .test, \"a\", @progbits\n"
> +                       "movdqu_to_mem: movdqu %%xmm2, (%0)\n"
> +                       ".popsection" :: "c" (NULL) );
> +
> +        memcpy(instr, movdqu_to_mem, 15);
> +        memset(res, 0x55, 64);
> +        memset(res + 8, 0xff, 16);
> +        regs.eip    = (unsigned long)&instr[0];
> +        regs.ecx    = (unsigned long)res;
> +        rc = x86_emulate(&ctxt, &emulops);
> +        if ( (rc != X86EMUL_OKAY) ||
> +             memcmp(res, res + 8, 32) )
> +            goto fail;
> +        printf("okay\n");
> +    }
> +    else
> +        printf("skipped\n");
> +
> +    printf("%-40s", "Testing movdqu (%edx),%xmm4...");
> +    if ( stack_exec && cpu_has_sse2 )
> +    {
> +        extern const unsigned char movdqu_from_mem[];
> +
> +        asm volatile ( "pcmpgtb %%xmm4, %%xmm4\n"
> +                       ".pushsection .test, \"a\", @progbits\n"
> +                       "movdqu_from_mem: movdqu (%0), %%xmm4\n"
> +                       ".popsection" :: "d" (NULL) );
> +
> +        memcpy(instr, movdqu_from_mem, 15);
> +        regs.eip    = (unsigned long)&instr[0];
> +        regs.ecx    = 0;
> +        regs.edx    = (unsigned long)res;
> +        rc = x86_emulate(&ctxt, &emulops);
> +        if ( rc != X86EMUL_OKAY )
> +            goto fail;
> +        asm ( "pcmpeqb %%xmm2, %%xmm2\n\t"
> +              "pcmpeqb %%xmm4, %%xmm2\n\t"
> +              "pmovmskb %%xmm2, %0" : "=r" (rc) );
> +        if ( rc != 0xffff )
> +            goto fail;
> +        printf("okay\n");
> +    }
> +    else
> +        printf("skipped\n");
> +
>      for ( j = 1; j <= 2; j++ )
>      {
>  #if defined(__i386__)
> --- a/tools/tests/x86_emulator/x86_emulate.c
> +++ b/tools/tests/x86_emulator/x86_emulate.c
> @@ -9,5 +9,10 @@ typedef bool bool_t;
>  
>  #define BUG() abort()
>  
> +#define DEFINE_PER_CPU(type, var) type this_cpu_##var
> +#define this_cpu(var) this_cpu_##var
> +
> +#define vm86_mode(regs) 0
> +
>  #include "x86_emulate/x86_emulate.h"
>  #include "x86_emulate/x86_emulate.c"
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -16,6 +16,7 @@
>  #include <xen/paging.h>
>  #include <xen/trace.h>
>  #include <asm/event.h>
> +#include <asm/xstate.h>
>  #include <asm/hvm/emulate.h>
>  #include <asm/hvm/hvm.h>
>  #include <asm/hvm/trace.h>
> @@ -905,6 +906,20 @@ static int hvmemul_get_fpu(
>          if ( !cpu_has_mmx )
>              return X86EMUL_UNHANDLEABLE;
>          break;
> +    case X86EMUL_FPU_xmm:
> +        if ( !cpu_has_xmm ||
> +             (curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_EM) ||
> +             !(curr->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSFXSR) )
> +            return X86EMUL_UNHANDLEABLE;
> +        break;
> +    case X86EMUL_FPU_ymm:
> +        if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ||
> +             vm86_mode(ctxt->regs) ||
> +             !(curr->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSXSAVE) ||
> +             !(curr->arch.xcr0 & XSTATE_SSE) ||
> +             !(curr->arch.xcr0 & XSTATE_YMM) )
> +            return X86EMUL_UNHANDLEABLE;
> +        break;
>      default:
>          return X86EMUL_UNHANDLEABLE;
>      }
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -253,6 +253,47 @@ static uint8_t twobyte_table[256] = {
>      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
>  };
>  
> +#define REX_PREFIX 0x40
> +#define REX_B 0x01
> +#define REX_X 0x02
> +#define REX_R 0x04
> +#define REX_W 0x08
> +
> +#define vex_none 0
> +
> +enum vex_opcx {
> +    vex_0f = vex_none + 1,
> +    vex_0f38,
> +    vex_0f3a,
> +};
> +
> +enum vex_pfx {
> +    vex_66 = vex_none + 1,
> +    vex_f3,
> +    vex_f2
> +};
> +
> +union vex {
> +    uint8_t raw[2];
> +    struct {
> +        uint8_t opcx:5;
> +        uint8_t b:1;
> +        uint8_t x:1;
> +        uint8_t r:1;
> +        uint8_t pfx:2;
> +        uint8_t l:1;
> +        uint8_t reg:4;
> +        uint8_t w:1;
> +    };
> +};
> +
> +#define copy_REX_VEX(ptr, rex, vex) do { \
> +    if ( (vex).opcx != vex_none ) \
> +        ptr[0] = 0xc4, ptr[1] = (vex).raw[0], ptr[2] = (vex).raw[1]; \
> +    else if ( mode_64bit() ) \
> +        ptr[1] = rex | REX_PREFIX; \
> +} while (0)
> +
>  /* Type, address-of, and value of an instruction's operand. */
>  struct operand {
>      enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
> @@ -281,6 +322,18 @@ struct operand {
>      };
>  };
>  
> +typedef union {
> +    uint64_t mmx;
> +    uint64_t __attribute__ ((aligned(16))) xmm[2];
> +    uint64_t __attribute__ ((aligned(32))) ymm[4];
> +} mmval_t;
> +
> +/*
> + * While alignment gets specified above, this doesn't get honored by the
> + * compiler for automatic variables. Thus use a per-CPU variable instead.
> + */
> +static DEFINE_PER_CPU(mmval_t, mmval);
> +
>  /* MSRs. */
>  #define MSR_TSC          0x00000010
>  #define MSR_SYSENTER_CS  0x00000174
> @@ -972,9 +1025,12 @@ static bool_t vcpu_has(
>      generate_exception_if(!vcpu_has(leaf, subleaf, reg, \
>                                      X86_FEATURE_##feature % 32, \
>                                      ctxt, ops), EXC_UD, -1)
> +#define vcpu_must_have_mmx()  vcpu_must_have(1, 0, EDX, MMX)
> +#define vcpu_must_have_sse()  vcpu_must_have(1, 0, EDX, XMM)
>  #define vcpu_must_have_sse2() vcpu_must_have(1, 0, EDX, XMM2)
>  #define vcpu_must_have_sse3() vcpu_must_have(1, 0, ECX, XMM3)
>  #define vcpu_must_have_cx16() vcpu_must_have(1, 0, ECX, CX16)
> +#define vcpu_must_have_avx()  vcpu_must_have(1, 0, ECX, AVX)
>  
>  static int
>  in_realmode(
> @@ -1255,6 +1311,7 @@ x86_emulate(
>  
>      uint8_t b, d, sib, sib_index, sib_base, twobyte = 0, rex_prefix = 0;
>      uint8_t modrm = 0, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
> +    union vex vex = {};
>      unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes;
>  #define REPE_PREFIX  1
>  #define REPNE_PREFIX 2
> @@ -1287,6 +1344,7 @@ x86_emulate(
>          {
>          case 0x66: /* operand-size override */
>              op_bytes = def_op_bytes ^ 6;
> +            vex.pfx = vex_66;
>              break;
>          case 0x67: /* address-size override */
>              ad_bytes = def_ad_bytes ^ (mode_64bit() ? 12 : 6);
> @@ -1314,9 +1372,11 @@ x86_emulate(
>              break;
>          case 0xf2: /* REPNE/REPNZ */
>              rep_prefix = REPNE_PREFIX;
> +            vex.pfx = vex_f2;
>              break;
>          case 0xf3: /* REP/REPE/REPZ */
>              rep_prefix = REPE_PREFIX;
> +            vex.pfx = vex_f3;
>              break;
>          case 0x40 ... 0x4f: /* REX */
>              if ( !mode_64bit() )
> @@ -1360,6 +1420,70 @@ x86_emulate(
>      {
>          modrm = insn_fetch_type(uint8_t);
>          modrm_mod = (modrm & 0xc0) >> 6;
> +
> +        if ( !twobyte && (b & ~1) == 0xc4 )
> +            switch ( def_ad_bytes )
> +            {
> +            default:
> +                BUG();
> +            case 2:
> +                if ( in_realmode(ctxt, ops) || vm86_mode(&_regs) )
> +                    break;
> +                /* fall through */
> +            case 4:
> +                if ( modrm_mod != 3 )
> +                    break;
> +                /* fall through */
> +            case 8:
> +                /* VEX */
> +                generate_exception_if(rex_prefix || vex.pfx, EXC_UD, -1);
> +
> +                vex.raw[0] = b;
> +                if ( b & 1 )
> +                {
> +                    vex.raw[1] = b;
> +                    vex.opcx = vex_0f;
> +                    vex.x = 1;
> +                    vex.b = 1;
> +                    vex.w = 0;
> +                }
> +                else
> +                {
> +                    vex.raw[1] = insn_fetch_type(uint8_t);
> +                    if ( mode_64bit() )
> +                    {
> +                        if ( !vex.b )
> +                            rex_prefix |= REX_B;
> +                        if ( !vex.x )
> +                            rex_prefix |= REX_X;
> +                        if ( vex.w )
> +                        {
> +                            rex_prefix |= REX_W;
> +                            op_bytes = 8;
> +                        }
> +                    }
> +                }
> +                vex.reg ^= 0xf;
> +                if ( !mode_64bit() )
> +                    vex.reg &= 0x7;
> +                else if ( !vex.r )
> +                    rex_prefix |= REX_R;
> +
> +                fail_if(vex.opcx != vex_0f);
> +                twobyte = 1;
> +                b = insn_fetch_type(uint8_t);
> +                d = twobyte_table[b];
> +
> +                /* Unrecognised? */
> +                if ( d == 0 )
> +                    goto cannot_emulate;
> +
> +                modrm = insn_fetch_type(uint8_t);
> +                modrm_mod = (modrm & 0xc0) >> 6;
> +
> +                break;
> +            }
> +
>          modrm_reg = ((rex_prefix & 4) << 1) | ((modrm & 0x38) >> 3);
>          modrm_rm  = modrm & 0x07;
>  
> @@ -3917,44 +4041,77 @@ x86_emulate(
>          break;
>      }
>  
> -    case 0x6f: /* movq mm/m64,mm */ {
> -        uint8_t stub[] = { 0x0f, 0x6f, modrm, 0xc3 };
> +    case 0x6f: /* movq mm/m64,mm */
> +               /* {,v}movdq{a,u} xmm/m128,xmm */
> +               /* vmovdq{a,u} ymm/m256,ymm */
> +    case 0x7f: /* movq mm,mm/m64 */
> +               /* {,v}movdq{a,u} xmm,xmm/m128 */
> +               /* vmovdq{a,u} ymm,ymm/m256 */
> +    case 0xe7: /* movntq mm,mm/m64 */
> +               /* {,v}movntdq xmm,xmm/m128 */
> +               /* vmovntdq{a,u} ymm,ymm/m256 */
> +    {
> +        uint8_t stub[] = { 0x3e, 0x3e, 0x0f, b, modrm, 0xc3 };
>          struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 };
> -        uint64_t val;
> -        if ( ea.type == OP_MEM )
> +
> +        if ( vex.opcx == vex_none )
>          {
> -            unsigned long lval, hval;
> -            if ( (rc = read_ulong(ea.mem.seg, ea.mem.off+0,
> -                                  &lval, 4, ctxt, ops)) ||
> -                 (rc = read_ulong(ea.mem.seg, ea.mem.off+4,
> -                                  &hval, 4, ctxt, ops)) )
> -                goto done;
> -            val = ((uint64_t)hval << 32) | (uint32_t)lval;
> -            stub[2] = modrm & 0x38; /* movq (%eax),%mmN */
> +            switch ( vex.pfx )
> +            {
> +            case vex_f3:
> +                fail_if(b == 0xe7);
> +                /* fall through */
> +            case vex_66:
> +                vcpu_must_have_sse2();
> +                stub[0] = 0x66; /* movdqa */
> +                get_fpu(X86EMUL_FPU_xmm, &fic);
> +                ea.bytes = 16;
> +                break;
> +            case vex_none:
> +                if ( b != 0xe7 )
> +                    vcpu_must_have_mmx();
> +                else
> +                    vcpu_must_have_sse();
> +                get_fpu(X86EMUL_FPU_mmx, &fic);
> +                ea.bytes = 8;
> +                break;
> +            default:
> +                goto cannot_emulate;
> +            }
> +        }
> +        else
> +        {
> +            fail_if(vex.opcx != vex_0f || vex.reg ||
> +                    (vex.pfx != vex_66 && (vex.pfx != vex_f3 || b == 0xe7)));
> +            vcpu_must_have_avx();
> +            get_fpu(X86EMUL_FPU_ymm, &fic);
> +            ea.bytes = 16 << vex.l;
>          }
> -        get_fpu(X86EMUL_FPU_mmx, &fic);
> -        asm volatile ( "call *%0" : : "r" (stub), "a" (&val) : "memory" );
> -        put_fpu(&fic);
> -        break;
> -    }
> -
> -    case 0x7f: /* movq mm,mm/m64 */ {
> -        uint8_t stub[] = { 0x0f, 0x7f, modrm, 0xc3 };
> -        struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 };
> -        uint64_t val;
> -        if ( ea.type == OP_MEM )
> -            stub[2] = modrm & 0x38; /* movq %mmN,(%eax) */
> -        get_fpu(X86EMUL_FPU_mmx, &fic);
> -        asm volatile ( "call *%0" : : "r" (stub), "a" (&val) : "memory" );
> -        put_fpu(&fic);
>          if ( ea.type == OP_MEM )
>          {
> -            unsigned long lval = (uint32_t)val, hval = (uint32_t)(val >> 32);
> -            if ( (rc = ops->write(ea.mem.seg, ea.mem.off+0, &lval, 4, ctxt))
> ||
> -                 (rc = ops->write(ea.mem.seg, ea.mem.off+4, &hval, 4, ctxt))
> )
> -                goto done;
> +            /* XXX
> +            generate_exception_if(vex.pfx == vex_66 &&
> +                                  (ops->ea(ea.mem.seg, ea.mem.off)
> +                                   & (ea.bytes - 1)), EXC_GP, 0); */
> +            if ( b == 0x6f )
> +                rc = ops->read(ea.mem.seg, ea.mem.off+0, &this_cpu(mmval),
> +                               ea.bytes, ctxt);
> +            /* convert memory operand to (%rAX) */
> +            rex_prefix &= ~REX_B;
> +            vex.b = 1;
> +            stub[4] &= 0x38;
> +        }
> +        if ( !rc )
> +        {
> +           copy_REX_VEX(stub, rex_prefix, vex);
> +           asm volatile ( "call *%0" : : "r" (stub), "a" (&this_cpu(mmval))
> +                                     : "memory" );
>          }
> -        break;
> +        put_fpu(&fic);
> +        if ( b != 0x6f && ea.type == OP_MEM )
> +            rc = ops->write(ea.mem.seg, ea.mem.off, &this_cpu(mmval),
> +                            ea.bytes, ctxt);
> +        goto done;
>      }
>  
>      case 0x80 ... 0x8f: /* jcc (near) */ {
> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
> @@ -99,7 +99,9 @@ struct segment_register {
>  /* FPU sub-types which may be requested via ->get_fpu(). */
>  enum x86_emulate_fpu_type {
>      X86EMUL_FPU_fpu, /* Standard FPU coprocessor instruction set */
> -    X86EMUL_FPU_mmx  /* MMX instruction set (%mm0-%mm7) */
> +    X86EMUL_FPU_mmx, /* MMX instruction set (%mm0-%mm7) */
> +    X86EMUL_FPU_xmm, /* SSE instruction set (%xmm0-%xmm7/15) */
> +    X86EMUL_FPU_ymm  /* AVX/XOP instruction set (%ymm0-%ymm7/15) */
>  };
>  
>  /*
> --- a/xen/arch/x86/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate.c
> @@ -10,9 +10,11 @@
>   */
>  
>  #include <asm/cpufeature.h>
> +#include <asm/processor.h>
>  #include <asm/x86_emulate.h>
>  
>  /* Avoid namespace pollution. */
>  #undef cmpxchg
> +#undef cpuid
>  
>  #include "x86_emulate/x86_emulate.c"
> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -218,7 +218,7 @@
>  #define cpu_has_x2apic          boot_cpu_has(X86_FEATURE_X2APIC)
>  
>  #define cpu_has_xsave           boot_cpu_has(X86_FEATURE_XSAVE)
> -
> +#define cpu_has_avx             boot_cpu_has(X86_FEATURE_AVX)
>  #define cpu_has_lwp             boot_cpu_has(X86_FEATURE_LWP)
>  
>  #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

[Prev in Thread] Current Thread [Next in Thread]