Re: [Xen-devel] [PATCH v3 2/3] x86/emulate: add support for emulating the SSE2 instructions {,v}movd mm,r32/m32 and {,v}movq mm,r64
On Monday 01 August 2016 15:53:27 Mihai Donțu wrote:
> On Monday 01 August 2016 10:52:12 Andrew Cooper wrote:
> > On 01/08/16 03:52, Mihai Donțu wrote:
> > > Found that a Windows driver was using the SSE2 instruction MOVD.
> > >
> > > Signed-off-by: Zhi Wang <zhi.a.wang@xxxxxxxxx>
> > > Signed-off-by: Mihai Donțu <mdontu@xxxxxxxxxxxxxxx>
> > > ---
> > > Picked from the XenServer 7 patch queue, as suggested by Andrew Cooper
> > >
> > > Changed since v2:
> > >  * handle the case where the destination is a GPR
> > > ---
> > >  xen/arch/x86/x86_emulate/x86_emulate.c | 38 +++++++++++++++++++++++++++++++---
> > >  1 file changed, 35 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
> > > index 44de3b6..9f89ada 100644
> > > --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> > > +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> > > @@ -204,7 +204,7 @@ static uint8_t twobyte_table[256] = {
> > >      /* 0x60 - 0x6F */
> > >      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM,
> > >      /* 0x70 - 0x7F */
> > > -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM,
> > > +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, ImplicitOps|ModRM,
> > >      /* 0x80 - 0x87 */
> > >      ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
> > >      ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
> > > @@ -4409,6 +4409,10 @@ x86_emulate(
> > >      case 0x6f: /* movq mm/m64,mm */
> > >                 /* {,v}movdq{a,u} xmm/m128,xmm */
> > >                 /* vmovdq{a,u} ymm/m256,ymm */
> > > +    case 0x7e: /* movd mm,r/m32 */
> > > +               /* movq mm,r/m64 */
> > > +               /* {,v}movd xmm,r/m32 */
> > > +               /* {,v}movq xmm,r/m64 */
> > >      case 0x7f: /* movq mm,mm/m64 */
> > >                 /* {,v}movdq{a,u} xmm,xmm/m128 */
> > >                 /* vmovdq{a,u} ymm,ymm/m256 */
> > > @@ -4432,7 +4436,17 @@ x86_emulate(
> > >              host_and_vcpu_must_have(sse2);
> > >              buf[0] = 0x66; /* SSE */
> > >              get_fpu(X86EMUL_FPU_xmm, &fic);
> > > -            ea.bytes = (b == 0xd6 ? 8 : 16);
> > > +            switch ( b )
> > > +            {
> > > +            case 0x7e:
> > > +                ea.bytes = 4;
> > > +                break;
> > > +            case 0xd6:
> > > +                ea.bytes = 8;
> > > +                break;
> > > +            default:
> > > +                ea.bytes = 16;
> > > +            }
> > >              break;
> > >          case vex_none:
> > >              if ( b != 0xe7 )
> > > @@ -4452,7 +4466,17 @@ x86_emulate(
> > >                                 ((vex.pfx != vex_66) && (vex.pfx != vex_f3)));
> > >              host_and_vcpu_must_have(avx);
> > >              get_fpu(X86EMUL_FPU_ymm, &fic);
> > > -            ea.bytes = (b == 0xd6 ? 8 : (16 << vex.l));
> > > +            switch ( b )
> > > +            {
> > > +            case 0x7e:
> > > +                ea.bytes = 4;
> > > +                break;
> > > +            case 0xd6:
> > > +                ea.bytes = 8;
> > > +                break;
> > > +            default:
> > > +                ea.bytes = 16 << vex.l;
> > > +            }
> > >          }
> > >          if ( ea.type == OP_MEM )
> > >          {
> > > @@ -4468,6 +4492,14 @@ x86_emulate(
> > >              vex.b = 1;
> > >              buf[4] &= 0x38;
> > >          }
> > > +        else if ( b == 0x7e )
> > > +        {
> > > +            /* convert the GPR destination to (%rAX) */
> > > +            *((unsigned long *)&mmvalp) = (unsigned long)ea.reg;
> > > +            rex_prefix &= ~REX_B;
> > > +            vex.b = 1;
> > > +            buf[4] &= 0x38;
> > > +        }
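A note on the mechanism the last hunk builds on: in this part of the
emulator, buf[] holds a copy of the instruction being emulated (buf[4] is
its ModRM byte), and the stub it is replayed from is entered with %rax
pointing at the scratch area mmvalp. Masking ModRM with 0x38 keeps the reg
field but forces mod=00 and rm=000, turning the operand into (%rAX); the
patch reuses that trick by aiming mmvalp at the vcpu's GPR slot, so the
replayed movd/movq stores straight into the guest register image. A
standalone sketch of the bit manipulation (illustrative names, not the
emulator's code):

    #include <stdint.h>
    #include <stdio.h>

    /* Keep ModRM's reg field, force mod=00/rm=000: operand becomes (%rAX). */
    static uint8_t modrm_to_rax_indirect(uint8_t modrm)
    {
        return modrm & 0x38;
    }

    int main(void)
    {
        uint8_t modrm = 0xc3;   /* mod=11 reg=000 rm=011, e.g. movd %xmm0,%ebx */
        printf("%#x -> %#x\n", modrm, modrm_to_rax_indirect(modrm));
        return 0;               /* prints "0xc3 -> 0" */
    }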
> > Thank you for doing this. However, looking at it, it has some code in
> > common with the "ea.type == OP_MEM" clause.
> >
> > Would this work?
> >
> > diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
> > index fe594ba..90db067 100644
> > --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> > +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> > @@ -4453,16 +4453,25 @@ x86_emulate(
> >              get_fpu(X86EMUL_FPU_ymm, &fic);
> >              ea.bytes = 16 << vex.l;
> >          }
> > -        if ( ea.type == OP_MEM )
> > +        if ( ea.type == OP_MEM || ea.type == OP_REG )
> >          {
> > -            /* XXX enable once there is ops->ea() or equivalent
> > -            generate_exception_if((vex.pfx == vex_66) &&
> > -                                  (ops->ea(ea.mem.seg, ea.mem.off)
> > -                                   & (ea.bytes - 1)), EXC_GP, 0); */
> > -            if ( b == 0x6f )
> > -                rc = ops->read(ea.mem.seg, ea.mem.off+0, mmvalp,
> > -                               ea.bytes, ctxt);
> >              /* convert memory operand to (%rAX) */
> > +
> > +            if ( ea.type == OP_MEM )
> > +            {
> > +                /* XXX enable once there is ops->ea() or equivalent
> > +                generate_exception_if((vex.pfx == vex_66) &&
> > +                                      (ops->ea(ea.mem.seg, ea.mem.off)
> > +                                       & (ea.bytes - 1)), EXC_GP, 0); */
> > +                if ( b == 0x6f )
> > +                    rc = ops->read(ea.mem.seg, ea.mem.off+0, mmvalp,
> > +                                   ea.bytes, ctxt);
> > +            }
> > +            else if ( ea.type == OP_REG )
> > +            {
> > +                *((unsigned long *)&mmvalp) = (unsigned long)ea.reg;
> > +            }
> > +
> >              rex_prefix &= ~REX_B;
> >              vex.b = 1;
> >              buf[4] &= 0x38;
> >
> > This is untested, but avoids duplicating this bit of state manipulation.

> Your suggestion makes sense, but I'm starting to doubt my initial
> patch. :-) I'm testing "movq xmm1, xmm1" and noticing that it takes the
> GPR-handling route, and I can't seem to easily prevent it with
> !(rex_prefix & REX_B), as rex_prefix == 0 and vex.b == 1. I need to
> take a harder look at how that class of instructions is coded.

Err, I meant "vmovq xmm1, xmm1".
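A note on why these register forms are hard to tell apart: VEX-encoded
instructions never carry a REX prefix (so rex_prefix stays 0), and VEX.B is
stored inverted in the encoding, so vex.b == 1 merely means "no B
extension", which is exactly what "vmovq %xmm1,%xmm1" decodes to. One
conceivable guard keys on the mandatory prefix instead, since among the
0F 7E forms only 66 0F 7E (and the unprefixed MMX form) writes a GPR, while
F3 0F 7E is the xmm-to-xmm/m64 load. A hypothetical sketch with
emulator-style names, not the fix that eventually went in:

    #include <stdbool.h>
    #include <stdint.h>

    /* Mirrors the emulator's mandatory-prefix enumeration. */
    enum vex_pfx { vex_none, vex_66, vex_f3, vex_f2 };

    /*
     * Only 66 0F 7E (movd/movq to r/m) and the unprefixed MMX form have
     * a GPR destination; F3 0F 7E ((v)movq xmm/m64,xmm) must stay off
     * the GPR path even though its operand also decodes as OP_REG.
     */
    static bool opcode_7e_dest_is_gpr(uint8_t b, enum vex_pfx pfx)
    {
        return b == 0x7e && (pfx == vex_66 || pfx == vex_none);
    }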
--
Mihai DONȚU

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel