[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 1/3] x86/emul: Optimise decode_register() somewhat
The positions of GPRs inside struct cpu_user_regs don't follow any particular order, so as compiled, decode_register() becomes a jump table to 16 blocks which calculate the appropriate offset, at a total of 207 bytes. Instead, pre-compute the offsets at build time and use pointer arithmetic to calculate the result. The resulting function is far more reasonable: test %edx,%edx lea 0xbfb97(%rip),%rax # <cpu_user_regs_high_gpr_offsets> lea 0xbfba0(%rip),%rdx # <cpu_user_regs_gpr_offsets> cmove %rdx,%rax and $0xf,%edi movzbl (%rax,%rdi,1),%eax add %rsi,%rax retq and by observation, most callers in x86_emulate() inline and constant-propagate the highbyte_regs value of 0 to drop the test, one lea and the cmove. Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> --- CC: Jan Beulich <JBeulich@xxxxxxxx> --- xen/arch/x86/x86_emulate/x86_emulate.c | 82 ++++++++++++++++++++++++---------- 1 file changed, 58 insertions(+), 24 deletions(-) diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c index ff0a003..3f5636f 100644 --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -396,6 +396,51 @@ static const struct { /* Shift values between src and dst sizes of pmov{s,z}x{b,w,d}{w,d,q}. */ static const uint8_t pmov_convert_delta[] = { 1, 2, 3, 1, 2, 1 }; +/* + * Map GPRs by ModRM encoding to their offset within struct cpu_user_regs. + * The AH,CH,DH,BH offsets are misaligned. 
+ */ +static const uint8_t cpu_user_regs_gpr_offsets[] = { + offsetof(struct cpu_user_regs, r(ax)), + offsetof(struct cpu_user_regs, r(cx)), + offsetof(struct cpu_user_regs, r(dx)), + offsetof(struct cpu_user_regs, r(bx)), + offsetof(struct cpu_user_regs, r(sp)), + offsetof(struct cpu_user_regs, r(bp)), + offsetof(struct cpu_user_regs, r(si)), + offsetof(struct cpu_user_regs, r(di)), +#if defined(__x86_64__) + offsetof(struct cpu_user_regs, r8), + offsetof(struct cpu_user_regs, r9), + offsetof(struct cpu_user_regs, r10), + offsetof(struct cpu_user_regs, r11), + offsetof(struct cpu_user_regs, r12), + offsetof(struct cpu_user_regs, r13), + offsetof(struct cpu_user_regs, r14), + offsetof(struct cpu_user_regs, r15), +#endif +}; +static const uint8_t cpu_user_regs_high_gpr_offsets[] = { + offsetof(struct cpu_user_regs, r(ax)), + offsetof(struct cpu_user_regs, r(cx)), + offsetof(struct cpu_user_regs, r(dx)), + offsetof(struct cpu_user_regs, r(bx)), + offsetof(struct cpu_user_regs, ah), + offsetof(struct cpu_user_regs, ch), + offsetof(struct cpu_user_regs, dh), + offsetof(struct cpu_user_regs, bh), +#if defined(__x86_64__) + offsetof(struct cpu_user_regs, r8), + offsetof(struct cpu_user_regs, r9), + offsetof(struct cpu_user_regs, r10), + offsetof(struct cpu_user_regs, r11), + offsetof(struct cpu_user_regs, r12), + offsetof(struct cpu_user_regs, r13), + offsetof(struct cpu_user_regs, r14), + offsetof(struct cpu_user_regs, r15), +#endif +}; + static const struct { uint8_t simd_size:5; uint8_t to_mem:1; @@ -1939,32 +1984,21 @@ void * decode_register( uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs) { - void *p; + const uint8_t *offsets = highbyte_regs ? cpu_user_regs_high_gpr_offsets + : cpu_user_regs_gpr_offsets; - switch ( modrm_reg ) - { - case 0: p = &regs->r(ax); break; - case 1: p = &regs->r(cx); break; - case 2: p = &regs->r(dx); break; - case 3: p = &regs->r(bx); break; - case 4: p = (highbyte_regs ? 
&regs->ah : (void *)&regs->r(sp)); break; - case 5: p = (highbyte_regs ? &regs->ch : (void *)&regs->r(bp)); break; - case 6: p = (highbyte_regs ? &regs->dh : (void *)&regs->r(si)); break; - case 7: p = (highbyte_regs ? &regs->bh : (void *)&regs->r(di)); break; -#if defined(__x86_64__) - case 8: p = &regs->r8; break; - case 9: p = &regs->r9; break; - case 10: p = &regs->r10; break; - case 11: p = &regs->r11; break; - case 12: p = &regs->r12; break; - case 13: p = &regs->r13; break; - case 14: p = &regs->r14; break; - case 15: p = &regs->r15; break; -#endif - default: BUG(); p = NULL; break; - } + /* Check that the arrays are the same size, and a power of two. */ + BUILD_BUG_ON(ARRAY_SIZE(cpu_user_regs_gpr_offsets) != + ARRAY_SIZE(cpu_user_regs_high_gpr_offsets)); + BUILD_BUG_ON(ARRAY_SIZE(cpu_user_regs_gpr_offsets) & + (ARRAY_SIZE(cpu_user_regs_gpr_offsets) - 1)); + + ASSERT(modrm_reg < ARRAY_SIZE(cpu_user_regs_gpr_offsets)); + + /* For safety in release builds. Debug builds will hit the ASSERT() */ + modrm_reg &= ARRAY_SIZE(cpu_user_regs_gpr_offsets) - 1; - return p; + return (void *)regs + offsets[modrm_reg]; } static void *decode_vex_gpr(unsigned int vex_reg, struct cpu_user_regs *regs, -- 2.1.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |