x86emul: support INVPCID Just like for INVLPGA the HVM hook only supports PCID 0 for the time being for individual address invalidation. It also translates the other types to a full flush, which is architecturally permitted and performance-wise presumably not much worse because emulation is slow anyway. Signed-off-by: Jan Beulich Reviewed-by: Paul Durrant --- v2: New. --- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c +++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c @@ -382,6 +382,7 @@ static int fuzz_tlb_op( assert(is_x86_user_segment(aux)); /* fall through */ case x86emul_invlpga: + case x86emul_invpcid: assert(ctxt->addr_size == 64 || !(addr >> 32)); break; } --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -684,6 +684,38 @@ static int read_msr( return X86EMUL_UNHANDLEABLE; } +#define INVPCID_ADDR 0x12345678 +#define INVPCID_PCID 0x123 + +static int read_cr_invpcid( + unsigned int reg, + unsigned long *val, + struct x86_emulate_ctxt *ctxt) +{ + int rc = emul_test_read_cr(reg, val, ctxt); + + if ( rc == X86EMUL_OKAY && reg == 4 ) + *val |= X86_CR4_PCIDE; + + return rc; +} + +static int tlb_op_invpcid( + enum x86emul_tlb_op op, + unsigned long addr, + unsigned long aux, + struct x86_emulate_ctxt *ctxt) +{ + static unsigned int seq; + + if ( op != x86emul_invpcid || addr != INVPCID_ADDR || + x86emul_invpcid_pcid(aux) != (seq < 4 ? 0 : INVPCID_PCID) || + x86emul_invpcid_type(aux) != (seq++ & 3) ) + return X86EMUL_UNHANDLEABLE; + + return X86EMUL_OKAY; +} + static struct x86_emulate_ops emulops = { .read = read, .insn_fetch = fetch, @@ -4482,6 +4514,46 @@ int main(int argc, char **argv) printf("okay\n"); } else + printf("skipped\n"); + + printf("%-40s", "Testing invpcid 16(%ecx),%%edx..."); + if ( stack_exec ) + { + decl_insn(invpcid); + + asm volatile ( put_insn(invpcid, "invpcid 16(%0), %1") + :: "c" (NULL), "d" (0L) ); + + res[4] = 0; + res[5] = 0; + res[6] = INVPCID_ADDR; + res[7] = 0; + regs.ecx = (unsigned long)res; + emulops.tlb_op = tlb_op_invpcid; + + for ( ; ; ) + { + for ( regs.edx = 0; regs.edx < 4; ++regs.edx ) + { + set_insn(invpcid); + rc = x86_emulate(&ctxt, &emulops); + if ( rc != X86EMUL_OKAY || !check_eip(invpcid) ) + goto fail; + } + + if ( ctxt.addr_size < 64 || res[4] == INVPCID_PCID ) + break; + + emulops.read_cr = read_cr_invpcid; + res[4] = INVPCID_PCID; + } + + emulops.read_cr = emul_test_read_cr; + emulops.tlb_op = NULL; + + printf("okay\n"); + } + else printf("skipped\n"); #undef decl_insn --- a/tools/tests/x86_emulator/x86-emulate.c +++ b/tools/tests/x86_emulator/x86-emulate.c @@ -72,6 +72,7 @@ bool emul_test_init(void) * them. */ cp.basic.movbe = true; + cp.feat.invpcid = true; cp.feat.adx = true; cp.feat.avx512pf = cp.feat.avx512f; cp.feat.rdpid = true; @@ -141,7 +142,7 @@ int emul_test_cpuid( */ if ( leaf == 7 && subleaf == 0 ) { - res->b |= 1U << 19; + res->b |= (1U << 10) | (1U << 19); if ( res->b & (1U << 16) ) res->b |= 1U << 26; res->c |= 1U << 22; --- a/xen/arch/x86/hvm/emulate.c +++ b/xen/arch/x86/hvm/emulate.c @@ -2383,8 +2383,16 @@ static int hvmemul_tlb_op( paging_invlpg(current, addr); break; + case x86emul_invpcid: + if ( x86emul_invpcid_type(aux) != X86_INVPCID_TYPE_INDIV_ADDR ) + { + hvm_asid_flush_vcpu(current); + break; + } + aux = x86emul_invpcid_pcid(aux); + /* fall through */ case x86emul_invlpga: - /* TODO: Support ASIDs. */ + /* TODO: Support ASIDs/PCIDs. */ if ( !aux ) paging_invlpg(current, addr); else --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -496,6 +496,7 @@ static const struct ext0f38_table { [0x7a ... 0x7c] = { .simd_size = simd_none, .two_op = 1 }, [0x7d ... 0x7e] = { .simd_size = simd_packed_int, .d8s = d8s_vl }, [0x7f] = { .simd_size = simd_packed_fp, .d8s = d8s_vl }, + [0x82] = { .simd_size = simd_other }, [0x83] = { .simd_size = simd_packed_int, .d8s = d8s_vl }, [0x88] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_dq }, [0x89] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_dq }, @@ -1875,6 +1876,7 @@ in_protmode( #define vcpu_has_hle() (ctxt->cpuid->feat.hle) #define vcpu_has_avx2() (ctxt->cpuid->feat.avx2) #define vcpu_has_bmi2() (ctxt->cpuid->feat.bmi2) +#define vcpu_has_invpcid() (ctxt->cpuid->feat.invpcid) #define vcpu_has_rtm() (ctxt->cpuid->feat.rtm) #define vcpu_has_mpx() (ctxt->cpuid->feat.mpx) #define vcpu_has_avx512f() (ctxt->cpuid->feat.avx512f) @@ -9124,6 +9126,48 @@ x86_emulate( ASSERT(!state->simd_size); break; + case X86EMUL_OPC_66(0x0f38, 0x82): /* invpcid reg,m128 */ + vcpu_must_have(invpcid); + generate_exception_if(ea.type != OP_MEM, EXC_UD); + generate_exception_if(!mode_ring0(), EXC_GP, 0); + + if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 16, + ctxt)) != X86EMUL_OKAY ) + goto done; + + generate_exception_if(mmvalp->xmm[0] & ~0xfff, EXC_GP, 0); + dst.val = mode_64bit() ? *dst.reg : (uint32_t)*dst.reg; + + switch ( dst.val ) + { + case X86_INVPCID_TYPE_INDIV_ADDR: + generate_exception_if(!is_canonical_address(mmvalp->xmm[1]), + EXC_GP, 0); + /* fall through */ + case X86_INVPCID_TYPE_SINGLE_CTXT: + if ( !mode_64bit() || !ops->read_cr ) + cr4 = 0; + else if ( (rc = ops->read_cr(4, &cr4, ctxt)) != X86EMUL_OKAY ) + goto done; + generate_exception_if(!(cr4 & X86_CR4_PCIDE) && mmvalp->xmm[0], + EXC_GP, 0); + break; + case X86_INVPCID_TYPE_ALL_INCL_GLOBAL: + case X86_INVPCID_TYPE_ALL_NON_GLOBAL: + break; + default: + generate_exception(EXC_GP, 0); + } + + fail_if(!ops->tlb_op); + if ( (rc = ops->tlb_op(x86emul_invpcid, truncate_ea(mmvalp->xmm[1]), + x86emul_invpcid_aux(mmvalp->xmm[0], dst.val), + ctxt)) != X86EMUL_OKAY ) + goto done; + + state->simd_size = simd_none; + break; + case X86EMUL_OPC_EVEX_66(0x0f38, 0x83): /* vpmultishiftqb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ generate_exception_if(!evex.w, EXC_UD); host_and_vcpu_must_have(avx512_vbmi); --- a/xen/arch/x86/x86_emulate/x86_emulate.h +++ b/xen/arch/x86/x86_emulate/x86_emulate.h @@ -188,8 +188,26 @@ enum x86emul_cache_op { enum x86emul_tlb_op { x86emul_invlpg, x86emul_invlpga, + x86emul_invpcid, }; +static inline unsigned int x86emul_invpcid_aux(unsigned int pcid, + unsigned int type) +{ + ASSERT(!(pcid & ~0xfff)); + return (type << 12) | pcid; +} + +static inline unsigned int x86emul_invpcid_pcid(unsigned int aux) +{ + return aux & 0xfff; +} + +static inline unsigned int x86emul_invpcid_type(unsigned int aux) +{ + return aux >> 12; +} + struct x86_emulate_state; /* @@ -483,6 +501,8 @@ struct x86_emulate_ops * @addr and @aux have @op-specific meaning: * - INVLPG: @aux:@addr represent seg:offset * - INVLPGA: @addr is the linear address, @aux the ASID + * - INVPCID: @addr is the linear address, @aux the combination of + * PCID and type (see x86emul_invpcid_*()). */ int (*tlb_op)( enum x86emul_tlb_op op,