|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH 9/9] x86emul+VMX: support {RD,WR}MSRLIST
On 04.04.2023 16:55, Jan Beulich wrote:
> These are "compound" instructions to issue a series of RDMSR / WRMSR
> respectively. In the emulator we can therefore implement them by using
> the existing msr_{read,write}() hooks. The memory accesses utilize that
> the HVM ->read() / ->write() hooks are already linear-address
> (x86_seg_none) aware (by way of hvmemul_virtual_to_linear() handling
> this case).
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> ---
> TODO: Use VMX tertiary execution control (once bit is known; see
> //todo-s) and then further adjust cpufeatureset.h.
Argh, I should have Cc-ed Kevin and Jun, even if it weren't for this issue.
Jan
> RFC: In vmx_vmexit_handler() handling is forwarded to the emulator
> blindly. Alternatively we could consult the exit qualification and
> process just a single MSR at a time (without involving the
> emulator), exiting back to the guest after every iteration. (I
> don't think a mix of both models makes a lot of sense.)
>
> RFC: For PV priv_op_ops would need to gain proper read/write hooks,
> which doesn't look desirable (albeit there we could refuse to
> handle anything else than x86_seg_none); we may want to consider to
> instead not support the feature for PV guests, requiring e.g. Linux
> to process the lists in new pvops hooks.
>
> RFC: I wasn't sure whether to add preemption checks to the loops -
> thoughts?
>
> With the VMX side of the spec still unclear (tertiary execution control
> bit unspecified in ISE 046) we can't enable the insn yet for (HVM) guest
> use. The precise behavior of MSR_BARRIER is also not spelled out, so the
> (minimal) implementation is a guess for now.
>
> --- a/tools/libs/light/libxl_cpuid.c
> +++ b/tools/libs/light/libxl_cpuid.c
> @@ -240,6 +240,7 @@ int libxl_cpuid_parse_config(libxl_cpuid
> {"lkgs", 0x00000007, 1, CPUID_REG_EAX, 18, 1},
> {"wrmsrns", 0x00000007, 1, CPUID_REG_EAX, 19, 1},
> {"avx-ifma", 0x00000007, 1, CPUID_REG_EAX, 23, 1},
> + {"msrlist", 0x00000007, 1, CPUID_REG_EAX, 27, 1},
>
> {"avx-vnni-int8",0x00000007, 1, CPUID_REG_EDX, 4, 1},
> {"avx-ne-convert",0x00000007, 1, CPUID_REG_EDX, 5, 1},
> --- a/tools/misc/xen-cpuid.c
> +++ b/tools/misc/xen-cpuid.c
> @@ -195,6 +195,8 @@ static const char *const str_7a1[32] =
> [18] = "lkgs", [19] = "wrmsrns",
>
> /* 22 */ [23] = "avx-ifma",
> +
> + /* 26 */ [27] = "msrlist",
> };
>
> static const char *const str_e21a[32] =
> --- a/tools/tests/x86_emulator/predicates.c
> +++ b/tools/tests/x86_emulator/predicates.c
> @@ -342,6 +342,8 @@ static const struct {
> { { 0x01, 0xc4 }, { 2, 2 }, F, N }, /* vmxoff */
> { { 0x01, 0xc5 }, { 2, 2 }, F, N }, /* pconfig */
> { { 0x01, 0xc6 }, { 2, 2 }, F, N }, /* wrmsrns */
> + { { 0x01, 0xc6 }, { 0, 2 }, F, W, pfx_f2 }, /* rdmsrlist */
> + { { 0x01, 0xc6 }, { 0, 2 }, F, R, pfx_f3 }, /* wrmsrlist */
> { { 0x01, 0xc8 }, { 2, 2 }, F, N }, /* monitor */
> { { 0x01, 0xc9 }, { 2, 2 }, F, N }, /* mwait */
> { { 0x01, 0xca }, { 2, 2 }, F, N }, /* clac */
> --- a/tools/tests/x86_emulator/test_x86_emulator.c
> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
> @@ -589,6 +589,7 @@ static int read(
> default:
> if ( !is_x86_user_segment(seg) )
> return X86EMUL_UNHANDLEABLE;
> + case x86_seg_none:
> bytes_read += bytes;
> break;
> }
> @@ -619,7 +620,7 @@ static int write(
> if ( verbose )
> printf("** %s(%u, %p,, %u,)\n", __func__, seg, (void *)offset,
> bytes);
>
> - if ( !is_x86_user_segment(seg) )
> + if ( !is_x86_user_segment(seg) && seg != x86_seg_none )
> return X86EMUL_UNHANDLEABLE;
> memcpy((void *)offset, p_data, bytes);
> return X86EMUL_OKAY;
> @@ -711,6 +712,10 @@ static int read_msr(
> {
> switch ( reg )
> {
> + case 0x0000002f: /* BARRIER */
> + *val = 0;
> + return X86EMUL_OKAY;
> +
> case 0xc0000080: /* EFER */
> *val = ctxt->addr_size > 32 ? 0x500 /* LME|LMA */ : 0;
> return X86EMUL_OKAY;
> @@ -1499,9 +1504,53 @@ int main(int argc, char **argv)
> (gs_base != 0x0000111122224444UL) ||
> gs_base_shadow )
> goto fail;
> + printf("okay\n");
>
> cp.extd.nscb = i;
> emulops.write_segment = NULL;
> +
> + printf("%-40s", "Testing rdmsrlist...");
> + instr[0] = 0xf2; instr[1] = 0x0f; instr[2] = 0x01; instr[3] = 0xc6;
> + regs.rip = (unsigned long)&instr[0];
> + regs.rsi = (unsigned long)(res + 0x80);
> + regs.rdi = (unsigned long)(res + 0x80 + 0x40 * 2);
> + regs.rcx = 0x0002000100008000UL;
> + gs_base_shadow = 0x0000222244446666UL;
> + memset(res + 0x80, ~0, 0x40 * 8 * 2);
> + res[0x80 + 0x0f * 2] = 0xc0000101; /* GS_BASE */
> + res[0x80 + 0x0f * 2 + 1] = 0;
> + res[0x80 + 0x20 * 2] = 0xc0000102; /* SHADOW_GS_BASE */
> + res[0x80 + 0x20 * 2 + 1] = 0;
> + res[0x80 + 0x31 * 2] = 0x2f; /* BARRIER */
> + res[0x80 + 0x31 * 2 + 1] = 0;
> + rc = x86_emulate(&ctxt, &emulops);
> + if ( (rc != X86EMUL_OKAY) ||
> + (regs.rip != (unsigned long)&instr[4]) ||
> + regs.rcx ||
> + (res[0x80 + (0x40 + 0x0f) * 2] != (unsigned int)gs_base) ||
> + (res[0x80 + (0x40 + 0x0f) * 2 + 1] != (gs_base >> (8 *
> sizeof(int)))) ||
> + (res[0x80 + (0x40 + 0x20) * 2] != (unsigned int)gs_base_shadow) ||
> + (res[0x80 + (0x40 + 0x20) * 2 + 1] != (gs_base_shadow >> (8 *
> sizeof(int)))) ||
> + res[0x80 + (0x40 + 0x31) * 2] || res[0x80 + (0x40 + 0x31) * 2 + 1] )
> + goto fail;
> + printf("okay\n");
> +
> + printf("%-40s", "Testing wrmsrlist...");
> + instr[0] = 0xf3; instr[1] = 0x0f; instr[2] = 0x01; instr[3] = 0xc6;
> + regs.eip = (unsigned long)&instr[0];
> + regs.rsi -= 0x11 * 8;
> + regs.rdi -= 0x11 * 8;
> + regs.rcx = 0x0002000100000000UL;
> + res[0x80 + 0x0f * 2] = 0xc0000102; /* SHADOW_GS_BASE */
> + res[0x80 + 0x20 * 2] = 0xc0000101; /* GS_BASE */
> + rc = x86_emulate(&ctxt, &emulops);
> + if ( (rc != X86EMUL_OKAY) ||
> + (regs.rip != (unsigned long)&instr[4]) ||
> + regs.rcx ||
> + (gs_base != 0x0000222244446666UL) ||
> + (gs_base_shadow != 0x0000111122224444UL) )
> + goto fail;
> +
> emulops.write_msr = NULL;
> #endif
> printf("okay\n");
> --- a/tools/tests/x86_emulator/x86-emulate.c
> +++ b/tools/tests/x86_emulator/x86-emulate.c
> @@ -88,6 +88,7 @@ bool emul_test_init(void)
> cp.feat.rdpid = true;
> cp.feat.lkgs = true;
> cp.feat.wrmsrns = true;
> + cp.feat.msrlist = true;
> cp.extd.clzero = true;
>
> if ( cpu_has_xsave )
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -835,6 +835,17 @@ static void cf_check vmx_cpuid_policy_ch
> else
> vmx_set_msr_intercept(v, MSR_PKRS, VMX_MSR_RW);
>
> + if ( cp->feat.msrlist )
> + {
> + vmx_clear_msr_intercept(v, MSR_BARRIER, VMX_MSR_RW);
> + //todo enable MSRLIST tertiary execution control
> + }
> + else
> + {
> + vmx_set_msr_intercept(v, MSR_BARRIER, VMX_MSR_RW);
> + //todo disable MSRLIST tertiary execution control
> + }
> +
> out:
> vmx_vmcs_exit(v);
>
> @@ -3705,6 +3716,22 @@ gp_fault:
> return X86EMUL_EXCEPTION;
> }
>
> +static bool cf_check is_msrlist(
> + const struct x86_emulate_state *state, const struct x86_emulate_ctxt
> *ctxt)
> +{
> +
> + if ( ctxt->opcode == X86EMUL_OPC(0x0f, 0x01) )
> + {
> + unsigned int rm, reg;
> +        int mode = x86_insn_modrm(state, &rm, &reg);
> +
> + /* This also includes WRMSRNS; should be okay. */
> + return mode == 3 && rm == 6 && !reg;
> + }
> +
> + return false;
> +}
> +
> static void vmx_do_extint(struct cpu_user_regs *regs)
> {
> unsigned long vector;
> @@ -4513,6 +4540,17 @@ void vmx_vmexit_handler(struct cpu_user_
> }
> break;
>
> + case EXIT_REASON_RDMSRLIST:
> + case EXIT_REASON_WRMSRLIST:
> + if ( vmx_guest_x86_mode(v) != 8 || !currd->arch.cpuid->feat.msrlist )
> + {
> + ASSERT_UNREACHABLE();
> + hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC);
> + }
> + else if ( !hvm_emulate_one_insn(is_msrlist, "MSR list") )
> + hvm_inject_hw_exception(TRAP_gp_fault, 0);
> + break;
> +
> case EXIT_REASON_VMXOFF:
> case EXIT_REASON_VMXON:
> case EXIT_REASON_VMCLEAR:
> --- a/xen/arch/x86/include/asm/hvm/vmx/vmx.h
> +++ b/xen/arch/x86/include/asm/hvm/vmx/vmx.h
> @@ -211,6 +211,8 @@ static inline void pi_clear_sn(struct pi
> #define EXIT_REASON_XRSTORS 64
> #define EXIT_REASON_BUS_LOCK 74
> #define EXIT_REASON_NOTIFY 75
> +#define EXIT_REASON_RDMSRLIST 78
> +#define EXIT_REASON_WRMSRLIST 79
> /* Remember to also update VMX_PERF_EXIT_REASON_SIZE! */
>
> /*
> --- a/xen/arch/x86/include/asm/msr-index.h
> +++ b/xen/arch/x86/include/asm/msr-index.h
> @@ -24,6 +24,8 @@
> #define APIC_BASE_ENABLE (_AC(1, ULL) << 11)
> #define APIC_BASE_ADDR_MASK 0x000ffffffffff000ULL
>
> +#define MSR_BARRIER 0x0000002f
> +
> #define MSR_TEST_CTRL 0x00000033
> #define TEST_CTRL_SPLITLOCK_DETECT (_AC(1, ULL) << 29)
> #define TEST_CTRL_SPLITLOCK_DISABLE (_AC(1, ULL) << 31)
> --- a/xen/arch/x86/include/asm/perfc_defn.h
> +++ b/xen/arch/x86/include/asm/perfc_defn.h
> @@ -6,7 +6,7 @@ PERFCOUNTER_ARRAY(exceptions,
>
> #ifdef CONFIG_HVM
>
> -#define VMX_PERF_EXIT_REASON_SIZE 76
> +#define VMX_PERF_EXIT_REASON_SIZE 80
> #define VMEXIT_NPF_PERFC 143
> #define SVM_PERF_EXIT_REASON_SIZE (VMEXIT_NPF_PERFC + 1)
> PERFCOUNTER_ARRAY(vmexits, "vmexits",
> --- a/xen/arch/x86/msr.c
> +++ b/xen/arch/x86/msr.c
> @@ -223,6 +223,12 @@ int guest_rdmsr(struct vcpu *v, uint32_t
> case MSR_AMD_PPIN:
> goto gp_fault;
>
> + case MSR_BARRIER:
> + if ( !cp->feat.msrlist )
> + goto gp_fault;
> + *val = 0;
> + break;
> +
> case MSR_IA32_FEATURE_CONTROL:
> /*
> * Architecturally, availability of this MSR is enumerated by the
> @@ -493,6 +499,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t
> uint64_t rsvd;
>
> /* Read-only */
> + case MSR_BARRIER:
> case MSR_IA32_PLATFORM_ID:
> case MSR_CORE_CAPABILITIES:
> case MSR_INTEL_CORE_THREAD_COUNT:
> --- a/xen/arch/x86/x86_emulate/0f01.c
> +++ b/xen/arch/x86/x86_emulate/0f01.c
> @@ -40,6 +40,7 @@ int x86emul_0f01(struct x86_emulate_stat
> switch ( s->modrm )
> {
> unsigned long base, limit, cr0, cr0w, cr4;
> + unsigned int n;
> struct segment_register sreg;
> uint64_t msr_val;
>
> @@ -54,6 +55,56 @@ int x86emul_0f01(struct x86_emulate_stat
> ((uint64_t)regs->r(dx) << 32) | regs->eax,
> ctxt);
> goto done;
> +
> + case vex_f3: /* wrmsrlist */
> + vcpu_must_have(msrlist);
> + generate_exception_if(!mode_64bit(), X86_EXC_UD);
> + generate_exception_if(!mode_ring0() || (regs->r(si) & 7) ||
> + (regs->r(di) & 7),
> + X86_EXC_GP, 0);
> + fail_if(!ops->write_msr);
> + while ( regs->r(cx) )
> + {
> + n = __builtin_ffsl(regs->r(cx)) - 1;
> + if ( (rc = ops->read(x86_seg_none, regs->r(si) + n * 8,
> + &msr_val, 8, ctxt)) != X86EMUL_OKAY )
> + break;
> + generate_exception_if(msr_val != (uint32_t)msr_val,
> + X86_EXC_GP, 0);
> + base = msr_val;
> + if ( (rc = ops->read(x86_seg_none, regs->r(di) + n * 8,
> + &msr_val, 8, ctxt)) != X86EMUL_OKAY ||
> + (rc = ops->write_msr(base, msr_val, ctxt)) !=
> X86EMUL_OKAY )
> + break;
> + regs->r(cx) &= ~(1UL << n);
> + }
> + goto done;
> +
> + case vex_f2: /* rdmsrlist */
> + vcpu_must_have(msrlist);
> + generate_exception_if(!mode_64bit(), X86_EXC_UD);
> + generate_exception_if(!mode_ring0() || (regs->r(si) & 7) ||
> + (regs->r(di) & 7),
> + X86_EXC_GP, 0);
> + fail_if(!ops->read_msr || !ops->write);
> + while ( regs->r(cx) )
> + {
> + n = __builtin_ffsl(regs->r(cx)) - 1;
> + if ( (rc = ops->read(x86_seg_none, regs->r(si) + n * 8,
> + &msr_val, 8, ctxt)) != X86EMUL_OKAY )
> + break;
> + generate_exception_if(msr_val != (uint32_t)msr_val,
> + X86_EXC_GP, 0);
> + if ( (rc = ops->read_msr(msr_val, &msr_val,
> + ctxt)) != X86EMUL_OKAY ||
> + (rc = ops->write(x86_seg_none, regs->r(di) + n * 8,
> + &msr_val, 8, ctxt)) != X86EMUL_OKAY )
> + break;
> + regs->r(cx) &= ~(1UL << n);
> + }
> + if ( rc != X86EMUL_OKAY )
> + ctxt->regs->r(cx) = regs->r(cx);
> + goto done;
> }
> generate_exception(X86_EXC_UD);
>
> --- a/xen/arch/x86/x86_emulate/private.h
> +++ b/xen/arch/x86/x86_emulate/private.h
> @@ -600,6 +600,7 @@ amd_like(const struct x86_emulate_ctxt *
> #define vcpu_has_lkgs() (ctxt->cpuid->feat.lkgs)
> #define vcpu_has_wrmsrns() (ctxt->cpuid->feat.wrmsrns)
> #define vcpu_has_avx_ifma() (ctxt->cpuid->feat.avx_ifma)
> +#define vcpu_has_msrlist() (ctxt->cpuid->feat.msrlist)
> #define vcpu_has_avx_vnni_int8() (ctxt->cpuid->feat.avx_vnni_int8)
> #define vcpu_has_avx_ne_convert() (ctxt->cpuid->feat.avx_ne_convert)
>
> --- a/xen/arch/x86/x86_emulate/util.c
> +++ b/xen/arch/x86/x86_emulate/util.c
> @@ -112,6 +112,9 @@ bool cf_check x86_insn_is_mem_access(con
> break;
>
> case X86EMUL_OPC(0x0f, 0x01):
> + /* {RD,WR}MSRLIST */
> + if ( mode_64bit() && s->modrm == 0xc6 )
> + return s->vex.pfx >= vex_f3;
> /* Cover CLZERO. */
> return (s->modrm_rm & 7) == 4 && (s->modrm_reg & 7) == 7;
> }
> @@ -172,7 +175,11 @@ bool cf_check x86_insn_is_mem_write(cons
> case 0xff: /* Grp5 */
> break;
>
> - case X86EMUL_OPC(0x0f, 0x01): /* CLZERO is the odd one. */
> + case X86EMUL_OPC(0x0f, 0x01):
> + /* RDMSRLIST */
> + if ( mode_64bit() && s->modrm == 0xc6 )
> + return s->vex.pfx == vex_f2;
> + /* CLZERO is another odd one. */
> return (s->modrm_rm & 7) == 4 && (s->modrm_reg & 7) == 7;
>
> default:
> --- a/xen/include/public/arch-x86/cpufeatureset.h
> +++ b/xen/include/public/arch-x86/cpufeatureset.h
> @@ -286,6 +286,7 @@ XEN_CPUFEATURE(FRED, 10*32+17) /
> XEN_CPUFEATURE(LKGS, 10*32+18) /*S Load Kernel GS Base */
> XEN_CPUFEATURE(WRMSRNS, 10*32+19) /*A WRMSR Non-Serialising */
> XEN_CPUFEATURE(AVX_IFMA, 10*32+23) /*A AVX-IFMA Instructions */
> +XEN_CPUFEATURE(MSRLIST, 10*32+27) /* MSR list instructions */
>
> /* AMD-defined CPU features, CPUID level 0x80000021.eax, word 11 */
> XEN_CPUFEATURE(LFENCE_DISPATCH, 11*32+ 2) /*A LFENCE always serializing
> */
>
>
|
Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support.