x86/HVM: support (emulate) UMIP

There are three noteworthy drawbacks:
1) The intercepts we need to enable here are CPL-independent, i.e. we
   now have to emulate certain instructions for ring 0.
2) On VMX there's no intercept for SMSW, so the emulation isn't really
   complete there.
3) The CR4 write intercept on SVM is lower priority than all exception
   checks, so we need to intercept #GP.

Signed-off-by: Jan Beulich
---
The tool stack change could be left out - it updates a table which is
rather out of date anyway.
---
This once again points out that handle_mmio() is rather badly named,
as it's about more than just MMIO. Since we already have
hvm_emulate_one(), though, I'm lacking an idea for a good alternative
name.
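---
For illustration only (not part of the patch): a minimal user-mode
probe of the behaviour being emulated. It assumes an x86-64 Linux
guest whose kernel sets CR4.UMIP, does not itself emulate the
instruction for user space, and delivers the resulting #GP as
SIGSEGV; all names and messages are made up. A second probe, for
drawback 2, follows the patch itself.

#include <setjmp.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>

static sigjmp_buf env;

static void fault(int sig)
{
    siglongjmp(env, 1);
}

int main(void)
{
    /* In 64-bit mode SGDT stores a 2-byte limit plus an 8-byte base. */
    struct __attribute__((packed)) { uint16_t limit; uint64_t base; } gdtr;

    signal(SIGSEGV, fault);
    if ( sigsetjmp(env, 1) )
    {
        /* #GP was raised: UMIP, native or emulated, is in effect. */
        puts("SGDT at CPL 3 faulted - UMIP active");
        return 0;
    }
    asm volatile ( "sgdt %0" : "=m" (gdtr) );
    printf("SGDT at CPL 3 succeeded - GDT base %#lx\n",
           (unsigned long)gdtr.base);
    return 0;
}
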
--- a/tools/libxl/libxl_cpuid.c
+++ b/tools/libxl/libxl_cpuid.c
@@ -158,6 +158,7 @@ int libxl_cpuid_parse_config(libxl_cpuid
         {"de",           0x00000001, NA, CPUID_REG_EDX,  2,  1},
         {"vme",          0x00000001, NA, CPUID_REG_EDX,  1,  1},
         {"fpu",          0x00000001, NA, CPUID_REG_EDX,  0,  1},
+        {"umip",         0x00000007,  0, CPUID_REG_ECX,  2,  1},
         {"topoext",      0x80000001, NA, CPUID_REG_ECX, 22,  1},
         {"tbm",          0x80000001, NA, CPUID_REG_ECX, 21,  1},
         {"nodeid",       0x80000001, NA, CPUID_REG_ECX, 19,  1},
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -154,6 +154,13 @@ static void __init calculate_hvm_feature
     __set_bit(X86_FEATURE_APIC, hvm_featureset);
 
     /*
+     * Xen can often provide UMIP emulation to HVM guests even if the host
+     * doesn't have such functionality.
+     */
+    if ( cpu_has_vmx_dt_exiting || cpu_has_svm )
+        __set_bit(X86_FEATURE_UMIP, hvm_featureset);
+
+    /*
      * On AMD, PV guests are entirely unable to use SYSENTER as Xen runs in
      * long mode (and init_amd() has cleared it out of host capabilities), but
      * HVM guests are able if running in protected mode.
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1008,6 +1008,8 @@ unsigned long hvm_cr4_guest_reserved_bit
              X86_CR4_OSFXSR : 0) |
             (leaf1_edx & cpufeat_mask(X86_FEATURE_SSE) ?
              X86_CR4_OSXMMEXCPT : 0) |
+            (leaf7_0_ecx & cpufeat_mask(X86_FEATURE_UMIP) ?
+             X86_CR4_UMIP : 0) |
             ((restore || nestedhvm_enabled(v->domain)) &&
              (leaf1_ecx & cpufeat_mask(X86_FEATURE_VMX)) ?
              X86_CR4_VMXE : 0) |
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -570,6 +570,29 @@ void svm_update_guest_cr(struct vcpu *v,
         if ( paging_mode_hap(v->domain) )
             value &= ~X86_CR4_PAE;
         value |= v->arch.hvm_vcpu.guest_cr[4];
+
+        if ( !cpu_has_umip )
+        {
+            u32 general1_intercepts = vmcb_get_general1_intercepts(vmcb);
+
+            if ( v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_UMIP )
+            {
+                value &= ~X86_CR4_UMIP;
+                ASSERT(vmcb_get_cr_intercepts(vmcb) & CR_INTERCEPT_CR0_READ);
+                general1_intercepts |= GENERAL1_INTERCEPT_IDTR_READ |
+                                       GENERAL1_INTERCEPT_GDTR_READ |
+                                       GENERAL1_INTERCEPT_LDTR_READ |
+                                       GENERAL1_INTERCEPT_TR_READ;
+            }
+            else
+                general1_intercepts &= ~(GENERAL1_INTERCEPT_IDTR_READ |
+                                         GENERAL1_INTERCEPT_GDTR_READ |
+                                         GENERAL1_INTERCEPT_LDTR_READ |
+                                         GENERAL1_INTERCEPT_TR_READ);
+
+            vmcb_set_general1_intercepts(vmcb, general1_intercepts);
+        }
+
         vmcb_set_cr4(vmcb, value);
         break;
     default:
@@ -2444,6 +2467,13 @@ void svm_vmexit_handler(struct cpu_user_
         svm_fpu_dirty_intercept();
         break;
 
+    case VMEXIT_EXCEPTION_GP:
+        HVMTRACE_1D(TRAP, TRAP_gp_fault);
+        /* We only care about ring 0 faults with error code zero. */
+        if ( vmcb->exitinfo1 || vmcb_get_cpl(vmcb) || !handle_mmio() )
+            hvm_inject_hw_exception(TRAP_gp_fault, vmcb->exitinfo1);
+        break;
+
     case VMEXIT_EXCEPTION_PF: {
         unsigned long va;
         va = vmcb->exitinfo2;
@@ -2551,7 +2581,25 @@ void svm_vmexit_handler(struct cpu_user_
         hvm_inject_hw_exception(TRAP_gp_fault, 0);
         break;
 
-    case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
+    case VMEXIT_IDTR_READ:
+    case VMEXIT_GDTR_READ:
+    case VMEXIT_LDTR_READ:
+    case VMEXIT_TR_READ:
+        ASSERT((v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_UMIP) && !cpu_has_umip);
+        if ( vmcb_get_cpl(vmcb) || !handle_mmio() )
+            hvm_inject_hw_exception(TRAP_gp_fault, 0);
+        break;
+
+    case VMEXIT_CR0_READ:
+        if ( (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_UMIP) &&
+             vmcb_get_cpl(vmcb) )
+        {
+            ASSERT(!cpu_has_umip);
+            hvm_inject_hw_exception(TRAP_gp_fault, 0);
+            break;
+        }
+        /* fall through */
+    case VMEXIT_CR1_READ ... VMEXIT_CR15_READ:
     case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
         if ( cpu_has_svm_decode && (vmcb->exitinfo1 & (1ULL << 63)) )
             svm_vmexit_do_cr_access(vmcb, regs);
--- a/xen/arch/x86/hvm/svm/vmcb.c
+++ b/xen/arch/x86/hvm/svm/vmcb.c
@@ -194,6 +194,10 @@ static int construct_vmcb(struct vcpu *v
         HVM_TRAP_MASK |
         (1U << TRAP_no_device);
 
+    /* For UMIP emulation, intercept #GP to catch faulting CR4 writes. */
+    if ( !cpu_has_umip )
+        vmcb->_exception_intercepts |= 1U << TRAP_gp_fault;
+
     if ( paging_mode_hap(v->domain) )
     {
         vmcb->_np_enable = 1; /* enable nested paging */
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -227,6 +227,7 @@ static int vmx_init_vmcs_config(void)
         opt = (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
                SECONDARY_EXEC_WBINVD_EXITING |
                SECONDARY_EXEC_ENABLE_EPT |
+               SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING |
               SECONDARY_EXEC_ENABLE_RDTSCP |
               SECONDARY_EXEC_PAUSE_LOOP_EXITING |
               SECONDARY_EXEC_ENABLE_INVPCID |
@@ -987,6 +988,10 @@ static int construct_vmcs(struct vcpu *v
     v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control;
 
+    /* Disable descriptor table exiting: it's controlled by CR4.UMIP writes. */
+    v->arch.hvm_vmx.secondary_exec_control &=
+        ~SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING;
+
     /* Disable VPID for now: we decide when to enable it on VMENTER. */
     v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1531,6 +1531,22 @@ static void vmx_update_guest_cr(struct v
             v->arch.hvm_vcpu.hw_cr[4] &=
                 ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
         }
+
+        if ( !cpu_has_umip )
+        {
+            if ( v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_UMIP )
+            {
+                ASSERT(cpu_has_vmx_dt_exiting);
+                v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_UMIP;
+                v->arch.hvm_vmx.secondary_exec_control |=
+                    SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING;
+            }
+            else
+                v->arch.hvm_vmx.secondary_exec_control &=
+                    ~SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING;
+            vmx_update_secondary_exec_control(v);
+        }
+
         __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
         break;
     default:
@@ -3853,6 +3869,11 @@ void vmx_vmexit_handler(struct cpu_user_
 
     case EXIT_REASON_ACCESS_GDTR_OR_IDTR:
     case EXIT_REASON_ACCESS_LDTR_OR_TR:
+        ASSERT((v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_UMIP) && !cpu_has_umip);
+        if ( vmx_get_cpl() || !handle_mmio() )
+            hvm_inject_hw_exception(TRAP_gp_fault, 0);
+        break;
+
     case EXIT_REASON_VMX_PREEMPTION_TIMER_EXPIRED:
     case EXIT_REASON_INVPCID:
         /* fall through */
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1360,7 +1360,7 @@ protmode_load_seg(
     const struct x86_emulate_ops *ops)
 {
     enum x86_segment sel_seg = (sel & 4) ? x86_seg_ldtr : x86_seg_gdtr;
-    struct { uint32_t a, b; } desc;
+    struct { uint32_t a, b; } desc, desc_hi = {};
     uint8_t dpl, rpl;
     int cpl = get_cpl(ctxt, ops);
     uint32_t a_flag = 0x100;
@@ -1411,9 +1411,6 @@ protmode_load_seg(
         /* System segments must have S flag == 0. */
         if ( desc.b & (1u << 12) )
             goto raise_exn;
-        /* We do not support 64-bit descriptor types. */
-        if ( in_longmode(ctxt, ops) )
-            return X86EMUL_UNHANDLEABLE;
     }
     /* User segments must have S flag == 1. */
     else if ( !(desc.b & (1u << 12)) )
@@ -1487,6 +1484,33 @@ protmode_load_seg(
             goto raise_exn;
     }
 
+    if ( !is_x86_user_segment(seg) )
+    {
+        int lm = in_longmode(ctxt, ops);
+
+        if ( lm < 0 )
+            return X86EMUL_UNHANDLEABLE;
+        if ( lm )
+        {
+            switch ( rc = ops->read(sel_seg, (sel & 0xfff8) + 8,
+                                    &desc_hi, sizeof(desc_hi), ctxt) )
+            {
+            case X86EMUL_OKAY:
+                break;
+
+            case X86EMUL_EXCEPTION:
+                if ( !ctxt->event_pending )
+                    goto raise_exn;
+                /* fall through */
+            default:
+                return rc;
+            }
+            if ( (desc_hi.b & 0x00001f00) ||
+                 !is_canonical_address((uint64_t)desc_hi.a << 32) )
+                goto raise_exn;
+        }
+    }
+
     /* Ensure Accessed flag is set. */
     if ( a_flag && !(desc.b & a_flag) )
     {
@@ -1513,7 +1537,8 @@ protmode_load_seg(
         desc.b = new_desc_b;
     }
 
-    sreg->base = (((desc.b << 0) & 0xff000000u) |
+    sreg->base = (((uint64_t)desc_hi.a << 32) |
+                  ((desc.b <<  0) & 0xff000000u) |
                   ((desc.b << 16) & 0x00ff0000u) |
                   ((desc.a >> 16) & 0x0000ffffu));
     sreg->attr.bytes = (((desc.b >> 8) & 0x00ffu) |
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -72,6 +72,7 @@ XEN_CPUFEATURE(XEN_SMAP, (FSCAPIN
 #define cpu_has_smep            boot_cpu_has(X86_FEATURE_SMEP)
 #define cpu_has_smap            boot_cpu_has(X86_FEATURE_SMAP)
 #define cpu_has_fpu_sel         (!boot_cpu_has(X86_FEATURE_NO_FPU_SEL))
+#define cpu_has_umip            boot_cpu_has(X86_FEATURE_UMIP)
 #define cpu_has_ffxsr           ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) \
                                  && boot_cpu_has(X86_FEATURE_FFXSR))
 #define cpu_has_x2apic          boot_cpu_has(X86_FEATURE_X2APIC)
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -346,6 +346,8 @@ extern u64 vmx_ept_vpid_cap;
     (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
 #define cpu_has_vmx_ept \
     (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)
+#define cpu_has_vmx_dt_exiting \
+    (vmx_secondary_exec_control & SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING)
 #define cpu_has_vmx_vpid \
     (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
 #define cpu_has_monitor_trap_flag \
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -82,6 +82,7 @@
 #define X86_CR4_PCE        0x00000100 /* enable performance counters at ipl 3 */
 #define X86_CR4_OSFXSR     0x00000200 /* enable fast FPU save and restore */
 #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
+#define X86_CR4_UMIP       0x00000800 /* disable user mode S[GIL]DT/STR/SMSW */
 #define X86_CR4_VMXE       0x00002000 /* enable VMX */
 #define X86_CR4_SMXE       0x00004000 /* enable SMX */
 #define X86_CR4_FSGSBASE   0x00010000 /* enable {rd,wr}{fs,gs}base */
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -224,6 +224,7 @@ XEN_CPUFEATURE(AVX512VL, 5*32+31) /
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0.ecx, word 6 */
 XEN_CPUFEATURE(PREFETCHWT1,   6*32+ 0) /*A  PREFETCHWT1 instruction */
 XEN_CPUFEATURE(AVX512VBMI,    6*32+ 1) /*A  AVX-512 Vector Byte Manipulation Instrs */
+XEN_CPUFEATURE(UMIP,          6*32+ 2) /*S  User Mode Instruction Prevention */
 XEN_CPUFEATURE(PKU,           6*32+ 3) /*H  Protection Keys for Userspace */
 XEN_CPUFEATURE(OSPKE,         6*32+ 4) /*!  OS Protection Keys Enable */
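---
The second probe, illustrating drawback 2 (again not part of the
patch, with made-up output): since VMX lacks an SMSW intercept, a
guest relying on emulated UMIP will still see a CPL 3 SMSW succeed
where hardware UMIP would raise #GP.

#include <stdio.h>

int main(void)
{
    unsigned long msw;

    /* With only emulated UMIP on VMX this does not fault. */
    asm volatile ( "smsw %0" : "=r" (msw) );
    printf("SMSW at CPL 3 returned %#lx\n", msw);
    return 0;
}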