[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

RE: [PATCH] x86/HVM: support emulated UMIP


  • To: Jan Beulich <jbeulich@xxxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: "Tian, Kevin" <kevin.tian@xxxxxxxxx>
  • Date: Wed, 3 Feb 2021 07:36:27 +0000
  • Accept-language: en-US
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=intel.com; dmarc=pass action=none header.from=intel.com; dkim=pass header.d=intel.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=Hr0aN4o1hUCRSLS5hOZTbqv8iiEO3B8wJaVIMAbUu48=; b=LWnCVsqYHbxlfoNfTtiRDGPZF8PZaRtznrANfzu6Qk1nt1Tq+s6nJRyslpv0VO/Xhnh69xYU1vIqTQ0Qd2fX+g1wR6z6VedXSme6jCLQ8PUyuX3PM67BT6mt+wgpBfdep4oGWcv/BlLuH0wcRBJlVA6urhQySJap+0ml5uqiDN5PBjKXA0o/2lKRawXRV84ikG4gRL/daqxKAA7Mg70ngDxh7ApMo3a0ZeDgX+sFIjbYYyStUr78QQ4Wmof5uc9PQD3CwG+gtYLC6l34+t7OINc/bi7BCYUL4DeuFVDWJ/3k7vFXkfQx02Xbus8G4xeNzPRaUhL5hj3dEcShE0ba1Q==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=O4ZeQQAwpictthMg/1lQSyV6UvDyDHFsPkQt+16l8bxYjNpuQb8loztqOFdvwF/CaVPFPcKkEI+yv4LhDYDx5FEiwyBoKwvMM/F/olwtQc6qT5dDQhGMMdxOKK9BgnQO24tl2azAGNCVkovVOjaNq3Be+HC60rxXJSdaE40FgKbo3rilyBQPyhPV7BmKdFfmj081E4y7s0vMmtZLmVZCo0jB0ca3D+z8GdBA8nq7+mqdIgHfBRIcTywJi3N1DhD6Hd5ZUuw4L0NyyFMeTv4Br6BW+SHGEtp0vqqCv++2nJEUWuVRn38QwQrxfgcfdUxj80dL5ybsCYxh3wljTf7ASw==
  • Authentication-results: suse.com; dkim=none (message not signed) header.d=none;suse.com; dmarc=none action=none header.from=intel.com;
  • Cc: "Cooper, Andrew" <andrew.cooper3@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>, "Nakajima, Jun" <jun.nakajima@xxxxxxxxx>
  • Delivery-date: Wed, 03 Feb 2021 07:36:46 +0000
  • Dlp-product: dlpe-windows
  • Dlp-reaction: no-action
  • Dlp-version: 11.5.1.3
  • Ironport-sdr: xNNpGN92qYG4A2qOOwovX4GTmy8CQqObfHitdhckdKKQNonhMYXZo0PuDKXvjwbuVTj4Zjhat5 Re9gqapdnKsA==
  • Ironport-sdr: 6PVknCE7QP7DroU64xvIb5jc3kw0wKVm86s/kKZwb7bRsK0kCkX/zoopZazecmz1BZTTTaOGEB B6pysxPrnGUw==
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>
  • Thread-index: AQHW9jRI88PZs9TFZkae/jqZeEYzrapGEf4g
  • Thread-topic: [PATCH] x86/HVM: support emulated UMIP

> From: Jan Beulich <jbeulich@xxxxxxxx>
> Sent: Friday, January 29, 2021 7:45 PM
> 
> There are three noteworthy drawbacks:
> 1) The intercepts we need to enable here are CPL-independent, i.e. we
>    now have to emulate certain instructions for ring 0.
> 2) On VMX there's no intercept for SMSW, so the emulation isn't really
>    complete there.
> 3) The CR4 write intercept on SVM is lower priority than all exception
>    checks, so we need to intercept #GP.
> Therefore this emulation doesn't get offered to guests by default.
> 
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx>

> ---
> v3: Don't offer emulation by default. Re-base.
> v2: Split off the x86 insn emulator part. Re-base. Use hvm_featureset
>     in hvm_cr4_guest_reserved_bits().
> 
> --- a/xen/arch/x86/cpuid.c
> +++ b/xen/arch/x86/cpuid.c
> @@ -453,6 +453,13 @@ static void __init calculate_hvm_max_pol
>      __set_bit(X86_FEATURE_X2APIC, hvm_featureset);
> 
>      /*
> +     * Xen can often provide UMIP emulation to HVM guests even if the host
> +     * doesn't have such functionality.
> +     */
> +    if ( hvm_funcs.set_descriptor_access_exiting )
> +        __set_bit(X86_FEATURE_UMIP, hvm_featureset);
> +
> +    /*
>       * On AMD, PV guests are entirely unable to use SYSENTER as Xen runs in
>       * long mode (and init_amd() has cleared it out of host capabilities), 
> but
>       * HVM guests are able if running in protected mode.
> @@ -504,6 +511,10 @@ static void __init calculate_hvm_def_pol
>      for ( i = 0; i < ARRAY_SIZE(hvm_featureset); ++i )
>          hvm_featureset[i] &= hvm_featuremask[i];
> 
> +    /* Don't offer UMIP emulation by default. */
> +    if ( !cpu_has_umip )
> +        __clear_bit(X86_FEATURE_UMIP, hvm_featureset);
> +
>      guest_common_feature_adjustments(hvm_featureset);
>      guest_common_default_feature_adjustments(hvm_featureset);
> 
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -991,7 +991,8 @@ unsigned long hvm_cr4_guest_valid_bits(c
>                                  X86_CR4_PCE                    |
>              (p->basic.fxsr    ? X86_CR4_OSFXSR            : 0) |
>              (p->basic.sse     ? X86_CR4_OSXMMEXCPT        : 0) |
> -            (p->feat.umip     ? X86_CR4_UMIP              : 0) |
> +            ((p == &host_cpuid_policy ? &hvm_max_cpuid_policy : p)->feat.umip
> +                              ? X86_CR4_UMIP              : 0) |
>              (vmxe             ? X86_CR4_VMXE              : 0) |
>              (p->feat.fsgsbase ? X86_CR4_FSGSBASE          : 0) |
>              (p->basic.pcid    ? X86_CR4_PCIDE             : 0) |
> @@ -3731,6 +3732,13 @@ int hvm_descriptor_access_intercept(uint
>      struct vcpu *curr = current;
>      struct domain *currd = curr->domain;
> 
> +    if ( (is_write || curr->arch.hvm.guest_cr[4] & X86_CR4_UMIP) &&
> +         hvm_get_cpl(curr) )
> +    {
> +        hvm_inject_hw_exception(TRAP_gp_fault, 0);
> +        return X86EMUL_OKAY;
> +    }
> +
>      if ( currd->arch.monitor.descriptor_access_enabled )
>      {
>          ASSERT(curr->arch.vm_event);
> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -547,6 +547,28 @@ void svm_update_guest_cr(struct vcpu *v,
>              value &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
>          }
> 
> +        if ( v->domain->arch.cpuid->feat.umip && !cpu_has_umip )
> +        {
> +            u32 general1_intercepts = vmcb_get_general1_intercepts(vmcb);
> +
> +            if ( v->arch.hvm.guest_cr[4] & X86_CR4_UMIP )
> +            {
> +                value &= ~X86_CR4_UMIP;
> +                ASSERT(vmcb_get_cr_intercepts(vmcb) &
> CR_INTERCEPT_CR0_READ);
> +                general1_intercepts |= GENERAL1_INTERCEPT_IDTR_READ |
> +                                       GENERAL1_INTERCEPT_GDTR_READ |
> +                                       GENERAL1_INTERCEPT_LDTR_READ |
> +                                       GENERAL1_INTERCEPT_TR_READ;
> +            }
> +            else if ( !v->domain->arch.monitor.descriptor_access_enabled )
> +                general1_intercepts &= ~(GENERAL1_INTERCEPT_IDTR_READ |
> +                                         GENERAL1_INTERCEPT_GDTR_READ |
> +                                         GENERAL1_INTERCEPT_LDTR_READ |
> +                                         GENERAL1_INTERCEPT_TR_READ);
> +
> +            vmcb_set_general1_intercepts(vmcb, general1_intercepts);
> +        }
> +
>          vmcb_set_cr4(vmcb, value);
>          break;
>      default:
> @@ -883,7 +905,14 @@ static void svm_set_descriptor_access_ex
>      if ( enable )
>          general1_intercepts |= mask;
>      else
> +    {
>          general1_intercepts &= ~mask;
> +        if ( (v->arch.hvm.guest_cr[4] & X86_CR4_UMIP) && !cpu_has_umip )
> +            general1_intercepts |= GENERAL1_INTERCEPT_IDTR_READ |
> +                                   GENERAL1_INTERCEPT_GDTR_READ |
> +                                   GENERAL1_INTERCEPT_LDTR_READ |
> +                                   GENERAL1_INTERCEPT_TR_READ;
> +    }
> 
>      vmcb_set_general1_intercepts(vmcb, general1_intercepts);
>  }
> @@ -1781,6 +1810,16 @@ static void svm_vmexit_do_cr_access(
>          __update_guest_eip(regs, vmcb->nextrip - vmcb->rip);
>  }
> 
> +static bool is_cr4_write(const struct x86_emulate_state *state,
> +                         const struct x86_emulate_ctxt *ctxt)
> +{
> +    unsigned int cr;
> +
> +    return ctxt->opcode == X86EMUL_OPC(0x0f, 0x22) &&
> +           x86_insn_modrm(state, NULL, &cr) == 3 &&
> +           cr == 4;
> +}
> +
>  static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
>  {
>      struct vmcb_struct *vmcb = vcpu_nestedhvm(v).nv_n1vmcx;
> @@ -2728,6 +2767,14 @@ void svm_vmexit_handler(struct cpu_user_
>          svm_fpu_dirty_intercept();
>          break;
> 
> +    case VMEXIT_EXCEPTION_GP:
> +        HVMTRACE_1D(TRAP, TRAP_gp_fault);
> +        /* We only care about ring 0 faults with error code zero. */
> +        if ( vmcb->exitinfo1 || vmcb_get_cpl(vmcb) ||
> +             !hvm_emulate_one_insn(is_cr4_write, "CR4 write") )
> +            hvm_inject_hw_exception(TRAP_gp_fault, vmcb->exitinfo1);
> +        break;
> +
>      case VMEXIT_EXCEPTION_PF:
>      {
>          unsigned long va;
> @@ -2873,7 +2920,16 @@ void svm_vmexit_handler(struct cpu_user_
>              hvm_inject_hw_exception(TRAP_gp_fault, 0);
>          break;
> 
> -    case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
> +    case VMEXIT_CR0_READ:
> +        if ( (v->arch.hvm.guest_cr[4] & X86_CR4_UMIP) &&
> +             vmcb_get_cpl(vmcb) )
> +        {
> +            ASSERT(!cpu_has_umip);
> +            hvm_inject_hw_exception(TRAP_gp_fault, 0);
> +            break;
> +        }
> +        /* fall through */
> +    case VMEXIT_CR1_READ ... VMEXIT_CR15_READ:
>      case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
>          if ( cpu_has_svm_decode && (vmcb->exitinfo1 & (1ULL << 63)) )
>              svm_vmexit_do_cr_access(vmcb, regs);
> --- a/xen/arch/x86/hvm/svm/vmcb.c
> +++ b/xen/arch/x86/hvm/svm/vmcb.c
> @@ -141,6 +141,10 @@ static int construct_vmcb(struct vcpu *v
>          HVM_TRAP_MASK |
>          (v->arch.fully_eager_fpu ? 0 : (1U << TRAP_no_device));
> 
> +    /* For UMIP emulation intercept #GP to catch faulting CR4 writes. */
> +    if ( v->domain->arch.cpuid->feat.umip && !cpu_has_umip )
> +        vmcb->_exception_intercepts |= 1U << TRAP_gp_fault;
> +
>      if ( paging_mode_hap(v->domain) )
>      {
>          vmcb->_np_enable = 1; /* enable nested paging */
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -1074,7 +1074,8 @@ static int construct_vmcs(struct vcpu *v
> 
>      /*
>       * Disable features which we don't want active by default:
> -     *  - Descriptor table exiting only if wanted by introspection
> +     *  - Descriptor table exiting only if needed for CR4.UMIP write
> +     *    emulation or wanted by introspection
>       *  - x2APIC - default is xAPIC mode
>       *  - VPID settings chosen at VMEntry time
>       *  - VMCS Shadowing only when in nested VMX mode
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1264,7 +1264,7 @@ static void vmx_set_descriptor_access_ex
>      if ( enable )
>          v->arch.hvm.vmx.secondary_exec_control |=
>              SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING;
> -    else
> +    else if ( !(v->arch.hvm.guest_cr[4] & X86_CR4_UMIP) || cpu_has_umip )
>          v->arch.hvm.vmx.secondary_exec_control &=
>              ~SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING;
> 
> @@ -1514,6 +1514,21 @@ static void vmx_update_guest_cr(struct v
>              v->arch.hvm.hw_cr[4] &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
>          }
> 
> +        if ( v->domain->arch.cpuid->feat.umip && !cpu_has_umip )
> +        {
> +            if ( (v->arch.hvm.guest_cr[4] & X86_CR4_UMIP) )
> +            {
> +                ASSERT(cpu_has_vmx_dt_exiting);
> +                v->arch.hvm.hw_cr[4] &= ~X86_CR4_UMIP;
> +                v->arch.hvm.vmx.secondary_exec_control |=
> +                    SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING;
> +            }
> +            else if ( !v->domain->arch.monitor.descriptor_access_enabled )
> +                v->arch.hvm.vmx.secondary_exec_control &=
> +                    ~SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING;
> +            vmx_update_secondary_exec_control(v);
> +        }
> +
>          __vmwrite(GUEST_CR4, v->arch.hvm.hw_cr[4]);
> 
>          /*
> @@ -1537,6 +1552,7 @@ static void vmx_update_guest_cr(struct v
>                                               (X86_CR4_PSE | X86_CR4_SMEP |
>                                                X86_CR4_SMAP)
>                                               : 0;
> +            v->arch.hvm.vmx.cr4_host_mask |= cpu_has_umip ? 0 :
> X86_CR4_UMIP;
>              if ( v->domain->arch.monitor.write_ctrlreg_enabled &
>                   monitor_ctrlreg_bitmask(VM_EVENT_X86_CR4) )
>                  v->arch.hvm.vmx.cr4_host_mask |=
> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -110,6 +110,7 @@
> 
>  /* CPUID level 0x00000007:0.ecx */
>  #define cpu_has_avx512_vbmi
> boot_cpu_has(X86_FEATURE_AVX512_VBMI)
> +#define cpu_has_umip            boot_cpu_has(X86_FEATURE_UMIP)
>  #define cpu_has_avx512_vbmi2
> boot_cpu_has(X86_FEATURE_AVX512_VBMI2)
>  #define cpu_has_gfni            boot_cpu_has(X86_FEATURE_GFNI)
>  #define cpu_has_vaes            boot_cpu_has(X86_FEATURE_VAES)

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, which monitors our
servers 24x7x365 and backs them with RackSpace's Fanatical Support®.