
Re: [Xen-devel] [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling



>>> On 21.10.13 at 17:55, "Liu, Jinsong" <jinsong.liu@xxxxxxxxx> wrote:
> From 4ff1e2955f67954e60562b29a00adea89e5b93ae Mon Sep 17 00:00:00 2001
> From: Liu Jinsong <jinsong.liu@xxxxxxxxx>
> Date: Thu, 17 Oct 2013 05:49:23 +0800
> Subject: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
> 
> This patch solves the XSA-60 security hole:
> 1. For a guest w/o VT-d, and for a guest with VT-d but snooped, Xen need
> do nothing, since the hardware snoop mechanism has ensured cache coherency.
> 
> 2. For a guest with VT-d but non-snooped, cache coherency cannot be
> guaranteed by h/w snoop, therefore Xen needs to emulate UC type for the
> guest:
> 2.1) if it works w/ Intel EPT, set the guest IA32_PAT fields to UC so that
> all guest memory types are UC;
> 2.2) if it works w/ shadow, drop all shadows so that any new ones will be
> created on demand w/ UC.
> 
> This patch also fixes a bug in shadow cr0.cd handling. The current shadow
> code has a small window between the cache flush and TLB invalidation,
> resulting in possible cache pollution. This patch pauses the vcpus so that
> no vcpu context runs inside that window.
> 
> Signed-off-by: Liu Jinsong <jinsong.liu@xxxxxxxxx>

This looks fine to me now, but will need acks/reviews at least from
- Keir (whose blessing of the pausing construct I'd like to have even
  if this didn't involve changing non-x86 files)
- one of the VMX maintainers
- one or both of Tim and Andrew

And of course I'd really appreciate it if Oracle could arrange for
testing of this, to confirm that their performance problem is also
gone with it.

Thanks, Jan

> ---
>  xen/arch/x86/hvm/hvm.c            |   75 ++++++++++++++++++++++--------------
>  xen/arch/x86/hvm/vmx/vmcs.c       |    2 +-
>  xen/arch/x86/hvm/vmx/vmx.c        |   58 +++++++++++++++++++++++++++-
>  xen/common/domain.c               |   10 +++++
>  xen/include/asm-x86/hvm/hvm.h     |    1 +
>  xen/include/asm-x86/hvm/support.h |    2 +
>  xen/include/xen/sched.h           |    1 +
>  7 files changed, 117 insertions(+), 32 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 688a943..df021de 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -1701,6 +1701,40 @@ int hvm_mov_from_cr(unsigned int cr, unsigned int gpr)
>      return X86EMUL_UNHANDLEABLE;
>  }
>  
> +void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value)
> +{
> +    if ( value & X86_CR0_CD )
> +    {
> +        /* Entering no fill cache mode. */
> +        spin_lock(&v->domain->arch.hvm_domain.uc_lock);
> +        v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
> +
> +        if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
> +        {
> +            domain_pause_nosync(v->domain);
> +
> +            /* Flush physical caches. */
> +            on_each_cpu(local_flush_cache, NULL, 1);
> +            hvm_set_uc_mode(v, 1);
> +
> +            domain_unpause(v->domain);
> +        }
> +        spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
> +    }
> +    else if ( !(value & X86_CR0_CD) &&
> +              (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
> +    {
> +        /* Exit from no fill cache mode. */
> +        spin_lock(&v->domain->arch.hvm_domain.uc_lock);
> +        v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
> +
> +        if ( domain_exit_uc_mode(v) )
> +            hvm_set_uc_mode(v, 0);
> +
> +        spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
> +    }
> +}
> +
>  int hvm_set_cr0(unsigned long value)
>  {
>      struct vcpu *v = current;
> @@ -1784,35 +1818,18 @@ int hvm_set_cr0(unsigned long value)
>          }
>      }
>  
> -    if ( has_arch_mmios(v->domain) )
> -    {
> -        if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) )
> -        {
> -            /* Entering no fill cache mode. */
> -            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
> -            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
> -
> -            if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
> -            {
> -                /* Flush physical caches. */
> -                on_each_cpu(local_flush_cache, NULL, 1);
> -                hvm_set_uc_mode(v, 1);
> -            }
> -            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
> -        }
> -        else if ( !(value & (X86_CR0_CD | X86_CR0_NW)) &&
> -                  (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
> -        {
> -            /* Exit from no fill cache mode. */
> -            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
> -            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
> -
> -            if ( domain_exit_uc_mode(v) )
> -                hvm_set_uc_mode(v, 0);
> -
> -            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
> -        }
> -    }
> +    /*
> +     * When cr0.cd is set:
> +     * 1. For a guest w/o VT-d, and for a guest with VT-d but snooped, Xen
> +     * need not do anything, since the hardware snoop mechanism has ensured
> +     * cache coherency;
> +     * 2. For a guest with VT-d but non-snooped, cache coherency cannot be
> +     * guaranteed by h/w, so UC memory type needs to be emulated for the
> +     * guest.
> +     */
> +    if ( ((value ^ old_value) & X86_CR0_CD) &&
> +         has_arch_pdevs(v->domain) &&
> +         iommu_enabled && !iommu_snoop &&
> +         hvm_funcs.handle_cd )
> +        hvm_funcs.handle_cd(v, value);
>  
>      v->arch.hvm_vcpu.guest_cr[0] = value;
>      hvm_update_guest_cr(v, 0);
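
To restate the gating logic in the hunk above: the handle_cd path fires
only when CR0.CD actually changes and hardware cannot keep the cache
coherent on the guest's behalf. A minimal sketch of the same predicate as
a standalone helper (the name need_cd_emulation is made up here purely for
illustration; the real check is open-coded in hvm_set_cr0() and
additionally verifies that the handle_cd hook is present):

    static bool_t need_cd_emulation(struct domain *d,
                                    unsigned long old_cr0,
                                    unsigned long new_cr0)
    {
        /* CR0.CD actually changed ... */
        return ((old_cr0 ^ new_cr0) & X86_CR0_CD) &&
               /* ... the guest has devices passed through ... */
               has_arch_pdevs(d) &&
               /* ... and the IOMMU cannot keep caches coherent by snooping. */
               iommu_enabled && !iommu_snoop;
    }
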
> diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
> index 6916c6d..7a01b1f 100644
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -921,7 +921,7 @@ static int construct_vmcs(struct vcpu *v)
>          vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, MSR_TYPE_R | MSR_TYPE_W);
>          vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, MSR_TYPE_R | MSR_TYPE_W);
>          vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, MSR_TYPE_R | MSR_TYPE_W);
> -        if ( paging_mode_hap(d) )
> +        if ( paging_mode_hap(d) && (!iommu_enabled || iommu_snoop) )
>              vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, MSR_TYPE_R | MSR_TYPE_W);
>      }
>  
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 6dedb29..d846a9c 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -642,6 +642,10 @@ static void vmx_ctxt_switch_to(struct vcpu *v)
>              __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept_data), 0);
>      }
>  
> +    /* While the guest has cr0.cd set, do not use the potentially polluted cache */
> +    if ( unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
> +        wbinvd();
> +
>      vmx_restore_guest_msrs(v);
>      vmx_restore_dr(v);
>  }
> @@ -908,7 +912,8 @@ static unsigned long vmx_get_shadow_gs_base(struct vcpu *v)
>  
>  static int vmx_set_guest_pat(struct vcpu *v, u64 gpat)
>  {
> -    if ( !paging_mode_hap(v->domain) )
> +    if ( !paging_mode_hap(v->domain) ||
> +         unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
>          return 0;
>  
>      vmx_vmcs_enter(v);
> @@ -919,7 +924,8 @@ static int vmx_set_guest_pat(struct vcpu *v, u64 gpat)
>  
>  static int vmx_get_guest_pat(struct vcpu *v, u64 *gpat)
>  {
> -    if ( !paging_mode_hap(v->domain) )
> +    if ( !paging_mode_hap(v->domain) ||
> +         unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
>          return 0;
>  
>      vmx_vmcs_enter(v);
> @@ -928,6 +934,53 @@ static int vmx_get_guest_pat(struct vcpu *v, u64 *gpat)
>      return 1;
>  }
>  
> +static void vmx_handle_cd(struct vcpu *v, unsigned long value)
> +{
> +    if ( !paging_mode_hap(v->domain) )
> +    {
> +        /*
> +         * For shadow, the 'load IA32_PAT' VM-entry control is 0, so the
> +         * guest memory type cannot be set to UC via IA32_PAT. Xen drops
> +         * all shadows so that any new ones will be created on demand.
> +         */
> +        hvm_shadow_handle_cd(v, value);
> +    }
> +    else
> +    {
> +        u64 *pat = &v->arch.hvm_vcpu.pat_cr;
> +
> +        if ( value & X86_CR0_CD )
> +        {
> +            /*
> +             * For EPT, set the guest IA32_PAT fields to UC so that all
> +             * guest memory types are UC.
> +             */
> +            u64 uc_pat =
> +                ((uint64_t)PAT_TYPE_UNCACHABLE)       |       /* PAT0 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 8)  |       /* PAT1 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 16) |       /* PAT2 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 24) |       /* PAT3 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 32) |       /* PAT4 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 40) |       /* PAT5 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 48) |       /* PAT6 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 56);        /* PAT7 */
> +
> +            vmx_get_guest_pat(v, pat);
> +            vmx_set_guest_pat(v, uc_pat);
> +
> +            wbinvd();               /* flush possibly polluted cache */
> +            hvm_asid_flush_vcpu(v); /* invalidate memory type cached in TLB */
> +            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
> +        }
> +        else
> +        {
> +            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
> +            vmx_set_guest_pat(v, *pat);
> +            hvm_asid_flush_vcpu(v); /* no need to flush cache */
> +        }
> +    }
> +}
> +
>  static void vmx_set_tsc_offset(struct vcpu *v, u64 offset)
>  {
>      vmx_vmcs_enter(v);
> @@ -1550,6 +1603,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
>      .msr_read_intercept   = vmx_msr_read_intercept,
>      .msr_write_intercept  = vmx_msr_write_intercept,
>      .invlpg_intercept     = vmx_invlpg_intercept,
> +    .handle_cd            = vmx_handle_cd,
>      .set_info_guest       = vmx_set_info_guest,
>      .set_rdtsc_exiting    = vmx_set_rdtsc_exiting,
>      .nhvm_vcpu_initialise = nvmx_vcpu_initialise,
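
A side note on the uc_pat constant built field by field in
vmx_handle_cd() above: the architectural PAT encoding of UC is 0, so,
assuming PAT_TYPE_UNCACHABLE carries that architectural value, the whole
constant evaluates to plain 0. A self-contained check (ordinary C,
outside Xen):

    #include <assert.h>
    #include <stdint.h>

    /* Assumed architectural PAT encoding for UC (uncacheable). */
    #define PAT_TYPE_UNCACHABLE 0x00

    int main(void)
    {
        uint64_t uc_pat = 0;
        unsigned int i;

        /* Replicate UC into all eight PAT fields, as vmx_handle_cd() does. */
        for ( i = 0; i < 8; i++ )
            uc_pat |= (uint64_t)PAT_TYPE_UNCACHABLE << (i * 8);

        assert(uc_pat == 0);
        return 0;
    }

The explicit field-by-field form in the patch is arguably clearer about
intent, though, so this is just an observation, not a request to change it.
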
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 5999779..ce20323 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -825,6 +825,16 @@ void domain_pause(struct domain *d)
>          vcpu_sleep_sync(v);
>  }
>  
> +void domain_pause_nosync(struct domain *d)
> +{
> +    struct vcpu *v;
> +
> +    atomic_inc(&d->pause_count);
> +
> +    for_each_vcpu( d, v )
> +        vcpu_sleep_nosync(v);
> +}
> +
>  void domain_unpause(struct domain *d)
>  {
>      struct vcpu *v;
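
Regarding the pausing construct itself: domain_pause() waits, via
vcpu_sleep_sync(), for every vcpu to be fully descheduled, including the
very vcpu hvm_shadow_handle_cd() may be running on, so the _nosync
variant, which only raises the pause count and kicks the vcpus to sleep,
is what makes the construct usable there. A hedged sketch (hypothetical
helper name, mirroring the hvm.c hunk above) of the intended pattern:

    /* Sketch only: pause without waiting, do global cache work, unpause. */
    static void flush_cache_while_paused(struct vcpu *v)
    {
        struct domain *d = v->domain;

        domain_pause_nosync(d);   /* raise pause count, kick vcpus; don't
                                     wait, as v may well be current */
        on_each_cpu(local_flush_cache, NULL, 1); /* flush physical caches */
        hvm_set_uc_mode(v, 1);    /* switch the whole domain to UC */
        domain_unpause(d);        /* drop pause count, wake vcpus */
    }
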
> diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
> index 8dd2b40..c9afb56 100644
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -156,6 +156,7 @@ struct hvm_function_table {
>      int (*msr_read_intercept)(unsigned int msr, uint64_t *msr_content);
>      int (*msr_write_intercept)(unsigned int msr, uint64_t msr_content);
>      void (*invlpg_intercept)(unsigned long vaddr);
> +    void (*handle_cd)(struct vcpu *v, unsigned long value);
>      void (*set_info_guest)(struct vcpu *v);
>      void (*set_rdtsc_exiting)(struct vcpu *v, bool_t);
>  
> diff --git a/xen/include/asm-x86/hvm/support.h b/xen/include/asm-x86/hvm/support.h
> index 7ddc806..52aef1f 100644
> --- a/xen/include/asm-x86/hvm/support.h
> +++ b/xen/include/asm-x86/hvm/support.h
> @@ -130,6 +130,8 @@ void hvm_rdtsc_intercept(struct cpu_user_regs *regs);
>  
>  int __must_check hvm_handle_xsetbv(u32 index, u64 new_bv);
>  
> +void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value);
> +
>  /* These functions all return X86EMUL return codes. */
>  int hvm_set_efer(uint64_t value);
>  int hvm_set_cr0(unsigned long value);
> diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
> index 1765e18..82f522e 100644
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -700,6 +700,7 @@ void vcpu_unblock(struct vcpu *v);
>  void vcpu_pause(struct vcpu *v);
>  void vcpu_pause_nosync(struct vcpu *v);
>  void domain_pause(struct domain *d);
> +void domain_pause_nosync(struct domain *d);
>  void vcpu_unpause(struct vcpu *v);
>  void domain_unpause(struct domain *d);
>  void domain_pause_by_systemcontroller(struct domain *d);
> -- 
> 1.7.1



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel