[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 3/4 RFC] x86/p2m: use large pages for MMIO mappings



On 15/09/15 08:34, Jan Beulich wrote:
> When mapping large BARs (e.g. the frame buffer of a graphics card) the
> overhead of establishing such mappings using only 4k pages has,
> particularly after the XSA-125 fix, become unacceptable. Alter the
> XEN_DOMCTL_memory_mapping semantics once again, so that there's no
> longer a fixed amount of guest frames that represents the upper limit
> of what a single invocation can map. Instead bound execution time by
> limiting the number of iterations (regardless of page size).
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> ---
> RFC reasons:
> - ARM side unimplemented (and hence libxc for now made to cope with both
>   models), the main issue (besides my inability to test any change
>   there) being the many internal uses of map_mmio_regions()
> - error unmapping in map_mmio_regions() and error propagation to caller
>   from unmap_mmio_regions() are not satisfactory (for the latter a
>   possible model might be to have the function - and hence the domctl -
>   return the [non-zero] number of completed entries upon error,
>   requiring the caller to re-invoke the hypercall to then obtain the
>   actual error for the failed slot)

Doesn't this mean the caller must always make two hypercalls to confirm
success?

> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -897,39 +897,47 @@ void p2m_change_type_range(struct domain
>  
>  /* Returns: 0 for success, -errno for failure */
>  static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
> -                               p2m_type_t gfn_p2mt, p2m_access_t access)
> +                               unsigned int order, p2m_type_t gfn_p2mt,
> +                               p2m_access_t access)
>  {
>      int rc = 0;
>      p2m_access_t a;
>      p2m_type_t ot;
>      mfn_t omfn;
> +    unsigned int cur_order = 0;
>      struct p2m_domain *p2m = p2m_get_hostp2m(d);
>  
>      if ( !paging_mode_translate(d) )
>          return -EIO;
>  
> -    gfn_lock(p2m, gfn, 0);
> -    omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL, NULL);
> +    gfn_lock(p2m, gfn, order);
> +    omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, &cur_order, NULL);
> +    if ( cur_order < order )
> +    {
> +        gfn_unlock(p2m, gfn, order);
> +        return cur_order + 1;

This appears to change the error semantics, and therefore warrants an update
to the function comment.

> +    }
>      if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
>      {
> -        gfn_unlock(p2m, gfn, 0);
> +        gfn_unlock(p2m, gfn, order);
>          domain_crash(d);
>          return -ENOENT;
>      }
>      else if ( p2m_is_ram(ot) )
>      {
> +        unsigned long i;
> +
>          ASSERT(mfn_valid(omfn));
> -        set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
> +        for ( i = 0; i < (1UL << order); ++i )
> +            set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY);
>      }
>  
>      P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn, mfn_x(mfn));
> -    rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, gfn_p2mt,
> -                       access);
> -    gfn_unlock(p2m, gfn, 0);
> +    rc = p2m_set_entry(p2m, gfn, mfn, order, gfn_p2mt, access);
> +    gfn_unlock(p2m, gfn, order);
>      if ( rc )
> -        gdprintk(XENLOG_ERR,
> -                 "p2m_set_entry failed! mfn=%08lx rc:%d\n",
> -                 mfn_x(get_gfn_query_unlocked(p2m->domain, gfn, &ot)), rc);
> +        gdprintk(XENLOG_ERR, "p2m_set_entry: %#lx:%u -> %d (%#lx)\n",

PRI_mfn

> @@ -2037,6 +2052,25 @@ unsigned long paging_gva_to_gfn(struct v
>      return hostmode->gva_to_gfn(v, hostp2m, va, pfec);
> - iommu_{,un}map_page() interfaces don't support "order" (hence
>   mmio_order() for now returns zero when !iommu_hap_pt_share, which in
>   particular means the AMD side isn't being taken care of just yet)
>
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -2215,7 +2215,7 @@ int xc_domain_memory_mapping(
>  {
>      DECLARE_DOMCTL;
>      xc_dominfo_t info;
> -    int ret = 0, err;
> +    int ret = 0, rc;
>      unsigned long done = 0, nr, max_batch_sz;
>  
>      if ( xc_domain_getinfo(xch, domid, 1, &info) != 1 ||
> @@ -2240,19 +2240,24 @@ int xc_domain_memory_mapping(
>          domctl.u.memory_mapping.nr_mfns = nr;
>          domctl.u.memory_mapping.first_gfn = first_gfn + done;
>          domctl.u.memory_mapping.first_mfn = first_mfn + done;
> -        err = do_domctl(xch, &domctl);
> -        if ( err && errno == E2BIG )
> +        rc = do_domctl(xch, &domctl);
> +        if ( rc < 0 && errno == E2BIG )
>          {
>              if ( max_batch_sz <= 1 )
>                  break;
>              max_batch_sz >>= 1;
>              continue;
>          }
> +        if ( rc > 0 )
> +        {
> +            done += rc;
> +            continue;
> +        }
>          /* Save the first error... */
>          if ( !ret )
> -            ret = err;
> +            ret = rc;
>          /* .. and ignore the rest of them when removing. */
> -        if ( err && add_mapping != DPCI_REMOVE_MAPPING )
> +        if ( rc && add_mapping != DPCI_REMOVE_MAPPING )
>              break;
>  
>          done += nr;
> --- a/xen/arch/x86/domain_build.c
> +++ b/xen/arch/x86/domain_build.c
> @@ -436,7 +436,7 @@ static __init void pvh_add_mem_mapping(s
>          else
>              a = p2m_access_rwx;
>  
> -        if ( (rc = set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i), a)) )
> +        if ( (rc = set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i), 0, a)) )
>              panic("pvh_add_mem_mapping: gfn:%lx mfn:%lx i:%ld rc:%d\n",
>                    gfn, mfn, i, rc);
>          if ( !(i & 0xfffff) )
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -2396,7 +2396,8 @@ static int vmx_alloc_vlapic_mapping(stru
>      share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable);
>      d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va);
>      set_mmio_p2m_entry(d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE),
> -        _mfn(virt_to_mfn(apic_va)), p2m_get_hostp2m(d)->default_access);
> +                       _mfn(virt_to_mfn(apic_va)), 0,
> +                       p2m_get_hostp2m(d)->default_access);
>  
>      return 0;
>  }
>
>  }
>  
> +static unsigned int mmio_order(const struct domain *d,
> +                               unsigned long start_fn, unsigned long nr)

Do you mean "start_gfn" ?

> +{
> +    if ( !hap_enabled(d) || !need_iommu(d) || !iommu_hap_pt_share ||
> +         (start_fn & ((1UL << PAGE_ORDER_2M) - 1)) || !(nr >> PAGE_ORDER_2M) )
> +        return 0;
> +
> +    if ( !(start_fn & ((1UL << PAGE_ORDER_1G) - 1)) && (nr >> PAGE_ORDER_1G) &&
> +         opt_hap_1gb && hvm_hap_has_1gb(d) )

opt_hap_1gb should be made to be redundant with hvm_hap_has_1gb() to
avoid all the double checks.  The only place where it is interesting for
them being different is in hvm_enable().

I will throw together a patch.

> +        return PAGE_ORDER_1G;
> +
> +    if ( opt_hap_2mb && hvm_hap_has_2mb(d) )
> +        return PAGE_ORDER_2M;
> +
> +    return 0;
> +}
> +
> +#define MAP_MMIO_MAX_ITER 64 /* pretty arbitrary */
> +
>  int map_mmio_regions(struct domain *d,
>                       unsigned long start_gfn,
>                       unsigned long nr,
> @@ -2044,22 +2078,45 @@ int map_mmio_regions(struct domain *d,
>  {
>      int ret = 0;
>      unsigned long i;
> +    unsigned int iter, order;
>  
>      if ( !paging_mode_translate(d) )
>          return 0;
>  
> -    for ( i = 0; !ret && i < nr; i++ )
> +    for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER;
> +          i += 1UL << order, ++iter )
>      {
> -        ret = set_mmio_p2m_entry(d, start_gfn + i, _mfn(mfn + i),
> -                                 p2m_get_hostp2m(d)->default_access);
> -        if ( ret )
> +        for ( order = mmio_order(d, (start_gfn + i) | (mfn + i), nr - i); ;
> +              order = ret - 1 )

It is hard to reason as to whether this loop will terminate.  All it
would take is a bug in set_mmio_p2m_entry() which causes it to
unilaterally return 1 and this loop would never terminate.

Is there any other condition which can be used as a safety check?

~Andrew


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.