[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH RFC] xen/gnttab: Avoid TLB flush if the grant mapped page was not accessed



On 04/07/14 13:54, Malcolm Crossley wrote:
> Implement an x86 specific optimisation to avoid a TLB flush if the grant
> mapped page was not accessed by a CPU whilst it was mapped.
>
> The optimisation works by checking the accessed page bit of the pte to
> determine if the page has been accessed by a CPU. The x86 specification
> details
> that the processor atomically sets the accessed bit before caching a PTE into
> a TLB.
>
> The grant mapped pte is now set up without the accessed or dirty bits set and
> so we can determine if any CPU has accessed that page via the grant mapping
> pte.
>
> I believe Xen drops all attempts by guests to clear the accessed or dirty bits
> on the pte because Xen is the pte owner. This should prevent a malicious guest
> from deliberately circumventing the TLB flush.
>
> Blkback benefits from this optimisation because it does not access the data
> itself from the CPU.
>
> Netback can benefit from the optimisation depending on how much of the header
> is put into the first fragment because the first fragment is grant copied on
> recent netback implementations.
>
> Signed-off-by: Malcolm Crossley <malcolm.crossley@xxxxxxxxxx>

Probably worth a comment regarding the improvements we have observed:
a 100% reduction of TLB flushes on the blk path, and between 80% and 90%
reduction on the net path.

>
> diff -r 3e0ee081c616 -r 38f320029371 xen/arch/arm/mm.c
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -1216,7 +1216,7 @@ int create_grant_host_mapping(unsigned l
>  }
>  
>  int replace_grant_host_mapping(unsigned long addr, unsigned long mfn,
> -        unsigned long new_addr, unsigned int flags)
> +        unsigned long new_addr, unsigned int flags, int *page_accessed)

Pragmatically, this should be a bool_t rather than an int.

>  {
>      unsigned long gfn = (unsigned long)(addr >> PAGE_SHIFT);
>      struct domain *d = current->domain;
> @@ -1225,6 +1225,7 @@ int replace_grant_host_mapping(unsigned 
>          return GNTST_general_error;
>  
>      guest_physmap_remove_page(d, gfn, mfn, 0);
> +    *page_accessed = 1;
>  
>      return GNTST_okay;
>  }
> diff -r 3e0ee081c616 -r 38f320029371 xen/arch/x86/mm.c
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -3901,7 +3901,8 @@ static int create_grant_va_mapping(
>  }
>  
>  static int replace_grant_va_mapping(
> -    unsigned long addr, unsigned long frame, l1_pgentry_t nl1e, struct vcpu 
> *v)
> +    unsigned long addr, unsigned long frame, l1_pgentry_t nl1e, struct vcpu 
> *v,
> +    int *page_accessed)
>  {
>      l1_pgentry_t *pl1e, ol1e;
>      unsigned long gl1mfn;
> @@ -3947,12 +3948,13 @@ static int replace_grant_va_mapping(
>      }
>  
>      /* Delete pagetable entry. */
> -    if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) )
> +    if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 1)) )
>      {
>          MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
>          rc = GNTST_general_error;
>          goto unlock_and_out;
>      }
> +    *page_accessed = !!(l1e_get_flags(*pl1e) & _PAGE_ACCESSED);
>  
>   unlock_and_out:
>      page_unlock(l1pg);
> @@ -3963,9 +3965,9 @@ static int replace_grant_va_mapping(
>  }
>  
>  static int destroy_grant_va_mapping(
> -    unsigned long addr, unsigned long frame, struct vcpu *v)
> +    unsigned long addr, unsigned long frame, struct vcpu *v, int 
> *page_accessed)
>  {
> -    return replace_grant_va_mapping(addr, frame, l1e_empty(), v);
> +    return replace_grant_va_mapping(addr, frame, l1e_empty(), v, 
> page_accessed);
>  }
>  
>  static int create_grant_p2m_mapping(uint64_t addr, unsigned long frame,
> @@ -4000,7 +4002,7 @@ int create_grant_host_mapping(uint64_t a
>          return create_grant_p2m_mapping(addr, frame, flags, cache_flags);
>  
>      grant_pte_flags =
> -        _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_GNTTAB;
> +        _PAGE_PRESENT | _PAGE_GNTTAB;
>      if ( cpu_has_nx )
>          grant_pte_flags |= _PAGE_NX_BIT;
>  
> @@ -4048,7 +4050,8 @@ static int replace_grant_p2m_mapping(
>  }
>  
>  int replace_grant_host_mapping(
> -    uint64_t addr, unsigned long frame, uint64_t new_addr, unsigned int 
> flags)
> +    uint64_t addr, unsigned long frame, uint64_t new_addr, unsigned int 
> flags,
> +    int *page_accessed)
>  {
>      struct vcpu *curr = current;
>      l1_pgentry_t *pl1e, ol1e;
> @@ -4069,7 +4072,7 @@ int replace_grant_host_mapping(
>      }
>  
>      if ( !new_addr )
> -        return destroy_grant_va_mapping(addr, frame, curr);
> +        return destroy_grant_va_mapping(addr, frame, curr, page_accessed);
>  
>      pl1e = guest_map_l1e(curr, new_addr, &gl1mfn);
>      if ( !pl1e )
> @@ -4117,7 +4120,7 @@ int replace_grant_host_mapping(
>      put_page(l1pg);
>      guest_unmap_l1e(curr, pl1e);
>  
> -    rc = replace_grant_va_mapping(addr, frame, ol1e, curr);
> +    rc = replace_grant_va_mapping(addr, frame, ol1e, curr, page_accessed);
>      if ( rc && !paging_mode_refcounts(curr->domain) )
>          put_page_from_l1e(ol1e, curr->domain);
>  
> diff -r 3e0ee081c616 -r 38f320029371 xen/common/grant_table.c
> --- a/xen/common/grant_table.c
> +++ b/xen/common/grant_table.c
> @@ -73,6 +73,7 @@ struct gnttab_unmap_common {
>  
>      /* Return */
>      int16_t status;
> +    int page_accessed;
>  
>      /* Shared state between *_unmap and *_unmap_complete */
>      u16 flags;
> @@ -81,6 +82,12 @@ struct gnttab_unmap_common {
>      struct domain *rd;
>  };
>  
> +#ifndef NDEBUG
> +atomic_t grant_unmap_tlb_flush_avoided;
> +atomic_t grant_unmap_tlb_flush_done;
> +atomic_t grant_unmap_operations;
> +#endif

While these were useful during development, they would not be useful if
the patch were committed to xen.  I suggest they be dropped, and
possibly provided as second "for people interested in the savings they
are getting" patch.

> +
>  /* Number of unmap operations that are done between each tlb flush */
>  #define GNTTAB_UNMAP_BATCH_SIZE 32
>  
> @@ -844,6 +851,7 @@ static void
>      ld = current->domain;
>      lgt = ld->grant_table;
>  
> +    op->page_accessed = 0;
>      op->frame = (unsigned long)(op->dev_bus_addr >> PAGE_SHIFT);
>  
>      if ( unlikely(op->handle >= lgt->maptrack_limit) )
> @@ -924,7 +932,7 @@ static void
>      {
>          if ( (rc = replace_grant_host_mapping(op->host_addr,
>                                                op->frame, op->new_addr, 
> -                                              op->flags)) < 0 )
> +                                              op->flags, 
> &op->page_accessed)) < 0 )
>              goto unmap_out;
>  
>          ASSERT(act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask));
> @@ -1082,20 +1090,25 @@ static long
>  gnttab_unmap_grant_ref(
>      XEN_GUEST_HANDLE_PARAM(gnttab_unmap_grant_ref_t) uop, unsigned int count)
>  {
> -    int i, c, partial_done, done = 0;
> +    int i, c, partial_done, done = 0, grant_map_accessed = 0;
>      struct gnttab_unmap_grant_ref op;
>      struct gnttab_unmap_common common[GNTTAB_UNMAP_BATCH_SIZE];
>  
> +#ifndef NDEBUG
> +    atomic_inc(&grant_unmap_operations);
> +#endif
>      while ( count != 0 )
>      {
>          c = min(count, (unsigned int)GNTTAB_UNMAP_BATCH_SIZE);
>          partial_done = 0;
> +        grant_map_accessed = 0;
>  
>          for ( i = 0; i < c; i++ )
>          {
>              if ( unlikely(__copy_from_guest(&op, uop, 1)) )
>                  goto fault;
>              __gnttab_unmap_grant_ref(&op, &(common[i]));
> +            grant_map_accessed |= common[i].page_accessed;
>              ++partial_done;
>              if ( unlikely(__copy_field_to_guest(uop, &op, status)) )
>                  goto fault;
> @@ -1103,6 +1116,14 @@ gnttab_unmap_grant_ref(
>          }
>  
>          gnttab_flush_tlb(current->domain);
> +        if ( grant_map_accessed ) {

Coding style.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.