
Re: [Xen-devel] [PATCH v2 7/8] xen: arm: use superpages in p2m when pages are suitably aligned



On Wed, 11 Jun 2014, Ian Campbell wrote:
> +static bool_t is_mapping_aligned(const paddr_t start_gpaddr,
> +                                 const paddr_t end_gpaddr,
> +                                 const paddr_t maddr,
> +                                 const paddr_t level_size)
> +{
> +    const paddr_t level_mask = level_size - 1;
> +
> +    /* No hardware superpages at level 0 */
> +    if ( level_size == ZEROETH_SIZE )
> +        return false;
> +
> +    /*
> +     * A range smaller than the size of a superpage at this level
> +     * cannot be superpage aligned.
> +     */
> +    if ( ( end_gpaddr - start_gpaddr ) < level_size - 1 )
> +        return false;

Shouldn't this be
       if ( ( end_gpaddr - start_gpaddr ) < level_size )
?
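
Assuming end_gpaddr is exclusive (the caller loops while addr <
end_gpaddr), a full superpage covers exactly level_size bytes. A range
one byte short of that gives end_gpaddr - start_gpaddr ==
level_size - 1, which is not < level_size - 1, so it slips through the
current check even though it cannot hold a superpage.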

> +    /* Both the gpaddr and maddr must be aligned */
> +    if ( start_gpaddr & level_mask )
> +        return false;
> +    if ( maddr & level_mask )
> +        return false;
> +    return true;
> +}
> +
> +#define P2M_ONE_DESCEND        0
> +#define P2M_ONE_PROGRESS_NOP   0x1
> +#define P2M_ONE_PROGRESS       0x10
> +
> +/*
> + * 0   == (P2M_ONE_DESCEND) continue to descend the tree
> + * +ve == (P2M_ONE_PROGRESS_*) handled at this level, continue, flush,
> + *        entry, addr and maddr updated.  Return value is an
> + *        indication of the amount of work done (for preemption).
> + * -ve == (-Exxx) error.
> + */
> +static int apply_one_level(struct domain *d,
> +                           lpae_t *entry,
> +                           unsigned int level,
> +                           bool_t flush_cache,
> +                           enum p2m_operation op,
> +                           paddr_t start_gpaddr,
> +                           paddr_t end_gpaddr,
> +                           paddr_t *addr,
> +                           paddr_t *maddr,
> +                           bool_t *flush,
> +                           int mattr,
> +                           p2m_type_t t)
> +{
> +    /* Helpers to lookup the properties of each level */
> +    const paddr_t level_sizes[] =
> +        { ZEROETH_SIZE, FIRST_SIZE, SECOND_SIZE, THIRD_SIZE };
> +    const paddr_t level_masks[] =
> +        { ZEROETH_MASK, FIRST_MASK, SECOND_MASK, THIRD_MASK };
> +    const paddr_t level_shifts[] =
> +        { ZEROETH_SHIFT, FIRST_SHIFT, SECOND_SHIFT, THIRD_SHIFT };
> +    const paddr_t level_size = level_sizes[level];
> +    const paddr_t level_mask = level_masks[level];
> +    const paddr_t level_shift = level_shifts[level];
> +
> +    struct p2m_domain *p2m = &d->arch.p2m;
> +    lpae_t pte;
> +    const lpae_t orig_pte = *entry;
> +    int rc;
> +
> +    BUG_ON(level > 3);
> +
> +    switch ( op )
> +    {
> +    case ALLOCATE:
> +        ASSERT(level < 3 || !p2m_valid(orig_pte));
> +        ASSERT(*maddr == 0);
> +
> +        if ( p2m_valid(orig_pte) )
> +            return P2M_ONE_DESCEND;
> +
> +        if ( is_mapping_aligned(*addr, end_gpaddr, 0, level_size) )
> +        {
> +            struct page_info *page;
> +
> +            page = alloc_domheap_pages(d, level_shift - PAGE_SHIFT, 0);
> +            if ( page )
> +            {
> +                pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t);
> +                if ( level != 3 )
> +                    pte.p2m.table = 0;
> +                p2m_write_pte(entry, pte, flush_cache);
> +                p2m->stats.mappings[level]++;
> +                return P2M_ONE_PROGRESS;
> +            }
> +            else if ( level == 3 )
> +                return -ENOMEM;
> +        }
> +
> +        BUG_ON(level == 3); /* L3 is always superpage aligned */
> +
> +        /*
> +         * If we get here then we failed to allocate a sufficiently
> +         * large contiguous region for this level (which can't be
> +         * L3). Create a page table and continue to descend so we try
> +         * smaller allocations.
> +         */
> +        rc = p2m_create_table(d, entry, 0, flush_cache);
> +        if ( rc < 0 )
> +            return rc;
> +
> +        return P2M_ONE_DESCEND;
> +
> +    case INSERT:

Shouldn't you add a

        if ( p2m_valid(orig_pte) )
            return P2M_ONE_DESCEND;

test here too?


> +        if ( is_mapping_aligned(*addr, end_gpaddr, *maddr, level_size) &&
> +           /* We do not handle replacing an existing table with a superpage */
> +             (level == 3 || !p2m_table(orig_pte)) )

Why the difference with ALLOCATE?


> +        {
> +            /* New mapping is superpage aligned, make it */
> +            pte = mfn_to_p2m_entry(*maddr >> PAGE_SHIFT, mattr, t);
> +            if ( level < 3 )
> +                pte.p2m.table = 0; /* Superpage entry */
> +
> +            p2m_write_pte(entry, pte, flush_cache);
> +
> +            *flush |= p2m_valid(orig_pte);
> +
> +            *addr += level_size;
> +            *maddr += level_size;
> +
> +            if ( p2m_valid(orig_pte) )
> +            {
> +                /*
> +                 * We can't currently get here for an existing table
> +                 * mapping, since we don't handle replacing an
> +                 * existing table with a superpage. If we did we would
> +                 * need to handle freeing (and accounting) for the bit
> +                 * of the p2m tree which we would be about to lop off.
> +                 */
> +                BUG_ON(level < 3 && p2m_table(orig_pte));
> +                if ( level == 3 )
> +                    p2m_put_l3_page(orig_pte);
> +            }
> +            else /* New mapping */
> +                p2m->stats.mappings[level]++;
> +
> +            return P2M_ONE_PROGRESS;
> +        }
> +        else
> +        {
> +            /* New mapping is not superpage aligned, create a new table entry */
> +            BUG_ON(level == 3); /* L3 is always superpage aligned */
> +
> +            /* Not present -> create table entry and descend */
> +            if ( !p2m_valid(orig_pte) )
> +            {
> +                rc = p2m_create_table(d, entry, 0, flush_cache);
> +                if ( rc < 0 )
> +                    return rc;
> +                return P2M_ONE_DESCEND;
> +            }
> +
> +            /* Existing superpage mapping -> shatter and descend */
> +            if ( p2m_entry(orig_pte) )
> +            {
> +                *flush = true;
> +                rc = p2m_create_table(d, entry,
> +                                      level_shift - PAGE_SHIFT, flush_cache);
> +                if ( rc < 0 )
> +                    return rc;
> +
> +                p2m->stats.shattered[level]++;
> +                p2m->stats.mappings[level]--;
> +                p2m->stats.mappings[level+1] += LPAE_ENTRIES;
> +            } /* else: an existing table mapping -> descend */
> +
> +            BUG_ON(!entry->p2m.table);
> +
> +            return P2M_ONE_DESCEND;
> +        }
> +
> +        break;
> +
> +    case RELINQUISH:
> +    case REMOVE:
> +        if ( !p2m_valid(orig_pte) )
> +        {
> +            /* Progress up to next boundary */
> +            *addr = (*addr + level_size) & level_mask;
> +            return P2M_ONE_PROGRESS_NOP;
> +        }

You might as well have a single !p2m_valid check before the switch.
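
Something like this (untested sketch), e.g. at the top of the function
once level_size/level_mask have been looked up:

    if ( !p2m_valid(orig_pte) &&
         (op == RELINQUISH || op == REMOVE || op == CACHEFLUSH) )
    {
        /* Nothing mapped at this level: skip to the next boundary. */
        *addr = (*addr + level_size) & level_mask;
        return P2M_ONE_PROGRESS_NOP;
    }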


> +        if ( level < 3 && p2m_table(orig_pte) )
> +            return P2M_ONE_DESCEND;
> +
> +        *flush = true;
> +
> +        memset(&pte, 0x00, sizeof(pte));
> +        p2m_write_pte(entry, pte, flush_cache);
> +
> +        *addr += level_size;
> +
> +        p2m->stats.mappings[level]--;
> +
> +        if ( level == 3 )
> +            p2m_put_l3_page(orig_pte);
> +
> +        /*
> +         * This is still a single pte write, no matter the level, so no need to
> +         * scale.
> +         */
> +        return P2M_ONE_PROGRESS;
> +
> +    case CACHEFLUSH:
> +        if ( !p2m_valid(orig_pte) )
> +        {
> +            *addr = (*addr + level_size) & level_mask;
> +            return P2M_ONE_PROGRESS_NOP;
> +        }
> +
> +        /*
> +         * could flush up to the next boundary, but would need to be
> +         * careful about preemption, so just do one page now and loop.
> +         */
> +        *addr += PAGE_SIZE;
> +        if ( p2m_is_ram(orig_pte.p2m.type) )
> +        {
> +            flush_page_to_ram(orig_pte.p2m.base + third_table_offset(*addr));
> +            return P2M_ONE_PROGRESS;
> +        }
> +        else
> +            return P2M_ONE_PROGRESS_NOP;
> +    }
> +
> +    BUG(); /* Should never get here */
> +}
> +
>  static int apply_p2m_changes(struct domain *d,
>                       enum p2m_operation op,
>                       paddr_t start_gpaddr,
> @@ -346,9 +650,7 @@ static int apply_p2m_changes(struct domain *d,
>                    cur_first_offset = ~0,
>                    cur_second_offset = ~0;
>      unsigned long count = 0;
> -    unsigned int flush = 0;
> -    bool_t populate = (op == INSERT || op == ALLOCATE);
> -    lpae_t pte;
> +    bool_t flush = false;
>      bool_t flush_pt;
>  
>      /* Some IOMMU don't support coherent PT walk. When the p2m is
> @@ -362,6 +664,25 @@ static int apply_p2m_changes(struct domain *d,
>      addr = start_gpaddr;
>      while ( addr < end_gpaddr )
>      {
> +        /*
> +         * Arbitrarily, preempt every 512 operations or 8192 nops.
> +         * 512*P2M_ONE_PROGRESS == 8192*P2M_ONE_PROGRESS_NOP == 0x2000
> +         *
> +         * count is initialised to 0 above, so we are guaranteed to
> +         * always make at least one pass.
> +         */
> +
> +        if ( op == RELINQUISH && count >= 0x2000 )
> +        {
> +            if ( hypercall_preempt_check() )
> +            {
> +                p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT;
> +                rc = -ERESTART;
> +                goto out;
> +            }
> +            count = 0;
> +        }
> +
>          if ( cur_first_page != p2m_first_level_index(addr) )
>          {
>              if ( first ) unmap_domain_page(first);
> @@ -374,22 +695,18 @@ static int apply_p2m_changes(struct domain *d,
>              cur_first_page = p2m_first_level_index(addr);
>          }
>  
> -        if ( !p2m_valid(first[first_table_offset(addr)]) )
> -        {
> -            if ( !populate )
> -            {
> -                addr = (addr + FIRST_SIZE) & FIRST_MASK;
> -                continue;
> -            }
> +        /* We only use a 3 level p2m at the moment, so no level 0,
> +         * current hardware doesn't support super page mappings at
> +         * level 0 anyway */
>  
> -            rc = p2m_create_table(d, &first[first_table_offset(addr)],
> -                                  flush_pt);
> -            if ( rc < 0 )
> -            {
> -                printk("p2m_populate_ram: L1 failed\n");
> -                goto out;
> -            }
> -        }
> +        rc = apply_one_level(d, &first[first_table_offset(addr)],
> +                             1, flush_pt, op,
> +                             start_gpaddr, end_gpaddr,
> +                             &addr, &maddr, &flush,
> +                             mattr, t);
> +        if ( rc < 0 ) goto out;
> +        count += rc;

Adding an rc to a counter looks a bit strange.
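
Perhaps stash the return value in a separate variable first, e.g.
(sketch only, using a hypothetical "work" variable):

    int work;

    work = apply_one_level(d, &first[first_table_offset(addr)],
                           1, flush_pt, op,
                           start_gpaddr, end_gpaddr,
                           &addr, &maddr, &flush,
                           mattr, t);
    if ( work < 0 )
    {
        rc = work;
        goto out;
    }
    count += work;
    if ( work != P2M_ONE_DESCEND ) continue;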


> +        if ( rc != P2M_ONE_DESCEND ) continue;
>  
>          BUG_ON(!p2m_valid(first[first_table_offset(addr)]));
>  
> @@ -401,23 +718,16 @@ static int apply_p2m_changes(struct domain *d,
>          }
>          /* else: second already valid */
>  
> -        if ( !p2m_valid(second[second_table_offset(addr)]) )
> -        {
> -            if ( !populate )
> -            {
> -                addr = (addr + SECOND_SIZE) & SECOND_MASK;
> -                continue;
> -            }
> -
> -            rc = p2m_create_table(d, &second[second_table_offset(addr)],
> -                                  flush_pt);
> -            if ( rc < 0 ) {
> -                printk("p2m_populate_ram: L2 failed\n");
> -                goto out;
> -            }
> -        }
> +        rc = apply_one_level(d,&second[second_table_offset(addr)],
> +                             2, flush_pt, op,
> +                             start_gpaddr, end_gpaddr,
> +                             &addr, &maddr, &flush,
> +                             mattr, t);
> +        if ( rc < 0 ) goto out;
> +        count += rc;
> +        if ( rc != P2M_ONE_DESCEND ) continue;
>  
> -        BUG_ON(!second[second_table_offset(addr)].p2m.valid);
> +        BUG_ON(!p2m_valid(second[second_table_offset(addr)]));
>  
>          if ( cur_second_offset != second_table_offset(addr) )
>          {
> @@ -427,84 +737,15 @@ static int apply_p2m_changes(struct domain *d,
>              cur_second_offset = second_table_offset(addr);
>          }
>  
> -        pte = third[third_table_offset(addr)];
> -
> -        flush |= pte.p2m.valid;
> -
> -        switch (op) {
> -            case ALLOCATE:
> -                {
> -                    /* Allocate a new RAM page and attach */
> -                    struct page_info *page;
> -
> -                    ASSERT(!pte.p2m.valid);
> -                    rc = -ENOMEM;
> -                    page = alloc_domheap_page(d, 0);
> -                    if ( page == NULL ) {
> -                        printk("p2m_populate_ram: failed to allocate page\n");
> -                        goto out;
> -                    }
> -
> -                    pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t);
> -
> -                    p2m_write_pte(&third[third_table_offset(addr)],
> -                                  pte, flush_pt);
> -                }
> -                break;
> -            case INSERT:
> -                {
> -                    if ( pte.p2m.valid )
> -                        p2m_put_page(pte);
> -                    pte = mfn_to_p2m_entry(maddr >> PAGE_SHIFT, mattr, t);
> -                    p2m_write_pte(&third[third_table_offset(addr)],
> -                                  pte, flush_pt);
> -                    maddr += PAGE_SIZE;
> -                }
> -                break;
> -            case RELINQUISH:
> -            case REMOVE:
> -                {
> -                    if ( !pte.p2m.valid )
> -                    {
> -                        count++;
> -                        break;
> -                    }
> -
> -                    p2m_put_page(pte);
> -
> -                    count += 0x10;
> -
> -                    memset(&pte, 0x00, sizeof(pte));
> -                    p2m_write_pte(&third[third_table_offset(addr)],
> -                                  pte, flush_pt);
> -                    count++;
> -                }
> -                break;
> -
> -            case CACHEFLUSH:
> -                {
> -                    if ( !pte.p2m.valid || !p2m_is_ram(pte.p2m.type) )
> -                        break;
> -
> -                    flush_page_to_ram(pte.p2m.base);
> -                }
> -                break;
> -        }
> -
> -        /* Preempt every 2MiB (mapped) or 32 MiB (unmapped) - arbitrary */
> -        if ( op == RELINQUISH && count >= 0x2000 )
> -        {
> -            if ( hypercall_preempt_check() )
> -            {
> -                p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT;
> -                rc = -ERESTART;
> -                goto out;
> -            }
> -            count = 0;
> -        }
> -
> -        /* Got the next page */
> -        addr += PAGE_SIZE;
> +        rc = apply_one_level(d, &third[third_table_offset(addr)],
> +                             3, flush_pt, op,
> +                             start_gpaddr, end_gpaddr,
> +                             &addr, &maddr, &flush,
> +                             mattr, t);
> +        if ( rc < 0 ) goto out;
> +        /* L3 had better have done something! We cannot descend any further */
> +        BUG_ON(rc == P2M_ONE_DESCEND);
> +        count += rc;
>      }
>  
>      if ( flush )
> diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
> index 911d32d..821d9ef 100644
> --- a/xen/include/asm-arm/p2m.h
> +++ b/xen/include/asm-arm/p2m.h
> @@ -29,6 +29,14 @@ struct p2m_domain {
>       * resume the search. Apart from during teardown this can only
>       * decrease. */
>      unsigned long lowest_mapped_gfn;
> +
> +    struct {
> +        /* Number of mappings at each p2m tree level */
> +        unsigned long mappings[4];
> +        /* Number of times we have shattered a mapping
> +         * at each p2m tree level. */
> +        unsigned long shattered[4];
> +    } stats;
>  };

Are you introducing stats.mappings just for statistics gathering?
If so, you should say so in the comment.
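
E.g. something along the lines of:

    struct {
        /* Informational statistics only; nothing depends on these
         * counts for correctness. */
        /* Number of mappings at each p2m tree level */
        unsigned long mappings[4];
        /* Number of times we have shattered a mapping
         * at each p2m tree level. */
        unsigned long shattered[4];
    } stats;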


>  /* List of possible type for each page in the p2m entry.
> @@ -79,6 +87,9 @@ int p2m_alloc_table(struct domain *d);
>  void p2m_save_state(struct vcpu *p);
>  void p2m_restore_state(struct vcpu *n);
>  
> +/* */
> +void p2m_dump_info(struct domain *d);
> +
>  /* Look up the MFN corresponding to a domain's PFN. */
>  paddr_t p2m_lookup(struct domain *d, paddr_t gpfn, p2m_type_t *t);
>  
> -- 
> 1.7.10.4
> 
