Re: [Xen-devel] [PATCH 4 of 9] Rework locking in the PoD layer

On Thu, Oct 27, 2011 at 1:33 PM, Andres Lagar-Cavilla
<andres@xxxxxxxxxxxxxxxx> wrote:
>  xen/arch/x86/mm/mm-locks.h |    9 ++
>  xen/arch/x86/mm/p2m-pod.c  |  145 
> +++++++++++++++++++++++++++------------------
>  xen/arch/x86/mm/p2m-pt.c   |    3 +
>  xen/arch/x86/mm/p2m.c      |    7 +-
>  xen/include/asm-x86/p2m.h  |   25 ++-----
>  5 files changed, 113 insertions(+), 76 deletions(-)
>
>
> The PoD layer has a fragile locking discipline. It relies on the
> p2m being globally locked, and it also relies on the page alloc
> lock to protect some of its data structures. Replace this all by an
> explicit pod lock: per p2m, order enforced.
>
> Two consequences:
>    - Critical sections in the pod code protected by the page alloc
>      lock are now reduced to modifications of the domain page list.
>    - When the p2m lock becomes fine-grained, there are no
>      assumptions broken in the PoD layer.
>
> Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
>
> diff -r 332775f72a30 -r 981073d78f7f xen/arch/x86/mm/mm-locks.h
> --- a/xen/arch/x86/mm/mm-locks.h
> +++ b/xen/arch/x86/mm/mm-locks.h
> @@ -155,6 +155,15 @@ declare_mm_lock(p2m)
>  #define p2m_unlock(p)         mm_unlock(&(p)->lock)
>  #define p2m_locked_by_me(p)   mm_locked_by_me(&(p)->lock)
>
> +/* PoD lock (per-p2m-table)
> + *
> + * Protects private PoD data structs. */
> +
> +declare_mm_lock(pod)
> +#define pod_lock(p)           mm_lock(pod, &(p)->pod.lock)
> +#define pod_unlock(p)         mm_unlock(&(p)->pod.lock)
> +#define pod_locked_by_me(p)   mm_locked_by_me(&(p)->pod.lock)
> +
>  /* Page alloc lock (per-domain)
>  *
>  * This is an external lock, not represented by an mm_lock_t. However,
> diff -r 332775f72a30 -r 981073d78f7f xen/arch/x86/mm/p2m-pod.c
> --- a/xen/arch/x86/mm/p2m-pod.c
> +++ b/xen/arch/x86/mm/p2m-pod.c
> @@ -63,6 +63,7 @@ static inline void unlock_page_alloc(str
>  * Populate-on-demand functionality
>  */
>
> +/* PoD lock held on entry */
>  static int
>  p2m_pod_cache_add(struct p2m_domain *p2m,
>                   struct page_info *page,
> @@ -114,43 +115,42 @@ p2m_pod_cache_add(struct p2m_domain *p2m
>         unmap_domain_page(b);
>     }
>
> +    /* First, take all pages off the domain list */
>     lock_page_alloc(p2m);
> -
> -    /* First, take all pages off the domain list */
>     for(i=0; i < 1 << order ; i++)
>     {
>         p = page + i;
>         page_list_del(p, &d->page_list);
>     }
>
> -    /* Then add the first one to the appropriate populate-on-demand list */
> -    switch(order)
> -    {
> -    case PAGE_ORDER_2M:
> -        page_list_add_tail(page, &p2m->pod.super); /* lock: page_alloc */
> -        p2m->pod.count += 1 << order;
> -        break;
> -    case PAGE_ORDER_4K:
> -        page_list_add_tail(page, &p2m->pod.single); /* lock: page_alloc */
> -        p2m->pod.count += 1;
> -        break;
> -    default:
> -        BUG();
> -    }
> -
>     /* Ensure that the PoD cache has never been emptied.
>      * This may cause "zombie domains" since the page will never be freed. */
>     BUG_ON( d->arch.relmem != RELMEM_not_started );
>
>     unlock_page_alloc(p2m);
>
> +    /* Then add the first one to the appropriate populate-on-demand list */
> +    switch(order)
> +    {
> +    case PAGE_ORDER_2M:
> +        page_list_add_tail(page, &p2m->pod.super);
> +        p2m->pod.count += 1 << order;
> +        break;
> +    case PAGE_ORDER_4K:
> +        page_list_add_tail(page, &p2m->pod.single);
> +        p2m->pod.count += 1;
> +        break;
> +    default:
> +        BUG();
> +    }
> +
>     return 0;
>  }
>
>  /* Get a page of size order from the populate-on-demand cache.  Will break
>  * down 2-meg pages into singleton pages automatically.  Returns null if
> - * a superpage is requested and no superpages are available.  Must be called
> - * with the d->page_lock held. */
> + * a superpage is requested and no superpages are available. */
> +/* PoD lock held on entry */
>  static struct page_info * p2m_pod_cache_get(struct p2m_domain *p2m,
>                                             unsigned long order)
>  {
> @@ -185,7 +185,7 @@ static struct page_info * p2m_pod_cache_
>     case PAGE_ORDER_2M:
>         BUG_ON( page_list_empty(&p2m->pod.super) );
>         p = page_list_remove_head(&p2m->pod.super);
> -        p2m->pod.count -= 1 << order; /* Lock: page_alloc */
> +        p2m->pod.count -= 1 << order;
>         break;
>     case PAGE_ORDER_4K:
>         BUG_ON( page_list_empty(&p2m->pod.single) );
> @@ -197,16 +197,19 @@ static struct page_info * p2m_pod_cache_
>     }
>
>     /* Put the pages back on the domain page_list */
> +    lock_page_alloc(p2m);
>     for ( i = 0 ; i < (1 << order); i++ )
>     {
>         BUG_ON(page_get_owner(p + i) != p2m->domain);
>         page_list_add_tail(p + i, &p2m->domain->page_list);
>     }
> +    unlock_page_alloc(p2m);
>
>     return p;
>  }
>
>  /* Set the size of the cache, allocating or freeing as necessary. */
> +/* PoD lock held on entry */
>  static int
>  p2m_pod_set_cache_target(struct p2m_domain *p2m, unsigned long pod_target, 
> int preemptible)
>  {
> @@ -259,8 +262,6 @@ p2m_pod_set_cache_target(struct p2m_doma
>
>         /* Grab the lock before checking that pod.super is empty, or the last
>          * entries may disappear before we grab the lock. */
> -        lock_page_alloc(p2m);
> -
>         if ( (p2m->pod.count - pod_target) > SUPERPAGE_PAGES
>              && !page_list_empty(&p2m->pod.super) )
>             order = PAGE_ORDER_2M;
> @@ -271,8 +272,6 @@ p2m_pod_set_cache_target(struct p2m_doma
>
>         ASSERT(page != NULL);
>
> -        unlock_page_alloc(p2m);
> -
>         /* Then free them */
>         for ( i = 0 ; i < (1 << order) ; i++ )
>         {
> @@ -348,7 +347,7 @@ p2m_pod_set_mem_target(struct domain *d,
>     int ret = 0;
>     unsigned long populated;
>
> -    p2m_lock(p2m);
> +    pod_lock(p2m);
>
>     /* P == B: Nothing to do. */
>     if ( p2m->pod.entry_count == 0 )
> @@ -377,7 +376,7 @@ p2m_pod_set_mem_target(struct domain *d,
>     ret = p2m_pod_set_cache_target(p2m, pod_target, 1/*preemptible*/);
>
>  out:
> -    p2m_unlock(p2m);
> +    pod_unlock(p2m);
>
>     return ret;
>  }
> @@ -390,7 +389,7 @@ p2m_pod_empty_cache(struct domain *d)
>
>     /* After this barrier no new PoD activities can happen. */
>     BUG_ON(!d->is_dying);
> -    spin_barrier(&p2m->lock.lock);
> +    spin_barrier(&p2m->pod.lock.lock);
>
>     lock_page_alloc(p2m);
>
> @@ -431,7 +430,8 @@ p2m_pod_offline_or_broken_hit(struct pag
>     if ( !(d = page_get_owner(p)) || !(p2m = p2m_get_hostp2m(d)) )
>         return 0;
>
> -    lock_page_alloc(p2m);
> +    pod_lock(p2m);
> +
>     bmfn = mfn_x(page_to_mfn(p));
>     page_list_for_each_safe(q, tmp, &p2m->pod.super)
>     {
> @@ -462,12 +462,14 @@ p2m_pod_offline_or_broken_hit(struct pag
>         }
>     }
>
> -    unlock_page_alloc(p2m);
> +    pod_unlock(p2m);
>     return 0;
>
>  pod_hit:
> +    lock_page_alloc(p2m);
>     page_list_add_tail(p, &d->arch.relmem_list);
>     unlock_page_alloc(p2m);
> +    pod_unlock(p2m);
>     return 1;
>  }
>
> @@ -486,9 +488,9 @@ p2m_pod_offline_or_broken_replace(struct
>     if ( unlikely(!p) )
>         return;
>
> -    p2m_lock(p2m);
> +    pod_lock(p2m);
>     p2m_pod_cache_add(p2m, p, PAGE_ORDER_4K);
> -    p2m_unlock(p2m);
> +    pod_unlock(p2m);
>     return;
>  }
>
> @@ -512,6 +514,7 @@ p2m_pod_decrease_reservation(struct doma
>     int steal_for_cache = 0;
>     int pod = 0, nonpod = 0, ram = 0;
>
> +    pod_lock(p2m);
>
>     /* If we don't have any outstanding PoD entries, let things take their
>      * course */
> @@ -521,11 +524,10 @@ p2m_pod_decrease_reservation(struct doma
>     /* Figure out if we need to steal some freed memory for our cache */
>     steal_for_cache =  ( p2m->pod.entry_count > p2m->pod.count );
>
> -    p2m_lock(p2m);
>     audit_p2m(p2m, 1);
>
>     if ( unlikely(d->is_dying) )
> -        goto out_unlock;
> +        goto out;
>
>     /* See what's in here. */
>     /* FIXME: Add contiguous; query for PSE entries? */

I don't think this can be quite right.

The point of holding the p2m lock here is so that the p2m entries
don't change between the gfn_to_mfn_query() here and the
set_p2m_entries() below.  The balloon driver racing with other vcpus
populating pages is exactly the kind of race we expect to experience.
And in any case, this change will cause set_p2m_entry() to ASSERT()
because we're not holding the p2m lock.

Or am I missing something?

I haven't yet looked at the rest of the patch series, but it would
definitely be better for people in the future looking back and trying
to figure out why the code is the way that it is if even transitory
changesets don't introduce "temporary" violations of invariants. :-)

> @@ -547,14 +549,14 @@ p2m_pod_decrease_reservation(struct doma
>
>     /* No populate-on-demand?  Don't need to steal anything?  Then we're 
> done!*/
>     if(!pod && !steal_for_cache)
> -        goto out_unlock;
> +        goto out_audit;
>
>     if ( !nonpod )
>     {
>         /* All PoD: Mark the whole region invalid and tell caller
>          * we're done. */
>         set_p2m_entry(p2m, gpfn, _mfn(INVALID_MFN), order, p2m_invalid, 
> p2m->default_access);
> -        p2m->pod.entry_count-=(1<<order); /* Lock: p2m */
> +        p2m->pod.entry_count-=(1<<order);
>         BUG_ON(p2m->pod.entry_count < 0);
>         ret = 1;
>         goto out_entry_check;
> @@ -577,7 +579,7 @@ p2m_pod_decrease_reservation(struct doma
>         if ( t == p2m_populate_on_demand )
>         {
>             set_p2m_entry(p2m, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid, 
> p2m->default_access);
> -            p2m->pod.entry_count--; /* Lock: p2m */
> +            p2m->pod.entry_count--;
>             BUG_ON(p2m->pod.entry_count < 0);
>             pod--;
>         }
> @@ -613,11 +615,11 @@ out_entry_check:
>         p2m_pod_set_cache_target(p2m, p2m->pod.entry_count, 0/*can't 
> preempt*/);
>     }
>
> -out_unlock:
> +out_audit:
>     audit_p2m(p2m, 1);
> -    p2m_unlock(p2m);
>
>  out:
> +    pod_unlock(p2m);
>     return ret;
>  }
>
> @@ -630,20 +632,24 @@ void p2m_pod_dump_data(struct domain *d)
>
>
>  /* Search for all-zero superpages to be reclaimed as superpages for the
> - * PoD cache. Must be called w/ p2m lock held, page_alloc lock not held. */
> -static int
> + * PoD cache. Must be called w/ pod lock held, page_alloc lock not held. */
> +static void

For the same reason, this must be called with the p2m lock held: it
calls gfn_to_mfn_query() and then calls set_p2m_entry().  As it
happens, this always *is* called with the p2m lock held at the moment;
but the comment still needs to reflect this.  Similarly in
p2m_pod_zero_check().

>  p2m_pod_zero_check_superpage(struct p2m_domain *p2m, unsigned long gfn)
>  {
>     mfn_t mfn, mfn0 = _mfn(INVALID_MFN);
>     p2m_type_t type, type0 = 0;
>     unsigned long * map = NULL;
> -    int ret=0, reset = 0;
> +    int success = 0, reset = 0;
>     int i, j;
>     int max_ref = 1;
>     struct domain *d = p2m->domain;
>
>     if ( !superpage_aligned(gfn) )
> -        goto out;
> +        return;
> +
> +    /* If we were enforcing ordering against p2m locks, this is a place
> +     * to drop the PoD lock and re-acquire it once we're done mucking with
> +     * the p2m. */
>
>     /* Allow an extra refcount for one shadow pt mapping in shadowed domains 
> */
>     if ( paging_mode_shadow(d) )
> @@ -751,19 +757,24 @@ p2m_pod_zero_check_superpage(struct p2m_
>         __trace_var(TRC_MEM_POD_ZERO_RECLAIM, 0, sizeof(t), &t);
>     }
>
> -    /* Finally!  We've passed all the checks, and can add the mfn superpage
> -     * back on the PoD cache, and account for the new p2m PoD entries */
> -    p2m_pod_cache_add(p2m, mfn_to_page(mfn0), PAGE_ORDER_2M);
> -    p2m->pod.entry_count += SUPERPAGE_PAGES;
> +    success = 1;
> +
>
>  out_reset:
>     if ( reset )
>         set_p2m_entry(p2m, gfn, mfn0, 9, type0, p2m->default_access);
>
>  out:
> -    return ret;
> +    if ( success )
> +    {
> +        /* Finally!  We've passed all the checks, and can add the mfn 
> superpage
> +         * back on the PoD cache, and account for the new p2m PoD entries */
> +        p2m_pod_cache_add(p2m, mfn_to_page(mfn0), PAGE_ORDER_2M);
> +        p2m->pod.entry_count += SUPERPAGE_PAGES;
> +    }
>  }
>
> +/* On entry, PoD lock is held */
>  static void
>  p2m_pod_zero_check(struct p2m_domain *p2m, unsigned long *gfns, int count)
>  {
> @@ -775,6 +786,8 @@ p2m_pod_zero_check(struct p2m_domain *p2
>     int i, j;
>     int max_ref = 1;
>
> +    /* Also the right time to drop pod_lock if enforcing ordering against 
> p2m_lock */
> +
>     /* Allow an extra refcount for one shadow pt mapping in shadowed domains 
> */
>     if ( paging_mode_shadow(d) )
>         max_ref++;
> @@ -841,7 +854,6 @@ p2m_pod_zero_check(struct p2m_domain *p2
>             if( *(map[i]+j) != 0 )
>                 break;
>
> -        unmap_domain_page(map[i]);
>
>         /* See comment in p2m_pod_zero_check_superpage() re gnttab
>          * check timing.  */
> @@ -849,8 +861,15 @@ p2m_pod_zero_check(struct p2m_domain *p2
>         {
>             set_p2m_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K,
>                 types[i], p2m->default_access);
> +            unmap_domain_page(map[i]);
> +            map[i] = NULL;
>         }
> -        else
> +    }
> +
> +    /* Finally, add to cache */
> +    for ( i=0; i < count; i++ )
> +    {
> +        if ( map[i] )
>         {
>             if ( tb_init_done )
>             {
> @@ -867,6 +886,8 @@ p2m_pod_zero_check(struct p2m_domain *p2
>                 __trace_var(TRC_MEM_POD_ZERO_RECLAIM, 0, sizeof(t), &t);
>             }
>
> +            unmap_domain_page(map[i]);
> +
>             /* Add to cache, and account for the new p2m PoD entry */
>             p2m_pod_cache_add(p2m, mfn_to_page(mfns[i]), PAGE_ORDER_4K);
>             p2m->pod.entry_count++;
> @@ -876,6 +897,7 @@ p2m_pod_zero_check(struct p2m_domain *p2
>  }
>
>  #define POD_SWEEP_LIMIT 1024
> +/* Only one CPU at a time is guaranteed to enter a sweep */
>  static void
>  p2m_pod_emergency_sweep_super(struct p2m_domain *p2m)
>  {
> @@ -964,7 +986,8 @@ p2m_pod_demand_populate(struct p2m_domai
>
>     ASSERT(p2m_locked_by_me(p2m));
>
> -    /* This check is done with the p2m lock held.  This will make sure that
> +    pod_lock(p2m);
> +    /* This check is done with the pod lock held.  This will make sure that
>      * even if d->is_dying changes under our feet, p2m_pod_empty_cache()
>      * won't start until we're done. */
>     if ( unlikely(d->is_dying) )
> @@ -974,6 +997,7 @@ p2m_pod_demand_populate(struct p2m_domai
>      * 1GB region to 2MB chunks for a retry. */
>     if ( order == PAGE_ORDER_1G )
>     {
> +        pod_unlock(p2m);
>         gfn_aligned = (gfn >> order) << order;
>         /* Note that we are supposed to call set_p2m_entry() 512 times to
>          * split 1GB into 512 2MB pages here. But We only do once here because
> @@ -983,6 +1007,7 @@ p2m_pod_demand_populate(struct p2m_domai
>         set_p2m_entry(p2m, gfn_aligned, _mfn(0), PAGE_ORDER_2M,
>                       p2m_populate_on_demand, p2m->default_access);
>         audit_p2m(p2m, 1);
> +        /* This is because the ept/pt caller locks the p2m recursively */
>         p2m_unlock(p2m);
>         return 0;
>     }
> @@ -996,11 +1021,15 @@ p2m_pod_demand_populate(struct p2m_domai
>
>         /* If we're low, start a sweep */
>         if ( order == PAGE_ORDER_2M && page_list_empty(&p2m->pod.super) )
> +            /* Note that sweeps scan other ranges in the p2m. In an scenario
> +             * in which p2m locks are order-enforced wrt pod lock and p2m
> +             * locks are fine grained, this will result in deadlock */
>             p2m_pod_emergency_sweep_super(p2m);
>
>         if ( page_list_empty(&p2m->pod.single) &&
>              ( ( order == PAGE_ORDER_4K )
>                || (order == PAGE_ORDER_2M && page_list_empty(&p2m->pod.super) 
> ) ) )
> +            /* Same comment regarding deadlock applies */
>             p2m_pod_emergency_sweep(p2m);
>     }
>
> @@ -1008,8 +1037,6 @@ p2m_pod_demand_populate(struct p2m_domai
>     if ( q == p2m_guest && gfn > p2m->pod.max_guest )
>         p2m->pod.max_guest = gfn;
>
> -    lock_page_alloc(p2m);
> -
>     if ( p2m->pod.count == 0 )
>         goto out_of_memory;
>
> @@ -1022,8 +1049,6 @@ p2m_pod_demand_populate(struct p2m_domai
>
>     BUG_ON((mfn_x(mfn) & ((1 << order)-1)) != 0);
>
> -    unlock_page_alloc(p2m);
> -
>     gfn_aligned = (gfn >> order) << order;
>
>     set_p2m_entry(p2m, gfn_aligned, mfn, order, p2m_ram_rw, 
> p2m->default_access);
> @@ -1034,8 +1059,9 @@ p2m_pod_demand_populate(struct p2m_domai
>         paging_mark_dirty(d, mfn_x(mfn) + i);
>     }
>
> -    p2m->pod.entry_count -= (1 << order); /* Lock: p2m */
> +    p2m->pod.entry_count -= (1 << order);
>     BUG_ON(p2m->pod.entry_count < 0);
> +    pod_unlock(p2m);
>
>     if ( tb_init_done )
>     {
> @@ -1054,16 +1080,17 @@ p2m_pod_demand_populate(struct p2m_domai
>
>     return 0;
>  out_of_memory:
> -    unlock_page_alloc(p2m);
> +    pod_unlock(p2m);
>
>     printk("%s: Out of populate-on-demand memory! tot_pages %" PRIu32 " 
> pod_entries %" PRIi32 "\n",
>            __func__, d->tot_pages, p2m->pod.entry_count);
>     domain_crash(d);
>  out_fail:
> +    pod_unlock(p2m);
>     return -1;
>  remap_and_retry:
>     BUG_ON(order != PAGE_ORDER_2M);
> -    unlock_page_alloc(p2m);
> +    pod_unlock(p2m);
>
>     /* Remap this 2-meg region in singleton chunks */
>     gfn_aligned = (gfn>>order)<<order;
> @@ -1133,9 +1160,11 @@ guest_physmap_mark_populate_on_demand(st
>         rc = -EINVAL;
>     else
>     {
> -        p2m->pod.entry_count += 1 << order; /* Lock: p2m */
> +        pod_lock(p2m);
> +        p2m->pod.entry_count += 1 << order;
>         p2m->pod.entry_count -= pod_count;
>         BUG_ON(p2m->pod.entry_count < 0);
> +        pod_unlock(p2m);
>     }
>
>     audit_p2m(p2m, 1);
> diff -r 332775f72a30 -r 981073d78f7f xen/arch/x86/mm/p2m-pt.c
> --- a/xen/arch/x86/mm/p2m-pt.c
> +++ b/xen/arch/x86/mm/p2m-pt.c
> @@ -1001,6 +1001,7 @@ void audit_p2m(struct p2m_domain *p2m, i
>     if ( !paging_mode_translate(d) )
>         return;
>
> +    pod_lock(p2m);
>     //P2M_PRINTK("p2m audit starts\n");
>
>     test_linear = ( (d == current->domain)
> @@ -1247,6 +1248,8 @@ void audit_p2m(struct p2m_domain *p2m, i
>                    pmbad, mpbad);
>         WARN();
>     }
> +
> +    pod_unlock(p2m);
>  }
>  #endif /* P2M_AUDIT */
>
> diff -r 332775f72a30 -r 981073d78f7f xen/arch/x86/mm/p2m.c
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -72,6 +72,7 @@ boolean_param("hap_2mb", opt_hap_2mb);
>  static void p2m_initialise(struct domain *d, struct p2m_domain *p2m)
>  {
>     mm_lock_init(&p2m->lock);
> +    mm_lock_init(&p2m->pod.lock);
>     INIT_LIST_HEAD(&p2m->np2m_list);
>     INIT_PAGE_LIST_HEAD(&p2m->pages);
>     INIT_PAGE_LIST_HEAD(&p2m->pod.super);
> @@ -506,8 +507,10 @@ guest_physmap_add_entry(struct domain *d
>             rc = -EINVAL;
>         else
>         {
> -            p2m->pod.entry_count -= pod_count; /* Lock: p2m */
> +            pod_lock(p2m);
> +            p2m->pod.entry_count -= pod_count;
>             BUG_ON(p2m->pod.entry_count < 0);
> +            pod_unlock(p2m);
>         }
>     }
>
> @@ -1125,8 +1128,10 @@ p2m_flush_table(struct p2m_domain *p2m)
>     /* "Host" p2m tables can have shared entries &c that need a bit more
>      * care when discarding them */
>     ASSERT(p2m_is_nestedp2m(p2m));
> +    pod_lock(p2m);
>     ASSERT(page_list_empty(&p2m->pod.super));
>     ASSERT(page_list_empty(&p2m->pod.single));
> +    pod_unlock(p2m);
>
>     /* This is no longer a valid nested p2m for any address space */
>     p2m->cr3 = CR3_EADDR;
> diff -r 332775f72a30 -r 981073d78f7f xen/include/asm-x86/p2m.h
> --- a/xen/include/asm-x86/p2m.h
> +++ b/xen/include/asm-x86/p2m.h
> @@ -257,24 +257,13 @@ struct p2m_domain {
>     unsigned long max_mapped_pfn;
>
>     /* Populate-on-demand variables
> -     * NB on locking.  {super,single,count} are
> -     * covered by d->page_alloc_lock, since they're almost always used in
> -     * conjunction with that functionality.  {entry_count} is covered by
> -     * the domain p2m lock, since it's almost always used in conjunction
> -     * with changing the p2m tables.
>      *
> -     * At this point, both locks are held in two places.  In both,
> -     * the order is [p2m,page_alloc]:
> -     * + p2m_pod_decrease_reservation() calls p2m_pod_cache_add(),
> -     *   which grabs page_alloc
> -     * + p2m_pod_demand_populate() grabs both; the p2m lock to avoid
> -     *   double-demand-populating of pages, the page_alloc lock to
> -     *   protect moving stuff from the PoD cache to the domain page list.
> -     *
> -     * We enforce this lock ordering through a construct in mm-locks.h.
> -     * This demands, however, that we store the previous lock-ordering
> -     * level in effect before grabbing the page_alloc lock.
> -     */
> +     * All variables are protected with the pod lock. We cannot rely on
> +     * the p2m lock if it's turned into a fine-grained lock.
> +     * We only use the domain page_alloc lock for additions and
> +     * deletions to the domain's page list. Because we use it nested
> +     * within the PoD lock, we enforce it's ordering (by remembering
> +     * the unlock level). */
>     struct {
>         struct page_list_head super,   /* List of superpages                */
>                          single;       /* Non-super lists                   */
> @@ -283,6 +272,8 @@ struct p2m_domain {
>         unsigned         reclaim_super; /* Last gpfn of a scan */
>         unsigned         reclaim_single; /* Last gpfn of a scan */
>         unsigned         max_guest;    /* gpfn of max guest demand-populate */
> +        mm_lock_t        lock;         /* Locking of private pod structs,   *
> +                                        * not relying on the p2m lock.     
>  */
>         int              page_alloc_unlock_level; /* To enforce lock ordering 
> */
>     } pod;
>  };
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel
>

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
WARNING - OLD ARCHIVES

xen-devel

Re: [Xen-devel] [PATCH 4 of 9] Rework locking in the PoD layer