
Re: [Xen-devel] [PATCH 2/4] libxc: support of linear p2m list for migration of pv-domains



On 11/12/15 15:51, Andrew Cooper wrote:
> On 11/12/15 11:31, Juergen Gross wrote:
>> In order to be able to migrate pv-domains with more than 512 GB of RAM
>> the p2m information can be specified by the guest kernel via a virtual
>> mapped linear p2m list instead of a 3 level tree.
>>
>> Add support for this new p2m format in libxc.
>>
>> Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
>> ---
>>  tools/libxc/xc_sr_save_x86_pv.c | 139 +++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 136 insertions(+), 3 deletions(-)
>>
>> diff --git a/tools/libxc/xc_sr_save_x86_pv.c b/tools/libxc/xc_sr_save_x86_pv.c
>> index d7acd37..0237378 100644
>> --- a/tools/libxc/xc_sr_save_x86_pv.c
>> +++ b/tools/libxc/xc_sr_save_x86_pv.c
>> @@ -116,7 +116,7 @@ static int map_p2m_leaves(struct xc_sr_context *ctx, xen_pfn_t *mfns,
>>   * frames making up the guests p2m table.  Construct a list of pfns making up
>>   * the table.
>>   */
>> -static int map_p2m(struct xc_sr_context *ctx)
>> +static int map_p2m_tree(struct xc_sr_context *ctx)
>>  {
>>      /* Terminology:
>>       *
>> @@ -138,8 +138,6 @@ static int map_p2m(struct xc_sr_context *ctx)
>>      void *guest_fl = NULL;
>>      size_t local_fl_size;
>>  
>> -    ctx->x86_pv.max_pfn = GET_FIELD(ctx->x86_pv.shinfo, arch.max_pfn,
>> -                                    ctx->x86_pv.width) - 1;
>>      fpp = PAGE_SIZE / ctx->x86_pv.width;
>>      fll_entries = (ctx->x86_pv.max_pfn / (fpp * fpp)) + 1;
>>      if ( fll_entries > fpp )
>> @@ -270,6 +268,141 @@ err:
>>  }
>>  
>>  /*
>> + * Map the guest p2m frames specified via a cr3 value, a virtual address, and
>> + * the maximum pfn.
> 
> Probably worth stating that this function assumes PAE paging is in use.

Okay. I don't mind.
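
Something along these lines, perhaps (wording to taste):

/*
 * Map the guest p2m frames specified via a cr3 value, a virtual
 * address, and the maximum pfn.  PAE paging is assumed to be active,
 * even for 32 bit guests, so ptes are always 64 bits wide.
 */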

> 
>> + */
>> +static int map_p2m_list(struct xc_sr_context *ctx, uint64_t p2m_cr3)
>> +{
>> +    xc_interface *xch = ctx->xch;
>> +    xen_vaddr_t p2m_vaddr, p2m_end, mask, off;
>> +    xen_pfn_t p2m_mfn, mfn, saved_mfn, max_pfn;
>> +    uint64_t *ptes;
>> +    xen_pfn_t *mfns;
>> +    unsigned fpp, n_pages, level, shift, idx_start, idx_end, idx, saved_idx;
>> +    int rc = -1;
>> +
>> +    p2m_mfn = cr3_to_mfn(ctx, p2m_cr3);
>> +    if ( p2m_mfn == 0 || p2m_mfn > ctx->x86_pv.max_mfn )
> 
> mfn 0 isn't invalid to use here.  It could, in principle, be available
> for PV guest use.

No, the value 0 indicates that the linear p2m info isn't valid. See
comments in xen/include/public/arch-x86/xen.h
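
Quoting from memory (the header is authoritative, the wording may
differ slightly):

    /*
     * Following three fields are valid if p2m_cr3 contains a value
     * different from 0.
     * ...
     */
    unsigned long p2m_cr3;      /* cr3 value of the p2m address space */
    unsigned long p2m_vaddr;    /* virtual address of the p2m list */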

> 
>> +    {
>> +        ERROR("Bad p2m_cr3 value %#lx", p2m_cr3);
>> +        errno = ERANGE;
>> +        return -1;
>> +    }
>> +
>> +    p2m_vaddr = GET_FIELD(ctx->x86_pv.shinfo, arch.p2m_vaddr,
>> +                          ctx->x86_pv.width);
>> +    fpp = PAGE_SIZE / ctx->x86_pv.width;
>> +    ctx->x86_pv.p2m_frames = (ctx->x86_pv.max_pfn + fpp) / fpp;
> 
> ctx->x86_pv.max_pfn / fpp + 1
> 
> It is mathematically identical, but resilient to overflow.

Okay.
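
Just to spell the hazard out for the archives, a standalone
illustration (not part of the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t fpp = 512;                  /* PAGE_SIZE / width, 64 bit guest */
    uint64_t max_pfn = UINT64_MAX - 10;  /* pathological, but a legal input */

    /* (max_pfn + fpp) wraps around to 501 here, so the quotient is 0. */
    printf("naive:     %llu\n", (unsigned long long)((max_pfn + fpp) / fpp));
    /* No intermediate overflow, correct for any max_pfn. */
    printf("resilient: %llu\n", (unsigned long long)(max_pfn / fpp + 1));

    return 0;
}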

> 
>> +    p2m_end = p2m_vaddr + ctx->x86_pv.p2m_frames * PAGE_SIZE - 1;
> 
> You probably want to sanity check both p2m_vaddr and p2m_end for being
> either <4G or canonical, depending on the guest, and out of the Xen
> mappings.

Yes, you are right.

> 
> I believe this allows you drop 'mask' in its entirety.

Hmm, no. I'd still have to mask away the top 16 bits, which are all
set for a kernel virtual address.
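
Something like the following is what I have in mind for the sanity
check (sketch only; the test against the Xen-reserved area still needs
to be added from the existing definitions):

static bool vaddr_sane(struct xc_sr_context *ctx, xen_vaddr_t va)
{
    /* Bits 63..47 of a canonical 64 bit address are all equal. */
    const xen_vaddr_t top = 0xffff800000000000ULL;

    if ( ctx->x86_pv.width == 8 )
        return (va & top) == 0 || (va & top) == top;

    /* 32 bit guest: the p2m list has to live below 4G. */
    return va < (1ULL << 32);
}

The mask stays either way: even a canonical kernel address has the top
16 bits set, and they have to be stripped before the value can be
shifted down into page table indices.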

> 
>> +    DPRINTF("p2m list from %#lx to %#lx, root at %#lx", p2m_vaddr, p2m_end,
>> +            p2m_mfn);
>> +    DPRINTF("max_pfn %#lx, p2m_frames %d", ctx->x86_pv.max_pfn,
>> +            ctx->x86_pv.p2m_frames);
>> +
>> +    mask = (ctx->x86_pv.width == 8) ?
>> +           0x0000ffffffffffffULL : 0x00000000ffffffffULL;
>> +
>> +    mfns = malloc(sizeof(*mfns));
>> +    if ( !mfns )
>> +    {
>> +        ERROR("Cannot allocate memory for array of %u mfns", 1);
>> +        goto err;
>> +    }
>> +    mfns[0] = p2m_mfn;
>> +    off = 0;
>> +    saved_mfn = 0;
>> +    idx_start = idx_end = saved_idx = 0;
>> +
>> +    for ( level = ctx->x86_pv.levels; level > 0; level-- )
>> +    {
>> +        n_pages = idx_end - idx_start + 1;
>> +        ptes = xc_map_foreign_pages(xch, ctx->domid, PROT_READ, mfns, n_pages);
>> +        if ( !ptes )
>> +        {
>> +            PERROR("Failed to map %u page table pages for p2m list", n_pages);
>> +            goto err;
>> +        }
>> +        free(mfns);
>> +
>> +        shift = level * 9 + 3;
>> +        idx_start = ((p2m_vaddr - off) & mask) >> shift;
>> +        idx_end = ((p2m_end - off) & mask) >> shift;
>> +        idx = idx_end - idx_start + 1;
>> +        mfns = malloc(sizeof(*mfns) * idx);
>> +        if ( !mfns )
>> +        {
>> +            ERROR("Cannot allocate memory for array of %u mfns", idx);
>> +            goto err;
>> +        }
>> +
>> +        for ( idx = idx_start; idx <= idx_end; idx++ )
>> +        {
>> +            mfn = pte_to_frame(ptes[idx]);
>> +            if ( mfn == 0 || mfn > ctx->x86_pv.max_mfn )
>> +            {
>> +                ERROR("Bad mfn %#lx during page table walk for vaddr %#lx at level %d of p2m list",
>> +                      mfn, off + ((xen_vaddr_t)idx << shift), level);
>> +                errno = ERANGE;
>> +                goto err;
>> +            }
>> +            mfns[idx - idx_start] = mfn;
>> +
>> +            /* Maximum pfn check at level 2. Same reasoning as for p2m tree. */
>> +            if ( level == 2 )
>> +            {
>> +                if ( mfn != saved_mfn )
>> +                {
>> +                    saved_mfn = mfn;
>> +                    saved_idx = idx - idx_start;
>> +                }
>> +            }
>> +        }
>> +
>> +        if ( level == 2 )
>> +        {
>> +            max_pfn = ((xen_pfn_t)saved_idx << 9) * fpp - 1;
>> +            if ( max_pfn < ctx->x86_pv.max_pfn )
>> +            {
>> +                ctx->x86_pv.max_pfn = max_pfn;
>> +                ctx->x86_pv.p2m_frames = (ctx->x86_pv.max_pfn + fpp) / fpp;
>> +                p2m_end = p2m_vaddr + ctx->x86_pv.p2m_frames * PAGE_SIZE - 
>> 1;
>> +                idx_end = idx_start + saved_idx;
>> +            }
>> +        }
>> +
>> +        munmap(ptes, n_pages * PAGE_SIZE);
>> +        ptes = NULL;
>> +        off = p2m_vaddr & ((mask >> shift) << shift);
>> +    }
>> +
>> +    /* Map the p2m leaves themselves. */
>> +    rc = map_p2m_leaves(ctx, mfns, idx_end - idx_start + 1);
>> +
>> +err:
>> +    free(mfns);
>> +    if ( ptes )
>> +        munmap(ptes, n_pages * PAGE_SIZE);
> 
> Well - I think I have understood what is going on here, and it looks
> plausible.

I hope so. :-)
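
For anyone else reading along, the index arithmetic with made-up
numbers (64 bit guest, 4 levels, mask = 0x0000ffffffffffff):

/*
 * p2m_vaddr = 0xffffc90000000000   (example value)
 *
 * level 4: shift = 4 * 9 + 3 = 39, off = 0
 *   idx_start = ((p2m_vaddr - 0) & mask) >> 39   = 0x192  (slot 402)
 *   off       = p2m_vaddr & ((mask >> 39) << 39) = 0xc90000000000
 * level 3: shift = 30
 *   idx_start = ((p2m_vaddr - off) & mask) >> 30 = 0
 * levels 2 and 1 proceed the same way with shifts 21 and 12.
 */

The mask strips the sign extension of the (kernel) address, off
re-bases the address for the next level, and each shift isolates the 9
index bits of that level (level 1's shift of 12 being the page shift).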


Juergen

