
Re: [Xen-devel] [PATCH v4 10/14] xen/x86: populate PVHv2 Dom0 physical memory map



>>> On 30.11.16 at 17:49, <roger.pau@xxxxxxxxxx> wrote:
> @@ -302,7 +307,8 @@ static unsigned long __init compute_dom0_nr_pages(
>              avail -= max_pdx >> s;
>      }
>  
> -    need_paging = opt_dom0_shadow || (is_pvh_domain(d) && !iommu_hap_pt_share);
> +    need_paging = opt_dom0_shadow || (has_hvm_container_domain(d) &&
> +                  (!iommu_hap_pt_share || !paging_mode_hap(d)));

Indentation.
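I.e. something along these lines (just to illustrate the expected
alignment of the continuation lines):

    need_paging = opt_dom0_shadow ||
                  (has_hvm_container_domain(d) &&
                   (!iommu_hap_pt_share || !paging_mode_hap(d)));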

> @@ -545,11 +552,12 @@ static __init void pvh_map_all_iomem(struct domain *d, unsigned long nr_pages)
>      ASSERT(nr_holes == 0);
>  }
>  
> -static __init void pvh_setup_e820(struct domain *d, unsigned long nr_pages)
> +static __init void hvm_setup_e820(struct domain *d, unsigned long nr_pages)

Why the rename?

> @@ -577,8 +585,19 @@ static __init void pvh_setup_e820(struct domain *d, unsigned long nr_pages)
>              continue;
>          }
>  
> -        *entry_guest = *entry;
> -        pages = PFN_UP(entry_guest->size);
> +        /*
> +         * Make sure the start and length are aligned to PAGE_SIZE, because
> +         * that's the minimum granularity of the 2nd stage translation.
> +         */
> +        start = ROUNDUP(entry->addr, PAGE_SIZE);
> +        end = (entry->addr + entry->size) & PAGE_MASK;

Taking the comment into consideration, I wonder whether you
wouldn't be better off using PAGE_ORDER_4K here, as that's the
granularity the p2m code works with.
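E.g. something like this (untested, just a sketch of the idea,
deriving the granularity from PAGE_ORDER_4K instead of hard coding
PAGE_SIZE/PAGE_MASK):

    start = ROUNDUP(entry->addr, PAGE_SIZE << PAGE_ORDER_4K);
    end = (entry->addr + entry->size) &
          ~((paddr_t)(PAGE_SIZE << PAGE_ORDER_4K) - 1);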

> @@ -1010,8 +1029,6 @@ static int __init construct_dom0_pv(
>      BUG_ON(d->vcpu[0] == NULL);
>      BUG_ON(v->is_initialised);
>  
> -    process_pending_softirqs();

Wouldn't this adjustment better fit into the previous patch, together
with its companion below?

> +static int __init hvm_steal_ram(struct domain *d, unsigned long size,
> +                                paddr_t limit, paddr_t *addr)
> +{
> +    unsigned int i = d->arch.nr_e820;
> +
> +    while ( i-- )
> +    {
> +        struct e820entry *entry = &d->arch.e820[i];
> +
> +        if ( entry->type != E820_RAM || entry->size < size )
> +            continue;
> +
> +        /* Subtract from the beginning. */
> +        if ( entry->addr + size < limit && entry->addr >= MB(1) )

<= I think (for the left comparison)?
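I.e.:

    if ( entry->addr + size <= limit && entry->addr >= MB(1) )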

> +static void __init hvm_steal_low_ram(struct domain *d, unsigned long start,
> +                                     unsigned long nr_pages)
> +{
> +    unsigned long mfn;
> +
> +    ASSERT(start + nr_pages < PFN_DOWN(MB(1)));

<= again I think.
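I.e.:

    ASSERT(start + nr_pages <= PFN_DOWN(MB(1)));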

> +static int __init hvm_setup_p2m(struct domain *d)
> +{
> +    struct vcpu *v = d->vcpu[0];
> +    unsigned long nr_pages;
> +    unsigned int i;
> +    int rc;
> +    bool preempted;
> +#define MB1_PAGES PFN_DOWN(MB(1))
> +
> +    nr_pages = compute_dom0_nr_pages(d, NULL, 0);
> +
> +    hvm_setup_e820(d, nr_pages);
> +    do {
> +        preempted = false;
> +        paging_set_allocation(d, dom0_paging_pages(d, nr_pages),
> +                              &preempted);
> +        process_pending_softirqs();
> +    } while ( preempted );
> +
> +    /*
> +     * Memory below 1MB is identity mapped.
> +     * NB: this only makes sense when booted from legacy BIOS.
> +     */
> +    rc = modify_identity_mmio(d, 0, PFN_DOWN(MB(1)), true);
> +    if ( rc )
> +    {
> +        printk("Failed to identity map low 1MB: %d\n", rc);
> +        return rc;
> +    }
> +
> +    /* Populate memory map. */
> +    for ( i = 0; i < d->arch.nr_e820; i++ )
> +    {
> +        unsigned long addr, size;
> +
> +        if ( d->arch.e820[i].type != E820_RAM )
> +            continue;
> +
> +        addr = PFN_DOWN(d->arch.e820[i].addr);
> +        size = PFN_DOWN(d->arch.e820[i].size);
> +
> +        if ( addr >= MB1_PAGES )
> +            rc = hvm_populate_memory_range(d, addr, size);
> +        else if ( addr + size > MB1_PAGES )
> +        {
> +            hvm_steal_low_ram(d, addr, MB1_PAGES - addr);
> +            rc = hvm_populate_memory_range(d, MB1_PAGES,
> +                                           size - (MB1_PAGES - addr));

Is this case possible at all? All x86 systems have some form of
BIOS right below the 1MB boundary, and the E820 map for
Dom0 is being derived from the host one.

> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -475,6 +475,43 @@ void share_xen_page_with_guest(
>      spin_unlock(&d->page_alloc_lock);
>  }
>  
> +int unshare_xen_page_with_guest(struct page_info *page, struct domain *d)

__init

And once it's __init, it may be possible to simplify it, as you don't
need to fear races anymore. E.g. you wouldn't need a loop over cmpxchg().
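Just a sketch (assuming nothing can update count_info concurrently
in an __init-only path, so a plain store suffices):

    x = page->count_info;
    if ( (x & (PGC_count_mask|PGC_allocated)) != (1 | PGC_allocated) )
    {
        spin_unlock(&d->page_alloc_lock);
        return -EINVAL;
    }
    /* No cmpxchg() loop (and no y) needed anymore. */
    page->count_info = PGC_xen_heap;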

> +{
> +    unsigned long y, x;
> +    bool drop_dom_ref;
> +
> +    if ( page_get_owner(page) != d || !(page->count_info & PGC_xen_heap) )

Please don't open code is_xen_heap_page().
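I.e.:

    if ( page_get_owner(page) != d || !is_xen_heap_page(page) )
        return -EINVAL;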

> +        return -EINVAL;
> +
> +    spin_lock(&d->page_alloc_lock);
> +
> +    /* Drop the page reference so we can change the owner. */
> +    y = page->count_info;
> +    do {
> +        x = y;
> +        if ( (x & (PGC_count_mask|PGC_allocated)) != (1 | PGC_allocated) )
> +        {
> +            spin_unlock(&d->page_alloc_lock);
> +            return -EINVAL;
> +        }
> +        y = cmpxchg(&page->count_info, x, PGC_xen_heap);
> +    } while ( y != x );
> +
> +    /* Remove the page from the list of domain pages. */
> +    page_list_del(page, &d->xenpage_list);
> +    drop_dom_ref = (--d->xenheap_pages == 0);

Aren't you open coding

    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

here (except for the check on the initial value, which could be
moved up)?

> +    /* Remove the owner and clear the flags. */
> +    page_set_owner(page, NULL);
> +    page->u.inuse.type_info = 0;

I think you'd better bail early if this is non-zero. Or else please use
the order used elsewhere (clearing type info, then owner) - while
it's benign, it avoids someone later wondering whether the order
is wrong in either place.
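I.e. (matching the order used elsewhere):

    page->u.inuse.type_info = 0;
    page_set_owner(page, NULL);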

Jan
