[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH RFC v2 6/9] xen/arm: Add handling write fault for dirty-page tracing



On Wed, 3 Jul 2013, Jaeyong Yoo wrote:
> From: Elena Pyatunina <e.pyatunina@xxxxxxxxxxx>
> 
> Add handling write fault in do_trap_data_abort_guest for dirty-page
> tracing. Mark dirty function makes the bitmap of dirty pages.
> 
> Signed-off-by: Elena Pyatunina <e.pyatunina@xxxxxxxxxxx>
> Signed-off-by: Jaeyong Yoo <jaeyong.yoo@xxxxxxxxxxx>
> ---
>  xen/arch/arm/mm.c            | 59 ++++++++++++++++++++++++++++++-
>  xen/arch/arm/p2m.c           | 84 
> ++++++++++++++++++++++++++++++++++++++++++++
>  xen/arch/arm/traps.c         |  7 ++++
>  xen/include/asm-arm/domain.h |  8 +++++
>  xen/include/asm-arm/mm.h     |  2 ++
>  xen/include/asm-arm/p2m.h    |  3 ++
>  6 files changed, 162 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> index 650b1fc..f509008 100644
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -680,7 +680,6 @@ void destroy_xen_mappings(unsigned long v, unsigned long 
> e)
>      create_xen_entries(REMOVE, v, 0, (e - v) >> PAGE_SHIFT, 0);
>  }
>  
> -enum mg { mg_clear, mg_ro, mg_rw, mg_rx };
>  static void set_pte_flags_on_range(const char *p, unsigned long l, enum mg 
> mg)
>  {
>      lpae_t pte;
> @@ -1142,6 +1141,64 @@ int is_iomem_page(unsigned long mfn)
>          return 1;
>      return 0;
>  }
> +
> +/*
> + * Set l3e entries of P2M table to be read-only.
> + *
> + * On first write, it page faults, its entry is changed to read-write,
> + * and on retry the write succeeds.
> + *
> + * Populate dirty_bitmap by looking for entries that have been
> + * switched to read-write.
> + */
> +int handle_page_fault(struct domain *d, paddr_t addr)
> +{
> +    lpae_t pte, *first = NULL, *second = NULL, *third = NULL;
> +    struct p2m_domain *p2m = &d->arch.p2m;
> +
> +    if ( d->arch.dirty.mode == 0 || first_table_offset(addr) >= LPAE_ENTRIES 
> )
> +        return 1;

If this is an error condition, it would be better to return a proper error code rather than 1.


> +    spin_lock(&p2m->lock);
> +
> +    first = __map_domain_page(p2m->first_level);
> +
> +    if ( !first ||
> +         !first[first_table_offset(addr)].walk.valid ||
> +         !first[first_table_offset(addr)].walk.table )
> +        goto done;
> +
> +    second = map_domain_page(first[first_table_offset(addr)].walk.base);
> +
> +    if ( !second ||
> +         !second[second_table_offset(addr)].walk.valid ||
> +         !second[second_table_offset(addr)].walk.table )
> +        goto done;
> +
> +    third = map_domain_page(second[second_table_offset(addr)].walk.base);
> +
> +    if ( !third )
> +        goto done;
> +
> +    pte = third[third_table_offset(addr)];
> +
> +    if (pte.p2m.valid && pte.p2m.write == 0)
> +    {
> +        mark_dirty(d, addr);
> +        pte.p2m.write = 1;

Would this bit be sufficient? Otherwise could we take one of the other
available bits in the p2m pte? That would free us from having to handle
a separate data structure to handle dirty bits.


> +        write_pte(&third[third_table_offset(addr)], pte);
> +        flush_tlb_local();

flush_tlb_local should be OK here, because we can always guarantee
that the current VMID is the VMID of the guest whose fault we are
handling.


> +    }
> +
> +done:
> +    if (third) unmap_domain_page(third);
> +    if (second) unmap_domain_page(second);
> +    if (first) unmap_domain_page(first);
> +
> +    spin_unlock(&p2m->lock);
> +    return 0;
> +}
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
> index 8bf7eb7..bae7af7 100644
> --- a/xen/arch/arm/p2m.c
> +++ b/xen/arch/arm/p2m.c
> @@ -412,6 +412,90 @@ unsigned long gmfn_to_mfn(struct domain *d, unsigned 
> long gpfn)
>      return p >> PAGE_SHIFT;
>  }
>  
> +static struct page_info * new_dirty_page(void)
> +{
> +    struct page_info *page = NULL;
> +    void *p;
> +
> +    page = alloc_domheap_page(NULL, 0);
> +    if ( page == NULL )
> +        return NULL;
> +
> +    p = __map_domain_page(page);
> +
> +    clear_page(p);
> +    unmap_domain_page(p);
> +
> +    return page;
> +}
> +
> +void mark_dirty(struct domain *d, paddr_t addr)
> +{
> +    struct page_info *page;
> +    xen_pfn_t *first = NULL, *second = NULL, *third = NULL;
> +    void *p;
> +    int changed;
> +
> +    spin_lock(&d->arch.dirty.lock);
> +
> +    if (d->arch.dirty.top == NULL)
> +    {
> +        page = alloc_domheap_pages(NULL, 1, 0);
> +        if (page == NULL)
> +        {
> +            printk("Error: couldn't allocate page for dirty bitmap!\n");
> +            spin_unlock(&d->arch.dirty.lock);
> +            return;

no error return codes?


> +        }
> +
> +        INIT_PAGE_LIST_HEAD(&d->arch.dirty.pages);
> +        page_list_add(page, &d->arch.dirty.pages);
> +
> +        /* Clear both first level pages */
> +        p = __map_domain_page(page);
> +        clear_page(p);
> +        unmap_domain_page(p);
> +
> +        p = __map_domain_page(page + 1);
> +        clear_page(p);
> +        unmap_domain_page(p);
> +
> +        d->arch.dirty.top = page;
> +    }
> +
> +    first = __map_domain_page(d->arch.dirty.top);
> +    BUG_ON(!first && "Can't map first level p2m.");
>
> +    if ( !first[first_table_offset(addr)])
> +    {
> +        page = new_dirty_page();
> +        page_list_add(page, &d->arch.dirty.pages);
> +        first[first_table_offset(addr)] = page_to_mfn(page);
> +    }
> +
> +    second = map_domain_page(first[first_table_offset(addr)]);
> +    BUG_ON(!second && "Can't map second level p2m.");
>
> +    if (!second[second_table_offset(addr)])
> +    {
> +        page = new_dirty_page();
> +        page_list_add(page, &d->arch.dirty.pages);
> +        second[second_table_offset(addr)] = page_to_mfn(page);
> +    }
> +
> +    third = map_domain_page(second[second_table_offset(addr)]);
> +    BUG_ON(!third && "Can't map third level p2m.");
>
> +    changed = !__test_and_set_bit(third_table_offset(addr), third);

If I am not missing something, the third_table_offset was written to
give you an index into a page to retrieve a 64 bit pte.
If you are only using 1 bit, shouldn't you be using a different indexing
system? Otherwise you are wasting 63 bits for each entry.


> +    if (changed)
           ^ code style

> +    {
> +        d->arch.dirty.count++;
> +    }
> +
> +    if (third) unmap_domain_page(third);
> +    if (second) unmap_domain_page(second);
> +    if (first) unmap_domain_page(first);
> +
> +    spin_unlock(&d->arch.dirty.lock);
> +}
> +
>  /*
>   * Local variables:
>   * mode: C

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.