
Re: [Xen-devel] [PATCH RFC v2 8/9] xen/arm: Implement hypercall for dirty page tracing (shadow op)




> -----Original Message-----
> From: Stefano Stabellini [mailto:stefano.stabellini@xxxxxxxxxxxxx]
> Sent: Wednesday, July 03, 2013 9:39 PM
> To: Jaeyong Yoo
> Cc: xen-devel@xxxxxxxxxxxxx; Elena Pyatunina
> Subject: Re: [Xen-devel] [PATCH RFC v2 8/9] xen/arm: Implement hypercall
> for dirty page tracing (shadow op)
> 
> On Wed, 3 Jul 2013, Jaeyong Yoo wrote:
> > From: Elena Pyatunina <e.pyatunina@xxxxxxxxxxx>
> >
> > Add hypercall (shadow op: enable/disable and clean/peek dirtied page
> > bitmap)
> >
> > Signed-off-by: Elena Pyatunina <e.pyatunina@xxxxxxxxxxx>
> > Signed-off-by: Alexey Sokolov <sokolov.a@xxxxxxxxxxx>
> > ---
> >  xen/arch/arm/domain.c     |   4 +
> >  xen/arch/arm/domctl.c     |  19 ++++
> >  xen/arch/arm/p2m.c        | 219 +++++++++++++++++++++++++++++++++++++++++++++-
> >  xen/include/asm-arm/p2m.h |   2 +
> >  4 files changed, 242 insertions(+), 2 deletions(-)
> >
> > diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> > index 73dd0de..e3fc533 100644
> > --- a/xen/arch/arm/domain.c
> > +++ b/xen/arch/arm/domain.c
> > @@ -484,6 +484,10 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
> >      d->arch.vpidr = boot_cpu_data.midr.bits;
> >      d->arch.vmpidr = boot_cpu_data.mpidr.bits;
> >
> > +    /* init for dirty-page tracing */
> > +    d->arch.dirty.count = 0;
> > +    d->arch.dirty.top = NULL;
> > +
> >      clear_page(d->shared_info);
> >      share_xen_page_with_guest(
> >          virt_to_page(d->shared_info), d, XENSHARE_writable);
> > diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c
> > index a0719b0..bc06534 100644
> > --- a/xen/arch/arm/domctl.c
> > +++ b/xen/arch/arm/domctl.c
> > @@ -67,6 +67,14 @@ long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
> >              goto gethvmcontext_out;
> >          }
> >
> > +        /* Allocate our own marshalling buffer */
> > +        ret = -ENOMEM;
> > +        if ( (c.data = xmalloc_bytes(c.size)) == NULL )
> > +        {
> > +            printk("(gethvmcontext) xmalloc_bytes failed: %d\n", c.size);
> > +            goto gethvmcontext_out;
> > +        }
> > +
> >          domain_pause(d);
> >          ret = hvm_save(d, &c);
> >          domain_unpause(d);
> 
> Shouldn't this hunk belong to patch #2?


Oops my mistake!

> 
> 
> > @@ -85,6 +93,17 @@ long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
> >              xfree(c.data);
> >      }
> >      break;
> > +    case XEN_DOMCTL_shadow_op:
> > +    {
> > +        ret = dirty_mode_op(d, &domctl->u.shadow_op);
> > +        if ( (&domctl->u.shadow_op)->op == XEN_DOMCTL_SHADOW_OP_CLEAN
> > +             || (&domctl->u.shadow_op)->op == XEN_DOMCTL_SHADOW_OP_PEEK)
> > +        {
> > +            copyback = 1;
> > +        }
> > +    }
> > +    break;
> > +
> >      default:
> >          return -EINVAL;
> >      }
> > diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
> > index bae7af7..fb8ce10 100644
> > --- a/xen/arch/arm/p2m.c
> > +++ b/xen/arch/arm/p2m.c
> > @@ -69,8 +69,8 @@ out:
> >
> >      spin_unlock(&p2m->lock);
> >
> > -    return ( ((xen_pfn_t)first_index << FIRST_SHIFT) |
> > -             (second_index << SECOND_SHIFT)          |
> > +    return ( ((xen_pfn_t)first_index << FIRST_SHIFT) |
> > +             (second_index << SECOND_SHIFT)          |
> >               (third_index << THIRD_SHIFT)
> >             )  >> PAGE_SHIFT;
> 
> Does this belong here? If so, why?

Oops, mistake again. It doesn't belong here. Sorry.

> 
> 
> > @@ -496,6 +496,221 @@ void mark_dirty(struct domain *d, paddr_t addr)
> >      spin_unlock(&d->arch.dirty.lock);
> >  }
> >
> > +static void free_dirty_bitmap(struct domain *d)
> > +{
> > +    struct page_info *pg;
> > +
> > +    spin_lock(&d->arch.dirty.lock);
> > +
> > +    if(d->arch.dirty.top)
> > +    {
> > +        while ( (pg = page_list_remove_head(&d->arch.dirty.pages)) )
> > +            free_domheap_page(pg);
> > +    }
> > +    d->arch.dirty.top = NULL;
> > +    d->arch.dirty.count = 0;
> > +
> > +    spin_unlock(&d->arch.dirty.lock);
> > +}
> > +
> > +/* Change types across all p2m entries in a domain */
> > +static void p2m_change_entry_type_global(struct domain *d, enum mg ot, enum mg nt)
> > +{
> > +    struct p2m_domain *p2m = &d->arch.p2m;
> > +    int i1, i2, i3;
> > +    int first_index = first_table_offset(GUEST_RAM_BASE);
> > +    int second_index = second_table_offset(GUEST_RAM_BASE);
> > +    int third_index = third_table_offset(GUEST_RAM_BASE);
> 
> you shouldn't assume GUEST_RAM_BASE is static anywhere

Yes, you are right. Since there is currently no way to discover this value,
we use the static one here. I think we need to parse the DomU guest's dtb
and get the value from there. Do you have any suggestions?
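One possible direction, just as a sketch: have the toolstack record the guest
RAM base (e.g. parsed from the guest dtb) in struct arch_domain and derive the
walk start indices from that instead of GUEST_RAM_BASE. The d->arch.dirty.ram_base
field below is purely hypothetical and does not exist in this patch:

    /* Hypothetical sketch: d->arch.dirty.ram_base would be filled in by the
     * domain builder (e.g. from the guest dtb) before log-dirty is enabled. */
    paddr_t ram_base = d->arch.dirty.ram_base;

    int first_index  = first_table_offset(ram_base);
    int second_index = second_table_offset(ram_base);
    int third_index  = third_table_offset(ram_base);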

> 
> 
> > +    lpae_t *first = __map_domain_page(p2m->first_level);
> > +    lpae_t pte, *second = NULL,  *third = NULL;
> > +
> > +    BUG_ON(!first && "Can't map first level p2m.");
> > +
> > +    spin_lock(&p2m->lock);
> > +
> > +    for (i1 = first_index; i1 < LPAE_ENTRIES*2; ++i1)
> > +    {
> > +        lpae_walk_t first_pte = first[i1].walk;
> > +        if (!first_pte.valid || !first_pte.table)
> > +            goto out;
> > +
> > +        second = map_domain_page(first_pte.base);
> > +        BUG_ON(!second && "Can't map second level p2m.");
> > +        for(i2 = second_index; i2 < LPAE_ENTRIES; ++i2)
> > +        {
> > +            lpae_walk_t second_pte = second[i2].walk;
> > +            if (!second_pte.valid || !second_pte.table)
> > +                goto out;
> > +
> > +            third = map_domain_page(second_pte.base);
> > +            BUG_ON(!third && "Can't map third level p2m.");
> > +
> > +            for (i3 = third_index; i3 < LPAE_ENTRIES; ++i3)
> > +            {
> > +                lpae_walk_t third_pte = third[i3].walk;
> > +                int write = 0;
> > +                if (!third_pte.valid)
> > +                    goto out;
> > +
> > +                pte = third[i3];
> > +                if (pte.p2m.write == 1 && nt == mg_ro)
> > +                {
> > +                    pte.p2m.write = 0;
> > +                    write = 1;
> > +                }
> > +                else if (pte.p2m.write == 0 && nt == mg_rw)
> > +                {
> > +                    pte.p2m.write = 1;
> > +                    write = 1;
> > +                }
> > +                if (write)
> > +                    write_pte(&third[i3], pte);
> > +            }
> > +            unmap_domain_page(third);
> > +
> > +            third = NULL;
> > +            third_index = 0;
> > +        }
> > +        unmap_domain_page(second);
> > +
> > +        second = NULL;
> > +        second_index = 0;
> > +        third_index = 0;
> > +    }
> > +
> > +out:
> > +    flush_tlb_all_local();
> > +    if (third) unmap_domain_page(third);
> > +    if (second) unmap_domain_page(second);
> > +    if (first) unmap_domain_page(first);
> > +
> > +    spin_unlock(&p2m->lock);
> > +}
> > +
> > +/* Read a domain's log-dirty bitmap and stats.
> > + * If the operation is a CLEAN, clear the bitmap and stats. */
> > +static int log_dirty_op(struct domain *d, xen_domctl_shadow_op_t *sc)
> > +{
> > +    int i1, i2, rv = 0, clean = 0, peek = 1;
> > +    xen_pfn_t *first = NULL, *second = NULL;
> > +    unsigned long pages = 0, *third = NULL;
> > +
> > +    clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
> > +
> > +    if (guest_handle_is_null(sc->dirty_bitmap))
> > +        peek = 0;
> > +
> > +    domain_pause(d);
> > +
> > +    printk("DEBUG: log-dirty %s: dom %u dirty=%u\n",
> > +            (clean) ? "clean" : "peek",
> > +            d->domain_id, d->arch.dirty.count);
> > +
> > +    sc->stats.dirty_count = d->arch.dirty.count;
> > +
> > +    spin_lock(&d->arch.dirty.lock);
> > +    first = d->arch.dirty.top ? __map_domain_page(d->arch.dirty.top) : NULL;
> > +
> > +    /* 32-bit address space starts from 0 and ends at ffffffff.
> 
> This should be a 40 bit address space due to LPAE support

Yes, you are right.

> 
> 
> > +     * First level of p2m tables is indexed by bits 39-30.
> > +     * So significant bits for 32-bit addresses in first table are 31-30.
> > +     * So we need to iterate from 0 to 4.
> > +     * But RAM starts from 2 gigabytes. So start from 2 instead.
> > +     * TODO remove this hardcode
> 
> I wouldn't assume that the guest ram start at 2GB

Yes, right; we are thinking of getting this from the guest's dtb as well.
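For what it's worth, a rough sketch of how the log_dirty_op loop bounds could
be derived once a per-domain RAM base/size is available (the
d->arch.dirty.ram_base/ram_size fields are hypothetical, as above); with LPAE
each first-level entry covers 1GB of the 40-bit IPA space:

    /* Sketch only: replace the hardcoded 2..4 range with indices derived
     * from hypothetical per-domain RAM layout fields. */
    paddr_t ram_base = d->arch.dirty.ram_base;
    paddr_t ram_end  = ram_base + d->arch.dirty.ram_size;
    int i1_start = first_table_offset(ram_base);
    int i1_end   = first_table_offset(ram_end - 1) + 1;

    for ( i1 = i1_start; (pages < sc->pages) && (i1 < i1_end); ++i1 )
    {
        /* ... walk second/third levels as in the patch ... */
    }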


> 
> 
> > +     */
> > +    for (i1 = 2; (pages < sc->pages) && (i1 < 4); ++i1)
> > +    {
> > +        second = (first && first[i1]) ? map_domain_page(first[i1]) : NULL;
> > +
> > +        for (i2 = 0; (i2 < LPAE_ENTRIES); ++i2)
> > +        {
> > +            unsigned int bytes = LPAE_ENTRIES/8;
> > +            third = (second && second[i2]) ? map_domain_page(second[i2]) : NULL;
> > +            if ( unlikely(((sc->pages - pages + 7) >> 3) < bytes) )
> > +                bytes = (unsigned int)((sc->pages - pages + 7) >> 3);
> > +            if (peek)
>                   ^ code style
> > +            {
> > +                if ( (third ? copy_to_guest_offset(sc->dirty_bitmap, pages >> 3,
> > +                                                   (uint8_t *)third, bytes)
> > +                            : clear_guest_offset(sc->dirty_bitmap, pages >> 3,
> > +                                                 (uint8_t*)NULL /*only type matters*/,
> > +                                                 bytes)) != 0 )
> > +                {
> > +                    rv = 1;
> > +                    goto out;
> > +                }
> > +            }
> > +            pages += bytes << 3;
> > +            if (third)
>                   ^ code style
> > +            {
> > +                if (clean) clear_page(third);
> > +                unmap_domain_page(third);
> > +            }
> > +        }
> > +        if (second) unmap_domain_page(second);
> > +    }
> > +    if (first) unmap_domain_page(first);
> > +
> > +    spin_unlock(&d->arch.dirty.lock);
> > +
> > +    if ( pages < sc->pages )
> > +        sc->pages = pages;
> > +
> > +    if (clean)
>            ^ code style
> > +    {
> > +        p2m_change_entry_type_global(d, mg_rw, mg_ro);
> > +        free_dirty_bitmap(d);
> > +    }
> > +
> > +out:
> > +    if (rv)
> > +    {
> > +       spin_unlock(&d->arch.dirty.lock);
> > +    }
> > +    domain_unpause(d);
> > +    return rv;
> > +}
> > +
> > +long dirty_mode_op(struct domain *d, xen_domctl_shadow_op_t *sc)
> > +{
> > +    long ret = 0;
> > +
> > +    switch (sc->op)
> > +    {
> > +        case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
> > +        case XEN_DOMCTL_SHADOW_OP_OFF:
> > +        {
> > +            enum mg ot = mg_clear, nt = mg_clear;
> > +
> > +            ot = sc->op == XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY ? mg_rw : mg_ro;
> > +            nt = sc->op == XEN_DOMCTL_SHADOW_OP_OFF ? mg_rw : mg_ro;
> > +
> > +            domain_pause(d);
> > +
> > +            d->arch.dirty.mode = sc->op == XEN_DOMCTL_SHADOW_OP_OFF ? 0 : 1;
> > +            p2m_change_entry_type_global(d, ot, nt);
> > +
> > +            if (sc->op == XEN_DOMCTL_SHADOW_OP_OFF)
> > +                free_dirty_bitmap(d);
> > +
> > +            domain_unpause(d);
> 
> Do we need a lock to make sure that all the operations happen atomically?


Yes, right. Since multiple xl invocations can run simultaneously, these
operations need to be serialised.
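A minimal sketch of what that could look like, assuming a dedicated lock (the
mode_lock field is hypothetical; reusing d->arch.dirty.lock here would deadlock
because free_dirty_bitmap() takes it internally, and whether the lock should be
taken before or after domain_pause() is a separate question):

            /* Sketch: serialise concurrent XEN_DOMCTL_shadow_op callers so
             * the mode flag, the p2m permission change and the bitmap
             * teardown cannot interleave. mode_lock is hypothetical. */
            spin_lock(&d->arch.dirty.mode_lock);

            domain_pause(d);
            d->arch.dirty.mode = (sc->op == XEN_DOMCTL_SHADOW_OP_OFF) ? 0 : 1;
            p2m_change_entry_type_global(d, ot, nt);
            if ( sc->op == XEN_DOMCTL_SHADOW_OP_OFF )
                free_dirty_bitmap(d);
            domain_unpause(d);

            spin_unlock(&d->arch.dirty.mode_lock);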

> 
> 
> > +        }
> > +        break;
> > +
> > +        case XEN_DOMCTL_SHADOW_OP_CLEAN:
> > +        case XEN_DOMCTL_SHADOW_OP_PEEK:
> > +        {
> > +            ret = log_dirty_op(d, sc);
> > +        }
> > +        break;
> 
> For symmetry maybe it would be better to pause the domain outside
> log_dirty_op

OK.
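Something like this, as a sketch (the domain_pause()/domain_unpause() pair
inside log_dirty_op() would then go away):

        case XEN_DOMCTL_SHADOW_OP_CLEAN:
        case XEN_DOMCTL_SHADOW_OP_PEEK:
        {
            /* Pause here for symmetry with the enable/disable path. */
            domain_pause(d);
            ret = log_dirty_op(d, sc);
            domain_unpause(d);
        }
        break;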

> 
> 
> > +        default:
> > +            return -ENOSYS;
> > +    }
> > +    return ret;
> > +}
> > +
> >  /*
> >   * Local variables:
> >   * mode: C
> > diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
> > index fd5890f..498cf0b 100644
> > --- a/xen/include/asm-arm/p2m.h
> > +++ b/xen/include/asm-arm/p2m.h
> > @@ -2,6 +2,7 @@
> >  #define _XEN_P2M_H
> >
> >  #include <xen/mm.h>
> > +#include <public/domctl.h>
> >
> >  struct domain;
> >
> > @@ -112,6 +113,7 @@ static inline int get_page_and_type(struct page_info *page,
> >
> >  enum mg { mg_clear, mg_ro, mg_rw, mg_rx };
> >  void mark_dirty(struct domain *d, paddr_t addr);
> > +long dirty_mode_op(struct domain *d, xen_domctl_shadow_op_t *sc);
> >
> >  #endif /* _XEN_P2M_H */
> >
> > --
> > 1.8.1.2
> >
> >
> > _______________________________________________
> > Xen-devel mailing list
> > Xen-devel@xxxxxxxxxxxxx
> > http://lists.xen.org/xen-devel
> >


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 

