
Re: [Xen-devel] [RFC v2 5/6] xen/arm: Implement hypercall for dirty page tracing



Hi Wei,

Thank you for the patch.

On 15/04/14 22:05, Wei Huang wrote:
  long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
                      XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
  {
+    long ret = 0;
+
      switch ( domctl->cmd )
      {
+    case XEN_DOMCTL_shadow_op:
+    {
+        if ( d == current->domain ) /* no domain_pause() */

Error message here?

+            return -EINVAL;
+
+        domain_pause(d);
+        ret = dirty_mode_op(d, &domctl->u.shadow_op);
+        domain_unpause(d);

Why do you pause/unpause the domain? Also, you have to check that:
    - the caller has the right to do this (via XSM);
    - the domain is not dying...

See paging_domctl() in xen/arch/x86/mm/paging.c.
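
Something along these lines, as a sketch only: xsm_shadow_control() is the hook the x86 path calls, so whether it can be used as-is from the ARM domctl is an assumption here.

    case XEN_DOMCTL_shadow_op:
    {
        if ( unlikely(d == current->domain) )  /* no domain_pause() on self */
            return -EINVAL;

        if ( unlikely(d->is_dying) )           /* don't operate on a dying domain */
            return -EINVAL;

        ret = xsm_shadow_control(XSM_HOOK, d, domctl->u.shadow_op.op);
        if ( ret )
            return ret;

        domain_pause(d);
        ret = dirty_mode_op(d, &domctl->u.shadow_op);
        domain_unpause(d);
        break;
    }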

[..]

diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index 403fd89..d57a44a 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -6,6 +6,8 @@
  #include <xen/bitops.h>
  #include <asm/flushtlb.h>
  #include <asm/gic.h>
+#include <xen/guest_access.h>
+#include <xen/pfn.h>
  #include <asm/event.h>
  #include <asm/hardirq.h>
  #include <asm/page.h>
@@ -208,6 +210,7 @@ static lpae_t mfn_to_p2m_entry(unsigned long mfn, unsigned int mattr,
          break;

      case p2m_ram_ro:
+    case p2m_ram_logdirty:
          e.p2m.xn = 0;
          e.p2m.write = 0;
          break;
@@ -261,6 +264,10 @@ static int p2m_create_table(struct domain *d,

      pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid);

+    /* mark the write bit (page table's case, ro bit) as 0. So it is writable

Do you mean "mark the entry as writable"?

+     * in case of vlpt access */
+    pte.pt.ro = 0;
+
      write_pte(entry, pte);

      return 0;
@@ -697,6 +704,210 @@ unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
      return p >> PAGE_SHIFT;
  }

+/* Change types across all p2m entries in a domain */
+void p2m_change_entry_type_global(struct domain *d, enum mg nt)
+{

Can't you reuse apply_p2m_changes()? I'm also concerned about preemption: this function might take a very long time to run (depending on the size of the guest's memory).

+    struct p2m_domain *p2m = &d->arch.p2m;
+    paddr_t ram_base;
+    int i1, i2, i3;
+    int first_index, second_index, third_index;
+    lpae_t *first = __map_domain_page(p2m->first_level);
+    lpae_t pte, *second = NULL, *third = NULL;
+
+    domain_get_gpfn_range(d, &ram_base, NULL);

Just be careful: the begin and end returned here are the bounds of everything mapped in the guest (i.e. RAM, MMIO, foreign pages, grant table...).

We don't want to log any type other than RAM.

Modifying the behaviour of {max,lowest}_mapped_gfn won't work either, because the guest might use several RAM banks with MMIO regions in between.

+    first_index = first_table_offset((uint64_t)ram_base);
+    second_index = second_table_offset((uint64_t)ram_base);
+    third_index = third_table_offset((uint64_t)ram_base);
+
+    BUG_ON( !first && "Can't map first level p2m." );
+

This BUG_ON is not necessary: __map_domain_page() always returns a valid pointer.

+    spin_lock(&p2m->lock);
+
+    for ( i1 = first_index; i1 < LPAE_ENTRIES*2; ++i1 )
+    {
+        lpae_walk_t first_pte = first[i1].walk;
+
+        if ( !first_pte.valid || !first_pte.table )
+            goto out;
+
+        second = map_domain_page(first_pte.base);
+        BUG_ON( !second && "Can't map second level p2m.");
+
+        for ( i2 = second_index; i2 < LPAE_ENTRIES; ++i2 )
+        {
+            lpae_walk_t second_pte = second[i2].walk;
+
+            if ( !second_pte.valid || !second_pte.table )
+                goto out;
+
+            third = map_domain_page(second_pte.base);
+            BUG_ON( !third && "Can't map third level p2m.");
+
+            for ( i3 = third_index; i3 < LPAE_ENTRIES; ++i3 )
+            {
+
+                lpae_walk_t third_pte = third[i3].walk;
+                if ( !third_pte.valid )
+                    goto out;
+
+                pte = third[i3];
+                if ( nt == mg_ro )

I would use a switch on nt here, it will be clearer and easier to extend (see the sketch at the end of this hunk).

+                {
+                    if ( pte.p2m.write == 1 )

As said earlier, this will trap every mapping that is writable, but we only want to trap RAM.

I would replace it with: if ( pte.p2m.type == p2m_ram_rw ).

+                    {
+                        pte.p2m.write = 0;
+                        pte.p2m.type = p2m_ram_logdirty;
+                    }
+                    else
+                    {
+                        /* reuse avail bit as an indicator of 'actual'
+                         * read-only */
+                        pte.p2m.type = p2m_ram_rw;

Why do you unconditionally change the type?

+                    }
+                }
+                else if ( nt == mg_rw )
+                {
+                    if ( pte.p2m.write == 0 &&
+                         pte.p2m.type == p2m_ram_logdirty )

Can you add a comment to explain what this "if" means?

+                    {
+                        pte.p2m.write = p2m_ram_rw;

Wrong field?

+                    }
+                }
+                write_pte(&third[i3], pte);
+            }
+            unmap_domain_page(third);
+
+            third = NULL;
+            third_index = 0;
+        }
+        unmap_domain_page(second);
+
+        second = NULL;
+        second_index = 0;
+        third_index = 0;
+    }
+
+out:
+    flush_tlb_all_local();

You want to flush the P2M on every CPU, but only for this domain's VMID.

You should use flush_tlb(). You might need to switch to the target domain's VTTBR first; see the sketch below.
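
Roughly what I have in mind for the per-entry update and the final flush, as a sketch only: the switch cases are my reading of the suggestions above (only RAM is touched, everything else is left alone), and the use of p2m_load_VTTBR() to switch to the target domain is an assumption, not something this patch provides.

    pte = third[i3];

    switch ( nt )
    {
    case mg_ro:
        /* Only trap RAM: leave MMIO, foreign and grant mappings alone. */
        if ( pte.p2m.type == p2m_ram_rw )
        {
            pte.p2m.write = 0;
            pte.p2m.type = p2m_ram_logdirty;
            write_pte(&third[i3], pte);
        }
        break;

    case mg_rw:
        /* Undo the log-dirty read-only mapping set up in the mg_ro case. */
        if ( pte.p2m.type == p2m_ram_logdirty )
        {
            pte.p2m.write = 1;
            pte.p2m.type = p2m_ram_rw;
            write_pte(&third[i3], pte);
        }
        break;

    default:
        break;
    }

    /* ... and once the walk is done, flush only this domain's VMID on
     * every CPU (preemption handling for large guests is not shown): */
    if ( d != current->domain )
        p2m_load_VTTBR(d);          /* assumes this helper is usable here */
    flush_tlb();
    if ( d != current->domain )
        p2m_load_VTTBR(current->domain);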

[..]

diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index a7edc4e..cca34e9 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -1491,6 +1491,8 @@ static void do_trap_data_abort_guest(struct cpu_user_regs *regs,
      struct hsr_dabt dabt = hsr.dabt;
      int rc;
      mmio_info_t info;
+    int page_fault = ( (dabt.dfsc & FSC_MASK) ==
+                       (FSC_FLT_PERM | FSC_3D_LEVEL) && dabt.write );

      if ( !check_conditional_instr(regs, hsr) )
      {
@@ -1512,6 +1514,15 @@ static void do_trap_data_abort_guest(struct cpu_user_regs *regs,
      if ( rc == -EFAULT )
          goto bad_data_abort;

+    /* domU page fault handling for guest live migration. dabt.valid can be

I would remove "domU" from this comment.

+     * 0 here.
+     */
+    if ( page_fault && handle_page_fault(current->domain, info.gpa) )
+    {
+        /* Do not modify pc after page fault to repeat memory operation */
+        return;
+    }
+
      /* XXX: Decode the instruction if ISS is not valid */
      if ( !dabt.valid )
          goto bad_data_abort;

[..]

diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 5fd684f..5f9478b 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -343,10 +343,18 @@ static inline void put_page_and_type(struct page_info *page)
      put_page(page);
  }

+enum mg { mg_clear, mg_ro, mg_rw, mg_rx };

Please describe this enum. Also mg is too generic.

+
+/* routine for dirty-page tracing */
+int handle_page_fault(struct domain *d, paddr_t addr);
+
  int prepare_vlpt(struct domain *d);
  void cleanup_vlpt(struct domain *d);
  void restore_vlpt(struct domain *d);

+int prepare_bitmap(struct domain *d);
+void cleanup_bitmap(struct domain *d);

{prepare,cleanup} bitmap of what? The name is too generic here.
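
For instance, something like the following (all the names below are only suggestions to illustrate the point, they are not taken from the patch):

    /* Access mode applied across the whole p2m by
     * p2m_change_entry_type_global(), used to start/stop dirty-page
     * logging for live migration. */
    enum p2m_global_access {
        p2m_access_clear,  /* restore the default permissions */
        p2m_access_ro,     /* make RAM read-only so writes fault and get logged */
        p2m_access_rw,     /* make RAM writable again, i.e. stop logging */
        p2m_access_rx,     /* read/execute only */
    };

    /* Dirty-page bitmap used while the guest is being live-migrated. */
    int prepare_dirty_bitmap(struct domain *d);
    void cleanup_dirty_bitmap(struct domain *d);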

Regards.

--
Julien Grall
