[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] shadow: track video RAM dirty bits



Hello,

Here is a patch that adds support to the shadow code for tracking the
dirty bits of video RAM.  I will work on additional heuristics (like
taking the time to write-protect the pages when the dirty scan has
reported no change for some time), but I'd like to see this tested
first, and it's already quite useful: idle qemu CPU usage typically goes
down from 3-6% to 0.2-0.4%.





shadow: track video RAM dirty bits

This adds a new HVM op that enables tracking the dirty bits of a range of
video RAM.  The idea is to optimize just for the most common case (only
one guest mapping, sometimes with other temporary mappings), which
keeps the overhead on the shadow code as low as possible.

Signed-off-by: Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx>

diff -r e6cffb5a0f53 tools/ioemu/hw/cirrus_vga.c
--- a/tools/ioemu/hw/cirrus_vga.c       Thu Apr 24 11:38:49 2008 +0100
+++ b/tools/ioemu/hw/cirrus_vga.c       Fri May 02 14:38:22 2008 +0100
@@ -234,8 +234,6 @@ typedef struct CirrusVGAState {
     int cirrus_linear_io_addr;
     int cirrus_linear_bitblt_io_addr;
     int cirrus_mmio_io_addr;
-    unsigned long cirrus_lfb_addr;
-    unsigned long cirrus_lfb_end;
     uint32_t cirrus_addr_mask;
     uint32_t linear_mmio_mask;
     uint8_t cirrus_shadow_gr0;
@@ -2657,11 +2655,11 @@ static void cirrus_update_memory_access(
         
        mode = s->gr[0x05] & 0x7;
        if (mode < 4 || mode > 5 || ((s->gr[0x0B] & 0x4) == 0)) {
-            if (s->cirrus_lfb_addr && s->cirrus_lfb_end && !s->map_addr) {
+            if (s->lfb_addr && s->lfb_end && !s->map_addr) {
                 void *vram_pointer, *old_vram;
 
-                vram_pointer = set_vram_mapping(s->cirrus_lfb_addr,
-                                                s->cirrus_lfb_end);
+                vram_pointer = set_vram_mapping(s->lfb_addr,
+                                                s->lfb_end);
                 if (!vram_pointer)
                     fprintf(stderr, "NULL vram_pointer\n");
                 else {
@@ -2669,21 +2667,21 @@ static void cirrus_update_memory_access(
                                                VGA_RAM_SIZE);
                     qemu_free(old_vram);
                 }
-                s->map_addr = s->cirrus_lfb_addr;
-                s->map_end = s->cirrus_lfb_end;
+                s->map_addr = s->lfb_addr;
+                s->map_end = s->lfb_end;
             }
             s->cirrus_linear_write[0] = cirrus_linear_mem_writeb;
             s->cirrus_linear_write[1] = cirrus_linear_mem_writew;
             s->cirrus_linear_write[2] = cirrus_linear_mem_writel;
         } else {
         generic_io:
-            if (s->cirrus_lfb_addr && s->cirrus_lfb_end && s->map_addr) {
+            if (s->lfb_addr && s->lfb_end && s->map_addr) {
                 void *old_vram;
 
                 old_vram = vga_update_vram((VGAState *)s, NULL, VGA_RAM_SIZE);
 
-                unset_vram_mapping(s->cirrus_lfb_addr,
-                                   s->cirrus_lfb_end, 
+                unset_vram_mapping(s->lfb_addr,
+                                   s->lfb_end, 
                                    old_vram);
 
                 s->map_addr = s->map_end = 0;
@@ -3049,27 +3047,27 @@ void cirrus_stop_acc(CirrusVGAState *s)
     if (s->map_addr){
         int error;
         s->map_addr = 0;
-        error = unset_vram_mapping(s->cirrus_lfb_addr,
-                s->cirrus_lfb_end, s->vram_ptr);
+        error = unset_vram_mapping(s->lfb_addr,
+                s->lfb_end, s->vram_ptr);
         fprintf(stderr, "cirrus_stop_acc:unset_vram_mapping.\n");
     }
 }
 
 void cirrus_restart_acc(CirrusVGAState *s)
 {
-    if (s->cirrus_lfb_addr && s->cirrus_lfb_end) {
+    if (s->lfb_addr && s->lfb_end) {
         void *vram_pointer, *old_vram;
         fprintf(stderr, "cirrus_vga_load:re-enable vga acc.lfb_addr=0x%lx, lfb_end=0x%lx.\n",
-                s->cirrus_lfb_addr, s->cirrus_lfb_end);
-        vram_pointer = set_vram_mapping(s->cirrus_lfb_addr ,s->cirrus_lfb_end);
+                s->lfb_addr, s->lfb_end);
+        vram_pointer = set_vram_mapping(s->lfb_addr ,s->lfb_end);
         if (!vram_pointer){
             fprintf(stderr, "cirrus_vga_load:NULL vram_pointer\n");
         } else {
             old_vram = vga_update_vram((VGAState *)s, vram_pointer,
                     VGA_RAM_SIZE);
             qemu_free(old_vram);
-            s->map_addr = s->cirrus_lfb_addr;
-            s->map_end = s->cirrus_lfb_end;
+            s->map_addr = s->lfb_addr;
+            s->map_end = s->lfb_end;
         }
     }
 }
@@ -3120,8 +3118,8 @@ static void cirrus_vga_save(QEMUFile *f,
 
     vga_acc = (!!s->map_addr);
     qemu_put_8s(f, &vga_acc);
-    qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_addr);
-    qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_end);
+    qemu_put_be64s(f, (uint64_t*)&s->lfb_addr);
+    qemu_put_be64s(f, (uint64_t*)&s->lfb_end);
     qemu_put_buffer(f, s->vram_ptr, VGA_RAM_SIZE); 
 }
 
@@ -3175,8 +3173,8 @@ static int cirrus_vga_load(QEMUFile *f, 
     qemu_get_be32s(f, &s->hw_cursor_y);
 
     qemu_get_8s(f, &vga_acc);
-    qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_addr);
-    qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_end);
+    qemu_get_be64s(f, (uint64_t*)&s->lfb_addr);
+    qemu_get_be64s(f, (uint64_t*)&s->lfb_end);
     qemu_get_buffer(f, s->vram_ptr, VGA_RAM_SIZE); 
     if (vga_acc){
         cirrus_restart_acc(s);
@@ -3337,11 +3335,11 @@ static void cirrus_pci_lfb_map(PCIDevice
     /* XXX: add byte swapping apertures */
     cpu_register_physical_memory(addr, s->vram_size,
                                 s->cirrus_linear_io_addr);
-    s->cirrus_lfb_addr = addr;
-    s->cirrus_lfb_end = addr + VGA_RAM_SIZE;
+    s->lfb_addr = addr;
+    s->lfb_end = addr + VGA_RAM_SIZE;
 
-    if (s->map_addr && (s->cirrus_lfb_addr != s->map_addr) &&
-        (s->cirrus_lfb_end != s->map_end))
+    if (s->map_addr && (s->lfb_addr != s->map_addr) &&
+        (s->lfb_end != s->map_end))
         fprintf(logfile, "cirrus vga map change while on lfb mode\n");
 
     cpu_register_physical_memory(addr + 0x1000000, 0x400000,
diff -r e6cffb5a0f53 tools/ioemu/hw/vga.c
--- a/tools/ioemu/hw/vga.c      Thu Apr 24 11:38:49 2008 +0100
+++ b/tools/ioemu/hw/vga.c      Fri May 02 14:38:22 2008 +0100
@@ -1086,6 +1086,9 @@ static void vga_draw_text(VGAState *s, in
     vga_draw_glyph8_func *vga_draw_glyph8;
     vga_draw_glyph9_func *vga_draw_glyph9;
 
+    /* Disable dirty bit tracking */
+    xc_hvm_track_dirty_vram(xc_handle, domid, 0, 0, NULL);
+
     if (s->ds->dpy_colourdepth != NULL && s->ds->depth != 0)
         s->ds->dpy_colourdepth(s->ds, 0);
     s->rgb_to_pixel = 
@@ -1486,7 +1489,7 @@ static void vga_draw_graphic(VGAState *s
 static void vga_draw_graphic(VGAState *s, int full_update)
 {
     int y1, y, update, linesize, y_start, double_scan, mask, depth;
-    int width, height, shift_control, line_offset, bwidth, ds_depth;
+    int width, height, shift_control, line_offset, bwidth, ds_depth, bits;
     ram_addr_t page0, page1;
     int disp_width, multi_scan, multi_run;
     uint8_t *d;
@@ -1534,6 +1537,7 @@ static void vga_draw_graphic(VGAState *s
         } else {
             v = VGA_DRAW_LINE4;
         }
+        bits = 4;
     } else if (shift_control == 1) {
         full_update |= update_palette16(s);
         if (s->sr[0x01] & 8) {
@@ -1542,28 +1546,35 @@ static void vga_draw_graphic(VGAState *s
         } else {
             v = VGA_DRAW_LINE2;
         }
+        bits = 4;
     } else {
         switch(s->get_bpp(s)) {
         default:
         case 0:
             full_update |= update_palette256(s);
             v = VGA_DRAW_LINE8D2;
+            bits = 4;
             break;
         case 8:
             full_update |= update_palette256(s);
             v = VGA_DRAW_LINE8;
+            bits = 8;
             break;
         case 15:
             v = VGA_DRAW_LINE15;
+            bits = 16;
             break;
         case 16:
             v = VGA_DRAW_LINE16;
+            bits = 16;
             break;
         case 24:
             v = VGA_DRAW_LINE24;
+            bits = 24;
             break;
         case 32:
             v = VGA_DRAW_LINE32;
+            bits = 32;
             break;
         }
     }
@@ -1591,12 +1602,72 @@ static void vga_draw_graphic(VGAState *s
            width, height, v, line_offset, s->cr[9], s->cr[0x17], s->line_compare, s->sr[0x01]);
 #endif
 
-    for (y = 0; y < s->vram_size; y += TARGET_PAGE_SIZE)
-        if (vram_dirty(s, y, TARGET_PAGE_SIZE))
+    y = 0;
+
+    if (height - 1 > s->line_compare || multi_run || (s->cr[0x17] & 3) != 3
+            || !s->lfb_addr) {
+        /* Tricky things happen, disable dirty bit tracking */
+        xc_hvm_track_dirty_vram(xc_handle, domid, 0, 0, NULL);
+
+        for ( ; y < s->vram_size; y += TARGET_PAGE_SIZE)
+            if (vram_dirty(s, y, TARGET_PAGE_SIZE))
+                cpu_physical_memory_set_dirty(s->vram_offset + y);
+    } else {
+        /* Tricky things won't have any effect, i.e. we are in the very simple
+         * (and very usual) case of a linear buffer. */
+        unsigned long end;
+
+        for ( ; y < ((s->start_addr * 4) & TARGET_PAGE_MASK); y += TARGET_PAGE_SIZE)
+            /* We will not read that anyway. */
             cpu_physical_memory_set_dirty(s->vram_offset + y);
 
+        if (y < (s->start_addr * 4)) {
+            /* start address not aligned on a page, track dirtyness by hand. */
+            if (vram_dirty(s, y, TARGET_PAGE_SIZE))
+                cpu_physical_memory_set_dirty(s->vram_offset + y);
+            y += TARGET_PAGE_SIZE;
+        }
+
+        /* use page table dirty bit tracking for the inner of the LFB */
+        end = s->start_addr * 4 + height * line_offset;
+        {
+            unsigned long npages = ((end & TARGET_PAGE_MASK) - y) / TARGET_PAGE_SIZE;
+            const int width = sizeof(unsigned long) * 8;
+            unsigned long bitmap[(npages + width - 1) / width];
+            int err;
+
+            if (!(err = xc_hvm_track_dirty_vram(xc_handle, domid,
+                        (s->lfb_addr + y) / TARGET_PAGE_SIZE, npages, bitmap))) {
+                int i, j;
+                for (i = 0; i < sizeof(bitmap) / sizeof(*bitmap); i++) {
+                    unsigned long map = bitmap[i];
+                    for (j = i * width; map && j < npages; map >>= 1, j++)
+                        if (map & 1)
+                            cpu_physical_memory_set_dirty(s->vram_offset + y
+                                + j * TARGET_PAGE_SIZE);
+                }
+                y += npages * TARGET_PAGE_SIZE;
+            } else {
+                /* ENODATA just means we have changed mode and will succeed
+                 * next time */
+                if (err != -ENODATA)
+                    fprintf(stderr, "track_dirty_vram(%lx, %lx) failed (%d)\n", s->lfb_addr + y, npages, err);
+            }
+        }
+
+        for ( ; y < s->vram_size && y < end; y += TARGET_PAGE_SIZE)
+            /* failed or end address not aligned on a page, track dirtyness by
+             * hand. */
+            if (vram_dirty(s, y, TARGET_PAGE_SIZE))
+                cpu_physical_memory_set_dirty(s->vram_offset + y);
+
+        for ( ; y < s->vram_size; y += TARGET_PAGE_SIZE)
+            /* We will not read that anyway. */
+            cpu_physical_memory_set_dirty(s->vram_offset + y);
+    }
+
     addr1 = (s->start_addr * 4);
-    bwidth = width * 4;
+    bwidth = (width * bits + 7) / 8;
     y_start = -1;
     page_min = 0;
     page_max = 0;
@@ -1682,6 +1753,10 @@ static void vga_draw_blank(VGAState *s, 
         return;
     if (s->last_scr_width <= 0 || s->last_scr_height <= 0)
         return;
+
+    /* Disable dirty bit tracking */
+    xc_hvm_track_dirty_vram(xc_handle, domid, 0, 0, NULL);
+
     s->rgb_to_pixel = 
         rgb_to_pixel_dup_table[get_depth_index(s->ds)];
     if (s->ds->depth == 8) 
diff -r e6cffb5a0f53 tools/ioemu/hw/vga_int.h
--- a/tools/ioemu/hw/vga_int.h  Thu Apr 24 11:38:49 2008 +0100
+++ b/tools/ioemu/hw/vga_int.h  Fri May 02 14:38:22 2008 +0100
@@ -87,6 +87,8 @@
     unsigned int vram_size;                                             \
     unsigned long bios_offset;                                          \
     unsigned int bios_size;                                             \
+    unsigned long lfb_addr;                                             \
+    unsigned long lfb_end;                                              \
     PCIDevice *pci_dev;                                                 \
     uint32_t latch;                                                     \
     uint8_t sr_index;                                                   \
diff -r e6cffb5a0f53 tools/libxc/xc_misc.c
--- a/tools/libxc/xc_misc.c     Thu Apr 24 11:38:49 2008 +0100
+++ b/tools/libxc/xc_misc.c     Fri May 02 14:38:22 2008 +0100
@@ -236,6 +236,37 @@ int xc_hvm_set_pci_link_route(
     return rc;
 }
 
+int xc_hvm_track_dirty_vram(
+    int xc_handle, domid_t dom,
+    uint64_t first_pfn, uint64_t nr,   /* nr == 0 turns tracking off */
+    unsigned long *dirty_bitmap)       /* handed to Xen as a uint8 handle */
+{
+    DECLARE_HYPERCALL;
+    struct xen_hvm_track_dirty_vram arg;
+    int rc;
+
+    hypercall.op     = __HYPERVISOR_hvm_op;
+    hypercall.arg[0] = HVMOP_track_dirty_vram;
+    hypercall.arg[1] = (unsigned long)&arg;   /* arg is locked below */
+
+    arg.domid     = dom;
+    arg.first_pfn = first_pfn;
+    arg.nr        = nr;
+    set_xen_guest_handle(arg.dirty_bitmap, (uint8_t *)dirty_bitmap);
+
+    if ( (rc = lock_pages(&arg, sizeof(arg))) != 0 )
+    {
+        PERROR("Could not lock memory");
+        return rc;
+    }
+
+    rc = do_xen_hypercall(xc_handle, &hypercall);
+
+    unlock_pages(&arg, sizeof(arg));
+
+    return rc;
+}
+
 void *xc_map_foreign_pages(int xc_handle, uint32_t dom, int prot,
                            const xen_pfn_t *arr, int num)
 {
diff -r e6cffb5a0f53 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Thu Apr 24 11:38:49 2008 +0100
+++ b/tools/libxc/xenctrl.h     Fri May 02 14:38:22 2008 +0100
@@ -862,6 +862,22 @@ int xc_hvm_set_pci_link_route(
     int xc_handle, domid_t dom, uint8_t link, uint8_t isa_irq);
 
 
+/*
+ * Track dirty bit changes in the VRAM area
+ *
+ * All of this is done atomically:
+ * - get the dirty bitmap since the last call
+ * - set up dirty tracking area for period up to the next call
+ * - clear the dirty tracking area.
+ *
+ * Returns -ENODATA and does not fill bitmap if the area has changed since the
+ * last call.
+ */
+int xc_hvm_track_dirty_vram(
+    int xc_handle, domid_t dom,
+    uint64_t first_pfn, uint64_t nr,
+    unsigned long *bitmap);
+
 typedef enum {
   XC_ERROR_NONE = 0,
   XC_INTERNAL_ERROR = 1,
diff -r e6cffb5a0f53 xen/arch/ia64/vmx/vmx_hypercall.c
--- a/xen/arch/ia64/vmx/vmx_hypercall.c Thu Apr 24 11:38:49 2008 +0100
+++ b/xen/arch/ia64/vmx/vmx_hypercall.c Fri May 02 14:38:22 2008 +0100
@@ -200,6 +200,10 @@ do_hvm_op(unsigned long op, XEN_GUEST_HA
         rc = 0;
         break;
 
+    case HVMOP_track_dirty_vram:
+        rc = -ENOSYS;
+        break;
+
     default:
         gdprintk(XENLOG_INFO, "Bad HVM op %ld.\n", op);
         rc = -ENOSYS;
diff -r e6cffb5a0f53 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Thu Apr 24 11:38:49 2008 +0100
+++ b/xen/arch/x86/hvm/hvm.c    Fri May 02 14:38:22 2008 +0100
@@ -2360,6 +2360,54 @@ long do_hvm_op(unsigned long op, XEN_GUE
         rc = guest_handle_is_null(arg) ? hvmop_flush_tlb_all() : -ENOSYS;
         break;
 
+    case HVMOP_track_dirty_vram:
+    {
+        struct xen_hvm_track_dirty_vram a;
+        struct domain *d;
+
+        if ( copy_from_guest(&a, arg, 1) )
+            return -EFAULT;
+
+        if ( a.domid == DOMID_SELF )
+        {
+            d = rcu_lock_current_domain();
+        }
+        else
+        {
+            if ( (d = rcu_lock_domain_by_id(a.domid)) == NULL )
+                return -ESRCH;
+            if ( !IS_PRIV_FOR(current->domain, d) )
+            {
+                rc = -EPERM;
+                goto param_fail2;
+            }
+        }
+
+        rc = -EINVAL;
+        if ( !is_hvm_domain(d) )
+            goto param_fail2;
+
+        rc = xsm_hvm_param(d, op);
+        if ( rc )
+            goto param_fail2;
+
+        rc = -ESRCH;
+        if ( d->is_dying )
+            goto param_fail2;
+
+        rc = -EINVAL;
+        if ( !shadow_mode_enabled(d))
+            goto param_fail2;
+        if ( d->vcpu[0] == NULL )
+            goto param_fail2;
+
+        rc = shadow_track_dirty_vram(d, a.first_pfn, a.nr, a.dirty_bitmap);
+
+    param_fail2:
+        rcu_unlock_domain(d);
+        break;
+    }
+
     default:
     {
         gdprintk(XENLOG_WARNING, "Bad HVM op %ld.\n", op);
diff -r e6cffb5a0f53 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Thu Apr 24 11:38:49 2008 +0100
+++ b/xen/arch/x86/mm/shadow/common.c   Fri May 02 14:38:22 2008 +0100
@@ -2591,6 +2591,13 @@ void shadow_teardown(struct domain *d)
      * calls now that we've torn down the bitmap */
     d->arch.paging.mode &= ~PG_log_dirty;
 
+    if (d->dirty_vram) {
+        xfree(d->dirty_vram->sl1ma);
+        xfree(d->dirty_vram->dirty_bitmap);
+        xfree(d->dirty_vram);
+        d->dirty_vram = NULL;
+    }
+
     shadow_unlock(d);
 }
 
@@ -2851,6 +2858,164 @@ void shadow_clean_dirty_bitmap(struct do
     shadow_blow_tables(d);
     shadow_unlock(d);
 }
+
+
+/**************************************************************************/
+/* VRAM dirty tracking support */
+/* Start, poll or (nr == 0) stop dirty tracking of guest pfns [begin_pfn,
+ * begin_pfn + nr).  Returns 0, -ENODATA on (re)start, or another -errno. */
+int shadow_track_dirty_vram(struct domain *d,
+                            unsigned long begin_pfn,
+                            unsigned long nr,
+                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
+{
+    int rc = 0;
+    unsigned long end_pfn = begin_pfn + nr;
+    unsigned long dirty_size = (nr + 7) / 8;
+    int flush_tlb = 0;
+
+    if (end_pfn < begin_pfn
+            || begin_pfn > d->arch.p2m->max_mapped_pfn
+            || end_pfn >= d->arch.p2m->max_mapped_pfn)
+        return -EINVAL;
+
+    shadow_lock(d);
+
+    if ( d->dirty_vram && (!nr ||
+             ( begin_pfn != d->dirty_vram->begin_pfn
+            || end_pfn   != d->dirty_vram->end_pfn )) ) {
+        /* Different tracking, tear the previous down. */
+        gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", d->dirty_vram->begin_pfn, d->dirty_vram->end_pfn);
+        xfree(d->dirty_vram->sl1ma);
+        xfree(d->dirty_vram->dirty_bitmap);
+        xfree(d->dirty_vram);
+        d->dirty_vram = NULL;
+    }
+
+    if ( !nr )
+        goto out;
+
+    /* Seldom path (video mode change), no need to be careful. */
+    if ( !d->dirty_vram ) {
+        unsigned long i;
+        p2m_type_t t;
+
+        /* Just recount from start. */
+        for ( i = begin_pfn; i < end_pfn; i++ )
+            flush_tlb |= sh_remove_all_mappings(d->vcpu[0], gfn_to_mfn(d, i, &t));
+
+        gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
+
+        rc = -ENOMEM;
+        if ( (d->dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
+            goto out;
+        d->dirty_vram->begin_pfn = begin_pfn;
+        d->dirty_vram->end_pfn = end_pfn;
+
+        if ( (d->dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
+            goto out_dirty_vram;
+        memset(d->dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
+
+        if ( (d->dirty_vram->dirty_bitmap = xmalloc_array(uint8_t, dirty_size)) == NULL )
+            goto out_sl1ma;
+        memset(d->dirty_vram->dirty_bitmap, 0, dirty_size);
+
+        /* Tell the caller that this time we could not track dirty bits. */
+        rc = -ENODATA;
+    } else {
+        unsigned long i;
+#ifdef __i386__
+        unsigned long map_mfn = INVALID_MFN;
+        void *map_sl1p = NULL;
+#endif
+
+        /* Iterate over VRAM to track dirty bits. */
+        for ( i = 0; i < nr; i++ ) {
+            p2m_type_t t;
+            mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t);
+            struct page_info *page = mfn_to_page(mfn);
+            u32 count_info = page->u.inuse.type_info & PGT_count_mask;
+            int dirty = 0;
+            paddr_t sl1ma = d->dirty_vram->sl1ma[i];
+
+            switch (count_info) {
+            case 0:
+                /* No guest reference, nothing to track. */
+                break;
+            case 1:
+                /* One guest reference. */
+                if ( sl1ma == INVALID_PADDR ) {
+                    /* We don't know which sl1e points to this, too bad. */
+                    dirty = 1;
+                    /* TODO: Heuristics for finding the single mapping of
+                     * this gmfn */
+                    flush_tlb |= sh_remove_all_mappings(d->vcpu[0], gfn_to_mfn(d, begin_pfn + i, &t));
+                } else {
+                    /* Hopefully the most common case: only one mapping,
+                     * whose dirty bit we can use. */
+                    l1_pgentry_t *sl1e;
+#ifdef __i386__
+                    void *sl1p = map_sl1p;
+                    unsigned long sl1mfn = paddr_to_pfn(sl1ma);
+
+                    if ( sl1mfn != map_mfn ) {
+                        if ( map_sl1p )
+                            sh_unmap_domain_page(map_sl1p);
+                        map_sl1p = sl1p = sh_map_domain_page(_mfn(sl1mfn));
+                        map_mfn = sl1mfn;
+                    }
+                    sl1e = sl1p + (sl1ma & ~PAGE_MASK);
+#else
+                    sl1e = maddr_to_virt(sl1ma);
+#endif
+
+                    if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY ) {
+                        dirty = 1;
+                        /* Note: this is atomic, so we may clear a
+                         * _PAGE_ACCESSED set by another processor. */
+                        l1e_remove_flags(*sl1e, _PAGE_DIRTY);
+                        flush_tlb = 1;
+                    }
+                }
+                break;
+            default:
+                /* More than one guest reference,
+                 * we don't afford tracking that. */
+                dirty = 1;
+                break;
+            }
+
+            if ( dirty )
+                d->dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
+        }
+
+#ifdef __i386__
+        if ( map_sl1p )
+            sh_unmap_domain_page(map_sl1p);
+#endif
+
+        rc = -EFAULT;
+        if ( copy_to_guest(dirty_bitmap, d->dirty_vram->dirty_bitmap, dirty_size) == 0 ) {
+            memset(d->dirty_vram->dirty_bitmap, 0, dirty_size);
+            rc = 0;
+        }
+    }
+    goto out;
+
+out_sl1ma:
+    xfree(d->dirty_vram->sl1ma);
+out_dirty_vram:
+    xfree(d->dirty_vram);
+    d->dirty_vram = NULL;
+
+out:
+    /* Also reached on allocation failure, after mappings were removed. */
+    if ( flush_tlb )
+        flush_tlb_mask(d->domain_dirty_cpumask);
+    shadow_unlock(d);
+    return rc;
+}
+
 /**************************************************************************/
 /* Shadow-control XEN_DOMCTL dispatcher */
 
diff -r e6cffb5a0f53 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Thu Apr 24 11:38:49 2008 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c    Fri May 02 14:38:22 2008 +0100
@@ -801,7 +801,7 @@ _sh_propagate(struct vcpu *v,
     // Since we know the guest's PRESENT bit is set, we also set the shadow's
     // SHADOW_PRESENT bit.
     //
-    pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER |
+    pass_thru_flags = (_PAGE_ACCESSED | _PAGE_USER |
                        _PAGE_RW | _PAGE_PRESENT);
     if ( guest_supports_nx(v) )
         pass_thru_flags |= _PAGE_NX_BIT;
@@ -1251,6 +1251,80 @@ static int shadow_set_l2e(struct vcpu *v
     return flags;
 }
 
+static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
+                                       shadow_l1e_t *sl1e,
+                                       mfn_t sl1mfn,
+                                       struct domain *d)
+{
+    mfn_t mfn;
+    unsigned long gfn;
+
+    if ( !d->dirty_vram ) return;   /* no VRAM range is being tracked */
+
+    mfn = shadow_l1e_get_mfn(new_sl1e);
+    gfn = mfn_to_gfn(d, mfn);
+
+    if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) {
+        unsigned long i = gfn - d->dirty_vram->begin_pfn;
+        struct page_info *page = mfn_to_page(mfn);
+        u32 count_info = page->u.inuse.type_info & PGT_count_mask;
+
+        if ( count_info == 1 )
+            /* Initial guest reference, record where its sl1e lives */
+            d->dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
+                | ((paddr_t) sl1e & ~PAGE_MASK);  /* offset within sl1mfn */
+    }
+}
+
+/* Called before the reference held by old_sl1e is dropped: forget the
+ * recorded sl1e if it is this one, and mark the VRAM page dirty whenever
+ * its state cannot be read back from a single known sl1e. */
+static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
+                                       shadow_l1e_t *sl1e,
+                                       mfn_t sl1mfn,
+                                       struct domain *d)
+{
+    mfn_t mfn;
+    unsigned long gfn;
+
+    if ( !d->dirty_vram ) return;
+
+    mfn = shadow_l1e_get_mfn(old_sl1e);
+    gfn = mfn_to_gfn(d, mfn);
+
+    if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) {
+        unsigned long i = gfn - d->dirty_vram->begin_pfn;
+        struct page_info *page = mfn_to_page(mfn);
+        u32 count_info = page->u.inuse.type_info & PGT_count_mask;
+        int dirty = 0;
+        paddr_t sl1ma =  pfn_to_paddr(mfn_x(sl1mfn))
+            | ((paddr_t) sl1e & ~PAGE_MASK);
+
+        if ( count_info == 1 ) {
+            /* Last reference */
+            if ( d->dirty_vram->sl1ma[i] == INVALID_PADDR ) {
+                /* We didn't know it was that one, let's say it is dirty */
+                dirty = 1;
+            } else {
+                ASSERT(d->dirty_vram->sl1ma[i] == sl1ma);
+                d->dirty_vram->sl1ma[i] = INVALID_PADDR;
+                if ( shadow_l1e_get_flags(old_sl1e) & _PAGE_DIRTY )
+                    dirty = 1;
+            }
+        } else {
+            /* We had more than one reference, just consider the page dirty. */
+            dirty = 1;
+            /* If the sl1e going away is the recorded one, we remembered the
+             * wrong one: forget it.  Otherwise the recorded sl1e still points
+             * to this page, let's just hope it will remain. */
+            if ( d->dirty_vram->sl1ma[i] == sl1ma )
+                d->dirty_vram->sl1ma[i] = INVALID_PADDR;
+        }
+        if ( dirty )
+            d->dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
+    }
+}
+
 static int shadow_set_l1e(struct vcpu *v, 
                           shadow_l1e_t *sl1e, 
                           shadow_l1e_t new_sl1e,
@@ -1275,6 +1349,8 @@ static int shadow_set_l1e(struct vcpu *v
                 /* Doesn't look like a pagetable. */
                 flags |= SHADOW_SET_ERROR;
                 new_sl1e = shadow_l1e_empty();
+            } else {
+                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
             }
         }
     } 
@@ -1293,6 +1369,7 @@ static int shadow_set_l1e(struct vcpu *v
          * trigger a flush later. */
         if ( shadow_mode_refcounts(d) ) 
         {
+            shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
             shadow_put_page_from_l1e(old_sl1e, d);
         } 
     }
@@ -2248,8 +2325,10 @@ void sh_destroy_l1_shadow(struct vcpu *v
         mfn_t sl1mfn = smfn; 
         SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
             if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
-                 && !sh_l1e_is_magic(*sl1e) )
+                 && !sh_l1e_is_magic(*sl1e) ) {
+                shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d);
                 shadow_put_page_from_l1e(*sl1e, d);
+            }
         });
     }
     
diff -r e6cffb5a0f53 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Thu Apr 24 11:38:49 2008 +0100
+++ b/xen/arch/x86/mm/shadow/private.h  Fri May 02 14:38:22 2008 +0100
@@ -528,6 +528,15 @@ sh_mfn_is_dirty(struct domain *d, mfn_t 
     return rv;
 }
 
+/**************************************************************************/
+/* VRAM dirty tracking support */
+
+struct sh_dirty_vram {
+    unsigned long begin_pfn;  /* first tracked guest pfn */
+    unsigned long end_pfn;    /* begin_pfn + nr, one past the last */
+    paddr_t *sl1ma;           /* per page: sl1e maddr, or INVALID_PADDR */
+    uint8_t *dirty_bitmap;    /* per page: one bit, (nr + 7) / 8 bytes */
+};
 
 /**************************************************************************/
 /* Shadow-page refcounting. */
diff -r e6cffb5a0f53 xen/include/asm-ia64/config.h
--- a/xen/include/asm-ia64/config.h     Thu Apr 24 11:38:49 2008 +0100
+++ b/xen/include/asm-ia64/config.h     Fri May 02 14:38:22 2008 +0100
@@ -71,6 +71,7 @@ typedef int pid_t;
 
 // now needed for xen/include/mm.h
 typedef unsigned long paddr_t;
+#define INVALID_PADDR (~0UL)
 // from include/linux/kernel.h
 #define ALIGN(x,a) (((x)+(a)-1)&~((a)-1))
 
diff -r e6cffb5a0f53 xen/include/asm-powerpc/types.h
--- a/xen/include/asm-powerpc/types.h   Thu Apr 24 11:38:49 2008 +0100
+++ b/xen/include/asm-powerpc/types.h   Fri May 02 14:38:22 2008 +0100
@@ -61,6 +61,7 @@ typedef unsigned long size_t;
 #endif
 
 typedef unsigned long paddr_t;
+#define INVALID_PADDR (~0UL)
 #define PRIpaddr "08lx"
 
 /* DMA addresses come in generic and 64-bit flavours.  */
diff -r e6cffb5a0f53 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Thu Apr 24 11:38:49 2008 +0100
+++ b/xen/include/asm-x86/shadow.h      Fri May 02 14:38:22 2008 +0100
@@ -62,6 +62,12 @@ void shadow_vcpu_init(struct vcpu *v);
 /* Enable an arbitrary shadow mode.  Call once at domain creation. */
 int shadow_enable(struct domain *d, u32 mode);
 
+/* Enable VRAM dirty bit tracking. */
+int shadow_track_dirty_vram(struct domain *d,
+                            unsigned long first_pfn,
+                            unsigned long nr,
+                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
 /* Handler for shadow control ops: operations from user-space to enable
  * and disable ephemeral shadow modes (test mode and log-dirty mode) and
  * manipulate the log-dirty bitmap. */
diff -r e6cffb5a0f53 xen/include/asm-x86/types.h
--- a/xen/include/asm-x86/types.h       Thu Apr 24 11:38:49 2008 +0100
+++ b/xen/include/asm-x86/types.h       Fri May 02 14:38:22 2008 +0100
@@ -38,15 +38,18 @@ typedef unsigned long long u64;
 typedef unsigned long long u64;
 #if defined(CONFIG_X86_PAE)
 typedef u64 paddr_t;
+#define INVALID_PADDR (~0ULL)
 #define PRIpaddr "016llx"
 #else
 typedef unsigned long paddr_t;
+#define INVALID_PADDR (~0UL)
 #define PRIpaddr "08lx"
 #endif
 #elif defined(__x86_64__)
 typedef signed long s64;
 typedef unsigned long u64;
 typedef unsigned long paddr_t;
+#define INVALID_PADDR (~0UL)
 #define PRIpaddr "016lx"
 #endif
 
diff -r e6cffb5a0f53 xen/include/public/hvm/hvm_op.h
--- a/xen/include/public/hvm/hvm_op.h   Thu Apr 24 11:38:49 2008 +0100
+++ b/xen/include/public/hvm/hvm_op.h   Fri May 02 14:38:22 2008 +0100
@@ -73,4 +73,20 @@ DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_
 /* Flushes all VCPU TLBs: @arg must be NULL. */
 #define HVMOP_flush_tlbs          5
 
+/* Track dirty VRAM. */
+#define HVMOP_track_dirty_vram    6
+struct xen_hvm_track_dirty_vram {
+    /* Domain to be tracked. */
+    domid_t  domid;
+    /* First pfn to track. */
+    uint64_aligned_t first_pfn;
+    /* Number of pages to track (0 disables tracking). */
+    uint64_aligned_t nr;
+    /* OUT variable. */
+    /* Dirty bitmap buffer: one bit per page, bit (i % 8) of byte (i / 8). */
+    XEN_GUEST_HANDLE_64(uint8) dirty_bitmap;
+};
+typedef struct xen_hvm_track_dirty_vram xen_hvm_track_dirty_vram_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_track_dirty_vram_t);
+
 #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
diff -r e6cffb5a0f53 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Thu Apr 24 11:38:49 2008 +0100
+++ b/xen/include/xen/sched.h   Fri May 02 14:38:22 2008 +0100
@@ -236,6 +236,9 @@ struct domain
      * cause a deadlock. Acquirers don't spin waiting; they preempt.
      */
     spinlock_t hypercall_deadlock_mutex;
+
+    /* VRAM dirty support. */
+    struct sh_dirty_vram *dirty_vram;
 };
 
 struct domain_setup_info

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.