[Xen-devel] [PATCH 1 of 2] tools/libxc: Remus Checkpoint Compres

# HG changeset patch
# User Shriram Rajagopalan <rshriram@xxxxxxxxx>
# Date 1308238095 25200
# Node ID 5dbafaf24c7036f3e24e4387797b32e59d732ac6
# Parent  23c068b109236657ededa3e3b7f180346a5cd9f9
tools/libxc: Remus Checkpoint Compression

Instead of sending dirty pages of guest memory as-is, use a simple compression
algorithm that sends a RLE-encoded XOR of the page against its last sent copy.
A small LRU cache is used to hold recently dirtied pages. Pagetable pages are
sent as-is, as they are canonicalized at sender side and uncanonicalized at
receiver.

Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx>

diff -r 23c068b10923 -r 5dbafaf24c70 tools/libxc/Makefile
--- a/tools/libxc/Makefile      Wed Jun 15 16:16:41 2011 +0100
+++ b/tools/libxc/Makefile      Thu Jun 16 08:28:15 2011 -0700
@@ -42,7 +42,7 @@
 GUEST_SRCS-y :=
 GUEST_SRCS-y += xg_private.c xc_suspend.c
 GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_domain_save.c
-GUEST_SRCS-$(CONFIG_MIGRATE) += xc_offline_page.c
+GUEST_SRCS-$(CONFIG_MIGRATE) += xc_offline_page.c xc_remus.c
 GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c
 
 vpath %.c ../../xen/common/libelf
diff -r 23c068b10923 -r 5dbafaf24c70 tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c   Wed Jun 15 16:16:41 2011 +0100
+++ b/tools/libxc/xc_domain_restore.c   Thu Jun 16 08:28:15 2011 -0700
@@ -663,6 +663,10 @@
     /* pages is of length nr_physpages, pfn_types is of length nr_pages */
     unsigned int nr_physpages, nr_pages;
 
+    /* remus compression state */
+    int compression;
+    unsigned long compbuf_pos, compbuf_size;
+
     /* Types of the pfns in the current region */
     unsigned long* pfn_types;
 
@@ -700,6 +704,7 @@
 {
     int count, countpages, oldcount, i;
     void* ptmp;
+    unsigned long compbuf_size;
 
     if ( RDEXACT(fd, &count, sizeof(count)) )
     {
@@ -809,6 +814,36 @@
         }
         return pagebuf_get_one(xch, ctx, buf, fd, dom);
 
+    case XC_SAVE_ID_COMPRESSED_DATA:
+
+        /* read the length of compressed chunk coming in */
+        if ( RDEXACT(fd, &compbuf_size, sizeof(unsigned long)) )
+        {
+            PERROR("Error when reading compbuf_size");
+            return -1;
+        }
+        if (!compbuf_size) return 1;
+
+        buf->compbuf_size += compbuf_size;
+        if (!buf->pages) {
+            if (!(buf->pages = malloc(buf->compbuf_size))) {
+                ERROR("Could not allocate compression buffer");
+                return -1;
+            }
+        } else {
+            if (!(ptmp = realloc(buf->pages, buf->compbuf_size))) {
+                ERROR("Could not reallocate compression buffer");
+                return -1;
+            }
+            buf->pages = ptmp;
+        }
+        if ( RDEXACT(fd, buf->pages + (buf->compbuf_size - compbuf_size),
+                     compbuf_size) ) {
+            PERROR("Error when reading compression buffer");
+            return -1;
+        }
+        return compbuf_size;
+
     default:
         if ( (count > MAX_BATCH_SIZE) || (count < 0) ) {
             ERROR("Max batch size exceeded (%d). Giving up.", count);
@@ -846,6 +881,9 @@
     if (!countpages)
         return count;
 
+    if (buf->compression)
+        return pagebuf_get_one(xch, ctx, buf, fd, dom);
+
     oldcount = buf->nr_physpages;
     buf->nr_physpages += countpages;
     if (!buf->pages) {
@@ -874,6 +912,7 @@
     int rc;
 
     buf->nr_physpages = buf->nr_pages = 0;
+    buf->compbuf_pos = buf->compbuf_size = 0;
 
     do {
         rc = pagebuf_get_one(xch, ctx, buf, fd, dom);
@@ -1091,7 +1130,19 @@
         /* In verify mode, we use a copy; otherwise we work in place */
         page = pagebuf->verify ? (void *)buf : (region_base + i*PAGE_SIZE);
 
-        memcpy(page, pagebuf->pages + (curpage + curbatch) * PAGE_SIZE, 
PAGE_SIZE);
+        /* Remus - page decompression */
+        if (pagebuf->compression)
+        {
+            if (xc_remus_uncompress(xch, pagebuf->pages, pagebuf->compbuf_size,
+                                    &pagebuf->compbuf_pos, (char *)page))
+            {
+                ERROR("Failed to uncompress page (pfn=%lx)\n", pfn);
+                goto err_mapped;
+            }
+        }
+        else
+            memcpy(page, pagebuf->pages + (curpage + curbatch) * PAGE_SIZE,
+                   PAGE_SIZE);
 
         pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
 
@@ -1228,6 +1279,8 @@
 
     int orig_io_fd_flags;
 
+    uint32_t remus_flags = 0;
+
     struct restore_ctx _ctx;
     struct restore_ctx *ctx = &_ctx;
     struct domain_info_context *dinfo = &ctx->dinfo;
@@ -1257,6 +1310,11 @@
         PERROR("read: p2m_size");
         goto out;
     }
+    if ( RDEXACT(io_fd, &remus_flags, sizeof(uint32_t)) )
+    {
+        PERROR("read: remus_flags");
+        goto out;
+    }
     DPRINTF("xc_domain_restore start: p2m_size = %lx\n", dinfo->p2m_size);
 
     if ( !get_platform_info(xch, dom,
@@ -1344,6 +1402,7 @@
      */
 
     n = m = 0;
+    pagebuf.compression = (remus_flags & XCFLAGS_REMUS_COMPRESS);
  loadpages:
     for ( ; ; )
     {
@@ -1353,6 +1412,7 @@
 
         if ( !ctx->completed ) {
             pagebuf.nr_physpages = pagebuf.nr_pages = 0;
+            pagebuf.compbuf_pos = pagebuf.compbuf_size = 0;
             if ( pagebuf_get_one(xch, ctx, &pagebuf, io_fd, dom) < 0 ) {
                 PERROR("Error when reading batch");
                 goto out;
@@ -1395,6 +1455,7 @@
         }
 
         pagebuf.nr_physpages = pagebuf.nr_pages = 0;
+        pagebuf.compbuf_pos = pagebuf.compbuf_size = 0;
 
         n += j; /* crude stats */
 
diff -r 23c068b10923 -r 5dbafaf24c70 tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c      Wed Jun 15 16:16:41 2011 +0100
+++ b/tools/libxc/xc_domain_save.c      Thu Jun 16 08:28:15 2011 -0700
@@ -269,6 +269,57 @@
         return noncached_write(xch, ob, fd, buf, len);
 }
 
+static int write_compressed(xc_interface *xch, void *remus_ctx, int dobuf,
+                            struct outbuf* ob, int fd)
+{
+    int rc = 0;
+    int header = sizeof(int) + sizeof(unsigned long);
+    int marker = XC_SAVE_ID_COMPRESSED_DATA;
+    unsigned long compbuf_len = 0;
+
+    do
+    {
+        /* check for available space (atleast 8k) */
+        if ((ob->pos + header + XC_PAGE_SIZE * 2) > ob->size)
+        {
+            if (outbuf_flush(xch, ob, fd) < 0)
+            {
+                ERROR("Error when flushing outbuf intermediate");
+                return -1;
+            }
+        }
+
+        xc_remus_compbuf_set(xch, remus_ctx, ob->buf + ob->pos + header,
+                             ob->size - ob->pos - header);
+        rc = xc_remus_compress(xch, remus_ctx);
+        if (!rc)
+            break;
+        compbuf_len = xc_remus_get_compbuf_len(xch, remus_ctx);
+
+        if (outbuf_hardwrite(xch, ob, fd, &marker, sizeof(marker)) < 0)
+        {
+            PERROR("Error when writing marker (errno %d)", errno);
+            return -1;
+        }
+
+        if (outbuf_hardwrite(xch, ob, fd, &compbuf_len, sizeof(compbuf_len)) < 
0)
+        {
+            PERROR("Error when writing compbuf_len (errno %d)", errno);
+            return -1;
+        }
+
+        ob->pos += (size_t) compbuf_len;
+        if (!dobuf && outbuf_flush(xch, ob, fd) < 0)
+        {
+            ERROR("Error when writing compressed chunk");
+            return -1;
+        }
+    } while (rc != 0);
+
+    xc_remus_pagebuf_reset(xch, remus_ctx);
+    return 0;
+}
+
 struct time_stats {
     struct timeval wall;
     long long d0_cpu, d1_cpu;
@@ -866,11 +917,15 @@
 
     unsigned long mfn;
 
-    struct outbuf ob;
+    struct outbuf ob_pagebuf, ob_tailbuf, *ob = NULL;
     struct save_ctx _ctx;
     struct save_ctx *ctx = &_ctx;
     struct domain_info_context *dinfo = &ctx->dinfo;
 
+    /* Remus context */
+    void *remus_ctx = NULL;
+    int compression = (flags & XCFLAGS_REMUS_COMPRESS);
+
     int completed = 0;
 
     if ( hvm && !callbacks->switch_qemu_logdirty )
@@ -880,7 +935,7 @@
         return 1;
     }
 
-    outbuf_init(xch, &ob, OUTBUF_SIZE);
+    outbuf_init(xch, &ob_pagebuf, OUTBUF_SIZE);
 
     memset(ctx, 0, sizeof(*ctx));
 
@@ -968,6 +1023,16 @@
         }
     }
 
+    if ( compression )
+    {
+        if (!(remus_ctx = xc_remus_create_context(xch, dinfo->p2m_size)))
+        {
+            ERROR("Failed to create remus context");
+            goto out;
+        }
+        outbuf_init(xch, &ob_tailbuf, OUTBUF_SIZE/4);
+    }
+
     last_iter = !live;
 
     /* pretend we sent all the pages last iteration */
@@ -1031,6 +1096,15 @@
         goto out;
     }
 
+    /* Send the flags so that the receiver knows whether to expect
+     * compressed pages (remus) or just usual live migration pages.
+     */
+    if ( write_exact(io_fd, &flags, sizeof(uint32_t)) )
+    {
+        PERROR("write: flags");
+        goto out;
+    }
+
     if ( !hvm )
     {
         int err = 0;
@@ -1076,9 +1150,11 @@
     }
 
   copypages:
-#define wrexact(fd, buf, len) write_buffer(xch, last_iter, &ob, (fd), (buf), 
(len))
-#define wruncached(fd, live, buf, len) write_uncached(xch, last_iter, &ob, 
(fd), (buf), (len))
+#define wrexact(fd, buf, len) write_buffer(xch, last_iter, ob, (fd), (buf), 
(len))
+#define wruncached(fd, live, buf, len) write_uncached(xch, last_iter, ob, 
(fd), (buf), (len))
+#define wrcompressed(fd) write_compressed(xch, remus_ctx, last_iter, ob, (fd))
 
+    ob = &ob_pagebuf; /* Holds pfn_types, pages/compressed pages */
     /* Now write out each data page, canonicalising page tables as we go... */
     for ( ; ; )
     {
@@ -1321,7 +1397,7 @@
                 {
                     /* If the page is not a normal data page, write out any
                        run of pages we may have previously acumulated */
-                    if ( run )
+                    if ( !compression && run )
                     {
                         if ( wruncached(io_fd, live,
                                        (char*)region_base+(PAGE_SIZE*(j-run)), 
@@ -1356,7 +1432,26 @@
                         goto out;
                     }
 
-                    if ( wruncached(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE 
)
+                    if (compression)
+                    {
+                        /* Add the pagetable page, raw into the page buffer */
+                        if (xc_remus_add_page(xch, remus_ctx, page, pfn, 1) < 
0)
+                        {
+                            /* We are out of buffer space to hold dirty pages.
+                             * Compress and flush the current buffer to make 
space.
+                             * --This is a corner case, that slows down 
checkpointing
+                             * as the compression happens while domain is 
suspended.
+                             * Happens seldom and if you find this occuring 
frequently,
+                             * increase the PAGE_BUFFER_SIZE in xc_remus.c.
+                             */
+                            if (wrcompressed(io_fd) < 0)
+                            {
+                                ERROR("Error when writing compressed data 
(4b)\n");
+                                goto out;
+                            }
+                        }
+                    }
+                    else if ( wruncached(io_fd, live, page, PAGE_SIZE) != 
PAGE_SIZE )
                     {
                         PERROR("Error when writing to state file (4b)"
                               " (errno %d)", errno);
@@ -1366,11 +1461,36 @@
                 else
                 {
                     /* We have a normal page: accumulate it for writing. */
-                    run++;
+                    if (compression)
+                    {
+                        /* For remus/compression, accumulate the page in the
+                         * page buffer, to be compressed later.
+                         */
+                        if (xc_remus_add_page(xch, remus_ctx, spage, pfn,
+                                              !completed) < 0)
+                        {
+                            if (wrcompressed(io_fd) < 0)
+                            {
+                                ERROR("Error when writing compressed data 
(4c)\n");
+                                goto out;
+                            }
+                        }
+                    }
+                    else
+                        run++;
                 }
             } /* end of the write out for this batch */
 
-            if ( run )
+            /* Until last_iter, flush out data without buffering */
+            if (compression && !completed)
+            {
+                if (wrcompressed(io_fd) < 0)
+                {
+                    ERROR("Error when writing compressed data (4d)\n");
+                    goto out;
+                }
+            }
+            else if ( run )
             {
                 /* write out the last accumulated run of pages */
                 if ( wruncached(io_fd, live,
@@ -1474,6 +1594,15 @@
 
     DPRINTF("All memory is saved\n");
 
+    /* After last_iter, buffer the rest of pagebuf & tailbuf data into a
+     * separate output buffer and flush it after the compressed page chunks.
+     */
+    if (compression)
+    {
+        ob = &ob_tailbuf;
+        ob->pos = 0;
+    }
+
     {
         struct {
             int id;
@@ -1817,8 +1946,29 @@
     if ( !rc && callbacks->postcopy )
         callbacks->postcopy(callbacks->data);
 
+    /* guest has been resumed. Now we can compress data
+     * at our own pace.
+     */
+    if (!rc && compression)
+    {
+        ob = &ob_pagebuf;
+        if (wrcompressed(io_fd) < 0)
+        {
+            ERROR("Error when writing compressed data, after postcopy\n");
+            rc = 1;
+            goto out;
+        }
+        /* Copy the tailbuf data into the main outbuf */
+        if ( wrexact(io_fd, ob_tailbuf.buf, ob_tailbuf.pos) )
+        {
+            rc = 1;
+            PERROR("Error when copying tailbuf into outbuf");
+            goto out;
+        }
+    }
+
     /* Flush last write and discard cache for file. */
-    if ( outbuf_flush(xch, &ob, io_fd) < 0 ) {
+    if ( outbuf_flush(xch, ob, io_fd) < 0 ) {
         PERROR("Error when flushing output buffer");
         rc = 1;
     }
@@ -1866,6 +2016,9 @@
             DPRINTF("Warning - couldn't disable qemu log-dirty mode");
     }
 
+    if (remus_ctx)
+        xc_remus_free_context(xch, remus_ctx);
+
     if ( live_shinfo )
         munmap(live_shinfo, PAGE_SIZE);
 
diff -r 23c068b10923 -r 5dbafaf24c70 tools/libxc/xc_remus.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_remus.c    Thu Jun 16 08:28:15 2011 -0700
@@ -0,0 +1,465 @@
+/******************************************************************************
+ * xc_remus.c
+ *
+ * Checkpoint Compression using Page Delta Algorithm.
+ * - A LRU cache of recently dirtied guest pages is maintained.
+ * - For each dirty guest page in the checkpoint, if a previous version of the
+ * page exists in the cache, XOR both pages and send the non-zero sections
+ * to the receiver. The cache is then updated with the newer copy of guest 
page.
+ * - The receiver will XOR the non-zero sections against its copy of the guest
+ * page, thereby bringing the guest page up-to-date with the sender side.
+ *
+ * Copyright (c) 2011 Shriram Rajagopalan (rshriram@xxxxxxxxx).
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  
USA
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <inttypes.h>
+#include <errno.h>
+#include "xenctrl.h"
+#include "xg_save_restore.h"
+#include "xg_private.h"
+
+/* Already defined in xc_dom.h, but it doesnt have
+ * a conditional include macro. So, redifine here.
+ */
+#define INVALID_P2M_ENTRY   ((xen_pfn_t)-1)
+
+/* Page Cache for Delta Compression*/
+#define DELTA_CACHE_SIZE (XC_PAGE_SIZE * 8192)
+
+struct cache_page;
+struct cache_page
+{
+    char *page;
+    unsigned long pfn;
+    struct cache_page *next;
+    struct cache_page *prev;
+};
+
+/* After XORing the older and newer version, the non-zero sections
+ * are sent as a sequence of tuples <2-byte-offset,4-byte-data> called markers.
+ * - Each page begins with a BEGIN marker (for synchronization).
+ * - If the result of XOR is a page filled with zeros (i.e no difference 
between
+ *   old and new page, then only the BEGIN marker is sent for the page.
+ * - If the two versions of the page differ by more than 50%, the page is sent
+ *   as is, with a FULLPAGE marker, without a BEGIN marker.
+ *
+ * About the choice of data types: typical page size is 4K. Each marker is
+ * 6 bytes long, with a 4-byte data word (1024 data words per page). If 50% of
+ * the page changed, then we would be transmitting ~3000 bytes (worst case).
+ * - If we use 8-byte data word (10-byte marker), we end up sending
+ *   ~5000 bytes (>4096).
+ */
+
+typedef unsigned int data_t;
+typedef short int moff_t;
+
+#define BEGIN -100
+#define FULLPAGE -101
+struct marker
+{
+    moff_t off;
+    data_t val;
+} __attribute__((packed));
+
+static struct marker begin_page = { BEGIN, -1};
+static struct marker full_page = {FULLPAGE, -1};
+
+/* Internal page buffer to hold dirty pages of a checkpoint,
+ * to be compressed after the domain is resumed for execution.
+ */
+#define PAGE_BUFFER_SIZE (XC_PAGE_SIZE * 8192)
+
+struct remus_context
+{
+    /* compression buffer - holds compressed data */
+    char *compbuf;
+    unsigned long compbuf_size;
+    unsigned long compbuf_pos;
+
+    /* Page buffer to hold pages to be compressed */
+    char *inputbuf;
+    /* pfns of pages to be compressed */
+    unsigned long *sendbuf_pfns;
+    unsigned int pfns_index;
+    unsigned int pfns_iterator;
+
+    /* Compression Cache (LRU) */
+    char *cache_base;
+    struct cache_page **pfn2cache;
+    struct cache_page *cache2pfn;
+    struct cache_page *page_list_head;
+    struct cache_page *page_list_tail;
+};
+
+static
+int __compress(xc_interface *xch, struct remus_context *ctx, char *srcpage,
+               char *copypage, int israw)
+{
+    struct marker *dest = (struct marker *)(ctx->compbuf+ ctx->compbuf_pos);
+    moff_t off;
+    int j=0, rc = 0;
+    data_t *src, *copy;
+
+    src = (data_t*)srcpage;
+    copy = (data_t*)copypage;
+
+    if ((ctx->compbuf_pos + sizeof(struct marker)) > ctx->compbuf_size)
+        return -1;
+
+    if (!israw && copypage)
+    {
+        dest[j++] = begin_page;
+        for (off = 0; off < XC_PAGE_SIZE/sizeof(data_t); off++)
+        {
+            if (copy[off] != src[off])
+            {
+                if ((ctx->compbuf_pos + (j + 1) *
+                     sizeof(struct marker)) > ctx->compbuf_size)
+                    return -1;
+
+                copy[off] = src[off];
+                dest[j].off = off;
+                dest[j].val = src[off];
+                j++;
+            }
+            if (j > 500) /* more than 50% of page changed */
+                goto FullPage;
+        }
+        rc = (j * sizeof(struct marker));
+    }
+    else
+    {
+    FullPage:
+        if ( (ctx->compbuf_pos + sizeof(struct marker)
+              + XC_PAGE_SIZE) > ctx->compbuf_size)
+            return -1;
+
+        dest[0] = full_page;
+        if (copypage)
+            memcpy(copypage, srcpage, XC_PAGE_SIZE);
+        memcpy((char *)&dest[1], srcpage, XC_PAGE_SIZE);
+        rc = XC_PAGE_SIZE + sizeof(struct marker);
+    }
+    ctx->compbuf_pos += rc;
+
+    return rc;
+}
+
+static
+int __uncompress(xc_interface *xch, char *destpage, unsigned long *compbuf_pos,
+                 char *compbuf, unsigned long compbuf_size)
+{
+    struct marker *src = (struct marker *)(compbuf + *compbuf_pos);
+    int i;
+    data_t *dest = (data_t *)destpage;
+
+    if (*compbuf_pos >= compbuf_size)
+    {
+        ERROR("Out of bounds exception: read ptr:%lu, bufsize = %lu\n",
+              *compbuf_pos, compbuf_size);
+        return -1;
+    }
+
+    if (src[0].off == BEGIN)
+    {
+        *compbuf_pos += sizeof(struct marker);
+        for (i = 1; (*compbuf_pos < compbuf_size) && (src[i].off >= 0);
+             i++, *compbuf_pos += sizeof(struct marker))
+            dest[src[i].off] = src[i].val;
+    }
+    else if (src[0].off == FULLPAGE)
+    {
+        *compbuf_pos += sizeof(struct marker) + XC_PAGE_SIZE;
+        memcpy(destpage, (char *)&src[1], XC_PAGE_SIZE);
+    }
+    else
+    {
+        ERROR("Invalid marker %d in compression buffer at %u\n",
+              src[0].off, *compbuf_pos);
+        return -1;
+    }
+    return 0;
+}
+
+static
+char *get_cache_page(struct remus_context *ctx, unsigned long pfn,
+                     int *israw)
+{
+    struct cache_page *item = NULL;
+
+start:
+    item = ctx->pfn2cache[pfn];
+    /*         if requested item is in cache move to head of list */
+    if (item)
+    {
+        /* item already at head of list */
+        if (item == ctx->page_list_head)
+            goto end;
+        if (item == ctx->page_list_tail)
+        {
+            /* item at tail of list. */
+            ctx->page_list_tail = item->prev;
+            (ctx->page_list_tail)->next = NULL;
+        }
+        else
+        {
+            /* item in middle of list */
+            item->prev->next = item->next;
+            item->next->prev = item->prev;
+        }
+
+        item->prev = NULL;
+        item->next = ctx->page_list_head;
+        (ctx->page_list_head)->prev = item;
+        ctx->page_list_head = item;
+        goto end;
+    }
+    else
+    {
+        *israw = 1;
+        /* Add new item to list. If list is full,
+         * evict a page from tail of list.
+         */
+        if ((ctx->page_list_tail)->pfn != INVALID_P2M_ENTRY)
+            ctx->pfn2cache[(ctx->page_list_tail)->pfn] = NULL;
+        (ctx->page_list_tail)->pfn = pfn;
+        ctx->pfn2cache[pfn] = ctx->page_list_tail;
+
+        /* Will have same effect as cache hit at tail of list */
+        goto start;
+    }
+end:
+    return (ctx->page_list_head)->page;
+}
+
+/* Remove pagetable pages from cache and move to tail, as free pages */
+static
+void invalidate_cache_page(struct remus_context *ctx, unsigned long pfn)
+{
+    struct cache_page *item = NULL;
+
+    item = ctx->pfn2cache[pfn];
+    if (item)
+    {
+        /* item at head of list */
+        if (item == ctx->page_list_head)
+        {
+            ctx->page_list_head = (ctx->page_list_head)->next;
+            (ctx->page_list_head)->prev = NULL;
+        }
+        else if (item == ctx->page_list_tail)
+        {
+            /* item already at tail of list. */
+            goto end;
+        }
+        else
+        {
+            /* item in middle of list */
+            item->prev->next = item->next;
+            item->next->prev = item->prev;
+        }
+        item->next = NULL;
+        item->prev = ctx->page_list_tail;
+        (ctx->page_list_tail)->next = item;
+        ctx->page_list_tail = item;
+    end:
+        ctx->pfn2cache[pfn] = NULL;
+        (ctx->page_list_tail)->pfn = INVALID_P2M_ENTRY;
+    }
+}
+
+int xc_remus_add_page(xc_interface *xch, void *remus_ctx, char *page,
+                      unsigned long pfn, int israw)
+{
+    struct remus_context *ctx = (struct remus_context *)remus_ctx;
+
+    /* pagetable page */
+    if (israw)
+        invalidate_cache_page(ctx, pfn);
+    ctx->sendbuf_pfns[ctx->pfns_index] = israw? INVALID_P2M_ENTRY : pfn;
+    memcpy(ctx->inputbuf + ctx->pfns_index * XC_PAGE_SIZE, page, XC_PAGE_SIZE);
+    ctx->pfns_index++;
+
+    /* check if we have run out of space. If so,
+     * we need to synchronously compress the pages and flush them out
+     */
+    if (ctx->pfns_index == NRPAGES(PAGE_BUFFER_SIZE))
+        return -1;
+    return 0;
+}
+
+int xc_remus_compress(xc_interface *xch, void *remus_ctx)
+{
+    struct remus_context *ctx = (struct remus_context *)remus_ctx;
+    char *cache_copy = NULL;
+    int israw;
+
+    if (!ctx->pfns_index || (ctx->pfns_iterator == ctx->pfns_index))
+        return 0;
+
+    for (; ctx->pfns_iterator < ctx->pfns_index; ctx->pfns_iterator++)
+    {
+        israw = 0;
+        cache_copy = NULL;
+        if (ctx->sendbuf_pfns[ctx->pfns_iterator] == INVALID_P2M_ENTRY)
+            israw = 1;
+        else
+            cache_copy = get_cache_page(ctx, 
ctx->sendbuf_pfns[ctx->pfns_iterator],
+                                        &israw);
+
+        /* Out of space in outbuf! flush and come back */
+        if (__compress(xch, ctx, ctx->inputbuf + ctx->pfns_iterator * 
XC_PAGE_SIZE,
+                       cache_copy, israw) < 0)
+            return -1;
+    }
+
+    return 1;
+}
+
+inline
+unsigned long xc_remus_get_compbuf_len(xc_interface *xch, void *remus_ctx)
+{
+    struct remus_context *ctx = (struct remus_context *)remus_ctx;
+    return ctx->compbuf_pos;
+}
+
+inline
+void xc_remus_compbuf_set(xc_interface *xch, void *remus_ctx,
+                          char *compbuf, unsigned long compbuf_size)
+{
+    struct remus_context *ctx = (struct remus_context *)remus_ctx;
+    ctx->compbuf_pos = 0;
+    ctx->compbuf = compbuf;
+    ctx->compbuf_size = compbuf_size;
+}
+
+inline
+void xc_remus_pagebuf_reset(xc_interface *xch, void *remus_ctx)
+{
+    struct remus_context *ctx = (struct remus_context *)remus_ctx;
+    ctx->pfns_index = ctx->pfns_iterator = 0;
+}
+
+int xc_remus_uncompress(xc_interface *xch, char *compbuf,
+                        unsigned long compbuf_size,
+                        unsigned long *compbuf_pos, char *dest)
+{
+    return __uncompress(xch, dest, compbuf_pos, compbuf, compbuf_size);
+}
+
+void xc_remus_free_context(xc_interface *xch, void *ctx)
+{
+    struct remus_context *remus_ctx = (struct remus_context *)ctx;
+
+    if (!remus_ctx) return;
+
+    if (remus_ctx->inputbuf)
+        free(remus_ctx->inputbuf);
+    if (remus_ctx->sendbuf_pfns)
+        free(remus_ctx->sendbuf_pfns);
+    if (remus_ctx->cache_base)
+        free(remus_ctx->cache_base);
+    if (remus_ctx->pfn2cache)
+        free(remus_ctx->pfn2cache);
+    if (remus_ctx->cache2pfn)
+        free(remus_ctx->cache2pfn);
+    free(remus_ctx);
+}
+
+void *xc_remus_create_context(xc_interface *xch, unsigned long p2m_size)
+{
+    unsigned long i;
+    struct remus_context *remus_ctx = NULL;
+    unsigned long num_cache_pages = DELTA_CACHE_SIZE/XC_PAGE_SIZE;
+
+    remus_ctx = malloc(sizeof(struct remus_context));
+    if (!remus_ctx)
+    {
+        ERROR("Failed to allocate remus_ctx\n");
+        goto error;
+    }
+    memset(remus_ctx, 0, sizeof(struct remus_context));
+
+    if (posix_memalign((void **)&remus_ctx->inputbuf,
+                       XC_PAGE_SIZE, PAGE_BUFFER_SIZE))
+    {
+        ERROR("Failed to allocate page buffer\n");
+        goto error;
+    }
+
+    remus_ctx->sendbuf_pfns = malloc(NRPAGES(PAGE_BUFFER_SIZE) *
+                                     sizeof(unsigned long));
+    if (!remus_ctx->sendbuf_pfns)
+    {
+        ERROR("Could not alloc sendbuf_pfns\n");
+        goto error;
+    }
+    memset(remus_ctx->sendbuf_pfns, -1,
+           NRPAGES(PAGE_BUFFER_SIZE) * sizeof(unsigned long));
+
+    if (posix_memalign((void **)&remus_ctx->cache_base,
+                       XC_PAGE_SIZE, DELTA_CACHE_SIZE))
+    {
+        ERROR("Failed to allocate delta cache\n");
+        goto error;
+    }
+
+    remus_ctx->pfn2cache = calloc(p2m_size, sizeof(struct cache_page *));
+    if (!remus_ctx->pfn2cache)
+    {
+        ERROR("Could not alloc pfn2cache map\n");
+        goto error;
+    }
+
+    remus_ctx->cache2pfn = malloc(num_cache_pages * sizeof(struct cache_page));
+    if (!remus_ctx->cache2pfn)
+    {
+        ERROR("Could not alloc cache2pfn map\n");
+        goto error;
+    }
+
+    for (i = 0; i < num_cache_pages; i++)
+    {
+        remus_ctx->cache2pfn[i].pfn = INVALID_P2M_ENTRY;
+        remus_ctx->cache2pfn[i].page = remus_ctx->cache_base + i * 
XC_PAGE_SIZE;
+        remus_ctx->cache2pfn[i].prev = (i == 0)? NULL : 
&(remus_ctx->cache2pfn[i - 1]);
+        remus_ctx->cache2pfn[i].next = ((i+1) == num_cache_pages)? NULL :
+            &(remus_ctx->cache2pfn[i + 1]);
+    }
+    remus_ctx->page_list_head = &(remus_ctx->cache2pfn[0]);
+    remus_ctx->page_list_tail = &(remus_ctx->cache2pfn[num_cache_pages -1]);
+
+    return (void *)remus_ctx;
+error:
+    xc_remus_free_context(xch, remus_ctx);
+    return NULL;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 23c068b10923 -r 5dbafaf24c70 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Wed Jun 15 16:16:41 2011 +0100
+++ b/tools/libxc/xenctrl.h     Thu Jun 16 08:28:15 2011 -0700
@@ -1820,4 +1820,58 @@
                         int verbose);
 /* Useful for callers who also use libelf. */
 
+/**
+ * Remus Checkpoint Compression
+ */
+void *xc_remus_create_context(xc_interface *xch, unsigned long p2m_size);
+void xc_remus_free_context(xc_interface *xch, void *remus_ctx);
+
+/**
+ * Add a page to remus buffer, to be compressed later.
+ * returns -1 if there is no space in buffer.
+ */
+int xc_remus_add_page(xc_interface *xch, void *remus_ctx, char *page,
+                     unsigned long pfn, int israw);
+
+/**
+ * Should be called before compressing the pages. Caller supplies a
+ * compression buffer compbuf of size compbuf_size.
+ */
+void xc_remus_compbuf_set(xc_interface *xch, void *remus_ctx, char *compbuf,
+                           unsigned long compbuf_size);
+
+/**
+ * Delta compress pages in the remus buffer and inserts the
+ * compressed data into the previously supplied compression buffer, compbuf.
+ * After compression, the page is copied to the internal LRU cache.
+ *
+ * This function compresses as many pages as possible into the
+ * supplied compression buffer. It maintains an internal iterator to
+ * keep track of pages in the input buffer that are yet to be compressed.
+ *
+ * returns -1 if the compression buffer has run out of space.
+ * returns 1 on success.
+ * returns 0 if no more pages are left to be compressed.
+ */
+int xc_remus_compress(xc_interface *xch, void *remus_ctx);
+
+/**
+ * Returns the exact length of data, in the compression buffer.
+ */
+unsigned long xc_remus_get_compbuf_len(xc_interface *xch, void *remus_ctx);
+
+/**
+ * Resets the internal page buffer that holds dirty pages before compression.
+ * Also resets the iterators.
+ */
+void xc_remus_pagebuf_reset(xc_interface *xch, void *remus_ctx);
+
+/**
+ * Caller must supply the compression buffer (compbuf), its size 
(compbuf_size) and
+ * an reference to index variable (compbuf_pos) that is used internally.
+ * Each call pulls out one page from the compressed chunk and copies it to 
dest.
+ */
+int xc_remus_uncompress(xc_interface *xch, char *compbuf, unsigned long 
compbuf_size,
+                       unsigned long *compbuf_pos, char *dest);
+
 #endif /* XENCTRL_H */
diff -r 23c068b10923 -r 5dbafaf24c70 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h    Wed Jun 15 16:16:41 2011 +0100
+++ b/tools/libxc/xenguest.h    Thu Jun 16 08:28:15 2011 -0700
@@ -29,6 +29,7 @@
 #define XCFLAGS_STDVGA    8
 #define X86_64_B_SIZE   64 
 #define X86_32_B_SIZE   32
+#define XCFLAGS_REMUS_COMPRESS    16
 
 /* callbacks provided by xc_domain_save */
 struct save_callbacks {
diff -r 23c068b10923 -r 5dbafaf24c70 tools/libxc/xg_save_restore.h
--- a/tools/libxc/xg_save_restore.h     Wed Jun 15 16:16:41 2011 +0100
+++ b/tools/libxc/xg_save_restore.h     Thu Jun 16 08:28:15 2011 -0700
@@ -134,6 +134,7 @@
 #define XC_SAVE_ID_HVM_CONSOLE_PFN    -8 /* (HVM-only) */
 #define XC_SAVE_ID_LAST_CHECKPOINT    -9 /* Commit to restoring after 
completion of current iteration. */
 #define XC_SAVE_ID_HVM_ACPI_IOPORTS_LOCATION -10
+#define XC_SAVE_ID_COMPRESSED_DATA    -11 /* Marker to indicate arrival of 
compressed data */
 
 /*
 ** We process save/restore/migrate in batches of pages; the below

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
WARNING - OLD ARCHIVES

xen-devel

[Xen-devel] [PATCH 1 of 2] tools/libxc: Remus Checkpoint Compression