# HG changeset patch
# User Shriram Rajagopalan <rshriram@xxxxxxxxx>
# Date 1308455519 25200
# Node ID b4974a38d10199c1e2b8fd3cf36d91c03ac5eeae
# Parent 23c068b109236657ededa3e3b7f180346a5cd9f9
tools/libxc: Remus Checkpoint Compression
Instead of sending dirty pages of guest memory as-is, use a simple compression
algorithm that sends a RLE-encoded XOR of the page against its last sent copy.
A small LRU cache is used to hold recently dirtied pages. Pagetable pages are
sent as-is, as they are canonicalized at sender side and uncanonicalized at
receiver.
Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx>
diff -r 23c068b10923 -r b4974a38d101 tools/libxc/Makefile
--- a/tools/libxc/Makefile Wed Jun 15 16:16:41 2011 +0100
+++ b/tools/libxc/Makefile Sat Jun 18 20:51:59 2011 -0700
@@ -42,7 +42,7 @@
GUEST_SRCS-y :=
GUEST_SRCS-y += xg_private.c xc_suspend.c
GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_domain_save.c
-GUEST_SRCS-$(CONFIG_MIGRATE) += xc_offline_page.c
+GUEST_SRCS-$(CONFIG_MIGRATE) += xc_offline_page.c xc_remus.c
GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c
vpath %.c ../../xen/common/libelf
diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c Wed Jun 15 16:16:41 2011 +0100
+++ b/tools/libxc/xc_domain_restore.c Sat Jun 18 20:51:59 2011 -0700
@@ -43,6 +43,7 @@
xen_pfn_t *p2m_batch; /* A table of P2M mappings in the current region. */
int completed; /* Set when a consistent image is available */
int last_checkpoint; /* Set when we should commit to the current
checkpoint when it completes. */
+ int compression; /* Set when sender signals that pages would be sent
compressed (for Remus) */
struct domain_info_context dinfo;
};
@@ -663,6 +664,10 @@
/* pages is of length nr_physpages, pfn_types is of length nr_pages */
unsigned int nr_physpages, nr_pages;
+ /* remus compression state */
+ int compression;
+ unsigned long compbuf_pos, compbuf_size;
+
/* Types of the pfns in the current region */
unsigned long* pfn_types;
@@ -700,6 +705,7 @@
{
int count, countpages, oldcount, i;
void* ptmp;
+ unsigned long compbuf_size;
if ( RDEXACT(fd, &count, sizeof(count)) )
{
@@ -809,6 +815,46 @@
}
return pagebuf_get_one(xch, ctx, buf, fd, dom);
+ case XC_SAVE_ID_ENABLE_COMPRESSION:
+ /* We cannot set compression flag directly in pagebuf structure,
+ * since this pagebuf still has uncompressed pages that are yet to
+ * be applied. We enable the compression field in pagebuf structure
+ * after receiving the first tailbuf.
+ */
+ ctx->compression = 1;
+ // DPRINTF("compression flag received");
+ return pagebuf_get_one(xch, ctx, buf, fd, dom);
+
+ case XC_SAVE_ID_COMPRESSED_DATA:
+
+ /* read the length of compressed chunk coming in */
+ if ( RDEXACT(fd, &compbuf_size, sizeof(unsigned long)) )
+ {
+ PERROR("Error when reading compbuf_size");
+ return -1;
+ }
+ if (!compbuf_size) return 1;
+
+ buf->compbuf_size += compbuf_size;
+ if (!buf->pages) {
+ if (!(buf->pages = malloc(buf->compbuf_size))) {
+ ERROR("Could not allocate compression buffer");
+ return -1;
+ }
+ } else {
+ if (!(ptmp = realloc(buf->pages, buf->compbuf_size))) {
+ ERROR("Could not reallocate compression buffer");
+ return -1;
+ }
+ buf->pages = ptmp;
+ }
+ if ( RDEXACT(fd, buf->pages + (buf->compbuf_size - compbuf_size),
+ compbuf_size) ) {
+ PERROR("Error when reading compression buffer");
+ return -1;
+ }
+ return compbuf_size;
+
default:
if ( (count > MAX_BATCH_SIZE) || (count < 0) ) {
ERROR("Max batch size exceeded (%d). Giving up.", count);
@@ -846,6 +892,13 @@
if (!countpages)
return count;
+ /* If Remus Checkpoint Compression is turned on, we only receive the
+ * pfn lists now. The compressed pages will come in later, following a
+ * <XC_SAVE_ID_COMPRESSED_DATA, compressedChunkSize> tuple.
+ */
+ if (buf->compression)
+ return pagebuf_get_one(xch, ctx, buf, fd, dom);
+
oldcount = buf->nr_physpages;
buf->nr_physpages += countpages;
if (!buf->pages) {
@@ -874,6 +927,7 @@
int rc;
buf->nr_physpages = buf->nr_pages = 0;
+ buf->compbuf_pos = buf->compbuf_size = 0;
do {
rc = pagebuf_get_one(xch, ctx, buf, fd, dom);
@@ -1091,7 +1145,19 @@
/* In verify mode, we use a copy; otherwise we work in place */
page = pagebuf->verify ? (void *)buf : (region_base + i*PAGE_SIZE);
- memcpy(page, pagebuf->pages + (curpage + curbatch) * PAGE_SIZE,
PAGE_SIZE);
+ /* Remus - page decompression */
+ if (pagebuf->compression)
+ {
+ if (xc_remus_uncompress(xch, pagebuf->pages, pagebuf->compbuf_size,
+ &pagebuf->compbuf_pos, (char *)page))
+ {
+ ERROR("Failed to uncompress page (pfn=%lx)\n", pfn);
+ goto err_mapped;
+ }
+ }
+ else
+ memcpy(page, pagebuf->pages + (curpage + curbatch) * PAGE_SIZE,
+ PAGE_SIZE);
pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
@@ -1353,6 +1419,7 @@
if ( !ctx->completed ) {
pagebuf.nr_physpages = pagebuf.nr_pages = 0;
+ pagebuf.compbuf_pos = pagebuf.compbuf_size = 0;
if ( pagebuf_get_one(xch, ctx, &pagebuf, io_fd, dom) < 0 ) {
PERROR("Error when reading batch");
goto out;
@@ -1395,6 +1462,7 @@
}
pagebuf.nr_physpages = pagebuf.nr_pages = 0;
+ pagebuf.compbuf_pos = pagebuf.compbuf_size = 0;
n += j; /* crude stats */
@@ -1438,6 +1506,13 @@
*/
if ( !ctx->last_checkpoint )
fcntl(io_fd, F_SETFL, orig_io_fd_flags | O_NONBLOCK);
+
+ /*
+ * If sender had sent enable compression flag, switch to compressed
+ * checkpoints mode once the first checkpoint is received.
+ */
+ if (ctx->compression)
+ pagebuf.compression = 1;
}
if (pagebuf.acpi_ioport_location == 1) {
diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c Wed Jun 15 16:16:41 2011 +0100
+++ b/tools/libxc/xc_domain_save.c Sat Jun 18 20:51:59 2011 -0700
@@ -269,6 +269,57 @@
return noncached_write(xch, ob, fd, buf, len);
}
+/*
+ * Compress every page queued in remus_ctx straight into the output buffer
+ * and emit it as a sequence of
+ * <XC_SAVE_ID_COMPRESSED_DATA, length, compressed bytes> chunks.
+ *
+ * The compressor writes into the outbuf just past the space reserved for the
+ * chunk header; the header is then written in front of it and ob->pos is
+ * advanced over the compressed bytes.  When dobuf is zero each chunk is
+ * flushed to fd as soon as it is complete.
+ *
+ * Returns 0 once all queued pages have been written (the remus page buffer
+ * is reset), -1 on a write/flush error.
+ */
+static int write_compressed(xc_interface *xch, void *remus_ctx, int dobuf,
+                            struct outbuf* ob, int fd)
+{
+    int hdr_len = sizeof(int) + sizeof(unsigned long);
+    int chunk_id = XC_SAVE_ID_COMPRESSED_DATA;
+    unsigned long chunk_len = 0;
+    int status = 0;
+
+    for ( ; ; )
+    {
+        /* Need room for the header plus at least 8k of payload. */
+        if ((ob->pos + hdr_len + XC_PAGE_SIZE * 2) > ob->size &&
+            outbuf_flush(xch, ob, fd) < 0)
+        {
+            ERROR("Error when flushing outbuf intermediate");
+            return -1;
+        }
+
+        /* Point the compressor at the free space behind the header. */
+        xc_remus_compbuf_set(xch, remus_ctx, ob->buf + ob->pos + hdr_len,
+                             ob->size - ob->pos - hdr_len);
+        status = xc_remus_compress(xch, remus_ctx);
+        if (status == 0)
+            break; /* nothing left to compress */
+
+        chunk_len = xc_remus_get_compbuf_len(xch, remus_ctx);
+
+        if (outbuf_hardwrite(xch, ob, fd, &chunk_id, sizeof(chunk_id)) < 0)
+        {
+            PERROR("Error when writing marker (errno %d)", errno);
+            return -1;
+        }
+
+        if (outbuf_hardwrite(xch, ob, fd, &chunk_len, sizeof(chunk_len)) < 0)
+        {
+            PERROR("Error when writing compbuf_len (errno %d)", errno);
+            return -1;
+        }
+
+        /* The payload is already in place; just account for it. */
+        ob->pos += (size_t) chunk_len;
+        if (!dobuf && outbuf_flush(xch, ob, fd) < 0)
+        {
+            ERROR("Error when writing compressed chunk");
+            return -1;
+        }
+    }
+
+    xc_remus_pagebuf_reset(xch, remus_ctx);
+    return 0;
+}
+
+
struct time_stats {
struct timeval wall;
long long d0_cpu, d1_cpu;
@@ -866,11 +917,19 @@
unsigned long mfn;
- struct outbuf ob;
+ struct outbuf ob_pagebuf, ob_tailbuf, *ob = NULL;
struct save_ctx _ctx;
struct save_ctx *ctx = &_ctx;
struct domain_info_context *dinfo = &ctx->dinfo;
+ /* Remus context */
+ void *remus_ctx = NULL;
+ /* Even if XCFLAGS_REMUS_COMPRESS is set, we enable compression only
+ * after sending XC_SAVE_ID_ENABLE_COMPRESSION and the tailbuf for
+ * first time.
+ */
+ int compression = 0;
+
int completed = 0;
if ( hvm && !callbacks->switch_qemu_logdirty )
@@ -880,7 +939,7 @@
return 1;
}
- outbuf_init(xch, &ob, OUTBUF_SIZE);
+ outbuf_init(xch, &ob_pagebuf, OUTBUF_SIZE);
memset(ctx, 0, sizeof(*ctx));
@@ -968,6 +1027,16 @@
}
}
+ if ( flags & XCFLAGS_REMUS_COMPRESS )
+ {
+ if (!(remus_ctx = xc_remus_create_context(xch, dinfo->p2m_size)))
+ {
+ ERROR("Failed to create remus context");
+ goto out;
+ }
+ outbuf_init(xch, &ob_tailbuf, OUTBUF_SIZE/4);
+ }
+
last_iter = !live;
/* pretend we sent all the pages last iteration */
@@ -1076,9 +1145,11 @@
}
copypages:
-#define wrexact(fd, buf, len) write_buffer(xch, last_iter, &ob, (fd), (buf),
(len))
-#define wruncached(fd, live, buf, len) write_uncached(xch, last_iter, &ob,
(fd), (buf), (len))
+#define wrexact(fd, buf, len) write_buffer(xch, last_iter, ob, (fd), (buf),
(len))
+#define wruncached(fd, live, buf, len) write_uncached(xch, last_iter, ob,
(fd), (buf), (len))
+#define wrcompressed(fd) write_compressed(xch, remus_ctx, last_iter, ob, (fd))
+ ob = &ob_pagebuf; /* Holds pfn_types, pages/compressed pages */
/* Now write out each data page, canonicalising page tables as we go... */
for ( ; ; )
{
@@ -1321,7 +1392,7 @@
{
/* If the page is not a normal data page, write out any
run of pages we may have previously acumulated */
- if ( run )
+ if ( !compression && run )
{
if ( wruncached(io_fd, live,
(char*)region_base+(PAGE_SIZE*(j-run)),
@@ -1356,7 +1427,32 @@
goto out;
}
- if ( wruncached(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE
)
+ if (compression)
+ {
+ /* Mark pagetable page to be sent uncompressed */
+ if (xc_remus_add_page(xch, remus_ctx, page,
+ pfn, 1 /* raw page */) < 0)
+ {
+ /*
+ * We are out of buffer space to hold dirty
+ * pages. Compress and flush the current buffer
+ * to make space. This is a corner case, that
+ * slows down checkpointing as the compression
+ * happens while domain is suspended. Happens
+ * seldom and if you find this occurring
+ * frequently, increase the PAGE_BUFFER_SIZE
+ * in xc_remus.c.
+ */
+ if (wrcompressed(io_fd) < 0)
+ {
+ ERROR("Error when writing compressed"
+ " data (4b)\n");
+ goto out;
+ }
+ }
+ }
+ else if ( wruncached(io_fd, live, page,
+ PAGE_SIZE) != PAGE_SIZE )
{
PERROR("Error when writing to state file (4b)"
" (errno %d)", errno);
@@ -1366,7 +1462,24 @@
else
{
/* We have a normal page: accumulate it for writing. */
- run++;
+ if (compression)
+ {
+ /* For remus/compression, accumulate the page in the
+ * page buffer, to be compressed later.
+ */
+ if (xc_remus_add_page(xch, remus_ctx, spage,
+ pfn, 0 /* not raw page */) < 0)
+ {
+ if (wrcompressed(io_fd) < 0)
+ {
+ ERROR("Error when writing compressed"
+ " data (4c)\n");
+ goto out;
+ }
+ }
+ }
+ else
+ run++;
}
} /* end of the write out for this batch */
@@ -1474,6 +1587,15 @@
DPRINTF("All memory is saved\n");
+ /* After last_iter, buffer the rest of pagebuf & tailbuf data into a
+ * separate output buffer and flush it after the compressed page chunks.
+ */
+ if (compression)
+ {
+ ob = &ob_tailbuf;
+ ob->pos = 0;
+ }
+
{
struct {
int id;
@@ -1573,6 +1695,25 @@
}
}
+ /* Enable compression logic on both sides by sending this
+ * one time marker.
+ * NOTE: We could have simplified this procedure by sending
+ * the enable/disable compression flag before the beginning of
+ * the main for loop. But this would break compatibility for
+ * live migration code, with older versions of xen. So we have
+ * to enable it after the last_iter, when the XC_SAVE_ID_*
+ * elements are sent.
+ */
+ if (!compression && (flags & XCFLAGS_REMUS_COMPRESS))
+ {
+ i = XC_SAVE_ID_ENABLE_COMPRESSION;
+ if ( wrexact(io_fd, &i, sizeof(int)) )
+ {
+ PERROR("Error when writing enable_compression marker");
+ goto out;
+ }
+ }
+
/* Zero terminate */
i = 0;
if ( wrexact(io_fd, &i, sizeof(int)) )
@@ -1817,14 +1958,38 @@
if ( !rc && callbacks->postcopy )
callbacks->postcopy(callbacks->data);
+ /* guest has been resumed. Now we can compress data
+ * at our own pace.
+ */
+ if (!rc && compression)
+ {
+ ob = &ob_pagebuf;
+ if (wrcompressed(io_fd) < 0)
+ {
+ ERROR("Error when writing compressed data, after postcopy\n");
+ rc = 1;
+ goto out;
+ }
+ /* Copy the tailbuf data into the main outbuf */
+ if ( wrexact(io_fd, ob_tailbuf.buf, ob_tailbuf.pos) )
+ {
+ rc = 1;
+ PERROR("Error when copying tailbuf into outbuf");
+ goto out;
+ }
+ }
+
/* Flush last write and discard cache for file. */
- if ( outbuf_flush(xch, &ob, io_fd) < 0 ) {
+ if ( outbuf_flush(xch, ob, io_fd) < 0 ) {
PERROR("Error when flushing output buffer");
rc = 1;
}
discard_file_cache(xch, io_fd, 1 /* flush */);
+ /* Enable compression now, finally */
+ compression = (flags & XCFLAGS_REMUS_COMPRESS);
+
/* checkpoint_cb can spend arbitrarily long in between rounds */
if (!rc && callbacks->checkpoint &&
callbacks->checkpoint(callbacks->data) > 0)
@@ -1866,6 +2031,9 @@
DPRINTF("Warning - couldn't disable qemu log-dirty mode");
}
+ if (remus_ctx)
+ xc_remus_free_context(xch, remus_ctx);
+
if ( live_shinfo )
munmap(live_shinfo, PAGE_SIZE);
diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xc_remus.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_remus.c Sat Jun 18 20:51:59 2011 -0700
@@ -0,0 +1,465 @@
+/******************************************************************************
+ * xc_remus.c
+ *
+ * Checkpoint Compression using Page Delta Algorithm.
+ * - A LRU cache of recently dirtied guest pages is maintained.
+ * - For each dirty guest page in the checkpoint, if a previous version of the
+ * page exists in the cache, XOR both pages and send the non-zero sections
+ * to the receiver. The cache is then updated with the newer copy of guest
+ * page.
+ * - The receiver will XOR the non-zero sections against its copy of the guest
+ * page, thereby bringing the guest page up-to-date with the sender side.
+ *
+ * Copyright (c) 2011 Shriram Rajagopalan (rshriram@xxxxxxxxx).
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
USA
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <inttypes.h>
+#include <errno.h>
+#include "xenctrl.h"
+#include "xg_save_restore.h"
+#include "xg_private.h"
+
+/* Already defined in xc_dom.h, but it doesn't have
+ * a conditional include macro. So, redefine here.
+ */
+#define INVALID_P2M_ENTRY ((xen_pfn_t)-1)
+
+/* Page Cache for Delta Compression*/
+#define DELTA_CACHE_SIZE (XC_PAGE_SIZE * 8192)
+
+/* One slot of the delta-compression cache. Slots are chained into a doubly
+ * linked list through next/prev.
+ */
+struct cache_page;
+struct cache_page
+{
+ /* storage holding the cached copy of a guest page */
+ char *page;
+ /* guest pfn cached in this slot, or INVALID_P2M_ENTRY when the slot is free */
+ unsigned long pfn;
+ /* neighbour towards the list tail; NULL for the tail itself */
+ struct cache_page *next;
+ /* neighbour towards the list head; NULL for the head itself */
+ struct cache_page *prev;
+};
+
+/* After XORing the older and newer version, the non-zero sections
+ * are sent as a sequence of tuples <2-byte-offset,4-byte-data> called markers.
+ * - Each page begins with a BEGIN marker (for synchronization).
+ * - If the result of XOR is a page filled with zeros (i.e. no difference
+ *   between old and new page), then only the BEGIN marker is sent for the page.
+ * - If the two versions of the page differ by more than 50%, the page is sent
+ *   as is, with a FULLPAGE marker, without a BEGIN marker.
+ *
+ * About the choice of data types: typical page size is 4K. Each marker is
+ * 6 bytes long, with a 4-byte data word (1024 data words per page). If 50% of
+ * the page changed, then we would be transmitting ~3000 bytes (worst case).
+ * - If we use 8-byte data word (10-byte marker), we end up sending
+ *   ~5000 bytes (>4096).
+ */
+
+/* Word type that is compared between page versions and carried in a marker. */
+typedef unsigned int data_t;
+/* Marker offset: a word index into the page, or one of the negative
+ * sentinels below.
+ */
+typedef short int moff_t;
+
+/* Sentinel offsets; word offsets produced by the encoder are >= 0. */
+#define BEGIN -100
+#define FULLPAGE -101
+/* One record of the compressed stream; packed, so off is immediately
+ * followed by val.
+ */
+struct marker
+{
+ moff_t off;
+ data_t val;
+} __attribute__((packed));
+
+/* Prebuilt sentinel records that are copied into the compression buffer. */
+static struct marker begin_page = { BEGIN, -1};
+static struct marker full_page = {FULLPAGE, -1};
+
+/* Internal page buffer to hold dirty pages of a checkpoint,
+ * to be compressed after the domain is resumed for execution.
+ */
+#define PAGE_BUFFER_SIZE (XC_PAGE_SIZE * 8192)
+
+/* State of one checkpoint compressor: the caller-supplied output window,
+ * the queue of pages awaiting compression and the LRU cache of previously
+ * sent pages.
+ */
+struct remus_context
+{
+ /* compression buffer - holds compressed data; supplied by the caller
+ * through xc_remus_compbuf_set(), not owned by the context */
+ char *compbuf;
+ unsigned long compbuf_size;
+ /* number of bytes of compbuf filled so far */
+ unsigned long compbuf_pos;
+
+ /* Page buffer to hold pages to be compressed */
+ char *inputbuf;
+ /* pfns of pages to be compressed; INVALID_P2M_ENTRY marks a page that
+ * must be sent raw */
+ unsigned long *sendbuf_pfns;
+ /* number of pages currently queued in inputbuf/sendbuf_pfns */
+ unsigned int pfns_index;
+ /* index of the next queued page that has not been compressed yet */
+ unsigned int pfns_iterator;
+
+ /* Compression Cache (LRU) */
+ /* backing storage for all cached pages */
+ char *cache_base;
+ /* indexed by pfn: cache slot holding that pfn, or NULL if not cached */
+ struct cache_page **pfn2cache;
+ /* array of all cache slots */
+ struct cache_page *cache2pfn;
+ /* most recently used slot */
+ struct cache_page *page_list_head;
+ /* least recently used slot; reused first on a cache miss */
+ struct cache_page *page_list_tail;
+};
+
+/*
+ * Encode one page into the compression buffer at ctx->compbuf_pos.
+ *
+ *   srcpage  - new contents of the page.
+ *   copypage - cached previous contents of the page, or NULL if there is
+ *              no cache slot for it.
+ *   israw    - non-zero to force the page to be sent in full.
+ *
+ * With a usable cached copy the page is written as a BEGIN marker followed
+ * by one <word offset, new value> marker per changed word. Otherwise, or as
+ * soon as more than 500 markers have been produced, it is written as a
+ * FULLPAGE marker followed by the raw page.
+ *
+ * Returns the number of bytes appended to the compression buffer, or -1 if
+ * the buffer does not have enough room left.
+ *
+ * The cached copy is refreshed only after the page has been encoded
+ * successfully. Updating it word by word during the scan would leave it
+ * half-updated when we bail out with -1, and the caller's retry would then
+ * silently drop the differences that had already been folded into the cache.
+ */
+static
+int __compress(xc_interface *xch, struct remus_context *ctx, char *srcpage,
+               char *copypage, int israw)
+{
+    struct marker *dest = (struct marker *)(ctx->compbuf + ctx->compbuf_pos);
+    data_t *src = (data_t *)srcpage;
+    data_t *copy = (data_t *)copypage;
+    int fullpage = (israw || !copypage);
+    moff_t off;
+    int j = 0, rc = 0;
+
+    if ((ctx->compbuf_pos + sizeof(struct marker)) > ctx->compbuf_size)
+        return -1;
+
+    if (!fullpage)
+    {
+        dest[j++] = begin_page;
+        for (off = 0; off < XC_PAGE_SIZE/sizeof(data_t); off++)
+        {
+            if (copy[off] != src[off])
+            {
+                if ((ctx->compbuf_pos + (j + 1) *
+                     sizeof(struct marker)) > ctx->compbuf_size)
+                    return -1; /* cache copy is still untouched */
+
+                dest[j].off = off;
+                dest[j].val = src[off];
+                j++;
+            }
+            if (j > 500) /* more than 50% of page changed */
+            {
+                fullpage = 1;
+                break;
+            }
+        }
+    }
+
+    if (fullpage)
+    {
+        if ( (ctx->compbuf_pos + sizeof(struct marker)
+              + XC_PAGE_SIZE) > ctx->compbuf_size)
+            return -1; /* cache copy is still untouched */
+
+        /* Overwrites any delta markers written above. */
+        dest[0] = full_page;
+        memcpy((char *)&dest[1], srcpage, XC_PAGE_SIZE);
+        rc = XC_PAGE_SIZE + sizeof(struct marker);
+    }
+    else
+        rc = (j * sizeof(struct marker));
+
+    /* Encoding succeeded: bring the cached copy up to date. */
+    if (copypage)
+        memcpy(copypage, srcpage, XC_PAGE_SIZE);
+    ctx->compbuf_pos += rc;
+
+    return rc;
+}
+
+/*
+ * Decode one page from compbuf, starting at *compbuf_pos, into destpage.
+ *
+ * A BEGIN record is followed by zero or more <word offset, value> markers
+ * which are applied on top of the current contents of destpage; the run
+ * ends at the next marker with a negative offset or at the end of the
+ * buffer. A FULLPAGE record is followed by a raw page that replaces
+ * destpage.
+ *
+ * On success returns 0 and advances *compbuf_pos past the consumed data.
+ * Returns -1 (leaving *compbuf_pos unchanged) if the buffer is exhausted,
+ * a record is truncated, the leading marker is neither BEGIN nor FULLPAGE,
+ * or a marker offset lies outside the page. The buffer comes off the wire,
+ * so every read is bounds-checked and every offset validated before use.
+ */
+static
+int __uncompress(xc_interface *xch, char *destpage, unsigned long *compbuf_pos,
+                 char *compbuf, unsigned long compbuf_size)
+{
+    const unsigned long nr_words = XC_PAGE_SIZE / sizeof(data_t);
+    unsigned long pos = *compbuf_pos;
+    struct marker *src = (struct marker *)(compbuf + pos);
+    data_t *dest = (data_t *)destpage;
+    int i;
+
+    /* The leading marker must lie completely inside the buffer. */
+    if (pos >= compbuf_size || (compbuf_size - pos) < sizeof(struct marker))
+    {
+        ERROR("Out of bounds exception: read ptr:%lu, bufsize = %lu\n",
+              pos, compbuf_size);
+        return -1;
+    }
+
+    if (src[0].off == BEGIN)
+    {
+        pos += sizeof(struct marker);
+        /* pos never exceeds compbuf_size, so the subtraction cannot wrap */
+        for (i = 1; ((compbuf_size - pos) >= sizeof(struct marker)) &&
+                 (src[i].off >= 0); i++, pos += sizeof(struct marker))
+        {
+            if ((unsigned long)src[i].off >= nr_words)
+            {
+                ERROR("Invalid page offset %d in compression buffer at %lu\n",
+                      src[i].off, pos);
+                return -1;
+            }
+            dest[src[i].off] = src[i].val;
+        }
+    }
+    else if (src[0].off == FULLPAGE)
+    {
+        if ((compbuf_size - pos) < (sizeof(struct marker) + XC_PAGE_SIZE))
+        {
+            ERROR("Out of bounds exception: read ptr:%lu, bufsize = %lu\n",
+                  pos, compbuf_size);
+            return -1;
+        }
+        memcpy(destpage, (char *)&src[1], XC_PAGE_SIZE);
+        pos += sizeof(struct marker) + XC_PAGE_SIZE;
+    }
+    else
+    {
+        ERROR("Invalid marker %d in compression buffer at %lu\n",
+              src[0].off, pos);
+        return -1;
+    }
+
+    *compbuf_pos = pos;
+    return 0;
+}
+
+/*
+ * Look up pfn in the LRU cache and return the storage of its cache slot,
+ * making that slot the head (most recently used) of the list.
+ *
+ * On a miss the slot at the tail of the list is taken over for pfn (its
+ * previous owner, if any, is dropped from the pfn map) and *israw is set
+ * to 1; the returned storage then still holds whatever the slot contained
+ * before. On a hit *israw is left untouched.
+ */
+static
+char *get_cache_page(struct remus_context *ctx, unsigned long pfn,
+                     int *israw)
+{
+    struct cache_page *slot = ctx->pfn2cache[pfn];
+
+    if (slot == NULL)
+    {
+        /* Miss: recycle the least recently used slot for this pfn. */
+        slot = ctx->page_list_tail;
+        *israw = 1;
+        if (slot->pfn != INVALID_P2M_ENTRY)
+            ctx->pfn2cache[slot->pfn] = NULL;
+        slot->pfn = pfn;
+        ctx->pfn2cache[pfn] = slot;
+    }
+
+    if (slot != ctx->page_list_head)
+    {
+        /* Unlink the slot from its current position ... */
+        if (slot == ctx->page_list_tail)
+        {
+            ctx->page_list_tail = slot->prev;
+            ctx->page_list_tail->next = NULL;
+        }
+        else
+        {
+            slot->prev->next = slot->next;
+            slot->next->prev = slot->prev;
+        }
+
+        /* ... and push it in front of the current head. */
+        slot->prev = NULL;
+        slot->next = ctx->page_list_head;
+        ctx->page_list_head->prev = slot;
+        ctx->page_list_head = slot;
+    }
+
+    return ctx->page_list_head->page;
+}
+
+/*
+ * Drop pfn from the cache, if present: its slot is moved to the tail of
+ * the list and marked free (INVALID_P2M_ENTRY), and the pfn map entry is
+ * cleared. Used for pagetable pages, which are sent raw.
+ */
+static
+void invalidate_cache_page(struct remus_context *ctx, unsigned long pfn)
+{
+    struct cache_page *slot = ctx->pfn2cache[pfn];
+
+    if (slot == NULL)
+        return;
+
+    if (slot == ctx->page_list_head || slot != ctx->page_list_tail)
+    {
+        /* Unlink from the head or from the middle of the list ... */
+        if (slot == ctx->page_list_head)
+        {
+            ctx->page_list_head = slot->next;
+            ctx->page_list_head->prev = NULL;
+        }
+        else
+        {
+            slot->prev->next = slot->next;
+            slot->next->prev = slot->prev;
+        }
+
+        /* ... and append behind the current tail. */
+        slot->next = NULL;
+        slot->prev = ctx->page_list_tail;
+        ctx->page_list_tail->next = slot;
+        ctx->page_list_tail = slot;
+    }
+
+    /* The slot is at the tail now; mark it free. */
+    ctx->pfn2cache[pfn] = NULL;
+    ctx->page_list_tail->pfn = INVALID_P2M_ENTRY;
+}
+
+/*
+ * Queue a copy of page for later compression.
+ *
+ * Raw (pagetable) pages are first dropped from the delta cache and are
+ * recorded with INVALID_P2M_ENTRY instead of their pfn.
+ *
+ * Returns 0 normally, or -1 when this page filled the last free slot of the
+ * page buffer. The page has been queued in either case; on -1 the caller
+ * must compress and flush the buffer before adding anything else.
+ */
+int xc_remus_add_page(xc_interface *xch, void *remus_ctx, char *page,
+                      unsigned long pfn, int israw)
+{
+    struct remus_context *ctx = remus_ctx;
+    unsigned int slot = ctx->pfns_index;
+
+    if (israw)
+    {
+        /* pagetable page */
+        invalidate_cache_page(ctx, pfn);
+        ctx->sendbuf_pfns[slot] = INVALID_P2M_ENTRY;
+    }
+    else
+        ctx->sendbuf_pfns[slot] = pfn;
+
+    memcpy(ctx->inputbuf + slot * XC_PAGE_SIZE, page, XC_PAGE_SIZE);
+    ctx->pfns_index = slot + 1;
+
+    /* Buffer full? Then the pages have to be compressed and flushed
+     * synchronously by the caller.
+     */
+    return (ctx->pfns_index == NRPAGES(PAGE_BUFFER_SIZE)) ? -1 : 0;
+}
+
+/*
+ * Compress queued pages, starting at the internal iterator, into the
+ * compression buffer previously set with xc_remus_compbuf_set().
+ *
+ * Returns 0 if there is nothing (left) to compress, 1 if all remaining
+ * queued pages were compressed, and -1 if the compression buffer ran out of
+ * space. After -1 the iterator still points at the page that did not fit,
+ * so the caller can hand in a fresh buffer and call again.
+ */
+int xc_remus_compress(xc_interface *xch, void *remus_ctx)
+{
+    struct remus_context *ctx = (struct remus_context *)remus_ctx;
+    char *cache_copy = NULL;
+    unsigned long pfn;
+    int israw;
+
+    if (!ctx->pfns_index || (ctx->pfns_iterator == ctx->pfns_index))
+        return 0;
+
+    for (; ctx->pfns_iterator < ctx->pfns_index; ctx->pfns_iterator++)
+    {
+        israw = 0;
+        cache_copy = NULL;
+        pfn = ctx->sendbuf_pfns[ctx->pfns_iterator];
+        if (pfn == INVALID_P2M_ENTRY)
+            israw = 1;
+        else
+            cache_copy = get_cache_page(ctx, pfn, &israw);
+
+        /* Out of space in outbuf! flush and come back */
+        if (__compress(xch, ctx,
+                       ctx->inputbuf + ctx->pfns_iterator * XC_PAGE_SIZE,
+                       cache_copy, israw) < 0)
+        {
+            /*
+             * On a cache miss get_cache_page() has already installed pfn
+             * in the cache, but the slot never received this page's
+             * contents. Undo that, otherwise the retry would see a cache
+             * hit and diff against unrelated data.
+             */
+            if (cache_copy && israw)
+                invalidate_cache_page(ctx, pfn);
+            return -1;
+        }
+    }
+
+    return 1;
+}
+
+/*
+ * Return the number of bytes written to the compression buffer since it was
+ * last set with xc_remus_compbuf_set().
+ *
+ * Deliberately not declared inline: this is an externally linked API
+ * function, and a bare inline here would only yield an external definition
+ * because the header happens to declare it without inline.
+ */
+unsigned long xc_remus_get_compbuf_len(xc_interface *xch, void *remus_ctx)
+{
+    struct remus_context *ctx = (struct remus_context *)remus_ctx;
+    return ctx->compbuf_pos;
+}
+
+/*
+ * Install compbuf (of compbuf_size bytes) as the destination for subsequent
+ * compression and rewind the write position to its start. The buffer stays
+ * owned by the caller.
+ *
+ * Deliberately not declared inline: this is an externally linked API
+ * function, and a bare inline here would only yield an external definition
+ * because the header happens to declare it without inline.
+ */
+void xc_remus_compbuf_set(xc_interface *xch, void *remus_ctx,
+                          char *compbuf, unsigned long compbuf_size)
+{
+    struct remus_context *ctx = (struct remus_context *)remus_ctx;
+    ctx->compbuf_pos = 0;
+    ctx->compbuf = compbuf;
+    ctx->compbuf_size = compbuf_size;
+}
+
+/*
+ * Forget all queued pages: both the fill index and the compression iterator
+ * of the internal page buffer go back to zero.
+ *
+ * Deliberately not declared inline: this is an externally linked API
+ * function, and a bare inline here would only yield an external definition
+ * because the header happens to declare it without inline.
+ */
+void xc_remus_pagebuf_reset(xc_interface *xch, void *remus_ctx)
+{
+    struct remus_context *ctx = (struct remus_context *)remus_ctx;
+    ctx->pfns_index = ctx->pfns_iterator = 0;
+}
+
+/*
+ * Public entry point for decompression: pulls one page out of compbuf at
+ * *compbuf_pos into dest. Returns whatever __uncompress() returns.
+ */
+int xc_remus_uncompress(xc_interface *xch, char *compbuf,
+                        unsigned long compbuf_size,
+                        unsigned long *compbuf_pos, char *dest)
+{
+    int status;
+
+    status = __uncompress(xch, dest, compbuf_pos, compbuf, compbuf_size);
+    return status;
+}
+
+/*
+ * Release a context obtained from xc_remus_create_context(), including a
+ * partially constructed one. A NULL ctx is ignored.
+ */
+void xc_remus_free_context(xc_interface *xch, void *ctx)
+{
+    struct remus_context *remus_ctx = ctx;
+
+    if (remus_ctx == NULL)
+        return;
+
+    /* free(NULL) is a no-op, so members that were never allocated are fine */
+    free(remus_ctx->inputbuf);
+    free(remus_ctx->sendbuf_pfns);
+    free(remus_ctx->cache_base);
+    free(remus_ctx->pfn2cache);
+    free(remus_ctx->cache2pfn);
+    free(remus_ctx);
+}
+
+/*
+ * Allocate a compression context: the page buffer and its pfn array, the
+ * delta cache storage, the pfn -> slot map (p2m_size entries) and the slot
+ * array, with all slots free and linked into one list in array order.
+ *
+ * Returns the new context, or NULL if any allocation failed (everything
+ * allocated so far is released again).
+ */
+void *xc_remus_create_context(xc_interface *xch, unsigned long p2m_size)
+{
+    unsigned long nr_slots = DELTA_CACHE_SIZE/XC_PAGE_SIZE;
+    struct remus_context *remus_ctx = NULL;
+    struct cache_page *slot;
+    unsigned long i;
+
+    /* zeroed, so that the error path sees NULL for unallocated members */
+    remus_ctx = calloc(1, sizeof(struct remus_context));
+    if (remus_ctx == NULL)
+    {
+        ERROR("Failed to allocate remus_ctx\n");
+        goto error;
+    }
+
+    if (posix_memalign((void **)&remus_ctx->inputbuf,
+                       XC_PAGE_SIZE, PAGE_BUFFER_SIZE) != 0)
+    {
+        ERROR("Failed to allocate page buffer\n");
+        goto error;
+    }
+
+    remus_ctx->sendbuf_pfns = malloc(NRPAGES(PAGE_BUFFER_SIZE) *
+                                     sizeof(unsigned long));
+    if (remus_ctx->sendbuf_pfns == NULL)
+    {
+        ERROR("Could not alloc sendbuf_pfns\n");
+        goto error;
+    }
+    /* all-ones in every entry */
+    memset(remus_ctx->sendbuf_pfns, -1,
+           NRPAGES(PAGE_BUFFER_SIZE) * sizeof(unsigned long));
+
+    if (posix_memalign((void **)&remus_ctx->cache_base,
+                       XC_PAGE_SIZE, DELTA_CACHE_SIZE) != 0)
+    {
+        ERROR("Failed to allocate delta cache\n");
+        goto error;
+    }
+
+    remus_ctx->pfn2cache = calloc(p2m_size, sizeof(struct cache_page *));
+    if (remus_ctx->pfn2cache == NULL)
+    {
+        ERROR("Could not alloc pfn2cache map\n");
+        goto error;
+    }
+
+    remus_ctx->cache2pfn = malloc(nr_slots * sizeof(struct cache_page));
+    if (remus_ctx->cache2pfn == NULL)
+    {
+        ERROR("Could not alloc cache2pfn map\n");
+        goto error;
+    }
+
+    /* Chain the slots in array order; each one owns one page of storage. */
+    for (i = 0; i < nr_slots; i++)
+    {
+        slot = &remus_ctx->cache2pfn[i];
+        slot->pfn = INVALID_P2M_ENTRY;
+        slot->page = remus_ctx->cache_base + i * XC_PAGE_SIZE;
+        slot->prev = (i == 0) ? NULL : (slot - 1);
+        slot->next = ((i + 1) == nr_slots) ? NULL : (slot + 1);
+    }
+    remus_ctx->page_list_head = &remus_ctx->cache2pfn[0];
+    remus_ctx->page_list_tail = &remus_ctx->cache2pfn[nr_slots - 1];
+
+    return (void *)remus_ctx;
+
+error:
+    xc_remus_free_context(xch, remus_ctx);
+    return NULL;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Wed Jun 15 16:16:41 2011 +0100
+++ b/tools/libxc/xenctrl.h Sat Jun 18 20:51:59 2011 -0700
@@ -1820,4 +1820,58 @@
int verbose);
/* Useful for callers who also use libelf. */
+/**
+ * Remus Checkpoint Compression
+ *
+ * xc_remus_create_context() returns an opaque compression context sized for
+ * a guest with p2m_size pfns, or NULL on allocation failure.
+ * xc_remus_free_context() releases it; a NULL context is ignored.
+ */
+void *xc_remus_create_context(xc_interface *xch, unsigned long p2m_size);
+void xc_remus_free_context(xc_interface *xch, void *remus_ctx);
+
+/**
+ * Add a copy of a page to the remus buffer, to be compressed later.
+ * Pages added with israw set are always sent uncompressed.
+ * returns 0 on success.
+ * returns -1 if this page filled the last free slot of the buffer; the page
+ * has still been added, but the buffer must be compressed and reset before
+ * any further page is added.
+ */
+int xc_remus_add_page(xc_interface *xch, void *remus_ctx, char *page,
+ unsigned long pfn, int israw);
+
+/**
+ * Should be called before compressing the pages. Caller supplies a
+ * compression buffer compbuf of size compbuf_size.
+ */
+void xc_remus_compbuf_set(xc_interface *xch, void *remus_ctx, char *compbuf,
+ unsigned long compbuf_size);
+
+/**
+ * Delta compress pages in the remus buffer and inserts the
+ * compressed data into the previously supplied compression buffer, compbuf.
+ * After compression, the page is copied to the internal LRU cache.
+ *
+ * This function compresses as many pages as possible into the
+ * supplied compression buffer. It maintains an internal iterator to
+ * keep track of pages in the input buffer that are yet to be compressed.
+ *
+ * returns -1 if the compression buffer has run out of space.
+ * returns 1 on success.
+ * returns 0 if no more pages are left to be compressed.
+ */
+int xc_remus_compress(xc_interface *xch, void *remus_ctx);
+
+/**
+ * Returns the exact length of data, in the compression buffer.
+ */
+unsigned long xc_remus_get_compbuf_len(xc_interface *xch, void *remus_ctx);
+
+/**
+ * Resets the internal page buffer that holds dirty pages before compression.
+ * Also resets the iterators.
+ */
+void xc_remus_pagebuf_reset(xc_interface *xch, void *remus_ctx);
+
+/**
+ * Caller must supply the compression buffer (compbuf), its size
+ * (compbuf_size) and a reference to an index variable (compbuf_pos) that is
+ * used internally.
+ * Each call pulls out one page from the compressed chunk and copies it to
+ * dest.
+ * returns 0 on success, -1 on error.
+ */
+int xc_remus_uncompress(xc_interface *xch, char *compbuf, unsigned long
+compbuf_size,
+ unsigned long *compbuf_pos, char *dest);
+
#endif /* XENCTRL_H */
diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h Wed Jun 15 16:16:41 2011 +0100
+++ b/tools/libxc/xenguest.h Sat Jun 18 20:51:59 2011 -0700
@@ -27,6 +27,7 @@
#define XCFLAGS_DEBUG 2
#define XCFLAGS_HVM 4
#define XCFLAGS_STDVGA 8
+#define XCFLAGS_REMUS_COMPRESS 16
#define X86_64_B_SIZE 64
#define X86_32_B_SIZE 32
diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xg_save_restore.h
--- a/tools/libxc/xg_save_restore.h Wed Jun 15 16:16:41 2011 +0100
+++ b/tools/libxc/xg_save_restore.h Sat Jun 18 20:51:59 2011 -0700
@@ -134,6 +134,8 @@
#define XC_SAVE_ID_HVM_CONSOLE_PFN -8 /* (HVM-only) */
#define XC_SAVE_ID_LAST_CHECKPOINT -9 /* Commit to restoring after
completion of current iteration. */
#define XC_SAVE_ID_HVM_ACPI_IOPORTS_LOCATION -10
+#define XC_SAVE_ID_COMPRESSED_DATA -11 /* Marker to indicate arrival of
compressed data */
+#define XC_SAVE_ID_ENABLE_COMPRESSION -12 /* Marker to enable compression
logic at receiver side */
/*
** We process save/restore/migrate in batches of pages; the below
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|