[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 1 of 2] tools/libxc: Remus Checkpoint Compression
# HG changeset patch # User Shriram Rajagopalan <rshriram@xxxxxxxxx> # Date 1308455519 25200 # Node ID b4974a38d10199c1e2b8fd3cf36d91c03ac5eeae # Parent 23c068b109236657ededa3e3b7f180346a5cd9f9 tools/libxc: Remus Checkpoint Compression Instead of sending dirty pages of guest memory as-is, use a simple compression algorithm that sends a RLE-encoded XOR of the page against its last sent copy. A small LRU cache is used to hold recently dirtied pages. Pagetable pages are sent as-is, as they are canonicalized at sender side and uncanonicalized at receiver. Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx> diff -r 23c068b10923 -r b4974a38d101 tools/libxc/Makefile --- a/tools/libxc/Makefile Wed Jun 15 16:16:41 2011 +0100 +++ b/tools/libxc/Makefile Sat Jun 18 20:51:59 2011 -0700 @@ -42,7 +42,7 @@ GUEST_SRCS-y := GUEST_SRCS-y += xg_private.c xc_suspend.c GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_domain_save.c -GUEST_SRCS-$(CONFIG_MIGRATE) += xc_offline_page.c +GUEST_SRCS-$(CONFIG_MIGRATE) += xc_offline_page.c xc_remus.c GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c vpath %.c ../../xen/common/libelf diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xc_domain_restore.c --- a/tools/libxc/xc_domain_restore.c Wed Jun 15 16:16:41 2011 +0100 +++ b/tools/libxc/xc_domain_restore.c Sat Jun 18 20:51:59 2011 -0700 @@ -43,6 +43,7 @@ xen_pfn_t *p2m_batch; /* A table of P2M mappings in the current region. */ int completed; /* Set when a consistent image is available */ int last_checkpoint; /* Set when we should commit to the current checkpoint when it completes. 
*/ + int compression; /* Set when sender signals that pages would be sent compressed (for Remus) */ struct domain_info_context dinfo; }; @@ -663,6 +664,10 @@ /* pages is of length nr_physpages, pfn_types is of length nr_pages */ unsigned int nr_physpages, nr_pages; + /* remus compression state */ + int compression; + unsigned long compbuf_pos, compbuf_size; + /* Types of the pfns in the current region */ unsigned long* pfn_types; @@ -700,6 +705,7 @@ { int count, countpages, oldcount, i; void* ptmp; + unsigned long compbuf_size; if ( RDEXACT(fd, &count, sizeof(count)) ) { @@ -809,6 +815,46 @@ } return pagebuf_get_one(xch, ctx, buf, fd, dom); + case XC_SAVE_ID_ENABLE_COMPRESSION: + /* We cannot set compression flag directly in pagebuf structure, + * since this pagebuf still has uncompressed pages that are yet to + * be applied. We enable the compression field in pagebuf structure + * after receiving the first tailbuf. + */ + ctx->compression = 1; + // DPRINTF("compression flag received"); + return pagebuf_get_one(xch, ctx, buf, fd, dom); + + case XC_SAVE_ID_COMPRESSED_DATA: + + /* read the length of compressed chunk coming in */ + if ( RDEXACT(fd, &compbuf_size, sizeof(unsigned long)) ) + { + PERROR("Error when reading compbuf_size"); + return -1; + } + if (!compbuf_size) return 1; + + buf->compbuf_size += compbuf_size; + if (!buf->pages) { + if (!(buf->pages = malloc(buf->compbuf_size))) { + ERROR("Could not allocate compression buffer"); + return -1; + } + } else { + if (!(ptmp = realloc(buf->pages, buf->compbuf_size))) { + ERROR("Could not reallocate compression buffer"); + return -1; + } + buf->pages = ptmp; + } + if ( RDEXACT(fd, buf->pages + (buf->compbuf_size - compbuf_size), + compbuf_size) ) { + PERROR("Error when reading compression buffer"); + return -1; + } + return compbuf_size; + default: if ( (count > MAX_BATCH_SIZE) || (count < 0) ) { ERROR("Max batch size exceeded (%d). 
Giving up.", count); @@ -846,6 +892,13 @@ if (!countpages) return count; + /* If Remus Checkpoint Compression is turned on, we only receive the + * pfn lists now. The compressed pages will come in later, following a + * <XC_SAVE_ID_COMPRESSED_DATA, compressedChunkSize> tuple. + */ + if (buf->compression) + return pagebuf_get_one(xch, ctx, buf, fd, dom); + oldcount = buf->nr_physpages; buf->nr_physpages += countpages; if (!buf->pages) { @@ -874,6 +927,7 @@ int rc; buf->nr_physpages = buf->nr_pages = 0; + buf->compbuf_pos = buf->compbuf_size = 0; do { rc = pagebuf_get_one(xch, ctx, buf, fd, dom); @@ -1091,7 +1145,19 @@ /* In verify mode, we use a copy; otherwise we work in place */ page = pagebuf->verify ? (void *)buf : (region_base + i*PAGE_SIZE); - memcpy(page, pagebuf->pages + (curpage + curbatch) * PAGE_SIZE, PAGE_SIZE); + /* Remus - page decompression */ + if (pagebuf->compression) + { + if (xc_remus_uncompress(xch, pagebuf->pages, pagebuf->compbuf_size, + &pagebuf->compbuf_pos, (char *)page)) + { + ERROR("Failed to uncompress page (pfn=%lx)\n", pfn); + goto err_mapped; + } + } + else + memcpy(page, pagebuf->pages + (curpage + curbatch) * PAGE_SIZE, + PAGE_SIZE); pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK; @@ -1353,6 +1419,7 @@ if ( !ctx->completed ) { pagebuf.nr_physpages = pagebuf.nr_pages = 0; + pagebuf.compbuf_pos = pagebuf.compbuf_size = 0; if ( pagebuf_get_one(xch, ctx, &pagebuf, io_fd, dom) < 0 ) { PERROR("Error when reading batch"); goto out; @@ -1395,6 +1462,7 @@ } pagebuf.nr_physpages = pagebuf.nr_pages = 0; + pagebuf.compbuf_pos = pagebuf.compbuf_size = 0; n += j; /* crude stats */ @@ -1438,6 +1506,13 @@ */ if ( !ctx->last_checkpoint ) fcntl(io_fd, F_SETFL, orig_io_fd_flags | O_NONBLOCK); + + /* + * If sender had sent enable compression flag, switch to compressed + * checkpoints mode once the first checkpoint is received. 
+ */ + if (ctx->compression) + pagebuf.compression = 1; } if (pagebuf.acpi_ioport_location == 1) { diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xc_domain_save.c --- a/tools/libxc/xc_domain_save.c Wed Jun 15 16:16:41 2011 +0100 +++ b/tools/libxc/xc_domain_save.c Sat Jun 18 20:51:59 2011 -0700 @@ -269,6 +269,57 @@ return noncached_write(xch, ob, fd, buf, len); } +static int write_compressed(xc_interface *xch, void *remus_ctx, int dobuf, + struct outbuf* ob, int fd) +{ + int rc = 0; + int header = sizeof(int) + sizeof(unsigned long); + int marker = XC_SAVE_ID_COMPRESSED_DATA; + unsigned long compbuf_len = 0; + + do + { + /* check for available space (atleast 8k) */ + if ((ob->pos + header + XC_PAGE_SIZE * 2) > ob->size) + { + if (outbuf_flush(xch, ob, fd) < 0) + { + ERROR("Error when flushing outbuf intermediate"); + return -1; + } + } + + xc_remus_compbuf_set(xch, remus_ctx, ob->buf + ob->pos + header, + ob->size - ob->pos - header); + rc = xc_remus_compress(xch, remus_ctx); + if (!rc) + break; + compbuf_len = xc_remus_get_compbuf_len(xch, remus_ctx); + + if (outbuf_hardwrite(xch, ob, fd, &marker, sizeof(marker)) < 0) + { + PERROR("Error when writing marker (errno %d)", errno); + return -1; + } + + if (outbuf_hardwrite(xch, ob, fd, &compbuf_len, sizeof(compbuf_len)) < 0) + { + PERROR("Error when writing compbuf_len (errno %d)", errno); + return -1; + } + + ob->pos += (size_t) compbuf_len; + if (!dobuf && outbuf_flush(xch, ob, fd) < 0) + { + ERROR("Error when writing compressed chunk"); + return -1; + } + } while (rc != 0); + + xc_remus_pagebuf_reset(xch, remus_ctx); + return 0; +} + struct time_stats { struct timeval wall; long long d0_cpu, d1_cpu; @@ -866,11 +917,19 @@ unsigned long mfn; - struct outbuf ob; + struct outbuf ob_pagebuf, ob_tailbuf, *ob = NULL; struct save_ctx _ctx; struct save_ctx *ctx = &_ctx; struct domain_info_context *dinfo = &ctx->dinfo; + /* Remus context */ + void *remus_ctx = NULL; + /* Even if XCFLAGS_REMUS_COMPRESS is set, we enable 
compression only + * after sending XC_SAVE_ID_ENABLE_COMPRESSION and the tailbuf for + * first time. + */ + int compression = 0; + int completed = 0; if ( hvm && !callbacks->switch_qemu_logdirty ) @@ -880,7 +939,7 @@ return 1; } - outbuf_init(xch, &ob, OUTBUF_SIZE); + outbuf_init(xch, &ob_pagebuf, OUTBUF_SIZE); memset(ctx, 0, sizeof(*ctx)); @@ -968,6 +1027,16 @@ } } + if ( flags & XCFLAGS_REMUS_COMPRESS ) + { + if (!(remus_ctx = xc_remus_create_context(xch, dinfo->p2m_size))) + { + ERROR("Failed to create remus context"); + goto out; + } + outbuf_init(xch, &ob_tailbuf, OUTBUF_SIZE/4); + } + last_iter = !live; /* pretend we sent all the pages last iteration */ @@ -1076,9 +1145,11 @@ } copypages: -#define wrexact(fd, buf, len) write_buffer(xch, last_iter, &ob, (fd), (buf), (len)) -#define wruncached(fd, live, buf, len) write_uncached(xch, last_iter, &ob, (fd), (buf), (len)) +#define wrexact(fd, buf, len) write_buffer(xch, last_iter, ob, (fd), (buf), (len)) +#define wruncached(fd, live, buf, len) write_uncached(xch, last_iter, ob, (fd), (buf), (len)) +#define wrcompressed(fd) write_compressed(xch, remus_ctx, last_iter, ob, (fd)) + ob = &ob_pagebuf; /* Holds pfn_types, pages/compressed pages */ /* Now write out each data page, canonicalising page tables as we go... */ for ( ; ; ) { @@ -1321,7 +1392,7 @@ { /* If the page is not a normal data page, write out any run of pages we may have previously acumulated */ - if ( run ) + if ( !compression && run ) { if ( wruncached(io_fd, live, (char*)region_base+(PAGE_SIZE*(j-run)), @@ -1356,7 +1427,32 @@ goto out; } - if ( wruncached(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE ) + if (compression) + { + /* Mark pagetable page to be sent uncompressed */ + if (xc_remus_add_page(xch, remus_ctx, page, + pfn, 1 /* raw page */) < 0) + { + /* + * We are out of buffer space to hold dirty + * pages. Compress and flush the current buffer + * to make space. 
This is a corner case, that + * slows down checkpointing as the compression + * happens while domain is suspended. Happens + * seldom and if you find this occurring + * frequently, increase the PAGE_BUFFER_SIZE + * in xc_remus.c. + */ + if (wrcompressed(io_fd) < 0) + { + ERROR("Error when writing compressed" + " data (4b)\n"); + goto out; + } + } + } + else if ( wruncached(io_fd, live, page, + PAGE_SIZE) != PAGE_SIZE ) { PERROR("Error when writing to state file (4b)" " (errno %d)", errno); @@ -1366,7 +1462,24 @@ else { /* We have a normal page: accumulate it for writing. */ - run++; + if (compression) + { + /* For remus/compression, accumulate the page in the + * page buffer, to be compressed later. + */ + if (xc_remus_add_page(xch, remus_ctx, spage, + pfn, 0 /* not raw page */) < 0) + { + if (wrcompressed(io_fd) < 0) + { + ERROR("Error when writing compressed" + " data (4c)\n"); + goto out; + } + } + } + else + run++; } } /* end of the write out for this batch */ @@ -1474,6 +1587,15 @@ DPRINTF("All memory is saved\n"); + /* After last_iter, buffer the rest of pagebuf & tailbuf data into a + * separate output buffer and flush it after the compressed page chunks. + */ + if (compression) + { + ob = &ob_tailbuf; + ob->pos = 0; + } + { struct { int id; @@ -1573,6 +1695,25 @@ } } + /* Enable compression logic on both sides by sending this + * one time marker. + * NOTE: We could have simplified this procedure by sending + * the enable/disable compression flag before the beginning of + * the main for loop. But this would break compatibility for + * live migration code, with older versions of xen. So we have + * to enable it after the last_iter, when the XC_SAVE_ID_* + * elements are sent. 
+ */ + if (!compression && (flags & XCFLAGS_REMUS_COMPRESS)) + { + i = XC_SAVE_ID_ENABLE_COMPRESSION; + if ( wrexact(io_fd, &i, sizeof(int)) ) + { + PERROR("Error when writing enable_compression marker"); + goto out; + } + } + /* Zero terminate */ i = 0; if ( wrexact(io_fd, &i, sizeof(int)) ) @@ -1817,14 +1958,38 @@ if ( !rc && callbacks->postcopy ) callbacks->postcopy(callbacks->data); + /* guest has been resumed. Now we can compress data + * at our own pace. + */ + if (!rc && compression) + { + ob = &ob_pagebuf; + if (wrcompressed(io_fd) < 0) + { + ERROR("Error when writing compressed data, after postcopy\n"); + rc = 1; + goto out; + } + /* Copy the tailbuf data into the main outbuf */ + if ( wrexact(io_fd, ob_tailbuf.buf, ob_tailbuf.pos) ) + { + rc = 1; + PERROR("Error when copying tailbuf into outbuf"); + goto out; + } + } + /* Flush last write and discard cache for file. */ - if ( outbuf_flush(xch, &ob, io_fd) < 0 ) { + if ( outbuf_flush(xch, ob, io_fd) < 0 ) { PERROR("Error when flushing output buffer"); rc = 1; } discard_file_cache(xch, io_fd, 1 /* flush */); + /* Enable compression now, finally */ + compression = (flags & XCFLAGS_REMUS_COMPRESS); + /* checkpoint_cb can spend arbitrarily long in between rounds */ if (!rc && callbacks->checkpoint && callbacks->checkpoint(callbacks->data) > 0) @@ -1866,6 +2031,9 @@ DPRINTF("Warning - couldn't disable qemu log-dirty mode"); } + if (remus_ctx) + xc_remus_free_context(xch, remus_ctx); + if ( live_shinfo ) munmap(live_shinfo, PAGE_SIZE); diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xc_remus.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/xc_remus.c Sat Jun 18 20:51:59 2011 -0700 @@ -0,0 +1,465 @@ +/****************************************************************************** + * xc_remus.c + * + * Checkpoint Compression using Page Delta Algorithm. + * - A LRU cache of recently dirtied guest pages is maintained. 
+ * - For each dirty guest page in the checkpoint, if a previous version of the + * page exists in the cache, XOR both pages and send the non-zero sections + * to the receiver. The cache is then updated with the newer copy of the guest page. + * - The receiver will XOR the non-zero sections against its copy of the guest + * page, thereby bringing the guest page up-to-date with the sender side. + * + * Copyright (c) 2011 Shriram Rajagopalan (rshriram@xxxxxxxxx). + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <inttypes.h> +#include <errno.h> +#include "xenctrl.h" +#include "xg_save_restore.h" +#include "xg_private.h" + +/* Already defined in xc_dom.h, but it doesn't have + * a conditional include macro. So, redefine here. + */ +#define INVALID_P2M_ENTRY ((xen_pfn_t)-1) + +/* Page Cache for Delta Compression*/ +#define DELTA_CACHE_SIZE (XC_PAGE_SIZE * 8192) + +struct cache_page; +struct cache_page +{ + char *page; + unsigned long pfn; + struct cache_page *next; + struct cache_page *prev; +}; + +/* After XORing the older and newer version, the non-zero sections + * are sent as a sequence of tuples <2-byte-offset,4-byte-data> called markers. + * - Each page begins with a BEGIN marker (for synchronization). 
+ * - If the result of XOR is a page filled with zeros (i.e. no difference between + * old and new page), then only the BEGIN marker is sent for the page. + * - If the two versions of the page differ by more than 50%, the page is sent + * as is, with a FULLPAGE marker, without a BEGIN marker. + * + * About the choice of data types: typical page size is 4K. Each marker is + * 6 bytes long, with a 4-byte data word (1024 data words per page). If 50% of + * the page changed, then we would be transmitting ~3000 bytes (worst case). + * - If we use an 8-byte data word (10-byte marker), we end up sending + * ~5000 bytes (>4096). + */ + +typedef unsigned int data_t; +typedef short int moff_t; + +#define BEGIN -100 +#define FULLPAGE -101 +struct marker +{ + moff_t off; + data_t val; +} __attribute__((packed)); + +static struct marker begin_page = { BEGIN, -1}; +static struct marker full_page = {FULLPAGE, -1}; + +/* Internal page buffer to hold dirty pages of a checkpoint, + * to be compressed after the domain is resumed for execution. 
+ */ +#define PAGE_BUFFER_SIZE (XC_PAGE_SIZE * 8192) + +struct remus_context +{ + /* compression buffer - holds compressed data */ + char *compbuf; + unsigned long compbuf_size; + unsigned long compbuf_pos; + + /* Page buffer to hold pages to be compressed */ + char *inputbuf; + /* pfns of pages to be compressed */ + unsigned long *sendbuf_pfns; + unsigned int pfns_index; + unsigned int pfns_iterator; + + /* Compression Cache (LRU) */ + char *cache_base; + struct cache_page **pfn2cache; + struct cache_page *cache2pfn; + struct cache_page *page_list_head; + struct cache_page *page_list_tail; +}; + +static +int __compress(xc_interface *xch, struct remus_context *ctx, char *srcpage, + char *copypage, int israw) +{ + struct marker *dest = (struct marker *)(ctx->compbuf+ ctx->compbuf_pos); + moff_t off; + int j=0, rc = 0; + data_t *src, *copy; + + src = (data_t*)srcpage; + copy = (data_t*)copypage; + + if ((ctx->compbuf_pos + sizeof(struct marker)) > ctx->compbuf_size) + return -1; + + if (!israw && copypage) + { + dest[j++] = begin_page; + for (off = 0; off < XC_PAGE_SIZE/sizeof(data_t); off++) + { + if (copy[off] != src[off]) + { + if ((ctx->compbuf_pos + (j + 1) * + sizeof(struct marker)) > ctx->compbuf_size) + return -1; + + copy[off] = src[off]; + dest[j].off = off; + dest[j].val = src[off]; + j++; + } + if (j > 500) /* more than 50% of page changed */ + goto FullPage; + } + rc = (j * sizeof(struct marker)); + } + else + { + FullPage: + if ( (ctx->compbuf_pos + sizeof(struct marker) + + XC_PAGE_SIZE) > ctx->compbuf_size) + return -1; + + dest[0] = full_page; + if (copypage) + memcpy(copypage, srcpage, XC_PAGE_SIZE); + memcpy((char *)&dest[1], srcpage, XC_PAGE_SIZE); + rc = XC_PAGE_SIZE + sizeof(struct marker); + } + ctx->compbuf_pos += rc; + + return rc; +} + +static +int __uncompress(xc_interface *xch, char *destpage, unsigned long *compbuf_pos, + char *compbuf, unsigned long compbuf_size) +{ + struct marker *src = (struct marker *)(compbuf + *compbuf_pos); + int 
i; + data_t *dest = (data_t *)destpage; + + if (*compbuf_pos >= compbuf_size) + { + ERROR("Out of bounds exception: read ptr:%lu, bufsize = %lu\n", + *compbuf_pos, compbuf_size); + return -1; + } + + if (src[0].off == BEGIN) + { + *compbuf_pos += sizeof(struct marker); + for (i = 1; (*compbuf_pos < compbuf_size) && (src[i].off >= 0); + i++, *compbuf_pos += sizeof(struct marker)) + dest[src[i].off] = src[i].val; + } + else if (src[0].off == FULLPAGE) + { + *compbuf_pos += sizeof(struct marker) + XC_PAGE_SIZE; + memcpy(destpage, (char *)&src[1], XC_PAGE_SIZE); + } + else + { + ERROR("Invalid marker %d in compression buffer at %u\n", + src[0].off, *compbuf_pos); + return -1; + } + return 0; +} + +static +char *get_cache_page(struct remus_context *ctx, unsigned long pfn, + int *israw) +{ + struct cache_page *item = NULL; + +start: + item = ctx->pfn2cache[pfn]; + /* if requested item is in cache move to head of list */ + if (item) + { + /* item already at head of list */ + if (item == ctx->page_list_head) + goto end; + if (item == ctx->page_list_tail) + { + /* item at tail of list. */ + ctx->page_list_tail = item->prev; + (ctx->page_list_tail)->next = NULL; + } + else + { + /* item in middle of list */ + item->prev->next = item->next; + item->next->prev = item->prev; + } + + item->prev = NULL; + item->next = ctx->page_list_head; + (ctx->page_list_head)->prev = item; + ctx->page_list_head = item; + goto end; + } + else + { + *israw = 1; + /* Add new item to list. If list is full, + * evict a page from tail of list. 
+ */ + if ((ctx->page_list_tail)->pfn != INVALID_P2M_ENTRY) + ctx->pfn2cache[(ctx->page_list_tail)->pfn] = NULL; + (ctx->page_list_tail)->pfn = pfn; + ctx->pfn2cache[pfn] = ctx->page_list_tail; + + /* Will have same effect as cache hit at tail of list */ + goto start; + } +end: + return (ctx->page_list_head)->page; +} + +/* Remove pagetable pages from cache and move to tail, as free pages */ +static +void invalidate_cache_page(struct remus_context *ctx, unsigned long pfn) +{ + struct cache_page *item = NULL; + + item = ctx->pfn2cache[pfn]; + if (item) + { + /* item at head of list */ + if (item == ctx->page_list_head) + { + ctx->page_list_head = (ctx->page_list_head)->next; + (ctx->page_list_head)->prev = NULL; + } + else if (item == ctx->page_list_tail) + { + /* item already at tail of list. */ + goto end; + } + else + { + /* item in middle of list */ + item->prev->next = item->next; + item->next->prev = item->prev; + } + item->next = NULL; + item->prev = ctx->page_list_tail; + (ctx->page_list_tail)->next = item; + ctx->page_list_tail = item; + end: + ctx->pfn2cache[pfn] = NULL; + (ctx->page_list_tail)->pfn = INVALID_P2M_ENTRY; + } +} + +int xc_remus_add_page(xc_interface *xch, void *remus_ctx, char *page, + unsigned long pfn, int israw) +{ + struct remus_context *ctx = (struct remus_context *)remus_ctx; + + /* pagetable page */ + if (israw) + invalidate_cache_page(ctx, pfn); + ctx->sendbuf_pfns[ctx->pfns_index] = israw? INVALID_P2M_ENTRY : pfn; + memcpy(ctx->inputbuf + ctx->pfns_index * XC_PAGE_SIZE, page, XC_PAGE_SIZE); + ctx->pfns_index++; + + /* check if we have run out of space. 
If so, + * we need to synchronously compress the pages and flush them out + */ + if (ctx->pfns_index == NRPAGES(PAGE_BUFFER_SIZE)) + return -1; + return 0; +} + +int xc_remus_compress(xc_interface *xch, void *remus_ctx) +{ + struct remus_context *ctx = (struct remus_context *)remus_ctx; + char *cache_copy = NULL; + int israw; + + if (!ctx->pfns_index || (ctx->pfns_iterator == ctx->pfns_index)) + return 0; + + for (; ctx->pfns_iterator < ctx->pfns_index; ctx->pfns_iterator++) + { + israw = 0; + cache_copy = NULL; + if (ctx->sendbuf_pfns[ctx->pfns_iterator] == INVALID_P2M_ENTRY) + israw = 1; + else + cache_copy = get_cache_page(ctx, ctx->sendbuf_pfns[ctx->pfns_iterator], + &israw); + + /* Out of space in outbuf! flush and come back */ + if (__compress(xch, ctx, ctx->inputbuf + ctx->pfns_iterator * XC_PAGE_SIZE, + cache_copy, israw) < 0) + return -1; + } + + return 1; +} + +inline +unsigned long xc_remus_get_compbuf_len(xc_interface *xch, void *remus_ctx) +{ + struct remus_context *ctx = (struct remus_context *)remus_ctx; + return ctx->compbuf_pos; +} + +inline +void xc_remus_compbuf_set(xc_interface *xch, void *remus_ctx, + char *compbuf, unsigned long compbuf_size) +{ + struct remus_context *ctx = (struct remus_context *)remus_ctx; + ctx->compbuf_pos = 0; + ctx->compbuf = compbuf; + ctx->compbuf_size = compbuf_size; +} + +inline +void xc_remus_pagebuf_reset(xc_interface *xch, void *remus_ctx) +{ + struct remus_context *ctx = (struct remus_context *)remus_ctx; + ctx->pfns_index = ctx->pfns_iterator = 0; +} + +int xc_remus_uncompress(xc_interface *xch, char *compbuf, + unsigned long compbuf_size, + unsigned long *compbuf_pos, char *dest) +{ + return __uncompress(xch, dest, compbuf_pos, compbuf, compbuf_size); +} + +void xc_remus_free_context(xc_interface *xch, void *ctx) +{ + struct remus_context *remus_ctx = (struct remus_context *)ctx; + + if (!remus_ctx) return; + + if (remus_ctx->inputbuf) + free(remus_ctx->inputbuf); + if (remus_ctx->sendbuf_pfns) + 
free(remus_ctx->sendbuf_pfns); + if (remus_ctx->cache_base) + free(remus_ctx->cache_base); + if (remus_ctx->pfn2cache) + free(remus_ctx->pfn2cache); + if (remus_ctx->cache2pfn) + free(remus_ctx->cache2pfn); + free(remus_ctx); +} + +void *xc_remus_create_context(xc_interface *xch, unsigned long p2m_size) +{ + unsigned long i; + struct remus_context *remus_ctx = NULL; + unsigned long num_cache_pages = DELTA_CACHE_SIZE/XC_PAGE_SIZE; + + remus_ctx = malloc(sizeof(struct remus_context)); + if (!remus_ctx) + { + ERROR("Failed to allocate remus_ctx\n"); + goto error; + } + memset(remus_ctx, 0, sizeof(struct remus_context)); + + if (posix_memalign((void **)&remus_ctx->inputbuf, + XC_PAGE_SIZE, PAGE_BUFFER_SIZE)) + { + ERROR("Failed to allocate page buffer\n"); + goto error; + } + + remus_ctx->sendbuf_pfns = malloc(NRPAGES(PAGE_BUFFER_SIZE) * + sizeof(unsigned long)); + if (!remus_ctx->sendbuf_pfns) + { + ERROR("Could not alloc sendbuf_pfns\n"); + goto error; + } + memset(remus_ctx->sendbuf_pfns, -1, + NRPAGES(PAGE_BUFFER_SIZE) * sizeof(unsigned long)); + + if (posix_memalign((void **)&remus_ctx->cache_base, + XC_PAGE_SIZE, DELTA_CACHE_SIZE)) + { + ERROR("Failed to allocate delta cache\n"); + goto error; + } + + remus_ctx->pfn2cache = calloc(p2m_size, sizeof(struct cache_page *)); + if (!remus_ctx->pfn2cache) + { + ERROR("Could not alloc pfn2cache map\n"); + goto error; + } + + remus_ctx->cache2pfn = malloc(num_cache_pages * sizeof(struct cache_page)); + if (!remus_ctx->cache2pfn) + { + ERROR("Could not alloc cache2pfn map\n"); + goto error; + } + + for (i = 0; i < num_cache_pages; i++) + { + remus_ctx->cache2pfn[i].pfn = INVALID_P2M_ENTRY; + remus_ctx->cache2pfn[i].page = remus_ctx->cache_base + i * XC_PAGE_SIZE; + remus_ctx->cache2pfn[i].prev = (i == 0)? NULL : &(remus_ctx->cache2pfn[i - 1]); + remus_ctx->cache2pfn[i].next = ((i+1) == num_cache_pages)? 
NULL : + &(remus_ctx->cache2pfn[i + 1]); + } + remus_ctx->page_list_head = &(remus_ctx->cache2pfn[0]); + remus_ctx->page_list_tail = &(remus_ctx->cache2pfn[num_cache_pages -1]); + + return (void *)remus_ctx; +error: + xc_remus_free_context(xch, remus_ctx); + return NULL; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Wed Jun 15 16:16:41 2011 +0100 +++ b/tools/libxc/xenctrl.h Sat Jun 18 20:51:59 2011 -0700 @@ -1820,4 +1820,58 @@ int verbose); /* Useful for callers who also use libelf. */ +/** + * Remus Checkpoint Compression + */ +void *xc_remus_create_context(xc_interface *xch, unsigned long p2m_size); +void xc_remus_free_context(xc_interface *xch, void *remus_ctx); + +/** + * Add a page to remus buffer, to be compressed later. + * returns -1 if there is no space in buffer. + */ +int xc_remus_add_page(xc_interface *xch, void *remus_ctx, char *page, + unsigned long pfn, int israw); + +/** + * Should be called before compressing the pages. Caller supplies a + * compression buffer compbuf of size compbuf_size. + */ +void xc_remus_compbuf_set(xc_interface *xch, void *remus_ctx, char *compbuf, + unsigned long compbuf_size); + +/** + * Delta compress pages in the remus buffer and inserts the + * compressed data into the previously supplied compression buffer, compbuf. + * After compression, the page is copied to the internal LRU cache. + * + * This function compresses as many pages as possible into the + * supplied compression buffer. It maintains an internal iterator to + * keep track of pages in the input buffer that are yet to be compressed. + * + * returns -1 if the compression buffer has run out of space. + * returns 1 on success. + * returns 0 if no more pages are left to be compressed. 
+ */ +int xc_remus_compress(xc_interface *xch, void *remus_ctx); + +/** + * Returns the exact length of data, in the compression buffer. + */ +unsigned long xc_remus_get_compbuf_len(xc_interface *xch, void *remus_ctx); + +/** + * Resets the internal page buffer that holds dirty pages before compression. + * Also resets the iterators. + */ +void xc_remus_pagebuf_reset(xc_interface *xch, void *remus_ctx); + +/** + * Caller must supply the compression buffer (compbuf), its size (compbuf_size) and + * a reference to an index variable (compbuf_pos) that is used internally. + * Each call pulls out one page from the compressed chunk and copies it to dest. + */ +int xc_remus_uncompress(xc_interface *xch, char *compbuf, unsigned long compbuf_size, + unsigned long *compbuf_pos, char *dest); + #endif /* XENCTRL_H */ diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Wed Jun 15 16:16:41 2011 +0100 +++ b/tools/libxc/xenguest.h Sat Jun 18 20:51:59 2011 -0700 @@ -27,6 +27,7 @@ #define XCFLAGS_DEBUG 2 #define XCFLAGS_HVM 4 #define XCFLAGS_STDVGA 8 +#define XCFLAGS_REMUS_COMPRESS 16 #define X86_64_B_SIZE 64 #define X86_32_B_SIZE 32 diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xg_save_restore.h --- a/tools/libxc/xg_save_restore.h Wed Jun 15 16:16:41 2011 +0100 +++ b/tools/libxc/xg_save_restore.h Sat Jun 18 20:51:59 2011 -0700 @@ -134,6 +134,8 @@ #define XC_SAVE_ID_HVM_CONSOLE_PFN -8 /* (HVM-only) */ #define XC_SAVE_ID_LAST_CHECKPOINT -9 /* Commit to restoring after completion of current iteration. */ #define XC_SAVE_ID_HVM_ACPI_IOPORTS_LOCATION -10 +#define XC_SAVE_ID_COMPRESSED_DATA -11 /* Marker to indicate arrival of compressed data */ +#define XC_SAVE_ID_ENABLE_COMPRESSION -12 /* Marker to enable compression logic at receiver side */ /* ** We process save/restore/migrate in batches of pages; the below _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |