|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC Patch v2 05/16] xc_domain_restore: introduce restore_callbacks for colo
In colo mode, SVM also runs. So we should update xc_restore to support it.
The first step is: add some callbacks for colo.
We add the following callbacks:
1. init(): init the private data used for colo
2. free(): free the resource we allocate and store in the private data
3. get_page(): the SVM is running, so we can't update its memory directly in
apply_batch(). This callback returns a page buffer, and apply_batch() copies
the page into this buffer. The buffer should initially hold the current
content of this page, so that it can also be used for verification.
4. flush_memory(): update the SVM memory and pagetable.
5. update_p2m(): update the SVM p2m page.
6. finish_restore(): wait for a new checkpoint.
We also add a new structure, restore_data, to avoid passing too many arguments
to these callbacks. This structure stores the variables used in
xc_domain_restore(), and these variables will be used in the callbacks.
Signed-off-by: Ye Wei <wei.ye1987@xxxxxxxxx>
Signed-off-by: Jiang Yunhong <yunhong.jiang@xxxxxxxxx>
Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
---
tools/libxc/xc_domain_restore.c | 264 ++++++++++++++++++++++++++-------------
tools/libxc/xenguest.h | 48 +++++++
2 files changed, 225 insertions(+), 87 deletions(-)
diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index 63d36cd..aac2de0 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -1076,7 +1076,8 @@ static int pagebuf_get(xc_interface *xch, struct
restore_ctx *ctx,
static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx
*ctx,
xen_pfn_t* region_mfn, unsigned long* pfn_type, int
pae_extended_cr3,
struct xc_mmu* mmu,
- pagebuf_t* pagebuf, int curbatch)
+ pagebuf_t* pagebuf, int curbatch,
+ struct restore_callbacks *callbacks)
{
int i, j, curpage, nr_mfns;
int k, scount;
@@ -1085,6 +1086,7 @@ static int apply_batch(xc_interface *xch, uint32_t dom,
struct restore_ctx *ctx,
unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
/* Our mapping of the current region (batch) */
char *region_base;
+ char *target_buf;
/* A temporary mapping, and a copy, of one frame of guest memory. */
unsigned long *page = NULL;
int nraces = 0;
@@ -1241,21 +1243,24 @@ static int apply_batch(xc_interface *xch, uint32_t dom,
struct restore_ctx *ctx,
region_mfn[i] = ctx->hvm ? pfn : ctx->p2m[pfn];
}
- /* Map relevant mfns */
- pfn_err = calloc(j, sizeof(*pfn_err));
- if ( pfn_err == NULL )
+ if ( !callbacks || !callbacks->get_page)
{
- PERROR("allocation for pfn_err failed");
- return -1;
- }
- region_base = xc_map_foreign_bulk(
- xch, dom, PROT_WRITE, region_mfn, pfn_err, j);
+ /* Map relevant mfns */
+ pfn_err = calloc(j, sizeof(*pfn_err));
+ if ( pfn_err == NULL )
+ {
+ PERROR("allocation for pfn_err failed");
+ return -1;
+ }
+ region_base = xc_map_foreign_bulk(
+ xch, dom, PROT_WRITE, region_mfn, pfn_err, j);
- if ( region_base == NULL )
- {
- PERROR("map batch failed");
- free(pfn_err);
- return -1;
+ if ( region_base == NULL )
+ {
+ PERROR("map batch failed");
+ free(pfn_err);
+ return -1;
+ }
}
for ( i = 0, curpage = -1; i < j; i++ )
@@ -1279,7 +1284,7 @@ static int apply_batch(xc_interface *xch, uint32_t dom,
struct restore_ctx *ctx,
continue;
}
- if (pfn_err[i])
+ if ( (!callbacks || !callbacks->get_page) && pfn_err[i] )
{
ERROR("unexpected PFN mapping failure pfn %lx map_mfn %lx p2m_mfn
%lx",
pfn, region_mfn[i], ctx->p2m[pfn]);
@@ -1298,8 +1303,20 @@ static int apply_batch(xc_interface *xch, uint32_t dom,
struct restore_ctx *ctx,
mfn = ctx->p2m[pfn];
+ if ( callbacks && callbacks->get_page )
+ {
+ target_buf = callbacks->get_page(&callbacks->comm_data,
+ callbacks->data, pfn);
+ if ( !target_buf )
+ {
+ ERROR("Cannot get a buffer to store memory");
+ goto err_mapped;
+ }
+ }
+ else
+ target_buf = region_base + i*PAGE_SIZE;
/* In verify mode, we use a copy; otherwise we work in place */
- page = pagebuf->verify ? (void *)buf : (region_base + i*PAGE_SIZE);
+ page = pagebuf->verify ? (void *)buf : target_buf;
/* Remus - page decompression */
if (pagebuf->compressing)
@@ -1357,27 +1374,26 @@ static int apply_batch(xc_interface *xch, uint32_t dom,
struct restore_ctx *ctx,
if ( pagebuf->verify )
{
- int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE);
+ int res = memcmp(buf, target_buf, PAGE_SIZE);
if ( res )
{
int v;
DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx "
"actualcs=%08lx\n", pfn, pagebuf->pfn_types[pfn],
- csum_page(region_base + (i + curbatch)*PAGE_SIZE),
+ csum_page(target_buf),
csum_page(buf));
for ( v = 0; v < 4; v++ )
{
- unsigned long *p = (unsigned long *)
- (region_base + i*PAGE_SIZE);
+ unsigned long *p = (unsigned long *)target_buf;
if ( buf[v] != p[v] )
DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]);
}
}
}
- if ( !ctx->hvm &&
+ if ( (!callbacks || !callbacks->get_page) && !ctx->hvm &&
xc_add_mmu_update(xch, mmu,
(((unsigned long long)mfn) << PAGE_SHIFT)
| MMU_MACHPHYS_UPDATE, pfn) )
@@ -1390,8 +1406,11 @@ static int apply_batch(xc_interface *xch, uint32_t dom,
struct restore_ctx *ctx,
rc = nraces;
err_mapped:
- munmap(region_base, j*PAGE_SIZE);
- free(pfn_err);
+ if ( !callbacks || !callbacks->get_page )
+ {
+ munmap(region_base, j*PAGE_SIZE);
+ free(pfn_err);
+ }
return rc;
}
@@ -1461,6 +1480,9 @@ int xc_domain_restore(xc_interface *xch, int io_fd,
uint32_t dom,
struct restore_ctx *ctx = &_ctx;
struct domain_info_context *dinfo = &ctx->dinfo;
+ struct restore_data *comm_data = NULL;
+ void *data = NULL;
+
DPRINTF("%s: starting restore of new domid %u", __func__, dom);
pagebuf_init(&pagebuf);
@@ -1582,6 +1604,33 @@ int xc_domain_restore(xc_interface *xch, int io_fd,
uint32_t dom,
goto out;
}
+ /* init callbacks->comm_data */
+ if ( callbacks )
+ {
+ callbacks->comm_data.xch = xch;
+ callbacks->comm_data.dom = dom;
+ callbacks->comm_data.dinfo = dinfo;
+ callbacks->comm_data.io_fd = io_fd;
+ callbacks->comm_data.hvm = hvm;
+ callbacks->comm_data.pfn_type = pfn_type;
+ callbacks->comm_data.mmu = mmu;
+ callbacks->comm_data.p2m_frame_list = p2m_frame_list;
+ callbacks->comm_data.p2m = ctx->p2m;
+ comm_data = &callbacks->comm_data;
+
+ /* init callbacks->data */
+ if ( callbacks->init)
+ {
+ callbacks->data = NULL;
+ if (callbacks->init(&callbacks->comm_data, &callbacks->data) < 0 )
+ {
+ ERROR("Could not initialise restore callbacks private data");
+ goto out;
+ }
+ }
+ data = callbacks->data;
+ }
+
xc_report_progress_start(xch, "Reloading memory pages", dinfo->p2m_size);
/*
@@ -1676,7 +1725,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd,
uint32_t dom,
int brc;
brc = apply_batch(xch, dom, ctx, region_mfn, pfn_type,
- pae_extended_cr3, mmu, &pagebuf, curbatch);
+ pae_extended_cr3, mmu, &pagebuf, curbatch,
+ callbacks);
if ( brc < 0 )
goto out;
@@ -1761,6 +1811,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd,
uint32_t dom,
goto finish;
}
+getpages:
// DPRINTF("Buffered checkpoint\n");
if ( pagebuf_get(xch, ctx, &pagebuf, io_fd, dom) ) {
@@ -1902,58 +1953,69 @@ int xc_domain_restore(xc_interface *xch, int io_fd,
uint32_t dom,
}
}
- /*
- * Pin page tables. Do this after writing to them as otherwise Xen
- * will barf when doing the type-checking.
- */
- nr_pins = 0;
- for ( i = 0; i < dinfo->p2m_size; i++ )
+ if ( callbacks && callbacks->flush_memory )
{
- if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
- continue;
-
- switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+ if ( callbacks->flush_memory(comm_data, data) < 0 )
{
- case XEN_DOMCTL_PFINFO_L1TAB:
- pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
- break;
+ ERROR("Error doing callbacks->flush_memory()");
+ goto out;
+ }
+ }
+ else
+ {
+ /*
+ * Pin page tables. Do this after writing to them as otherwise Xen
+ * will barf when doing the type-checking.
+ */
+ nr_pins = 0;
+ for ( i = 0; i < dinfo->p2m_size; i++ )
+ {
+ if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
+ continue;
- case XEN_DOMCTL_PFINFO_L2TAB:
- pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
- break;
+ switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+ {
+ case XEN_DOMCTL_PFINFO_L1TAB:
+ pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
+ break;
- case XEN_DOMCTL_PFINFO_L3TAB:
- pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
- break;
+ case XEN_DOMCTL_PFINFO_L2TAB:
+ pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
+ break;
- case XEN_DOMCTL_PFINFO_L4TAB:
- pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
- break;
+ case XEN_DOMCTL_PFINFO_L3TAB:
+ pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
+ break;
- default:
- continue;
- }
+ case XEN_DOMCTL_PFINFO_L4TAB:
+ pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
+ break;
- pin[nr_pins].arg1.mfn = ctx->p2m[i];
- nr_pins++;
+ default:
+ continue;
+ }
- /* Batch full? Then flush. */
- if ( nr_pins == MAX_PIN_BATCH )
- {
- if ( xc_mmuext_op(xch, pin, nr_pins, dom) < 0 )
+ pin[nr_pins].arg1.mfn = ctx->p2m[i];
+ nr_pins++;
+
+ /* Batch full? Then flush. */
+ if ( nr_pins == MAX_PIN_BATCH )
{
- PERROR("Failed to pin batch of %d page tables", nr_pins);
- goto out;
+ if ( xc_mmuext_op(xch, pin, nr_pins, dom) < 0 )
+ {
+ PERROR("Failed to pin batch of %d page tables", nr_pins);
+ goto out;
+ }
+ nr_pins = 0;
}
- nr_pins = 0;
}
- }
- /* Flush final partial batch. */
- if ( (nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0) )
- {
- PERROR("Failed to pin batch of %d page tables", nr_pins);
- goto out;
+ /* Flush final partial batch. */
+ if ( (nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0) )
+ {
+ PERROR("Failed to pin batch of %d page tables", nr_pins);
+ goto out;
+ }
}
DPRINTF("Memory reloaded (%ld pages)\n", ctx->nr_pfns);
@@ -2052,6 +2114,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd,
uint32_t dom,
*console_mfn = ctx->p2m[GET_FIELD(start_info, console.domU.mfn)];
SET_FIELD(start_info, console.domU.mfn, *console_mfn);
SET_FIELD(start_info, console.domU.evtchn, console_evtchn);
+ callbacks->comm_data.store_mfn = *store_mfn;
+ callbacks->comm_data.console_mfn = *console_mfn;
munmap(start_info, PAGE_SIZE);
}
/* Uncanonicalise each GDT frame number. */
@@ -2199,37 +2263,61 @@ int xc_domain_restore(xc_interface *xch, int io_fd,
uint32_t dom,
/* leave wallclock time. set by hypervisor */
munmap(new_shared_info, PAGE_SIZE);
- /* Uncanonicalise the pfn-to-mfn table frame-number list. */
- for ( i = 0; i < P2M_FL_ENTRIES; i++ )
+ if ( callbacks && callbacks->update_p2m )
{
- pfn = p2m_frame_list[i];
- if ( (pfn >= dinfo->p2m_size) || (pfn_type[pfn] !=
XEN_DOMCTL_PFINFO_NOTAB) )
+ if ( callbacks->update_p2m(comm_data, data) < 0 )
{
- ERROR("PFN-to-MFN frame number %i (%#lx) is bad", i, pfn);
+ ERROR("Error doing callbacks->update_p2m()");
goto out;
}
- p2m_frame_list[i] = ctx->p2m[pfn];
}
-
- /* Copy the P2M we've constructed to the 'live' P2M */
- if ( !(ctx->live_p2m = xc_map_foreign_pages(xch, dom, PROT_WRITE,
- p2m_frame_list, P2M_FL_ENTRIES)) )
+ else
{
- PERROR("Couldn't map p2m table");
- goto out;
+ /* Uncanonicalise the pfn-to-mfn table frame-number list. */
+ for ( i = 0; i < P2M_FL_ENTRIES; i++ )
+ {
+ pfn = p2m_frame_list[i];
+ if ( (pfn >= dinfo->p2m_size) || (pfn_type[pfn] !=
XEN_DOMCTL_PFINFO_NOTAB) )
+ {
+ ERROR("PFN-to-MFN frame number %i (%#lx) is bad", i, pfn);
+ goto out;
+ }
+ p2m_frame_list[i] = ctx->p2m[pfn];
+ }
+
+ /* Copy the P2M we've constructed to the 'live' P2M */
+ if ( !(ctx->live_p2m = xc_map_foreign_pages(xch, dom, PROT_WRITE,
+ p2m_frame_list,
P2M_FL_ENTRIES)) )
+ {
+ PERROR("Couldn't map p2m table");
+ goto out;
+ }
+
+ /* If the domain we're restoring has a different word size to ours,
+ * we need to adjust the live_p2m assignment appropriately */
+ if ( dinfo->guest_width > sizeof (xen_pfn_t) )
+ for ( i = dinfo->p2m_size - 1; i >= 0; i-- )
+ ((int64_t *)ctx->live_p2m)[i] = (long)ctx->p2m[i];
+ else if ( dinfo->guest_width < sizeof (xen_pfn_t) )
+ for ( i = 0; i < dinfo->p2m_size; i++ )
+ ((uint32_t *)ctx->live_p2m)[i] = ctx->p2m[i];
+ else
+ memcpy(ctx->live_p2m, ctx->p2m, dinfo->p2m_size *
sizeof(xen_pfn_t));
+ munmap(ctx->live_p2m, P2M_FL_ENTRIES * PAGE_SIZE);
}
- /* If the domain we're restoring has a different word size to ours,
- * we need to adjust the live_p2m assignment appropriately */
- if ( dinfo->guest_width > sizeof (xen_pfn_t) )
- for ( i = dinfo->p2m_size - 1; i >= 0; i-- )
- ((int64_t *)ctx->live_p2m)[i] = (long)ctx->p2m[i];
- else if ( dinfo->guest_width < sizeof (xen_pfn_t) )
- for ( i = 0; i < dinfo->p2m_size; i++ )
- ((uint32_t *)ctx->live_p2m)[i] = ctx->p2m[i];
- else
- memcpy(ctx->live_p2m, ctx->p2m, dinfo->p2m_size * sizeof(xen_pfn_t));
- munmap(ctx->live_p2m, P2M_FL_ENTRIES * PAGE_SIZE);
+ if ( callbacks && callbacks->finish_restotre )
+ {
+ rc = callbacks->finish_restotre(comm_data, data);
+ if ( rc == 1 )
+ goto getpages;
+
+ if ( rc < 0 )
+ {
+ ERROR("Er1ror doing callbacks->finish_restotre()");
+ goto out;
+ }
+ }
rc = xc_dom_gnttab_seed(xch, dom, *console_mfn, *store_mfn,
console_domid, store_domid);
@@ -2329,6 +2417,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd,
uint32_t dom,
rc = 0;
out:
+ if ( callbacks && callbacks->free && callbacks->data)
+ callbacks->free(&callbacks->comm_data, callbacks->data);
if ( (rc != 0) && (dom != 0) )
xc_domain_destroy(xch, dom);
xc_hypercall_buffer_free(xch, ctxt);
diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
index 4714bd2..4bb444a 100644
--- a/tools/libxc/xenguest.h
+++ b/tools/libxc/xenguest.h
@@ -90,12 +90,60 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t
dom, uint32_t max_iter
unsigned long vm_generationid_addr);
+/* pass the variable defined in xc_domain_restore() to callback. Use
+ * this structure for the following purpose:
+ * 1. avoid too many arguments.
+ * 2. different callback implementations may need different arguments.
+ * Just add the information you need here.
+ */
+struct restore_data
+{
+ xc_interface *xch;
+ uint32_t dom;
+ struct domain_info_context *dinfo;
+ int io_fd;
+ int hvm;
+ unsigned long *pfn_type;
+ struct xc_mmu *mmu;
+ unsigned long *p2m_frame_list;
+ unsigned long *p2m;
+ unsigned long console_mfn;
+ unsigned long store_mfn;
+};
+
/* callbacks provided by xc_domain_restore */
struct restore_callbacks {
+ /* callback to init data */
+ int (*init)(struct restore_data *comm_data, void **data);
+ /* callback to free data */
+ void (*free)(struct restore_data *comm_data, void *data);
+ /* callback to get a buffer to store memory data that is transfered
+ * from the source machine.
+ */
+ char *(*get_page)(struct restore_data *comm_data, void *data,
+ unsigned long pfn);
+ /* callback to flush memory that is transfered from the source machine
+ * to the guest. Update the guest's pagetable if necessary.
+ */
+ int (*flush_memory)(struct restore_data *comm_data, void *data);
+ /* callback to update the guest's p2m table */
+ int (*update_p2m)(struct restore_data *comm_data, void *data);
+ /* callback to finish restore process. It is called before
xc_domain_restore()
+ * returns.
+ *
+ * Return value:
+ * -1: error
+ * 0: continue to start vm
+ * 1: continue to do a checkpoint
+ */
+ int (*finish_restotre)(struct restore_data *comm_data, void *data);
/* callback to restore toolstack specific data */
int (*toolstack_restore)(uint32_t domid, const uint8_t *buf,
uint32_t size, void* data);
+ /* xc_domain_restore() init it */
+ struct restore_data comm_data;
+
/* to be provided as the last argument to each callback function */
void* data;
};
--
1.7.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |