[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC PATCH 2/7] xc_domain_restore: introduce restore_callbacks for colo
In colo mode, the SVM (secondary VM) also runs, so we should update xc_restore to support it. The first step is to add some callbacks for colo. We add the following callbacks: 1. init(): initialize the private data used for colo. 2. free(): free the resources we allocated and stored in the private data. 3. get_page(): the SVM is running, so we can't update the memory in apply_batch(). This callback returns a page buffer, and apply_batch() copies the page to this buffer. The content of this buffer should be the current content of this page, so that we can use it for verification. 4. flush_memory(): update the SVM memory and pagetable. 5. update_p2m(): update the SVM p2m page. 6. finish_restore(): wait for a new checkpoint. We also add a new structure, restore_data, to avoid passing too many arguments to these callbacks. This structure stores the variables used in xc_domain_restore(), and these variables will be used in the callbacks. Signed-off-by: Ye Wei <wei.ye1987@xxxxxxxxx> Signed-off-by: Jiang Yunhong <yunhong.jiang@xxxxxxxxx> Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx> --- tools/libxc/ia64/xc_ia64_linux_restore.c | 3 +- tools/libxc/xc_domain_restore.c | 256 +++++++++++++++++++++---------- tools/libxc/xenguest.h | 54 ++++++- tools/libxl/libxl_dom.c | 2 +- tools/xcutils/xc_restore.c | 3 +- 5 files changed, 230 insertions(+), 88 deletions(-) diff --git a/tools/libxc/ia64/xc_ia64_linux_restore.c b/tools/libxc/ia64/xc_ia64_linux_restore.c index b4e9e9c..ca76be6 100644 --- a/tools/libxc/ia64/xc_ia64_linux_restore.c +++ b/tools/libxc/ia64/xc_ia64_linux_restore.c @@ -550,7 +550,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, unsigned int store_evtchn, unsigned long *store_mfn, unsigned int console_evtchn, unsigned long *console_mfn, - unsigned int hvm, unsigned int pae, int superpages) + unsigned int hvm, unsigned int pae, int superpages, + struct restore_callbacks *callbacks) { DECLARE_DOMCTL; int rc = 1; diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c index 
43e6c52..fa828e9 100644 --- a/tools/libxc/xc_domain_restore.c +++ b/tools/libxc/xc_domain_restore.c @@ -882,13 +882,15 @@ static int pagebuf_get(xc_interface *xch, struct restore_ctx *ctx, static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, xen_pfn_t* region_mfn, unsigned long* pfn_type, int pae_extended_cr3, unsigned int hvm, struct xc_mmu* mmu, - pagebuf_t* pagebuf, int curbatch) + pagebuf_t* pagebuf, int curbatch, + struct restore_callbacks *callbacks) { int i, j, curpage, nr_mfns; /* used by debug verify code */ unsigned long buf[PAGE_SIZE/sizeof(unsigned long)]; /* Our mapping of the current region (batch) */ char *region_base; + char *target_buf; /* A temporary mapping, and a copy, of one frame of guest memory. */ unsigned long *page = NULL; int nraces = 0; @@ -954,16 +956,19 @@ static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, } } - /* Map relevant mfns */ - pfn_err = calloc(j, sizeof(*pfn_err)); - region_base = xc_map_foreign_bulk( - xch, dom, PROT_WRITE, region_mfn, pfn_err, j); - - if ( region_base == NULL ) + if ( !callbacks || !callbacks->get_page) { - PERROR("map batch failed"); - free(pfn_err); - return -1; + /* Map relevant mfns */ + pfn_err = calloc(j, sizeof(*pfn_err)); + region_base = xc_map_foreign_bulk( + xch, dom, PROT_WRITE, region_mfn, pfn_err, j); + + if ( region_base == NULL ) + { + PERROR("map batch failed"); + free(pfn_err); + return -1; + } } for ( i = 0, curpage = -1; i < j; i++ ) @@ -975,7 +980,7 @@ static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, /* a bogus/unmapped page: skip it */ continue; - if (pfn_err[i]) + if ( (!callbacks || !callbacks->get_page) && pfn_err[i] ) { ERROR("unexpected PFN mapping failure"); goto err_mapped; @@ -993,8 +998,20 @@ static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, mfn = ctx->p2m[pfn]; + if ( callbacks && callbacks->get_page ) + { + target_buf = 
callbacks->get_page(&callbacks->comm_data, + callbacks->data, pfn); + if ( !target_buf ) + { + ERROR("Cannot get a buffer to store memory"); + goto err_mapped; + } + } + else + target_buf = region_base + i*PAGE_SIZE; /* In verify mode, we use a copy; otherwise we work in place */ - page = pagebuf->verify ? (void *)buf : (region_base + i*PAGE_SIZE); + page = pagebuf->verify ? (void *)buf : target_buf; memcpy(page, pagebuf->pages + (curpage + curbatch) * PAGE_SIZE, PAGE_SIZE); @@ -1038,27 +1055,26 @@ static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, if ( pagebuf->verify ) { - int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE); + int res = memcmp(buf, target_buf, PAGE_SIZE); if ( res ) { int v; DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx " "actualcs=%08lx\n", pfn, pagebuf->pfn_types[pfn], - csum_page(region_base + (i + curbatch)*PAGE_SIZE), + csum_page(target_buf), csum_page(buf)); for ( v = 0; v < 4; v++ ) { - unsigned long *p = (unsigned long *) - (region_base + i*PAGE_SIZE); + unsigned long *p = (unsigned long *)target_buf; if ( buf[v] != p[v] ) DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]); } } } - if ( !hvm && + if ( (!callbacks || !callbacks->get_page) && !hvm && xc_add_mmu_update(xch, mmu, (((unsigned long long)mfn) << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, pfn) ) @@ -1071,8 +1087,11 @@ static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, rc = nraces; err_mapped: - munmap(region_base, j*PAGE_SIZE); - free(pfn_err); + if ( !callbacks || !callbacks->get_page ) + { + munmap(region_base, j*PAGE_SIZE); + free(pfn_err); + } return rc; } @@ -1080,7 +1099,8 @@ static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, unsigned int store_evtchn, unsigned long *store_mfn, unsigned int console_evtchn, unsigned long *console_mfn, - unsigned int hvm, unsigned int pae, int superpages) + unsigned int hvm, unsigned 
int pae, int superpages, + struct restore_callbacks *callbacks) { DECLARE_DOMCTL; int rc = 1, frc, i, j, n, m, pae_extended_cr3 = 0, ext_vcpucontext = 0; @@ -1141,6 +1161,9 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, static struct restore_ctx *ctx = &_ctx; struct domain_info_context *dinfo = &ctx->dinfo; + struct restore_data *comm_data = NULL; + void *data = NULL; + pagebuf_init(&pagebuf); memset(&tailbuf, 0, sizeof(tailbuf)); tailbuf.ishvm = hvm; @@ -1249,6 +1272,32 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, goto out; } + /* init callbacks->comm_data */ + if ( callbacks ) + { + callbacks->comm_data.xch = xch; + callbacks->comm_data.dom = dom; + callbacks->comm_data.dinfo = dinfo; + callbacks->comm_data.hvm = hvm; + callbacks->comm_data.pfn_type = pfn_type; + callbacks->comm_data.mmu = mmu; + callbacks->comm_data.p2m_frame_list = p2m_frame_list; + callbacks->comm_data.p2m = ctx->p2m; + comm_data = &callbacks->comm_data; + + /* init callbacks->data */ + if ( callbacks->init) + { + callbacks->data = NULL; + if (callbacks->init(&callbacks->comm_data, &callbacks->data) < 0 ) + { + ERROR("Could not initialise restore callbacks private data"); + goto out; + } + } + data = callbacks->data; + } + xc_report_progress_start(xch, "Reloading memory pages", dinfo->p2m_size); /* @@ -1298,7 +1347,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, int brc; brc = apply_batch(xch, dom, ctx, region_mfn, pfn_type, - pae_extended_cr3, hvm, mmu, &pagebuf, curbatch); + pae_extended_cr3, hvm, mmu, &pagebuf, curbatch, + callbacks); if ( brc < 0 ) goto out; @@ -1368,6 +1418,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, goto finish; } +getpages: // DPRINTF("Buffered checkpoint\n"); if ( pagebuf_get(xch, ctx, &pagebuf, io_fd, dom) ) { @@ -1499,58 +1550,69 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, } } - /* - * Pin page tables. 
Do this after writing to them as otherwise Xen - * will barf when doing the type-checking. - */ - nr_pins = 0; - for ( i = 0; i < dinfo->p2m_size; i++ ) + if ( callbacks && callbacks->flush_memory ) { - if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 ) - continue; - - switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK ) + if ( callbacks->flush_memory(comm_data, data) < 0 ) { - case XEN_DOMCTL_PFINFO_L1TAB: - pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE; - break; + ERROR("Error doing callbacks->flush_memory()"); + goto out; + } + } + else + { + /* + * Pin page tables. Do this after writing to them as otherwise Xen + * will barf when doing the type-checking. + */ + nr_pins = 0; + for ( i = 0; i < dinfo->p2m_size; i++ ) + { + if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 ) + continue; - case XEN_DOMCTL_PFINFO_L2TAB: - pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE; - break; + switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK ) + { + case XEN_DOMCTL_PFINFO_L1TAB: + pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE; + break; - case XEN_DOMCTL_PFINFO_L3TAB: - pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE; - break; + case XEN_DOMCTL_PFINFO_L2TAB: + pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE; + break; - case XEN_DOMCTL_PFINFO_L4TAB: - pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE; - break; + case XEN_DOMCTL_PFINFO_L3TAB: + pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE; + break; - default: - continue; - } + case XEN_DOMCTL_PFINFO_L4TAB: + pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE; + break; + + default: + continue; + } - pin[nr_pins].arg1.mfn = ctx->p2m[i]; - nr_pins++; + pin[nr_pins].arg1.mfn = ctx->p2m[i]; + nr_pins++; - /* Batch full? Then flush. */ - if ( nr_pins == MAX_PIN_BATCH ) - { - if ( xc_mmuext_op(xch, pin, nr_pins, dom) < 0 ) + /* Batch full? Then flush. 
*/ + if ( nr_pins == MAX_PIN_BATCH ) { - PERROR("Failed to pin batch of %d page tables", nr_pins); - goto out; + if ( xc_mmuext_op(xch, pin, nr_pins, dom) < 0 ) + { + PERROR("Failed to pin batch of %d page tables", nr_pins); + goto out; + } + nr_pins = 0; } - nr_pins = 0; } - } - /* Flush final partial batch. */ - if ( (nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0) ) - { - PERROR("Failed to pin batch of %d page tables", nr_pins); - goto out; + /* Flush final partial batch. */ + if ( (nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0) ) + { + PERROR("Failed to pin batch of %d page tables", nr_pins); + goto out; + } } DPRINTF("Memory reloaded (%ld pages)\n", ctx->nr_pfns); @@ -1767,37 +1829,61 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, /* leave wallclock time. set by hypervisor */ munmap(new_shared_info, PAGE_SIZE); - /* Uncanonicalise the pfn-to-mfn table frame-number list. */ - for ( i = 0; i < P2M_FL_ENTRIES; i++ ) + if ( callbacks && callbacks->update_p2m ) { - pfn = p2m_frame_list[i]; - if ( (pfn >= dinfo->p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) ) + if ( callbacks->update_p2m(comm_data, data) < 0 ) { - ERROR("PFN-to-MFN frame number %i (%#lx) is bad", i, pfn); + ERROR("Error doing callbacks->update_p2m()"); goto out; } - p2m_frame_list[i] = ctx->p2m[pfn]; } - - /* Copy the P2M we've constructed to the 'live' P2M */ - if ( !(ctx->live_p2m = xc_map_foreign_pages(xch, dom, PROT_WRITE, - p2m_frame_list, P2M_FL_ENTRIES)) ) + else { - PERROR("Couldn't map p2m table"); - goto out; + /* Uncanonicalise the pfn-to-mfn table frame-number list. 
*/ + for ( i = 0; i < P2M_FL_ENTRIES; i++ ) + { + pfn = p2m_frame_list[i]; + if ( (pfn >= dinfo->p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) ) + { + ERROR("PFN-to-MFN frame number %i (%#lx) is bad", i, pfn); + goto out; + } + p2m_frame_list[i] = ctx->p2m[pfn]; + } + + /* Copy the P2M we've constructed to the 'live' P2M */ + if ( !(ctx->live_p2m = xc_map_foreign_pages(xch, dom, PROT_WRITE, + p2m_frame_list, P2M_FL_ENTRIES)) ) + { + PERROR("Couldn't map p2m table"); + goto out; + } + + /* If the domain we're restoring has a different word size to ours, + * we need to adjust the live_p2m assignment appropriately */ + if ( dinfo->guest_width > sizeof (xen_pfn_t) ) + for ( i = dinfo->p2m_size - 1; i >= 0; i-- ) + ((int64_t *)ctx->live_p2m)[i] = (long)ctx->p2m[i]; + else if ( dinfo->guest_width < sizeof (xen_pfn_t) ) + for ( i = 0; i < dinfo->p2m_size; i++ ) + ((uint32_t *)ctx->live_p2m)[i] = ctx->p2m[i]; + else + memcpy(ctx->live_p2m, ctx->p2m, dinfo->p2m_size * sizeof(xen_pfn_t)); + munmap(ctx->live_p2m, P2M_FL_ENTRIES * PAGE_SIZE); } - /* If the domain we're restoring has a different word size to ours, - * we need to adjust the live_p2m assignment appropriately */ - if ( dinfo->guest_width > sizeof (xen_pfn_t) ) - for ( i = dinfo->p2m_size - 1; i >= 0; i-- ) - ((int64_t *)ctx->live_p2m)[i] = (long)ctx->p2m[i]; - else if ( dinfo->guest_width < sizeof (xen_pfn_t) ) - for ( i = 0; i < dinfo->p2m_size; i++ ) - ((uint32_t *)ctx->live_p2m)[i] = ctx->p2m[i]; - else - memcpy(ctx->live_p2m, ctx->p2m, dinfo->p2m_size * sizeof(xen_pfn_t)); - munmap(ctx->live_p2m, P2M_FL_ENTRIES * PAGE_SIZE); + if ( callbacks && callbacks->finish_restotre ) + { + rc = callbacks->finish_restotre(comm_data, data); + if ( rc == 1 ) + goto getpages; + + if ( rc < 0 ) + { + ERROR("Error doing callbacks->finish_restotre()"); + goto out; + } + } DPRINTF("Domain ready to be built.\n"); rc = 0; @@ -1861,6 +1947,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, rc = 0; 
out: + if ( callbacks && callbacks->free && callbacks->data) + callbacks->free(&callbacks->comm_data, callbacks->data); if ( (rc != 0) && (dom != 0) ) xc_domain_destroy(xch, dom); xc_hypercall_buffer_free(xch, ctxt); diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h index 9ed0ea4..709a284 100644 --- a/tools/libxc/xenguest.h +++ b/tools/libxc/xenguest.h @@ -60,6 +60,57 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter struct save_callbacks* callbacks, int hvm); +/* pass the variable defined in xc_domain_restore() to callback. Use + * this structure for the following purpose: + * 1. avoid too many arguments. + * 2. different callback implemention may need different arguments. + * Just add the information you need here. + */ +struct restore_data +{ + xc_interface *xch; + uint32_t dom; + struct domain_info_context *dinfo; + int hvm; + unsigned long *pfn_type; + struct xc_mmu *mmu; + unsigned long *p2m_frame_list; + unsigned long *p2m; +}; + +/* callbacks provided by xc_domain_restore */ +struct restore_callbacks { + /* callback to init data */ + int (*init)(struct restore_data *comm_data, void **data); + /* callback to free data */ + void (*free)(struct restore_data *comm_data, void *data); + /* callback to get a buffer to store memory data that is transfered + * from the source machine. + */ + char *(*get_page)(struct restore_data *comm_data, void *data, + unsigned long pfn); + /* callback to flush memory that is transfered from the source machine + * to the guest. Update the guest's pagetable if necessary. + */ + int (*flush_memory)(struct restore_data *comm_data, void *data); + /* callback to update the guest's p2m table */ + int (*update_p2m)(struct restore_data *comm_data, void *data); + /* callback to finish restore process. It is called before xc_domain_restore() + * returns. 
+ * + * Return value: + * -1: error + * 0: continue to start vm + * 1: continue to do a checkpoint + */ + int (*finish_restotre)(struct restore_data *comm_data, void *data); + + /* xc_domain_restore() init it */ + struct restore_data comm_data; + /* to be provided as the last argument to each callback function */ + void* data; +}; + /** * This function will restore a saved domain. * @@ -76,7 +127,8 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, unsigned int store_evtchn, unsigned long *store_mfn, unsigned int console_evtchn, unsigned long *console_mfn, - unsigned int hvm, unsigned int pae, int superpages); + unsigned int hvm, unsigned int pae, int superpages, + struct restore_callbacks *callbacks); /** * xc_domain_restore writes a file to disk that contains the device * model saved state. diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c index c702cf7..32cdd03 100644 --- a/tools/libxl/libxl_dom.c +++ b/tools/libxl/libxl_dom.c @@ -305,7 +305,7 @@ int libxl__domain_restore_common(libxl_ctx *ctx, uint32_t domid, rc = xc_domain_restore(ctx->xch, fd, domid, state->store_port, &state->store_mfn, state->console_port, &state->console_mfn, - info->hvm, info->u.hvm.pae, 0); + info->hvm, info->u.hvm.pae, 0, NULL); if ( rc ) { LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "restoring domain"); return ERROR_FAIL; diff --git a/tools/xcutils/xc_restore.c b/tools/xcutils/xc_restore.c index ea069ac..8af88e4 100644 --- a/tools/xcutils/xc_restore.c +++ b/tools/xcutils/xc_restore.c @@ -46,7 +46,8 @@ main(int argc, char **argv) superpages = 0; ret = xc_domain_restore(xch, io_fd, domid, store_evtchn, &store_mfn, - console_evtchn, &console_mfn, hvm, pae, superpages); + console_evtchn, &console_mfn, hvm, pae, superpages, + NULL); if ( ret == 0 ) { -- 1.8.0 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |