# HG changeset patch
# User Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
# Date 1175782282 -3600
# Node ID e518f2fbdd724ca7b21789d2d075c7ee8665ddaa
# Parent 602d061ff51f50d7b46bd5ca78c4b70fbe809d20
[HVM] Save/restore: merge xc_linux_restore and xc_hvm_restore
into one function (and one file) since they share a lot of code
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
---
tools/libxc/xc_hvm_restore.c | 351 ------------
tools/libxc/xc_linux_restore.c | 955 -----------------------------------
tools/libxc/Makefile | 4
tools/libxc/xc_domain_restore.c | 1086 ++++++++++++++++++++++++++++++++++++++++
tools/libxc/xc_hvm_save.c | 57 +-
tools/libxc/xenguest.h | 22
tools/libxc/xg_private.c | 10
tools/xcutils/xc_restore.c | 10
8 files changed, 1137 insertions(+), 1358 deletions(-)
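
For reference, the merged entry point keeps a single signature for both guest
types: callers now pass an 'hvm' flag rather than choosing between
xc_linux_restore() and xc_hvm_restore(). A minimal caller sketch follows (a
hypothetical wrapper for illustration only; the real caller is
tools/xcutils/xc_restore.c):

    /* Hypothetical wrapper showing how a caller selects the restore path
     * after this change; the 'hvm' argument picks the HVM-specific tail
     * (magic pfns, HVM context record) inside xc_domain_restore(). */
    static int restore_guest(int xc_handle, int io_fd, uint32_t dom,
                             unsigned int store_evtchn, unsigned long *store_mfn,
                             unsigned int console_evtchn, unsigned long *console_mfn,
                             int is_hvm, int pae)
    {
        return xc_domain_restore(xc_handle, io_fd, dom,
                                 store_evtchn, store_mfn,
                                 console_evtchn, console_mfn,
                                 is_hvm ? 1 : 0, pae);
    }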
diff -r 602d061ff51f -r e518f2fbdd72 tools/libxc/Makefile
--- a/tools/libxc/Makefile Thu Apr 05 10:43:50 2007 +0100
+++ b/tools/libxc/Makefile Thu Apr 05 15:11:22 2007 +0100
@@ -26,8 +26,8 @@ CTRL_SRCS-$(CONFIG_X86_Linux) += xc_ptra
GUEST_SRCS-y :=
GUEST_SRCS-y += xg_private.c
-GUEST_SRCS-$(CONFIG_MIGRATE) += xc_linux_restore.c xc_linux_save.c
-GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_restore.c xc_hvm_save.c
+GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_linux_save.c
+GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_save.c
# symlink libelf from xen/common/libelf/
LIBELF_SRCS := libelf-tools.c libelf-loader.c
diff -r 602d061ff51f -r e518f2fbdd72 tools/libxc/xc_domain_restore.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_domain_restore.c Thu Apr 05 15:11:22 2007 +0100
@@ -0,0 +1,1086 @@
+/******************************************************************************
+ * xc_domain_restore.c
+ *
+ * Restore the state of a guest session.
+ *
+ * Copyright (c) 2003, K A Fraser.
+ * Copyright (c) 2006, Intel Corporation
+ * Copyright (c) 2007, XenSource Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "xg_private.h"
+#include "xg_save_restore.h"
+#include "xc_dom.h"
+
+#include <xen/hvm/ioreq.h>
+#include <xen/hvm/params.h>
+
+/* max mfn of the current host machine */
+static unsigned long max_mfn;
+
+/* virtual starting address of the hypervisor */
+static unsigned long hvirt_start;
+
+/* #levels of page tables used by the current guest */
+static unsigned int pt_levels;
+
+/* number of pfns this guest has (i.e. number of entries in the P2M) */
+static unsigned long p2m_size;
+
+/* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */
+static unsigned long nr_pfns;
+
+/* Live mapping of the table mapping each PFN to its current MFN. */
+static xen_pfn_t *live_p2m = NULL;
+
+/* A table mapping each PFN to its new MFN. */
+static xen_pfn_t *p2m = NULL;
+
+/* A table of P2M mappings in the current region */
+static xen_pfn_t *p2m_batch = NULL;
+
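+/*
+ * Note: unlike read(2), this helper returns 1 only when all 'count'
+ * bytes have arrived and 0 on error or short read, so callers below
+ * test it with '!' rather than comparing against the requested size.
+ */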
+static ssize_t
+read_exact(int fd, void *buf, size_t count)
+{
+ int r = 0, s;
+ unsigned char *b = buf;
+
+ while (r < count) {
+ s = read(fd, &b[r], count - r);
+ if ((s == -1) && (errno == EINTR))
+ continue;
+ if (s <= 0) {
+ break;
+ }
+ r += s;
+ }
+
+ return (r == count) ? 1 : 0;
+}
+
+/*
+** In the state file (or during transfer), all page-table pages are
+** converted into a 'canonical' form where references to actual mfns
+** are replaced with references to the corresponding pfns.
+** This function inverts that operation, replacing the pfn values with
+** the (now known) appropriate mfn values.
+*/
+static int uncanonicalize_pagetable(int xc_handle, uint32_t dom,
+ unsigned long type, void *page)
+{
+ int i, pte_last;
+ unsigned long pfn;
+ uint64_t pte;
+ int nr_mfns = 0;
+
+ pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8);
+
+ /* First pass: work out how many (if any) MFNs we need to alloc */
+ for(i = 0; i < pte_last; i++) {
+
+ if(pt_levels == 2)
+ pte = ((uint32_t *)page)[i];
+ else
+ pte = ((uint64_t *)page)[i];
+
+ /* XXX SMH: below needs fixing for PROT_NONE etc */
+ if(!(pte & _PAGE_PRESENT))
+ continue;
+
+ pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
+
+ if(pfn >= p2m_size) {
+ /* This "page table page" is probably not one; bail. */
+ ERROR("Frame number in type %lu page table is out of range: "
+ "i=%d pfn=0x%lx p2m_size=%lu",
+ type >> 28, i, pfn, p2m_size);
+ return 0;
+ }
+
+ if(p2m[pfn] == INVALID_P2M_ENTRY) {
+ /* Have a 'valid' PFN without a matching MFN - need to alloc */
+ p2m_batch[nr_mfns++] = pfn;
+ }
+ }
+
+
+ /* Allocate the requisite number of mfns */
+ if (nr_mfns && xc_domain_memory_populate_physmap(
+ xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) {
+ ERROR("Failed to allocate memory for batch!\n");
+ errno = ENOMEM;
+ return 0;
+ }
+
+ /* Second pass: uncanonicalize each present PTE */
+ nr_mfns = 0;
+ for(i = 0; i < pte_last; i++) {
+
+ if(pt_levels == 2)
+ pte = ((uint32_t *)page)[i];
+ else
+ pte = ((uint64_t *)page)[i];
+
+ /* XXX SMH: below needs fixing for PROT_NONE etc */
+ if(!(pte & _PAGE_PRESENT))
+ continue;
+
+ pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
+
+ if(p2m[pfn] == INVALID_P2M_ENTRY)
+ p2m[pfn] = p2m_batch[nr_mfns++];
+
+ pte &= ~MADDR_MASK_X86;
+ pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT;
+
+ if(pt_levels == 2)
+ ((uint32_t *)page)[i] = (uint32_t)pte;
+ else
+ ((uint64_t *)page)[i] = (uint64_t)pte;
+ }
+
+ return 1;
+}
+
+
+/* Load the p2m frame list, plus potential extended info chunk */
+static xen_pfn_t * load_p2m_frame_list(int io_fd, int *pae_extended_cr3)
+{
+ xen_pfn_t *p2m_frame_list;
+ vcpu_guest_context_t ctxt;
+
+ if (!(p2m_frame_list = malloc(P2M_FL_SIZE))) {
+ ERROR("Couldn't allocate p2m_frame_list array");
+ return NULL;
+ }
+
+ /* Read first entry of P2M list, or extended-info signature (~0UL). */
+ if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
+ ERROR("read extended-info signature failed");
+ return NULL;
+ }
+
+ if (p2m_frame_list[0] == ~0UL) {
+ uint32_t tot_bytes;
+
+ /* Next 4 bytes: total size of following extended info. */
+ if (!read_exact(io_fd, &tot_bytes, sizeof(tot_bytes))) {
+ ERROR("read extended-info size failed");
+ return NULL;
+ }
+
+ while (tot_bytes) {
+ uint32_t chunk_bytes;
+ char chunk_sig[4];
+
+ /* 4-character chunk signature + 4-byte remaining chunk size. */
+ if (!read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) ||
+ !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes))) {
+ ERROR("read extended-info chunk signature failed");
+ return NULL;
+ }
+ tot_bytes -= 8;
+
+ /* VCPU context structure? */
+ if (!strncmp(chunk_sig, "vcpu", 4)) {
+ if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
+ ERROR("read extended-info vcpu context failed");
+ return NULL;
+ }
+ tot_bytes -= sizeof(struct vcpu_guest_context);
+ chunk_bytes -= sizeof(struct vcpu_guest_context);
+
+ if (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3))
+ *pae_extended_cr3 = 1;
+ }
+
+ /* Any remaining bytes of this chunk: read and discard. */
+ while (chunk_bytes) {
+ unsigned long sz = chunk_bytes;
+ if ( sz > P2M_FL_SIZE )
+ sz = P2M_FL_SIZE;
+ if (!read_exact(io_fd, p2m_frame_list, sz)) {
+ ERROR("read-and-discard extended-info chunk bytes failed");
+ return NULL;
+ }
+ chunk_bytes -= sz;
+ tot_bytes -= sz;
+ }
+ }
+
+ /* Now read the real first entry of P2M list. */
+ if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
+ ERROR("read first entry of p2m_frame_list failed");
+ return NULL;
+ }
+ }
+
+ /* First entry is already read into the p2m_frame_list array. */
+ if (!read_exact(io_fd, &p2m_frame_list[1], P2M_FL_SIZE - sizeof(long))) {
+ ERROR("read p2m_frame_list failed");
+ return NULL;
+ }
+
+ return p2m_frame_list;
+}
+
+
+
+int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
+ unsigned int store_evtchn, unsigned long *store_mfn,
+ unsigned int console_evtchn, unsigned long *console_mfn,
+ unsigned int hvm, unsigned int pae)
+{
+ DECLARE_DOMCTL;
+ int rc = 1, i, j, n, m, pae_extended_cr3 = 0;
+ unsigned long mfn, pfn;
+ unsigned int prev_pc, this_pc;
+ int verify = 0;
+ int nraces = 0;
+
+ /* The new domain's shared-info frame number. */
+ unsigned long shared_info_frame;
+ unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
+ shared_info_t *shared_info = (shared_info_t *)shared_info_page;
+
+ /* A copy of the CPU context of the guest. */
+ vcpu_guest_context_t ctxt;
+
+ /* A table containing the type of each PFN (/not/ MFN!). */
+ unsigned long *pfn_type = NULL;
+
+ /* A table of MFNs to map in the current region */
+ xen_pfn_t *region_mfn = NULL;
+
+ /* Types of the pfns in the current region */
+ unsigned long region_pfn_type[MAX_BATCH_SIZE];
+
+ /* A temporary mapping, and a copy, of one frame of guest memory. */
+ unsigned long *page = NULL;
+
+ /* A copy of the pfn-to-mfn table frame list. */
+ xen_pfn_t *p2m_frame_list = NULL;
+
+ /* A temporary mapping of the guest's start_info page. */
+ start_info_t *start_info;
+
+ /* Our mapping of the current region (batch) */
+ char *region_base;
+
+ xc_mmu_t *mmu = NULL;
+
+ /* used by debug verify code */
+ unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
+
+ struct mmuext_op pin[MAX_PIN_BATCH];
+ unsigned int nr_pins;
+
+ uint64_t vcpumap = 1ULL;
+ unsigned int max_vcpu_id = 0;
+ int new_ctxt_format = 0;
+
+ /* Magic frames in HVM guests: ioreqs and xenstore comms. */
+ uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */
+
+ /* Buffer for holding HVM context */
+ uint8_t *hvm_buf = NULL;
+
+ /* For info only */
+ nr_pfns = 0;
+
+ if ( !read_exact(io_fd, &p2m_size, sizeof(unsigned long)) )
+ {
+ ERROR("read: p2m_size");
+ goto out;
+ }
+ DPRINTF("xc_domain_restore start: p2m_size = %lx\n", p2m_size);
+
+ if ( !hvm )
+ {
+ /*
+ * XXX For now, 32bit dom0s can only save/restore 32bit domUs
+ * on 64bit hypervisors.
+ */
+ memset(&domctl, 0, sizeof(domctl));
+ domctl.domain = dom;
+ domctl.cmd = XEN_DOMCTL_set_address_size;
+ domctl.u.address_size.size = sizeof(unsigned long) * 8;
+ rc = do_domctl(xc_handle, &domctl);
+ if ( rc != 0 ) {
+ ERROR("Unable to set guest address size.");
+ goto out;
+ }
+ rc = 1;
+ }
+
+ if(!get_platform_info(xc_handle, dom,
+ &max_mfn, &hvirt_start, &pt_levels)) {
+ ERROR("Unable to get platform info.");
+ return 1;
+ }
+
+ if (lock_pages(&ctxt, sizeof(ctxt))) {
+ /* needed for build domctl, but might as well do early */
+ ERROR("Unable to lock ctxt");
+ return 1;
+ }
+
+ /* Load the p2m frame list, plus potential extended info chunk */
+ if ( !hvm )
+ {
+ p2m_frame_list = load_p2m_frame_list(io_fd, &pae_extended_cr3);
+ if ( !p2m_frame_list )
+ goto out;
+ }
+
+ /* We want zeroed memory so use calloc rather than malloc. */
+ p2m = calloc(p2m_size, sizeof(xen_pfn_t));
+ pfn_type = calloc(p2m_size, sizeof(unsigned long));
+ region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
+ p2m_batch = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
+
+ if ((p2m == NULL) || (pfn_type == NULL) ||
+ (region_mfn == NULL) || (p2m_batch == NULL)) {
+ ERROR("memory alloc failed");
+ errno = ENOMEM;
+ goto out;
+ }
+
+ if (lock_pages(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) {
+ ERROR("Could not lock region_mfn");
+ goto out;
+ }
+
+ if (lock_pages(p2m_batch, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) {
+ ERROR("Could not lock p2m_batch");
+ goto out;
+ }
+
+ /* Get the domain's shared-info frame. */
+ domctl.cmd = XEN_DOMCTL_getdomaininfo;
+ domctl.domain = (domid_t)dom;
+ if (xc_domctl(xc_handle, &domctl) < 0) {
+ ERROR("Could not get information on new domain");
+ goto out;
+ }
+ shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
+
+ /* Mark all PFNs as invalid; we allocate on demand */
+ for ( pfn = 0; pfn < p2m_size; pfn++ )
+ p2m[pfn] = INVALID_P2M_ENTRY;
+
+ if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) {
+ ERROR("Could not initialise for MMU updates");
+ goto out;
+ }
+
+ DPRINTF("Reloading memory pages: 0%%\n");
+
+ /*
+ * Now simply read each saved frame into its new machine frame.
+ * We uncanonicalise page tables as we go.
+ */
+ prev_pc = 0;
+
+ n = m = 0;
+ while (1) {
+
+ int j, nr_mfns = 0;
+
+ this_pc = (n * 100) / p2m_size;
+ if ( (this_pc - prev_pc) >= 5 )
+ {
+ PPRINTF("\b\b\b\b%3d%%", this_pc);
+ prev_pc = this_pc;
+ }
+
+ if (!read_exact(io_fd, &j, sizeof(int))) {
+ ERROR("Error when reading batch size");
+ goto out;
+ }
+
+ PPRINTF("batch %d\n",j);
+
+ if (j == -1) {
+ verify = 1;
+ DPRINTF("Entering page verify mode\n");
+ continue;
+ }
+
+ if (j == -2) {
+ new_ctxt_format = 1;
+ if (!read_exact(io_fd, &max_vcpu_id, sizeof(int)) ||
+ (max_vcpu_id >= 64) ||
+ !read_exact(io_fd, &vcpumap, sizeof(uint64_t))) {
+ ERROR("Error when reading max_vcpu_id");
+ goto out;
+ }
+ continue;
+ }
+
+ if (j == 0)
+ break; /* our work here is done */
+
+ if (j > MAX_BATCH_SIZE) {
+ ERROR("Max batch size exceeded. Giving up.");
+ goto out;
+ }
+
+ if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) {
+ ERROR("Error when reading region pfn types");
+ goto out;
+ }
+
+ /* First pass for this batch: work out how much memory to alloc */
+ nr_mfns = 0;
+ for ( i = 0; i < j; i++ )
+ {
+ unsigned long pfn, pagetype;
+ pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
+ pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
+
+ if ( (pagetype != XEN_DOMCTL_PFINFO_XTAB) &&
+ (p2m[pfn] == INVALID_P2M_ENTRY) )
+ {
+ /* Have a live PFN which hasn't had an MFN allocated */
+ p2m_batch[nr_mfns++] = pfn;
+ }
+ }
+
+
+ /* Now allocate a bunch of mfns for this batch */
+ if (nr_mfns && xc_domain_memory_populate_physmap(
+ xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) {
+ ERROR("Failed to allocate memory for batch!\n");
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* Second pass for this batch: update p2m[] and region_mfn[] */
+ nr_mfns = 0;
+ for ( i = 0; i < j; i++ )
+ {
+ unsigned long pfn, pagetype;
+ pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
+ pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
+
+ if ( pagetype == XEN_DOMCTL_PFINFO_XTAB)
+ region_mfn[i] = ~0UL; /* map will fail but we don't care */
+ else
+ {
+ if (p2m[pfn] == INVALID_P2M_ENTRY) {
+ /* We just allocated a new mfn above; update p2m */
+ p2m[pfn] = p2m_batch[nr_mfns++];
+ nr_pfns++;
+ }
+
+ /* setup region_mfn[] for batch map.
+ * For HVM guests, this interface takes PFNs, not MFNs */
+ region_mfn[i] = hvm ? pfn : p2m[pfn];
+ }
+ }
+
+ /* Map relevant mfns */
+ region_base = xc_map_foreign_batch(
+ xc_handle, dom, PROT_WRITE, region_mfn, j);
+
+ if ( region_base == NULL )
+ {
+ ERROR("map batch failed");
+ goto out;
+ }
+
+ for ( i = 0; i < j; i++ )
+ {
+ void *page;
+ unsigned long pagetype;
+
+ pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
+ pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
+
+ if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
+ /* a bogus/unmapped page: skip it */
+ continue;
+
+ if ( pfn > p2m_size )
+ {
+ ERROR("pfn out of range");
+ goto out;
+ }
+
+ pfn_type[pfn] = pagetype;
+
+ mfn = p2m[pfn];
+
+ /* In verify mode, we use a copy; otherwise we work in place */
+ page = verify ? (void *)buf : (region_base + i*PAGE_SIZE);
+
+ if (!read_exact(io_fd, page, PAGE_SIZE)) {
+ ERROR("Error when reading page (type was %lx)", pagetype);
+ goto out;
+ }
+
+ pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
+
+ if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) &&
+ (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) )
+ {
+ /*
+ ** A page table page - need to 'uncanonicalize' it, i.e.
+ ** replace all the references to pfns with the corresponding
+ ** mfns for the new domain.
+ **
+ ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
+ ** so we may need to update the p2m after the main loop.
+ ** Hence we defer uncanonicalization of L1s until then.
+ */
+ if ((pt_levels != 3) ||
+ pae_extended_cr3 ||
+ (pagetype != XEN_DOMCTL_PFINFO_L1TAB)) {
+
+ if (!uncanonicalize_pagetable(xc_handle, dom,
+ pagetype, page)) {
+ /*
+ ** Failing to uncanonicalize a page table can be ok
+ ** under live migration since the page's type may have
+ ** changed by now (and we'll get an update later).
+ */
+ DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
+ pagetype >> 28, pfn, mfn);
+ nraces++;
+ continue;
+ }
+ }
+ }
+ else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB )
+ {
+ ERROR("Bogus page type %lx page table is out of range: "
+ "i=%d p2m_size=%lu", pagetype, i, p2m_size);
+ goto out;
+
+ }
+
+
+ if (verify) {
+
+ int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE);
+
+ if (res) {
+
+ int v;
+
+ DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx "
+ "actualcs=%08lx\n", pfn, pfn_type[pfn],
+ csum_page(region_base + i*PAGE_SIZE),
+ csum_page(buf));
+
+ for (v = 0; v < 4; v++) {
+
+ unsigned long *p = (unsigned long *)
+ (region_base + i*PAGE_SIZE);
+ if (buf[v] != p[v])
+ DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]);
+ }
+ }
+ }
+
+ if (!hvm
+ && xc_add_mmu_update(xc_handle, mmu,
+ (((unsigned long long)mfn) << PAGE_SHIFT)
+ | MMU_MACHPHYS_UPDATE, pfn)) {
+ ERROR("failed machphys update mfn=%lx pfn=%lx", mfn, pfn);
+ goto out;
+ }
+ } /* end of 'batch' for loop */
+
+ munmap(region_base, j*PAGE_SIZE);
+ n+= j; /* crude stats */
+
+ /*
+ * Discard cache for portion of file read so far up to last
+ * page boundary every 16MB or so.
+ */
+ m += j;
+ if ( m > MAX_PAGECACHE_USAGE )
+ {
+ discard_file_cache(io_fd, 0 /* no flush */);
+ m = 0;
+ }
+ }
+
+ /*
+ * Ensure we flush all machphys updates before potential PAE-specific
+ * reallocations below.
+ */
+ if (!hvm && xc_finish_mmu_updates(xc_handle, mmu)) {
+ ERROR("Error doing finish_mmu_updates()");
+ goto out;
+ }
+
+ DPRINTF("Received all pages (%d races)\n", nraces);
+
+ if ( hvm )
+ {
+ uint32_t rec_len;
+
+ /* Set HVM-specific parameters */
+ if ( !read_exact(io_fd, magic_pfns, sizeof(magic_pfns)) )
+ {
+ ERROR("error reading magic page addresses");
+ goto out;
+ }
+
+ /* These comms pages need to be zeroed at the start of day */
+ if ( xc_clear_domain_page(xc_handle, dom, magic_pfns[0]) ||
+ xc_clear_domain_page(xc_handle, dom, magic_pfns[1]) ||
+ xc_clear_domain_page(xc_handle, dom, magic_pfns[2]) )
+ {
+ ERROR("error zeroing magic pages");
+ goto out;
+ }
+
+ xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, magic_pfns[0]);
+ xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1]);
+ xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, magic_pfns[2]);
+ xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
+ xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
+ *store_mfn = magic_pfns[2];
+
+ /* Read vcpu contexts */
+ for (i = 0; i <= max_vcpu_id; i++)
+ {
+ if (!(vcpumap & (1ULL << i)))
+ continue;
+
+ if ( !read_exact(io_fd, &(ctxt), sizeof(ctxt)) )
+ {
+ ERROR("error reading vcpu context\n");
+ goto out;
+ }
+
+ if ( (rc = xc_vcpu_setcontext(xc_handle, dom, i, &ctxt)) )
+ {
+ ERROR("Could not set vcpu context, rc=%d", rc);
+ goto out;
+ }
+ rc = 1;
+ }
+
+ /* Read HVM context */
+ if ( !read_exact(io_fd, &rec_len, sizeof(uint32_t)) )
+ {
+ ERROR("error reading hvm context size!\n");
+ goto out;
+ }
+
+ hvm_buf = malloc(rec_len);
+ if ( hvm_buf == NULL )
+ {
+ ERROR("memory alloc for hvm context buffer failed");
+ errno = ENOMEM;
+ goto out;
+ }
+
+ if ( !read_exact(io_fd, hvm_buf, rec_len) )
+ {
+ ERROR("error loading the HVM context");
+ goto out;
+ }
+
+ rc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_len);
+ if ( rc )
+ ERROR("error setting the HVM context");
+
+ goto out;
+ }
+
+ /* Non-HVM guests only from here on */
+
+ if ((pt_levels == 3) && !pae_extended_cr3) {
+
+ /*
+ ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
+ ** is a little awkward and involves (a) finding all such PGDs and
+ ** replacing them with 'lowmem' versions; (b) updating the p2m[]
+ ** with the new info; and (c) canonicalizing all the L1s using the
+ ** (potentially updated) p2m[].
+ **
+ ** This is relatively slow (and currently involves two passes through
+ ** the pfn_type[] array), but at least seems to be correct. May wish
+ ** to consider more complex approaches to optimize this later.
+ */
+
+ int j, k;
+
+ /* First pass: find all L3TABs currently in > 4G mfns and get new mfns */
+ for ( i = 0; i < p2m_size; i++ )
+ {
+ if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
+ XEN_DOMCTL_PFINFO_L3TAB) &&
+ (p2m[i] > 0xfffffUL) )
+ {
+ unsigned long new_mfn;
+ uint64_t l3ptes[4];
+ uint64_t *l3tab;
+
+ l3tab = (uint64_t *)
+ xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ, p2m[i]);
+
+ for(j = 0; j < 4; j++)
+ l3ptes[j] = l3tab[j];
+
+ munmap(l3tab, PAGE_SIZE);
+
+ if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
+ ERROR("Couldn't get a page below 4GB :-(");
+ goto out;
+ }
+
+ p2m[i] = new_mfn;
+ if (xc_add_mmu_update(xc_handle, mmu,
+ (((unsigned long long)new_mfn)
+ << PAGE_SHIFT) |
+ MMU_MACHPHYS_UPDATE, i)) {
+ ERROR("Couldn't m2p on PAE root pgdir");
+ goto out;
+ }
+
+ l3tab = (uint64_t *)
+ xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ | PROT_WRITE, p2m[i]);
+
+ for(j = 0; j < 4; j++)
+ l3tab[j] = l3ptes[j];
+
+ munmap(l3tab, PAGE_SIZE);
+
+ }
+ }
+
+ /* Second pass: find all L1TABs and uncanonicalize them */
+ j = 0;
+
+ for ( i = 0; i < p2m_size; i++ )
+ {
+ if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
+ XEN_DOMCTL_PFINFO_L1TAB) )
+ {
+ region_mfn[j] = p2m[i];
+ j++;
+ }
+
+ if(i == (p2m_size-1) || j == MAX_BATCH_SIZE) {
+
+ if (!(region_base = xc_map_foreign_batch(
+ xc_handle, dom, PROT_READ | PROT_WRITE,
+ region_mfn, j))) {
+ ERROR("map batch failed");
+ goto out;
+ }
+
+ for(k = 0; k < j; k++) {
+ if(!uncanonicalize_pagetable(xc_handle, dom,
+ XEN_DOMCTL_PFINFO_L1TAB,
+ region_base + k*PAGE_SIZE)) {
+ ERROR("failed uncanonicalize pt!");
+ goto out;
+ }
+ }
+
+ munmap(region_base, j*PAGE_SIZE);
+ j = 0;
+ }
+ }
+
+ if (xc_finish_mmu_updates(xc_handle, mmu)) {
+ ERROR("Error doing finish_mmu_updates()");
+ goto out;
+ }
+ }
+
+ /*
+ * Pin page tables. Do this after writing to them as otherwise Xen
+ * will barf when doing the type-checking.
+ */
+ nr_pins = 0;
+ for ( i = 0; i < p2m_size; i++ )
+ {
+ if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
+ continue;
+
+ switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+ {
+ case XEN_DOMCTL_PFINFO_L1TAB:
+ pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
+ break;
+
+ case XEN_DOMCTL_PFINFO_L2TAB:
+ pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
+ break;
+
+ case XEN_DOMCTL_PFINFO_L3TAB:
+ pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
+ break;
+
+ case XEN_DOMCTL_PFINFO_L4TAB:
+ pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
+ break;
+
+ default:
+ continue;
+ }
+
+ pin[nr_pins].arg1.mfn = p2m[i];
+ nr_pins++;
+
+ /* Batch full? Then flush. */
+ if (nr_pins == MAX_PIN_BATCH) {
+ if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) {
+ ERROR("Failed to pin batch of %d page tables", nr_pins);
+ goto out;
+ }
+ nr_pins = 0;
+ }
+ }
+
+ /* Flush final partial batch. */
+ if ((nr_pins != 0) && (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0)) {
+ ERROR("Failed to pin batch of %d page tables", nr_pins);
+ goto out;
+ }
+
+ DPRINTF("\b\b\b\b100%%\n");
+ DPRINTF("Memory reloaded (%ld pages)\n", nr_pfns);
+
+ /* Get the list of PFNs that are not in the pseudo-phys map */
+ {
+ unsigned int count;
+ unsigned long *pfntab;
+ int nr_frees, rc;
+
+ if (!read_exact(io_fd, &count, sizeof(count))) {
+ ERROR("Error when reading pfn count");
+ goto out;
+ }
+
+ if(!(pfntab = malloc(sizeof(unsigned long) * count))) {
+ ERROR("Out of memory");
+ goto out;
+ }
+
+ if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) {
+ ERROR("Error when reading pfntab");
+ goto out;
+ }
+
+ nr_frees = 0;
+ for (i = 0; i < count; i++) {
+
+ unsigned long pfn = pfntab[i];
+
+ if(p2m[pfn] != INVALID_P2M_ENTRY) {
+ /* pfn is not in physmap now, but was at some point during
+ the save/migration process - need to free it */
+ pfntab[nr_frees++] = p2m[pfn];
+ p2m[pfn] = INVALID_P2M_ENTRY; // not in pseudo-physical map
+ }
+ }
+
+ if (nr_frees > 0) {
+
+ struct xen_memory_reservation reservation = {
+ .nr_extents = nr_frees,
+ .extent_order = 0,
+ .domid = dom
+ };
+ set_xen_guest_handle(reservation.extent_start, pfntab);
+
+ if ((rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation,
+ &reservation)) != nr_frees) {
+ ERROR("Could not decrease reservation : %d", rc);
+ goto out;
+ } else
+ DPRINTF("Decreased reservation by %d pages\n", count);
+ }
+ }
+
+ for (i = 0; i <= max_vcpu_id; i++) {
+ if (!(vcpumap & (1ULL << i)))
+ continue;
+
+ if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
+ ERROR("Error when reading ctxt %d", i);
+ goto out;
+ }
+
+ if ( !new_ctxt_format )
+ ctxt.flags |= VGCF_online;
+
+ if (i == 0) {
+ /*
+ * Uncanonicalise the suspend-record frame number and poke
+ * resume record.
+ */
+ pfn = ctxt.user_regs.edx;
+ if ((pfn >= p2m_size) ||
+ (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
+ ERROR("Suspend record frame number is bad");
+ goto out;
+ }
+ ctxt.user_regs.edx = mfn = p2m[pfn];
+ start_info = xc_map_foreign_range(
+ xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
+ start_info->nr_pages = p2m_size;
+ start_info->shared_info = shared_info_frame << PAGE_SHIFT;
+ start_info->flags = 0;
+ *store_mfn = start_info->store_mfn = p2m[start_info->store_mfn];
+ start_info->store_evtchn = store_evtchn;
+ start_info->console.domU.mfn = p2m[start_info->console.domU.mfn];
+ start_info->console.domU.evtchn = console_evtchn;
+ *console_mfn = start_info->console.domU.mfn;
+ munmap(start_info, PAGE_SIZE);
+ }
+
+ /* Uncanonicalise each GDT frame number. */
+ if (ctxt.gdt_ents > 8192) {
+ ERROR("GDT entry count out of range");
+ goto out;
+ }
+
+ for (j = 0; (512*j) < ctxt.gdt_ents; j++) {
+ pfn = ctxt.gdt_frames[j];
+ if ((pfn >= p2m_size) ||
+ (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
+ ERROR("GDT frame number is bad");
+ goto out;
+ }
+ ctxt.gdt_frames[j] = p2m[pfn];
+ }
+
+ /* Uncanonicalise the page table base pointer. */
+ pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]);
+
+ if (pfn >= p2m_size) {
+ ERROR("PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
+ pfn, p2m_size, pfn_type[pfn]);
+ goto out;
+ }
+
+ if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
+ ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
+ ERROR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
+ pfn, p2m_size, pfn_type[pfn],
+ (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
+ goto out;
+ }
+
+ ctxt.ctrlreg[3] = xen_pfn_to_cr3(p2m[pfn]);
+
+ /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
+ if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
+ {
+ pfn = xen_cr3_to_pfn(ctxt.ctrlreg[1]);
+
+ if (pfn >= p2m_size) {
+ ERROR("User PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
+ pfn, p2m_size, pfn_type[pfn]);
+ goto out;
+ }
+
+ if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
+ ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
+ ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
+ pfn, p2m_size, pfn_type[pfn],
+ (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
+ goto out;
+ }
+
+ ctxt.ctrlreg[1] = xen_pfn_to_cr3(p2m[pfn]);
+ }
+
+ domctl.cmd = XEN_DOMCTL_setvcpucontext;
+ domctl.domain = (domid_t)dom;
+ domctl.u.vcpucontext.vcpu = i;
+ set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt);
+ rc = xc_domctl(xc_handle, &domctl);
+ if (rc != 0) {
+ ERROR("Couldn't build vcpu%d", i);
+ goto out;
+ }
+ rc = 1;
+ }
+
+ if (!read_exact(io_fd, shared_info_page, PAGE_SIZE)) {
+ ERROR("Error when reading shared info page");
+ goto out;
+ }
+
+ /* clear any pending events and the selector */
+ memset(&(shared_info->evtchn_pending[0]), 0,
+ sizeof (shared_info->evtchn_pending));
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ shared_info->vcpu_info[i].evtchn_pending_sel = 0;
+
+ /* Copy saved contents of shared-info page. No checking needed. */
+ page = xc_map_foreign_range(
+ xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame);
+ memcpy(page, shared_info, PAGE_SIZE);
+ munmap(page, PAGE_SIZE);
+
+ /* Uncanonicalise the pfn-to-mfn table frame-number list. */
+ for (i = 0; i < P2M_FL_ENTRIES; i++) {
+ pfn = p2m_frame_list[i];
+ if ((pfn >= p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
+ ERROR("PFN-to-MFN frame number is bad");
+ goto out;
+ }
+
+ p2m_frame_list[i] = p2m[pfn];
+ }
+
+ /* Copy the P2M we've constructed to the 'live' P2M */
+ if (!(live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_WRITE,
+ p2m_frame_list, P2M_FL_ENTRIES))) {
+ ERROR("Couldn't map p2m table");
+ goto out;
+ }
+
+ memcpy(live_p2m, p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
+ munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
+
+ DPRINTF("Domain ready to be built.\n");
+ rc = 0;
+
+ out:
+ if ( (rc != 0) && (dom != 0) )
+ xc_domain_destroy(xc_handle, dom);
+ free(mmu);
+ free(p2m);
+ free(pfn_type);
+ free(hvm_buf);
+
+ /* discard cache for save file */
+ discard_file_cache(io_fd, 1 /*flush*/);
+
+ DPRINTF("Restore exit with rc=%d\n", rc);
+
+ return rc;
+}
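
For the record, the heart of uncanonicalize_pagetable() above is a pure
bit-level rewrite of each present PTE. A simplified sketch, assuming the
xg_save_restore.h masks used above, 64-bit PTEs and an already-populated
p2m[] (illustration only, not part of the patch):

    /* Rewrite one canonical (PFN-based) PTE into machine form by
     * splicing in the MFN from p2m[], preserving the low flag bits. */
    static uint64_t pte_pfn_to_mfn(uint64_t pte, const xen_pfn_t *p2m)
    {
        unsigned long pfn;

        if (!(pte & _PAGE_PRESENT))
            return pte;                            /* non-present: untouched */

        pfn  = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
        pte &= ~MADDR_MASK_X86;                    /* strip old frame address */
        pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT;   /* splice in new MFN */
        return pte;
    }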
diff -r 602d061ff51f -r e518f2fbdd72 tools/libxc/xc_hvm_restore.c
--- a/tools/libxc/xc_hvm_restore.c Thu Apr 05 10:43:50 2007 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,351 +0,0 @@
-/******************************************************************************
- * xc_hvm_restore.c
- *
- * Restore the state of a HVM guest.
- *
- * Copyright (c) 2003, K A Fraser.
- * Copyright (c) 2006 Intel Corporation
- * rewritten for hvm guest by Zhai Edwin <edwin.zhai@xxxxxxxxx>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "xg_private.h"
-#include "xg_save_restore.h"
-
-#include <xen/hvm/ioreq.h>
-#include <xen/hvm/params.h>
-#include <xen/hvm/e820.h>
-
-static ssize_t
-read_exact(int fd, void *buf, size_t count)
-{
- int r = 0, s;
- unsigned char *b = buf;
-
- while ( r < count )
- {
- s = read(fd, &b[r], count - r);
- if ( (s == -1) && (errno == EINTR) )
- continue;
- if ( s <= 0 )
- break;
- r += s;
- }
-
- return (r == count) ? 1 : 0;
-}
-
-#define BPL (sizeof(long)*8)
-#define test_bit(bit, map) !!((map)[(bit)/BPL] & (1UL << ((bit) % BPL)))
-#define set_bit(bit, map) ((map)[(bit)/BPL] |= (1UL << ((bit) % BPL)))
-static int test_and_set_bit(unsigned long nr, unsigned long *map)
-{
- int rc = test_bit(nr, map);
- if ( !rc )
- set_bit(nr, map);
- return rc;
-}
-
-int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom,
- unsigned int store_evtchn, unsigned long *store_mfn,
- unsigned int pae, unsigned int apic)
-{
- DECLARE_DOMCTL;
-
- /* A copy of the CPU context of the guest. */
- vcpu_guest_context_t ctxt;
-
- char *region_base;
-
- unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
-
- xc_dominfo_t info;
- unsigned int rc = 1, n, i;
- uint32_t rec_len, nr_vcpus;
- uint8_t *hvm_buf = NULL;
-
- /* Magic frames: ioreqs and xenstore comms. */
- uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */
-
- unsigned long pfn;
- int verify = 0;
-
- /* Types of the pfns in the current region */
- unsigned long region_pfn_type[MAX_BATCH_SIZE];
- xen_pfn_t pfn_alloc_batch[MAX_BATCH_SIZE];
- unsigned int pfn_alloc_batch_size;
-
- /* The size of an array big enough to contain all guest pfns */
- unsigned long max_pfn = 0xfffffUL; /* initial memory map guess: 4GB */
- unsigned long *pfn_bitmap = NULL, *new_pfn_bitmap;
-
- DPRINTF("xc_hvm_restore:dom=%d, store_evtchn=%d, "
- "pae=%u, apic=%u.\n", dom, store_evtchn, pae, apic);
-
- DPRINTF("xc_hvm_restore start: max_pfn = %lx\n", max_pfn);
-
- if ( mlock(&ctxt, sizeof(ctxt)) )
- {
- /* needed for build dom0 op, but might as well do early */
- ERROR("Unable to mlock ctxt");
- return 1;
- }
-
- if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 )
- {
- ERROR("Could not get domain info");
- return 1;
- }
-
- domctl.cmd = XEN_DOMCTL_getdomaininfo;
- domctl.domain = (domid_t)dom;
- if ( xc_domctl(xc_handle, &domctl) < 0 )
- {
- ERROR("Could not get information on new domain");
- goto out;
- }
-
- pfn_bitmap = calloc((max_pfn+1)/8, 1);
- if ( pfn_bitmap == NULL )
- {
- ERROR("Could not allocate pfn bitmap");
- goto out;
- }
-
- n = 0;
- for ( ; ; )
- {
- int j;
-
- if ( !read_exact(io_fd, &j, sizeof(int)) )
- {
- ERROR("HVM restore Error when reading batch size");
- goto out;
- }
-
- PPRINTF("batch %d\n",j);
-
- if ( j == -1 )
- {
- verify = 1;
- DPRINTF("Entering page verify mode\n");
- continue;
- }
-
- if ( j == 0 )
- break; /* our work here is done */
-
- if ( j > MAX_BATCH_SIZE )
- {
- ERROR("Max batch size exceeded. Giving up.");
- goto out;
- }
-
- if ( !read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long)) )
- {
- ERROR("Error when reading region pfn types");
- goto out;
- }
-
- pfn_alloc_batch_size = 0;
- for ( i = 0; i < j; i++ )
- {
- pfn = region_pfn_type[i];
- if ( pfn & XEN_DOMCTL_PFINFO_LTAB_MASK )
- continue;
-
- while ( pfn > max_pfn )
- {
- if ( max_pfn >= 0xfffffff )
- {
- ERROR("Maximum PFN beyond reason (1TB) %lx\n", pfn);
- goto out;
- }
- max_pfn = 2*max_pfn + 1;
- new_pfn_bitmap = realloc(pfn_bitmap, (max_pfn+1)/8);
- if ( new_pfn_bitmap == NULL )
- {
- ERROR("Could not realloc pfn bitmap for max_pfn=%lx\n",
- max_pfn);
- goto out;
- }
- pfn_bitmap = new_pfn_bitmap;
- memset(&pfn_bitmap[(max_pfn+1)/(2*BPL)], 0, (max_pfn+1)/(2*8));
- }
-
- if ( !test_and_set_bit(pfn, pfn_bitmap) )
- pfn_alloc_batch[pfn_alloc_batch_size++] = pfn;
- }
-
- if ( pfn_alloc_batch_size != 0 )
- {
- rc = xc_domain_memory_populate_physmap(
- xc_handle, dom, pfn_alloc_batch_size, 0, 0, pfn_alloc_batch);
- if ( rc != 0 )
- {
- PERROR("Could not allocate %u pages for HVM guest.\n",
- pfn_alloc_batch_size);
- goto out;
- }
- }
-
- region_base = xc_map_foreign_batch(
- xc_handle, dom, PROT_WRITE, region_pfn_type, j);
-
- for ( i = 0; i < j; i++ )
- {
- void *page;
-
- pfn = region_pfn_type[i];
- if ( pfn & XEN_DOMCTL_PFINFO_LTAB_MASK )
- continue;
-
- /* In verify mode, we use a copy; otherwise we work in place */
- page = verify ? (void *)buf : (region_base + i*PAGE_SIZE);
-
- if ( !read_exact(io_fd, page, PAGE_SIZE) )
- {
- ERROR("Error when reading page (%x)", i);
- goto out;
- }
-
- if ( verify )
- {
- int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE);
- if ( res )
- {
- int v;
-
- DPRINTF("************** pfn=%lx gotcs=%08lx "
- "actualcs=%08lx\n", pfn,
- csum_page(region_base + i*PAGE_SIZE),
- csum_page(buf));
-
- for ( v = 0; v < 4; v++ )
- {
- unsigned long *p = (unsigned long *)
- (region_base + i*PAGE_SIZE);
- if (buf[v] != p[v])
- DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]);
- }
- }
- }
-
- } /* end of 'batch' for loop */
-
- munmap(region_base, j*PAGE_SIZE);
- n += j; /* crude stats */
- }
-
- xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
- xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
-
- if ( !read_exact(io_fd, magic_pfns, sizeof(magic_pfns)) )
- {
- ERROR("error reading magic page addresses\n");
- goto out;
- }
-
- if ( xc_clear_domain_page(xc_handle, dom, magic_pfns[0]) ||
- xc_clear_domain_page(xc_handle, dom, magic_pfns[1]) ||
- xc_clear_domain_page(xc_handle, dom, magic_pfns[2]) )
- {
- rc = -1;
- goto out;
- }
-
- xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, magic_pfns[0]);
- xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1]);
- xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, magic_pfns[2]);
- *store_mfn = magic_pfns[2];
- DPRINTF("hvm restore: calculate new store_mfn=0x%lx.\n", *store_mfn);
-
- if ( !read_exact(io_fd, &nr_vcpus, sizeof(uint32_t)) )
- {
- ERROR("error read nr vcpu !\n");
- goto out;
- }
- DPRINTF("hvm restore:get nr_vcpus=%d.\n", nr_vcpus);
-
- for ( i = 0; i < nr_vcpus; i++ )
- {
- if ( !read_exact(io_fd, &rec_len, sizeof(uint32_t)) )
- {
- ERROR("error read vcpu context size!\n");
- goto out;
- }
- if ( rec_len != sizeof(ctxt) )
- {
- ERROR("vcpu context size mismatch!\n");
- goto out;
- }
-
- if ( !read_exact(io_fd, &(ctxt), sizeof(ctxt)) )
- {
- ERROR("error read vcpu context.\n");
- goto out;
- }
-
- if ( (rc = xc_vcpu_setcontext(xc_handle, dom, i, &ctxt)) )
- {
- ERROR("Could not set vcpu context, rc=%d", rc);
- goto out;
- }
- }
-
- /* restore hvm context including pic/pit/shpage */
- if ( !read_exact(io_fd, &rec_len, sizeof(uint32_t)) )
- {
- ERROR("error read hvm context size!\n");
- goto out;
- }
-
- hvm_buf = malloc(rec_len);
- if ( hvm_buf == NULL )
- {
- ERROR("memory alloc for hvm context buffer failed");
- errno = ENOMEM;
- goto out;
- }
-
- if ( !read_exact(io_fd, hvm_buf, rec_len) )
- {
- ERROR("error read hvm buffer!\n");
- goto out;
- }
-
- if ( (rc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_len)) )
- {
- ERROR("error set hvm buffer!\n");
- goto out;
- }
-
- rc = 0;
- goto out;
-
- out:
- if ( (rc != 0) && (dom != 0) )
- xc_domain_destroy(xc_handle, dom);
- free(hvm_buf);
- free(pfn_bitmap);
-
- DPRINTF("Restore exit with rc=%d\n", rc);
-
- return rc;
-}
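
Both of the old readers parsed the same outer framing, which the merged loop
keeps: each page batch is preceded by a signed count, with negative values
reserved for control chunks. A sketch of the dispatch, where read_vcpumap()
and process_batch() are invented names standing in for the inline code above:

    /* Outer framing of the page stream: count > 0 is a batch of pages,
     * -1 toggles verify mode, -2 carries max_vcpu_id/vcpumap, 0 ends. */
    for (;;) {
        int count;

        if (!read_exact(io_fd, &count, sizeof(count)))
            return -1;                  /* stream error */
        if (count == 0)
            break;                      /* end of page data */
        if (count == -1) {
            verify = 1;                 /* subsequent pages are compared */
            continue;
        }
        if (count == -2) {              /* optional vcpu-map chunk */
            if (read_vcpumap(io_fd, &max_vcpu_id, &vcpumap))
                return -1;
            continue;
        }
        if (count > MAX_BATCH_SIZE)
            return -1;                  /* malformed stream */
        if (process_batch(io_fd, count))
            return -1;
    }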
diff -r 602d061ff51f -r e518f2fbdd72 tools/libxc/xc_hvm_save.c
--- a/tools/libxc/xc_hvm_save.c Thu Apr 05 10:43:50 2007 +0100
+++ b/tools/libxc/xc_hvm_save.c Thu Apr 05 15:11:22 2007 +0100
@@ -305,6 +305,8 @@ int xc_hvm_save(int xc_handle, int io_fd
unsigned long total_sent = 0;
+ uint64_t vcpumap = 1ULL;
+
DPRINTF("xc_hvm_save: dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, "
"live=%d, debug=%d.\n", dom, max_iters, max_factor, flags,
live, debug);
@@ -371,6 +373,12 @@ int xc_hvm_save(int xc_handle, int io_fd
/* Size of any array that covers 0 ... max_pfn */
pfn_array_size = max_pfn + 1;
+ if ( !write_exact(io_fd, &pfn_array_size, sizeof(unsigned long)) )
+ {
+ ERROR("Error when writing to state file (1)");
+ goto out;
+ }
+
/* pretend we sent all the pages last iteration */
sent_last_iter = pfn_array_size;
@@ -644,6 +652,32 @@ int xc_hvm_save(int xc_handle, int io_fd
DPRINTF("All HVM memory is saved\n");
+ {
+ struct {
+ int minustwo;
+ int max_vcpu_id;
+ uint64_t vcpumap;
+ } chunk = { -2, info.max_vcpu_id };
+
+ if (info.max_vcpu_id >= 64) {
+ ERROR("Too many VCPUS in guest!");
+ goto out;
+ }
+
+ for (i = 1; i <= info.max_vcpu_id; i++) {
+ xc_vcpuinfo_t vinfo;
+ if ((xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) &&
+ vinfo.online)
+ vcpumap |= 1ULL << i;
+ }
+
+ chunk.vcpumap = vcpumap;
+ if(!write_exact(io_fd, &chunk, sizeof(chunk))) {
+ ERROR("Error when writing to state file (errno %d)", errno);
+ goto out;
+ }
+ }
+
/* Zero terminate */
i = 0;
if ( !write_exact(io_fd, &i, sizeof(int)) )
@@ -666,33 +700,22 @@ int xc_hvm_save(int xc_handle, int io_fd
goto out;
}
- /* save vcpu/vmcs context */
- if ( !write_exact(io_fd, &nr_vcpus, sizeof(uint32_t)) )
- {
- ERROR("error write nr vcpus");
- goto out;
- }
-
- /*XXX: need a online map to exclude down cpu */
+ /* save vcpu/vmcs contexts */
for ( i = 0; i < nr_vcpus; i++ )
{
+ if (!(vcpumap & (1ULL << i)))
+ continue;
+
if ( xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) )
{
ERROR("HVM:Could not get vcpu context");
goto out;
}
- rec_size = sizeof(ctxt);
- DPRINTF("write %d vcpucontext of total %d.\n", i, nr_vcpus);
- if ( !write_exact(io_fd, &rec_size, sizeof(uint32_t)) )
- {
- ERROR("error write vcpu ctxt size");
- goto out;
- }
-
+ DPRINTF("write vcpu %d context.\n", i);
if ( !write_exact(io_fd, &(ctxt), sizeof(ctxt)) )
{
- ERROR("write vmcs failed!\n");
+ ERROR("write vcpu context failed!\n");
goto out;
}
}
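
The '-2' chunk added to xc_hvm_save above is the same one the PV save path
already emits, which is what lets the merged restore parse both streams with
one loop. Its on-the-wire layout, sketched to match the anonymous struct in
the diff:

    /* Layout of the '-2' control chunk as written above.  The restore
     * side reads the int pair and the uint64_t separately, so this
     * relies on there being no padding between the fields (true on
     * x86 and x86_64, where the two ints pack to 8 bytes). */
    struct vcpumap_chunk {
        int      minustwo;      /* always -2, distinguishing it from a batch */
        int      max_vcpu_id;   /* highest VCPU id; must be below 64 */
        uint64_t vcpumap;       /* bit i set iff VCPU i is online */
    };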
diff -r 602d061ff51f -r e518f2fbdd72 tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c Thu Apr 05 10:43:50 2007 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,955 +0,0 @@
-/******************************************************************************
- * xc_linux_restore.c
- *
- * Restore the state of a Linux session.
- *
- * Copyright (c) 2003, K A Fraser.
- */
-
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "xg_private.h"
-#include "xg_save_restore.h"
-#include "xc_dom.h"
-
-/* max mfn of the current host machine */
-static unsigned long max_mfn;
-
-/* virtual starting address of the hypervisor */
-static unsigned long hvirt_start;
-
-/* #levels of page tables used by the current guest */
-static unsigned int pt_levels;
-
-/* number of pfns this guest has (i.e. number of entries in the P2M) */
-static unsigned long p2m_size;
-
-/* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */
-static unsigned long nr_pfns;
-
-/* Live mapping of the table mapping each PFN to its current MFN. */
-static xen_pfn_t *live_p2m = NULL;
-
-/* A table mapping each PFN to its new MFN. */
-static xen_pfn_t *p2m = NULL;
-
-/* A table of P2M mappings in the current region */
-static xen_pfn_t *p2m_batch = NULL;
-
-static ssize_t
-read_exact(int fd, void *buf, size_t count)
-{
- int r = 0, s;
- unsigned char *b = buf;
-
- while (r < count) {
- s = read(fd, &b[r], count - r);
- if ((s == -1) && (errno == EINTR))
- continue;
- if (s <= 0) {
- break;
- }
- r += s;
- }
-
- return (r == count) ? 1 : 0;
-}
-
-/*
-** In the state file (or during transfer), all page-table pages are
-** converted into a 'canonical' form where references to actual mfns
-** are replaced with references to the corresponding pfns.
-** This function inverts that operation, replacing the pfn values with
-** the (now known) appropriate mfn values.
-*/
-static int uncanonicalize_pagetable(int xc_handle, uint32_t dom,
- unsigned long type, void *page)
-{
- int i, pte_last;
- unsigned long pfn;
- uint64_t pte;
- int nr_mfns = 0;
-
- pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8);
-
- /* First pass: work out how many (if any) MFNs we need to alloc */
- for(i = 0; i < pte_last; i++) {
-
- if(pt_levels == 2)
- pte = ((uint32_t *)page)[i];
- else
- pte = ((uint64_t *)page)[i];
-
- /* XXX SMH: below needs fixing for PROT_NONE etc */
- if(!(pte & _PAGE_PRESENT))
- continue;
-
- pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
-
- if(pfn >= p2m_size) {
- /* This "page table page" is probably not one; bail. */
- ERROR("Frame number in type %lu page table is out of range: "
- "i=%d pfn=0x%lx p2m_size=%lu",
- type >> 28, i, pfn, p2m_size);
- return 0;
- }
-
- if(p2m[pfn] == INVALID_P2M_ENTRY) {
- /* Have a 'valid' PFN without a matching MFN - need to alloc */
- p2m_batch[nr_mfns++] = pfn;
- }
- }
-
-
- /* Allocate the requisite number of mfns */
- if (nr_mfns && xc_domain_memory_populate_physmap(
- xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) {
- ERROR("Failed to allocate memory for batch.!\n");
- errno = ENOMEM;
- return 0;
- }
-
- /* Second pass: uncanonicalize each present PTE */
- nr_mfns = 0;
- for(i = 0; i < pte_last; i++) {
-
- if(pt_levels == 2)
- pte = ((uint32_t *)page)[i];
- else
- pte = ((uint64_t *)page)[i];
-
- /* XXX SMH: below needs fixing for PROT_NONE etc */
- if(!(pte & _PAGE_PRESENT))
- continue;
-
- pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
-
- if(p2m[pfn] == INVALID_P2M_ENTRY)
- p2m[pfn] = p2m_batch[nr_mfns++];
-
- pte &= ~MADDR_MASK_X86;
- pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT;
-
- if(pt_levels == 2)
- ((uint32_t *)page)[i] = (uint32_t)pte;
- else
- ((uint64_t *)page)[i] = (uint64_t)pte;
- }
-
- return 1;
-}
-
-
-int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
- unsigned int store_evtchn, unsigned long *store_mfn,
- unsigned int console_evtchn, unsigned long *console_mfn)
-{
- DECLARE_DOMCTL;
- int rc = 1, i, j, n, m, pae_extended_cr3 = 0;
- unsigned long mfn, pfn;
- unsigned int prev_pc, this_pc;
- int verify = 0;
- int nraces = 0;
-
- /* The new domain's shared-info frame number. */
- unsigned long shared_info_frame;
- unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
- shared_info_t *shared_info = (shared_info_t *)shared_info_page;
-
- /* A copy of the CPU context of the guest. */
- vcpu_guest_context_t ctxt;
-
- /* A table containing the type of each PFN (/not/ MFN!). */
- unsigned long *pfn_type = NULL;
-
- /* A table of MFNs to map in the current region */
- xen_pfn_t *region_mfn = NULL;
-
- /* Types of the pfns in the current region */
- unsigned long region_pfn_type[MAX_BATCH_SIZE];
-
- /* A temporary mapping, and a copy, of one frame of guest memory. */
- unsigned long *page = NULL;
-
- /* A copy of the pfn-to-mfn table frame list. */
- xen_pfn_t *p2m_frame_list = NULL;
-
- /* A temporary mapping of the guest's start_info page. */
- start_info_t *start_info;
-
- /* Our mapping of the current region (batch) */
- char *region_base;
-
- xc_mmu_t *mmu = NULL;
-
- /* used by debug verify code */
- unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
-
- struct mmuext_op pin[MAX_PIN_BATCH];
- unsigned int nr_pins;
-
- uint64_t vcpumap = 1ULL;
- unsigned int max_vcpu_id = 0;
- int new_ctxt_format = 0;
-
- /* For info only */
- nr_pfns = 0;
-
- if ( !read_exact(io_fd, &p2m_size, sizeof(unsigned long)) )
- {
- ERROR("read: p2m_size");
- goto out;
- }
- DPRINTF("xc_linux_restore start: p2m_size = %lx\n", p2m_size);
-
- /*
- * XXX For now, 32bit dom0's can only save/restore 32bit domUs
- * on 64bit hypervisors.
- */
- memset(&domctl, 0, sizeof(domctl));
- domctl.domain = dom;
- domctl.cmd = XEN_DOMCTL_set_address_size;
- domctl.u.address_size.size = sizeof(unsigned long) * 8;
- rc = do_domctl(xc_handle, &domctl);
- if ( rc != 0 ) {
- ERROR("Unable to set guest address size.");
- goto out;
- }
-
- if(!get_platform_info(xc_handle, dom,
- &max_mfn, &hvirt_start, &pt_levels)) {
- ERROR("Unable to get platform info.");
- return 1;
- }
-
- if (lock_pages(&ctxt, sizeof(ctxt))) {
- /* needed for build domctl, but might as well do early */
- ERROR("Unable to lock ctxt");
- return 1;
- }
-
- if (!(p2m_frame_list = malloc(P2M_FL_SIZE))) {
- ERROR("Couldn't allocate p2m_frame_list array");
- goto out;
- }
-
- /* Read first entry of P2M list, or extended-info signature (~0UL). */
- if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
- ERROR("read extended-info signature failed");
- goto out;
- }
-
- if (p2m_frame_list[0] == ~0UL) {
- uint32_t tot_bytes;
-
- /* Next 4 bytes: total size of following extended info. */
- if (!read_exact(io_fd, &tot_bytes, sizeof(tot_bytes))) {
- ERROR("read extended-info size failed");
- goto out;
- }
-
- while (tot_bytes) {
- uint32_t chunk_bytes;
- char chunk_sig[4];
-
- /* 4-character chunk signature + 4-byte remaining chunk size. */
- if (!read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) ||
- !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes))) {
- ERROR("read extended-info chunk signature failed");
- goto out;
- }
- tot_bytes -= 8;
-
- /* VCPU context structure? */
- if (!strncmp(chunk_sig, "vcpu", 4)) {
- if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
- ERROR("read extended-info vcpu context failed");
- goto out;
- }
- tot_bytes -= sizeof(struct vcpu_guest_context);
- chunk_bytes -= sizeof(struct vcpu_guest_context);
-
- if (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3))
- pae_extended_cr3 = 1;
- }
-
- /* Any remaining bytes of this chunk: read and discard. */
- while (chunk_bytes) {
- unsigned long sz = chunk_bytes;
- if ( sz > P2M_FL_SIZE )
- sz = P2M_FL_SIZE;
- if (!read_exact(io_fd, p2m_frame_list, sz)) {
- ERROR("read-and-discard extended-info chunk bytes failed");
- goto out;
- }
- chunk_bytes -= sz;
- tot_bytes -= sz;
- }
- }
-
- /* Now read the real first entry of P2M list. */
- if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
- ERROR("read first entry of p2m_frame_list failed");
- goto out;
- }
- }
-
- /* First entry is already read into the p2m array. */
- if (!read_exact(io_fd, &p2m_frame_list[1], P2M_FL_SIZE - sizeof(long))) {
- ERROR("read p2m_frame_list failed");
- goto out;
- }
-
- /* We want zeroed memory so use calloc rather than malloc. */
- p2m = calloc(p2m_size, sizeof(xen_pfn_t));
- pfn_type = calloc(p2m_size, sizeof(unsigned long));
- region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
- p2m_batch = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
-
- if ((p2m == NULL) || (pfn_type == NULL) ||
- (region_mfn == NULL) || (p2m_batch == NULL)) {
- ERROR("memory alloc failed");
- errno = ENOMEM;
- goto out;
- }
-
- if (lock_pages(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) {
- ERROR("Could not lock region_mfn");
- goto out;
- }
-
- if (lock_pages(p2m_batch, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) {
- ERROR("Could not lock p2m_batch");
- goto out;
- }
-
- /* Get the domain's shared-info frame. */
- domctl.cmd = XEN_DOMCTL_getdomaininfo;
- domctl.domain = (domid_t)dom;
- if (xc_domctl(xc_handle, &domctl) < 0) {
- ERROR("Could not get information on new domain");
- goto out;
- }
- shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
-
- /* Mark all PFNs as invalid; we allocate on demand */
- for ( pfn = 0; pfn < p2m_size; pfn++ )
- p2m[pfn] = INVALID_P2M_ENTRY;
-
- if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) {
- ERROR("Could not initialise for MMU updates");
- goto out;
- }
-
- DPRINTF("Reloading memory pages: 0%%\n");
-
- /*
- * Now simply read each saved frame into its new machine frame.
- * We uncanonicalise page tables as we go.
- */
- prev_pc = 0;
-
- n = m = 0;
- while (1) {
-
- int j, nr_mfns = 0;
-
- this_pc = (n * 100) / p2m_size;
- if ( (this_pc - prev_pc) >= 5 )
- {
- PPRINTF("\b\b\b\b%3d%%", this_pc);
- prev_pc = this_pc;
- }
-
- if (!read_exact(io_fd, &j, sizeof(int))) {
- ERROR("Error when reading batch size");
- goto out;
- }
-
- PPRINTF("batch %d\n",j);
-
- if (j == -1) {
- verify = 1;
- DPRINTF("Entering page verify mode\n");
- continue;
- }
-
- if (j == -2) {
- new_ctxt_format = 1;
- if (!read_exact(io_fd, &max_vcpu_id, sizeof(int)) ||
- (max_vcpu_id >= 64) ||
- !read_exact(io_fd, &vcpumap, sizeof(uint64_t))) {
- ERROR("Error when reading max_vcpu_id");
- goto out;
- }
- continue;
- }
-
- if (j == 0)
- break; /* our work here is done */
-
- if (j > MAX_BATCH_SIZE) {
- ERROR("Max batch size exceeded. Giving up.");
- goto out;
- }
-
- if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) {
- ERROR("Error when reading region pfn types");
- goto out;
- }
-
- /* First pass for this batch: work out how much memory to alloc */
- nr_mfns = 0;
- for ( i = 0; i < j; i++ )
- {
- unsigned long pfn, pagetype;
- pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
- pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
-
- if ( (pagetype != XEN_DOMCTL_PFINFO_XTAB) &&
- (p2m[pfn] == INVALID_P2M_ENTRY) )
- {
- /* Have a live PFN which hasn't had an MFN allocated */
- p2m_batch[nr_mfns++] = pfn;
- }
- }
-
-
- /* Now allocate a bunch of mfns for this batch */
- if (nr_mfns && xc_domain_memory_populate_physmap(
- xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) {
- ERROR("Failed to allocate memory for batch.!\n");
- errno = ENOMEM;
- goto out;
- }
-
- /* Second pass for this batch: update p2m[] and region_mfn[] */
- nr_mfns = 0;
- for ( i = 0; i < j; i++ )
- {
- unsigned long pfn, pagetype;
- pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
- pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
-
- if ( pagetype == XEN_DOMCTL_PFINFO_XTAB)
- region_mfn[i] = ~0UL; /* map will fail but we don't care */
- else
- {
- if (p2m[pfn] == INVALID_P2M_ENTRY) {
- /* We just allocated a new mfn above; update p2m */
- p2m[pfn] = p2m_batch[nr_mfns++];
- nr_pfns++;
- }
-
- /* setup region_mfn[] for batch map */
- region_mfn[i] = p2m[pfn];
- }
- }
-
- /* Map relevant mfns */
- region_base = xc_map_foreign_batch(
- xc_handle, dom, PROT_WRITE, region_mfn, j);
-
- if ( region_base == NULL )
- {
- ERROR("map batch failed");
- goto out;
- }
-
- for ( i = 0; i < j; i++ )
- {
- void *page;
- unsigned long pagetype;
-
- pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
- pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
-
- if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
- /* a bogus/unmapped page: skip it */
- continue;
-
- if ( pfn > p2m_size )
- {
- ERROR("pfn out of range");
- goto out;
- }
-
- pfn_type[pfn] = pagetype;
-
- mfn = p2m[pfn];
-
- /* In verify mode, we use a copy; otherwise we work in place */
- page = verify ? (void *)buf : (region_base + i*PAGE_SIZE);
-
- if (!read_exact(io_fd, page, PAGE_SIZE)) {
- ERROR("Error when reading page (type was %lx)", pagetype);
- goto out;
- }
-
- pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
-
- if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) &&
- (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) )
- {
- /*
- ** A page table page - need to 'uncanonicalize' it, i.e.
- ** replace all the references to pfns with the corresponding
- ** mfns for the new domain.
- **
- ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
- ** so we may need to update the p2m after the main loop.
- ** Hence we defer canonicalization of L1s until then.
- */
- if ((pt_levels != 3) ||
- pae_extended_cr3 ||
- (pagetype != XEN_DOMCTL_PFINFO_L1TAB)) {
-
- if (!uncanonicalize_pagetable(xc_handle, dom,
- pagetype, page)) {
- /*
- ** Failing to uncanonicalize a page table can be ok
- ** under live migration since the pages type may have
- ** changed by now (and we'll get an update later).
- */
- DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
- pagetype >> 28, pfn, mfn);
- nraces++;
- continue;
- }
- }
- }
- else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB )
- {
- ERROR("Bogus page type %lx page table is out of range: "
- "i=%d p2m_size=%lu", pagetype, i, p2m_size);
- goto out;
-
- }
-
-
- if (verify) {
-
- int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE);
-
- if (res) {
-
- int v;
-
- DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx "
- "actualcs=%08lx\n", pfn, pfn_type[pfn],
- csum_page(region_base + i*PAGE_SIZE),
- csum_page(buf));
-
- for (v = 0; v < 4; v++) {
-
- unsigned long *p = (unsigned long *)
- (region_base + i*PAGE_SIZE);
- if (buf[v] != p[v])
- DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]);
- }
- }
- }
-
- if (xc_add_mmu_update(xc_handle, mmu,
- (((unsigned long long)mfn) << PAGE_SHIFT)
- | MMU_MACHPHYS_UPDATE, pfn)) {
- ERROR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn);
- goto out;
- }
- } /* end of 'batch' for loop */
-
- munmap(region_base, j*PAGE_SIZE);
- n+= j; /* crude stats */
-
- /*
- * Discard cache for portion of file read so far up to last
- * page boundary every 16MB or so.
- */
- m += j;
- if ( m > MAX_PAGECACHE_USAGE )
- {
- discard_file_cache(io_fd, 0 /* no flush */);
- m = 0;
- }
- }
-
- /*
- * Ensure we flush all machphys updates before potential PAE-specific
- * reallocations below.
- */
- if (xc_finish_mmu_updates(xc_handle, mmu)) {
- ERROR("Error doing finish_mmu_updates()");
- goto out;
- }
-
- DPRINTF("Received all pages (%d races)\n", nraces);
-
- if ((pt_levels == 3) && !pae_extended_cr3) {
-
- /*
- ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
- ** is a little awkward and involves (a) finding all such PGDs and
- ** replacing them with 'lowmem' versions; (b) updating the p2m[]
- ** with the new info; and (c) canonicalizing all the L1s using the
- ** (potentially updated) p2m[].
- **
- ** This is relatively slow (and currently involves two passes through
- ** the pfn_type[] array), but at least seems to be correct. May wish
- ** to consider more complex approaches to optimize this later.
- */
-
- int j, k;
-
- /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
- for ( i = 0; i < p2m_size; i++ )
- {
- if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
- XEN_DOMCTL_PFINFO_L3TAB) &&
- (p2m[i] > 0xfffffUL) )
- {
- unsigned long new_mfn;
- uint64_t l3ptes[4];
- uint64_t *l3tab;
-
- l3tab = (uint64_t *)
- xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
- PROT_READ, p2m[i]);
-
- for(j = 0; j < 4; j++)
- l3ptes[j] = l3tab[j];
-
- munmap(l3tab, PAGE_SIZE);
-
- if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
- ERROR("Couldn't get a page below 4GB :-(");
- goto out;
- }
-
- p2m[i] = new_mfn;
- if (xc_add_mmu_update(xc_handle, mmu,
- (((unsigned long long)new_mfn)
- << PAGE_SHIFT) |
- MMU_MACHPHYS_UPDATE, i)) {
- ERROR("Couldn't m2p on PAE root pgdir");
- goto out;
- }
-
- l3tab = (uint64_t *)
- xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
- PROT_READ | PROT_WRITE, p2m[i]);
-
- for(j = 0; j < 4; j++)
- l3tab[j] = l3ptes[j];
-
- munmap(l3tab, PAGE_SIZE);
-
- }
- }
-
- /* Second pass: find all L1TABs and uncanonicalize them */
- j = 0;
-
- for ( i = 0; i < p2m_size; i++ )
- {
- if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
- XEN_DOMCTL_PFINFO_L1TAB) )
- {
- region_mfn[j] = p2m[i];
- j++;
- }
-
- if (i == (p2m_size-1) || j == MAX_BATCH_SIZE) {
-
- if (!(region_base = xc_map_foreign_batch(
- xc_handle, dom, PROT_READ | PROT_WRITE,
- region_mfn, j))) {
- ERROR("map batch failed");
- goto out;
- }
-
- for (k = 0; k < j; k++) {
- if (!uncanonicalize_pagetable(xc_handle, dom,
- XEN_DOMCTL_PFINFO_L1TAB,
- region_base + k*PAGE_SIZE)) {
- ERROR("failed to uncanonicalize page table");
- goto out;
- }
- }
-
- munmap(region_base, j*PAGE_SIZE);
- j = 0;
- }
- }
-
- if (xc_finish_mmu_updates(xc_handle, mmu)) {
- ERROR("Error doing finish_mmu_updates()");
- goto out;
- }
- }
-
- /*
- * Pin page tables. Do this after writing to them as otherwise Xen
- * will barf when doing the type-checking.
- */
- nr_pins = 0;
- for ( i = 0; i < p2m_size; i++ )
- {
- if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
- continue;
-
- switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
- {
- case XEN_DOMCTL_PFINFO_L1TAB:
- pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
- break;
-
- case XEN_DOMCTL_PFINFO_L2TAB:
- pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
- break;
-
- case XEN_DOMCTL_PFINFO_L3TAB:
- pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
- break;
-
- case XEN_DOMCTL_PFINFO_L4TAB:
- pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
- break;
-
- default:
- continue;
- }
-
- pin[nr_pins].arg1.mfn = p2m[i];
- nr_pins++;
-
- /* Batch full? Then flush. */
- if (nr_pins == MAX_PIN_BATCH) {
- if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) {
- ERROR("Failed to pin batch of %d page tables", nr_pins);
- goto out;
- }
- nr_pins = 0;
- }
- }
-
- /* Flush final partial batch. */
- if ((nr_pins != 0) && (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0)) {
- ERROR("Failed to pin batch of %d page tables", nr_pins);
- goto out;
- }
-
- DPRINTF("\b\b\b\b100%%\n");
- DPRINTF("Memory reloaded (%ld pages)\n", nr_pfns);
-
- /* Get the list of PFNs that are not in the pseudo-phys map */
- {
- unsigned int count;
- unsigned long *pfntab;
- int nr_frees, rc;
-
- if (!read_exact(io_fd, &count, sizeof(count))) {
- ERROR("Error when reading pfn count");
- goto out;
- }
-
- if (!(pfntab = malloc(sizeof(unsigned long) * count))) {
- ERROR("Out of memory");
- goto out;
- }
-
- if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) {
- ERROR("Error when reading pfntab");
- goto out;
- }
-
- nr_frees = 0;
- for (i = 0; i < count; i++) {
-
- unsigned long pfn = pfntab[i];
-
- if (p2m[pfn] != INVALID_P2M_ENTRY) {
- /* pfn is not in physmap now, but was at some point during
- the save/migration process - need to free it */
- pfntab[nr_frees++] = p2m[pfn];
- p2m[pfn] = INVALID_P2M_ENTRY; /* not in pseudo-physical map */
- }
- }
-
- if (nr_frees > 0) {
-
- struct xen_memory_reservation reservation = {
- .nr_extents = nr_frees,
- .extent_order = 0,
- .domid = dom
- };
- set_xen_guest_handle(reservation.extent_start, pfntab);
-
- if ((rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation,
- &reservation)) != nr_frees) {
- ERROR("Could not decrease reservation : %d", rc);
- goto out;
- } else
- DPRINTF("Decreased reservation by %d pages\n", count);
- }
- }
-
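- /* Restore each online vcpu's context, uncanonicalising the frame
- numbers embedded in it (suspend record, GDT frames, CR3/CR1). */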
- for (i = 0; i <= max_vcpu_id; i++) {
- if (!(vcpumap & (1ULL << i)))
- continue;
-
- if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
- ERROR("Error when reading ctxt %d", i);
- goto out;
- }
-
- if ( !new_ctxt_format )
- ctxt.flags |= VGCF_online;
-
- if (i == 0) {
- /*
- * Uncanonicalise the suspend-record frame number and poke
- * resume record.
- */
- pfn = ctxt.user_regs.edx;
- if ((pfn >= p2m_size) ||
- (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
- ERROR("Suspend record frame number is bad");
- goto out;
- }
- ctxt.user_regs.edx = mfn = p2m[pfn];
- start_info = xc_map_foreign_range(
- xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
- start_info->nr_pages = p2m_size;
- start_info->shared_info = shared_info_frame << PAGE_SHIFT;
- start_info->flags = 0;
- *store_mfn = start_info->store_mfn = p2m[start_info->store_mfn];
- start_info->store_evtchn = store_evtchn;
- start_info->console.domU.mfn = p2m[start_info->console.domU.mfn];
- start_info->console.domU.evtchn = console_evtchn;
- *console_mfn = start_info->console.domU.mfn;
- munmap(start_info, PAGE_SIZE);
- }
-
- /* Uncanonicalise each GDT frame number. */
- if (ctxt.gdt_ents > 8192) {
- ERROR("GDT entry count out of range");
- goto out;
- }
-
- for (j = 0; (512*j) < ctxt.gdt_ents; j++) {
- pfn = ctxt.gdt_frames[j];
- if ((pfn >= p2m_size) ||
- (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
- ERROR("GDT frame number is bad");
- goto out;
- }
- ctxt.gdt_frames[j] = p2m[pfn];
- }
-
- /* Uncanonicalise the page table base pointer. */
- pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]);
-
- if (pfn >= p2m_size) {
- ERROR("PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
- pfn, p2m_size, pfn_type[pfn]);
- goto out;
- }
-
- if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
- ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
- ERROR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
- pfn, p2m_size, pfn_type[pfn],
- (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
- goto out;
- }
-
- ctxt.ctrlreg[3] = xen_pfn_to_cr3(p2m[pfn]);
-
- /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
- if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
- {
- pfn = xen_cr3_to_pfn(ctxt.ctrlreg[1]);
-
- if (pfn >= p2m_size) {
- ERROR("User PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
- pfn, p2m_size, pfn_type[pfn]);
- goto out;
- }
-
- if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
- ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
- ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
- pfn, p2m_size, pfn_type[pfn],
- (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
- goto out;
- }
-
- ctxt.ctrlreg[1] = xen_pfn_to_cr3(p2m[pfn]);
- }
-
- domctl.cmd = XEN_DOMCTL_setvcpucontext;
- domctl.domain = (domid_t)dom;
- domctl.u.vcpucontext.vcpu = i;
- set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt);
- rc = xc_domctl(xc_handle, &domctl);
- if (rc != 0) {
- ERROR("Couldn't build vcpu%d", i);
- goto out;
- }
- }
-
- if (!read_exact(io_fd, shared_info_page, PAGE_SIZE)) {
- ERROR("Error when reading shared info page");
- goto out;
- }
-
- /* clear any pending events and the selector */
- memset(&(shared_info->evtchn_pending[0]), 0,
- sizeof (shared_info->evtchn_pending));
- for ( i = 0; i < MAX_VIRT_CPUS; i++ )
- shared_info->vcpu_info[i].evtchn_pending_sel = 0;
-
- /* Copy saved contents of shared-info page. No checking needed. */
- page = xc_map_foreign_range(
- xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame);
- memcpy(page, shared_info, PAGE_SIZE);
- munmap(page, PAGE_SIZE);
-
- /* Uncanonicalise the pfn-to-mfn table frame-number list. */
- for (i = 0; i < P2M_FL_ENTRIES; i++) {
- pfn = p2m_frame_list[i];
- if ((pfn >= p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
- ERROR("PFN-to-MFN frame number is bad");
- goto out;
- }
-
- p2m_frame_list[i] = p2m[pfn];
- }
-
- /* Copy the P2M we've constructed to the 'live' P2M */
- if (!(live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_WRITE,
- p2m_frame_list, P2M_FL_ENTRIES))) {
- ERROR("Couldn't map p2m table");
- goto out;
- }
-
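- /* ROUNDUP's second argument is a shift width, so this copies a
- whole number of pages. */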
- memcpy(live_p2m, p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
- munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
-
- DPRINTF("Domain ready to be built.\n");
-
- out:
- if ( (rc != 0) && (dom != 0) )
- xc_domain_destroy(xc_handle, dom);
- free(mmu);
- free(p2m);
- free(pfn_type);
-
- /* discard cache for save file */
- discard_file_cache(io_fd, 1 /*flush*/);
-
- DPRINTF("Restore exit with rc=%d\n", rc);
-
- return rc;
-}
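
For reference, the pinning loop above follows a fill-and-flush batching
pattern. A minimal standalone sketch of that pattern (not part of this
changeset; the helper name is hypothetical, while struct mmuext_op,
xc_mmuext_op() and MAX_PIN_BATCH are as used in the code above):

    #include <stdint.h>
    #include <xenctrl.h>   /* struct mmuext_op, MMUEXT_*, xc_mmuext_op() */

    /* Queue one pin operation; flush the batch to Xen when it fills.
     * The caller must flush any final partial batch itself, exactly as
     * the loop above does. */
    static int queue_pin_op(int xc_handle, uint32_t dom,
                            struct mmuext_op *pin, unsigned int *nr_pins,
                            unsigned int cmd, unsigned long mfn)
    {
        pin[*nr_pins].cmd = cmd;
        pin[*nr_pins].arg1.mfn = mfn;

        if ( ++*nr_pins == MAX_PIN_BATCH )
        {
            if ( xc_mmuext_op(xc_handle, pin, *nr_pins, dom) < 0 )
                return -1;
            *nr_pins = 0;
        }
        return 0;
    }
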
diff -r 602d061ff51f -r e518f2fbdd72 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h Thu Apr 05 10:43:50 2007 +0100
+++ b/tools/libxc/xenguest.h Thu Apr 05 15:11:22 2007 +0100
@@ -38,29 +38,21 @@ int xc_hvm_save(int xc_handle, int io_fd
void (*qemu_flip_buffer)(int, int));
/**
- * This function will restore a saved domain running Linux.
+ * This function will restore a saved domain.
*
* @parm xc_handle a handle to an open hypervisor interface
* @parm fd the file descriptor to restore a domain from
* @parm dom the id of the domain
* @parm store_evtchn the store event channel for this domain to use
* @parm store_mfn returned with the mfn of the store page
+ * @parm console_evtchn the console event channel for this domain to use
+ * @parm console_mfn returned with the mfn of the console page
+ * @parm hvm non-zero if this is an HVM restore
+ * @parm pae non-zero if this HVM domain has PAE support enabled
* @return 0 on success, -1 on failure
*/
-int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
- unsigned int store_evtchn, unsigned long *store_mfn,
- unsigned int console_evtchn, unsigned long *console_mfn);
-
-/**
- * This function will restore a saved hvm domain running unmodified guest.
- *
- * @parm store_mfn pass mem size & returned with the mfn of the store page
- * @return 0 on success, -1 on failure
- */
-int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom,
- unsigned int store_evtchn,
- unsigned long *store_mfn,
- unsigned int pae, unsigned int apic);
+int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
+ unsigned int store_evtchn, unsigned long *store_mfn,
+ unsigned int console_evtchn, unsigned long *console_mfn,
+ unsigned int hvm, unsigned int pae);
/**
* This function will create a domain for a paravirtualized Linux
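
The merged entry point keeps a single page-loading path and branches
only where PV and HVM restores genuinely differ. A rough sketch of the
assumed dispatch shape (illustration only; the authoritative body is
the new xc_domain_restore.c earlier in this patch):

    /* Sketch: overall control flow of a merged restore path.  All
     * detail and error handling omitted; nothing here is verbatim. */
    static int restore_shape(int hvm, int pae)
    {
        /* Common: read page batches from the fd, populate guest memory. */
        (void)pae;  /* consumed by the HVM path in the real code */

        if ( hvm )
        {
            /* HVM tail: load the HVM context, set HVM params and the
             * magic (ioreq/store) pages. */
        }
        else
        {
            /* PV tail: uncanonicalise and pin page tables, restore
             * vcpu contexts, shared info and the live P2M. */
        }
        return 0;
    }
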
diff -r 602d061ff51f -r e518f2fbdd72 tools/libxc/xg_private.c
--- a/tools/libxc/xg_private.c Thu Apr 05 10:43:50 2007 +0100
+++ b/tools/libxc/xg_private.c Thu Apr 05 15:11:22 2007 +0100
@@ -204,16 +204,6 @@ __attribute__((weak))
int (*suspend)(int domid),
void *(*init_qemu_maps)(int, unsigned),
void (*qemu_flip_buffer)(int, int))
-{
- errno = ENOSYS;
- return -1;
-}
-
-__attribute__((weak))
- int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom,
- unsigned int store_evtchn,
- unsigned long *store_mfn,
- unsigned int pae, unsigned int apic)
{
errno = ENOSYS;
return -1;
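
The hunk above removes the now-redundant weak stub for xc_hvm_restore
(the surviving "errno = ENOSYS" body belongs to the preceding weak
definition). For reference, the weak-stub pattern used throughout
xg_private.c looks like this (sketch with an illustrative name; whether
the merged xc_domain_restore gets a stub of its own is not shown in
this hunk):

    #include <errno.h>
    #include <stdint.h>

    /* Weak fallback: if the real save/restore objects are not linked
     * in (e.g. CONFIG_MIGRATE disabled), this definition satisfies the
     * linker and fails cleanly at runtime instead of breaking the
     * build. */
    __attribute__((weak))
    int example_restore_stub(int xc_handle, int io_fd, uint32_t dom)
    {
        errno = ENOSYS;
        return -1;
    }
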
diff -r 602d061ff51f -r e518f2fbdd72 tools/xcutils/xc_restore.c
--- a/tools/xcutils/xc_restore.c Thu Apr 05 10:43:50 2007 +0100
+++ b/tools/xcutils/xc_restore.c Thu Apr 05 15:11:22 2007 +0100
@@ -39,14 +39,8 @@ main(int argc, char **argv)
pae = atoi(argv[6]);
apic = atoi(argv[7]);
- if ( hvm )
- ret = xc_hvm_restore(xc_fd, io_fd, domid,
- store_evtchn, &store_mfn,
- pae, apic);
- else
- ret = xc_linux_restore(xc_fd, io_fd, domid,
- store_evtchn, &store_mfn,
- console_evtchn, &console_mfn);
+ ret = xc_domain_restore(xc_fd, io_fd, domid, store_evtchn, &store_mfn,
+ console_evtchn, &console_mfn, hvm, pae);
if ( ret == 0 )
{
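
The hunk ends inside the "if ( ret == 0 )" block. For context, a helper
of this shape typically reports the returned MFNs back to the toolstack
that spawned it by printing them on stdout; a hedged sketch (an
assumption about the surrounding code, not part of this patch):

    #include <stdio.h>

    /* Assumed reporting step: hand the out-parameters of
     * xc_domain_restore() back to the caller of xc_restore.  The exact
     * output format here is illustrative. */
    static void report_mfns(int ret, unsigned int hvm,
                            unsigned long store_mfn,
                            unsigned long console_mfn)
    {
        if ( ret != 0 )
            return;
        printf("store-mfn %lu\n", store_mfn);
        if ( !hvm )  /* only PV guests hand back a console frame here */
            printf("console-mfn %lu\n", console_mfn);
    }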