# HG changeset patch
# User yamahata@xxxxxxxxxxxxx
# Date 1192097770 -32400
# Node ID 8321f8577a60f64b4999328183be49c5fa2c7c69
# Parent 6f9435bb6a195a52cb43799b82cc24cdc7a298f3
libxc: vti domain save/restore support
PATCHNAME: libxc_vti_domain_save_restore
Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
diff -r 6f9435bb6a19 -r 8321f8577a60 tools/libxc/ia64/xc_ia64_linux_restore.c
--- a/tools/libxc/ia64/xc_ia64_linux_restore.c Thu Oct 11 19:10:56 2007 +0900
+++ b/tools/libxc/ia64/xc_ia64_linux_restore.c Thu Oct 11 19:16:10 2007 +0900
@@ -8,6 +8,7 @@
*
* Copyright (c) 2007 Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
* Use foreign p2m exposure.
+ * VTi domain support
*/
#include <stdlib.h>
@@ -17,6 +18,7 @@
#include "xc_ia64_save_restore.h"
#include "xc_ia64.h"
#include "xc_efi.h"
+#include "xen/hvm/params.h"
#define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10))
@@ -75,250 +77,116 @@ read_page(int xc_handle, int io_fd, uint
return 0;
}
-int
-xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
- unsigned int store_evtchn, unsigned long *store_mfn,
- unsigned int console_evtchn, unsigned long *console_mfn,
- unsigned int hvm, unsigned int pae)
-{
- DECLARE_DOMCTL;
- int rc = 1;
+/*
+ * Get the list of PFNs that are not in the pseudo-phys map.
+ * Although we allocate pages on demand, the balloon driver may
+ * have decreased them simultaneously, so we have to free the
+ * freed pages here.
+ */
+static int
+xc_ia64_recv_unallocated_list(int xc_handle, int io_fd, uint32_t dom,
+ struct xen_ia64_p2m_table *p2m_table)
+{
+ int rc = -1;
unsigned int i;
- unsigned long gmfn;
- unsigned long ver;
-
- /* The new domain's shared-info frame number. */
- unsigned long shared_info_frame;
- unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
- shared_info_t *shared_info = (shared_info_t *)shared_info_page;
-
- /* A copy of the CPU context of the guest. */
- vcpu_guest_context_t ctxt;
-
- /* A temporary mapping of the guest's start_info page. */
- start_info_t *start_info;
-
- struct xen_ia64_p2m_table p2m_table;
- xc_ia64_p2m_init(&p2m_table);
-
- if (hvm) {
- ERROR("HVM Restore is unsupported");
- goto out;
- }
-
- /* For info only */
- nr_pfns = 0;
-
- if ( !read_exact(io_fd, &p2m_size, sizeof(unsigned long)) )
- {
- ERROR("read: p2m_size");
- goto out;
- }
- DPRINTF("xc_linux_restore start: p2m_size = %lx\n", p2m_size);
-
- if (!read_exact(io_fd, &ver, sizeof(unsigned long))) {
- ERROR("Error when reading version");
- goto out;
- }
- if (ver != XC_IA64_SR_FORMAT_VER_ONE && ver != XC_IA64_SR_FORMAT_VER_TWO) {
- ERROR("version of save doesn't match");
- goto out;
- }
-
- if (lock_pages(&ctxt, sizeof(ctxt))) {
- /* needed for build domctl, but might as well do early */
- ERROR("Unable to lock_pages ctxt");
- return 1;
- }
-
- if (!read_exact(io_fd, &domctl.u.arch_setup, sizeof(domctl.u.arch_setup)))
{
- ERROR("read: domain setup");
- goto out;
- }
-
- /* Build firmware (will be overwritten). */
- domctl.domain = (domid_t)dom;
- domctl.u.arch_setup.flags &= ~XEN_DOMAINSETUP_query;
- domctl.u.arch_setup.bp = 0; /* indicate domain restore */
+ unsigned int count;
+ unsigned long *pfntab = NULL;
+ unsigned int nr_frees;
+
+ if (!read_exact(io_fd, &count, sizeof(count))) {
+ ERROR("Error when reading pfn count");
+ goto out;
+ }
+
+ pfntab = malloc(sizeof(unsigned long) * count);
+ if (pfntab == NULL) {
+ ERROR("Out of memory");
+ goto out;
+ }
+
+ if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) {
+ ERROR("Error when reading pfntab");
+ goto out;
+ }
+
+ nr_frees = 0;
+ for (i = 0; i < count; i++) {
+ if (xc_ia64_p2m_allocated(p2m_table, pfntab[i])) {
+ pfntab[nr_frees] = pfntab[i];
+ nr_frees++;
+ }
+ }
+ if (nr_frees > 0) {
+ if (xc_domain_memory_decrease_reservation(xc_handle, dom, nr_frees,
+ 0, pfntab) < 0) {
+ PERROR("Could not decrease reservation");
+ goto out;
+ }
+ else
+ DPRINTF("Decreased reservation by %d / %d pages\n",
+ nr_frees, count);
+ }
+
+ rc = 0;
- domctl.cmd = XEN_DOMCTL_arch_setup;
- if (xc_domctl(xc_handle, &domctl))
- goto out;
-
- /* Get the domain's shared-info frame. */
- domctl.cmd = XEN_DOMCTL_getdomaininfo;
- domctl.domain = (domid_t)dom;
- if (xc_domctl(xc_handle, &domctl) < 0) {
- ERROR("Could not get information on new domain");
- goto out;
- }
- shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
-
- if (ver == XC_IA64_SR_FORMAT_VER_TWO) {
- unsigned int memmap_info_num_pages;
- unsigned long memmap_size;
- xen_ia64_memmap_info_t *memmap_info;
-
- if (!read_exact(io_fd, &memmap_info_num_pages,
- sizeof(memmap_info_num_pages))) {
- ERROR("read: memmap_info_num_pages");
- goto out;
- }
- memmap_size = memmap_info_num_pages * PAGE_SIZE;
- memmap_info = malloc(memmap_size);
- if (memmap_info == NULL) {
- ERROR("Could not allocate memory for memmap_info");
- goto out;
- }
- if (!read_exact(io_fd, memmap_info, memmap_size)) {
- ERROR("read: memmap_info");
- goto out;
- }
- if (xc_ia64_p2m_map(&p2m_table, xc_handle,
- dom, memmap_info, IA64_DOM0VP_EFP_ALLOC_PTE)) {
- ERROR("p2m mapping");
- goto out;
- }
- free(memmap_info);
- } else if (ver == XC_IA64_SR_FORMAT_VER_ONE) {
- xen_ia64_memmap_info_t *memmap_info;
- efi_memory_desc_t *memdesc;
- uint64_t buffer[(sizeof(*memmap_info) + sizeof(*memdesc) +
- sizeof(uint64_t) - 1) / sizeof(uint64_t)];
-
- memset(buffer, 0, sizeof(buffer));
- memmap_info = (xen_ia64_memmap_info_t *)buffer;
- memdesc = (efi_memory_desc_t*)&memmap_info->memdesc[0];
- memmap_info->efi_memmap_size = sizeof(*memmap_info) + sizeof(*memdesc);
- memmap_info->efi_memdesc_size = sizeof(*memdesc);
- memmap_info->efi_memdesc_version = EFI_MEMORY_DESCRIPTOR_VERSION;
-
- memdesc->type = EFI_MEMORY_DESCRIPTOR_VERSION;
- memdesc->phys_addr = 0;
- memdesc->virt_addr = 0;
- memdesc->num_pages = nr_pfns << (PAGE_SHIFT - EFI_PAGE_SHIFT);
- memdesc->attribute = EFI_MEMORY_WB;
-
- if (xc_ia64_p2m_map(&p2m_table, xc_handle,
- dom, memmap_info, IA64_DOM0VP_EFP_ALLOC_PTE)) {
- ERROR("p2m mapping");
- goto out;
- }
- } else {
- ERROR("unknown version");
- goto out;
- }
-
- DPRINTF("Reloading memory pages: 0%%\n");
-
- while (1) {
- if (!read_exact(io_fd, &gmfn, sizeof(unsigned long))) {
- ERROR("Error when reading batch size");
- goto out;
- }
- if (gmfn == INVALID_MFN)
- break;
-
- if (populate_page_if_necessary(xc_handle, dom, gmfn, &p2m_table) < 0) {
- ERROR("can not populate page 0x%lx", gmfn);
- goto out;
- }
- if (read_page(xc_handle, io_fd, dom, gmfn) < 0)
- goto out;
- }
-
- DPRINTF("Received all pages\n");
-
- /*
- * Get the list of PFNs that are not in the psuedo-phys map.
- * Although we allocate pages on demand, balloon driver may
- * decreased simaltenously. So we have to free the freed
- * pages here.
- */
- {
- unsigned int count;
- unsigned long *pfntab;
- unsigned int nr_frees;
-
- if (!read_exact(io_fd, &count, sizeof(count))) {
- ERROR("Error when reading pfn count");
- goto out;
- }
-
- pfntab = malloc(sizeof(unsigned long) * count);
- if (!pfntab) {
- ERROR("Out of memory");
- goto out;
- }
-
- if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) {
- ERROR("Error when reading pfntab");
- free(pfntab);
- goto out;
- }
-
- nr_frees = 0;
- for (i = 0; i < count; i++) {
- if (xc_ia64_p2m_allocated(&p2m_table, pfntab[i])) {
- pfntab[nr_frees] = pfntab[i];
- nr_frees++;
- }
- }
- if (nr_frees > 0) {
- if (xc_domain_memory_decrease_reservation(xc_handle, dom, nr_frees,
- 0, pfntab) < 0) {
- ERROR("Could not decrease reservation : %d", rc);
- free(pfntab);
- goto out;
- }
- else
- DPRINTF("Decreased reservation by %d / %d pages\n",
- nr_frees, count);
- }
+ out:
+ if (pfntab != NULL)
free(pfntab);
- }
-
- if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
+ return rc;
+}
+
+static int
+xc_ia64_recv_vcpu_context(int xc_handle, int io_fd, uint32_t dom,
+ uint32_t vcpu, vcpu_guest_context_t *ctxt)
+{
+ if (!read_exact(io_fd, ctxt, sizeof(*ctxt))) {
ERROR("Error when reading ctxt");
- goto out;
- }
-
- fprintf(stderr, "ip=%016lx, b0=%016lx\n", ctxt.regs.ip, ctxt.regs.b[0]);
+ return -1;
+ }
+
+ fprintf(stderr, "ip=%016lx, b0=%016lx\n", ctxt->regs.ip, ctxt->regs.b[0]);
/* Initialize and set registers. */
- ctxt.flags = VGCF_EXTRA_REGS;
- domctl.cmd = XEN_DOMCTL_setvcpucontext;
- domctl.domain = (domid_t)dom;
- domctl.u.vcpucontext.vcpu = 0;
- set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt);
- if (xc_domctl(xc_handle, &domctl) != 0) {
+ ctxt->flags = VGCF_EXTRA_REGS;
+ if (xc_vcpu_setcontext(xc_handle, dom, vcpu, ctxt) != 0) {
ERROR("Couldn't set vcpu context");
- goto out;
+ return -1;
}
/* Just a check. */
- if (xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, &ctxt)) {
+ ctxt->flags = 0;
+ if (xc_vcpu_getcontext(xc_handle, dom, vcpu, ctxt)) {
ERROR("Could not get vcpu context");
- goto out;
- }
-
- /* Then get privreg page. */
- if (read_page(xc_handle, io_fd, dom, ctxt.privregs_pfn) < 0) {
- ERROR("Could not read vcpu privregs");
- goto out;
- }
-
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Read shared info. */
+static int
+xc_ia64_recv_shared_info(int xc_handle, int io_fd, uint32_t dom,
+ unsigned long shared_info_frame,
+ unsigned long *start_info_pfn)
+{
+ unsigned int i;
+
+ /* The new domain's shared-info frame. */
+ shared_info_t *shared_info;
+
/* Read shared info. */
shared_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
- PROT_READ|PROT_WRITE,
shared_info_frame);
+ PROT_READ|PROT_WRITE,
+ shared_info_frame);
if (shared_info == NULL) {
- ERROR("cannot map page");
- goto out;
- }
+ ERROR("cannot map page");
+ return -1;
+ }
+
if (!read_exact(io_fd, shared_info, PAGE_SIZE)) {
- ERROR("Error when reading shared_info page");
- munmap(shared_info, PAGE_SIZE);
- goto out;
+ ERROR("Error when reading shared_info page");
+ munmap(shared_info, PAGE_SIZE);
+ return -1;
}
/* clear any pending events and the selector */
@@ -327,12 +195,53 @@ xc_domain_restore(int xc_handle, int io_
for (i = 0; i < MAX_VIRT_CPUS; i++)
shared_info->vcpu_info[i].evtchn_pending_sel = 0;
- gmfn = shared_info->arch.start_info_pfn;
+ if (start_info_pfn != NULL)
+ *start_info_pfn = shared_info->arch.start_info_pfn;
munmap (shared_info, PAGE_SIZE);
+ return 0;
+}
+
+static int
+xc_ia64_pv_recv_context(int xc_handle, int io_fd, uint32_t dom,
+ unsigned long shared_info_frame,
+ struct xen_ia64_p2m_table *p2m_table,
+ unsigned int store_evtchn, unsigned long *store_mfn,
+ unsigned int console_evtchn,
+ unsigned long *console_mfn)
+{
+ int rc = -1;
+ unsigned long gmfn;
+
+ /* A copy of the CPU context of the guest. */
+ vcpu_guest_context_t ctxt;
+
+ /* A temporary mapping of the guest's start_info page. */
+ start_info_t *start_info;
+
+ if (lock_pages(&ctxt, sizeof(ctxt))) {
+ /* needed for build domctl, but might as well do early */
+ ERROR("Unable to lock_pages ctxt");
+ return -1;
+ }
+
+ if (xc_ia64_recv_vcpu_context(xc_handle, io_fd, dom, 0, &ctxt))
+ goto out;
+
+ /* Then get privreg page. */
+ if (read_page(xc_handle, io_fd, dom, ctxt.privregs_pfn) < 0) {
+ ERROR("Could not read vcpu privregs");
+ goto out;
+ }
+
+ /* Read shared info. */
+ if (xc_ia64_recv_shared_info(xc_handle, io_fd, dom,
+ shared_info_frame, &gmfn))
+ goto out;
+
/* Uncanonicalise the suspend-record frame number and poke resume rec. */
- if (populate_page_if_necessary(xc_handle, dom, gmfn, &p2m_table)) {
+ if (populate_page_if_necessary(xc_handle, dom, gmfn, p2m_table)) {
ERROR("cannot populate page 0x%lx", gmfn);
goto out;
}
@@ -350,6 +259,323 @@ xc_domain_restore(int xc_handle, int io_
*console_mfn = start_info->console.domU.mfn;
start_info->console.domU.evtchn = console_evtchn;
munmap(start_info, PAGE_SIZE);
+
+ rc = 0;
+
+ out:
+ unlock_pages(&ctxt, sizeof(ctxt));
+ return rc;
+}
+
+static int
+xc_ia64_hvm_recv_context(int xc_handle, int io_fd, uint32_t dom,
+ unsigned long shared_info_frame,
+ struct xen_ia64_p2m_table *p2m_table,
+ unsigned int store_evtchn, unsigned long *store_mfn,
+ unsigned int console_evtchn,
+ unsigned long *console_mfn)
+{
+ int rc = -1;
+ xc_dominfo_t info;
+ unsigned int i;
+
+ /* cpu */
+ uint64_t max_virt_cpus;
+ unsigned long vcpumap_size;
+ uint64_t *vcpumap = NULL;
+
+ /* HVM: magic frames for ioreqs and xenstore comms */
+ const int hvm_params[] = {
+ HVM_PARAM_IOREQ_PFN,
+ HVM_PARAM_BUFIOREQ_PFN,
+ HVM_PARAM_STORE_PFN,
+ };
+ const int NR_PARAMS = sizeof(hvm_params) / sizeof(hvm_params[0]);
+ /* ioreq_pfn, bufioreq_pfn, store_pfn */
+ uint64_t magic_pfns[NR_PARAMS];
+
+    /* HVM: a buffer for holding HVM context */
+ uint64_t rec_size = 0;
+ uint8_t *hvm_buf = NULL;
+
+ /* Read shared info. */
+ if (xc_ia64_recv_shared_info(xc_handle, io_fd, dom, shared_info_frame,
+ NULL))
+ goto out;
+
+ /* vcpu map */
+ if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) {
+ ERROR("Could not get domain info");
+ goto out;
+ }
+ if (!read_exact(io_fd, &max_virt_cpus, sizeof(max_virt_cpus))) {
+ ERROR("error reading max_virt_cpus");
+ goto out;
+ }
+ if (max_virt_cpus < info.max_vcpu_id) {
+ ERROR("too large max_virt_cpus %i < %i\n",
+ max_virt_cpus, info.max_vcpu_id);
+ goto out;
+ }
+ vcpumap_size = (max_virt_cpus + 1 + sizeof(vcpumap[0]) - 1) /
+ sizeof(vcpumap[0]);
+ vcpumap = malloc(vcpumap_size);
+ if (vcpumap == NULL) {
+ ERROR("memory alloc for vcpumap");
+ goto out;
+ }
+ memset(vcpumap, 0, vcpumap_size);
+ if (!read_exact(io_fd, vcpumap, vcpumap_size)) {
+ ERROR("read vcpumap");
+ goto out;
+ }
+
+ /* vcpu context */
+ for (i = 0; i <= info.max_vcpu_id; i++) {
+ /* A copy of the CPU context of the guest. */
+ vcpu_guest_context_t ctxt;
+
+ if (!__test_bit(i, vcpumap))
+ continue;
+
+ if (xc_ia64_recv_vcpu_context(xc_handle, io_fd, dom, i, &ctxt))
+ goto out;
+
+        // system context of vcpu is received as hvm context.
+ }
+
+ /* Set HVM-specific parameters */
+ if (!read_exact(io_fd, magic_pfns, sizeof(magic_pfns))) {
+ ERROR("error reading magic page addresses");
+ goto out;
+ }
+
+ /* These comms pages need to be zeroed at the start of day */
+ for (i = 0; i < NR_PARAMS; i++) {
+ rc = xc_clear_domain_page(xc_handle, dom, magic_pfns[i]);
+ if (rc != 0) {
+ ERROR("error zeroing magic pages: %i", rc);
+ goto out;
+ }
+ rc = xc_set_hvm_param(xc_handle, dom, hvm_params[i], magic_pfns[i]);
+ if (rc != 0) {
+ ERROR("error setting HVM params: %i", rc);
+ goto out;
+ }
+ }
+ rc = xc_set_hvm_param(xc_handle, dom,
+ HVM_PARAM_STORE_EVTCHN, store_evtchn);
+ if (rc != 0) {
+ ERROR("error setting HVM params: %i", rc);
+ goto out;
+ }
+ *store_mfn = magic_pfns[2];
+
+ /* Read HVM context */
+ if (!read_exact(io_fd, &rec_size, sizeof(rec_size))) {
+ ERROR("error read hvm context size!\n");
+ goto out;
+ }
+
+ hvm_buf = malloc(rec_size);
+ if (hvm_buf == NULL) {
+ ERROR("memory alloc for hvm context buffer failed");
+ errno = ENOMEM;
+ goto out;
+ }
+
+ if (!read_exact(io_fd, hvm_buf, rec_size)) {
+ ERROR("error loading the HVM context");
+ goto out;
+ }
+
+ rc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_size);
+ if (rc != 0) {
+ ERROR("error setting the HVM context");
+ goto out;
+ }
+
+ rc = 0;
+
+out:
+ if (vcpumap != NULL)
+ free(vcpumap);
+ if (hvm_buf != NULL)
+ free(hvm_buf);
+ return rc;
+}
+
+/*
+ * hvm domain requires IO pages allocated when XEN_DOMCTL_arch_setup
+ */
+static int
+xc_ia64_hvm_domain_setup(int xc_handle, uint32_t dom)
+{
+ int rc;
+ xen_pfn_t pfn_list[] = {
+ IO_PAGE_START >> PAGE_SHIFT,
+ BUFFER_IO_PAGE_START >> PAGE_SHIFT,
+ BUFFER_PIO_PAGE_START >> PAGE_SHIFT,
+ };
+ unsigned long nr_pages = sizeof(pfn_list) / sizeof(pfn_list[0]);
+
+ rc = xc_domain_memory_populate_physmap(xc_handle, dom, nr_pages,
+ 0, 0, &pfn_list[0]);
+ if (rc != 0)
+ PERROR("Could not allocate IO page or buffer io page.\n");
+ return rc;
+}
+
+int
+xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
+ unsigned int store_evtchn, unsigned long *store_mfn,
+ unsigned int console_evtchn, unsigned long *console_mfn,
+ unsigned int hvm, unsigned int pae)
+{
+ DECLARE_DOMCTL;
+ int rc = 1;
+ unsigned long ver;
+
+ /* The new domain's shared-info frame number. */
+ unsigned long shared_info_frame;
+
+ struct xen_ia64_p2m_table p2m_table;
+ xc_ia64_p2m_init(&p2m_table);
+
+ /* For info only */
+ nr_pfns = 0;
+
+ if ( !read_exact(io_fd, &p2m_size, sizeof(unsigned long)) )
+ {
+ ERROR("read: p2m_size");
+ goto out;
+ }
+ DPRINTF("xc_linux_restore start: p2m_size = %lx\n", p2m_size);
+
+ if (!read_exact(io_fd, &ver, sizeof(unsigned long))) {
+ ERROR("Error when reading version");
+ goto out;
+ }
+ if (ver != XC_IA64_SR_FORMAT_VER_ONE && ver != XC_IA64_SR_FORMAT_VER_TWO) {
+ ERROR("version of save doesn't match");
+ goto out;
+ }
+
+ if (!read_exact(io_fd, &domctl.u.arch_setup, sizeof(domctl.u.arch_setup)))
{
+ ERROR("read: domain setup");
+ goto out;
+ }
+
+ if (hvm && xc_ia64_hvm_domain_setup(xc_handle, dom) != 0)
+ goto out;
+
+ /* Build firmware (will be overwritten). */
+ domctl.domain = (domid_t)dom;
+ domctl.u.arch_setup.flags &= ~XEN_DOMAINSETUP_query;
+ domctl.u.arch_setup.bp = 0; /* indicate domain restore */
+
+ domctl.cmd = XEN_DOMCTL_arch_setup;
+ if (xc_domctl(xc_handle, &domctl))
+ goto out;
+
+ /* Get the domain's shared-info frame. */
+ domctl.cmd = XEN_DOMCTL_getdomaininfo;
+ domctl.domain = (domid_t)dom;
+ if (xc_domctl(xc_handle, &domctl) < 0) {
+ ERROR("Could not get information on new domain");
+ goto out;
+ }
+ shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
+
+ if (ver == XC_IA64_SR_FORMAT_VER_TWO) {
+ unsigned int memmap_info_num_pages;
+ unsigned long memmap_size;
+ xen_ia64_memmap_info_t *memmap_info;
+
+ if (!read_exact(io_fd, &memmap_info_num_pages,
+ sizeof(memmap_info_num_pages))) {
+ ERROR("read: memmap_info_num_pages");
+ goto out;
+ }
+ memmap_size = memmap_info_num_pages * PAGE_SIZE;
+ memmap_info = malloc(memmap_size);
+ if (memmap_info == NULL) {
+ ERROR("Could not allocate memory for memmap_info");
+ goto out;
+ }
+ if (!read_exact(io_fd, memmap_info, memmap_size)) {
+ ERROR("read: memmap_info");
+ goto out;
+ }
+ if (xc_ia64_p2m_map(&p2m_table, xc_handle,
+ dom, memmap_info, IA64_DOM0VP_EFP_ALLOC_PTE)) {
+ ERROR("p2m mapping");
+ goto out;
+ }
+ free(memmap_info);
+ } else if (ver == XC_IA64_SR_FORMAT_VER_ONE) {
+ xen_ia64_memmap_info_t *memmap_info;
+ efi_memory_desc_t *memdesc;
+ uint64_t buffer[(sizeof(*memmap_info) + sizeof(*memdesc) +
+ sizeof(uint64_t) - 1) / sizeof(uint64_t)];
+
+ memset(buffer, 0, sizeof(buffer));
+ memmap_info = (xen_ia64_memmap_info_t *)buffer;
+ memdesc = (efi_memory_desc_t*)&memmap_info->memdesc[0];
+ memmap_info->efi_memmap_size = sizeof(*memmap_info) + sizeof(*memdesc);
+ memmap_info->efi_memdesc_size = sizeof(*memdesc);
+ memmap_info->efi_memdesc_version = EFI_MEMORY_DESCRIPTOR_VERSION;
+
+ memdesc->type = EFI_MEMORY_DESCRIPTOR_VERSION;
+ memdesc->phys_addr = 0;
+ memdesc->virt_addr = 0;
+ memdesc->num_pages = nr_pfns << (PAGE_SHIFT - EFI_PAGE_SHIFT);
+ memdesc->attribute = EFI_MEMORY_WB;
+
+ if (xc_ia64_p2m_map(&p2m_table, xc_handle,
+ dom, memmap_info, IA64_DOM0VP_EFP_ALLOC_PTE)) {
+ ERROR("p2m mapping");
+ goto out;
+ }
+ } else {
+ ERROR("unknown version");
+ goto out;
+ }
+
+ DPRINTF("Reloading memory pages: 0%%\n");
+
+ while (1) {
+ unsigned long gmfn;
+ if (!read_exact(io_fd, &gmfn, sizeof(unsigned long))) {
+ ERROR("Error when reading batch size");
+ goto out;
+ }
+ if (gmfn == INVALID_MFN)
+ break;
+
+ if (populate_page_if_necessary(xc_handle, dom, gmfn, &p2m_table) < 0) {
+ ERROR("can not populate page 0x%lx", gmfn);
+ goto out;
+ }
+ if (read_page(xc_handle, io_fd, dom, gmfn) < 0)
+ goto out;
+ }
+
+ DPRINTF("Received all pages\n");
+
+ if (xc_ia64_recv_unallocated_list(xc_handle, io_fd, dom, &p2m_table))
+ goto out;
+
+ if (!hvm)
+ rc = xc_ia64_pv_recv_context(xc_handle, io_fd, dom, shared_info_frame,
+ &p2m_table, store_evtchn, store_mfn,
+ console_evtchn, console_mfn);
+ else
+ rc = xc_ia64_hvm_recv_context(xc_handle, io_fd, dom, shared_info_frame,
+ &p2m_table, store_evtchn, store_mfn,
+ console_evtchn, console_mfn);
+ if (rc)
+ goto out;
/*
* Safety checking of saved context:
@@ -368,12 +594,10 @@ xc_domain_restore(int xc_handle, int io_
rc = 0;
out:
+ xc_ia64_p2m_unmap(&p2m_table);
+
if ((rc != 0) && (dom != 0))
xc_domain_destroy(xc_handle, dom);
-
- xc_ia64_p2m_unmap(&p2m_table);
-
- unlock_pages(&ctxt, sizeof(ctxt));
DPRINTF("Restore exit with rc=%d\n", rc);
diff -r 6f9435bb6a19 -r 8321f8577a60 tools/libxc/ia64/xc_ia64_linux_save.c
--- a/tools/libxc/ia64/xc_ia64_linux_save.c Thu Oct 11 19:10:56 2007 +0900
+++ b/tools/libxc/ia64/xc_ia64_linux_save.c Thu Oct 11 19:16:10 2007 +0900
@@ -8,6 +8,7 @@
*
* Copyright (c) 2007 Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
* Use foreign p2m exposure.
+ * VTi domain support.
*/
#include <inttypes.h>
@@ -20,6 +21,7 @@
#include "xc_ia64.h"
#include "xc_ia64_save_restore.h"
#include "xc_efi.h"
+#include "xen/hvm/params.h"
/*
** Default values for important tuning parameters. Can override by passing
@@ -35,14 +37,6 @@
** During (live) save/migrate, we maintain a number of bitmaps to track
** which pages we have to send, and to skip.
*/
-
-#define BITS_PER_LONG (sizeof(unsigned long) * 8)
-
-#define BITMAP_ENTRY(_nr,_bmap) \
- ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
-
-#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
-
static inline int test_bit(int nr, volatile void * addr)
{
return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
@@ -136,6 +130,271 @@ retry:
return -1;
}
+static inline int
+md_is_not_ram(const efi_memory_desc_t *md)
+{
+ return ((md->type != EFI_CONVENTIONAL_MEMORY) ||
+ (md->attribute != EFI_MEMORY_WB) ||
+ (md->num_pages == 0));
+}
+
+/*
+ * Send through a list of all the PFNs that were not in the map at the close.
+ * We send the pages which were allocated. However the balloon driver may
+ * have decreased them after sending, so we have to check for freed
+ * pages after pausing the domain.
+ */
+static int
+xc_ia64_send_unallocated_list(int xc_handle, int io_fd,
+ struct xen_ia64_p2m_table *p2m_table,
+ xen_ia64_memmap_info_t *memmap_info,
+ void *memmap_desc_start, void *memmap_desc_end)
+{
+ void *p;
+ efi_memory_desc_t *md;
+
+ unsigned long N;
+ unsigned long pfntab[1024];
+ unsigned int j;
+
+ j = 0;
+ for (p = memmap_desc_start;
+ p < memmap_desc_end;
+ p += memmap_info->efi_memdesc_size) {
+ md = p;
+
+ if (md_is_not_ram(md))
+ continue;
+
+ for (N = md->phys_addr >> PAGE_SHIFT;
+ N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >>
+ PAGE_SHIFT;
+ N++) {
+ if (!xc_ia64_p2m_allocated(p2m_table, N))
+ j++;
+ }
+ }
+ if (!write_exact(io_fd, &j, sizeof(unsigned int))) {
+ ERROR("Error when writing to state file (6a)");
+ return -1;
+ }
+
+ j = 0;
+ for (p = memmap_desc_start;
+ p < memmap_desc_end;
+ p += memmap_info->efi_memdesc_size) {
+ md = p;
+
+ if (md_is_not_ram(md))
+ continue;
+
+ for (N = md->phys_addr >> PAGE_SHIFT;
+ N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >>
+ PAGE_SHIFT;
+ N++) {
+ if (!xc_ia64_p2m_allocated(p2m_table, N))
+ pfntab[j++] = N;
+ if (j == sizeof(pfntab)/sizeof(pfntab[0])) {
+ if (!write_exact(io_fd, &pfntab, sizeof(pfntab[0]) * j)) {
+ ERROR("Error when writing to state file (6b)");
+ return -1;
+ }
+ j = 0;
+ }
+ }
+ }
+ if (j > 0) {
+ if (!write_exact(io_fd, &pfntab, sizeof(pfntab[0]) * j)) {
+ ERROR("Error when writing to state file (6c)");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int
+xc_ia64_send_vcpu_context(int xc_handle, int io_fd, uint32_t dom,
+ uint32_t vcpu, vcpu_guest_context_t *ctxt)
+{
+ if (xc_vcpu_getcontext(xc_handle, dom, vcpu, ctxt)) {
+ ERROR("Could not get vcpu context");
+ return -1;
+ }
+
+ if (!write_exact(io_fd, ctxt, sizeof(*ctxt))) {
+ ERROR("Error when writing to state file (1)");
+ return -1;
+ }
+
+ fprintf(stderr, "ip=%016lx, b0=%016lx\n", ctxt->regs.ip, ctxt->regs.b[0]);
+ return 0;
+}
+
+static int
+xc_ia64_send_shared_info(int xc_handle, int io_fd, shared_info_t *live_shinfo)
+{
+ if (!write_exact(io_fd, live_shinfo, PAGE_SIZE)) {
+ ERROR("Error when writing to state file (1)");
+ return -1;
+ }
+ return 0;
+}
+
+static int
+xc_ia64_pv_send_context(int xc_handle, int io_fd, uint32_t dom,
+ shared_info_t *live_shinfo)
+{
+ /* A copy of the CPU context of the guest. */
+ vcpu_guest_context_t ctxt;
+ char *mem;
+
+ if (xc_ia64_send_vcpu_context(xc_handle, io_fd, dom, 0, &ctxt))
+ return -1;
+
+ mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ|PROT_WRITE, ctxt.privregs_pfn);
+ if (mem == NULL) {
+ ERROR("cannot map privreg page");
+ return -1;
+ }
+ if (!write_exact(io_fd, mem, PAGE_SIZE)) {
+ ERROR("Error when writing privreg to state file (5)");
+ munmap(mem, PAGE_SIZE);
+ return -1;
+ }
+ munmap(mem, PAGE_SIZE);
+
+ if (xc_ia64_send_shared_info(xc_handle, io_fd, live_shinfo))
+ return -1;
+
+ return 0;
+}
+
+static int
+xc_ia64_hvm_send_context(int xc_handle, int io_fd, uint32_t dom,
+ const xc_dominfo_t *info, shared_info_t *live_shinfo)
+{
+ int rc = -1;
+ unsigned int i;
+
+ /* vcpu map */
+ uint64_t max_virt_cpus;
+ unsigned long vcpumap_size;
+ uint64_t *vcpumap = NULL;
+
+ /* HVM: magic frames for ioreqs and xenstore comms */
+ const int hvm_params[] = {
+ HVM_PARAM_IOREQ_PFN,
+ HVM_PARAM_BUFIOREQ_PFN,
+ HVM_PARAM_STORE_PFN,
+ };
+ const int NR_PARAMS = sizeof(hvm_params) / sizeof(hvm_params[0]);
+ /* ioreq_pfn, bufioreq_pfn, store_pfn */
+ uint64_t magic_pfns[NR_PARAMS];
+
+    /* HVM: a buffer for holding HVM context */
+ uint64_t rec_size;
+ uint64_t hvm_buf_size = 0;
+ uint8_t *hvm_buf = NULL;
+
+ if (xc_ia64_send_shared_info(xc_handle, io_fd, live_shinfo))
+ return -1;
+
+ /* vcpu map */
+ max_virt_cpus = MAX_VIRT_CPUS;
+ vcpumap_size = (max_virt_cpus + 1 + sizeof(vcpumap[0]) - 1) /
+ sizeof(vcpumap[0]);
+ vcpumap = malloc(vcpumap_size);
+ if (vcpumap == NULL) {
+ ERROR("memory alloc for vcpumap");
+ goto out;
+ }
+ memset(vcpumap, 0, vcpumap_size);
+
+ for (i = 0; i <= info->max_vcpu_id; i++) {
+ xc_vcpuinfo_t vinfo;
+ if ((xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) && vinfo.online)
+ __set_bit(i, vcpumap);
+ }
+
+ if (!write_exact(io_fd, &max_virt_cpus, sizeof(max_virt_cpus))) {
+ ERROR("write max_virt_cpus");
+ goto out;
+ }
+
+ if (!write_exact(io_fd, vcpumap, vcpumap_size)) {
+ ERROR("write vcpumap");
+ goto out;
+ }
+
+ /* vcpu context */
+ for (i = 0; i <= info->max_vcpu_id; i++) {
+ /* A copy of the CPU context of the guest. */
+ vcpu_guest_context_t ctxt;
+
+ if (!__test_bit(i, vcpumap))
+ continue;
+
+ if (xc_ia64_send_vcpu_context(xc_handle, io_fd, dom, i, &ctxt))
+ goto out;
+
+ // system context of vcpu is sent as hvm context.
+ }
+
+ /* Save magic-page locations. */
+ memset(magic_pfns, 0, sizeof(magic_pfns));
+ for (i = 0; i < NR_PARAMS; i++) {
+ if (xc_get_hvm_param(xc_handle, dom, hvm_params[i], &magic_pfns[i])) {
+ PERROR("Error when xc_get_hvm_param");
+ goto out;
+ }
+ }
+
+ if (!write_exact(io_fd, magic_pfns, sizeof(magic_pfns))) {
+ ERROR("Error when writing to state file (7)");
+ goto out;
+ }
+
+ /* Need another buffer for HVM context */
+ hvm_buf_size = xc_domain_hvm_getcontext(xc_handle, dom, 0, 0);
+ if (hvm_buf_size == -1) {
+ ERROR("Couldn't get HVM context size from Xen");
+ goto out;
+ }
+
+ hvm_buf = malloc(hvm_buf_size);
+ if (!hvm_buf) {
+ ERROR("Couldn't allocate memory");
+ goto out;
+ }
+
+ /* Get HVM context from Xen and save it too */
+ rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf, hvm_buf_size);
+ if (rec_size == -1) {
+ ERROR("HVM:Could not get hvm buffer");
+ goto out;
+ }
+
+ if (!write_exact(io_fd, &rec_size, sizeof(rec_size))) {
+ ERROR("error write hvm buffer size");
+ goto out;
+ }
+
+ if (!write_exact(io_fd, hvm_buf, rec_size)) {
+ ERROR("write HVM info failed!\n");
+ goto out;
+ }
+
+ rc = 0;
+out:
+ if (hvm_buf != NULL)
+ free(hvm_buf);
+ if (vcpumap != NULL)
+ free(vcpumap);
+ return rc;
+}
+
int
xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
uint32_t max_factor, uint32_t flags, int (*suspend)(int),
@@ -147,15 +406,11 @@ xc_domain_save(int xc_handle, int io_fd,
int rc = 1;
- //int live = (flags & XCFLAGS_LIVE);
int debug = (flags & XCFLAGS_DEBUG);
int live = (flags & XCFLAGS_LIVE);
/* The new domain's shared-info frame number. */
unsigned long shared_info_frame;
-
- /* A copy of the CPU context of the guest. */
- vcpu_guest_context_t ctxt;
/* Live mapping of shared info structure */
shared_info_t *live_shinfo = NULL;
@@ -185,6 +440,12 @@ xc_domain_save(int xc_handle, int io_fd,
char *mem;
+ /* HVM: shared-memory bitmaps for getting log-dirty bits from qemu-dm */
+ unsigned long *qemu_bitmaps[2];
+ int qemu_active = 0;
+ int qemu_non_active = 1;
+
+ /* for foreign p2m exposure */
unsigned int memmap_info_num_pages;
unsigned long memmap_size = 0;
xen_ia64_memmap_info_t *memmap_info_live = NULL;
@@ -299,6 +560,14 @@ xc_domain_save(int xc_handle, int io_fd,
goto out;
}
+ if (hvm) {
+ /* Get qemu-dm logging dirty pages too */
+ void *seg = init_qemu_maps(dom, bitmap_size);
+ qemu_bitmaps[0] = seg;
+ qemu_bitmaps[1] = seg + bitmap_size;
+ qemu_active = 0;
+ qemu_non_active = 1;
+ }
} else {
/* This is a non-live suspend. Issue the call back to get the
@@ -374,9 +643,7 @@ xc_domain_save(int xc_handle, int io_fd,
p < memmap_desc_end;
p += memmap_info->efi_memdesc_size) {
md = p;
- if (md->type != EFI_CONVENTIONAL_MEMORY ||
- md->attribute != EFI_MEMORY_WB ||
- md->num_pages == 0)
+ if (md_is_not_ram(md))
continue;
for (N = md->phys_addr >> PAGE_SHIFT;
@@ -455,11 +722,27 @@ xc_domain_save(int xc_handle, int io_fd,
goto out;
}
+ if (hvm) {
+ unsigned int j;
+ /* Pull in the dirty bits from qemu-dm too */
+ if (!last_iter) {
+ qemu_active = qemu_non_active;
+ qemu_non_active = qemu_active ? 0 : 1;
+ qemu_flip_buffer(dom, qemu_active);
+ for (j = 0; j < bitmap_size / sizeof(unsigned long); j++) {
+ to_send[j] |= qemu_bitmaps[qemu_non_active][j];
+ qemu_bitmaps[qemu_non_active][j] = 0;
+ }
+ } else {
+ for (j = 0; j < bitmap_size / sizeof(unsigned long); j++)
+ to_send[j] |= qemu_bitmaps[qemu_active][j];
+ }
+ }
+
sent_last_iter = sent_this_iter;
//print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
}
-
}
fprintf(stderr, "All memory is saved\n");
@@ -473,100 +756,18 @@ xc_domain_save(int xc_handle, int io_fd,
}
}
- /*
- * Send through a list of all the PFNs that were not in map at the close.
- * We send pages which was allocated. However balloon driver may
- * decreased after sending page. So we have to check the freed
- * page after pausing the domain.
- */
- {
- unsigned long N;
- unsigned long pfntab[1024];
- unsigned int j;
-
- j = 0;
- for (p = memmap_desc_start;
- p < memmap_desc_end;
- p += memmap_info->efi_memdesc_size) {
- md = p;
- if (md->type != EFI_CONVENTIONAL_MEMORY ||
- md->attribute != EFI_MEMORY_WB ||
- md->num_pages == 0)
- continue;
- for (N = md->phys_addr >> PAGE_SHIFT;
- N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >>
- PAGE_SHIFT;
- N++) {
- if (!xc_ia64_p2m_allocated(&p2m_table, N))
- j++;
- }
- }
- if (!write_exact(io_fd, &j, sizeof(unsigned int))) {
- ERROR("Error when writing to state file (6a)");
- goto out;
- }
-
- j = 0;
- for (p = memmap_desc_start;
- p < memmap_desc_end;
- p += memmap_info->efi_memdesc_size) {
- md = p;
- if (md->type != EFI_CONVENTIONAL_MEMORY ||
- md->attribute != EFI_MEMORY_WB ||
- md->num_pages == 0)
- continue;
- for (N = md->phys_addr >> PAGE_SHIFT;
- N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >>
- PAGE_SHIFT;
- N++) {
- if (!xc_ia64_p2m_allocated(&p2m_table, N))
- pfntab[j++] = N;
- if (j == sizeof(pfntab)/sizeof(pfntab[0])) {
- if (!write_exact(io_fd, &pfntab, sizeof(pfntab[0]) * j)) {
- ERROR("Error when writing to state file (6b)");
- goto out;
- }
- j = 0;
- }
- }
- }
- if (j > 0) {
- if (!write_exact(io_fd, &pfntab, sizeof(pfntab[0]) * j)) {
- ERROR("Error when writing to state file (6b)");
- goto out;
- }
- }
- }
-
- if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) {
- ERROR("Could not get vcpu context");
- goto out;
- }
-
- if (!write_exact(io_fd, &ctxt, sizeof(ctxt))) {
- ERROR("Error when writing to state file (1)");
- goto out;
- }
-
- fprintf(stderr, "ip=%016lx, b0=%016lx\n", ctxt.regs.ip, ctxt.regs.b[0]);
-
- mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
- PROT_READ|PROT_WRITE, ctxt.privregs_pfn);
- if (mem == NULL) {
- ERROR("cannot map privreg page");
- goto out;
- }
- if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) {
- ERROR("Error when writing privreg to state file (5)");
- munmap(mem, PAGE_SIZE);
- goto out;
- }
- munmap(mem, PAGE_SIZE);
-
- if (!write_exact(io_fd, live_shinfo, PAGE_SIZE)) {
- ERROR("Error when writing to state file (1)");
- goto out;
- }
+ if (xc_ia64_send_unallocated_list(xc_handle, io_fd, &p2m_table,
+ memmap_info,
+ memmap_desc_start, memmap_desc_end))
+ goto out;
+
+ if (!hvm)
+ rc = xc_ia64_pv_send_context(xc_handle, io_fd, dom, live_shinfo);
+ else
+ rc = xc_ia64_hvm_send_context(xc_handle, io_fd,
+ dom, &info, live_shinfo);
+ if (rc)
+ goto out;
/* Success! */
rc = 0;
diff -r 6f9435bb6a19 -r 8321f8577a60 tools/libxc/ia64/xc_ia64_save_restore.h
--- a/tools/libxc/ia64/xc_ia64_save_restore.h Thu Oct 11 19:10:56 2007 +0900
+++ b/tools/libxc/ia64/xc_ia64_save_restore.h Thu Oct 11 19:16:10 2007 +0900
@@ -31,6 +31,27 @@
#define XC_IA64_SR_FORMAT_VER_CURRENT XC_IA64_SR_FORMAT_VER_TWO
+/*
+** During (live) save/migrate, we maintain a number of bitmaps to track
+** which pages we have to send, and to skip.
+*/
+#define BITS_PER_LONG (sizeof(unsigned long) * 8)
+
+#define BITMAP_ENTRY(_nr,_bmap) \
+ ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
+
+#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
+
+static inline int __test_bit(int nr, void * addr)
+{
+ return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
+}
+
+static inline void __set_bit(int nr, void * addr)
+{
+ BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
+}
+
#endif /* XC_IA64_SAVE_RESTORE_H */
/*
16070_8321f8577a60_libxc_vti_domain_save_restore.patch
Description: Text Data
_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel
|